klaude-code 2.4.1__py3-none-any.whl → 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/app/runtime.py +2 -6
- klaude_code/cli/main.py +0 -1
- klaude_code/config/assets/builtin_config.yaml +7 -0
- klaude_code/const.py +7 -4
- klaude_code/core/agent.py +10 -1
- klaude_code/core/agent_profile.py +47 -35
- klaude_code/core/executor.py +6 -21
- klaude_code/core/manager/sub_agent_manager.py +17 -1
- klaude_code/core/prompts/prompt-sub-agent-web.md +4 -4
- klaude_code/core/task.py +65 -4
- klaude_code/core/tool/__init__.py +0 -5
- klaude_code/core/tool/context.py +12 -1
- klaude_code/core/tool/offload.py +311 -0
- klaude_code/core/tool/shell/bash_tool.md +1 -43
- klaude_code/core/tool/sub_agent_tool.py +1 -0
- klaude_code/core/tool/todo/todo_write_tool.md +0 -23
- klaude_code/core/tool/tool_runner.py +14 -9
- klaude_code/core/tool/web/web_fetch_tool.md +1 -1
- klaude_code/core/tool/web/web_fetch_tool.py +14 -39
- klaude_code/core/turn.py +128 -138
- klaude_code/llm/anthropic/client.py +176 -82
- klaude_code/llm/bedrock/client.py +8 -12
- klaude_code/llm/claude/client.py +11 -15
- klaude_code/llm/client.py +31 -4
- klaude_code/llm/codex/client.py +7 -11
- klaude_code/llm/google/client.py +150 -69
- klaude_code/llm/openai_compatible/client.py +10 -15
- klaude_code/llm/openai_compatible/stream.py +68 -6
- klaude_code/llm/openrouter/client.py +9 -15
- klaude_code/llm/partial_message.py +35 -0
- klaude_code/llm/responses/client.py +134 -68
- klaude_code/llm/usage.py +30 -0
- klaude_code/protocol/commands.py +0 -4
- klaude_code/protocol/events/metadata.py +1 -0
- klaude_code/protocol/events/streaming.py +1 -0
- klaude_code/protocol/events/system.py +0 -4
- klaude_code/protocol/model.py +2 -15
- klaude_code/protocol/sub_agent/explore.py +0 -10
- klaude_code/protocol/sub_agent/image_gen.py +0 -7
- klaude_code/protocol/sub_agent/task.py +0 -10
- klaude_code/protocol/sub_agent/web.py +4 -12
- klaude_code/session/templates/export_session.html +4 -4
- klaude_code/skill/manager.py +2 -1
- klaude_code/tui/components/metadata.py +41 -49
- klaude_code/tui/components/rich/markdown.py +1 -3
- klaude_code/tui/components/rich/theme.py +2 -2
- klaude_code/tui/components/sub_agent.py +9 -1
- klaude_code/tui/components/tools.py +0 -31
- klaude_code/tui/components/welcome.py +1 -32
- klaude_code/tui/input/prompt_toolkit.py +25 -9
- klaude_code/tui/machine.py +40 -8
- klaude_code/tui/renderer.py +1 -0
- {klaude_code-2.4.1.dist-info → klaude_code-2.5.0.dist-info}/METADATA +2 -2
- {klaude_code-2.4.1.dist-info → klaude_code-2.5.0.dist-info}/RECORD +56 -56
- klaude_code/core/prompts/prompt-nano-banana.md +0 -1
- klaude_code/core/tool/truncation.py +0 -203
- {klaude_code-2.4.1.dist-info → klaude_code-2.5.0.dist-info}/WHEEL +0 -0
- {klaude_code-2.4.1.dist-info → klaude_code-2.5.0.dist-info}/entry_points.txt +0 -0
klaude_code/llm/google/client.py
CHANGED
|
@@ -21,11 +21,12 @@ from google.genai.types import (
|
|
|
21
21
|
UsageMetadata,
|
|
22
22
|
)
|
|
23
23
|
|
|
24
|
-
from klaude_code.llm.client import LLMClientABC
|
|
24
|
+
from klaude_code.llm.client import LLMClientABC, LLMStreamABC
|
|
25
25
|
from klaude_code.llm.google.input import convert_history_to_contents, convert_tool_schema
|
|
26
26
|
from klaude_code.llm.input_common import apply_config_defaults
|
|
27
|
+
from klaude_code.llm.partial_message import degrade_thinking_to_text
|
|
27
28
|
from klaude_code.llm.registry import register
|
|
28
|
-
from klaude_code.llm.usage import MetadataTracker
|
|
29
|
+
from klaude_code.llm.usage import MetadataTracker, error_llm_stream
|
|
29
30
|
from klaude_code.log import DebugType, log_debug
|
|
30
31
|
from klaude_code.protocol import llm_param, message, model
|
|
31
32
|
|
|
@@ -135,19 +136,83 @@ def _map_finish_reason(reason: str) -> model.StopReason | None:
|
|
|
135
136
|
return mapping.get(normalized)
|
|
136
137
|
|
|
137
138
|
|
|
139
|
+
class GoogleStreamStateManager:
|
|
140
|
+
"""Manages streaming state for Google LLM responses.
|
|
141
|
+
|
|
142
|
+
Accumulates thinking content, assistant text, and tool calls during streaming
|
|
143
|
+
to support get_partial_message() for cancellation scenarios.
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
def __init__(self, param_model: str) -> None:
|
|
147
|
+
self.param_model = param_model
|
|
148
|
+
self.accumulated_thoughts: list[str] = []
|
|
149
|
+
self.accumulated_text: list[str] = []
|
|
150
|
+
self.thought_signature: str | None = None
|
|
151
|
+
self.assistant_parts: list[message.Part] = []
|
|
152
|
+
self.response_id: str | None = None
|
|
153
|
+
self.stop_reason: model.StopReason | None = None
|
|
154
|
+
|
|
155
|
+
def flush_thinking(self) -> None:
|
|
156
|
+
"""Flush accumulated thinking content into assistant_parts."""
|
|
157
|
+
if self.accumulated_thoughts:
|
|
158
|
+
self.assistant_parts.append(
|
|
159
|
+
message.ThinkingTextPart(
|
|
160
|
+
text="".join(self.accumulated_thoughts),
|
|
161
|
+
model_id=self.param_model,
|
|
162
|
+
)
|
|
163
|
+
)
|
|
164
|
+
self.accumulated_thoughts.clear()
|
|
165
|
+
if self.thought_signature:
|
|
166
|
+
self.assistant_parts.append(
|
|
167
|
+
message.ThinkingSignaturePart(
|
|
168
|
+
signature=self.thought_signature,
|
|
169
|
+
model_id=self.param_model,
|
|
170
|
+
format="google_thought_signature",
|
|
171
|
+
)
|
|
172
|
+
)
|
|
173
|
+
self.thought_signature = None
|
|
174
|
+
|
|
175
|
+
def flush_text(self) -> None:
|
|
176
|
+
"""Flush accumulated text content into assistant_parts."""
|
|
177
|
+
if not self.accumulated_text:
|
|
178
|
+
return
|
|
179
|
+
self.assistant_parts.append(message.TextPart(text="".join(self.accumulated_text)))
|
|
180
|
+
self.accumulated_text.clear()
|
|
181
|
+
|
|
182
|
+
def get_partial_message(self) -> message.AssistantMessage | None:
|
|
183
|
+
"""Build a partial AssistantMessage from accumulated state.
|
|
184
|
+
|
|
185
|
+
Flushes all accumulated content and returns the message.
|
|
186
|
+
Returns None if no content has been accumulated yet.
|
|
187
|
+
"""
|
|
188
|
+
self.flush_thinking()
|
|
189
|
+
self.flush_text()
|
|
190
|
+
|
|
191
|
+
filtered_parts: list[message.Part] = []
|
|
192
|
+
for part in self.assistant_parts:
|
|
193
|
+
if isinstance(part, message.ToolCallPart):
|
|
194
|
+
continue
|
|
195
|
+
filtered_parts.append(part)
|
|
196
|
+
|
|
197
|
+
filtered_parts = degrade_thinking_to_text(filtered_parts)
|
|
198
|
+
|
|
199
|
+
if not filtered_parts:
|
|
200
|
+
return None
|
|
201
|
+
return message.AssistantMessage(
|
|
202
|
+
parts=filtered_parts,
|
|
203
|
+
response_id=self.response_id,
|
|
204
|
+
stop_reason="aborted",
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
|
|
138
208
|
async def parse_google_stream(
|
|
139
209
|
stream: AsyncIterator[Any],
|
|
140
210
|
param: llm_param.LLMCallParameter,
|
|
141
211
|
metadata_tracker: MetadataTracker,
|
|
212
|
+
state: GoogleStreamStateManager,
|
|
142
213
|
) -> AsyncGenerator[message.LLMStreamItem]:
|
|
143
|
-
response_id: str | None = None
|
|
144
214
|
stage: Literal["waiting", "thinking", "assistant", "tool"] = "waiting"
|
|
145
215
|
|
|
146
|
-
accumulated_text: list[str] = []
|
|
147
|
-
accumulated_thoughts: list[str] = []
|
|
148
|
-
thought_signature: str | None = None
|
|
149
|
-
assistant_parts: list[message.Part] = []
|
|
150
|
-
|
|
151
216
|
# Track tool calls where args arrive as partial updates.
|
|
152
217
|
partial_args_by_call: dict[str, dict[str, Any]] = {}
|
|
153
218
|
started_tool_calls: dict[str, str] = {} # call_id -> name
|
|
@@ -155,33 +220,6 @@ async def parse_google_stream(
|
|
|
155
220
|
completed_tool_items: set[str] = set()
|
|
156
221
|
|
|
157
222
|
last_usage_metadata: UsageMetadata | None = None
|
|
158
|
-
stop_reason: model.StopReason | None = None
|
|
159
|
-
|
|
160
|
-
def flush_thinking() -> None:
|
|
161
|
-
nonlocal thought_signature
|
|
162
|
-
if accumulated_thoughts:
|
|
163
|
-
assistant_parts.append(
|
|
164
|
-
message.ThinkingTextPart(
|
|
165
|
-
text="".join(accumulated_thoughts),
|
|
166
|
-
model_id=str(param.model_id),
|
|
167
|
-
)
|
|
168
|
-
)
|
|
169
|
-
accumulated_thoughts.clear()
|
|
170
|
-
if thought_signature:
|
|
171
|
-
assistant_parts.append(
|
|
172
|
-
message.ThinkingSignaturePart(
|
|
173
|
-
signature=thought_signature,
|
|
174
|
-
model_id=str(param.model_id),
|
|
175
|
-
format="google_thought_signature",
|
|
176
|
-
)
|
|
177
|
-
)
|
|
178
|
-
thought_signature = None
|
|
179
|
-
|
|
180
|
-
def flush_text() -> None:
|
|
181
|
-
if not accumulated_text:
|
|
182
|
-
return
|
|
183
|
-
assistant_parts.append(message.TextPart(text="".join(accumulated_text)))
|
|
184
|
-
accumulated_text.clear()
|
|
185
223
|
|
|
186
224
|
async for chunk in stream:
|
|
187
225
|
log_debug(
|
|
@@ -190,8 +228,8 @@ async def parse_google_stream(
|
|
|
190
228
|
debug_type=DebugType.LLM_STREAM,
|
|
191
229
|
)
|
|
192
230
|
|
|
193
|
-
if response_id is None:
|
|
194
|
-
response_id = getattr(chunk, "response_id", None) or uuid4().hex
|
|
231
|
+
if state.response_id is None:
|
|
232
|
+
state.response_id = getattr(chunk, "response_id", None) or uuid4().hex
|
|
195
233
|
|
|
196
234
|
if getattr(chunk, "usage_metadata", None) is not None:
|
|
197
235
|
last_usage_metadata = chunk.usage_metadata
|
|
@@ -204,7 +242,7 @@ async def parse_google_stream(
|
|
|
204
242
|
reason_value = finish_reason
|
|
205
243
|
else:
|
|
206
244
|
reason_value = getattr(finish_reason, "name", None) or str(finish_reason)
|
|
207
|
-
stop_reason = _map_finish_reason(reason_value)
|
|
245
|
+
state.stop_reason = _map_finish_reason(reason_value)
|
|
208
246
|
content = getattr(candidate0, "content", None) if candidate0 else None
|
|
209
247
|
content_parts = getattr(content, "parts", None) if content else None
|
|
210
248
|
if not content_parts:
|
|
@@ -218,18 +256,18 @@ async def parse_google_stream(
|
|
|
218
256
|
metadata_tracker.record_token()
|
|
219
257
|
if getattr(part, "thought", False) is True:
|
|
220
258
|
if stage == "assistant":
|
|
221
|
-
flush_text()
|
|
259
|
+
state.flush_text()
|
|
222
260
|
stage = "thinking"
|
|
223
|
-
accumulated_thoughts.append(text)
|
|
261
|
+
state.accumulated_thoughts.append(text)
|
|
224
262
|
if getattr(part, "thought_signature", None):
|
|
225
|
-
thought_signature = part.thought_signature
|
|
226
|
-
yield message.ThinkingTextDelta(content=text, response_id=response_id)
|
|
263
|
+
state.thought_signature = part.thought_signature
|
|
264
|
+
yield message.ThinkingTextDelta(content=text, response_id=state.response_id)
|
|
227
265
|
else:
|
|
228
266
|
if stage == "thinking":
|
|
229
|
-
flush_thinking()
|
|
267
|
+
state.flush_thinking()
|
|
230
268
|
stage = "assistant"
|
|
231
|
-
accumulated_text.append(text)
|
|
232
|
-
yield message.AssistantTextDelta(content=text, response_id=response_id)
|
|
269
|
+
state.accumulated_text.append(text)
|
|
270
|
+
yield message.AssistantTextDelta(content=text, response_id=state.response_id)
|
|
233
271
|
|
|
234
272
|
function_call = getattr(part, "function_call", None)
|
|
235
273
|
if function_call is None:
|
|
@@ -242,16 +280,16 @@ async def parse_google_stream(
|
|
|
242
280
|
|
|
243
281
|
if call_id not in started_tool_items:
|
|
244
282
|
started_tool_items.add(call_id)
|
|
245
|
-
yield message.ToolCallStartDelta(response_id=response_id, call_id=call_id, name=name)
|
|
283
|
+
yield message.ToolCallStartDelta(response_id=state.response_id, call_id=call_id, name=name)
|
|
246
284
|
|
|
247
285
|
args_obj = getattr(function_call, "args", None)
|
|
248
286
|
if args_obj is not None:
|
|
249
287
|
if stage == "thinking":
|
|
250
|
-
flush_thinking()
|
|
288
|
+
state.flush_thinking()
|
|
251
289
|
if stage == "assistant":
|
|
252
|
-
flush_text()
|
|
290
|
+
state.flush_text()
|
|
253
291
|
stage = "tool"
|
|
254
|
-
assistant_parts.append(
|
|
292
|
+
state.assistant_parts.append(
|
|
255
293
|
message.ToolCallPart(
|
|
256
294
|
call_id=call_id,
|
|
257
295
|
tool_name=name,
|
|
@@ -269,11 +307,11 @@ async def parse_google_stream(
|
|
|
269
307
|
will_continue = getattr(function_call, "will_continue", None)
|
|
270
308
|
if will_continue is False and call_id in partial_args_by_call and call_id not in completed_tool_items:
|
|
271
309
|
if stage == "thinking":
|
|
272
|
-
flush_thinking()
|
|
310
|
+
state.flush_thinking()
|
|
273
311
|
if stage == "assistant":
|
|
274
|
-
flush_text()
|
|
312
|
+
state.flush_text()
|
|
275
313
|
stage = "tool"
|
|
276
|
-
assistant_parts.append(
|
|
314
|
+
state.assistant_parts.append(
|
|
277
315
|
message.ToolCallPart(
|
|
278
316
|
call_id=call_id,
|
|
279
317
|
tool_name=name,
|
|
@@ -287,7 +325,7 @@ async def parse_google_stream(
|
|
|
287
325
|
if call_id in completed_tool_items:
|
|
288
326
|
continue
|
|
289
327
|
args = partial_args_by_call.get(call_id, {})
|
|
290
|
-
assistant_parts.append(
|
|
328
|
+
state.assistant_parts.append(
|
|
291
329
|
message.ToolCallPart(
|
|
292
330
|
call_id=call_id,
|
|
293
331
|
tool_name=name,
|
|
@@ -295,23 +333,64 @@ async def parse_google_stream(
|
|
|
295
333
|
)
|
|
296
334
|
)
|
|
297
335
|
|
|
298
|
-
flush_thinking()
|
|
299
|
-
flush_text()
|
|
336
|
+
state.flush_thinking()
|
|
337
|
+
state.flush_text()
|
|
300
338
|
|
|
301
339
|
usage = _usage_from_metadata(last_usage_metadata, context_limit=param.context_limit, max_tokens=param.max_tokens)
|
|
302
340
|
if usage is not None:
|
|
303
341
|
metadata_tracker.set_usage(usage)
|
|
304
342
|
metadata_tracker.set_model_name(str(param.model_id))
|
|
305
|
-
metadata_tracker.set_response_id(response_id)
|
|
343
|
+
metadata_tracker.set_response_id(state.response_id)
|
|
306
344
|
metadata = metadata_tracker.finalize()
|
|
307
345
|
yield message.AssistantMessage(
|
|
308
|
-
parts=assistant_parts,
|
|
309
|
-
response_id=response_id,
|
|
346
|
+
parts=state.assistant_parts,
|
|
347
|
+
response_id=state.response_id,
|
|
310
348
|
usage=metadata,
|
|
311
|
-
stop_reason=stop_reason,
|
|
349
|
+
stop_reason=state.stop_reason,
|
|
312
350
|
)
|
|
313
351
|
|
|
314
352
|
|
|
353
|
+
class GoogleLLMStream(LLMStreamABC):
|
|
354
|
+
"""LLMStream implementation for Google LLM clients."""
|
|
355
|
+
|
|
356
|
+
def __init__(
|
|
357
|
+
self,
|
|
358
|
+
stream: AsyncIterator[Any],
|
|
359
|
+
*,
|
|
360
|
+
param: llm_param.LLMCallParameter,
|
|
361
|
+
metadata_tracker: MetadataTracker,
|
|
362
|
+
state: GoogleStreamStateManager,
|
|
363
|
+
) -> None:
|
|
364
|
+
self._stream = stream
|
|
365
|
+
self._param = param
|
|
366
|
+
self._metadata_tracker = metadata_tracker
|
|
367
|
+
self._state = state
|
|
368
|
+
self._completed = False
|
|
369
|
+
|
|
370
|
+
def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
|
|
371
|
+
return self._iterate()
|
|
372
|
+
|
|
373
|
+
async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
|
|
374
|
+
try:
|
|
375
|
+
async for item in parse_google_stream(
|
|
376
|
+
self._stream,
|
|
377
|
+
param=self._param,
|
|
378
|
+
metadata_tracker=self._metadata_tracker,
|
|
379
|
+
state=self._state,
|
|
380
|
+
):
|
|
381
|
+
if isinstance(item, message.AssistantMessage):
|
|
382
|
+
self._completed = True
|
|
383
|
+
yield item
|
|
384
|
+
except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
|
|
385
|
+
yield message.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
|
|
386
|
+
yield message.AssistantMessage(parts=[], response_id=None, usage=self._metadata_tracker.finalize())
|
|
387
|
+
|
|
388
|
+
def get_partial_message(self) -> message.AssistantMessage | None:
|
|
389
|
+
if self._completed:
|
|
390
|
+
return None
|
|
391
|
+
return self._state.get_partial_message()
|
|
392
|
+
|
|
393
|
+
|
|
315
394
|
@register(llm_param.LLMClientProtocol.GOOGLE)
|
|
316
395
|
class GoogleClient(LLMClientABC):
|
|
317
396
|
def __init__(self, config: llm_param.LLMConfigParameter):
|
|
@@ -332,7 +411,7 @@ class GoogleClient(LLMClientABC):
|
|
|
332
411
|
return cls(config)
|
|
333
412
|
|
|
334
413
|
@override
|
|
335
|
-
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
|
|
414
|
+
async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
|
|
336
415
|
param = apply_config_defaults(param, self.get_llm_config())
|
|
337
416
|
metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
|
|
338
417
|
|
|
@@ -359,13 +438,15 @@ class GoogleClient(LLMClientABC):
|
|
|
359
438
|
config=config,
|
|
360
439
|
)
|
|
361
440
|
except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
441
|
+
return error_llm_stream(
|
|
442
|
+
metadata_tracker,
|
|
443
|
+
error=f"{e.__class__.__name__} {e!s}",
|
|
444
|
+
)
|
|
365
445
|
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
446
|
+
state = GoogleStreamStateManager(param_model=str(param.model_id))
|
|
447
|
+
return GoogleLLMStream(
|
|
448
|
+
stream,
|
|
449
|
+
param=param,
|
|
450
|
+
metadata_tracker=metadata_tracker,
|
|
451
|
+
state=state,
|
|
452
|
+
)
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from collections.abc import AsyncGenerator
|
|
3
2
|
from typing import Any, override
|
|
4
3
|
|
|
5
4
|
import httpx
|
|
@@ -7,14 +6,14 @@ import openai
|
|
|
7
6
|
from openai.types.chat.completion_create_params import CompletionCreateParamsStreaming
|
|
8
7
|
|
|
9
8
|
from klaude_code.const import LLM_HTTP_TIMEOUT_CONNECT, LLM_HTTP_TIMEOUT_READ, LLM_HTTP_TIMEOUT_TOTAL
|
|
10
|
-
from klaude_code.llm.client import LLMClientABC
|
|
9
|
+
from klaude_code.llm.client import LLMClientABC, LLMStreamABC
|
|
11
10
|
from klaude_code.llm.input_common import apply_config_defaults
|
|
12
11
|
from klaude_code.llm.openai_compatible.input import convert_history_to_input, convert_tool_schema
|
|
13
|
-
from klaude_code.llm.openai_compatible.stream import DefaultReasoningHandler, parse_chat_completions_stream
|
|
12
|
+
from klaude_code.llm.openai_compatible.stream import DefaultReasoningHandler, OpenAILLMStream
|
|
14
13
|
from klaude_code.llm.registry import register
|
|
15
|
-
from klaude_code.llm.usage import MetadataTracker
|
|
14
|
+
from klaude_code.llm.usage import MetadataTracker, error_llm_stream
|
|
16
15
|
from klaude_code.log import DebugType, log_debug
|
|
17
|
-
from klaude_code.protocol import llm_param, message
|
|
16
|
+
from klaude_code.protocol import llm_param
|
|
18
17
|
|
|
19
18
|
|
|
20
19
|
def build_payload(param: llm_param.LLMCallParameter) -> tuple[CompletionCreateParamsStreaming, dict[str, object]]:
|
|
@@ -77,7 +76,7 @@ class OpenAICompatibleClient(LLMClientABC):
|
|
|
77
76
|
return cls(config)
|
|
78
77
|
|
|
79
78
|
@override
|
|
80
|
-
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
|
|
79
|
+
async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
|
|
81
80
|
param = apply_config_defaults(param, self.get_llm_config())
|
|
82
81
|
|
|
83
82
|
metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
|
|
@@ -85,9 +84,8 @@ class OpenAICompatibleClient(LLMClientABC):
|
|
|
85
84
|
try:
|
|
86
85
|
payload, extra_body = build_payload(param)
|
|
87
86
|
except (ValueError, OSError) as e:
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
return
|
|
87
|
+
return error_llm_stream(metadata_tracker, error=f"{e.__class__.__name__} {e!s}")
|
|
88
|
+
|
|
91
89
|
extra_headers: dict[str, str] = {"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)}
|
|
92
90
|
|
|
93
91
|
log_debug(
|
|
@@ -103,9 +101,7 @@ class OpenAICompatibleClient(LLMClientABC):
|
|
|
103
101
|
extra_headers=extra_headers,
|
|
104
102
|
)
|
|
105
103
|
except (openai.OpenAIError, httpx.HTTPError) as e:
|
|
106
|
-
|
|
107
|
-
yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
|
|
108
|
-
return
|
|
104
|
+
return error_llm_stream(metadata_tracker, error=f"{e.__class__.__name__} {e!s}")
|
|
109
105
|
|
|
110
106
|
reasoning_handler = DefaultReasoningHandler(
|
|
111
107
|
param_model=str(param.model_id),
|
|
@@ -119,11 +115,10 @@ class OpenAICompatibleClient(LLMClientABC):
|
|
|
119
115
|
debug_type=DebugType.LLM_STREAM,
|
|
120
116
|
)
|
|
121
117
|
|
|
122
|
-
|
|
118
|
+
return OpenAILLMStream(
|
|
123
119
|
stream,
|
|
124
120
|
param=param,
|
|
125
121
|
metadata_tracker=metadata_tracker,
|
|
126
122
|
reasoning_handler=reasoning_handler,
|
|
127
123
|
on_event=on_event,
|
|
128
|
-
)
|
|
129
|
-
yield item
|
|
124
|
+
)
|
|
@@ -4,7 +4,7 @@ This module provides reusable primitives for OpenAI-compatible providers:
|
|
|
4
4
|
|
|
5
5
|
- ``StreamStateManager``: accumulates assistant content and tool calls.
|
|
6
6
|
- ``ReasoningHandlerABC``: provider-specific reasoning extraction + buffering.
|
|
7
|
-
- ``
|
|
7
|
+
- ``OpenAILLMStream``: LLMStream implementation for OpenAI-compatible clients.
|
|
8
8
|
|
|
9
9
|
OpenRouter uses the same OpenAI Chat Completions API surface but differs in
|
|
10
10
|
how reasoning is represented (``reasoning_details`` vs ``reasoning_content``).
|
|
@@ -24,8 +24,10 @@ import pydantic
|
|
|
24
24
|
from openai import AsyncStream
|
|
25
25
|
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
|
|
26
26
|
|
|
27
|
+
from klaude_code.llm.client import LLMStreamABC
|
|
27
28
|
from klaude_code.llm.image import save_assistant_image
|
|
28
29
|
from klaude_code.llm.openai_compatible.tool_call_accumulator import BasicToolCallAccumulator, ToolCallAccumulatorABC
|
|
30
|
+
from klaude_code.llm.partial_message import degrade_thinking_to_text
|
|
29
31
|
from klaude_code.llm.usage import MetadataTracker, convert_usage
|
|
30
32
|
from klaude_code.protocol import llm_param, message, model
|
|
31
33
|
|
|
@@ -93,6 +95,23 @@ class StreamStateManager:
|
|
|
93
95
|
self.flush_tool_calls()
|
|
94
96
|
return list(self.parts)
|
|
95
97
|
|
|
98
|
+
def get_partial_message(self) -> message.AssistantMessage | None:
|
|
99
|
+
"""Build a partial AssistantMessage from accumulated state.
|
|
100
|
+
|
|
101
|
+
Flushes all accumulated content (reasoning, assistant text, tool calls)
|
|
102
|
+
and returns the message. Returns None if no content has been accumulated.
|
|
103
|
+
"""
|
|
104
|
+
self.flush_reasoning()
|
|
105
|
+
self.flush_assistant()
|
|
106
|
+
parts = degrade_thinking_to_text(list(self.parts))
|
|
107
|
+
if not parts:
|
|
108
|
+
return None
|
|
109
|
+
return message.AssistantMessage(
|
|
110
|
+
parts=parts,
|
|
111
|
+
response_id=self.response_id,
|
|
112
|
+
stop_reason="aborted",
|
|
113
|
+
)
|
|
114
|
+
|
|
96
115
|
|
|
97
116
|
@dataclass(slots=True)
|
|
98
117
|
class ReasoningDeltaResult:
|
|
@@ -168,6 +187,7 @@ def _map_finish_reason(reason: str) -> model.StopReason | None:
|
|
|
168
187
|
async def parse_chat_completions_stream(
|
|
169
188
|
stream: AsyncStream[ChatCompletionChunk],
|
|
170
189
|
*,
|
|
190
|
+
state: StreamStateManager,
|
|
171
191
|
param: llm_param.LLMCallParameter,
|
|
172
192
|
metadata_tracker: MetadataTracker,
|
|
173
193
|
reasoning_handler: ReasoningHandlerABC,
|
|
@@ -176,13 +196,10 @@ async def parse_chat_completions_stream(
|
|
|
176
196
|
"""Parse OpenAI Chat Completions stream into stream items.
|
|
177
197
|
|
|
178
198
|
This is shared by OpenAI-compatible and OpenRouter clients.
|
|
199
|
+
The state parameter allows external access to accumulated content
|
|
200
|
+
for cancellation scenarios.
|
|
179
201
|
"""
|
|
180
202
|
|
|
181
|
-
state = StreamStateManager(
|
|
182
|
-
param_model=str(param.model_id),
|
|
183
|
-
reasoning_flusher=reasoning_handler.flush,
|
|
184
|
-
)
|
|
185
|
-
|
|
186
203
|
def _extract_image_url(image_obj: object) -> str | None:
|
|
187
204
|
image_url = getattr(image_obj, "image_url", None)
|
|
188
205
|
if image_url is not None:
|
|
@@ -323,3 +340,48 @@ async def parse_chat_completions_stream(
|
|
|
323
340
|
usage=metadata,
|
|
324
341
|
stop_reason=state.stop_reason,
|
|
325
342
|
)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
class OpenAILLMStream(LLMStreamABC):
|
|
346
|
+
"""LLMStream implementation for OpenAI-compatible clients."""
|
|
347
|
+
|
|
348
|
+
def __init__(
|
|
349
|
+
self,
|
|
350
|
+
stream: AsyncStream[ChatCompletionChunk],
|
|
351
|
+
*,
|
|
352
|
+
param: llm_param.LLMCallParameter,
|
|
353
|
+
metadata_tracker: MetadataTracker,
|
|
354
|
+
reasoning_handler: ReasoningHandlerABC,
|
|
355
|
+
on_event: Callable[[object], None] | None = None,
|
|
356
|
+
) -> None:
|
|
357
|
+
self._stream = stream
|
|
358
|
+
self._param = param
|
|
359
|
+
self._metadata_tracker = metadata_tracker
|
|
360
|
+
self._reasoning_handler = reasoning_handler
|
|
361
|
+
self._on_event = on_event
|
|
362
|
+
self._state = StreamStateManager(
|
|
363
|
+
param_model=str(param.model_id),
|
|
364
|
+
reasoning_flusher=reasoning_handler.flush,
|
|
365
|
+
)
|
|
366
|
+
self._completed = False
|
|
367
|
+
|
|
368
|
+
def __aiter__(self) -> AsyncGenerator[message.LLMStreamItem]:
|
|
369
|
+
return self._iterate()
|
|
370
|
+
|
|
371
|
+
async def _iterate(self) -> AsyncGenerator[message.LLMStreamItem]:
|
|
372
|
+
async for item in parse_chat_completions_stream(
|
|
373
|
+
self._stream,
|
|
374
|
+
state=self._state,
|
|
375
|
+
param=self._param,
|
|
376
|
+
metadata_tracker=self._metadata_tracker,
|
|
377
|
+
reasoning_handler=self._reasoning_handler,
|
|
378
|
+
on_event=self._on_event,
|
|
379
|
+
):
|
|
380
|
+
if isinstance(item, message.AssistantMessage):
|
|
381
|
+
self._completed = True
|
|
382
|
+
yield item
|
|
383
|
+
|
|
384
|
+
def get_partial_message(self) -> message.AssistantMessage | None:
|
|
385
|
+
if self._completed:
|
|
386
|
+
return None
|
|
387
|
+
return self._state.get_partial_message()
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import json
|
|
2
|
-
from collections.abc import AsyncGenerator
|
|
3
2
|
from typing import Any, cast, override
|
|
4
3
|
|
|
5
4
|
import httpx
|
|
@@ -14,16 +13,16 @@ from klaude_code.const import (
|
|
|
14
13
|
LLM_HTTP_TIMEOUT_TOTAL,
|
|
15
14
|
OPENROUTER_BASE_URL,
|
|
16
15
|
)
|
|
17
|
-
from klaude_code.llm.client import LLMClientABC
|
|
16
|
+
from klaude_code.llm.client import LLMClientABC, LLMStreamABC
|
|
18
17
|
from klaude_code.llm.input_common import apply_config_defaults
|
|
19
18
|
from klaude_code.llm.openai_compatible.input import convert_tool_schema
|
|
20
|
-
from klaude_code.llm.openai_compatible.stream import parse_chat_completions_stream
|
|
19
|
+
from klaude_code.llm.openai_compatible.stream import OpenAILLMStream
|
|
21
20
|
from klaude_code.llm.openrouter.input import convert_history_to_input, is_claude_model
|
|
22
21
|
from klaude_code.llm.openrouter.reasoning import ReasoningStreamHandler
|
|
23
22
|
from klaude_code.llm.registry import register
|
|
24
|
-
from klaude_code.llm.usage import MetadataTracker
|
|
23
|
+
from klaude_code.llm.usage import MetadataTracker, error_llm_stream
|
|
25
24
|
from klaude_code.log import DebugType, is_debug_enabled, log_debug
|
|
26
|
-
from klaude_code.protocol import llm_param, message
|
|
25
|
+
from klaude_code.protocol import llm_param
|
|
27
26
|
|
|
28
27
|
|
|
29
28
|
def build_payload(
|
|
@@ -103,7 +102,7 @@ class OpenRouterClient(LLMClientABC):
|
|
|
103
102
|
return cls(config)
|
|
104
103
|
|
|
105
104
|
@override
|
|
106
|
-
async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[message.LLMStreamItem]:
|
|
105
|
+
async def call(self, param: llm_param.LLMCallParameter) -> LLMStreamABC:
|
|
107
106
|
param = apply_config_defaults(param, self.get_llm_config())
|
|
108
107
|
|
|
109
108
|
metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
|
|
@@ -111,9 +110,7 @@ class OpenRouterClient(LLMClientABC):
|
|
|
111
110
|
try:
|
|
112
111
|
payload, extra_body, extra_headers = build_payload(param)
|
|
113
112
|
except (ValueError, OSError) as e:
|
|
114
|
-
|
|
115
|
-
yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
|
|
116
|
-
return
|
|
113
|
+
return error_llm_stream(metadata_tracker, error=f"{e.__class__.__name__} {e!s}")
|
|
117
114
|
|
|
118
115
|
log_debug(
|
|
119
116
|
json.dumps({**payload, **extra_body}, ensure_ascii=False, default=str),
|
|
@@ -128,9 +125,7 @@ class OpenRouterClient(LLMClientABC):
|
|
|
128
125
|
extra_headers=extra_headers,
|
|
129
126
|
)
|
|
130
127
|
except (openai.OpenAIError, httpx.HTTPError) as e:
|
|
131
|
-
|
|
132
|
-
yield message.AssistantMessage(parts=[], response_id=None, usage=metadata_tracker.finalize())
|
|
133
|
-
return
|
|
128
|
+
return error_llm_stream(metadata_tracker, error=f"{e.__class__.__name__} {e!s}")
|
|
134
129
|
|
|
135
130
|
reasoning_handler = ReasoningStreamHandler(
|
|
136
131
|
param_model=str(param.model_id),
|
|
@@ -144,11 +139,10 @@ class OpenRouterClient(LLMClientABC):
|
|
|
144
139
|
debug_type=DebugType.LLM_STREAM,
|
|
145
140
|
)
|
|
146
141
|
|
|
147
|
-
|
|
142
|
+
return OpenAILLMStream(
|
|
148
143
|
stream,
|
|
149
144
|
param=param,
|
|
150
145
|
metadata_tracker=metadata_tracker,
|
|
151
146
|
reasoning_handler=reasoning_handler,
|
|
152
147
|
on_event=on_event,
|
|
153
|
-
)
|
|
154
|
-
yield item
|
|
148
|
+
)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from klaude_code.protocol import message
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def degrade_thinking_to_text(parts: list[message.Part]) -> list[message.Part]:
|
|
7
|
+
"""Degrade thinking parts into a regular TextPart.
|
|
8
|
+
|
|
9
|
+
Some providers require thinking signatures/encrypted content to be echoed back
|
|
10
|
+
for subsequent calls. During interruption we cannot reliably determine whether
|
|
11
|
+
we have a complete signature, so we persist thinking as plain text instead.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
thinking_texts: list[str] = []
|
|
15
|
+
non_thinking_parts: list[message.Part] = []
|
|
16
|
+
|
|
17
|
+
for part in parts:
|
|
18
|
+
if isinstance(part, message.ThinkingTextPart):
|
|
19
|
+
text = part.text
|
|
20
|
+
if text and text.strip():
|
|
21
|
+
thinking_texts.append(text)
|
|
22
|
+
continue
|
|
23
|
+
if isinstance(part, message.ThinkingSignaturePart):
|
|
24
|
+
continue
|
|
25
|
+
non_thinking_parts.append(part)
|
|
26
|
+
|
|
27
|
+
if not thinking_texts:
|
|
28
|
+
return non_thinking_parts
|
|
29
|
+
|
|
30
|
+
joined = "\n".join(thinking_texts).strip()
|
|
31
|
+
thinking_block = f"<thinking>\n{joined}\n</thinking>"
|
|
32
|
+
if non_thinking_parts:
|
|
33
|
+
thinking_block += "\n\n"
|
|
34
|
+
|
|
35
|
+
return [message.TextPart(text=thinking_block), *non_thinking_parts]
|