klaude-code 1.6.0__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- klaude_code/cli/list_model.py +55 -4
- klaude_code/cli/main.py +10 -0
- klaude_code/cli/runtime.py +2 -2
- klaude_code/cli/session_cmd.py +3 -2
- klaude_code/command/fork_session_cmd.py +7 -0
- klaude_code/config/assets/builtin_config.yaml +61 -2
- klaude_code/config/builtin_config.py +1 -0
- klaude_code/config/config.py +19 -0
- klaude_code/config/thinking.py +14 -0
- klaude_code/const.py +17 -2
- klaude_code/core/executor.py +16 -3
- klaude_code/core/task.py +5 -3
- klaude_code/core/tool/shell/command_safety.py +3 -5
- klaude_code/llm/anthropic/client.py +127 -114
- klaude_code/llm/bedrock/__init__.py +3 -0
- klaude_code/llm/bedrock/client.py +60 -0
- klaude_code/llm/google/__init__.py +3 -0
- klaude_code/llm/google/client.py +309 -0
- klaude_code/llm/google/input.py +215 -0
- klaude_code/llm/registry.py +10 -5
- klaude_code/protocol/events.py +1 -0
- klaude_code/protocol/llm_param.py +9 -0
- klaude_code/session/export.py +14 -2
- klaude_code/session/session.py +52 -3
- klaude_code/session/store.py +3 -0
- klaude_code/session/templates/export_session.html +210 -18
- klaude_code/ui/modes/repl/input_prompt_toolkit.py +6 -46
- klaude_code/ui/modes/repl/renderer.py +5 -1
- klaude_code/ui/renderers/developer.py +1 -1
- klaude_code/ui/renderers/sub_agent.py +1 -1
- {klaude_code-1.6.0.dist-info → klaude_code-1.7.1.dist-info}/METADATA +82 -10
- {klaude_code-1.6.0.dist-info → klaude_code-1.7.1.dist-info}/RECORD +34 -29
- {klaude_code-1.6.0.dist-info → klaude_code-1.7.1.dist-info}/WHEEL +0 -0
- {klaude_code-1.6.0.dist-info → klaude_code-1.7.1.dist-info}/entry_points.txt +0 -0
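The headline change in 1.7.1 is two new LLM backends, AWS Bedrock and Google Gemini. Both plug into the existing client registry through the same `@register` decorator the Anthropic client already uses, keyed on `llm_param.LLMClientProtocol`. Below is a minimal sketch of that registration pattern; the decorator and enum names come from this diff, while the dict-backed internals are an assumption, since `registry.py` itself is not shown here.

```python
# Sketch of the plug-in registry the new clients register with. The decorator
# name and protocol enum come from the diff; the dict-based internals below
# are assumed, not the actual registry.py implementation.
from collections.abc import Callable
from enum import Enum


class LLMClientProtocol(str, Enum):
    ANTHROPIC = "anthropic"
    BEDROCK = "bedrock"
    GOOGLE = "google"


_CLIENTS: dict[LLMClientProtocol, type] = {}


def register(protocol: LLMClientProtocol) -> Callable[[type], type]:
    """Class decorator mapping a protocol value to its client class."""

    def decorator(cls: type) -> type:
        _CLIENTS[protocol] = cls
        return cls

    return decorator


def resolve(protocol: LLMClientProtocol) -> type:
    """Look up the client class for a configured protocol."""
    return _CLIENTS[protocol]
```

Each backend module registers itself at import time, which is consistent with the small `+10 -5` change to `registry.py` in the file list above.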
klaude_code/llm/anthropic/client.py

```diff
@@ -1,7 +1,7 @@
 import json
 import os
 from collections.abc import AsyncGenerator
-from typing import override
+from typing import Any, override
 
 import anthropic
 import httpx
@@ -58,6 +58,130 @@ def build_payload(param: llm_param.LLMCallParameter) -> MessageCreateParamsStrea
     return payload
 
 
+async def parse_anthropic_stream(
+    stream: Any,
+    param: llm_param.LLMCallParameter,
+    metadata_tracker: MetadataTracker,
+) -> AsyncGenerator[model.ConversationItem]:
+    """Parse Anthropic beta messages stream and yield conversation items.
+
+    This function is shared between AnthropicClient and BedrockClient.
+    """
+    accumulated_thinking: list[str] = []
+    accumulated_content: list[str] = []
+    response_id: str | None = None
+
+    current_tool_name: str | None = None
+    current_tool_call_id: str | None = None
+    current_tool_inputs: list[str] | None = None
+
+    input_token = 0
+    cached_token = 0
+
+    async for event in await stream:
+        log_debug(
+            f"[{event.type}]",
+            event.model_dump_json(exclude_none=True),
+            style="blue",
+            debug_type=DebugType.LLM_STREAM,
+        )
+        match event:
+            case BetaRawMessageStartEvent() as event:
+                response_id = event.message.id
+                cached_token = event.message.usage.cache_read_input_tokens or 0
+                input_token = event.message.usage.input_tokens
+                yield model.StartItem(response_id=response_id)
+            case BetaRawContentBlockDeltaEvent() as event:
+                match event.delta:
+                    case BetaThinkingDelta() as delta:
+                        if delta.thinking:
+                            metadata_tracker.record_token()
+                            accumulated_thinking.append(delta.thinking)
+                            yield model.ReasoningTextDelta(
+                                content=delta.thinking,
+                                response_id=response_id,
+                            )
+                    case BetaSignatureDelta() as delta:
+                        yield model.ReasoningEncryptedItem(
+                            encrypted_content=delta.signature,
+                            response_id=response_id,
+                            model=str(param.model),
+                        )
+                    case BetaTextDelta() as delta:
+                        if delta.text:
+                            metadata_tracker.record_token()
+                            accumulated_content.append(delta.text)
+                            yield model.AssistantMessageDelta(
+                                content=delta.text,
+                                response_id=response_id,
+                            )
+                    case BetaInputJSONDelta() as delta:
+                        if current_tool_inputs is not None:
+                            if delta.partial_json:
+                                metadata_tracker.record_token()
+                                current_tool_inputs.append(delta.partial_json)
+                    case _:
+                        pass
+            case BetaRawContentBlockStartEvent() as event:
+                match event.content_block:
+                    case BetaToolUseBlock() as block:
+                        metadata_tracker.record_token()
+                        yield model.ToolCallStartItem(
+                            response_id=response_id,
+                            call_id=block.id,
+                            name=block.name,
+                        )
+                        current_tool_name = block.name
+                        current_tool_call_id = block.id
+                        current_tool_inputs = []
+                    case _:
+                        pass
+            case BetaRawContentBlockStopEvent():
+                if len(accumulated_thinking) > 0:
+                    metadata_tracker.record_token()
+                    full_thinking = "".join(accumulated_thinking)
+                    yield model.ReasoningTextItem(
+                        content=full_thinking,
+                        response_id=response_id,
+                        model=str(param.model),
+                    )
+                    accumulated_thinking.clear()
+                if len(accumulated_content) > 0:
+                    metadata_tracker.record_token()
+                    yield model.AssistantMessageItem(
+                        content="".join(accumulated_content),
+                        response_id=response_id,
+                    )
+                    accumulated_content.clear()
+                if current_tool_name and current_tool_call_id:
+                    metadata_tracker.record_token()
+                    yield model.ToolCallItem(
+                        name=current_tool_name,
+                        call_id=current_tool_call_id,
+                        arguments="".join(current_tool_inputs) if current_tool_inputs else "",
+                        response_id=response_id,
+                    )
+                    current_tool_name = None
+                    current_tool_call_id = None
+                    current_tool_inputs = None
+            case BetaRawMessageDeltaEvent() as event:
+                metadata_tracker.set_usage(
+                    model.Usage(
+                        input_tokens=input_token + cached_token,
+                        output_tokens=event.usage.output_tokens,
+                        cached_tokens=cached_token,
+                        context_size=input_token + cached_token + event.usage.output_tokens,
+                        context_limit=param.context_limit,
+                        max_tokens=param.max_tokens,
+                    )
+                )
+                metadata_tracker.set_model_name(str(param.model))
+                metadata_tracker.set_response_id(response_id)
+                yield metadata_tracker.finalize()
+            case _:
+                pass
+
+
 @register(llm_param.LLMClientProtocol.ANTHROPIC)
 class AnthropicClient(LLMClientABC):
     def __init__(self, config: llm_param.LLMConfigParameter):
@@ -102,119 +226,8 @@ class AnthropicClient(LLMClientABC):
             extra_headers={"extra": json.dumps({"session_id": param.session_id}, sort_keys=True)},
         )
 
-        accumulated_thinking: list[str] = []
-        accumulated_content: list[str] = []
-        response_id: str | None = None
-
-        current_tool_name: str | None = None
-        current_tool_call_id: str | None = None
-        current_tool_inputs: list[str] | None = None
-
-        input_token = 0
-        cached_token = 0
-
         try:
-            async for event in await stream:
-                log_debug(
-                    f"[{event.type}]",
-                    event.model_dump_json(exclude_none=True),
-                    style="blue",
-                    debug_type=DebugType.LLM_STREAM,
-                )
-                match event:
-                    case BetaRawMessageStartEvent() as event:
-                        response_id = event.message.id
-                        cached_token = event.message.usage.cache_read_input_tokens or 0
-                        input_token = event.message.usage.input_tokens
-                        yield model.StartItem(response_id=response_id)
-                    case BetaRawContentBlockDeltaEvent() as event:
-                        match event.delta:
-                            case BetaThinkingDelta() as delta:
-                                if delta.thinking:
-                                    metadata_tracker.record_token()
-                                    accumulated_thinking.append(delta.thinking)
-                                    yield model.ReasoningTextDelta(
-                                        content=delta.thinking,
-                                        response_id=response_id,
-                                    )
-                            case BetaSignatureDelta() as delta:
-                                yield model.ReasoningEncryptedItem(
-                                    encrypted_content=delta.signature,
-                                    response_id=response_id,
-                                    model=str(param.model),
-                                )
-                            case BetaTextDelta() as delta:
-                                if delta.text:
-                                    metadata_tracker.record_token()
-                                    accumulated_content.append(delta.text)
-                                    yield model.AssistantMessageDelta(
-                                        content=delta.text,
-                                        response_id=response_id,
-                                    )
-                            case BetaInputJSONDelta() as delta:
-                                if current_tool_inputs is not None:
-                                    if delta.partial_json:
-                                        metadata_tracker.record_token()
-                                        current_tool_inputs.append(delta.partial_json)
-                            case _:
-                                pass
-                    case BetaRawContentBlockStartEvent() as event:
-                        match event.content_block:
-                            case BetaToolUseBlock() as block:
-                                metadata_tracker.record_token()
-                                yield model.ToolCallStartItem(
-                                    response_id=response_id,
-                                    call_id=block.id,
-                                    name=block.name,
-                                )
-                                current_tool_name = block.name
-                                current_tool_call_id = block.id
-                                current_tool_inputs = []
-                            case _:
-                                pass
-                    case BetaRawContentBlockStopEvent() as event:
-                        if len(accumulated_thinking) > 0:
-                            metadata_tracker.record_token()
-                            full_thinking = "".join(accumulated_thinking)
-                            yield model.ReasoningTextItem(
-                                content=full_thinking,
-                                response_id=response_id,
-                                model=str(param.model),
-                            )
-                            accumulated_thinking.clear()
-                        if len(accumulated_content) > 0:
-                            metadata_tracker.record_token()
-                            yield model.AssistantMessageItem(
-                                content="".join(accumulated_content),
-                                response_id=response_id,
-                            )
-                            accumulated_content.clear()
-                        if current_tool_name and current_tool_call_id:
-                            metadata_tracker.record_token()
-                            yield model.ToolCallItem(
-                                name=current_tool_name,
-                                call_id=current_tool_call_id,
-                                arguments="".join(current_tool_inputs) if current_tool_inputs else "",
-                                response_id=response_id,
-                            )
-                            current_tool_name = None
-                            current_tool_call_id = None
-                            current_tool_inputs = None
-                    case BetaRawMessageDeltaEvent() as event:
-                        metadata_tracker.set_usage(
-                            model.Usage(
-                                input_tokens=input_token + cached_token,
-                                output_tokens=event.usage.output_tokens,
-                                cached_tokens=cached_token,
-                                context_size=input_token + cached_token + event.usage.output_tokens,
-                                context_limit=param.context_limit,
-                                max_tokens=param.max_tokens,
-                            )
-                        )
-                        metadata_tracker.set_model_name(str(param.model))
-                        metadata_tracker.set_response_id(response_id)
-                        yield metadata_tracker.finalize()
-                    case _:
-                        pass
+            async for item in parse_anthropic_stream(stream, param, metadata_tracker):
+                yield item
         except (APIError, httpx.HTTPError) as e:
             yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
```
klaude_code/llm/bedrock/client.py (new file)

```diff
@@ -0,0 +1,60 @@
+"""AWS Bedrock LLM client using Anthropic SDK."""
+
+import json
+from collections.abc import AsyncGenerator
+from typing import override
+
+import anthropic
+import httpx
+from anthropic import APIError
+
+from klaude_code.llm.anthropic.client import build_payload, parse_anthropic_stream
+from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.input_common import apply_config_defaults
+from klaude_code.llm.registry import register
+from klaude_code.llm.usage import MetadataTracker
+from klaude_code.protocol import llm_param, model
+from klaude_code.trace import DebugType, log_debug
+
+
+@register(llm_param.LLMClientProtocol.BEDROCK)
+class BedrockClient(LLMClientABC):
+    """LLM client for AWS Bedrock using Anthropic SDK."""
+
+    def __init__(self, config: llm_param.LLMConfigParameter):
+        super().__init__(config)
+        self.client = anthropic.AsyncAnthropicBedrock(
+            aws_access_key=config.aws_access_key,
+            aws_secret_key=config.aws_secret_key,
+            aws_region=config.aws_region,
+            aws_session_token=config.aws_session_token,
+            aws_profile=config.aws_profile,
+            timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
+        )
+
+    @classmethod
+    @override
+    def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
+        return cls(config)
+
+    @override
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
+        param = apply_config_defaults(param, self.get_llm_config())
+
+        metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
+
+        payload = build_payload(param)
+
+        log_debug(
+            json.dumps(payload, ensure_ascii=False, default=str),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        stream = self.client.beta.messages.create(**payload)
+
+        try:
+            async for item in parse_anthropic_stream(stream, param, metadata_tracker):
+                yield item
+        except (APIError, httpx.HTTPError) as e:
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
```
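`BedrockClient` is deliberately thin: it reuses `build_payload` and `parse_anthropic_stream` from the Anthropic client and only swaps in `anthropic.AsyncAnthropicBedrock`, which signs requests with AWS credentials. A hypothetical instantiation, with example values only (the config plumbing above supplies these same kwargs):

```python
# Example values only; the real client is built from LLMConfigParameter.
import anthropic
import httpx

# Passing None for the explicit keys may let the SDK fall back to its
# default AWS credential resolution (environment, profile, and so on).
client = anthropic.AsyncAnthropicBedrock(
    aws_region="us-east-1",
    aws_profile="default",
    timeout=httpx.Timeout(300.0, connect=15.0, read=285.0),
)
```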
klaude_code/llm/google/client.py (new file)

```diff
@@ -0,0 +1,309 @@
+# pyright: reportUnknownMemberType=false
+# pyright: reportUnknownVariableType=false
+# pyright: reportUnknownArgumentType=false
+# pyright: reportAttributeAccessIssue=false
+
+import json
+from collections.abc import AsyncGenerator, AsyncIterator
+from typing import Any, cast, override
+from uuid import uuid4
+
+import httpx
+from google.genai import Client
+from google.genai.errors import APIError, ClientError, ServerError
+from google.genai.types import (
+    FunctionCallingConfig,
+    FunctionCallingConfigMode,
+    GenerateContentConfig,
+    HttpOptions,
+    ThinkingConfig,
+    ToolConfig,
+    UsageMetadata,
+)
+
+from klaude_code.llm.client import LLMClientABC
+from klaude_code.llm.google.input import convert_history_to_contents, convert_tool_schema
+from klaude_code.llm.input_common import apply_config_defaults
+from klaude_code.llm.registry import register
+from klaude_code.llm.usage import MetadataTracker
+from klaude_code.protocol import llm_param, model
+from klaude_code.trace import DebugType, log_debug
+
+
+def _build_config(param: llm_param.LLMCallParameter) -> GenerateContentConfig:
+    tool_list = convert_tool_schema(param.tools)
+    tool_config: ToolConfig | None = None
+
+    if tool_list:
+        tool_config = ToolConfig(
+            function_calling_config=FunctionCallingConfig(
+                mode=FunctionCallingConfigMode.AUTO,
+                # Gemini streams tool args; keep this enabled to maximize fidelity.
+                stream_function_call_arguments=True,
+            )
+        )
+
+    thinking_config: ThinkingConfig | None = None
+    if param.thinking and param.thinking.type == "enabled":
+        thinking_config = ThinkingConfig(
+            include_thoughts=True,
+            thinking_budget=param.thinking.budget_tokens,
+        )
+
+    return GenerateContentConfig(
+        system_instruction=param.system,
+        temperature=param.temperature,
+        max_output_tokens=param.max_tokens,
+        tools=tool_list or None,
+        tool_config=tool_config,
+        thinking_config=thinking_config,
+    )
+
+
+def _usage_from_metadata(
+    usage: UsageMetadata | None,
+    *,
+    context_limit: int | None,
+    max_tokens: int | None,
+) -> model.Usage | None:
+    if usage is None:
+        return None
+
+    cached = usage.cached_content_token_count or 0
+    prompt = usage.prompt_token_count or 0
+    response = usage.response_token_count or 0
+    thoughts = usage.thoughts_token_count or 0
+
+    total = usage.total_token_count
+    if total is None:
+        total = prompt + cached + response + thoughts
+
+    return model.Usage(
+        input_tokens=prompt + cached,
+        cached_tokens=cached,
+        output_tokens=response + thoughts,
+        reasoning_tokens=thoughts,
+        context_size=total,
+        context_limit=context_limit,
+        max_tokens=max_tokens,
+    )
+
+
+def _partial_arg_value(partial: Any) -> Any:
+    if getattr(partial, "string_value", None) is not None:
+        return partial.string_value
+    if getattr(partial, "number_value", None) is not None:
+        return partial.number_value
+    if getattr(partial, "bool_value", None) is not None:
+        return partial.bool_value
+    if getattr(partial, "null_value", None) is not None:
+        return None
+    return None
+
+
+def _merge_partial_args(dst: dict[str, Any], partial_args: list[Any] | None) -> None:
+    if not partial_args:
+        return
+    for partial in partial_args:
+        json_path = getattr(partial, "json_path", None)
+        if not isinstance(json_path, str) or not json_path.startswith("$."):
+            continue
+        key = json_path[2:]
+        if not key or any(ch in key for ch in "[]"):
+            continue
+        dst[key] = _partial_arg_value(partial)
+
+
+async def parse_google_stream(
+    stream: AsyncIterator[Any],
+    param: llm_param.LLMCallParameter,
+    metadata_tracker: MetadataTracker,
+) -> AsyncGenerator[model.ConversationItem]:
+    response_id: str | None = None
+    started = False
+
+    accumulated_text: list[str] = []
+    accumulated_thoughts: list[str] = []
+    thought_signature: str | None = None
+
+    # Track tool calls where args arrive as partial updates.
+    partial_args_by_call: dict[str, dict[str, Any]] = {}
+    started_tool_calls: dict[str, str] = {}  # call_id -> name
+    started_tool_items: set[str] = set()
+    emitted_tool_items: set[str] = set()
+
+    last_usage_metadata: UsageMetadata | None = None
+
+    async for chunk in stream:
+        log_debug(
+            chunk.model_dump_json(exclude_none=True),
+            style="blue",
+            debug_type=DebugType.LLM_STREAM,
+        )
+
+        if response_id is None:
+            response_id = getattr(chunk, "response_id", None) or uuid4().hex
+        assert response_id is not None
+        if not started:
+            started = True
+            yield model.StartItem(response_id=response_id)
+
+        if getattr(chunk, "usage_metadata", None) is not None:
+            last_usage_metadata = chunk.usage_metadata
+
+        candidates = getattr(chunk, "candidates", None) or []
+        candidate0 = candidates[0] if candidates else None
+        content = getattr(candidate0, "content", None) if candidate0 else None
+        parts = getattr(content, "parts", None) if content else None
+        if not parts:
+            continue
+
+        for part in parts:
+            if getattr(part, "text", None) is not None:
+                metadata_tracker.record_token()
+                text = part.text
+                if getattr(part, "thought", False) is True:
+                    accumulated_thoughts.append(text)
+                    if getattr(part, "thought_signature", None):
+                        thought_signature = part.thought_signature
+                    yield model.ReasoningTextDelta(content=text, response_id=response_id)
+                else:
+                    accumulated_text.append(text)
+                    yield model.AssistantMessageDelta(content=text, response_id=response_id)
+
+            function_call = getattr(part, "function_call", None)
+            if function_call is None:
+                continue
+
+            metadata_tracker.record_token()
+            call_id = getattr(function_call, "id", None) or uuid4().hex
+            name = getattr(function_call, "name", None) or ""
+            started_tool_calls.setdefault(call_id, name)
+
+            if call_id not in started_tool_items:
+                started_tool_items.add(call_id)
+                yield model.ToolCallStartItem(response_id=response_id, call_id=call_id, name=name)
+
+            args_obj = getattr(function_call, "args", None)
+            if args_obj is not None:
+                emitted_tool_items.add(call_id)
+                yield model.ToolCallItem(
+                    response_id=response_id,
+                    call_id=call_id,
+                    name=name,
+                    arguments=json.dumps(args_obj, ensure_ascii=False),
+                )
+                continue
+
+            partial_args = getattr(function_call, "partial_args", None)
+            if partial_args is not None:
+                acc = partial_args_by_call.setdefault(call_id, {})
+                _merge_partial_args(acc, partial_args)
+
+            will_continue = getattr(function_call, "will_continue", None)
+            if will_continue is False and call_id in partial_args_by_call and call_id not in emitted_tool_items:
+                emitted_tool_items.add(call_id)
+                yield model.ToolCallItem(
+                    response_id=response_id,
+                    call_id=call_id,
+                    name=name,
+                    arguments=json.dumps(partial_args_by_call[call_id], ensure_ascii=False),
+                )
+
+    # Flush any pending tool calls that never produced args.
+    for call_id, name in started_tool_calls.items():
+        if call_id in emitted_tool_items:
+            continue
+        args = partial_args_by_call.get(call_id, {})
+        emitted_tool_items.add(call_id)
+        yield model.ToolCallItem(
+            response_id=response_id,
+            call_id=call_id,
+            name=name,
+            arguments=json.dumps(args, ensure_ascii=False),
+        )
+
+    if accumulated_thoughts:
+        metadata_tracker.record_token()
+        yield model.ReasoningTextItem(
+            content="".join(accumulated_thoughts),
+            response_id=response_id,
+            model=str(param.model),
+        )
+    if thought_signature:
+        yield model.ReasoningEncryptedItem(
+            encrypted_content=thought_signature,
+            response_id=response_id,
+            model=str(param.model),
+            format="google_thought_signature",
+        )
+
+    if accumulated_text:
+        metadata_tracker.record_token()
+        yield model.AssistantMessageItem(content="".join(accumulated_text), response_id=response_id)
+
+    usage = _usage_from_metadata(last_usage_metadata, context_limit=param.context_limit, max_tokens=param.max_tokens)
+    if usage is not None:
+        metadata_tracker.set_usage(usage)
+    metadata_tracker.set_model_name(str(param.model))
+    metadata_tracker.set_response_id(response_id)
+    yield metadata_tracker.finalize()
+
+
+@register(llm_param.LLMClientProtocol.GOOGLE)
+class GoogleClient(LLMClientABC):
+    def __init__(self, config: llm_param.LLMConfigParameter):
+        super().__init__(config)
+        http_options: HttpOptions | None = None
+        if config.base_url:
+            # If base_url already contains version path, don't append api_version.
+            http_options = HttpOptions(base_url=str(config.base_url), api_version="")
+
+        self.client = Client(
+            api_key=config.api_key,
+            http_options=http_options,
+        )
+
+    @classmethod
+    @override
+    def create(cls, config: llm_param.LLMConfigParameter) -> "LLMClientABC":
+        return cls(config)
+
+    @override
+    async def call(self, param: llm_param.LLMCallParameter) -> AsyncGenerator[model.ConversationItem]:
+        param = apply_config_defaults(param, self.get_llm_config())
+        metadata_tracker = MetadataTracker(cost_config=self.get_llm_config().cost)
+
+        contents = convert_history_to_contents(param.input, model_name=str(param.model))
+        config = _build_config(param)
+
+        log_debug(
+            json.dumps(
+                {
+                    "model": str(param.model),
+                    "contents": [c.model_dump(exclude_none=True) for c in contents],
+                    "config": config.model_dump(exclude_none=True),
+                },
+                ensure_ascii=False,
+            ),
+            style="yellow",
+            debug_type=DebugType.LLM_PAYLOAD,
+        )
+
+        try:
+            stream = await self.client.aio.models.generate_content_stream(
+                model=str(param.model),
+                contents=cast(Any, contents),
+                config=config,
+            )
+        except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
+            yield metadata_tracker.finalize()
+            return
+
+        try:
+            async for item in parse_google_stream(stream, param=param, metadata_tracker=metadata_tracker):
+                yield item
+        except (APIError, ClientError, ServerError, httpx.HTTPError) as e:
+            yield model.StreamErrorItem(error=f"{e.__class__.__name__} {e!s}")
+            yield metadata_tracker.finalize()
```
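The trickiest part of the Google client is tool-call argument streaming: with `stream_function_call_arguments` enabled, arguments can arrive either complete (`function_call.args`) or as `partial_args` entries, each carrying a `json_path` plus one of several typed value fields. `_merge_partial_args` folds only top-level `$.<key>` paths into the accumulated dict and skips indexed paths. A worked example of that merge, using a hypothetical stand-in for the SDK's partial-argument objects:

```python
# Worked example of the partial-args merge; PartialArg is a hypothetical
# stand-in, not the google-genai SDK's real class.
from dataclasses import dataclass
from typing import Any


@dataclass
class PartialArg:
    json_path: str
    string_value: Any = None
    number_value: Any = None
    bool_value: Any = None


def partial_arg_value(partial: PartialArg) -> Any:
    # Mirrors _partial_arg_value: first non-None typed field wins.
    for field in ("string_value", "number_value", "bool_value"):
        value = getattr(partial, field)
        if value is not None:
            return value
    return None  # null_value (or nothing set) maps to None


def merge_partial_args(dst: dict[str, Any], partial_args: list[PartialArg]) -> None:
    # Mirrors _merge_partial_args: accept only top-level "$.key" paths.
    for partial in partial_args:
        if not partial.json_path.startswith("$."):
            continue
        key = partial.json_path[2:]
        if not key or any(ch in key for ch in "[]"):
            continue
        dst[key] = partial_arg_value(partial)


args: dict[str, Any] = {}
merge_partial_args(
    args,
    [
        PartialArg("$.path", string_value="main.py"),
        PartialArg("$.limit", number_value=100),
        PartialArg("$.edits[0]", string_value="ignored"),  # indexed path: skipped
    ],
)
print(args)  # {'path': 'main.py', 'limit': 100}
```

Once the stream signals `will_continue is False`, the accumulated dict is serialized with `json.dumps` and emitted as a single `ToolCallItem`, matching the shape the Anthropic path produces from `BetaInputJSONDelta` fragments.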