letta-nightly 0.7.20.dev20250521104258__py3-none-any.whl → 0.7.21.dev20250521233415__py3-none-any.whl
This diff shows the content changes between two publicly released package versions, as they appear in the supported public registries, and is provided for informational purposes only.
- letta/__init__.py +1 -1
- letta/agent.py +290 -3
- letta/agents/base_agent.py +0 -55
- letta/agents/helpers.py +5 -0
- letta/agents/letta_agent.py +314 -64
- letta/agents/letta_agent_batch.py +102 -55
- letta/agents/voice_agent.py +5 -5
- letta/client/client.py +9 -18
- letta/constants.py +55 -1
- letta/functions/function_sets/builtin.py +27 -0
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +10 -1
- letta/interfaces/openai_streaming_interface.py +9 -2
- letta/llm_api/anthropic.py +21 -2
- letta/llm_api/anthropic_client.py +33 -6
- letta/llm_api/google_ai_client.py +136 -423
- letta/llm_api/google_vertex_client.py +173 -22
- letta/llm_api/llm_api_tools.py +27 -0
- letta/llm_api/llm_client.py +1 -1
- letta/llm_api/llm_client_base.py +32 -21
- letta/llm_api/openai.py +57 -0
- letta/llm_api/openai_client.py +7 -11
- letta/memory.py +0 -1
- letta/orm/__init__.py +1 -0
- letta/orm/enums.py +1 -0
- letta/orm/provider_trace.py +26 -0
- letta/orm/step.py +1 -0
- letta/schemas/provider_trace.py +43 -0
- letta/schemas/providers.py +210 -65
- letta/schemas/step.py +1 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +37 -19
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +57 -34
- letta/server/rest_api/routers/v1/blocks.py +3 -3
- letta/server/rest_api/routers/v1/identities.py +24 -26
- letta/server/rest_api/routers/v1/jobs.py +3 -3
- letta/server/rest_api/routers/v1/llms.py +13 -8
- letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
- letta/server/rest_api/routers/v1/tags.py +3 -3
- letta/server/rest_api/routers/v1/telemetry.py +18 -0
- letta/server/rest_api/routers/v1/tools.py +6 -6
- letta/server/rest_api/streaming_response.py +105 -0
- letta/server/rest_api/utils.py +4 -0
- letta/server/server.py +140 -1
- letta/services/agent_manager.py +251 -18
- letta/services/block_manager.py +52 -37
- letta/services/helpers/noop_helper.py +10 -0
- letta/services/identity_manager.py +43 -38
- letta/services/job_manager.py +29 -0
- letta/services/message_manager.py +111 -0
- letta/services/sandbox_config_manager.py +36 -0
- letta/services/step_manager.py +146 -0
- letta/services/telemetry_manager.py +58 -0
- letta/services/tool_executor/tool_execution_manager.py +49 -5
- letta/services/tool_executor/tool_execution_sandbox.py +47 -0
- letta/services/tool_executor/tool_executor.py +236 -7
- letta/services/tool_manager.py +160 -1
- letta/services/tool_sandbox/e2b_sandbox.py +65 -3
- letta/settings.py +10 -2
- letta/tracing.py +5 -5
- {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/METADATA +3 -2
- {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/RECORD +66 -59
- {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_ai_client.py

@@ -1,422 +1,21 @@
-import json
-import uuid
 from typing import List, Optional, Tuple

-import
+import httpx
 from google import genai
-from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

-from letta.constants import NON_USER_MSG_PREFIX
 from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
-from letta.helpers.datetime_helpers import get_utc_time_int
-from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
-from letta.llm_api.
-from letta.llm_api.llm_client_base import LLMClientBase
-from letta.local_llm.json_parser import clean_json_string_extra_backslash
-from letta.local_llm.utils import count_tokens
+from letta.llm_api.google_vertex_client import GoogleVertexClient
 from letta.log import get_logger
-from letta.schemas.enums import ProviderCategory
-from letta.schemas.llm_config import LLMConfig
-from letta.schemas.message import Message as PydanticMessage
-from letta.schemas.openai.chat_completion_request import Tool
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings
-from letta.utils import get_tool_call_id

 logger = get_logger(__name__)


-class GoogleAIClient(
+class GoogleAIClient(GoogleVertexClient):

-    def
-
-        Performs underlying request to llm and returns raw response.
-        """
-        api_key = None
-        if llm_config.provider_category == ProviderCategory.byok:
-            from letta.services.provider_manager import ProviderManager
-
-            api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
-
-        if not api_key:
-            api_key = model_settings.gemini_api_key
-
-        # print("[google_ai request]", json.dumps(request_data, indent=2))
-        url, headers = get_gemini_endpoint_and_headers(
-            base_url=str(llm_config.model_endpoint),
-            model=llm_config.model,
-            api_key=str(api_key),
-            key_in_header=True,
-            generate_content=True,
-        )
-        return make_post_request(url, headers, request_data)
-
-    def build_request_data(
-        self,
-        messages: List[PydanticMessage],
-        llm_config: LLMConfig,
-        tools: List[dict],
-        force_tool_call: Optional[str] = None,
-    ) -> dict:
-        """
-        Constructs a request object in the expected data format for this client.
-        """
-        if tools:
-            tools = [{"type": "function", "function": f} for f in tools]
-            tool_objs = [Tool(**t) for t in tools]
-            tool_names = [t.function.name for t in tool_objs]
-            # Convert to the exact payload style Google expects
-            tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config)
-        else:
-            tool_names = []
-
-        contents = self.add_dummy_model_messages(
-            [m.to_google_ai_dict() for m in messages],
-        )
-
-        request_data = {
-            "contents": contents,
-            "tools": tools,
-            "generation_config": {
-                "temperature": llm_config.temperature,
-                "max_output_tokens": llm_config.max_tokens,
-            },
-        }
-
-        # write tool config
-        tool_config = ToolConfig(
-            function_calling_config=FunctionCallingConfig(
-                # ANY mode forces the model to predict only function calls
-                mode=FunctionCallingConfigMode.ANY,
-                # Provide the list of tools (though empty should also work, it seems not to)
-                allowed_function_names=tool_names,
-            )
-        )
-        request_data["tool_config"] = tool_config.model_dump()
-        return request_data
-
-    def convert_response_to_chat_completion(
-        self,
-        response_data: dict,
-        input_messages: List[PydanticMessage],
-        llm_config: LLMConfig,
-    ) -> ChatCompletionResponse:
-        """
-        Converts custom response format from llm client into an OpenAI
-        ChatCompletionsResponse object.
-
-        Example Input:
-            {
-              "candidates": [
-                {
-                  "content": {
-                    "parts": [
-                      {
-                        "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
-                      }
-                    ]
-                  }
-                }
-              ],
-              "usageMetadata": {
-                "promptTokenCount": 9,
-                "candidatesTokenCount": 27,
-                "totalTokenCount": 36
-              }
-            }
-        """
-        # print("[google_ai response]", json.dumps(response_data, indent=2))
-
-        try:
-            choices = []
-            index = 0
-            for candidate in response_data["candidates"]:
-                content = candidate["content"]
-
-                if "role" not in content or not content["role"]:
-                    # This means the response is malformed like MALFORMED_FUNCTION_CALL
-                    # NOTE: must be a ValueError to trigger a retry
-                    raise ValueError(f"Error in response data from LLM: {response_data}")
-                role = content["role"]
-                assert role == "model", f"Unknown role in response: {role}"
-
-                parts = content["parts"]
-
-                # NOTE: we aren't properly supported multi-parts here anyways (we're just appending choices),
-                # so let's disable it for now
-
-                # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
-                # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
-                # To patch this, if we have multiple parts we can take the last one
-                if len(parts) > 1:
-                    logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
-                    parts = [parts[-1]]
-
-                # TODO support parts / multimodal
-                # TODO support parallel tool calling natively
-                # TODO Alternative here is to throw away everything else except for the first part
-                for response_message in parts:
-                    # Convert the actual message style to OpenAI style
-                    if "functionCall" in response_message and response_message["functionCall"] is not None:
-                        function_call = response_message["functionCall"]
-                        assert isinstance(function_call, dict), function_call
-                        function_name = function_call["name"]
-                        assert isinstance(function_name, str), function_name
-                        function_args = function_call["args"]
-                        assert isinstance(function_args, dict), function_args
-
-                        # NOTE: this also involves stripping the inner monologue out of the function
-                        if llm_config.put_inner_thoughts_in_kwargs:
-                            from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
-
-                            assert (
-                                INNER_THOUGHTS_KWARG_VERTEX in function_args
-                            ), f"Couldn't find inner thoughts in function args:\n{function_call}"
-                            inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
-                            assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
-                        else:
-                            inner_thoughts = None
-
-                        # Google AI API doesn't generate tool call IDs
-                        openai_response_message = Message(
-                            role="assistant",  # NOTE: "model" -> "assistant"
-                            content=inner_thoughts,
-                            tool_calls=[
-                                ToolCall(
-                                    id=get_tool_call_id(),
-                                    type="function",
-                                    function=FunctionCall(
-                                        name=function_name,
-                                        arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
-                                    ),
-                                )
-                            ],
-                        )
-
-                    else:
-
-                        # Inner thoughts are the content by default
-                        inner_thoughts = response_message["text"]
-
-                        # Google AI API doesn't generate tool call IDs
-                        openai_response_message = Message(
-                            role="assistant",  # NOTE: "model" -> "assistant"
-                            content=inner_thoughts,
-                        )
-
-                    # Google AI API uses different finish reason strings than OpenAI
-                    # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
-                    # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
-                    # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
-                    # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
-                    finish_reason = candidate["finishReason"]
-                    if finish_reason == "STOP":
-                        openai_finish_reason = (
-                            "function_call"
-                            if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
-                            else "stop"
-                        )
-                    elif finish_reason == "MAX_TOKENS":
-                        openai_finish_reason = "length"
-                    elif finish_reason == "SAFETY":
-                        openai_finish_reason = "content_filter"
-                    elif finish_reason == "RECITATION":
-                        openai_finish_reason = "content_filter"
-                    else:
-                        raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
-
-                    choices.append(
-                        Choice(
-                            finish_reason=openai_finish_reason,
-                            index=index,
-                            message=openai_response_message,
-                        )
-                    )
-                    index += 1
-
-            # if len(choices) > 1:
-            #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
-
-            # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
-            #  "usageMetadata": {
-            #    "promptTokenCount": 9,
-            #    "candidatesTokenCount": 27,
-            #    "totalTokenCount": 36
-            #  }
-            if "usageMetadata" in response_data:
-                usage_data = response_data["usageMetadata"]
-                if "promptTokenCount" not in usage_data:
-                    raise ValueError(f"promptTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
-                if "totalTokenCount" not in usage_data:
-                    raise ValueError(f"totalTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
-                if "candidatesTokenCount" not in usage_data:
-                    raise ValueError(f"candidatesTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
-
-                prompt_tokens = usage_data["promptTokenCount"]
-                completion_tokens = usage_data["candidatesTokenCount"]
-                total_tokens = usage_data["totalTokenCount"]
-
-                usage = UsageStatistics(
-                    prompt_tokens=prompt_tokens,
-                    completion_tokens=completion_tokens,
-                    total_tokens=total_tokens,
-                )
-            else:
-                # Count it ourselves
-                assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
-                prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
-                completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
-                total_tokens = prompt_tokens + completion_tokens
-                usage = UsageStatistics(
-                    prompt_tokens=prompt_tokens,
-                    completion_tokens=completion_tokens,
-                    total_tokens=total_tokens,
-                )
-
-            response_id = str(uuid.uuid4())
-            return ChatCompletionResponse(
-                id=response_id,
-                choices=choices,
-                model=llm_config.model,  # NOTE: Google API doesn't pass back model in the response
-                created=get_utc_time_int(),
-                usage=usage,
-            )
-        except KeyError as e:
-            raise e
-
-    def _clean_google_ai_schema_properties(self, schema_part: dict):
-        """Recursively clean schema parts to remove unsupported Google AI keywords."""
-        if not isinstance(schema_part, dict):
-            return
-
-        # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
-        # * Only a subset of the OpenAPI schema is supported.
-        # * Supported parameter types in Python are limited.
-        unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
-        keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
-        for key_to_remove in keys_to_remove_at_this_level:
-            logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
-            del schema_part[key_to_remove]
-
-        if schema_part.get("type") == "string" and "format" in schema_part:
-            allowed_formats = ["enum", "date-time"]
-            if schema_part["format"] not in allowed_formats:
-                logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
-                del schema_part["format"]
-
-        # Check properties within the current level
-        if "properties" in schema_part and isinstance(schema_part["properties"], dict):
-            for prop_name, prop_schema in schema_part["properties"].items():
-                self._clean_google_ai_schema_properties(prop_schema)
-
-        # Check items within arrays
-        if "items" in schema_part and isinstance(schema_part["items"], dict):
-            self._clean_google_ai_schema_properties(schema_part["items"])
-
-        # Check within anyOf, allOf, oneOf lists
-        for key in ["anyOf", "allOf", "oneOf"]:
-            if key in schema_part and isinstance(schema_part[key], list):
-                for item_schema in schema_part[key]:
-                    self._clean_google_ai_schema_properties(item_schema)
-
-    def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
-        """
-        OpenAI style:
-          "tools": [{
-            "type": "function",
-            "function": {
-                "name": "find_movies",
-                "description": "find ....",
-                "parameters": {
-                  "type": "object",
-                  "properties": {
-                     PARAM: {
-                       "type": PARAM_TYPE,  # eg "string"
-                       "description": PARAM_DESCRIPTION,
-                     },
-                     ...
-                  },
-                  "required": List[str],
-                }
-            }
-          }
-          ]
-
-        Google AI style:
-          "tools": [{
-            "functionDeclarations": [{
-              "name": "find_movies",
-              "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
-              "parameters": {
-                "type": "OBJECT",
-                "properties": {
-                  "location": {
-                    "type": "STRING",
-                    "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
-                  },
-                  "description": {
-                    "type": "STRING",
-                    "description": "Any kind of description including category or genre, title words, attributes, etc."
-                  }
-                },
-                "required": ["description"]
-              }
-            }, {
-              "name": "find_theaters",
-              ...
-        """
-        function_list = [
-            dict(
-                name=t.function.name,
-                description=t.function.description,
-                parameters=t.function.parameters,  # TODO need to unpack
-            )
-            for t in tools
-        ]
-
-        # Add inner thoughts if needed
-        for func in function_list:
-            # Note: Google AI API used to have weird casing requirements, but not any more
-
-            # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned
-            if "parameters" in func and isinstance(func["parameters"], dict):
-                self._clean_google_ai_schema_properties(func["parameters"])
-
-            # Add inner thoughts
-            if llm_config.put_inner_thoughts_in_kwargs:
-                from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX
-
-                func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
-                    "type": "string",
-                    "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
-                }
-                func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX)
-
-        return [{"functionDeclarations": function_list}]
-
-    def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
-        """Google AI API requires all function call returns are immediately followed by a 'model' role message.
-
-        In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
-        so there is no natural follow-up 'model' role message.
-
-        To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
-        with role == 'model' that is placed in-betweeen and function output
-        (role == 'tool') and user message (role == 'user').
-        """
-        dummy_yield_message = {
-            "role": "model",
-            "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
-        }
-        messages_with_padding = []
-        for i, message in enumerate(messages):
-            messages_with_padding.append(message)
-            # Check if the current message role is 'tool' and the next message role is 'user'
-            if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
-                messages_with_padding.append(dummy_yield_message)
-
-        return messages_with_padding
+    def _get_client(self):
+        return genai.Client(api_key=model_settings.gemini_api_key)


 def get_gemini_endpoint_and_headers(

@@ -464,20 +63,111 @@ def google_ai_check_valid_api_key(api_key: str):


 def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
+    """Synchronous version to get model list from Google AI API using httpx."""
+    import httpx
+
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)
+
+    try:
+        with httpx.Client() as client:
+            response = client.get(url, headers=headers)
+            response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
+            response_data = response.json()  # convert to dict from string
+
+            # Grab the models out
+            model_list = response_data["models"]
+            return model_list
+
+    except httpx.HTTPStatusError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        printd(f"Got HTTPError, exception={http_err}")
+        # Print the HTTP status code
+        print(f"HTTP Error: {http_err.response.status_code}")
+        # Print the response content (error message from server)
+        print(f"Message: {http_err.response.text}")
+        raise http_err
+
+    except httpx.RequestError as req_err:
+        # Handle other httpx-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+
+
+async def google_ai_get_model_list_async(
+    base_url: str, api_key: str, key_in_header: bool = True, client: Optional[httpx.AsyncClient] = None
+) -> List[dict]:
+    """Asynchronous version to get model list from Google AI API using httpx."""
     from letta.utils import printd

     url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)

+    # Determine if we need to close the client at the end
+    close_client = False
+    if client is None:
+        client = httpx.AsyncClient()
+        close_client = True
+
     try:
-        response =
-        response.raise_for_status()  # Raises
-
+        response = await client.get(url, headers=headers)
+        response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
+        response_data = response.json()  # convert to dict from string

         # Grab the models out
-        model_list =
+        model_list = response_data["models"]
         return model_list

-    except
+    except httpx.HTTPStatusError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        printd(f"Got HTTPError, exception={http_err}")
+        # Print the HTTP status code
+        print(f"HTTP Error: {http_err.response.status_code}")
+        # Print the response content (error message from server)
+        print(f"Message: {http_err.response.text}")
+        raise http_err
+
+    except httpx.RequestError as req_err:
+        # Handle other httpx-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+
+    finally:
+        # Close the client if we created it
+        if close_client:
+            await client.aclose()
+
+
+def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> dict:
+    """Synchronous version to get model details from Google AI API using httpx."""
+    import httpx
+
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
+
+    try:
+        with httpx.Client() as client:
+            response = client.get(url, headers=headers)
+            printd(f"response = {response}")
+            response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
+            response_data = response.json()  # convert to dict from string
+            printd(f"response.json = {response_data}")
+
+            # Return the model details
+            return response_data
+
+    except httpx.HTTPStatusError as http_err:
         # Handle HTTP errors (e.g., response 4XX, 5XX)
         printd(f"Got HTTPError, exception={http_err}")
         # Print the HTTP status code

@@ -486,8 +176,8 @@ def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool =
         print(f"Message: {http_err.response.text}")
         raise http_err

-    except
-        # Handle other
+    except httpx.RequestError as req_err:
+        # Handle other httpx-related errors (e.g., connection error)
         printd(f"Got RequestException, exception={req_err}")
         raise req_err


@@ -497,22 +187,33 @@ def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool =
         raise e


-def
+async def google_ai_get_model_details_async(
+    base_url: str, api_key: str, model: str, key_in_header: bool = True, client: Optional[httpx.AsyncClient] = None
+) -> dict:
+    """Asynchronous version to get model details from Google AI API using httpx."""
+    import httpx
+
     from letta.utils import printd

     url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)

+    # Determine if we need to close the client at the end
+    close_client = False
+    if client is None:
+        client = httpx.AsyncClient()
+        close_client = True
+
     try:
-        response =
+        response = await client.get(url, headers=headers)
         printd(f"response = {response}")
-        response.raise_for_status()  # Raises
-
-        printd(f"response.json = {
+        response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
+        response_data = response.json()  # convert to dict from string
+        printd(f"response.json = {response_data}")

-        #
-        return
+        # Return the model details
+        return response_data

-    except
+    except httpx.HTTPStatusError as http_err:
         # Handle HTTP errors (e.g., response 4XX, 5XX)
         printd(f"Got HTTPError, exception={http_err}")
         # Print the HTTP status code

@@ -521,8 +222,8 @@ def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_
         print(f"Message: {http_err.response.text}")
         raise http_err

-    except
-        # Handle other
+    except httpx.RequestError as req_err:
+        # Handle other httpx-related errors (e.g., connection error)
         printd(f"Got RequestException, exception={req_err}")
         raise req_err


@@ -531,9 +232,21 @@ def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_
         printd(f"Got unknown Exception, exception={e}")
         raise e

+    finally:
+        # Close the client if we created it
+        if close_client:
+            await client.aclose()
+

 def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
     model_details = google_ai_get_model_details(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
     # TODO should this be:
     # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
     return int(model_details["inputTokenLimit"])
+
+
+async def google_ai_get_model_context_window_async(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+    model_details = await google_ai_get_model_details_async(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
+    # TODO should this be:
+    # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
+    return int(model_details["inputTokenLimit"])
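
Usage illustration (not part of the diff): a minimal sketch of driving the async helpers added above with a shared httpx.AsyncClient. The base URL, API key, and model name below are placeholder assumptions, not values taken from this release.

import asyncio

import httpx

from letta.llm_api.google_ai_client import (
    google_ai_get_model_context_window_async,
    google_ai_get_model_list_async,
)


async def main() -> None:
    # Placeholder values for illustration only.
    base_url = "https://generativelanguage.googleapis.com"  # assumed Gemini API base URL
    api_key = "YOUR_GEMINI_API_KEY"

    async with httpx.AsyncClient() as client:
        # Passing an existing client: the helper reuses it and leaves it open.
        models = await google_ai_get_model_list_async(base_url, api_key, client=client)
        print([m.get("name") for m in models])

    # Omitting the client: the helper creates its own AsyncClient and closes it in its finally block.
    window = await google_ai_get_model_context_window_async(base_url, api_key, model="gemini-1.5-pro")
    print(window)


if __name__ == "__main__":
    asyncio.run(main())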