letta-nightly 0.7.20.dev20250520104253__py3-none-any.whl → 0.7.21.dev20250521233415__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +290 -3
  3. letta/agents/base_agent.py +0 -55
  4. letta/agents/helpers.py +5 -0
  5. letta/agents/letta_agent.py +314 -64
  6. letta/agents/letta_agent_batch.py +102 -55
  7. letta/agents/voice_agent.py +5 -5
  8. letta/client/client.py +9 -18
  9. letta/constants.py +55 -1
  10. letta/functions/function_sets/builtin.py +27 -0
  11. letta/functions/mcp_client/stdio_client.py +1 -1
  12. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  13. letta/interfaces/anthropic_streaming_interface.py +10 -1
  14. letta/interfaces/openai_streaming_interface.py +9 -2
  15. letta/llm_api/anthropic.py +21 -2
  16. letta/llm_api/anthropic_client.py +33 -6
  17. letta/llm_api/google_ai_client.py +136 -423
  18. letta/llm_api/google_vertex_client.py +173 -22
  19. letta/llm_api/llm_api_tools.py +27 -0
  20. letta/llm_api/llm_client.py +1 -1
  21. letta/llm_api/llm_client_base.py +32 -21
  22. letta/llm_api/openai.py +57 -0
  23. letta/llm_api/openai_client.py +7 -11
  24. letta/memory.py +0 -1
  25. letta/orm/__init__.py +1 -0
  26. letta/orm/enums.py +1 -0
  27. letta/orm/provider_trace.py +26 -0
  28. letta/orm/step.py +1 -0
  29. letta/schemas/provider_trace.py +43 -0
  30. letta/schemas/providers.py +210 -65
  31. letta/schemas/step.py +1 -0
  32. letta/schemas/tool.py +4 -0
  33. letta/server/db.py +37 -19
  34. letta/server/rest_api/routers/v1/__init__.py +2 -0
  35. letta/server/rest_api/routers/v1/agents.py +57 -34
  36. letta/server/rest_api/routers/v1/blocks.py +3 -3
  37. letta/server/rest_api/routers/v1/identities.py +24 -26
  38. letta/server/rest_api/routers/v1/jobs.py +3 -3
  39. letta/server/rest_api/routers/v1/llms.py +13 -8
  40. letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
  41. letta/server/rest_api/routers/v1/tags.py +3 -3
  42. letta/server/rest_api/routers/v1/telemetry.py +18 -0
  43. letta/server/rest_api/routers/v1/tools.py +6 -6
  44. letta/server/rest_api/streaming_response.py +105 -0
  45. letta/server/rest_api/utils.py +4 -0
  46. letta/server/server.py +140 -0
  47. letta/services/agent_manager.py +251 -18
  48. letta/services/block_manager.py +52 -37
  49. letta/services/helpers/noop_helper.py +10 -0
  50. letta/services/identity_manager.py +43 -38
  51. letta/services/job_manager.py +29 -0
  52. letta/services/message_manager.py +111 -0
  53. letta/services/sandbox_config_manager.py +36 -0
  54. letta/services/step_manager.py +146 -0
  55. letta/services/telemetry_manager.py +58 -0
  56. letta/services/tool_executor/tool_execution_manager.py +49 -5
  57. letta/services/tool_executor/tool_execution_sandbox.py +47 -0
  58. letta/services/tool_executor/tool_executor.py +236 -7
  59. letta/services/tool_manager.py +160 -1
  60. letta/services/tool_sandbox/e2b_sandbox.py +65 -3
  61. letta/settings.py +10 -2
  62. letta/tracing.py +5 -5
  63. {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/METADATA +3 -2
  64. {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/RECORD +67 -60
  65. {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/LICENSE +0 -0
  66. {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/WHEEL +0 -0
  67. {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/entry_points.txt +0 -0
@@ -1,422 +1,21 @@
- import json
- import uuid
  from typing import List, Optional, Tuple

- import requests
+ import httpx
  from google import genai
- from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

- from letta.constants import NON_USER_MSG_PREFIX
  from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
- from letta.helpers.datetime_helpers import get_utc_time_int
- from letta.helpers.json_helpers import json_dumps
  from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
- from letta.llm_api.helpers import make_post_request
- from letta.llm_api.llm_client_base import LLMClientBase
- from letta.local_llm.json_parser import clean_json_string_extra_backslash
- from letta.local_llm.utils import count_tokens
+ from letta.llm_api.google_vertex_client import GoogleVertexClient
  from letta.log import get_logger
- from letta.schemas.enums import ProviderCategory
- from letta.schemas.llm_config import LLMConfig
- from letta.schemas.message import Message as PydanticMessage
- from letta.schemas.openai.chat_completion_request import Tool
- from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
  from letta.settings import model_settings
- from letta.utils import get_tool_call_id

  logger = get_logger(__name__)


- class GoogleAIClient(LLMClientBase):
+ class GoogleAIClient(GoogleVertexClient):

-     def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
-         """
-         Performs underlying request to llm and returns raw response.
-         """
-         api_key = None
-         if llm_config.provider_category == ProviderCategory.byok:
-             from letta.services.provider_manager import ProviderManager
-
-             api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
-
-         if not api_key:
-             api_key = model_settings.gemini_api_key
-
-         # print("[google_ai request]", json.dumps(request_data, indent=2))
-         url, headers = get_gemini_endpoint_and_headers(
-             base_url=str(llm_config.model_endpoint),
-             model=llm_config.model,
-             api_key=str(api_key),
-             key_in_header=True,
-             generate_content=True,
-         )
-         return make_post_request(url, headers, request_data)
-
-     def build_request_data(
-         self,
-         messages: List[PydanticMessage],
-         llm_config: LLMConfig,
-         tools: List[dict],
-         force_tool_call: Optional[str] = None,
-     ) -> dict:
-         """
-         Constructs a request object in the expected data format for this client.
-         """
-         if tools:
-             tools = [{"type": "function", "function": f} for f in tools]
-             tool_objs = [Tool(**t) for t in tools]
-             tool_names = [t.function.name for t in tool_objs]
-             # Convert to the exact payload style Google expects
-             tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config)
-         else:
-             tool_names = []
-
-         contents = self.add_dummy_model_messages(
-             [m.to_google_ai_dict() for m in messages],
-         )
-
-         request_data = {
-             "contents": contents,
-             "tools": tools,
-             "generation_config": {
-                 "temperature": llm_config.temperature,
-                 "max_output_tokens": llm_config.max_tokens,
-             },
-         }
-
-         # write tool config
-         tool_config = ToolConfig(
-             function_calling_config=FunctionCallingConfig(
-                 # ANY mode forces the model to predict only function calls
-                 mode=FunctionCallingConfigMode.ANY,
-                 # Provide the list of tools (though empty should also work, it seems not to)
-                 allowed_function_names=tool_names,
-             )
-         )
-         request_data["tool_config"] = tool_config.model_dump()
-         return request_data
-
-     def convert_response_to_chat_completion(
-         self,
-         response_data: dict,
-         input_messages: List[PydanticMessage],
-         llm_config: LLMConfig,
-     ) -> ChatCompletionResponse:
-         """
-         Converts custom response format from llm client into an OpenAI
-         ChatCompletionsResponse object.
-
-         Example Input:
-             {
-               "candidates": [
-                 {
-                   "content": {
-                     "parts": [
-                       {
-                         "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
-                       }
-                     ]
-                   }
-                 }
-               ],
-               "usageMetadata": {
-                 "promptTokenCount": 9,
-                 "candidatesTokenCount": 27,
-                 "totalTokenCount": 36
-               }
-             }
-         """
-         # print("[google_ai response]", json.dumps(response_data, indent=2))
-
-         try:
-             choices = []
-             index = 0
-             for candidate in response_data["candidates"]:
-                 content = candidate["content"]
-
-                 if "role" not in content or not content["role"]:
-                     # This means the response is malformed like MALFORMED_FUNCTION_CALL
-                     # NOTE: must be a ValueError to trigger a retry
-                     raise ValueError(f"Error in response data from LLM: {response_data}")
-                 role = content["role"]
-                 assert role == "model", f"Unknown role in response: {role}"
-
-                 parts = content["parts"]
-
-                 # NOTE: we aren't properly supported multi-parts here anyways (we're just appending choices),
-                 # so let's disable it for now
-
-                 # NOTE(Apr 9, 2025): there's a very strange bug on 2.5 where the response has a part with broken text
-                 # {'candidates': [{'content': {'parts': [{'functionCall': {'name': 'send_message', 'args': {'request_heartbeat': False, 'message': 'Hello! How can I make your day better?', 'inner_thoughts': 'User has initiated contact. Sending a greeting.'}}}], 'role': 'model'}, 'finishReason': 'STOP', 'avgLogprobs': -0.25891534213362066}], 'usageMetadata': {'promptTokenCount': 2493, 'candidatesTokenCount': 29, 'totalTokenCount': 2522, 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 2493}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 29}]}, 'modelVersion': 'gemini-1.5-pro-002'}
-                 # To patch this, if we have multiple parts we can take the last one
-                 if len(parts) > 1:
-                     logger.warning(f"Unexpected multiple parts in response from Google AI: {parts}")
-                     parts = [parts[-1]]
-
-                 # TODO support parts / multimodal
-                 # TODO support parallel tool calling natively
-                 # TODO Alternative here is to throw away everything else except for the first part
-                 for response_message in parts:
-                     # Convert the actual message style to OpenAI style
-                     if "functionCall" in response_message and response_message["functionCall"] is not None:
-                         function_call = response_message["functionCall"]
-                         assert isinstance(function_call, dict), function_call
-                         function_name = function_call["name"]
-                         assert isinstance(function_name, str), function_name
-                         function_args = function_call["args"]
-                         assert isinstance(function_args, dict), function_args
-
-                         # NOTE: this also involves stripping the inner monologue out of the function
-                         if llm_config.put_inner_thoughts_in_kwargs:
-                             from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
-
-                             assert (
-                                 INNER_THOUGHTS_KWARG_VERTEX in function_args
-                             ), f"Couldn't find inner thoughts in function args:\n{function_call}"
-                             inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
-                             assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
-                         else:
-                             inner_thoughts = None
-
-                         # Google AI API doesn't generate tool call IDs
-                         openai_response_message = Message(
-                             role="assistant",  # NOTE: "model" -> "assistant"
-                             content=inner_thoughts,
-                             tool_calls=[
-                                 ToolCall(
-                                     id=get_tool_call_id(),
-                                     type="function",
-                                     function=FunctionCall(
-                                         name=function_name,
-                                         arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
-                                     ),
-                                 )
-                             ],
-                         )
-
-                     else:
-
-                         # Inner thoughts are the content by default
-                         inner_thoughts = response_message["text"]
-
-                         # Google AI API doesn't generate tool call IDs
-                         openai_response_message = Message(
-                             role="assistant",  # NOTE: "model" -> "assistant"
-                             content=inner_thoughts,
-                         )
-
-                     # Google AI API uses different finish reason strings than OpenAI
-                     # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
-                     # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
-                     # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
-                     # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
-                     finish_reason = candidate["finishReason"]
-                     if finish_reason == "STOP":
-                         openai_finish_reason = (
-                             "function_call"
-                             if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
-                             else "stop"
-                         )
-                     elif finish_reason == "MAX_TOKENS":
-                         openai_finish_reason = "length"
-                     elif finish_reason == "SAFETY":
-                         openai_finish_reason = "content_filter"
-                     elif finish_reason == "RECITATION":
-                         openai_finish_reason = "content_filter"
-                     else:
-                         raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
-
-                     choices.append(
-                         Choice(
-                             finish_reason=openai_finish_reason,
-                             index=index,
-                             message=openai_response_message,
-                         )
-                     )
-                     index += 1
-
-             # if len(choices) > 1:
-             #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
-
-             # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
-             # "usageMetadata": {
-             #     "promptTokenCount": 9,
-             #     "candidatesTokenCount": 27,
-             #     "totalTokenCount": 36
-             # }
-             if "usageMetadata" in response_data:
-                 usage_data = response_data["usageMetadata"]
-                 if "promptTokenCount" not in usage_data:
-                     raise ValueError(f"promptTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
-                 if "totalTokenCount" not in usage_data:
-                     raise ValueError(f"totalTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
-                 if "candidatesTokenCount" not in usage_data:
-                     raise ValueError(f"candidatesTokenCount not found in usageMetadata:\n{json.dumps(usage_data, indent=2)}")
-
-                 prompt_tokens = usage_data["promptTokenCount"]
-                 completion_tokens = usage_data["candidatesTokenCount"]
-                 total_tokens = usage_data["totalTokenCount"]
-
-                 usage = UsageStatistics(
-                     prompt_tokens=prompt_tokens,
-                     completion_tokens=completion_tokens,
-                     total_tokens=total_tokens,
-                 )
-             else:
-                 # Count it ourselves
-                 assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
-                 prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
-                 completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
-                 total_tokens = prompt_tokens + completion_tokens
-                 usage = UsageStatistics(
-                     prompt_tokens=prompt_tokens,
-                     completion_tokens=completion_tokens,
-                     total_tokens=total_tokens,
-                 )
-
-             response_id = str(uuid.uuid4())
-             return ChatCompletionResponse(
-                 id=response_id,
-                 choices=choices,
-                 model=llm_config.model,  # NOTE: Google API doesn't pass back model in the response
-                 created=get_utc_time_int(),
-                 usage=usage,
-             )
-         except KeyError as e:
-             raise e
-
-     def _clean_google_ai_schema_properties(self, schema_part: dict):
-         """Recursively clean schema parts to remove unsupported Google AI keywords."""
-         if not isinstance(schema_part, dict):
-             return
-
-         # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
-         # * Only a subset of the OpenAPI schema is supported.
-         # * Supported parameter types in Python are limited.
-         unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
-         keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
-         for key_to_remove in keys_to_remove_at_this_level:
-             logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
-             del schema_part[key_to_remove]
-
-         if schema_part.get("type") == "string" and "format" in schema_part:
-             allowed_formats = ["enum", "date-time"]
-             if schema_part["format"] not in allowed_formats:
-                 logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
-                 del schema_part["format"]
-
-         # Check properties within the current level
-         if "properties" in schema_part and isinstance(schema_part["properties"], dict):
-             for prop_name, prop_schema in schema_part["properties"].items():
-                 self._clean_google_ai_schema_properties(prop_schema)
-
-         # Check items within arrays
-         if "items" in schema_part and isinstance(schema_part["items"], dict):
-             self._clean_google_ai_schema_properties(schema_part["items"])
-
-         # Check within anyOf, allOf, oneOf lists
-         for key in ["anyOf", "allOf", "oneOf"]:
-             if key in schema_part and isinstance(schema_part[key], list):
-                 for item_schema in schema_part[key]:
-                     self._clean_google_ai_schema_properties(item_schema)
-
-     def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
-         """
-         OpenAI style:
-           "tools": [{
-             "type": "function",
-             "function": {
-                 "name": "find_movies",
-                 "description": "find ....",
-                 "parameters": {
-                     "type": "object",
-                     "properties": {
-                         PARAM: {
-                             "type": PARAM_TYPE,  # eg "string"
-                             "description": PARAM_DESCRIPTION,
-                         },
-                         ...
-                     },
-                     "required": List[str],
-                 }
-             }
-           }
-         ]
-
-         Google AI style:
-           "tools": [{
-             "functionDeclarations": [{
-               "name": "find_movies",
-               "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
-               "parameters": {
-                 "type": "OBJECT",
-                 "properties": {
-                   "location": {
-                     "type": "STRING",
-                     "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
-                   },
-                   "description": {
-                     "type": "STRING",
-                     "description": "Any kind of description including category or genre, title words, attributes, etc."
-                   }
-                 },
-                 "required": ["description"]
-               }
-             }, {
-               "name": "find_theaters",
-               ...
-         """
-         function_list = [
-             dict(
-                 name=t.function.name,
-                 description=t.function.description,
-                 parameters=t.function.parameters,  # TODO need to unpack
-             )
-             for t in tools
-         ]
-
-         # Add inner thoughts if needed
-         for func in function_list:
-             # Note: Google AI API used to have weird casing requirements, but not any more
-
-             # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned
-             if "parameters" in func and isinstance(func["parameters"], dict):
-                 self._clean_google_ai_schema_properties(func["parameters"])
-
-             # Add inner thoughts
-             if llm_config.put_inner_thoughts_in_kwargs:
-                 from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX
-
-                 func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
-                     "type": "string",
-                     "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
-                 }
-                 func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX)
-
-         return [{"functionDeclarations": function_list}]
-
-     def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
-         """Google AI API requires all function call returns are immediately followed by a 'model' role message.
-
-         In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
-         so there is no natural follow-up 'model' role message.
-
-         To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
-         with role == 'model' that is placed in-betweeen and function output
-         (role == 'tool') and user message (role == 'user').
-         """
-         dummy_yield_message = {
-             "role": "model",
-             "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
-         }
-         messages_with_padding = []
-         for i, message in enumerate(messages):
-             messages_with_padding.append(message)
-             # Check if the current message role is 'tool' and the next message role is 'user'
-             if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
-                 messages_with_padding.append(dummy_yield_message)
-
-         return messages_with_padding
+     def _get_client(self):
+         return genai.Client(api_key=model_settings.gemini_api_key)


  def get_gemini_endpoint_and_headers(
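
Note on the hunk above: the bulk of this release's change to google_ai_client.py is a consolidation. Roughly 400 lines of hand-rolled request building, response-to-ChatCompletion conversion, and schema cleaning are deleted, and GoogleAIClient becomes a thin subclass of GoogleVertexClient (itself +173 -22 in this release), overriding only how the underlying genai.Client is constructed. Below is a minimal sketch of that template-method shape; only the GoogleAIClient/GoogleVertexClient names, the _get_client override, and the genai.Client(api_key=...) call come from the diff, while the base-class internals shown are assumptions, not letta's actual code.

    # Sketch only: the real shared logic lives in letta/llm_api/google_vertex_client.py
    # and is not reproduced here.
    from google import genai


    class VertexClientBase:  # stand-in for letta's GoogleVertexClient
        def _get_client(self) -> genai.Client:
            # Vertex-style construction (project/location credentials) would go here.
            raise NotImplementedError

        def request(self, request_data: dict) -> dict:
            # Request/response handling is shared; only client creation varies.
            client = self._get_client()
            # Illustrative call: request_data is assumed to carry model/contents/config.
            response = client.models.generate_content(**request_data)
            return response.model_dump()


    class GeminiAPIClient(VertexClientBase):  # stand-in for the new GoogleAIClient
        def _get_client(self) -> genai.Client:
            # Gemini Developer API path: a plain API key instead of Vertex credentials.
            return genai.Client(api_key="YOUR_GEMINI_API_KEY")  # placeholder key

The design choice is the classic template method: one code path against the google-genai SDK, with the provider-specific seam reduced to a single overridable constructor.
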
@@ -464,20 +63,111 @@ def google_ai_check_valid_api_key(api_key: str):


  def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
+     """Synchronous version to get model list from Google AI API using httpx."""
+     import httpx
+
+     from letta.utils import printd
+
+     url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)
+
+     try:
+         with httpx.Client() as client:
+             response = client.get(url, headers=headers)
+             response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
+             response_data = response.json()  # convert to dict from string
+
+             # Grab the models out
+             model_list = response_data["models"]
+             return model_list
+
+     except httpx.HTTPStatusError as http_err:
+         # Handle HTTP errors (e.g., response 4XX, 5XX)
+         printd(f"Got HTTPError, exception={http_err}")
+         # Print the HTTP status code
+         print(f"HTTP Error: {http_err.response.status_code}")
+         # Print the response content (error message from server)
+         print(f"Message: {http_err.response.text}")
+         raise http_err
+
+     except httpx.RequestError as req_err:
+         # Handle other httpx-related errors (e.g., connection error)
+         printd(f"Got RequestException, exception={req_err}")
+         raise req_err
+
+     except Exception as e:
+         # Handle other potential errors
+         printd(f"Got unknown Exception, exception={e}")
+         raise e
+
+
+ async def google_ai_get_model_list_async(
+     base_url: str, api_key: str, key_in_header: bool = True, client: Optional[httpx.AsyncClient] = None
+ ) -> List[dict]:
+     """Asynchronous version to get model list from Google AI API using httpx."""
      from letta.utils import printd

      url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)

+     # Determine if we need to close the client at the end
+     close_client = False
+     if client is None:
+         client = httpx.AsyncClient()
+         close_client = True
+
      try:
-         response = requests.get(url, headers=headers)
-         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-         response = response.json()  # convert to dict from string
+         response = await client.get(url, headers=headers)
+         response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
+         response_data = response.json()  # convert to dict from string

          # Grab the models out
-         model_list = response["models"]
+         model_list = response_data["models"]
          return model_list

-     except requests.exceptions.HTTPError as http_err:
+     except httpx.HTTPStatusError as http_err:
+         # Handle HTTP errors (e.g., response 4XX, 5XX)
+         printd(f"Got HTTPError, exception={http_err}")
+         # Print the HTTP status code
+         print(f"HTTP Error: {http_err.response.status_code}")
+         # Print the response content (error message from server)
+         print(f"Message: {http_err.response.text}")
+         raise http_err
+
+     except httpx.RequestError as req_err:
+         # Handle other httpx-related errors (e.g., connection error)
+         printd(f"Got RequestException, exception={req_err}")
+         raise req_err
+
+     except Exception as e:
+         # Handle other potential errors
+         printd(f"Got unknown Exception, exception={e}")
+         raise e
+
+     finally:
+         # Close the client if we created it
+         if close_client:
+             await client.aclose()
+
+
+ def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> dict:
+     """Synchronous version to get model details from Google AI API using httpx."""
+     import httpx
+
+     from letta.utils import printd
+
+     url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
+
+     try:
+         with httpx.Client() as client:
+             response = client.get(url, headers=headers)
+             printd(f"response = {response}")
+             response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
+             response_data = response.json()  # convert to dict from string
+             printd(f"response.json = {response_data}")
+
+             # Return the model details
+             return response_data
+
+     except httpx.HTTPStatusError as http_err:
          # Handle HTTP errors (e.g., response 4XX, 5XX)
          printd(f"Got HTTPError, exception={http_err}")
          # Print the HTTP status code
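
The new google_ai_get_model_list_async in the hunk above takes an optional httpx.AsyncClient and only creates (and closes, via the finally block) a one-off client when the caller does not supply one. A hypothetical caller that reuses a single pooled client might look like the following; the function name, parameters, and module path are from the diff, the endpoint URL is the standard Gemini API base, and the key is a placeholder.

    import asyncio

    import httpx

    from letta.llm_api.google_ai_client import google_ai_get_model_list_async


    async def main() -> None:
        # One shared client means one connection pool across calls, instead of
        # the helper opening and closing its own AsyncClient each time.
        async with httpx.AsyncClient() as client:
            models = await google_ai_get_model_list_async(
                base_url="https://generativelanguage.googleapis.com",
                api_key="YOUR_GEMINI_API_KEY",  # placeholder
                client=client,
            )
            print([m.get("name") for m in models])


    asyncio.run(main())
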
@@ -486,8 +176,8 @@ def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool =
          print(f"Message: {http_err.response.text}")
          raise http_err

-     except requests.exceptions.RequestException as req_err:
-         # Handle other requests-related errors (e.g., connection error)
+     except httpx.RequestError as req_err:
+         # Handle other httpx-related errors (e.g., connection error)
          printd(f"Got RequestException, exception={req_err}")
          raise req_err

@@ -497,22 +187,33 @@ def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool =
          raise e


- def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
+ async def google_ai_get_model_details_async(
+     base_url: str, api_key: str, model: str, key_in_header: bool = True, client: Optional[httpx.AsyncClient] = None
+ ) -> dict:
+     """Asynchronous version to get model details from Google AI API using httpx."""
+     import httpx
+
      from letta.utils import printd

      url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)

+     # Determine if we need to close the client at the end
+     close_client = False
+     if client is None:
+         client = httpx.AsyncClient()
+         close_client = True
+
      try:
-         response = requests.get(url, headers=headers)
+         response = await client.get(url, headers=headers)
          printd(f"response = {response}")
-         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-         response = response.json()  # convert to dict from string
-         printd(f"response.json = {response}")
+         response.raise_for_status()  # Raises HTTPStatusError for 4XX/5XX status
+         response_data = response.json()  # convert to dict from string
+         printd(f"response.json = {response_data}")

-         # Grab the models out
-         return response
+         # Return the model details
+         return response_data

-     except requests.exceptions.HTTPError as http_err:
+     except httpx.HTTPStatusError as http_err:
          # Handle HTTP errors (e.g., response 4XX, 5XX)
          printd(f"Got HTTPError, exception={http_err}")
          # Print the HTTP status code
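
The hunks around this point are the mechanical half of the requests-to-httpx migration: requests.exceptions.HTTPError becomes httpx.HTTPStatusError, and requests.exceptions.RequestException becomes httpx.RequestError. One behavioral wrinkle worth knowing when reading these handlers: in requests, HTTPError is a subclass of RequestException, so except-clause order matters; in httpx, HTTPStatusError and RequestError are sibling subclasses of httpx.HTTPError, so the two clauses are disjoint. A standalone sketch of the new shape (the URL is a deliberately unreachable placeholder):

    import httpx

    try:
        with httpx.Client() as client:
            response = client.get("https://example.invalid/v1beta/models")
            response.raise_for_status()
    except httpx.HTTPStatusError as http_err:
        # 4XX/5XX responses (requests counterpart: requests.exceptions.HTTPError)
        print(f"HTTP Error: {http_err.response.status_code}")
    except httpx.RequestError as req_err:
        # Transport-level failures such as DNS or connection errors
        # (requests counterpart: requests.exceptions.RequestException)
        print(f"Request failed: {req_err}")
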
@@ -521,8 +222,8 @@ def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_
          print(f"Message: {http_err.response.text}")
          raise http_err

-     except requests.exceptions.RequestException as req_err:
-         # Handle other requests-related errors (e.g., connection error)
+     except httpx.RequestError as req_err:
+         # Handle other httpx-related errors (e.g., connection error)
          printd(f"Got RequestException, exception={req_err}")
          raise req_err

@@ -531,9 +232,21 @@ def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_
          printd(f"Got unknown Exception, exception={e}")
          raise e

+     finally:
+         # Close the client if we created it
+         if close_client:
+             await client.aclose()
+

  def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
      model_details = google_ai_get_model_details(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
      # TODO should this be:
      # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
      return int(model_details["inputTokenLimit"])
+
+
+ async def google_ai_get_model_context_window_async(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+     model_details = await google_ai_get_model_details_async(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
+     # TODO should this be:
+     # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
+     return int(model_details["inputTokenLimit"])
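
The async context-window helper added at the end mirrors its sync counterpart, awaiting the model details and returning inputTokenLimit only (the diff's own TODO notes it may eventually include outputTokenLimit as well). A hypothetical smoke test follows; the endpoint is the standard Gemini API base, the key is a placeholder, and the model string echoes the gemini-1.5-pro-002 name seen in the diff's debug comment.

    import asyncio

    from letta.llm_api.google_ai_client import google_ai_get_model_context_window_async


    async def main() -> None:
        window = await google_ai_get_model_context_window_async(
            base_url="https://generativelanguage.googleapis.com",
            api_key="YOUR_GEMINI_API_KEY",  # placeholder
            model="gemini-1.5-pro-002",
        )
        # Per the TODO above, this is the input token limit only.
        print(f"context window (input tokens): {window}")


    asyncio.run(main())
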