letta-nightly 0.6.24.dev20250212104045__py3-none-any.whl → 0.6.25.dev20250213023354__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.6.24"
+__version__ = "0.6.25"
 
 # import clients
 from letta.client.client import LocalClient, RESTClient, create_client
letta/agent.py CHANGED
@@ -61,6 +61,7 @@ from letta.utils import (
     get_utc_time,
     json_dumps,
     json_loads,
+    log_telemetry,
     parse_json,
     printd,
     validate_function_response,
@@ -306,7 +307,7 @@ class Agent(BaseAgent):
         last_function_failed: bool = False,
     ) -> ChatCompletionResponse:
         """Get response from LLM API with robust retry mechanism."""
-
+        log_telemetry(self.logger, "_get_ai_reply start")
         allowed_tool_names = self.tool_rules_solver.get_allowed_tool_names(last_function_response=self.last_function_response)
         agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]

@@ -337,6 +338,7 @@

         for attempt in range(1, empty_response_retry_limit + 1):
             try:
+                log_telemetry(self.logger, "_get_ai_reply create start")
                 response = create(
                     llm_config=self.agent_state.llm_config,
                     messages=message_sequence,
@@ -349,6 +351,7 @@
                     stream=stream,
                     stream_interface=self.interface,
                 )
+                log_telemetry(self.logger, "_get_ai_reply create finish")

                 # These bottom two are retryable
                 if len(response.choices) == 0 or response.choices[0] is None:
@@ -360,12 +363,13 @@
                         raise RuntimeError("Finish reason was length (maximum context length)")
                     else:
                         raise ValueError(f"Bad finish reason from API: {response.choices[0].finish_reason}")
-
+                log_telemetry(self.logger, "_handle_ai_response finish")
                 return response

             except ValueError as ve:
                 if attempt >= empty_response_retry_limit:
                     warnings.warn(f"Retry limit reached. Final error: {ve}")
+                    log_telemetry(self.logger, "_handle_ai_response finish ValueError")
                     raise Exception(f"Retries exhausted and no valid response received. Final error: {ve}")
                 else:
                     delay = min(backoff_factor * (2 ** (attempt - 1)), max_delay)
@@ -374,8 +378,10 @@

             except Exception as e:
                 # For non-retryable errors, exit immediately
+                log_telemetry(self.logger, "_handle_ai_response finish generic Exception")
                 raise e

+        log_telemetry(self.logger, "_handle_ai_response finish catch-all exception")
         raise Exception("Retries exhausted and no valid response received.")

     def _handle_ai_response(
@@ -388,7 +394,7 @@
         response_message_id: Optional[str] = None,
     ) -> Tuple[List[Message], bool, bool]:
         """Handles parsing and function execution"""
-
+        log_telemetry(self.logger, "_handle_ai_response start")
         # Hacky failsafe for now to make sure we didn't implement the streaming Message ID creation incorrectly
         if response_message_id is not None:
             assert response_message_id.startswith("message-"), response_message_id
@@ -506,7 +512,13 @@
             self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1])
             try:
                 # handle tool execution (sandbox) and state updates
+                log_telemetry(
+                    self.logger, "_handle_ai_response execute tool start", function_name=function_name, function_args=function_args
+                )
                 function_response, sandbox_run_result = self.execute_tool_and_persist_state(function_name, function_args, target_letta_tool)
+                log_telemetry(
+                    self.logger, "_handle_ai_response execute tool finish", function_name=function_name, function_args=function_args
+                )

                 if sandbox_run_result and sandbox_run_result.status == "error":
                     messages = self._handle_function_error_response(
@@ -597,6 +609,7 @@
         elif self.tool_rules_solver.is_terminal_tool(function_name):
             heartbeat_request = False

+        log_telemetry(self.logger, "_handle_ai_response finish")
         return messages, heartbeat_request, function_failed

     def step(
@@ -684,6 +697,9 @@
             else:
                 break

+        if self.agent_state.message_buffer_autoclear:
+            self.agent_manager.trim_all_in_context_messages_except_system(self.agent_state.id, actor=self.user)
+
         return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count)

     def inner_step(
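
Note on the telemetry calls added above: they all follow the `log_telemetry(logger, event, **kwargs)` shape imported from `letta.utils`. The helper's body is not part of this diff, so the following is only a hypothetical sketch of a compatible implementation, not the actual letta.utils code:

import logging
from datetime import datetime, timezone

def log_telemetry(logger: logging.Logger, event: str, **kwargs) -> None:
    """Hypothetical sketch only: emit one structured line so the start/finish pairs above can be timed."""
    extras = " ".join(f"{k}={v}" for k, v in kwargs.items())
    logger.info(f"[telemetry] {datetime.now(timezone.utc).isoformat()} {event} {extras}".rstrip())
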
letta/client/client.py CHANGED
@@ -73,6 +73,7 @@ class AbstractClient(object):
         metadata: Optional[Dict] = {"human:": DEFAULT_HUMAN, "persona": DEFAULT_PERSONA},
         description: Optional[str] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         raise NotImplementedError

@@ -540,6 +541,7 @@ class RESTClient(AbstractClient):
         description: Optional[str] = None,
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         """Create an agent

@@ -600,6 +602,7 @@
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
             "include_base_tools": include_base_tools,
+            "message_buffer_autoclear": message_buffer_autoclear,
         }

         # Only add name if it's not None
@@ -2353,6 +2356,7 @@ class LocalClient(AbstractClient):
         description: Optional[str] = None,
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         """Create an agent

@@ -2404,6 +2408,7 @@
             "embedding_config": embedding_config if embedding_config else self._default_embedding_config,
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
+            "message_buffer_autoclear": message_buffer_autoclear,
         }

         # Only add name if it's not None
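
A minimal usage sketch of the new `message_buffer_autoclear` flag through the client API shown above (assumes a locally configured Letta setup; the agent name is a placeholder):

from letta.client.client import create_client

client = create_client()  # LocalClient by default, RESTClient when a base_url is configured
# With message_buffer_autoclear=True, Agent.step() trims the in-context messages
# back to the system message after each step (see the agent.py hunk above).
agent_state = client.create_agent(name="stateless-agent", message_buffer_autoclear=True)
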
letta/embeddings.py CHANGED
@@ -188,6 +188,19 @@ class GoogleEmbeddings:
         return response_json["embedding"]["values"]


+class GoogleVertexEmbeddings:
+
+    def __init__(self, model: str, project_id: str, region: str):
+        from google import genai
+
+        self.client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
+        self.model = model
+
+    def get_text_embedding(self, text: str):
+        response = self.client.generate_embeddings(content=text, model=self.model)
+        return response.embeddings[0].embedding
+
+
 def query_embedding(embedding_model, query_text: str):
     """Generate padded embedding for querying database"""
     query_vec = embedding_model.get_text_embedding(query_text)
@@ -267,5 +280,13 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
             )
         return model

+    elif endpoint_type == "google_vertex":
+        model = GoogleVertexEmbeddings(
+            model=config.embedding_model,
+            api_key=model_settings.gemini_api_key,
+            base_url=model_settings.gemini_base_url,
+        )
+        return model
+
     else:
         raise ValueError(f"Unknown endpoint type {endpoint_type}")
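
For reference, a sketch of exercising the new GoogleVertexEmbeddings class directly, using the parameters its __init__ accepts (the project, region, and model values are placeholders; the google-genai package is assumed to be installed and authenticated):

from letta.embeddings import GoogleVertexEmbeddings

embedder = GoogleVertexEmbeddings(model="text-embedding-004", project_id="my-gcp-project", region="us-central1")
vector = embedder.get_text_embedding("hello from letta")
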
@@ -17,6 +17,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.user import User
 from letta.server.rest_api.utils import get_letta_server
 from letta.settings import settings
+from letta.utils import log_telemetry


 # TODO: This is kind of hacky, as this is used to search up the action later on composio's side
@@ -341,10 +342,16 @@ async def async_send_message_with_retries(
     timeout: int,
     logging_prefix: Optional[str] = None,
 ) -> str:
-
     logging_prefix = logging_prefix or "[async_send_message_with_retries]"
+    log_telemetry(sender_agent.logger, f"async_send_message_with_retries start", target_agent_id=target_agent_id)
+
     for attempt in range(1, max_retries + 1):
         try:
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries -> asyncio wait for send_message_to_agent_no_stream start",
+                target_agent_id=target_agent_id,
+            )
             response = await asyncio.wait_for(
                 send_message_to_agent_no_stream(
                     server=server,
@@ -354,15 +361,24 @@
                 ),
                 timeout=timeout,
             )
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries -> asyncio wait for send_message_to_agent_no_stream finish",
+                target_agent_id=target_agent_id,
+            )

             # Then parse out the assistant message
             assistant_message = parse_letta_response_for_assistant_message(target_agent_id, response)
             if assistant_message:
                 sender_agent.logger.info(f"{logging_prefix} - {assistant_message}")
+                log_telemetry(
+                    sender_agent.logger, f"async_send_message_with_retries finish with assistant message", target_agent_id=target_agent_id
+                )
                 return assistant_message
             else:
                 msg = f"(No response from agent {target_agent_id})"
                 sender_agent.logger.info(f"{logging_prefix} - {msg}")
+                log_telemetry(sender_agent.logger, f"async_send_message_with_retries finish no response", target_agent_id=target_agent_id)
                 return msg

         except asyncio.TimeoutError:
@@ -380,6 +396,12 @@
                 await asyncio.sleep(backoff)
             else:
                 sender_agent.logger.error(f"{logging_prefix} - Fatal error: {error_msg}")
+                log_telemetry(
+                    sender_agent.logger,
+                    f"async_send_message_with_retries finish fatal error",
+                    target_agent_id=target_agent_id,
+                    error_msg=error_msg,
+                )
                 raise Exception(error_msg)


@@ -468,6 +490,7 @@ def fire_and_forget_send_to_agent(


 async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent", message: str, tags: List[str]) -> List[str]:
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async start", message=message, tags=tags)
     server = get_letta_server()

     augmented_message = (
@@ -477,7 +500,9 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
     )

     # Retrieve up to 100 matching agents
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents start", message=message, tags=tags)
     matching_agents = server.agent_manager.list_agents(actor=sender_agent.user, tags=tags, match_all_tags=True, limit=100)
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents finish", message=message, tags=tags)

     # Create a system message
     messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=sender_agent.agent_state.name)]
@@ -504,4 +529,6 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
             final.append(str(r))
         else:
             final.append(r)
+
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async finish", message=message, tags=tags)
     return final
@@ -0,0 +1,328 @@
+import uuid
+from typing import List, Optional
+
+from letta.constants import NON_USER_MSG_PREFIX
+from letta.local_llm.json_parser import clean_json_string_extra_backslash
+from letta.local_llm.utils import count_tokens
+from letta.schemas.openai.chat_completion_request import Tool
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+from letta.utils import get_tool_call_id, get_utc_time, json_dumps
+
+
+def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
+    """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+    In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+    so there is no natural follow-up 'model' role message.
+
+    To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
+    with role == 'model' that is placed in-betweeen and function output
+    (role == 'tool') and user message (role == 'user').
+    """
+    dummy_yield_message = {"role": "model", "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}]}
+    messages_with_padding = []
+    for i, message in enumerate(messages):
+        messages_with_padding.append(message)
+        # Check if the current message role is 'tool' and the next message role is 'user'
+        if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+            messages_with_padding.append(dummy_yield_message)
+
+    return messages_with_padding
+
+
+# TODO use pydantic model as input
+def to_google_ai(openai_message_dict: dict) -> dict:
+
+    # TODO supports "parts" as part of multimodal support
+    assert not isinstance(openai_message_dict["content"], list), "Multi-part content is message not yet supported"
+    if openai_message_dict["role"] == "user":
+        google_ai_message_dict = {
+            "role": "user",
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "assistant":
+        google_ai_message_dict = {
+            "role": "model",  # NOTE: diff
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "tool":
+        google_ai_message_dict = {
+            "role": "function",  # NOTE: diff
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    else:
+        raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}")
+
+
+# TODO convert return type to pydantic
+def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
+    """
+    OpenAI style:
+      "tools": [{
+        "type": "function",
+        "function": {
+            "name": "find_movies",
+            "description": "find ....",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                 PARAM: {
+                   "type": PARAM_TYPE,  # eg "string"
+                   "description": PARAM_DESCRIPTION,
+                 },
+                 ...
+              },
+              "required": List[str],
+            }
+        }
+      }
+      ]
+
+    Google AI style:
+      "tools": [{
+        "functionDeclarations": [{
+          "name": "find_movies",
+          "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+          "parameters": {
+            "type": "OBJECT",
+            "properties": {
+              "location": {
+                "type": "STRING",
+                "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+              },
+              "description": {
+                "type": "STRING",
+                "description": "Any kind of description including category or genre, title words, attributes, etc."
+              }
+            },
+            "required": ["description"]
+          }
+        }, {
+          "name": "find_theaters",
+          ...
+    """
+    function_list = [
+        dict(
+            name=t.function.name,
+            description=t.function.description,
+            parameters=t.function.parameters,  # TODO need to unpack
+        )
+        for t in tools
+    ]
+
+    # Correct casing + add inner thoughts if needed
+    for func in function_list:
+        func["parameters"]["type"] = "OBJECT"
+        for param_name, param_fields in func["parameters"]["properties"].items():
+            param_fields["type"] = param_fields["type"].upper()
+        # Add inner thoughts
+        if inner_thoughts_in_kwargs:
+            from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+
+            func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+                "type": "STRING",
+                "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+            }
+            func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
+
+    return [{"functionDeclarations": function_list}]
+
+
+def convert_google_ai_response_to_chatcompletion(
+    response,
+    model: str,  # Required since not returned
+    input_messages: Optional[List[dict]] = None,  # Required if the API doesn't return UsageMetadata
+    pull_inner_thoughts_from_args: Optional[bool] = True,
+) -> ChatCompletionResponse:
+    """Google AI API response format is not the same as ChatCompletion, requires unpacking
+
+    Example:
+    {
+      "candidates": [
+        {
+          "content": {
+            "parts": [
+              {
+                "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+              }
+            ]
+          }
+        }
+      ],
+      "usageMetadata": {
+        "promptTokenCount": 9,
+        "candidatesTokenCount": 27,
+        "totalTokenCount": 36
+      }
+    }
+    """
+    try:
+        choices = []
+        index = 0
+        for candidate in response.candidates:
+            content = candidate.content
+
+            role = content.role
+            assert role == "model", f"Unknown role in response: {role}"
+
+            parts = content.parts
+            # TODO support parts / multimodal
+            # TODO support parallel tool calling natively
+            # TODO Alternative here is to throw away everything else except for the first part
+            for response_message in parts:
+                # Convert the actual message style to OpenAI style
+                if response_message.function_call:
+                    function_call = response_message.function_call
+                    function_name = function_call.name
+                    function_args = function_call.args
+                    assert isinstance(function_args, dict), function_args
+
+                    # NOTE: this also involves stripping the inner monologue out of the function
+                    if pull_inner_thoughts_from_args:
+                        from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                        assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                        inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                        assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                    else:
+                        inner_thoughts = None
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[
+                            ToolCall(
+                                id=get_tool_call_id(),
+                                type="function",
+                                function=FunctionCall(
+                                    name=function_name,
+                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                ),
+                            )
+                        ],
+                    )
+
+                else:
+
+                    # Inner thoughts are the content by default
+                    inner_thoughts = response_message.text
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                    )
+
+                # Google AI API uses different finish reason strings than OpenAI
+                # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                finish_reason = candidate.finish_reason.value
+                if finish_reason == "STOP":
+                    openai_finish_reason = (
+                        "function_call"
+                        if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                        else "stop"
+                    )
+                elif finish_reason == "MAX_TOKENS":
+                    openai_finish_reason = "length"
+                elif finish_reason == "SAFETY":
+                    openai_finish_reason = "content_filter"
+                elif finish_reason == "RECITATION":
+                    openai_finish_reason = "content_filter"
+                else:
+                    raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                choices.append(
+                    Choice(
+                        finish_reason=openai_finish_reason,
+                        index=index,
+                        message=openai_response_message,
+                    )
+                )
+                index += 1
+
+        # if len(choices) > 1:
+        #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+        # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+        #  "usageMetadata": {
+        #    "promptTokenCount": 9,
+        #    "candidatesTokenCount": 27,
+        #    "totalTokenCount": 36
+        #  }
+        if response.usage_metadata:
+            usage = UsageStatistics(
+                prompt_tokens=response.usage_metadata.prompt_token_count,
+                completion_tokens=response.usage_metadata.candidates_token_count,
+                total_tokens=response.usage_metadata.total_token_count,
+            )
+        else:
+            # Count it ourselves
+            assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+            prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+            completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+            total_tokens = prompt_tokens + completion_tokens
+            usage = UsageStatistics(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+
+        response_id = str(uuid.uuid4())
+        return ChatCompletionResponse(
+            id=response_id,
+            choices=choices,
+            model=model,  # NOTE: Google API doesn't pass back model in the response
+            created=get_utc_time(),
+            usage=usage,
+        )
+    except KeyError as e:
+        raise e
+
+
+# TODO convert 'data' type to pydantic
+def google_vertex_chat_completions_request(
+    model: str,
+    project_id: str,
+    region: str,
+    contents: List[dict],
+    config: dict,
+    add_postfunc_model_messages: bool = True,
+    # NOTE: Google AI API doesn't support mixing parts 'text' and 'function',
+    # so there's no clean way to put inner thoughts in the same message as a function call
+    inner_thoughts_in_kwargs: bool = True,
+) -> ChatCompletionResponse:
+    """https://ai.google.dev/docs/function_calling
+
+    From https://ai.google.dev/api/rest#service-endpoint:
+    "A service endpoint is a base URL that specifies the network address of an API service.
+    One service might have multiple service endpoints.
+    This service has the following service endpoint and all URIs below are relative to this service endpoint:
+    https://xxx.googleapis.com
+    """
+
+    from google import genai
+
+    client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
+    # add dummy model messages to the end of the input
+    if add_postfunc_model_messages:
+        contents = add_dummy_model_messages(contents)
+
+    # make request to client
+    response = client.models.generate_content(model=model, contents=contents, config=config)
+    print(response)
+
+    # convert back response
+    try:
+        return convert_google_ai_response_to_chatcompletion(
+            response=response,
+            model=model,
+            input_messages=contents,
+            pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
+        )
+    except Exception as conversion_error:
+        print(f"Error during response conversion: {conversion_error}")
+        raise conversion_error
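
To illustrate the conversion helpers above, a small example of convert_tools_to_google_ai_format applied to one OpenAI-style tool (the tool schema is invented for illustration; the import path assumes the new file ships as letta.llm_api.google_vertex, as the create() hunk below implies):

from letta.llm_api.google_vertex import convert_tools_to_google_ai_format
from letta.schemas.openai.chat_completion_request import Tool

# Invented example schema in OpenAI function-calling style
openai_tool = Tool(
    type="function",
    function={
        "name": "find_movies",
        "description": "Find movie titles currently playing in theaters.",
        "parameters": {
            "type": "object",
            "properties": {"description": {"type": "string", "description": "Genre, title words, etc."}},
            "required": ["description"],
        },
    },
)

google_tools = convert_tools_to_google_ai_format([openai_tool])
# With the default inner_thoughts_in_kwargs=True, the result gains an extra INNER_THOUGHTS_KWARG property:
# [{"functionDeclarations": [{"name": "find_movies", ..., "parameters": {"type": "OBJECT", ...}}]}]
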
@@ -252,6 +252,32 @@ def create(
             inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )

+    elif llm_config.model_endpoint_type == "google_vertex":
+        from letta.llm_api.google_vertex import google_vertex_chat_completions_request
+
+        if stream:
+            raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+        if not use_tool_naming:
+            raise NotImplementedError("Only tool calling supported on Google Vertex AI API requests")
+
+        if functions is not None:
+            tools = [{"type": "function", "function": f} for f in functions]
+            tools = [Tool(**t) for t in tools]
+            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
+        else:
+            tools = None
+
+        config = {"tools": tools, "temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens}
+
+        return google_vertex_chat_completions_request(
+            model=llm_config.model,
+            project_id=model_settings.google_cloud_project,
+            region=model_settings.google_cloud_location,
+            contents=[m.to_google_ai_dict() for m in messages],
+            config=config,
+            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+        )
+
     elif llm_config.model_endpoint_type == "anthropic":
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
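
A hedged sketch of an LLMConfig that would route create() through the new branch (field names follow the llm_config attributes referenced in the hunk above; the model name and context window are illustrative placeholders, and GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_LOCATION are assumed to be the environment variables behind model_settings.google_cloud_project and model_settings.google_cloud_location):

from letta.schemas.llm_config import LLMConfig

vertex_config = LLMConfig(
    model="gemini-2.0-flash-001",          # placeholder model name
    model_endpoint_type="google_vertex",   # selects the new elif branch in create()
    context_window=32000,                  # placeholder
    put_inner_thoughts_in_kwargs=True,     # forwarded as inner_thoughts_in_kwargs
)
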
letta/orm/agent.py CHANGED
@@ -1,7 +1,7 @@
 import uuid
 from typing import TYPE_CHECKING, List, Optional

-from sqlalchemy import JSON, Index, String
+from sqlalchemy import JSON, Boolean, Index, String
 from sqlalchemy.orm import Mapped, mapped_column, relationship

 from letta.orm.block import Block
@@ -62,6 +62,11 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     # Tool rules
     tool_rules: Mapped[Optional[List[ToolRule]]] = mapped_column(ToolRulesColumn, doc="the tool rules for this agent.")

+    # Stateless
+    message_buffer_autoclear: Mapped[bool] = mapped_column(
+        Boolean, doc="If set to True, the agent will not remember previous messages. Not recommended unless you have an advanced use case."
+    )
+
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
     tool_exec_environment_variables: Mapped[List["AgentEnvironmentVariable"]] = relationship(
@@ -146,6 +151,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "project_id": self.project_id,
             "template_id": self.template_id,
             "base_template_id": self.base_template_id,
+            "message_buffer_autoclear": self.message_buffer_autoclear,
         }

         return self.__pydantic_model__(**state)