letta-nightly 0.6.24.dev20250212104045__py3-none-any.whl → 0.6.25.dev20250213104102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- letta/__init__.py +1 -1
- letta/agent.py +19 -3
- letta/client/client.py +5 -0
- letta/embeddings.py +21 -0
- letta/functions/helpers.py +28 -1
- letta/llm_api/google_vertex.py +328 -0
- letta/llm_api/llm_api_tools.py +26 -0
- letta/orm/agent.py +7 -1
- letta/schemas/agent.py +14 -1
- letta/schemas/embedding_config.py +1 -0
- letta/schemas/llm_config.py +1 -0
- letta/schemas/message.py +0 -11
- letta/schemas/providers.py +42 -3
- letta/server/rest_api/routers/v1/tools.py +15 -2
- letta/server/server.py +10 -4
- letta/services/agent_manager.py +5 -0
- letta/services/message_manager.py +89 -64
- letta/settings.py +8 -0
- letta/utils.py +17 -0
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/METADATA +3 -2
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/RECORD +24 -23
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.24.dev20250212104045.dist-info → letta_nightly-0.6.25.dev20250213104102.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -61,6 +61,7 @@ from letta.utils import (
     get_utc_time,
     json_dumps,
     json_loads,
+    log_telemetry,
     parse_json,
     printd,
     validate_function_response,
@@ -306,7 +307,7 @@ class Agent(BaseAgent):
         last_function_failed: bool = False,
     ) -> ChatCompletionResponse:
         """Get response from LLM API with robust retry mechanism."""
-
+        log_telemetry(self.logger, "_get_ai_reply start")
         allowed_tool_names = self.tool_rules_solver.get_allowed_tool_names(last_function_response=self.last_function_response)
         agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]

@@ -337,6 +338,7 @@ class Agent(BaseAgent):

         for attempt in range(1, empty_response_retry_limit + 1):
             try:
+                log_telemetry(self.logger, "_get_ai_reply create start")
                 response = create(
                     llm_config=self.agent_state.llm_config,
                     messages=message_sequence,
@@ -349,6 +351,7 @@ class Agent(BaseAgent):
                     stream=stream,
                     stream_interface=self.interface,
                 )
+                log_telemetry(self.logger, "_get_ai_reply create finish")

                 # These bottom two are retryable
                 if len(response.choices) == 0 or response.choices[0] is None:
@@ -360,12 +363,13 @@ class Agent(BaseAgent):
                         raise RuntimeError("Finish reason was length (maximum context length)")
                     else:
                         raise ValueError(f"Bad finish reason from API: {response.choices[0].finish_reason}")
-
+                log_telemetry(self.logger, "_handle_ai_response finish")
                 return response

             except ValueError as ve:
                 if attempt >= empty_response_retry_limit:
                     warnings.warn(f"Retry limit reached. Final error: {ve}")
+                    log_telemetry(self.logger, "_handle_ai_response finish ValueError")
                     raise Exception(f"Retries exhausted and no valid response received. Final error: {ve}")
                 else:
                     delay = min(backoff_factor * (2 ** (attempt - 1)), max_delay)
@@ -374,8 +378,10 @@ class Agent(BaseAgent):

             except Exception as e:
                 # For non-retryable errors, exit immediately
+                log_telemetry(self.logger, "_handle_ai_response finish generic Exception")
                 raise e

+        log_telemetry(self.logger, "_handle_ai_response finish catch-all exception")
         raise Exception("Retries exhausted and no valid response received.")

     def _handle_ai_response(
@@ -388,7 +394,7 @@ class Agent(BaseAgent):
         response_message_id: Optional[str] = None,
     ) -> Tuple[List[Message], bool, bool]:
         """Handles parsing and function execution"""
-
+        log_telemetry(self.logger, "_handle_ai_response start")
         # Hacky failsafe for now to make sure we didn't implement the streaming Message ID creation incorrectly
         if response_message_id is not None:
             assert response_message_id.startswith("message-"), response_message_id
@@ -506,7 +512,13 @@ class Agent(BaseAgent):
             self.interface.function_message(f"Running {function_name}({function_args})", msg_obj=messages[-1])
             try:
                 # handle tool execution (sandbox) and state updates
+                log_telemetry(
+                    self.logger, "_handle_ai_response execute tool start", function_name=function_name, function_args=function_args
+                )
                 function_response, sandbox_run_result = self.execute_tool_and_persist_state(function_name, function_args, target_letta_tool)
+                log_telemetry(
+                    self.logger, "_handle_ai_response execute tool finish", function_name=function_name, function_args=function_args
+                )

                 if sandbox_run_result and sandbox_run_result.status == "error":
                     messages = self._handle_function_error_response(
@@ -597,6 +609,7 @@ class Agent(BaseAgent):
         elif self.tool_rules_solver.is_terminal_tool(function_name):
             heartbeat_request = False

+        log_telemetry(self.logger, "_handle_ai_response finish")
        return messages, heartbeat_request, function_failed

     def step(
@@ -684,6 +697,9 @@ class Agent(BaseAgent):
             else:
                 break

+        if self.agent_state.message_buffer_autoclear:
+            self.agent_manager.trim_all_in_context_messages_except_system(self.agent_state.id, actor=self.user)
+
         return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count)

     def inner_step(
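The telemetry calls added throughout agent.py use a new log_telemetry helper exported from letta/utils.py (its definition is part of the +17 lines in that file and is not shown in this section). Judging only from the call sites, it takes a logger, an event name, and arbitrary keyword context. A minimal sketch of a compatible helper, offered purely as an assumption about its shape:

import logging
from datetime import datetime, timezone

def log_telemetry(logger: logging.Logger, event: str, **kwargs) -> None:
    """Hypothetical sketch: emit a timestamped telemetry event with keyword context."""
    context = " ".join(f"{key}={value}" for key, value in kwargs.items())
    logger.info(f"[telemetry] {datetime.now(timezone.utc).isoformat()} {event} {context}".rstrip())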
letta/client/client.py
CHANGED
@@ -73,6 +73,7 @@ class AbstractClient(object):
         metadata: Optional[Dict] = {"human:": DEFAULT_HUMAN, "persona": DEFAULT_PERSONA},
         description: Optional[str] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         raise NotImplementedError

@@ -540,6 +541,7 @@ class RESTClient(AbstractClient):
         description: Optional[str] = None,
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         """Create an agent

@@ -600,6 +602,7 @@ class RESTClient(AbstractClient):
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
             "include_base_tools": include_base_tools,
+            "message_buffer_autoclear": message_buffer_autoclear,
         }

         # Only add name if it's not None
@@ -2353,6 +2356,7 @@ class LocalClient(AbstractClient):
         description: Optional[str] = None,
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
+        message_buffer_autoclear: bool = False,
     ) -> AgentState:
         """Create an agent

@@ -2404,6 +2408,7 @@ class LocalClient(AbstractClient):
             "embedding_config": embedding_config if embedding_config else self._default_embedding_config,
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
+            "message_buffer_autoclear": message_buffer_autoclear,
         }

         # Only add name if it's not None
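Both client implementations now accept and forward the new message_buffer_autoclear flag when creating agents. A short usage sketch; the agent name and client setup here are illustrative assumptions, not part of the diff:

from letta import create_client

client = create_client()  # LocalClient by default; RESTClient when a base_url is given

# Agents created with message_buffer_autoclear=True have their in-context
# message buffer trimmed back to the system message after each step (see agent.py above).
agent_state = client.create_agent(
    name="stateless-helper",
    message_buffer_autoclear=True,
)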
letta/embeddings.py
CHANGED
@@ -188,6 +188,19 @@ class GoogleEmbeddings:
         return response_json["embedding"]["values"]


+class GoogleVertexEmbeddings:
+
+    def __init__(self, model: str, project_id: str, region: str):
+        from google import genai
+
+        self.client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
+        self.model = model
+
+    def get_text_embedding(self, text: str):
+        response = self.client.generate_embeddings(content=text, model=self.model)
+        return response.embeddings[0].embedding
+
+
 def query_embedding(embedding_model, query_text: str):
     """Generate padded embedding for querying database"""
     query_vec = embedding_model.get_text_embedding(query_text)
@@ -267,5 +280,13 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
         )
         return model

+    elif endpoint_type == "google_vertex":
+        model = GoogleVertexEmbeddings(
+            model=config.embedding_model,
+            api_key=model_settings.gemini_api_key,
+            base_url=model_settings.gemini_base_url,
+        )
+        return model
+
     else:
         raise ValueError(f"Unknown endpoint type {endpoint_type}")
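The new GoogleVertexEmbeddings class wraps the google-genai client. As defined in the hunk above, its constructor takes model, project_id, and region, while the new embedding_model() branch passes api_key and base_url instead. A hedged usage sketch against the constructor as defined, with placeholder GCP values:

from letta.embeddings import GoogleVertexEmbeddings

embedder = GoogleVertexEmbeddings(
    model="text-embedding-004",   # placeholder model name
    project_id="my-gcp-project",  # placeholder project
    region="us-central1",         # placeholder region
)
vector = embedder.get_text_embedding("Hello from Letta")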
letta/functions/helpers.py
CHANGED
@@ -17,6 +17,7 @@ from letta.schemas.message import Message, MessageCreate
 from letta.schemas.user import User
 from letta.server.rest_api.utils import get_letta_server
 from letta.settings import settings
+from letta.utils import log_telemetry


 # TODO: This is kind of hacky, as this is used to search up the action later on composio's side
@@ -341,10 +342,16 @@ async def async_send_message_with_retries(
     timeout: int,
     logging_prefix: Optional[str] = None,
 ) -> str:
-
     logging_prefix = logging_prefix or "[async_send_message_with_retries]"
+    log_telemetry(sender_agent.logger, f"async_send_message_with_retries start", target_agent_id=target_agent_id)
+
     for attempt in range(1, max_retries + 1):
         try:
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries -> asyncio wait for send_message_to_agent_no_stream start",
+                target_agent_id=target_agent_id,
+            )
             response = await asyncio.wait_for(
                 send_message_to_agent_no_stream(
                     server=server,
@@ -354,15 +361,24 @@ async def async_send_message_with_retries(
                 ),
                 timeout=timeout,
             )
+            log_telemetry(
+                sender_agent.logger,
+                f"async_send_message_with_retries -> asyncio wait for send_message_to_agent_no_stream finish",
+                target_agent_id=target_agent_id,
+            )

             # Then parse out the assistant message
             assistant_message = parse_letta_response_for_assistant_message(target_agent_id, response)
             if assistant_message:
                 sender_agent.logger.info(f"{logging_prefix} - {assistant_message}")
+                log_telemetry(
+                    sender_agent.logger, f"async_send_message_with_retries finish with assistant message", target_agent_id=target_agent_id
+                )
                 return assistant_message
             else:
                 msg = f"(No response from agent {target_agent_id})"
                 sender_agent.logger.info(f"{logging_prefix} - {msg}")
+                log_telemetry(sender_agent.logger, f"async_send_message_with_retries finish no response", target_agent_id=target_agent_id)
                 return msg

         except asyncio.TimeoutError:
@@ -380,6 +396,12 @@ async def async_send_message_with_retries(
                 await asyncio.sleep(backoff)
             else:
                 sender_agent.logger.error(f"{logging_prefix} - Fatal error: {error_msg}")
+                log_telemetry(
+                    sender_agent.logger,
+                    f"async_send_message_with_retries finish fatal error",
+                    target_agent_id=target_agent_id,
+                    error_msg=error_msg,
+                )
                 raise Exception(error_msg)


@@ -468,6 +490,7 @@ def fire_and_forget_send_to_agent(


 async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent", message: str, tags: List[str]) -> List[str]:
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async start", message=message, tags=tags)
     server = get_letta_server()

     augmented_message = (
@@ -477,7 +500,9 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
     )

     # Retrieve up to 100 matching agents
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents start", message=message, tags=tags)
     matching_agents = server.agent_manager.list_agents(actor=sender_agent.user, tags=tags, match_all_tags=True, limit=100)
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async listing agents finish", message=message, tags=tags)

     # Create a system message
     messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=sender_agent.agent_state.name)]
@@ -504,4 +529,6 @@ async def _send_message_to_agents_matching_all_tags_async(sender_agent: "Agent",
             final.append(str(r))
         else:
             final.append(r)
+
+    log_telemetry(sender_agent.logger, "_send_message_to_agents_matching_all_tags_async finish", message=message, tags=tags)
     return final
letta/llm_api/google_vertex.py
ADDED
@@ -0,0 +1,328 @@
+import uuid
+from typing import List, Optional
+
+from letta.constants import NON_USER_MSG_PREFIX
+from letta.local_llm.json_parser import clean_json_string_extra_backslash
+from letta.local_llm.utils import count_tokens
+from letta.schemas.openai.chat_completion_request import Tool
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+from letta.utils import get_tool_call_id, get_utc_time, json_dumps
+
+
+def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
+    """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+    In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+    so there is no natural follow-up 'model' role message.
+
+    To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
+    with role == 'model' that is placed in-betweeen and function output
+    (role == 'tool') and user message (role == 'user').
+    """
+    dummy_yield_message = {"role": "model", "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}]}
+    messages_with_padding = []
+    for i, message in enumerate(messages):
+        messages_with_padding.append(message)
+        # Check if the current message role is 'tool' and the next message role is 'user'
+        if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+            messages_with_padding.append(dummy_yield_message)
+
+    return messages_with_padding
+
+
+# TODO use pydantic model as input
+def to_google_ai(openai_message_dict: dict) -> dict:
+
+    # TODO supports "parts" as part of multimodal support
+    assert not isinstance(openai_message_dict["content"], list), "Multi-part content is message not yet supported"
+    if openai_message_dict["role"] == "user":
+        google_ai_message_dict = {
+            "role": "user",
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "assistant":
+        google_ai_message_dict = {
+            "role": "model",  # NOTE: diff
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    elif openai_message_dict["role"] == "tool":
+        google_ai_message_dict = {
+            "role": "function",  # NOTE: diff
+            "parts": [{"text": openai_message_dict["content"]}],
+        }
+    else:
+        raise ValueError(f"Unsupported conversion (OpenAI -> Google AI) from role {openai_message_dict['role']}")
+
+
+# TODO convert return type to pydantic
+def convert_tools_to_google_ai_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
+    """
+    OpenAI style:
+      "tools": [{
+        "type": "function",
+        "function": {
+            "name": "find_movies",
+            "description": "find ....",
+            "parameters": {
+              "type": "object",
+              "properties": {
+                 PARAM: {
+                   "type": PARAM_TYPE,  # eg "string"
+                   "description": PARAM_DESCRIPTION,
+                 },
+                 ...
+              },
+              "required": List[str],
+            }
+        }
+      }
+      ]
+
+    Google AI style:
+      "tools": [{
+        "functionDeclarations": [{
+          "name": "find_movies",
+          "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+          "parameters": {
+            "type": "OBJECT",
+            "properties": {
+              "location": {
+                "type": "STRING",
+                "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+              },
+              "description": {
+                "type": "STRING",
+                "description": "Any kind of description including category or genre, title words, attributes, etc."
+              }
+            },
+            "required": ["description"]
+          }
+        }, {
+          "name": "find_theaters",
+          ...
+    """
+    function_list = [
+        dict(
+            name=t.function.name,
+            description=t.function.description,
+            parameters=t.function.parameters,  # TODO need to unpack
+        )
+        for t in tools
+    ]
+
+    # Correct casing + add inner thoughts if needed
+    for func in function_list:
+        func["parameters"]["type"] = "OBJECT"
+        for param_name, param_fields in func["parameters"]["properties"].items():
+            param_fields["type"] = param_fields["type"].upper()
+        # Add inner thoughts
+        if inner_thoughts_in_kwargs:
+            from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+
+            func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+                "type": "STRING",
+                "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+            }
+            func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
+
+    return [{"functionDeclarations": function_list}]
+
+
+def convert_google_ai_response_to_chatcompletion(
+    response,
+    model: str,  # Required since not returned
+    input_messages: Optional[List[dict]] = None,  # Required if the API doesn't return UsageMetadata
+    pull_inner_thoughts_from_args: Optional[bool] = True,
+) -> ChatCompletionResponse:
+    """Google AI API response format is not the same as ChatCompletion, requires unpacking
+
+    Example:
+    {
+      "candidates": [
+        {
+          "content": {
+            "parts": [
+              {
+                "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+              }
+            ]
+          }
+        }
+      ],
+      "usageMetadata": {
+        "promptTokenCount": 9,
+        "candidatesTokenCount": 27,
+        "totalTokenCount": 36
+      }
+    }
+    """
+    try:
+        choices = []
+        index = 0
+        for candidate in response.candidates:
+            content = candidate.content
+
+            role = content.role
+            assert role == "model", f"Unknown role in response: {role}"
+
+            parts = content.parts
+            # TODO support parts / multimodal
+            # TODO support parallel tool calling natively
+            # TODO Alternative here is to throw away everything else except for the first part
+            for response_message in parts:
+                # Convert the actual message style to OpenAI style
+                if response_message.function_call:
+                    function_call = response_message.function_call
+                    function_name = function_call.name
+                    function_args = function_call.args
+                    assert isinstance(function_args, dict), function_args
+
+                    # NOTE: this also involves stripping the inner monologue out of the function
+                    if pull_inner_thoughts_from_args:
+                        from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                        assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                        inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                        assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                    else:
+                        inner_thoughts = None
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[
+                            ToolCall(
+                                id=get_tool_call_id(),
+                                type="function",
+                                function=FunctionCall(
+                                    name=function_name,
+                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                ),
+                            )
+                        ],
+                    )
+
+                else:
+
+                    # Inner thoughts are the content by default
+                    inner_thoughts = response_message.text
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                    )
+
+                # Google AI API uses different finish reason strings than OpenAI
+                # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                finish_reason = candidate.finish_reason.value
+                if finish_reason == "STOP":
+                    openai_finish_reason = (
+                        "function_call"
+                        if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                        else "stop"
+                    )
+                elif finish_reason == "MAX_TOKENS":
+                    openai_finish_reason = "length"
+                elif finish_reason == "SAFETY":
+                    openai_finish_reason = "content_filter"
+                elif finish_reason == "RECITATION":
+                    openai_finish_reason = "content_filter"
+                else:
+                    raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                choices.append(
+                    Choice(
+                        finish_reason=openai_finish_reason,
+                        index=index,
+                        message=openai_response_message,
+                    )
+                )
+                index += 1
+
+        # if len(choices) > 1:
+        #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+        # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+        # "usageMetadata": {
+        #     "promptTokenCount": 9,
+        #     "candidatesTokenCount": 27,
+        #     "totalTokenCount": 36
+        # }
+        if response.usage_metadata:
+            usage = UsageStatistics(
+                prompt_tokens=response.usage_metadata.prompt_token_count,
+                completion_tokens=response.usage_metadata.candidates_token_count,
+                total_tokens=response.usage_metadata.total_token_count,
+            )
+        else:
+            # Count it ourselves
+            assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+            prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+            completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+            total_tokens = prompt_tokens + completion_tokens
+            usage = UsageStatistics(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+
+        response_id = str(uuid.uuid4())
+        return ChatCompletionResponse(
+            id=response_id,
+            choices=choices,
+            model=model,  # NOTE: Google API doesn't pass back model in the response
+            created=get_utc_time(),
+            usage=usage,
+        )
+    except KeyError as e:
+        raise e
+
+
+# TODO convert 'data' type to pydantic
+def google_vertex_chat_completions_request(
+    model: str,
+    project_id: str,
+    region: str,
+    contents: List[dict],
+    config: dict,
+    add_postfunc_model_messages: bool = True,
+    # NOTE: Google AI API doesn't support mixing parts 'text' and 'function',
+    # so there's no clean way to put inner thoughts in the same message as a function call
+    inner_thoughts_in_kwargs: bool = True,
+) -> ChatCompletionResponse:
+    """https://ai.google.dev/docs/function_calling
+
+    From https://ai.google.dev/api/rest#service-endpoint:
+    "A service endpoint is a base URL that specifies the network address of an API service.
+    One service might have multiple service endpoints.
+    This service has the following service endpoint and all URIs below are relative to this service endpoint:
+    https://xxx.googleapis.com
+    """
+
+    from google import genai
+
+    client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
+    # add dummy model messages to the end of the input
+    if add_postfunc_model_messages:
+        contents = add_dummy_model_messages(contents)
+
+    # make request to client
+    response = client.models.generate_content(model=model, contents=contents, config=config)
+    print(response)
+
+    # convert back response
+    try:
+        return convert_google_ai_response_to_chatcompletion(
+            response=response,
+            model=model,
+            input_messages=contents,
+            pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
+        )
+    except Exception as conversion_error:
+        print(f"Error during response conversion: {conversion_error}")
+        raise conversion_error
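The module's entry point is google_vertex_chat_completions_request, which sends the request through genai.Client and converts the result back into a ChatCompletionResponse. A sketch of a direct call; the model, project, and region values are placeholders, and within Letta this path is normally reached through llm_api_tools.create() (next file):

from letta.llm_api.google_vertex import google_vertex_chat_completions_request

response = google_vertex_chat_completions_request(
    model="gemini-1.5-pro-002",   # placeholder model name
    project_id="my-gcp-project",  # placeholder project
    region="us-central1",         # placeholder region
    contents=[{"role": "user", "parts": [{"text": "What movies are playing in Mountain View?"}]}],
    config={"tools": None, "temperature": 0.7, "max_output_tokens": 1024},
)
print(response.choices[0].message.content)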
letta/llm_api/llm_api_tools.py
CHANGED
@@ -252,6 +252,32 @@ def create(
             inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )

+    elif llm_config.model_endpoint_type == "google_vertex":
+        from letta.llm_api.google_vertex import google_vertex_chat_completions_request
+
+        if stream:
+            raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+        if not use_tool_naming:
+            raise NotImplementedError("Only tool calling supported on Google Vertex AI API requests")
+
+        if functions is not None:
+            tools = [{"type": "function", "function": f} for f in functions]
+            tools = [Tool(**t) for t in tools]
+            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
+        else:
+            tools = None
+
+        config = {"tools": tools, "temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens}
+
+        return google_vertex_chat_completions_request(
+            model=llm_config.model,
+            project_id=model_settings.google_cloud_project,
+            region=model_settings.google_cloud_location,
+            contents=[m.to_google_ai_dict() for m in messages],
+            config=config,
+            inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+        )
+
     elif llm_config.model_endpoint_type == "anthropic":
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
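Requests reach this branch when the agent's LLMConfig uses the new google_vertex endpoint type; the GCP project and location come from model settings (google_cloud_project and google_cloud_location, see the settings.py change) rather than from the config itself. A sketch of such a config, with placeholder field values:

from letta.schemas.llm_config import LLMConfig

vertex_llm_config = LLMConfig(
    model="gemini-1.5-pro-002",           # placeholder model name
    model_endpoint_type="google_vertex",  # routes create() to the Vertex branch above
    model_endpoint="https://us-central1-aiplatform.googleapis.com",  # placeholder endpoint
    context_window=32000,
)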
letta/orm/agent.py
CHANGED
@@ -1,7 +1,7 @@
 import uuid
 from typing import TYPE_CHECKING, List, Optional

-from sqlalchemy import JSON, Index, String
+from sqlalchemy import JSON, Boolean, Index, String
 from sqlalchemy.orm import Mapped, mapped_column, relationship

 from letta.orm.block import Block
@@ -62,6 +62,11 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     # Tool rules
     tool_rules: Mapped[Optional[List[ToolRule]]] = mapped_column(ToolRulesColumn, doc="the tool rules for this agent.")

+    # Stateless
+    message_buffer_autoclear: Mapped[bool] = mapped_column(
+        Boolean, doc="If set to True, the agent will not remember previous messages. Not recommended unless you have an advanced use case."
+    )
+
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
     tool_exec_environment_variables: Mapped[List["AgentEnvironmentVariable"]] = relationship(
@@ -146,6 +151,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "project_id": self.project_id,
             "template_id": self.template_id,
             "base_template_id": self.base_template_id,
+            "message_buffer_autoclear": self.message_buffer_autoclear,
         }

         return self.__pydantic_model__(**state)