letta-nightly 0.7.1.dev20250423104245__py3-none-any.whl → 0.7.3.dev20250424054013__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +2 -1
- letta/agents/letta_agent.py +2 -1
- letta/agents/letta_agent_batch.py +8 -3
- letta/agents/voice_agent.py +2 -2
- letta/client/client.py +3 -0
- letta/functions/functions.py +2 -1
- letta/functions/schema_generator.py +5 -0
- letta/helpers/composio_helpers.py +1 -1
- letta/helpers/datetime_helpers.py +9 -0
- letta/jobs/llm_batch_job_polling.py +2 -1
- letta/llm_api/anthropic.py +10 -6
- letta/llm_api/anthropic_client.py +7 -6
- letta/llm_api/cohere.py +2 -2
- letta/llm_api/google_ai_client.py +11 -45
- letta/llm_api/google_vertex_client.py +8 -7
- letta/llm_api/llm_client.py +8 -14
- letta/llm_api/llm_client_base.py +17 -16
- letta/llm_api/openai.py +11 -4
- letta/llm_api/openai_client.py +47 -14
- letta/local_llm/chat_completion_proxy.py +2 -2
- letta/memory.py +2 -1
- letta/personas/examples/sleeptime_memory_persona.txt +5 -0
- letta/schemas/enums.py +3 -0
- letta/schemas/letta_message_content.py +2 -1
- letta/schemas/llm_config.py +12 -2
- letta/schemas/message.py +17 -0
- letta/schemas/openai/chat_completion_response.py +52 -3
- letta/server/rest_api/chat_completions_interface.py +2 -2
- letta/server/rest_api/interface.py +1 -1
- letta/server/rest_api/routers/v1/messages.py +9 -1
- letta/server/server.py +1 -6
- letta/services/agent_manager.py +6 -1
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/METADATA +1 -1
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/RECORD +38 -38
- letta/personas/examples/offline_memory_persona.txt +0 -4
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.1.dev20250423104245.dist-info → letta_nightly-0.7.3.dev20250424054013.dist-info}/entry_points.txt +0 -0
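At a glance, the recurring change across these hunks is that LLMClient.create now takes a provider (the model endpoint type) instead of a full LLM config, and the LLMConfig is passed along with each request or response conversion instead. A minimal sketch of the new calling convention, based on the hunks below (the identifiers come from this diff; the surrounding setup is illustrative, not copied from any one file):

    from letta.llm_api.llm_client import LLMClient

    llm_client = LLMClient.create(
        provider=agent_state.llm_config.model_endpoint_type,  # previously the whole llm_config was passed here
        put_inner_thoughts_first=True,
    )
    response = llm_client.send_llm_request(
        messages=message_sequence,            # prepared message sequence
        llm_config=agent_state.llm_config,    # the config now travels with each call
        tools=allowed_functions,
        stream=False,
    )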
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -332,13 +332,14 @@ class Agent(BaseAgent):
         log_telemetry(self.logger, "_get_ai_reply create start")
         # New LLM client flow
         llm_client = LLMClient.create(
-
+            provider=self.agent_state.llm_config.model_endpoint_type,
             put_inner_thoughts_first=put_inner_thoughts_first,
         )

         if llm_client and not stream:
             response = llm_client.send_llm_request(
                 messages=message_sequence,
+                llm_config=self.agent_state.llm_config,
                 tools=allowed_functions,
                 stream=stream,
                 force_tool_call=force_tool_call,
letta/agents/letta_agent.py
CHANGED
@@ -66,7 +66,7 @@ class LettaAgent(BaseAgent):
         )
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
-
+            provider=agent_state.llm_config.model_endpoint_type,
             put_inner_thoughts_first=True,
         )
         for step in range(max_steps):
@@ -182,6 +182,7 @@ class LettaAgent(BaseAgent):

             response = await llm_client.send_llm_request_async(
                 messages=in_context_messages,
+                llm_config=agent_state.llm_config,
                 tools=allowed_tools,
                 force_tool_call=force_tool_call,
                 stream=stream,
letta/agents/letta_agent_batch.py
CHANGED
@@ -156,7 +156,7 @@ class LettaAgentBatch:

         log_event(name="init_llm_client")
         llm_client = LLMClient.create(
-
+            provider=agent_states[0].llm_config.model_endpoint_type,
             put_inner_thoughts_first=True,
         )
         agent_llm_config_mapping = {s.id: s.llm_config for s in agent_states}
@@ -272,9 +272,14 @@ class LettaAgentBatch:
             request_status_updates.append(RequestStatusUpdateInfo(llm_batch_id=llm_batch_id, agent_id=aid, request_status=status))

             # translate provider‑specific response → OpenAI‑style tool call (unchanged)
-            llm_client = LLMClient.create(
+            llm_client = LLMClient.create(
+                provider=item.llm_config.model_endpoint_type,
+                put_inner_thoughts_first=True,
+            )
             tool_call = (
-                llm_client.convert_response_to_chat_completion(
+                llm_client.convert_response_to_chat_completion(
+                    response_data=pr.message.model_dump(), input_messages=[], llm_config=item.llm_config
+                )
                 .choices[0]
                 .message.tool_calls[0]
             )
letta/agents/voice_agent.py
CHANGED
@@ -90,7 +90,7 @@ class VoiceAgent(BaseAgent):
         # )
         self.message_buffer_limit = message_buffer_limit
         # self.message_buffer_min = message_buffer_min
-        self.
+        self.sleeptime_memory_agent = EphemeralMemoryAgent(
             agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
         )

@@ -372,7 +372,7 @@ class VoiceAgent(BaseAgent):
             return f"Failed to call tool. Error: {e}", False

     async def _recall_memory(self, query, agent_state: AgentState) -> None:
-        results = await self.
+        results = await self.sleeptime_memory_agent.step([MessageCreate(role="user", content=[TextContent(text=query)])])
         target_block = next(b for b in agent_state.memory.blocks if b.label == self.summary_block_label)
         self.block_manager.update_block(
             block_id=target_block.id, block_update=BlockUpdate(value=results[0].content[0].text), actor=self.actor
letta/client/client.py
CHANGED
@@ -85,6 +85,7 @@ class AbstractClient(object):
         description: Optional[str] = None,
         tags: Optional[List[str]] = None,
         message_buffer_autoclear: bool = False,
+        response_format: Optional[ResponseFormatUnion] = None,
     ) -> AgentState:
         raise NotImplementedError

@@ -2352,6 +2353,7 @@ class LocalClient(AbstractClient):
         initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
         message_buffer_autoclear: bool = False,
+        response_format: Optional[ResponseFormatUnion] = None,
     ) -> AgentState:
         """Create an agent

@@ -2405,6 +2407,7 @@ class LocalClient(AbstractClient):
             "initial_message_sequence": initial_message_sequence,
             "tags": tags,
             "message_buffer_autoclear": message_buffer_autoclear,
+            "response_format": response_format,
         }

         # Only add name if it's not None
letta/functions/functions.py
CHANGED
@@ -2,7 +2,7 @@ import importlib
 import inspect
 from textwrap import dedent  # remove indentation
 from types import ModuleType
-from typing import Dict, List, Optional
+from typing import Dict, List, Literal, Optional

 from letta.errors import LettaToolCreateError
 from letta.functions.schema_generator import generate_schema
@@ -20,6 +20,7 @@ def derive_openai_json_schema(source_code: str, name: Optional[str] = None) -> d
         "Optional": Optional,
         "List": List,
         "Dict": Dict,
+        "Literal": Literal,
         # To support Pydantic models
         # "BaseModel": BaseModel,
         # "Field": Field,
letta/functions/schema_generator.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Type, Union, get_args, get_origin
 from composio.client.collections import ActionParametersModel
 from docstring_parser import parse
 from pydantic import BaseModel
+from typing_extensions import Literal

 from letta.functions.mcp_client.types import MCPTool

@@ -70,6 +71,10 @@ def type_to_json_schema_type(py_type) -> dict:
             "items": type_to_json_schema_type(args[0]),
         }

+    # Handle literals
+    if get_origin(py_type) is Literal:
+        return {"type": "string", "enum": get_args(py_type)}
+
     # Handle object types
     if py_type == dict or origin in (dict, Dict):
         args = get_args(py_type)
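For illustration, a minimal sketch of what the new Literal branch produces; only the branch added above is reproduced, and the helper name and Priority alias are illustrative:

    from typing import Literal, get_args, get_origin

    Priority = Literal["low", "medium", "high"]

    def literal_to_json_schema(py_type) -> dict:
        # Mirrors the added branch: Literal values become a string enum.
        if get_origin(py_type) is Literal:
            return {"type": "string", "enum": get_args(py_type)}
        raise NotImplementedError("non-Literal types elided in this sketch")

    print(literal_to_json_schema(Priority))
    # {'type': 'string', 'enum': ('low', 'medium', 'high')}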
letta/helpers/composio_helpers.py
CHANGED
@@ -10,7 +10,7 @@ def get_composio_api_key(actor: User, logger: Optional[Logger] = None) -> Option
     api_keys = SandboxConfigManager().list_sandbox_env_vars_by_key(key="COMPOSIO_API_KEY", actor=actor)
     if not api_keys:
         if logger:
-            logger.
+            logger.debug(f"No API keys found for Composio. Defaulting to the environment variable...")
         if tool_settings.composio_api_key:
             return tool_settings.composio_api_key
         else:
letta/helpers/datetime_helpers.py
CHANGED
@@ -66,6 +66,15 @@ def get_utc_time() -> datetime:
     return datetime.now(timezone.utc)


+def get_utc_time_int() -> int:
+    return int(get_utc_time().timestamp())
+
+
+def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
+    """Convert Unix timestamp in seconds to UTC datetime object"""
+    return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
+
+
 def format_datetime(dt):
     return dt.strftime("%Y-%m-%d %I:%M:%S %p %Z%z")

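A short usage sketch for the two helpers added above; the function definitions are copied from the hunk, while the variable names are illustrative:

    from datetime import datetime, timezone

    def get_utc_time() -> datetime:
        return datetime.now(timezone.utc)

    def get_utc_time_int() -> int:
        return int(get_utc_time().timestamp())

    def timestamp_to_datetime(timestamp_seconds: int) -> datetime:
        """Convert Unix timestamp in seconds to UTC datetime object"""
        return datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)

    created = get_utc_time_int()            # integer Unix timestamp, e.g. for the `created=` fields below
    as_dt = timestamp_to_datetime(created)  # back to a timezone-aware UTC datetime
    assert as_dt.tzinfo == timezone.utc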
letta/jobs/llm_batch_job_polling.py
CHANGED
@@ -73,7 +73,8 @@ async def fetch_batch_items(server: SyncServer, batch_id: str, batch_resp_id: st
     """
     updates = []
     try:
-
+        results = await server.anthropic_async_client.beta.messages.batches.results(batch_resp_id)
+        async for item_result in results:
             # Here, custom_id should be the agent_id
             item_status = map_anthropic_individual_batch_item_status_to_job_status(item_result)
             updates.append(ItemUpdateInfo(batch_id, item_result.custom_id, item_status, item_result))
letta/llm_api/anthropic.py
CHANGED
@@ -20,7 +20,7 @@ from anthropic.types.beta import (
 )

 from letta.errors import BedrockError, BedrockPermissionError
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
 from letta.llm_api.aws_bedrock import get_bedrock_client
 from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -396,7 +396,7 @@ def convert_anthropic_response_to_chatcompletion(
     return ChatCompletionResponse(
         id=response.id,
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=response.model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
@@ -451,7 +451,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
                 'logprobs': None
             }
         ],
-        'created':
+        'created': 1713216662,
         'model': 'gpt-4o-mini-2024-07-18',
         'system_fingerprint': 'fp_bd83329f63',
         'object': 'chat.completion.chunk'
@@ -613,7 +613,7 @@ def convert_anthropic_stream_event_to_chatcompletion(
     return ChatCompletionChunkResponse(
         id=message_id,
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=model,
         output_tokens=completion_chunk_tokens,
     )
@@ -920,7 +920,7 @@ def anthropic_chat_completions_process_stream(
     chat_completion_response = ChatCompletionResponse(
         id=dummy_message.id if create_message_id else TEMP_STREAM_RESPONSE_ID,
         choices=[],
-        created=dummy_message.created_at,
+        created=int(dummy_message.created_at.timestamp()),
         model=chat_completion_request.model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
@@ -954,7 +954,11 @@ def anthropic_chat_completions_process_stream(
                 message_type = stream_interface.process_chunk(
                     chat_completion_chunk,
                     message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
-                    message_date=
+                    message_date=(
+                        timestamp_to_datetime(chat_completion_response.created)
+                        if create_message_datetime
+                        else timestamp_to_datetime(chat_completion_chunk.created)
+                    ),
                     # if extended_thinking is on, then reasoning_content will be flowing as chunks
                     # TODO handle emitting redacted reasoning content (e.g. as concat?)
                     expect_reasoning_content=extended_thinking,
letta/llm_api/anthropic_client.py
CHANGED
@@ -22,7 +22,7 @@ from letta.errors import (
     LLMServerError,
     LLMUnprocessableEntityError,
 )
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
@@ -43,18 +43,18 @@ logger = get_logger(__name__)

 class AnthropicClient(LLMClientBase):

-    def request(self, request_data: dict) -> dict:
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = self._get_anthropic_client(async_client=False)
         response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
         return response.model_dump()

-    async def request_async(self, request_data: dict) -> dict:
+    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = self._get_anthropic_client(async_client=True)
         response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
         return response.model_dump()

     @trace_method
-    async def stream_async(self, request_data: dict) -> AsyncStream[BetaRawMessageStreamEvent]:
+    async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
         client = self._get_anthropic_client(async_client=True)
         request_data["stream"] = True
         return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
@@ -310,6 +310,7 @@ class AnthropicClient(LLMClientBase):
         self,
         response_data: dict,
         input_messages: List[PydanticMessage],
+        llm_config: LLMConfig,
     ) -> ChatCompletionResponse:
         """
         Example response from Claude 3:
@@ -403,7 +404,7 @@ class AnthropicClient(LLMClientBase):
         chat_completion_response = ChatCompletionResponse(
             id=response.id,
             choices=[choice],
-            created=
+            created=get_utc_time_int(),
             model=response.model,
             usage=UsageStatistics(
                 prompt_tokens=prompt_tokens,
@@ -411,7 +412,7 @@ class AnthropicClient(LLMClientBase):
                 total_tokens=prompt_tokens + completion_tokens,
             ),
         )
-        if
+        if llm_config.put_inner_thoughts_in_kwargs:
             chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
                 response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
             )
letta/llm_api/cohere.py
CHANGED
@@ -4,7 +4,7 @@ from typing import List, Optional, Union

 import requests

-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.local_llm.utils import count_tokens
 from letta.schemas.message import Message
@@ -207,7 +207,7 @@ def convert_cohere_response_to_chatcompletion(
     return ChatCompletionResponse(
         id=response_json["response_id"],
         choices=[choice],
-        created=
+        created=get_utc_time_int(),
         model=model,
         usage=UsageStatistics(
             prompt_tokens=prompt_tokens,
letta/llm_api/google_ai_client.py
CHANGED
@@ -6,7 +6,7 @@ import requests
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

 from letta.constants import NON_USER_MSG_PREFIX
-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.helpers import make_post_request
 from letta.llm_api.llm_client_base import LLMClientBase
@@ -25,15 +25,15 @@ logger = get_logger(__name__)

 class GoogleAIClient(LLMClientBase):

-    def request(self, request_data: dict) -> dict:
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         """
         Performs underlying request to llm and returns raw response.
         """
         # print("[google_ai request]", json.dumps(request_data, indent=2))

         url, headers = get_gemini_endpoint_and_headers(
-            base_url=str(
-            model=
+            base_url=str(llm_config.model_endpoint),
+            model=llm_config.model,
             api_key=str(model_settings.gemini_api_key),
             key_in_header=True,
             generate_content=True,
@@ -55,7 +55,7 @@ class GoogleAIClient(LLMClientBase):
             tool_objs = [Tool(**t) for t in tools]
             tool_names = [t.function.name for t in tool_objs]
             # Convert to the exact payload style Google expects
-            tools = self.convert_tools_to_google_ai_format(tool_objs)
+            tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config)
         else:
             tool_names = []

@@ -88,6 +88,7 @@ class GoogleAIClient(LLMClientBase):
         self,
         response_data: dict,
         input_messages: List[PydanticMessage],
+        llm_config: LLMConfig,
     ) -> ChatCompletionResponse:
         """
         Converts custom response format from llm client into an OpenAI
@@ -150,7 +151,7 @@ class GoogleAIClient(LLMClientBase):
                     assert isinstance(function_args, dict), function_args

                     # NOTE: this also involves stripping the inner monologue out of the function
-                    if
+                    if llm_config.put_inner_thoughts_in_kwargs:
                         from letta.local_llm.constants import INNER_THOUGHTS_KWARG

                         assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
@@ -259,49 +260,14 @@ class GoogleAIClient(LLMClientBase):
             return ChatCompletionResponse(
                 id=response_id,
                 choices=choices,
-                model=
-                created=
+                model=llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                created=get_utc_time_int(),
                 usage=usage,
             )
         except KeyError as e:
             raise e

-    def
-        """Recursively clean schema parts to remove unsupported Google AI keywords."""
-        if not isinstance(schema_part, dict):
-            return
-
-        # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
-        # * Only a subset of the OpenAPI schema is supported.
-        # * Supported parameter types in Python are limited.
-        unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum"]
-        keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
-        for key_to_remove in keys_to_remove_at_this_level:
-            logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
-            del schema_part[key_to_remove]
-
-        if schema_part.get("type") == "string" and "format" in schema_part:
-            allowed_formats = ["enum", "date-time"]
-            if schema_part["format"] not in allowed_formats:
-                logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
-                del schema_part["format"]
-
-        # Check properties within the current level
-        if "properties" in schema_part and isinstance(schema_part["properties"], dict):
-            for prop_name, prop_schema in schema_part["properties"].items():
-                self._clean_google_ai_schema_properties(prop_schema)
-
-        # Check items within arrays
-        if "items" in schema_part and isinstance(schema_part["items"], dict):
-            self._clean_google_ai_schema_properties(schema_part["items"])
-
-        # Check within anyOf, allOf, oneOf lists
-        for key in ["anyOf", "allOf", "oneOf"]:
-            if key in schema_part and isinstance(schema_part[key], list):
-                for item_schema in schema_part[key]:
-                    self._clean_google_ai_schema_properties(item_schema)
-
-    def convert_tools_to_google_ai_format(self, tools: List[Tool]) -> List[dict]:
+    def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
         """
         OpenAI style:
         "tools": [{
@@ -365,7 +331,7 @@ class GoogleAIClient(LLMClientBase):
             self._clean_google_ai_schema_properties(func["parameters"])

             # Add inner thoughts
-            if
+            if llm_config.put_inner_thoughts_in_kwargs:
                 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION

                 func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
letta/llm_api/google_vertex_client.py
CHANGED
@@ -4,7 +4,7 @@ from typing import List, Optional
 from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig

-from letta.helpers.datetime_helpers import
+from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
 from letta.llm_api.google_ai_client import GoogleAIClient
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
@@ -18,7 +18,7 @@ from letta.utils import get_tool_call_id

 class GoogleVertexClient(GoogleAIClient):

-    def request(self, request_data: dict) -> dict:
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
         """
         Performs underlying request to llm and returns raw response.
         """
@@ -29,7 +29,7 @@ class GoogleVertexClient(GoogleAIClient):
             http_options={"api_version": "v1"},
         )
         response = client.models.generate_content(
-            model=
+            model=llm_config.model,
             contents=request_data["contents"],
             config=request_data["config"],
         )
@@ -45,7 +45,7 @@ class GoogleVertexClient(GoogleAIClient):
         """
         Constructs a request object in the expected data format for this client.
         """
-        request_data = super().build_request_data(messages,
+        request_data = super().build_request_data(messages, llm_config, tools, force_tool_call)
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")

@@ -75,6 +75,7 @@ class GoogleVertexClient(GoogleAIClient):
         self,
         response_data: dict,
         input_messages: List[PydanticMessage],
+        llm_config: LLMConfig,
     ) -> ChatCompletionResponse:
         """
         Converts custom response format from llm client into an OpenAI
@@ -136,7 +137,7 @@ class GoogleVertexClient(GoogleAIClient):
                     assert isinstance(function_args, dict), function_args

                     # NOTE: this also involves stripping the inner monologue out of the function
-                    if
+                    if llm_config.put_inner_thoughts_in_kwargs:
                         from letta.local_llm.constants import INNER_THOUGHTS_KWARG

                         assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
@@ -233,8 +234,8 @@ class GoogleVertexClient(GoogleAIClient):
             return ChatCompletionResponse(
                 id=response_id,
                 choices=choices,
-                model=
-                created=
+                model=llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                created=get_utc_time_int(),
                 usage=usage,
             )
         except KeyError as e:
letta/llm_api/llm_client.py
CHANGED
@@ -1,7 +1,7 @@
 from typing import Optional

 from letta.llm_api.llm_client_base import LLMClientBase
-from letta.schemas.
+from letta.schemas.enums import ProviderType


 class LLMClient:
@@ -9,17 +9,15 @@ class LLMClient:

     @staticmethod
     def create(
-
+        provider: ProviderType,
         put_inner_thoughts_first: bool = True,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.

         Args:
-
+            provider: The model endpoint type
             put_inner_thoughts_first: Whether to put inner thoughts first in the response
-            use_structured_output: Whether to use structured output
-            use_tool_naming: Whether to use tool naming

         Returns:
             An instance of LLMClientBase subclass
@@ -27,33 +25,29 @@ class LLMClient:
         Raises:
             ValueError: If the model endpoint type is not supported
         """
-        match
-            case
+        match provider:
+            case ProviderType.google_ai:
                 from letta.llm_api.google_ai_client import GoogleAIClient

                 return GoogleAIClient(
-                    llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
-            case
+            case ProviderType.google_vertex:
                 from letta.llm_api.google_vertex_client import GoogleVertexClient

                 return GoogleVertexClient(
-                    llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
-            case
+            case ProviderType.anthropic:
                 from letta.llm_api.anthropic_client import AnthropicClient

                 return AnthropicClient(
-                    llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
-            case
+            case ProviderType.openai:
                 from letta.llm_api.openai_client import OpenAIClient

                 return OpenAIClient(
-                    llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
             case _: