letta-nightly 0.11.7.dev20251006104136__py3-none-any.whl → 0.11.7.dev20251008104128__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/adapters/letta_llm_adapter.py +1 -0
- letta/adapters/letta_llm_request_adapter.py +0 -1
- letta/adapters/letta_llm_stream_adapter.py +7 -2
- letta/adapters/simple_llm_request_adapter.py +88 -0
- letta/adapters/simple_llm_stream_adapter.py +192 -0
- letta/agents/agent_loop.py +6 -0
- letta/agents/ephemeral_summary_agent.py +2 -1
- letta/agents/helpers.py +142 -6
- letta/agents/letta_agent.py +13 -33
- letta/agents/letta_agent_batch.py +2 -4
- letta/agents/letta_agent_v2.py +87 -77
- letta/agents/letta_agent_v3.py +899 -0
- letta/agents/voice_agent.py +2 -6
- letta/constants.py +8 -4
- letta/errors.py +40 -0
- letta/functions/function_sets/base.py +84 -4
- letta/functions/function_sets/multi_agent.py +0 -3
- letta/functions/schema_generator.py +113 -71
- letta/groups/dynamic_multi_agent.py +3 -2
- letta/groups/helpers.py +1 -2
- letta/groups/round_robin_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent.py +3 -2
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/groups/sleeptime_multi_agent_v3.py +17 -17
- letta/groups/supervisor_multi_agent.py +84 -80
- letta/helpers/converters.py +3 -0
- letta/helpers/message_helper.py +4 -0
- letta/helpers/tool_rule_solver.py +92 -5
- letta/interfaces/anthropic_streaming_interface.py +409 -0
- letta/interfaces/gemini_streaming_interface.py +296 -0
- letta/interfaces/openai_streaming_interface.py +752 -1
- letta/llm_api/anthropic_client.py +126 -16
- letta/llm_api/bedrock_client.py +4 -2
- letta/llm_api/deepseek_client.py +4 -1
- letta/llm_api/google_vertex_client.py +123 -42
- letta/llm_api/groq_client.py +4 -1
- letta/llm_api/llm_api_tools.py +11 -4
- letta/llm_api/llm_client_base.py +6 -2
- letta/llm_api/openai.py +32 -2
- letta/llm_api/openai_client.py +423 -18
- letta/llm_api/xai_client.py +4 -1
- letta/main.py +9 -5
- letta/memory.py +1 -0
- letta/orm/__init__.py +1 -1
- letta/orm/agent.py +10 -0
- letta/orm/block.py +7 -16
- letta/orm/blocks_agents.py +8 -2
- letta/orm/files_agents.py +2 -0
- letta/orm/job.py +7 -5
- letta/orm/mcp_oauth.py +1 -0
- letta/orm/message.py +21 -6
- letta/orm/organization.py +2 -0
- letta/orm/provider.py +6 -2
- letta/orm/run.py +71 -0
- letta/orm/sandbox_config.py +7 -1
- letta/orm/sqlalchemy_base.py +0 -306
- letta/orm/step.py +6 -5
- letta/orm/step_metrics.py +5 -5
- letta/otel/tracing.py +28 -3
- letta/plugins/defaults.py +4 -4
- letta/prompts/system_prompts/__init__.py +2 -0
- letta/prompts/system_prompts/letta_v1.py +25 -0
- letta/schemas/agent.py +3 -2
- letta/schemas/agent_file.py +9 -3
- letta/schemas/block.py +23 -10
- letta/schemas/enums.py +21 -2
- letta/schemas/job.py +17 -4
- letta/schemas/letta_message_content.py +71 -2
- letta/schemas/letta_stop_reason.py +5 -5
- letta/schemas/llm_config.py +53 -3
- letta/schemas/memory.py +1 -1
- letta/schemas/message.py +504 -117
- letta/schemas/openai/responses_request.py +64 -0
- letta/schemas/providers/__init__.py +2 -0
- letta/schemas/providers/anthropic.py +16 -0
- letta/schemas/providers/ollama.py +115 -33
- letta/schemas/providers/openrouter.py +52 -0
- letta/schemas/providers/vllm.py +2 -1
- letta/schemas/run.py +48 -42
- letta/schemas/step.py +2 -2
- letta/schemas/step_metrics.py +1 -1
- letta/schemas/tool.py +15 -107
- letta/schemas/tool_rule.py +88 -5
- letta/serialize_schemas/marshmallow_agent.py +1 -0
- letta/server/db.py +86 -408
- letta/server/rest_api/app.py +61 -10
- letta/server/rest_api/dependencies.py +14 -0
- letta/server/rest_api/redis_stream_manager.py +19 -8
- letta/server/rest_api/routers/v1/agents.py +364 -292
- letta/server/rest_api/routers/v1/blocks.py +14 -20
- letta/server/rest_api/routers/v1/identities.py +45 -110
- letta/server/rest_api/routers/v1/internal_templates.py +21 -0
- letta/server/rest_api/routers/v1/jobs.py +23 -6
- letta/server/rest_api/routers/v1/messages.py +1 -1
- letta/server/rest_api/routers/v1/runs.py +126 -85
- letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
- letta/server/rest_api/routers/v1/tools.py +281 -594
- letta/server/rest_api/routers/v1/voice.py +1 -1
- letta/server/rest_api/streaming_response.py +29 -29
- letta/server/rest_api/utils.py +122 -64
- letta/server/server.py +160 -887
- letta/services/agent_manager.py +236 -919
- letta/services/agent_serialization_manager.py +16 -0
- letta/services/archive_manager.py +0 -100
- letta/services/block_manager.py +211 -168
- letta/services/file_manager.py +1 -1
- letta/services/files_agents_manager.py +24 -33
- letta/services/group_manager.py +0 -142
- letta/services/helpers/agent_manager_helper.py +7 -2
- letta/services/helpers/run_manager_helper.py +85 -0
- letta/services/job_manager.py +96 -411
- letta/services/lettuce/__init__.py +6 -0
- letta/services/lettuce/lettuce_client_base.py +86 -0
- letta/services/mcp_manager.py +38 -6
- letta/services/message_manager.py +165 -362
- letta/services/organization_manager.py +0 -36
- letta/services/passage_manager.py +0 -345
- letta/services/provider_manager.py +0 -80
- letta/services/run_manager.py +301 -0
- letta/services/sandbox_config_manager.py +0 -234
- letta/services/step_manager.py +62 -39
- letta/services/summarizer/summarizer.py +9 -7
- letta/services/telemetry_manager.py +0 -16
- letta/services/tool_executor/builtin_tool_executor.py +35 -0
- letta/services/tool_executor/core_tool_executor.py +397 -2
- letta/services/tool_executor/files_tool_executor.py +3 -3
- letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
- letta/services/tool_executor/tool_execution_manager.py +6 -8
- letta/services/tool_executor/tool_executor_base.py +3 -3
- letta/services/tool_manager.py +85 -339
- letta/services/tool_sandbox/base.py +24 -13
- letta/services/tool_sandbox/e2b_sandbox.py +16 -1
- letta/services/tool_schema_generator.py +123 -0
- letta/services/user_manager.py +0 -99
- letta/settings.py +20 -4
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/METADATA +3 -5
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/RECORD +140 -132
- letta/agents/temporal/activities/__init__.py +0 -4
- letta/agents/temporal/activities/example_activity.py +0 -7
- letta/agents/temporal/activities/prepare_messages.py +0 -10
- letta/agents/temporal/temporal_agent_workflow.py +0 -56
- letta/agents/temporal/types.py +0 -25
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20251006104136.dist-info → letta_nightly-0.11.7.dev20251008104128.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,296 @@
|
|
1
|
+
import asyncio
|
2
|
+
import base64
|
3
|
+
import json
|
4
|
+
from collections.abc import AsyncGenerator
|
5
|
+
from datetime import datetime, timezone
|
6
|
+
from typing import AsyncIterator, List, Optional
|
7
|
+
|
8
|
+
from google.genai.types import (
|
9
|
+
GenerateContentResponse,
|
10
|
+
)
|
11
|
+
|
12
|
+
from letta.log import get_logger
|
13
|
+
from letta.schemas.letta_message import (
|
14
|
+
ApprovalRequestMessage,
|
15
|
+
AssistantMessage,
|
16
|
+
LettaMessage,
|
17
|
+
ReasoningMessage,
|
18
|
+
ToolCallDelta,
|
19
|
+
ToolCallMessage,
|
20
|
+
)
|
21
|
+
from letta.schemas.letta_message_content import (
|
22
|
+
ReasoningContent,
|
23
|
+
TextContent,
|
24
|
+
ToolCallContent,
|
25
|
+
)
|
26
|
+
from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
|
27
|
+
from letta.schemas.message import Message
|
28
|
+
from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
|
29
|
+
from letta.utils import get_tool_call_id
|
30
|
+
|
31
|
+
logger = get_logger(__name__)
|
32
|
+
|
33
|
+
|
34
|
+
class SimpleGeminiStreamingInterface:
|
35
|
+
"""
|
36
|
+
Encapsulates the logic for streaming responses from Gemini API:
|
37
|
+
https://ai.google.dev/gemini-api/docs/text-generation#streaming-responses
|
38
|
+
"""
|
39
|
+
|
40
|
+
def __init__(
    self,
    requires_approval_tools: list | None = None,
    run_id: str | None = None,
    step_id: str | None = None,
):
    """Initialize per-turn accumulation state for one Gemini streaming response.

    Args:
        requires_approval_tools: Names of tools whose calls must be surfaced
            as approval requests instead of plain tool-call messages.
            Defaults to no tools (backward-compatible with the old `[]` default).
        run_id: Optional run identifier stamped onto emitted messages.
        step_id: Optional step identifier stamped onto emitted messages.
    """
    self.run_id = run_id
    self.step_id = step_id

    # Fix for the shared-mutable-default pitfall: never use `list = []` as a
    # default — every instance now gets its own fresh list.
    self.requires_approval_tools = requires_approval_tools if requires_approval_tools is not None else []

    # Response ID reported by Gemini; populated once events start arriving.
    self.message_id = None

    # In Gemini streaming, the tool call comes all at once
    self.tool_call_id: str | None = None
    self.tool_call_name: str | None = None
    self.tool_call_args: dict | None = None  # NOTE: Not a str!

    # NOTE: signature only is included if tools are present
    self.thinking_signature: str | None = None

    # Regular text content too
    self.text_content: str | None = None

    # Premake IDs for database writes
    self.letta_message_id = Message.generate_id()

    # Sadly, Gemini's encrypted reasoning logic forces us to store stream parts in state
    self.content_parts: List[ReasoningContent | TextContent | ToolCallContent] = []
|
72
|
+
|
73
|
+
def get_content(self) -> List[ReasoningContent | TextContent | ToolCallContent]:
    """Return the accumulated content parts (unusually chunked, not merged).

    Before handing the parts back, stamp the turn's thinking signature onto
    each reasoning part; this assumes at most one signature per turn.
    """
    reasoning_parts = (p for p in self.content_parts if isinstance(p, ReasoningContent))
    for reasoning_part in reasoning_parts:
        reasoning_part.signature = self.thinking_signature
    return self.content_parts
|
80
|
+
|
81
|
+
def get_tool_call_object(self) -> ToolCall:
    """Assemble the completed ToolCall for the agent loop.

    Raises:
        ValueError: if the id, name, or arguments were never captured
            from the stream.
    """
    # Validate each required piece with its original error message.
    for value, error_message in (
        (self.tool_call_id, "No tool call ID available"),
        (self.tool_call_name, "No tool call name available"),
        (self.tool_call_args, "No tool call arguments available"),
    ):
        if value is None:
            raise ValueError(error_message)

    # Gemini delivers args as a dict; serialize to the stringified form the
    # OpenAI-style schema expects.  TODO use json_dumps?
    serialized_args = json.dumps(self.tool_call_args)

    function = FunctionCall(
        name=self.tool_call_name,
        arguments=serialized_args,
    )
    return ToolCall(
        id=self.tool_call_id,
        type="function",
        function=function,
    )
|
101
|
+
|
102
|
+
async def process(
    self,
    stream: AsyncIterator[GenerateContentResponse],
    ttft_span: Optional["Span"] = None,
) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
    """
    Iterates over the Gemini stream, yielding SSE events.
    It also collects tokens and detects if a tool call is triggered.

    Args:
        stream: The async iterator of raw Gemini responses.
        ttft_span: Optional tracing span; receives a stop_reason event on error.

    Yields:
        LettaMessage chunks translated from the stream, and a
        LettaStopReason if an unrecoverable error occurs.
    """
    prev_message_type = None
    message_index = 0

    # Shared relay used on both the normal path and the cancellation-override
    # path — previously this loop was duplicated verbatim in both places.
    async def _relay(evt: GenerateContentResponse):
        nonlocal prev_message_type, message_index
        async for message in self._process_event(evt, ttft_span, prev_message_type, message_index):
            new_message_type = message.message_type
            if new_message_type != prev_message_type:
                # Bump the index on each message-type transition, except for
                # the very first message of the turn.
                # (Was `prev_message_type != None`; `is not None` is the
                # correct singleton comparison per PEP 8.)
                if prev_message_type is not None:
                    message_index += 1
                prev_message_type = new_message_type
            yield message

    try:
        async for event in stream:
            try:
                async for message in _relay(event):
                    yield message
            except asyncio.CancelledError as e:
                import traceback

                logger.info("Cancelled stream attempt but overriding %s: %s", e, traceback.format_exc())
                # Deliberately swallow the cancellation: re-relay the same
                # event and keep streaming.
                async for message in _relay(event):
                    yield message

                # Don't raise the exception here
                continue

    except Exception as e:
        import traceback

        logger.error("Error processing stream: %s\n%s", e, traceback.format_exc())
        if ttft_span:
            ttft_span.add_event(
                name="stop_reason",
                attributes={"stop_reason": StopReasonType.error.value, "error": str(e), "stacktrace": traceback.format_exc()},
            )
        yield LettaStopReason(stop_reason=StopReasonType.error)
        raise e
    finally:
        logger.info("GeminiStreamingInterface: Stream processing complete.")
|
151
|
+
|
152
|
+
async def _process_event(
    self,
    event: GenerateContentResponse,
    ttft_span: Optional["Span"] = None,
    prev_message_type: Optional[str] = None,
    message_index: int = 0,
) -> AsyncGenerator[LettaMessage | LettaStopReason, None]:
    """Translate one raw Gemini response event into zero or more LettaMessages.

    Side effects: accumulates model/usage info, text, tool-call state, and
    content parts on `self` for later retrieval via get_content() /
    get_tool_call_object().

    NOTE(review): `prev_message_type` and `message_index` are mutated locally
    here AND re-tracked by the caller (`process`); the local updates only
    affect parts within this same event.
    """
    # Every event has usage data + model info on it,
    # so we can continually extract
    self.model = event.model_version
    self.message_id = event.response_id
    usage_metadata = event.usage_metadata
    if usage_metadata:
        if usage_metadata.prompt_token_count:
            self.input_tokens = usage_metadata.prompt_token_count
        if usage_metadata.total_token_count:
            # Output tokens are derived, not reported directly by the API.
            self.output_tokens = usage_metadata.total_token_count - usage_metadata.prompt_token_count

    # No candidates means nothing to translate for this event.
    if not event.candidates or len(event.candidates) == 0:
        return
    else:
        # NOTE: should always be len 1
        candidate = event.candidates[0]

    if not candidate.content or not candidate.content.parts:
        return

    for part in candidate.content.parts:
        # NOTE: the thought signature often comes after the thought text, eg with the tool call
        if part.thought_signature:
            # NOTE: the thought_signature comes on the Part with the function_call
            thought_signature = part.thought_signature
            # Signature arrives as raw bytes; store base64 text for transport.
            self.thinking_signature = base64.b64encode(thought_signature).decode("utf-8")
            if prev_message_type and prev_message_type != "reasoning_message":
                message_index += 1
            # NOTE(review): this yield passes `date` as an isoformat string and
            # omits run_id/step_id, unlike the other yields below — confirm
            # whether that asymmetry is intentional.
            yield ReasoningMessage(
                id=self.letta_message_id,
                date=datetime.now(timezone.utc).isoformat(),
                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                source="reasoner_model",
                reasoning="",
                signature=self.thinking_signature,
            )
            prev_message_type = "reasoning_message"

        # Thinking summary content part (bool means text is thought part)
        if part.thought:
            reasoning_summary = part.text
            if prev_message_type and prev_message_type != "reasoning_message":
                message_index += 1
            yield ReasoningMessage(
                id=self.letta_message_id,
                date=datetime.now(timezone.utc).isoformat(),
                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                source="reasoner_model",
                reasoning=reasoning_summary,
                run_id=self.run_id,
                step_id=self.step_id,
            )
            prev_message_type = "reasoning_message"
            # Persist the chunk for get_content(); signature may be refined
            # later (see get_content, which re-stamps it).
            self.content_parts.append(
                ReasoningContent(
                    is_native=True,
                    reasoning=reasoning_summary,
                    signature=self.thinking_signature,
                )
            )

        # Plain text content part
        elif part.text:
            content = part.text
            # Accumulate the full text across chunks for end-of-turn use.
            self.text_content = content if self.text_content is None else self.text_content + content
            if prev_message_type and prev_message_type != "assistant_message":
                message_index += 1
            yield AssistantMessage(
                id=self.letta_message_id,
                otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                date=datetime.now(timezone.utc),
                content=content,
                run_id=self.run_id,
                step_id=self.step_id,
            )
            prev_message_type = "assistant_message"
            self.content_parts.append(
                TextContent(
                    text=content,
                    signature=self.thinking_signature,
                )
            )

        # Tool call function part
        # NOTE: in gemini, this comes all at once, and the args are JSON dict, not stringified
        elif part.function_call:
            function_call = part.function_call

            # Look for call_id, name, and possibly arguments (though likely always empty string)
            # Gemini does not supply a call id, so we mint one locally.
            call_id = get_tool_call_id()
            name = function_call.name
            arguments = function_call.args  # NOTE: dict, not str
            arguments_str = json.dumps(arguments)  # NOTE: use json_dumps?

            # Cache for get_tool_call_object() after the stream finishes.
            self.tool_call_id = call_id
            self.tool_call_name = name
            self.tool_call_args = arguments

            # Tools in the approval list are surfaced as approval requests
            # rather than direct tool-call messages.
            if self.tool_call_name and self.tool_call_name in self.requires_approval_tools:
                if prev_message_type and prev_message_type != "approval_request_message":
                    message_index += 1
                yield ApprovalRequestMessage(
                    id=self.letta_message_id,
                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                    date=datetime.now(timezone.utc),
                    tool_call=ToolCallDelta(
                        name=name,
                        arguments=arguments_str,
                        tool_call_id=call_id,
                    ),
                    run_id=self.run_id,
                    step_id=self.step_id,
                )
                prev_message_type = "approval_request_message"
            else:
                if prev_message_type and prev_message_type != "tool_call_message":
                    message_index += 1
                yield ToolCallMessage(
                    id=self.letta_message_id,
                    otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
                    date=datetime.now(timezone.utc),
                    tool_call=ToolCallDelta(
                        name=name,
                        arguments=arguments_str,
                        tool_call_id=call_id,
                    ),
                    run_id=self.run_id,
                    step_id=self.step_id,
                )
                prev_message_type = "tool_call_message"
            self.content_parts.append(
                ToolCallContent(
                    id=call_id,
                    name=name,
                    input=arguments,
                    signature=self.thinking_signature,
                )
            )