letta-nightly 0.6.34.dev20250302104001__py3-none-any.whl → 0.6.34.dev20250303230404__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (55)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +40 -15
  3. letta/agents/__init__.py +0 -0
  4. letta/agents/base_agent.py +51 -0
  5. letta/agents/ephemeral_agent.py +72 -0
  6. letta/agents/low_latency_agent.py +315 -0
  7. letta/constants.py +3 -1
  8. letta/functions/ast_parsers.py +50 -1
  9. letta/functions/helpers.py +79 -2
  10. letta/functions/schema_generator.py +3 -0
  11. letta/helpers/converters.py +3 -3
  12. letta/interfaces/__init__.py +0 -0
  13. letta/interfaces/openai_chat_completions_streaming_interface.py +109 -0
  14. letta/interfaces/utils.py +11 -0
  15. letta/llm_api/anthropic.py +9 -1
  16. letta/llm_api/azure_openai.py +3 -0
  17. letta/llm_api/google_ai.py +3 -0
  18. letta/llm_api/google_vertex.py +4 -0
  19. letta/llm_api/llm_api_tools.py +1 -1
  20. letta/llm_api/openai.py +6 -0
  21. letta/local_llm/chat_completion_proxy.py +6 -1
  22. letta/log.py +2 -2
  23. letta/orm/step.py +1 -0
  24. letta/orm/tool.py +1 -1
  25. letta/prompts/system/memgpt_convo_only.txt +3 -5
  26. letta/prompts/system/memgpt_memory_only.txt +29 -0
  27. letta/schemas/agent.py +0 -1
  28. letta/schemas/step.py +1 -1
  29. letta/schemas/tool.py +16 -2
  30. letta/server/rest_api/app.py +5 -1
  31. letta/server/rest_api/routers/v1/agents.py +32 -21
  32. letta/server/rest_api/routers/v1/identities.py +9 -1
  33. letta/server/rest_api/routers/v1/runs.py +49 -0
  34. letta/server/rest_api/routers/v1/tools.py +1 -0
  35. letta/server/rest_api/routers/v1/voice.py +19 -255
  36. letta/server/rest_api/utils.py +3 -2
  37. letta/server/server.py +15 -7
  38. letta/services/agent_manager.py +10 -6
  39. letta/services/helpers/agent_manager_helper.py +0 -2
  40. letta/services/helpers/tool_execution_helper.py +18 -0
  41. letta/services/job_manager.py +98 -0
  42. letta/services/step_manager.py +2 -0
  43. letta/services/summarizer/__init__.py +0 -0
  44. letta/services/summarizer/enums.py +9 -0
  45. letta/services/summarizer/summarizer.py +102 -0
  46. letta/services/tool_execution_sandbox.py +20 -3
  47. letta/services/tool_manager.py +1 -1
  48. letta/settings.py +2 -0
  49. letta/tracing.py +176 -156
  50. {letta_nightly-0.6.34.dev20250302104001.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/METADATA +6 -5
  51. {letta_nightly-0.6.34.dev20250302104001.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/RECORD +54 -44
  52. letta/chat_only_agent.py +0 -101
  53. {letta_nightly-0.6.34.dev20250302104001.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/LICENSE +0 -0
  54. {letta_nightly-0.6.34.dev20250302104001.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/WHEEL +0 -0
  55. {letta_nightly-0.6.34.dev20250302104001.dist-info → letta_nightly-0.6.34.dev20250303230404.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -17,14 +17,13 @@ from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
  from letta.schemas.letta_response import LettaResponse
  from letta.schemas.memory import ContextWindowOverview, CreateArchivalMemory, Memory
  from letta.schemas.message import Message, MessageUpdate
- from letta.schemas.passage import Passage
+ from letta.schemas.passage import Passage, PassageUpdate
  from letta.schemas.run import Run
  from letta.schemas.source import Source
  from letta.schemas.tool import Tool
  from letta.schemas.user import User
  from letta.server.rest_api.utils import get_letta_server
  from letta.server.server import SyncServer
- from letta.tracing import trace_method

  # These can be forward refs, but because FastAPI needs them at runtime they must be imported normally
@@ -273,14 +272,14 @@ def retrieve_agent_memory(


  @router.get("/{agent_id}/core-memory/blocks/{block_label}", response_model=Block, operation_id="retrieve_core_memory_block")
- def retrieve_core_memory_block(
+ def retrieve_block(
      agent_id: str,
      block_label: str,
      server: "SyncServer" = Depends(get_letta_server),
      actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
  ):
      """
-     Retrieve a memory block from an agent.
+     Retrieve a core memory block from an agent.
      """
      actor = server.user_manager.get_user_or_default(user_id=actor_id)
@@ -291,13 +290,13 @@ def retrieve_core_memory_block(


  @router.get("/{agent_id}/core-memory/blocks", response_model=List[Block], operation_id="list_core_memory_blocks")
- def list_core_memory_blocks(
+ def list_blocks(
      agent_id: str,
      server: "SyncServer" = Depends(get_letta_server),
      actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
  ):
      """
-     Retrieve the memory blocks of a specific agent.
+     Retrieve the core memory blocks of a specific agent.
      """
      actor = server.user_manager.get_user_or_default(user_id=actor_id)
      try:
@@ -308,7 +307,7 @@ def list_core_memory_blocks(


  @router.patch("/{agent_id}/core-memory/blocks/{block_label}", response_model=Block, operation_id="modify_core_memory_block")
- def modify_core_memory_block(
+ def modify_block(
      agent_id: str,
      block_label: str,
      block_update: BlockUpdate = Body(...),
@@ -316,7 +315,7 @@ def modify_core_memory_block(
      actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
  ):
      """
-     Updates a memory block of an agent.
+     Updates a core memory block of an agent.
      """
      actor = server.user_manager.get_user_or_default(user_id=actor_id)
@@ -330,35 +329,35 @@ def modify_core_memory_block(


  @router.patch("/{agent_id}/core-memory/blocks/attach/{block_id}", response_model=AgentState, operation_id="attach_core_memory_block")
- def attach_core_memory_block(
+ def attach_block(
      agent_id: str,
      block_id: str,
      server: "SyncServer" = Depends(get_letta_server),
      actor_id: Optional[str] = Header(None, alias="user_id"),
  ):
      """
-     Attach a block to an agent.
+     Attach a core memory block to an agent.
      """
      actor = server.user_manager.get_user_or_default(user_id=actor_id)
      return server.agent_manager.attach_block(agent_id=agent_id, block_id=block_id, actor=actor)


  @router.patch("/{agent_id}/core-memory/blocks/detach/{block_id}", response_model=AgentState, operation_id="detach_core_memory_block")
- def detach_core_memory_block(
+ def detach_block(
      agent_id: str,
      block_id: str,
      server: "SyncServer" = Depends(get_letta_server),
      actor_id: Optional[str] = Header(None, alias="user_id"),
  ):
      """
-     Detach a block from an agent.
+     Detach a core memory block from an agent.
      """
      actor = server.user_manager.get_user_or_default(user_id=actor_id)
      return server.agent_manager.detach_block(agent_id=agent_id, block_id=block_id, actor=actor)


- @router.get("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="list_archival_memory")
- def list_archival_memory(
+ @router.get("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="list_passages")
+ def list_passages(
      agent_id: str,
      server: "SyncServer" = Depends(get_letta_server),
      after: Optional[int] = Query(None, description="Unique ID of the memory to start the query range at."),
@@ -380,8 +379,8 @@ def list_archival_memory(
      )


- @router.post("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="create_archival_memory")
- def create_archival_memory(
+ @router.post("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="create_passage")
+ def create_passage(
      agent_id: str,
      request: CreateArchivalMemory = Body(...),
      server: "SyncServer" = Depends(get_letta_server),
@@ -395,10 +394,25 @@ def create_archival_memory(
      return server.insert_archival_memory(agent_id=agent_id, memory_contents=request.text, actor=actor)


+ @router.patch("/{agent_id}/archival-memory/{memory_id}", response_model=List[Passage], operation_id="modify_passage")
+ def modify_passage(
+     agent_id: str,
+     memory_id: str,
+     passage: PassageUpdate = Body(...),
+     server: "SyncServer" = Depends(get_letta_server),
+     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+ ):
+     """
+     Modify a memory in the agent's archival memory store.
+     """
+     actor = server.user_manager.get_user_or_default(user_id=actor_id)
+     return server.modify_archival_memory(agent_id=agent_id, memory_id=memory_id, passage=passage, actor=actor)
+
+
  # TODO(ethan): query or path parameter for memory_id?
  # @router.delete("/{agent_id}/archival")
- @router.delete("/{agent_id}/archival-memory/{memory_id}", response_model=None, operation_id="delete_archival_memory")
- def delete_archival_memory(
+ @router.delete("/{agent_id}/archival-memory/{memory_id}", response_model=None, operation_id="delete_passage")
+ def delete_passage(
      agent_id: str,
      memory_id: str,
      # memory_id: str = Query(..., description="Unique ID of the memory to be deleted."),
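
The renames above align the archival-memory operation IDs with the Passage noun; modify_passage is the one genuinely new route. A minimal client call against it might look like the sketch below; the base URL, the IDs, and the assumption that PassageUpdate accepts a partial {"text": ...} payload are all illustrative, not confirmed by this diff:

    import httpx

    BASE_URL = "http://localhost:8283/v1"   # hypothetical local Letta server
    AGENT_ID = "agent-00000000"             # placeholder IDs, for illustration only
    MEMORY_ID = "passage-00000000"

    # PATCH /v1/agents/{agent_id}/archival-memory/{memory_id}
    resp = httpx.patch(
        f"{BASE_URL}/agents/{AGENT_ID}/archival-memory/{MEMORY_ID}",
        json={"text": "updated passage contents"},  # assumed PassageUpdate field
        headers={"user_id": "user-00000000"},       # actor header read by the route
    )
    resp.raise_for_status()
    print(resp.json())  # the route's response_model is List[Passage]
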
@@ -471,7 +485,6 @@ def modify_message(
      response_model=LettaResponse,
      operation_id="send_message",
  )
- @trace_method("POST /v1/agents/{agent_id}/messages")
  async def send_message(
      agent_id: str,
      server: SyncServer = Depends(get_letta_server),
@@ -510,7 +523,6 @@ async def send_message(
          }
      },
  )
- @trace_method("POST /v1/agents/{agent_id}/messages/stream")
  async def send_message_streaming(
      agent_id: str,
      server: SyncServer = Depends(get_letta_server),
@@ -586,7 +598,6 @@ async def process_message_background(
      response_model=Run,
      operation_id="create_agent_message_async",
  )
- @trace_method("POST /v1/agents/{agent_id}/messages/async")
  async def send_message_async(
      agent_id: str,
      background_tasks: BackgroundTasks,
letta/server/rest_api/routers/v1/identities.py CHANGED
@@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, List, Optional

  from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query

- from letta.orm.errors import NoResultFound
+ from letta.orm.errors import NoResultFound, UniqueConstraintViolationError
  from letta.schemas.identity import Identity, IdentityCreate, IdentityType, IdentityUpdate
  from letta.server.rest_api.utils import get_letta_server
@@ -72,6 +72,14 @@ def create_identity(
          return server.identity_manager.create_identity(identity=identity, actor=actor)
      except HTTPException:
          raise
+     except UniqueConstraintViolationError:
+         if identity.project_id:
+             raise HTTPException(
+                 status_code=400,
+                 detail=f"An identity with identifier key {identity.identifier_key} already exists for project {identity.project_id}",
+             )
+         else:
+             raise HTTPException(status_code=400, detail=f"An identity with identifier key {identity.identifier_key} already exists")
      except Exception as e:
          raise HTTPException(status_code=500, detail=f"{e}")
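
With this handler in place, a duplicate identifier_key surfaces as a 400 with a descriptive detail string instead of falling through to the generic 500. A client can branch on that status code; a rough sketch with a placeholder URL and payload (the exact IdentityCreate fields are assumptions):

    import httpx

    resp = httpx.post(
        "http://localhost:8283/v1/identities/",  # illustrative URL
        json={"identifier_key": "user-123", "name": "Alice", "identity_type": "user"},
    )
    if resp.status_code == 400 and "already exists" in resp.json().get("detail", ""):
        # Duplicate identifier_key (scoped to the project when project_id was set)
        print("identity already exists:", resp.json()["detail"])
    else:
        resp.raise_for_status()
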
letta/server/rest_api/routers/v1/runs.py CHANGED
@@ -9,6 +9,7 @@ from letta.schemas.enums import JobStatus, MessageRole
  from letta.schemas.letta_message import LettaMessageUnion
  from letta.schemas.openai.chat_completion_response import UsageStatistics
  from letta.schemas.run import Run
+ from letta.schemas.step import Step
  from letta.server.rest_api.utils import get_letta_server
  from letta.server.server import SyncServer
@@ -137,6 +138,54 @@ def retrieve_run_usage(
          raise HTTPException(status_code=404, detail=f"Run '{run_id}' not found")


+ @router.get(
+     "/{run_id}/steps",
+     response_model=List[Step],
+     operation_id="list_run_steps",
+ )
+ async def list_run_steps(
+     run_id: str,
+     server: "SyncServer" = Depends(get_letta_server),
+     actor_id: Optional[str] = Header(None, alias="user_id"),
+     before: Optional[str] = Query(None, description="Cursor for pagination"),
+     after: Optional[str] = Query(None, description="Cursor for pagination"),
+     limit: Optional[int] = Query(100, description="Maximum number of steps to return"),
+     order: str = Query(
+         "desc", description="Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order."
+     ),
+ ):
+     """
+     Get steps associated with a run, with filtering options.
+
+     Args:
+         run_id: ID of the run
+         before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.
+         after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
+         limit: Maximum number of steps to return
+         order: Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.
+
+     Returns:
+         A list of steps associated with the run.
+     """
+     if order not in ["asc", "desc"]:
+         raise HTTPException(status_code=400, detail="Order must be 'asc' or 'desc'")
+
+     actor = server.user_manager.get_user_or_default(user_id=actor_id)
+
+     try:
+         steps = server.job_manager.get_job_steps(
+             job_id=run_id,
+             actor=actor,
+             limit=limit,
+             before=before,
+             after=after,
+             ascending=(order == "asc"),
+         )
+         return steps
+     except NoResultFound as e:
+         raise HTTPException(status_code=404, detail=str(e))


  @router.delete("/{run_id}", response_model=Run, operation_id="delete_run")
  def delete_run(
      run_id: str,
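
The before/after parameters follow the usual cursor convention: pass the ID of the last object from one page as after to fetch the next. A hedged pagination sketch, assuming Step objects serialize with an "id" field and a locally running server:

    import httpx

    BASE_URL = "http://localhost:8283/v1"
    RUN_ID = "run-00000000"  # placeholder

    after = None
    while True:
        params = {"limit": 100, "order": "asc"}
        if after is not None:
            params["after"] = after
        steps = httpx.get(f"{BASE_URL}/runs/{RUN_ID}/steps", params=params).json()
        if not steps:
            break
        for step in steps:
            print(step["id"])
        after = steps[-1]["id"]  # cursor for the next page
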
letta/server/rest_api/routers/v1/tools.py CHANGED
@@ -190,6 +190,7 @@ def run_tool_from_source(
              tool_args=request.args,
              tool_env_vars=request.env_vars,
              tool_name=request.name,
+             tool_args_json_schema=request.args_json_schema,
              actor=actor,
          )
      except LettaToolCreateError as e:
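
This threads the request's args_json_schema through to the server (see the matching run_tool_from_source change in letta/server/server.py below). A speculative request payload, with field names taken from this diff but the overall shape assumed rather than confirmed:

    # Illustrative body for the run-tool-from-source endpoint
    payload = {
        "source_code": "def add(a: int, b: int) -> int:\n    return a + b",
        "name": "add",
        "args": {"a": 1, "b": 2},
        "args_json_schema": {  # forwarded server-side as tool_args_json_schema
            "type": "object",
            "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}},
            "required": ["a", "b"],
        },
    }
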
letta/server/rest_api/routers/v1/voice.py CHANGED
@@ -1,42 +1,15 @@
- import json
- import uuid
  from typing import TYPE_CHECKING, Optional

  import httpx
  import openai
  from fastapi import APIRouter, Body, Depends, Header, HTTPException
  from fastapi.responses import StreamingResponse
- from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta
  from openai.types.chat.completion_create_params import CompletionCreateParams
- from starlette.concurrency import run_in_threadpool

- from letta.constants import LETTA_TOOL_SET, NON_USER_MSG_PREFIX, PRE_EXECUTION_MESSAGE_ARG
- from letta.helpers.tool_execution_helper import (
-     add_pre_execution_message,
-     enable_strict_mode,
-     execute_external_tool,
-     remove_request_heartbeat,
- )
+ from letta.agents.low_latency_agent import LowLatencyAgent
  from letta.log import get_logger
- from letta.orm.enums import ToolType
- from letta.schemas.openai.chat_completion_request import (
-     AssistantMessage,
-     ChatCompletionRequest,
-     Tool,
-     ToolCall,
-     ToolCallFunction,
-     ToolMessage,
-     UserMessage,
- )
- from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
- from letta.server.rest_api.utils import (
-     convert_letta_messages_to_openai,
-     create_assistant_messages_from_openai_response,
-     create_tool_call_messages_from_openai_response,
-     create_user_message,
-     get_letta_server,
-     get_messages_from_completion_request,
- )
+ from letta.schemas.openai.chat_completions import UserMessage
+ from letta.server.rest_api.utils import get_letta_server, get_messages_from_completion_request
  from letta.settings import model_settings

  if TYPE_CHECKING:
@@ -72,42 +45,10 @@ async def create_voice_chat_completions(
      if agent_id is None:
          raise HTTPException(status_code=400, detail="Must pass agent_id in the 'user' field")

-     agent_state = server.agent_manager.get_agent_by_id(agent_id=agent_id, actor=actor)
-     if agent_state.llm_config.model_endpoint_type != "openai":
-         raise HTTPException(status_code=400, detail="Only OpenAI models are supported by this endpoint.")
-
-     # Convert Letta messages to OpenAI messages
-     in_context_messages = server.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=actor)
-     openai_messages = convert_letta_messages_to_openai(in_context_messages)
-
-     # Also parse user input from completion_request and append
-     input_message = get_messages_from_completion_request(completion_request)[-1]
-     openai_messages.append(input_message)
+     # Also parse the user's new input
+     input_message = UserMessage(**get_messages_from_completion_request(completion_request)[-1])

-     # Tools we allow this agent to call
-     tools = [t for t in agent_state.tools if t.name not in LETTA_TOOL_SET and t.tool_type in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM}]
-
-     # Initial request
-     openai_request = ChatCompletionRequest(
-         model=agent_state.llm_config.model,
-         messages=openai_messages,
-         # TODO: This nested thing here is so ugly, need to refactor
-         tools=(
-             [
-                 Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
-                 for t in tools
-             ]
-             if tools
-             else None
-         ),
-         tool_choice="auto",
-         user=user_id,
-         max_completion_tokens=agent_state.llm_config.max_tokens,
-         temperature=agent_state.llm_config.temperature,
-         stream=True,
-     )
-
-     # Create the OpenAI async client
+     # Create OpenAI async client
      client = openai.AsyncClient(
          api_key=model_settings.openai_api_key,
          max_retries=0,
@@ -122,194 +63,17 @@ async def create_voice_chat_completions(
          ),
      )

-     # The messages we want to persist to the Letta agent
-     user_message = create_user_message(input_message=input_message, agent_id=agent_id, actor=actor)
-     message_db_queue = [user_message]
-
-     async def event_stream():
-         """
-         A function-calling loop:
-         - We stream partial tokens.
-         - If we detect a tool call (finish_reason="tool_calls"), we parse it,
-           add two messages to the conversation:
-             (a) assistant message with tool_calls referencing the same ID
-             (b) a tool message referencing that ID, containing the tool result.
-         - Re-invoke the OpenAI request with updated conversation, streaming again.
-         - End when finish_reason="stop" or no more tool calls.
-         """
-
-         # We'll keep updating this conversation in a loop
-         conversation = openai_messages[:]
-
-         while True:
-             # Make the streaming request to OpenAI
-             stream = await client.chat.completions.create(**openai_request.model_dump(exclude_unset=True))
-
-             content_buffer = []
-             tool_call_name = None
-             tool_call_args_str = ""
-             tool_call_id = None
-             tool_call_happened = False
-             finish_reason_stop = False
-             optimistic_json_parser = OptimisticJSONParser(strict=True)
-             current_parsed_json_result = {}
-
-             async with stream:
-                 async for chunk in stream:
-                     choice = chunk.choices[0]
-                     delta = choice.delta
-                     finish_reason = choice.finish_reason  # "tool_calls", "stop", or None
-
-                     if delta.content:
-                         content_buffer.append(delta.content)
-                         yield f"data: {chunk.model_dump_json()}\n\n"
-
-                     # CASE B: Partial tool call info
-                     if delta.tool_calls:
-                         # Typically there's only one in delta.tool_calls
-                         tc = delta.tool_calls[0]
-                         if tc.function.name:
-                             tool_call_name = tc.function.name
-                         if tc.function.arguments:
-                             tool_call_args_str += tc.function.arguments
-
-                         # See if we can stream out the pre-execution message
-                         parsed_args = optimistic_json_parser.parse(tool_call_args_str)
-                         if parsed_args.get(
-                             PRE_EXECUTION_MESSAGE_ARG
-                         ) and current_parsed_json_result.get(  # Ensure key exists and is not None/empty
-                             PRE_EXECUTION_MESSAGE_ARG
-                         ) != parsed_args.get(
-                             PRE_EXECUTION_MESSAGE_ARG
-                         ):
-                             # Only stream if there's something new to stream
-                             # We do this way to avoid hanging JSON at the end of the stream, e.g. '}'
-                             if parsed_args != current_parsed_json_result:
-                                 current_parsed_json_result = parsed_args
-                                 synthetic_chunk = ChatCompletionChunk(
-                                     id=chunk.id,
-                                     object=chunk.object,
-                                     created=chunk.created,
-                                     model=chunk.model,
-                                     choices=[
-                                         Choice(
-                                             index=choice.index,
-                                             delta=ChoiceDelta(content=tc.function.arguments, role="assistant"),
-                                             finish_reason=None,
-                                         )
-                                     ],
-                                 )
-
-                                 yield f"data: {synthetic_chunk.model_dump_json()}\n\n"
-
-                         # We might generate a unique ID for the tool call
-                         if tc.id:
-                             tool_call_id = tc.id
-
-                     # Check finish_reason
-                     if finish_reason == "tool_calls":
-                         tool_call_happened = True
-                         break
-                     elif finish_reason == "stop":
-                         finish_reason_stop = True
-                         break
-
-             if content_buffer:
-                 # We treat that partial text as an assistant message
-                 content = "".join(content_buffer)
-                 conversation.append({"role": "assistant", "content": content})
-
-                 # Create an assistant message here to persist later
-                 assistant_messages = create_assistant_messages_from_openai_response(
-                     response_text=content, agent_id=agent_id, model=agent_state.llm_config.model, actor=actor
-                 )
-                 message_db_queue.extend(assistant_messages)
-
-             if tool_call_happened:
-                 # Parse the tool call arguments
-                 try:
-                     tool_args = json.loads(tool_call_args_str)
-                 except json.JSONDecodeError:
-                     tool_args = {}
-
-                 if not tool_call_id:
-                     # If no tool_call_id given by the model, generate one
-                     tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
-
-                 # 1) Insert the "assistant" message with the tool_calls field
-                 #    referencing the same tool_call_id
-                 assistant_tool_call_msg = AssistantMessage(
-                     content=None,
-                     tool_calls=[ToolCall(id=tool_call_id, function=ToolCallFunction(name=tool_call_name, arguments=tool_call_args_str))],
-                 )
-
-                 conversation.append(assistant_tool_call_msg.model_dump())
-
-                 # 2) Execute the tool
-                 target_tool = next((x for x in tools if x.name == tool_call_name), None)
-                 if not target_tool:
-                     # Tool not found, handle error
-                     yield f"data: {json.dumps({'error': 'Tool not found', 'tool': tool_call_name})}\n\n"
-                     break
-
-                 try:
-                     tool_result, _ = execute_external_tool(
-                         agent_state=agent_state,
-                         function_name=tool_call_name,
-                         function_args=tool_args,
-                         target_letta_tool=target_tool,
-                         actor=actor,
-                         allow_agent_state_modifications=False,
-                     )
-                     function_call_success = True
-                 except Exception as e:
-                     tool_result = f"Failed to call tool. Error: {e}"
-                     function_call_success = False
-
-                 # 3) Insert the "tool" message referencing the same tool_call_id
-                 tool_message = ToolMessage(content=json.dumps({"result": tool_result}), tool_call_id=tool_call_id)
-
-                 conversation.append(tool_message.model_dump())
-
-                 # 4) Add a user message prompting the tool call result summarization
-                 heartbeat_user_message = UserMessage(
-                     content=f"{NON_USER_MSG_PREFIX} Tool finished executing. Summarize the result for the user.",
-                 )
-                 conversation.append(heartbeat_user_message.model_dump())
-
-                 # Now, re-invoke OpenAI with the updated conversation
-                 openai_request.messages = conversation
-
-                 # Create a tool call message and append to message_db_queue
-                 tool_call_messages = create_tool_call_messages_from_openai_response(
-                     agent_id=agent_state.id,
-                     model=agent_state.llm_config.model,
-                     function_name=tool_call_name,
-                     function_arguments=tool_args,
-                     tool_call_id=tool_call_id,
-                     function_call_success=function_call_success,
-                     function_response=tool_result,
-                     actor=actor,
-                     add_heartbeat_request_system_message=True,
-                 )
-                 message_db_queue.extend(tool_call_messages)
-
-                 continue  # Start the while loop again
-
-             if finish_reason_stop:
-                 break
-
-             # If we reach here, no tool call, no "stop", but we've ended streaming
-             # Possibly a model error or some other finish reason. We'll just end.
-             break
-
-         await run_in_threadpool(
-             server.agent_manager.append_to_in_context_messages,
-             message_db_queue,
-             agent_id=agent_id,
-             actor=actor,
-         )
-
-         yield "data: [DONE]\n\n"
+     # Instantiate our LowLatencyAgent
+     agent = LowLatencyAgent(
+         agent_id=agent_id,
+         openai_client=client,
+         message_manager=server.message_manager,
+         agent_manager=server.agent_manager,
+         block_manager=server.block_manager,
+         actor=actor,
+         message_buffer_limit=10,
+         message_buffer_min=4,
+     )

-     return StreamingResponse(event_stream(), media_type="text/event-stream")
+     # Return the streaming generator
+     return StreamingResponse(agent.step_stream(input_message=input_message), media_type="text/event-stream")
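
The refactor replaces the ~190-line inline function-calling loop with LowLatencyAgent.step_stream, an async generator that StreamingResponse can drain directly. The underlying FastAPI pattern, as a self-contained sketch (EchoAgent is a stand-in for illustration, not Letta's agent):

    from typing import AsyncIterator

    from fastapi import FastAPI
    from fastapi.responses import StreamingResponse

    app = FastAPI()


    class EchoAgent:
        """Stand-in agent whose step_stream yields SSE-formatted chunks."""

        async def step_stream(self, input_message: str) -> AsyncIterator[str]:
            for token in input_message.split():
                yield f"data: {token}\n\n"  # one SSE event per token
            yield "data: [DONE]\n\n"


    @app.post("/chat/completions")
    async def chat(payload: dict):
        agent = EchoAgent()
        # FastAPI consumes the async generator as the client reads the response
        return StreamingResponse(agent.step_stream(payload.get("content", "")), media_type="text/event-stream")
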
letta/server/rest_api/utils.py CHANGED
@@ -13,7 +13,7 @@ from openai.types.chat.chat_completion_message_tool_call import Function as Open
  from openai.types.chat.completion_create_params import CompletionCreateParams
  from pydantic import BaseModel

- from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, REQ_HEARTBEAT_MESSAGE
+ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
  from letta.errors import ContextWindowExceededError, RateLimitExceededError
  from letta.helpers.datetime_helpers import get_utc_time
  from letta.log import get_logger
@@ -216,9 +216,10 @@ def create_tool_call_messages_from_openai_response(
      messages.append(tool_message)

      if add_heartbeat_request_system_message:
+         text_content = REQ_HEARTBEAT_MESSAGE if function_call_success else FUNC_FAILED_HEARTBEAT_MESSAGE
          heartbeat_system_message = Message(
              role=MessageRole.user,
-             content=[TextContent(text=get_heartbeat(REQ_HEARTBEAT_MESSAGE))],
+             content=[TextContent(text=get_heartbeat(text_content))],
              organization_id=actor.organization_id,
              agent_id=agent_id,
              model=model,
letta/server/server.py CHANGED
@@ -6,7 +6,7 @@ import traceback
  import warnings
  from abc import abstractmethod
  from datetime import datetime
- from typing import Callable, Dict, List, Optional, Tuple, Union
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union

  from composio.client import Composio
  from composio.client.collections import ActionModel, AppModel
@@ -17,7 +17,6 @@ import letta.constants as constants
  import letta.server.utils as server_utils
  import letta.system as system
  from letta.agent import Agent, save_agent
- from letta.chat_only_agent import ChatOnlyAgent
  from letta.config import LettaConfig
  from letta.data_sources.connectors import DataConnector, load_data
  from letta.helpers.datetime_helpers import get_utc_time
@@ -43,7 +42,7 @@ from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import ArchivalMemorySummary, ContextWindowOverview, Memory, RecallMemorySummary
  from letta.schemas.message import Message, MessageCreate, MessageRole, MessageUpdate, TextContent
  from letta.schemas.organization import Organization
- from letta.schemas.passage import Passage
+ from letta.schemas.passage import Passage, PassageUpdate
  from letta.schemas.providers import (
      AnthropicBedrockProvider,
      AnthropicProvider,
@@ -326,8 +325,6 @@ class SyncServer(Server):
              agent = Agent(agent_state=agent_state, interface=interface, user=actor)
          elif agent_state.agent_type == AgentType.offline_memory_agent:
              agent = OfflineMemoryAgent(agent_state=agent_state, interface=interface, user=actor)
-         elif agent_state.agent_type == AgentType.chat_only_agent:
-             agent = ChatOnlyAgent(agent_state=agent_state, interface=interface, user=actor)
          else:
              raise ValueError(f"Invalid agent type {agent_state.agent_type}")
@@ -770,6 +767,11 @@ class SyncServer(Server):

          return passages

+     def modify_archival_memory(self, agent_id: str, memory_id: str, passage: PassageUpdate, actor: User) -> List[Passage]:
+         passage = Passage(**passage.model_dump(exclude_unset=True, exclude_none=True))
+         passages = self.passage_manager.update_passage_by_id(passage_id=memory_id, passage=passage, actor=actor)
+         return passages
+
      def delete_archival_memory(self, memory_id: str, actor: User):
          # TODO check if it exists first, and throw error if not
          # TODO: @mindy make this return the deleted passage instead
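
modify_archival_memory leans on pydantic's partial-update idiom: dumping the PassageUpdate with exclude_unset=True (and exclude_none=True) so only fields the caller actually provided reach the passage manager. In isolation the idiom looks like this, with a simplified stand-in model:

    from typing import Optional

    from pydantic import BaseModel


    class PassageUpdateSketch(BaseModel):
        """Simplified stand-in for letta's PassageUpdate."""

        text: Optional[str] = None
        metadata: Optional[dict] = None


    update = PassageUpdateSketch(text="new text")
    # Unset/None fields are dropped, so they cannot clobber stored values downstream
    print(update.model_dump(exclude_unset=True, exclude_none=True))  # {'text': 'new text'}
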
@@ -978,6 +980,10 @@ class SyncServer(Server):
              warnings.warn(f"An error occurred while listing LLM models for provider {provider}: {e}")

          llm_models.extend(self.get_local_llm_configs())
+
+         # respect global maximum
+         for llm_config in llm_models:
+             llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)
          return llm_models

      def list_embedding_models(self) -> List[EmbeddingConfig]:
@@ -1023,7 +1029,7 @@ class SyncServer(Server):
              raise ValueError(f"Context window limit ({context_window_limit}) is greater than maximum of ({llm_config.context_window})")
          llm_config.context_window = context_window_limit
      else:
-         llm_config.context_window = min(llm_config.context_window, constants.DEFAULT_CONTEXT_WINDOW_SIZE)
+         llm_config.context_window = min(llm_config.context_window, model_settings.global_max_context_window_limit)

      return llm_config
@@ -1098,6 +1104,7 @@ class SyncServer(Server):
          tool_env_vars: Optional[Dict[str, str]] = None,
          tool_source_type: Optional[str] = None,
          tool_name: Optional[str] = None,
+         tool_args_json_schema: Optional[Dict[str, Any]] = None,
      ) -> ToolReturnMessage:
          """Run a tool from source code"""
          if tool_source_type is not None and tool_source_type != "python":
@@ -1107,6 +1114,7 @@ class SyncServer(Server):
          tool = Tool(
              name=tool_name,
              source_code=tool_source,
+             args_json_schema=tool_args_json_schema,
          )
          assert tool.name is not None, "Failed to create tool object"
@@ -1164,7 +1172,7 @@ class SyncServer(Server):
          actions = self.get_composio_client(api_key=api_key).actions.get(apps=[composio_app_name])
          return actions

-     @trace_method("Send Message")
+     @trace_method
      async def send_message_to_agent(
          self,
          agent_id: str,