letta-nightly 0.11.7.dev20250912104045__py3-none-any.whl → 0.11.7.dev20250914103918__py3-none-any.whl
This diff compares two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- letta/adapters/letta_llm_stream_adapter.py +1 -1
- letta/agents/letta_agent_v2.py +17 -2
- letta/interfaces/openai_streaming_interface.py +14 -3
- letta/llm_api/anthropic_client.py +2 -2
- letta/llm_api/azure_client.py +5 -2
- letta/llm_api/google_vertex_client.py +154 -17
- letta/llm_api/openai_client.py +5 -3
- letta/otel/sqlalchemy_instrumentation.py +6 -1
- letta/schemas/letta_stop_reason.py +2 -0
- letta/server/rest_api/app.py +61 -1
- letta/server/rest_api/interface.py +22 -75
- letta/server/rest_api/routers/v1/agents.py +51 -15
- letta/services/job_manager.py +10 -1
- letta/streaming_utils.py +79 -18
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/RECORD +19 -19
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/licenses/LICENSE +0 -0
letta/adapters/letta_llm_stream_adapter.py CHANGED

@@ -149,7 +149,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
             request_json=self.request_data,
             response_json={
                 "content": {
-                    "tool_call": self.tool_call.model_dump_json(),
+                    "tool_call": self.tool_call.model_dump_json() if self.tool_call else None,
                     "reasoning": [content.model_dump_json() for content in self.reasoning_content],
                 },
                 "id": self.interface.message_id,
letta/agents/letta_agent_v2.py CHANGED

@@ -19,7 +19,7 @@ from letta.agents.helpers import (
     generate_step_id,
 )
 from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
-from letta.errors import ContextWindowExceededError
+from letta.errors import ContextWindowExceededError, LLMError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages

@@ -306,7 +306,7 @@ class LettaAgentV2(BaseAgentV2):
             )

         except:
-            if self.stop_reason:
+            if self.stop_reason and not first_chunk:
                 yield f"data: {self.stop_reason.model_dump_json()}\n\n"
             raise

@@ -431,6 +431,9 @@ class LettaAgentV2(BaseAgentV2):
                 except ValueError as e:
                     self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
                     raise e
+                except LLMError as e:
+                    self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
+                    raise e
                 except Exception as e:
                     if isinstance(e, ContextWindowExceededError) and llm_request_attempt < summarizer_settings.max_summarizer_retries:
                         # Retry case

@@ -497,6 +500,17 @@ class LettaAgentV2(BaseAgentV2):
                 if include_return_message_types is None or message.message_type in include_return_message_types:
                     yield message

+            # Persist approval responses immediately to prevent agent from getting into a bad state
+            if (
+                len(input_messages_to_persist) == 1
+                and input_messages_to_persist[0].role == "approval"
+                and persisted_messages[0].role == "approval"
+                and persisted_messages[1].role == "tool"
+            ):
+                self.agent_state.message_ids = self.agent_state.message_ids + [m.id for m in persisted_messages[:2]]
+                await self.agent_manager.update_message_ids_async(
+                    agent_id=self.agent_state.id, message_ids=self.agent_state.message_ids, actor=self.actor
+                )
             step_progression, step_metrics = await self._step_checkpoint_finish(step_metrics, agent_step_span, logged_step)
         except Exception as e:
             self.logger.error(f"Error during step processing: {e}")

@@ -511,6 +525,7 @@ class LettaAgentV2(BaseAgentV2):
                 StopReasonType.no_tool_call,
                 StopReasonType.invalid_tool_call,
                 StopReasonType.invalid_llm_response,
+                StopReasonType.llm_api_error,
             ):
                 self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", self.stop_reason.stop_reason)
                 raise e
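The letta_agent_v2.py hunks above route provider failures (LLMError) to a dedicated llm_api_error stop reason instead of the generic error path. A minimal, self-contained sketch of that classification; the classes below are simplified stand-ins, not the letta implementations, and in the real agent the result is stored on self.stop_reason before the exception is re-raised:

```python
from enum import Enum


class StopReasonType(str, Enum):
    invalid_llm_response = "invalid_llm_response"
    llm_api_error = "llm_api_error"


class LLMError(Exception):
    """Stand-in for letta.errors.LLMError (base class of the typed provider errors)."""


def classify_step_failure(exc: Exception) -> StopReasonType:
    """Mirror the new except-branches above: provider errors get their own stop reason,
    while a malformed-response ValueError keeps the existing one."""
    if isinstance(exc, LLMError):
        return StopReasonType.llm_api_error
    if isinstance(exc, ValueError):
        return StopReasonType.invalid_llm_response
    raise exc


print(classify_step_failure(LLMError("502 from provider")))   # StopReasonType.llm_api_error
print(classify_step_failure(ValueError("no tool call")))      # StopReasonType.invalid_llm_response
```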
letta/interfaces/openai_streaming_interface.py CHANGED

@@ -24,7 +24,11 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
 from letta.server.rest_api.json_parser import OptimisticJSONParser
-from letta.streaming_utils import
+from letta.streaming_utils import (
+    FunctionArgumentsStreamHandler,
+    JSONInnerThoughtsExtractor,
+    sanitize_streamed_message_content,
+)
 from letta.utils import count_tokens

 logger = get_logger(__name__)

@@ -332,8 +336,15 @@ class OpenAIStreamingInterface:
                     self.last_flushed_function_name is not None
                     and self.last_flushed_function_name == self.assistant_message_tool_name
                 ):
-                    # Minimal, robust extraction: only emit the value of "message"
-
+                    # Minimal, robust extraction: only emit the value of "message".
+                    # If we buffered a prefix while name was streaming, feed it first.
+                    if self.function_args_buffer:
+                        payload = self.function_args_buffer + tool_call.function.arguments
+                        self.function_args_buffer = None
+                    else:
+                        payload = tool_call.function.arguments
+                    extracted = self.assistant_message_json_reader.process_json_chunk(payload)
+                    extracted = sanitize_streamed_message_content(extracted or "")
                     if extracted:
                         if prev_message_type and prev_message_type != "assistant_message":
                             message_index += 1
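The hunk above concatenates any text buffered while the tool name was still streaming (self.function_args_buffer) with the next arguments chunk before the "message" value is extracted, so the start of the value is not dropped. A toy illustration of why that ordering matters; the parser below is a deliberately simplified stand-in for FunctionArgumentsStreamHandler, not the letta class:

```python
def extract_message_fragments(chunks):
    buffer = ""            # plays the role of self.function_args_buffer
    inside_value = False
    out = []
    for chunk in chunks:
        payload = buffer + chunk   # feed any buffered prefix first
        buffer = ""
        if not inside_value:
            marker = '"message": "'
            idx = payload.find(marker)
            if idx == -1:
                buffer = payload   # key not complete yet; keep buffering
                continue
            payload = payload[idx + len(marker):]
            inside_value = True
        # drop a trailing quote (plus delimiter) that closes the JSON string
        for closer in ('"}', '",', '"'):
            if payload.endswith(closer):
                payload = payload[: -len(closer)]
                inside_value = False
                break
        if payload:
            out.append(payload)
    return out


print(extract_message_fragments(['{"mes', 'sage": "Hi', ' there"}']))   # ['Hi', ' there']
```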
letta/llm_api/anthropic_client.py CHANGED

@@ -497,7 +497,7 @@ class AnthropicClient(LLMClientBase):
             try:
                 args_json = json.loads(arguments)
                 if not isinstance(args_json, dict):
-                    raise
+                    raise LLMServerError("Expected parseable json object for arguments")
             except:
                 arguments = str(tool_input["function"]["arguments"])
             else:

@@ -854,7 +854,7 @@ def remap_finish_reason(stop_reason: str) -> str:
     elif stop_reason == "tool_use":
         return "function_call"
     else:
-        raise
+        raise LLMServerError(f"Unexpected stop_reason: {stop_reason}")


 def strip_xml_tags(string: str, tag: Optional[str]) -> str:
letta/llm_api/azure_client.py CHANGED

@@ -54,9 +54,12 @@ class AzureClient(OpenAIClient):
         api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY")
         base_url = model_settings.azure_base_url or os.environ.get("AZURE_BASE_URL")
         api_version = model_settings.azure_api_version or os.environ.get("AZURE_API_VERSION")
+        try:
+            client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
+            response: ChatCompletion = await client.chat.completions.create(**request_data)
+        except Exception as e:
+            raise self.handle_llm_error(e)

-        client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
-        response: ChatCompletion = await client.chat.completions.create(**request_data)
         return response.model_dump()

     @trace_method
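azure_client.py now runs the SDK call inside try/except and re-raises the translated error returned by handle_llm_error rather than the raw SDK exception. A small sketch of that wrap-and-translate pattern in isolation; LLMServerError and the translate callback are stand-ins for the letta error types and handler:

```python
import asyncio


class LLMServerError(Exception):
    """Stand-in for letta.errors.LLMServerError."""


async def call_with_error_translation(make_request, translate):
    """Run the provider call and re-raise whatever the translator returns,
    mirroring the try/except added in the hunk above."""
    try:
        return await make_request()
    except Exception as exc:          # translated immediately below
        raise translate(exc) from exc


async def main():
    async def failing_request():
        raise TimeoutError("provider took too long")

    def translate(exc):
        return LLMServerError(f"Azure request failed: {exc}")

    try:
        await call_with_error_translation(failing_request, translate)
    except LLMServerError as err:
        print(err)   # Azure request failed: provider took too long


asyncio.run(main())
```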
letta/llm_api/google_vertex_client.py CHANGED

@@ -14,6 +14,19 @@ from google.genai.types import (
 )

 from letta.constants import NON_USER_MSG_PREFIX
+from letta.errors import (
+    ContextWindowExceededError,
+    ErrorCode,
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    LLMConnectionError,
+    LLMNotFoundError,
+    LLMPermissionDeniedError,
+    LLMRateLimitError,
+    LLMServerError,
+    LLMTimeoutError,
+    LLMUnprocessableEntityError,
+)
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps, json_loads
 from letta.llm_api.llm_client_base import LLMClientBase

@@ -48,13 +61,16 @@ class GoogleVertexClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-
-
-
-
-
-
-
+        try:
+            client = self._get_client()
+            response = client.models.generate_content(
+                model=llm_config.model,
+                contents=request_data["contents"],
+                config=request_data["config"],
+            )
+            return response.model_dump()
+        except Exception as e:
+            raise self.handle_llm_error(e)

     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:

@@ -77,15 +93,15 @@ class GoogleVertexClient(LLMClientBase):
                 )
             except errors.APIError as e:
                 # Retry on 503 and 500 errors as well, usually ephemeral from Gemini
-                if e.code == 503 or e.code == 500:
+                if e.code == 503 or e.code == 500 or e.code == 504:
                     logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
                     retry_count += 1
                     if retry_count > self.MAX_RETRIES:
-                        raise e
+                        raise self.handle_llm_error(e)
                     continue
-                raise e
+                raise self.handle_llm_error(e)
             except Exception as e:
-                raise e
+                raise self.handle_llm_error(e)
         response_data = response.model_dump()
         is_malformed_function_call = self.is_malformed_function_call(response_data)
         if is_malformed_function_call:

@@ -363,11 +379,10 @@ class GoogleVertexClient(LLMClientBase):

         if content is None or content.role is None or content.parts is None:
             # This means the response is malformed like MALFORMED_FUNCTION_CALL
-            # NOTE: must be a ValueError to trigger a retry
             if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                raise
+                raise LLMServerError(f"Malformed response from Google Vertex: {candidate.finish_reason}")
             else:
-                raise
+                raise LLMServerError(f"Invalid response data from Google Vertex: {candidate.model_dump()}")

         role = content.role
         assert role == "model", f"Unknown role in response: {role}"

@@ -461,7 +476,7 @@ class GoogleVertexClient(LLMClientBase):

         except json.decoder.JSONDecodeError:
             if candidate.finish_reason == "MAX_TOKENS":
-                raise
+                raise LLMServerError("Could not parse response data from LLM: exceeded max token limit")
             # Inner thoughts are the content by default
             inner_thoughts = response_message.text

@@ -490,7 +505,7 @@ class GoogleVertexClient(LLMClientBase):
         elif finish_reason == "RECITATION":
             openai_finish_reason = "content_filter"
         else:
-            raise
+            raise LLMServerError(f"Unrecognized finish reason in Google AI response: {finish_reason}")

         choices.append(
             Choice(

@@ -581,5 +596,127 @@ class GoogleVertexClient(LLMClientBase):

     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
-        #
+        # Handle Google GenAI specific errors
+        if isinstance(e, errors.ClientError):
+            logger.warning(f"[Google Vertex] Client error ({e.code}): {e}")
+
+            # Handle specific error codes
+            if e.code == 400:
+                error_str = str(e).lower()
+                if "context" in error_str and ("exceed" in error_str or "limit" in error_str or "too long" in error_str):
+                    return ContextWindowExceededError(
+                        message=f"Bad request to Google Vertex (context window exceeded): {str(e)}",
+                    )
+                else:
+                    return LLMBadRequestError(
+                        message=f"Bad request to Google Vertex: {str(e)}",
+                        code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    )
+            elif e.code == 401:
+                return LLMAuthenticationError(
+                    message=f"Authentication failed with Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 403:
+                return LLMPermissionDeniedError(
+                    message=f"Permission denied by Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 404:
+                return LLMNotFoundError(
+                    message=f"Resource not found in Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 408:
+                return LLMTimeoutError(
+                    message=f"Request to Google Vertex timed out: {str(e)}",
+                    code=ErrorCode.TIMEOUT,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            elif e.code == 422:
+                return LLMUnprocessableEntityError(
+                    message=f"Invalid request content for Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 429:
+                logger.warning("[Google Vertex] Rate limited (429). Consider backoff.")
+                return LLMRateLimitError(
+                    message=f"Rate limited by Google Vertex: {str(e)}",
+                    code=ErrorCode.RATE_LIMIT_EXCEEDED,
+                )
+            else:
+                return LLMServerError(
+                    message=f"Google Vertex client error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+
+        if isinstance(e, errors.ServerError):
+            logger.warning(f"[Google Vertex] Server error ({e.code}): {e}")
+
+            # Handle specific server error codes
+            if e.code == 500:
+                return LLMServerError(
+                    message=f"Google Vertex internal server error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+            elif e.code == 502:
+                return LLMConnectionError(
+                    message=f"Bad gateway from Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            elif e.code == 503:
+                return LLMServerError(
+                    message=f"Google Vertex service unavailable: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+            elif e.code == 504:
+                return LLMTimeoutError(
+                    message=f"Gateway timeout from Google Vertex: {str(e)}",
+                    code=ErrorCode.TIMEOUT,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            else:
+                return LLMServerError(
+                    message=f"Google Vertex server error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+
+        if isinstance(e, errors.APIError):
+            logger.warning(f"[Google Vertex] API error ({e.code}): {e}")
+            return LLMServerError(
+                message=f"Google Vertex API error: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={
+                    "status_code": e.code,
+                    "response_json": getattr(e, "response_json", None),
+                },
+            )
+
+        # Handle connection-related errors
+        if "connection" in str(e).lower() or "timeout" in str(e).lower():
+            logger.warning(f"[Google Vertex] Connection/timeout error: {e}")
+            return LLMConnectionError(
+                message=f"Failed to connect to Google Vertex: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+
+        # Fallback to base implementation for other errors
         return super().handle_llm_error(e)
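The new handle_llm_error body above is essentially a status-code dispatch over google.genai's ClientError/ServerError types. A condensed sketch of the same dispatch using plain integers and stand-in exception classes; it covers only a subset of the codes handled above and omits the logging and details payloads:

```python
class ContextWindowExceededError(Exception): ...
class LLMAuthenticationError(Exception): ...
class LLMTimeoutError(Exception): ...
class LLMRateLimitError(Exception): ...
class LLMConnectionError(Exception): ...
class LLMServerError(Exception): ...


def map_vertex_status(code: int, message: str) -> Exception:
    """Condensed version of the dispatch above: return (not raise) a typed error."""
    if code == 400 and "context" in message.lower():
        return ContextWindowExceededError(message)
    if code == 401:
        return LLMAuthenticationError(message)
    if code in (408, 504):
        return LLMTimeoutError(message)
    if code == 429:
        return LLMRateLimitError(message)
    if code == 502:
        return LLMConnectionError(message)
    # 500, 503 and anything unrecognized fall back to a generic server error
    return LLMServerError(f"{code}: {message}")


print(type(map_vertex_status(429, "quota exhausted")).__name__)       # LLMRateLimitError
print(type(map_vertex_status(503, "service unavailable")).__name__)   # LLMServerError
```

The caller then raises the returned exception, which is what the retry loop and the request wrappers above do via raise self.handle_llm_error(e).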
letta/llm_api/openai_client.py CHANGED

@@ -99,7 +99,7 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:

     # FIXME pretty hacky - turn off for providers we know users will use,
     # but also don't support structured output
-    if "nebius.com" in llm_config.model_endpoint:
+    if llm_config.model_endpoint and "nebius.com" in llm_config.model_endpoint:
         return False
     else:
         return True

@@ -108,7 +108,7 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:
 # TODO move into LLMConfig as a field?
 def requires_auto_tool_choice(llm_config: LLMConfig) -> bool:
     """Certain providers require the tool choice to be set to 'auto'."""
-    if "nebius.com" in llm_config.model_endpoint:
+    if llm_config.model_endpoint and "nebius.com" in llm_config.model_endpoint:
         return True
     if llm_config.handle and "vllm" in llm_config.handle:
         return True

@@ -168,7 +168,9 @@ class OpenAIClient(LLMClientBase):
             # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
             # TODO(fix)
             inner_thoughts_desc = (
-                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
+                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
+                if llm_config.model_endpoint and ":1234" in llm_config.model_endpoint
+                else INNER_THOUGHTS_KWARG_DESCRIPTION
             )
             tools = add_inner_thoughts_to_functions(
                 functions=tools,
letta/otel/sqlalchemy_instrumentation.py CHANGED

@@ -146,11 +146,16 @@ def _instrument_engine_events(engine: Engine) -> None:
             span.end()
             context._sync_instrumentation_span = None

-    def handle_cursor_error(
+    def handle_cursor_error(exception_context):
         """Handle cursor execution errors."""
         if not _config["enabled"]:
             return

+        # Extract context from exception_context
+        context = getattr(exception_context, "execution_context", None)
+        if not context:
+            return
+
         span = getattr(context, "_sync_instrumentation_span", None)
         if span:
             span.set_status(Status(StatusCode.ERROR, "Database operation failed"))
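The fix above gives handle_cursor_error the argument SQLAlchemy actually passes to "handle_error" listeners: an ExceptionContext, from which the execution context is read (it can be None, for example on connection-level failures). A minimal sketch of how a handler with this shape is wired up, assuming a standard SQLAlchemy engine and the built-in "handle_error" event rather than letta's instrumentation module:

```python
from sqlalchemy import create_engine, event, text


def handle_cursor_error(exception_context):
    """Same signature as the fixed handler above: pull the execution context
    off the ExceptionContext instead of assuming it was passed directly."""
    context = getattr(exception_context, "execution_context", None)
    if not context:
        return
    print("statement failed:", exception_context.statement)


engine = create_engine("sqlite://")
# "handle_error" delivers an ExceptionContext, which is why the handler needs a parameter.
event.listen(engine, "handle_error", handle_cursor_error)

try:
    with engine.connect() as conn:
        conn.execute(text("SELECT * FROM missing_table"))
except Exception:
    pass   # the listener already reported the failing statement
```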
letta/schemas/letta_stop_reason.py CHANGED

@@ -9,6 +9,7 @@ from letta.schemas.enums import JobStatus
 class StopReasonType(str, Enum):
     end_turn = "end_turn"
     error = "error"
+    llm_api_error = "llm_api_error"
     invalid_llm_response = "invalid_llm_response"
     invalid_tool_call = "invalid_tool_call"
     max_steps = "max_steps"

@@ -31,6 +32,7 @@ class StopReasonType(str, Enum):
             StopReasonType.invalid_tool_call,
             StopReasonType.no_tool_call,
             StopReasonType.invalid_llm_response,
+            StopReasonType.llm_api_error,
         ):
             return JobStatus.failed
         elif self == StopReasonType.cancelled:
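The schema change above adds llm_api_error to StopReasonType and to the set of stop reasons that resolve to a failed job. A trimmed stand-in showing only that effect; the enums are abbreviated and the property name run_status is illustrative, not the real attribute name:

```python
from enum import Enum


class JobStatus(str, Enum):
    completed = "completed"
    failed = "failed"
    cancelled = "cancelled"


class StopReasonType(str, Enum):
    end_turn = "end_turn"
    llm_api_error = "llm_api_error"
    invalid_llm_response = "invalid_llm_response"
    cancelled = "cancelled"

    @property
    def run_status(self) -> "JobStatus":
        # llm_api_error now joins the failure bucket, per the hunk above
        if self in (StopReasonType.invalid_llm_response, StopReasonType.llm_api_error):
            return JobStatus.failed
        if self is StopReasonType.cancelled:
            return JobStatus.cancelled
        return JobStatus.completed


print(StopReasonType.llm_api_error.run_status)   # JobStatus.failed
```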
letta/server/rest_api/app.py CHANGED

@@ -17,7 +17,15 @@ from starlette.middleware.cors import CORSMiddleware
 from letta.__init__ import __version__ as letta_version
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
-from letta.errors import
+from letta.errors import (
+    BedrockPermissionError,
+    LettaAgentNotFoundError,
+    LettaUserNotFoundError,
+    LLMAuthenticationError,
+    LLMError,
+    LLMRateLimitError,
+    LLMTimeoutError,
+)
 from letta.helpers.pinecone_utils import get_pinecone_indices, should_use_pinecone, upsert_pinecone_indices
 from letta.jobs.scheduler import start_scheduler_with_leader_election
 from letta.log import get_logger

@@ -276,6 +284,58 @@ def create_application() -> "FastAPI":
         },
     )

+    @app.exception_handler(LLMTimeoutError)
+    async def llm_timeout_error_handler(request: Request, exc: LLMTimeoutError):
+        return JSONResponse(
+            status_code=504,
+            content={
+                "error": {
+                    "type": "llm_timeout",
+                    "message": "The LLM request timed out. Please try again.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMRateLimitError)
+    async def llm_rate_limit_error_handler(request: Request, exc: LLMRateLimitError):
+        return JSONResponse(
+            status_code=429,
+            content={
+                "error": {
+                    "type": "llm_rate_limit",
+                    "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMAuthenticationError)
+    async def llm_auth_error_handler(request: Request, exc: LLMAuthenticationError):
+        return JSONResponse(
+            status_code=401,
+            content={
+                "error": {
+                    "type": "llm_authentication",
+                    "message": "Authentication failed with the LLM model provider.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMError)
+    async def llm_error_handler(request: Request, exc: LLMError):
+        return JSONResponse(
+            status_code=502,
+            content={
+                "error": {
+                    "type": "llm_error",
+                    "message": "An error occurred with the LLM request.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
     settings.cors_origins.append("https://app.letta.com")

     if (os.getenv("LETTA_SERVER_SECURE") == "true") or "--secure" in sys.argv:
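The handlers registered above use FastAPI's standard exception-handler mechanism: a typed exception becomes a structured JSON body plus a fixed HTTP status. A runnable sketch of one such handler outside of letta; LLMTimeoutError here is a stand-in class, and the route and app are invented for the demonstration:

```python
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.testclient import TestClient


class LLMTimeoutError(Exception):
    """Stand-in for letta.errors.LLMTimeoutError."""


app = FastAPI()


@app.exception_handler(LLMTimeoutError)
async def llm_timeout_error_handler(request: Request, exc: LLMTimeoutError):
    # Same shape as the handlers above: typed error -> JSON error object + status code.
    return JSONResponse(
        status_code=504,
        content={"error": {"type": "llm_timeout", "message": "The LLM request timed out.", "detail": str(exc)}},
    )


@app.get("/boom")
async def boom():
    raise LLMTimeoutError("no tokens after 60s")


client = TestClient(app)
response = client.get("/boom")
print(response.status_code, response.json()["error"]["type"])   # 504 llm_timeout
```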
letta/server/rest_api/interface.py CHANGED

@@ -808,86 +808,33 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # If there was nothing in the name buffer, we can proceed to
             # output the arguments chunk as a ToolCallMessage
             else:
-                #
+                # use_assistant_message means we should emit only the value of "message"
                 if self.use_assistant_message and (
                     self.last_flushed_function_name is not None
                     and self.last_flushed_function_name == self.assistant_message_tool_name
                 ):
-                    #
-
-
-
-
-
-
-
-                    updates_main_json = None
-
-                    else:
-                        # Some hardcoding to strip off the trailing "}"
-                        if updates_main_json in ["}", '"}']:
-                            updates_main_json = None
-                        if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
-                            updates_main_json = updates_main_json[:-1]
-
-                    if not updates_main_json:
-                        # early exit to turn into content mode
+                    # Feed any buffered prefix first to avoid missing the start of the value
+                    payload = (self.function_args_buffer or "") + (updates_main_json or "")
+                    self.function_args_buffer = None
+                    cleaned = self.streaming_chat_completion_json_reader.process_json_chunk(payload)
+                    from letta.streaming_utils import sanitize_streamed_message_content
+
+                    cleaned = sanitize_streamed_message_content(cleaned or "")
+                    if not cleaned:
                         return None
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            name=name,
-                            otid=Message.generate_otid_from_id(message_id, message_index),
-                        )
-                        # Store the ID of the tool call so allow skipping the corresponding response
-                        if self.function_id_buffer:
-                            self.prev_assistant_message_id = self.function_id_buffer
-                        # clear buffer
-                        self.function_args_buffer = None
-                        self.function_id_buffer = None
-
-                    else:
-                        # If there's no buffer to clear, just output a new chunk with new data
-                        # TODO: THIS IS HORRIBLE
-                        # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
-                        # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                        parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
-
-                        if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
-                            self.assistant_message_tool_kwarg
-                        ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
-                            new_content = parsed_args.get(self.assistant_message_tool_kwarg)
-                            prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
-                            # TODO: Assumes consistent state and that prev_content is subset of new_content
-                            diff = new_content.replace(prev_content, "", 1)
-                            self.current_json_parse_result = parsed_args
-                            if prev_message_type and prev_message_type != "assistant_message":
-                                message_index += 1
-                            processed_chunk = AssistantMessage(
-                                id=message_id,
-                                date=message_date,
-                                content=diff,
-                                name=name,
-                                otid=Message.generate_otid_from_id(message_id, message_index),
-                            )
-                        else:
-                            return None
-
-                        # Store the ID of the tool call so allow skipping the corresponding response
-                        if self.function_id_buffer:
-                            self.prev_assistant_message_id = self.function_id_buffer
-                        # clear buffers
-                        self.function_id_buffer = None
+                    if prev_message_type and prev_message_type != "assistant_message":
+                        message_index += 1
+                    processed_chunk = AssistantMessage(
+                        id=message_id,
+                        date=message_date,
+                        content=cleaned,
+                        name=name,
+                        otid=Message.generate_otid_from_id(message_id, message_index),
+                    )
+                    # Store the ID of the tool call so allow skipping the corresponding response
+                    if self.function_id_buffer:
+                        self.prev_assistant_message_id = self.function_id_buffer
+                    # Do not clear function_id_buffer here — we may still need it
                 else:
                     # There may be a buffer from a previous chunk, for example
                     # if the previous chunk had arguments but we needed to flush name
letta/server/rest_api/routers/v1/agents.py CHANGED

@@ -536,9 +536,7 @@ async def attach_source(

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=source_id)
-        safe_create_task(
-            server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
-        )
+        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

     return agent_state

@@ -565,9 +563,7 @@ async def attach_folder_to_agent(

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=folder_id)
-        safe_create_task(
-            server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
-        )
+        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

     return agent_state

@@ -1320,15 +1316,55 @@ async def send_message_streaming(
     try:
         if agent_eligible and model_compatible:
             agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
-
-
-
-
-
-
-
-
-
+
+            async def error_aware_stream():
+                """Stream that handles early LLM errors gracefully in streaming format."""
+                from letta.errors import LLMAuthenticationError, LLMError, LLMRateLimitError, LLMTimeoutError
+
+                try:
+                    stream = agent_loop.stream(
+                        input_messages=request.messages,
+                        max_steps=request.max_steps,
+                        stream_tokens=request.stream_tokens and model_compatible_token_streaming,
+                        run_id=run.id if run else None,
+                        use_assistant_message=request.use_assistant_message,
+                        request_start_timestamp_ns=request_start_timestamp_ns,
+                        include_return_message_types=request.include_return_message_types,
+                    )
+                    async for chunk in stream:
+                        yield chunk
+
+                except LLMTimeoutError as e:
+                    error_data = {
+                        "error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": str(e)}
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 504)
+                except LLMRateLimitError as e:
+                    error_data = {
+                        "error": {
+                            "type": "llm_rate_limit",
+                            "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                            "detail": str(e),
+                        }
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 429)
+                except LLMAuthenticationError as e:
+                    error_data = {
+                        "error": {
+                            "type": "llm_authentication",
+                            "message": "Authentication failed with the LLM model provider.",
+                            "detail": str(e),
+                        }
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 401)
+                except LLMError as e:
+                    error_data = {"error": {"type": "llm_error", "message": "An error occurred with the LLM request.", "detail": str(e)}}
+                    yield (f"data: {json.dumps(error_data)}\n\n", 502)
+                except Exception as e:
+                    error_data = {"error": {"type": "internal_error", "message": "An internal server error occurred.", "detail": str(e)}}
+                    yield (f"data: {json.dumps(error_data)}\n\n", 500)
+
+            raw_stream = error_aware_stream()

     from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
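error_aware_stream above converts provider errors raised mid-stream into a final SSE data line paired with the HTTP status the response should carry (consumed downstream by StreamingResponseWithStatusCode). A self-contained sketch of that wrapping pattern; the source generator and error class here are stand-ins, not letta code:

```python
import asyncio
import json


class LLMRateLimitError(Exception):
    """Stand-in for letta.errors.LLMRateLimitError."""


async def error_aware_stream(source):
    """Re-yield chunks from an async generator; on failure, emit one last
    (sse_line, status_code) tuple instead of letting the exception escape."""
    try:
        async for chunk in source:
            yield chunk
    except LLMRateLimitError as exc:
        payload = {"error": {"type": "llm_rate_limit", "detail": str(exc)}}
        yield (f"data: {json.dumps(payload)}\n\n", 429)


async def flaky_source():
    yield 'data: {"message": "hello"}\n\n'
    raise LLMRateLimitError("quota exceeded")


async def main():
    async for item in error_aware_stream(flaky_source()):
        print(item)   # first the normal chunk, then the (error line, 429) tuple


asyncio.run(main())
```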
letta/services/job_manager.py CHANGED

@@ -218,8 +218,17 @@ class JobManager:
         """
         try:
             job_update_builder = partial(JobUpdate, status=new_status)
+
+            # If metadata is provided, merge it with existing metadata
             if metadata:
-
+                # Get the current job to access existing metadata
+                current_job = await self.get_job_by_id_async(job_id=job_id, actor=actor)
+                merged_metadata = {}
+                if current_job.metadata:
+                    merged_metadata.update(current_job.metadata)
+                merged_metadata.update(metadata)
+                job_update_builder = partial(job_update_builder, metadata=merged_metadata)
+
             if new_status.is_terminal:
                 job_update_builder = partial(job_update_builder, completed_at=get_utc_time())
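The job_manager change merges caller-supplied metadata into the job's stored metadata instead of replacing it wholesale. The same merge order in isolation, as a small stand-alone helper rather than the letta method:

```python
def merge_job_metadata(existing: dict | None, update: dict | None) -> dict:
    """Start from the stored metadata, then let the caller-supplied values win
    on key conflicts, matching the update order in the hunk above."""
    merged = {}
    if existing:
        merged.update(existing)
    if update:
        merged.update(update)
    return merged


print(merge_job_metadata({"source": "api", "attempt": 1}, {"attempt": 2}))
# {'source': 'api', 'attempt': 2}
```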
letta/streaming_utils.py CHANGED

@@ -264,39 +264,100 @@ class FunctionArgumentsStreamHandler:

     def process_json_chunk(self, chunk: str) -> Optional[str]:
         """Process a chunk from the function arguments and return the plaintext version"""
-
-
-
-        if
-
-
-
-            return None
+        clean_chunk = chunk.strip()
+        # Not in message yet: accumulate until we see '<json_key>': (robust to split fragments)
+        if not self.in_message:
+            if clean_chunk == "{":
+                self.key_buffer = ""
+                self.accumulating = True
+                return None
             self.key_buffer += clean_chunk
+            if self.json_key in self.key_buffer and ":" in clean_chunk:
+                # Enter value mode; attempt to extract inline content if it exists in this same chunk
+                self.in_message = True
+                self.accumulating = False
+                # Try to find the first quote after the colon within the original (unstripped) chunk
+                s = chunk
+                colon_idx = s.find(":")
+                if colon_idx != -1:
+                    q_idx = s.find('"', colon_idx + 1)
+                    if q_idx != -1:
+                        self.message_started = True
+                        rem = s[q_idx + 1 :]
+                        # Check if this same chunk also contains the terminating quote (and optional delimiter)
+                        j = len(rem) - 1
+                        while j >= 0 and rem[j] in " \t\r\n":
+                            j -= 1
+                        if j >= 1 and rem[j - 1] == '"' and rem[j] in ",}]":
+                            out = rem[: j - 1]
+                            self.in_message = False
+                            self.message_started = False
+                            return out
+                        if j >= 0 and rem[j] == '"':
+                            out = rem[:j]
+                            self.in_message = False
+                            self.message_started = False
+                            return out
+                        # No terminator yet; emit remainder as content
+                        return rem
+                return None
+            if clean_chunk == "}":
+                self.in_message = False
+                self.message_started = False
+                self.key_buffer = ""
             return None

+        # Inside message value
         if self.in_message:
-
+            # Bare opening/closing quote tokens
+            if clean_chunk == '"' and self.message_started:
                 self.in_message = False
                 self.message_started = False
                 return None
-            if not self.message_started and
+            if not self.message_started and clean_chunk == '"':
                 self.message_started = True
                 return None
             if self.message_started:
-
+                # Detect closing patterns: '"', '",', '"}' (with optional whitespace)
+                i = len(chunk) - 1
+                while i >= 0 and chunk[i] in " \t\r\n":
+                    i -= 1
+                if i >= 1 and chunk[i - 1] == '"' and chunk[i] in ",}]":
+                    out = chunk[: i - 1]
                     self.in_message = False
-
+                    self.message_started = False
+                    return out
+                if i >= 0 and chunk[i] == '"':
+                    out = chunk[:i]
+                    self.in_message = False
+                    self.message_started = False
+                    return out
+                # Otherwise, still mid-string
                 return chunk

-        if
-            self.key_buffer = ""
-            self.accumulating = True
-            return None
-
-        if chunk.strip() == "}":
+        if clean_chunk == "}":
             self.in_message = False
             self.message_started = False
+            self.key_buffer = ""
             return None

         return None
+
+
+def sanitize_streamed_message_content(text: str) -> str:
+    """Remove trailing JSON delimiters that can leak into assistant text.
+
+    Specifically handles cases where a message string is immediately followed
+    by a JSON delimiter in the stream (e.g., '"', '",', '"}', '" ]').
+    Internal commas inside the message are preserved.
+    """
+    if not text:
+        return text
+    t = text.rstrip()
+    # strip trailing quote + delimiter
+    if len(t) >= 2 and t[-2] == '"' and t[-1] in ",}]":
+        return t[:-2]
+    # strip lone trailing quote
+    if t.endswith('"'):
+        return t[:-1]
+    return t
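Since sanitize_streamed_message_content is added in full above, its behavior on a few streamed fragments can be shown directly. This assumes the helper is imported from letta.streaming_utils, as the new import block earlier in this diff does:

```python
from letta.streaming_utils import sanitize_streamed_message_content

# Trailing JSON punctuation is stripped; punctuation inside the text is preserved.
print(sanitize_streamed_message_content('Sounds good"'))    # Sounds good
print(sanitize_streamed_message_content('Sounds good"}'))   # Sounds good
print(sanitize_streamed_message_content("Sure, on it"))     # Sure, on it (unchanged)
```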
letta_nightly-0.11.7.dev20250912104045.dist-info/RECORD → letta_nightly-0.11.7.dev20250914103918.dist-info/RECORD CHANGED

@@ -11,12 +11,12 @@ letta/memory.py,sha256=l5iNhLAR_xzgTb0GBlQx4SVgH8kuZh8siJdC_CFPKEs,4278
 letta/pytest.ini,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/settings.py,sha256=QEjNUwRXGBgsQpQAs2kksQmGN5CbxKlxPPydrklx_Ms,15011
 letta/streaming_interface.py,sha256=rPMfwUcjqITWk2tVqFQm1hmP99tU2IOHg9gU2dgPSo8,16400
-letta/streaming_utils.py,sha256=
+letta/streaming_utils.py,sha256=ZRFGFpQqn9ujCEbgZdLM7yTjiuNNvqQ47sNhV8ix-yQ,16553
 letta/system.py,sha256=kHF7n3Viq7gV5UIUEXixod2gWa2jroUgztpEzMC1Sew,8925
 letta/utils.py,sha256=bSq3St7MUw9gN1g0ICdOhNNaUFYBC3EfJLG6qsRLSFA,43290
 letta/adapters/letta_llm_adapter.py,sha256=11wkOkEQfPXUuJoJxbK22wCa-8gnWiDAb3UOXOxLt5U,3427
 letta/adapters/letta_llm_request_adapter.py,sha256=wJhK5M_qOhRPAhgMmYI7EJcM8Op19tClnXe0kJ29a3Q,4831
-letta/adapters/letta_llm_stream_adapter.py,sha256=
+letta/adapters/letta_llm_stream_adapter.py,sha256=G8IqtXor0LUuW-dKtGJWsUt6DfJreVCn5h6W2lHEPBg,7658
 letta/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/agents/agent_loop.py,sha256=cTSlGt1g9aZWG5vIMYtzdeJG1UcrqfjpLGmZU6j89zU,854
 letta/agents/base_agent.py,sha256=rUAcPxWmTnmi50AWOXwrWc-v5sPIod0W_xXaPQShjcE,8540

@@ -27,7 +27,7 @@ letta/agents/exceptions.py,sha256=BQY4D4w32OYHM63CM19ko7dPwZiAzUs3NbKvzmCTcJg,31
 letta/agents/helpers.py,sha256=eCHsvZEkTe0L_uZHYkfNAztsEJW0FTnKZMgVbqlI0Yg,11618
 letta/agents/letta_agent.py,sha256=6nRTh5kzUpqK7eNMk4DlcgEoPmDxFmRb5ysoVHa-vh8,99488
 letta/agents/letta_agent_batch.py,sha256=17RpYVXpGh9dlKxdMOLMCOHWFsi6N5S9FJHxooxkJCI,27998
-letta/agents/letta_agent_v2.py,sha256=
+letta/agents/letta_agent_v2.py,sha256=vMzVZL6Px5XBODEh3BHbbADtBSDIuNJrjsHqMGQfnwg,59930
 letta/agents/voice_agent.py,sha256=y-n6qadfKsswvGODzXH02pLIQQ44wnaDSE6oUgKHVkA,23381
 letta/agents/voice_sleeptime_agent.py,sha256=_JzCbWBOKrmo1cTaqZFTrQudpJEapwAyrXYtAHUILGo,8675
 letta/cli/cli.py,sha256=tKtghlX36Rp0_HbkMosvlAapL07JXhA0vKLGTNKnxSQ,1615

@@ -85,7 +85,7 @@ letta/humans/examples/cs_phd.txt,sha256=9C9ZAV_VuG7GB31ksy3-_NAyk8rjE6YtVOkhp08k
 letta/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/interfaces/anthropic_streaming_interface.py,sha256=0VyK8kTRgCLNDLQN6vX1gJ0dfJhqguL_NL1GYgFr6fU,25614
 letta/interfaces/openai_chat_completions_streaming_interface.py,sha256=3xHXh8cW79EkiMUTYfvcH_s92nkLjxXfvtVOVC3bfLo,5050
-letta/interfaces/openai_streaming_interface.py,sha256=
+letta/interfaces/openai_streaming_interface.py,sha256=YLArar2ypOEaVt7suJxpg1QZr0ErwEmPSEVhzaP6JWc,24166
 letta/interfaces/utils.py,sha256=c6jvO0dBYHh8DQnlN-B0qeNC64d3CSunhfqlFA4pJTY,278
 letta/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/jobs/helpers.py,sha256=kO4aj954xsQ1RAmkjY6LQQ7JEIGuhaxB1e9pzrYKHAY,914

@@ -93,13 +93,13 @@ letta/jobs/llm_batch_job_polling.py,sha256=HUCTa1lTOiLAB_8m95RUfeNJa4lxlF8paGdCV
 letta/jobs/scheduler.py,sha256=Ub5VTCA8P5C9Y-0mPK2YIPJSEzKbSd2l5Sp0sOWctD8,8697
 letta/jobs/types.py,sha256=K8GKEnqEgAT6Kq4F2hUrBC4ZAFM9OkfOjVMStzxKuXQ,742
 letta/llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/llm_api/anthropic_client.py,sha256=
-letta/llm_api/azure_client.py,sha256=
+letta/llm_api/anthropic_client.py,sha256=L8M4i08bHfNh1uS_M2_bDf3yeEuHpr5pungyu0pqo60,37380
+letta/llm_api/azure_client.py,sha256=BeChGsH4brrSgZBbCf8UE5RkW-3ZughpKnsBY2VYxwI,3841
 letta/llm_api/bedrock_client.py,sha256=gNKSFGCbrrLMPvtBItAOz1nme4K_opgkZdFa3cUzp7M,3434
 letta/llm_api/deepseek_client.py,sha256=di6ApSQu1DewXw0_JIP7AK4IHvXQHd0e32tQfFf5F34,16975
 letta/llm_api/google_ai_client.py,sha256=JweTUHZXvK6kcZBGXA7XEU53KP4vM7_zdD7AorCtsdI,8166
 letta/llm_api/google_constants.py,sha256=eOjOv-FImyJ4b4QGIaod-mEROMtrBFz0yhuYHqOEkwY,797
-letta/llm_api/google_vertex_client.py,sha256=
+letta/llm_api/google_vertex_client.py,sha256=p6MNUFHhkzFkGfWgldjVJC6SIvSMriMeCoenNYynU6E,34970
 letta/llm_api/groq_client.py,sha256=nNeWSgDVOLn3iFiicDKyhHj7f73JxrB9-7_M2Pv2e1I,3192
 letta/llm_api/helpers.py,sha256=GXV_SuaU7uSCDj6bxDcCCF7CUjuZQCVWd5qZ3OsHVNk,17587
 letta/llm_api/llm_api_tools.py,sha256=lsZ6OeIHesyOfbNQi5CVw5hn1lTQP5gJyforp-D0nk8,12294

@@ -107,7 +107,7 @@ letta/llm_api/llm_client.py,sha256=iXiPbrhluP2DBczv9nkFlAXdwWGOkg0lNDA9LzLrG4o,3
 letta/llm_api/llm_client_base.py,sha256=RFo8H4ILxVyzB3DeF4rJoJJYjRF8ScVO4yyDrhuN0DY,10052
 letta/llm_api/mistral.py,sha256=ruOTBt07Uzx7S30_eXhedVWngtpjtlzG6Ox1Iw0_mQs,662
 letta/llm_api/openai.py,sha256=56cwdS9l-75cMTtY9df6Dbb1M9crH8YQsSdF3Pm3Rpg,27393
-letta/llm_api/openai_client.py,sha256=
+letta/llm_api/openai_client.py,sha256=QDIRIG-4MVA-Jug8qx0HUkhg3qtUfHGvE6QCbSYGK-c,22597
 letta/llm_api/together_client.py,sha256=HeDMDDa525yfDTKciODDfX_t93QBfFmX0n2P-FT1QTU,2284
 letta/llm_api/xai_client.py,sha256=3mpSQ9OoWyjqo2VhNM_m0EPBzS69r4p-OEwL7UWc9oY,3772
 letta/llm_api/sample_response_jsons/aws_bedrock.json,sha256=RS3VqyxPB9hQQCPm42hWoga0bisKv_0e8ZF-c3Ag1FA,930

@@ -202,7 +202,7 @@ letta/otel/events.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/otel/metric_registry.py,sha256=TdRBJrwDuyZV2Uretnq0lYIoYKA2JUqWkENGqLhOCBc,9344
 letta/otel/metrics.py,sha256=GlIt8XLkP-igTXptah8UBonpHF7nEtSqTONSkAEERAs,4740
 letta/otel/resource.py,sha256=kqvEywP2LTmuxv2Or3Irtm2zwic863j1DWUvBC0IONc,735
-letta/otel/sqlalchemy_instrumentation.py,sha256=
+letta/otel/sqlalchemy_instrumentation.py,sha256=yiZvHjDA8Sd5j5RGbokiaOgRwCIE5hkvhWavVSOXs7U,18892
 letta/otel/sqlalchemy_instrumentation_integration.py,sha256=CwGPd5mb4PasBDnSlulSqfaupN-kB8Wz6EBHWBDNuuo,3902
 letta/otel/tracing.py,sha256=kyLsl00Zka3z3uEnOZqgantHya_bsmpvulABYHvsUo8,10422
 letta/personas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -270,7 +270,7 @@ letta/schemas/letta_message_content.py,sha256=7FuholmKauP5Z-FJdsCH_-4IUGl_8jVqi5
 letta/schemas/letta_ping.py,sha256=9JphoKhWZ63JqsakIx4aaj8dYMtYVa7HxSkT5cMh5cI,863
 letta/schemas/letta_request.py,sha256=ll0QTt-tzaJ3zxpPyaifz7mtWcPy6QmvPUDOzngbxfQ,4526
 letta/schemas/letta_response.py,sha256=e6FcAhRX3heB0FoWAAozB3RJboMwi_JpelTdc5JupVA,8188
-letta/schemas/letta_stop_reason.py,sha256=
+letta/schemas/letta_stop_reason.py,sha256=6vF66Dsyzog3X_d2PjfQxJLyiDarlqJ-hG7NMJpxbuc,2349
 letta/schemas/llm_batch_job.py,sha256=xr7RmMc9ItmL344vcIn1MJaT2nOf0F7qEHrsXkQNFQI,3136
 letta/schemas/llm_config.py,sha256=8nyi9r3o3feh_hUy6pdRWp3E6M612xZhvV3gkFB4aqE,13642
 letta/schemas/llm_config_overrides.py,sha256=E6qJuVA8TwAAy3VjGitJ5jSQo5PbN-6VPcZOF5qhP9A,1815

@@ -337,10 +337,10 @@ letta/server/server.py,sha256=KFFbyl7Djn8CS0aPxz3jL8RwmXPr9nKY3wDu3ymUWjI,109265
 letta/server/startup.sh,sha256=z-Fea-7LiuS_aG1tJqS8JAsDQaamwC_kuDhv9D3PPPY,2698
 letta/server/utils.py,sha256=rRvW6L1lzau4u9boamiyZH54lf5tQ91ypXzUW9cfSPA,1667
 letta/server/rest_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/server/rest_api/app.py,sha256=
+letta/server/rest_api/app.py,sha256=T3LLveXRJmfWqR0uEzoaLY8LXwYrwCQGb80XMbSCDUo,21172
 letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
 letta/server/rest_api/chat_completions_interface.py,sha256=-7wO7pNBWXMqblVkJpuZ8JPJ-LjudLTtT6BJu-q_XAM,11138
-letta/server/rest_api/interface.py,sha256=
+letta/server/rest_api/interface.py,sha256=_GQfKYUp9w4Wo2HSE_8Ff7QU16t1blspLaqmukpER9s,67099
 letta/server/rest_api/json_parser.py,sha256=yoakaCkSMdf0Y_pyILoFKZlvzXeqF-E1KNeHzatLMDc,9157
 letta/server/rest_api/redis_stream_manager.py,sha256=hz85CigFWdLkK1FWUmF-i6ObgoKkuoEgkiwshZ6QPKI,10764
 letta/server/rest_api/static_files.py,sha256=NG8sN4Z5EJ8JVQdj19tkFa9iQ1kBPTab9f_CUxd_u4Q,3143

@@ -355,7 +355,7 @@ letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
 letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256=ohM1i8BsNxTiw8duuRT5X_0tSUzBwctQM4fJ5DXURic,5157
 letta/server/rest_api/routers/v1/__init__.py,sha256=9MnEA7CgtIxyU_dDNG0jm-Ziqu1somBml-e5gKjgd9I,1997
-letta/server/rest_api/routers/v1/agents.py,sha256=
+letta/server/rest_api/routers/v1/agents.py,sha256=2lGLtfgB2ZtAa0EgaiaDlNV0GJhAi_kQQy7XqXB2DG0,77771
 letta/server/rest_api/routers/v1/blocks.py,sha256=ykI77xnmIxPLqdAy5kzGyGw0w0ZRyVXn-O5Xcdj6-70,7690
 letta/server/rest_api/routers/v1/embeddings.py,sha256=PRaQlrmEXPiIdWsTbadrFsv3Afyv5oEFUdhgHA8FTi8,989
 letta/server/rest_api/routers/v1/folders.py,sha256=8Yb-bw2JdXBxMfrJNIZQk9_FKN2fet9Ccp8T83_c2sc,23539

@@ -397,7 +397,7 @@ letta/services/file_manager.py,sha256=d4uX8RblmqNGk1MsfeGzQ5uDWKVFP-AH63Jz5xOkj2
 letta/services/files_agents_manager.py,sha256=QJrJTgDn3RXUjZIGiIw4GQ5k2iKj-Wvzs-WQetpQ154,30059
 letta/services/group_manager.py,sha256=dD4DDHjOptMrtbWqw1ErlhpBqChw2ubLJdILjeLTY8I,29183
 letta/services/identity_manager.py,sha256=JI9Xc7EsBagSwDS2na4rFNhoO_LuaxlkVO_1oIK_ITQ,11841
-letta/services/job_manager.py,sha256=
+letta/services/job_manager.py,sha256=E-w9_4BMErMuqVf2dFlTPTobrvBKhPyyEDfuqLnbACI,35970
 letta/services/llm_batch_manager.py,sha256=iDzLFfmgpQooGY4zpN_w8q1SZ27fr2Cv6Ks3ltZErL8,20929
 letta/services/mcp_manager.py,sha256=QuvKQnwxMXrhiCaYlF50GZwXmbSU7PxmcOZ85sQ3t7I,47848
 letta/services/message_manager.py,sha256=tomsZidPT-I95sJsEsls-vj3qglehV7XNTs-m2zF8Bg,60629

@@ -470,8 +470,8 @@ letta/templates/sandbox_code_file_async.py.j2,sha256=lb7nh_P2W9VZHzU_9TxSCEMUod7
 letta/templates/summary_request_text.j2,sha256=ZttQwXonW2lk4pJLYzLK0pmo4EO4EtUUIXjgXKiizuc,842
 letta/templates/template_helper.py,sha256=HkG3zwRc5NVGmSTQu5PUTpz7LevK43bzXVaQuN8urf0,1634
 letta/types/__init__.py,sha256=hokKjCVFGEfR7SLMrtZsRsBfsC7yTIbgKPLdGg4K1eY,147
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
+letta_nightly-0.11.7.dev20250914103918.dist-info/METADATA,sha256=znAgbibaDvvLthC_McJ-W-HokPJdRIUijKN7KtgqoE0,24424
+letta_nightly-0.11.7.dev20250914103918.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+letta_nightly-0.11.7.dev20250914103918.dist-info/entry_points.txt,sha256=m-94Paj-kxiR6Ktu0us0_2qfhn29DzF2oVzqBE6cu8w,41
+letta_nightly-0.11.7.dev20250914103918.dist-info/licenses/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.11.7.dev20250914103918.dist-info/RECORD,,

File without changes: the remaining dist-info files (WHEEL, entry_points.txt, licenses/LICENSE) carry +0 -0 in the summary above.