letta-nightly 0.4.1.dev20241004104123__py3-none-any.whl → 0.4.1.dev20241006104046__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registries, and is provided for informational purposes only.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/cli/cli.py +30 -365
- letta/cli/cli_config.py +70 -27
- letta/client/client.py +103 -11
- letta/config.py +80 -80
- letta/constants.py +6 -0
- letta/credentials.py +10 -1
- letta/errors.py +63 -5
- letta/llm_api/llm_api_tools.py +110 -52
- letta/local_llm/chat_completion_proxy.py +0 -3
- letta/main.py +1 -2
- letta/metadata.py +12 -0
- letta/providers.py +232 -0
- letta/schemas/block.py +1 -1
- letta/schemas/letta_request.py +17 -0
- letta/schemas/letta_response.py +11 -0
- letta/schemas/llm_config.py +18 -2
- letta/schemas/message.py +40 -13
- letta/server/rest_api/app.py +5 -0
- letta/server/rest_api/interface.py +115 -24
- letta/server/rest_api/routers/v1/agents.py +36 -3
- letta/server/rest_api/routers/v1/llms.py +6 -2
- letta/server/server.py +60 -87
- letta/server/static_files/assets/index-3ab03d5b.css +1 -0
- letta/server/static_files/assets/{index-4d08d8a3.js → index-9a9c449b.js} +69 -69
- letta/server/static_files/index.html +2 -2
- letta/settings.py +144 -114
- letta/utils.py +6 -1
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241006104046.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241006104046.dist-info}/RECORD +32 -32
- letta/local_llm/groq/api.py +0 -97
- letta/server/static_files/assets/index-156816da.css +0 -1
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241006104046.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241006104046.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241004104123.dist-info → letta_nightly-0.4.1.dev20241006104046.dist-info}/entry_points.txt +0 -0
letta/llm_api/llm_api_tools.py
CHANGED
@@ -9,7 +9,6 @@ from typing import List, Optional, Union
 import requests

 from letta.constants import CLI_WARNING_PREFIX, OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-from letta.credentials import LettaCredentials
 from letta.llm_api.anthropic import anthropic_chat_completions_request
 from letta.llm_api.azure_openai import (
     MODEL_TO_AZURE_ENGINE,
@@ -29,6 +28,7 @@ from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
 )
+from letta.providers import GoogleAIProvider
 from letta.schemas.enums import OptionState
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
@@ -37,14 +37,14 @@ from letta.schemas.openai.chat_completion_request import (
     Tool,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
 from letta.streaming_interface import (
     AgentChunkStreamingInterface,
     AgentRefreshStreamingInterface,
 )
 from letta.utils import json_dumps

-LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local"]
+LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]


 # TODO update to use better types
@@ -83,7 +83,7 @@ def add_inner_thoughts_to_functions(
     return new_functions


-def unpack_inner_thoughts_from_kwargs(
+def unpack_all_inner_thoughts_from_kwargs(
     response: ChatCompletionResponse,
     inner_thoughts_key: str,
 ) -> ChatCompletionResponse:
@@ -93,36 +93,7 @@ def unpack_inner_thoughts_from_kwargs(

     new_choices = []
     for choice in response.choices:
-        msg = choice.message
-        if msg.role == "assistant" and msg.tool_calls and len(msg.tool_calls) >= 1:
-            if len(msg.tool_calls) > 1:
-                warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(msg.tool_calls)}) is not supported")
-            # TODO support multiple tool calls
-            tool_call = msg.tool_calls[0]
-
-            try:
-                # Sadly we need to parse the JSON since args are in string format
-                func_args = dict(json.loads(tool_call.function.arguments))
-                if inner_thoughts_key in func_args:
-                    # extract the inner thoughts
-                    inner_thoughts = func_args.pop(inner_thoughts_key)
-
-                    # replace the kwargs
-                    new_choice = choice.model_copy(deep=True)
-                    new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
-                    # also replace the message content
-                    if new_choice.message.content is not None:
-                        warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
-                    new_choice.message.content = inner_thoughts
-
-                    # save copy
-                    new_choices.append(new_choice)
-                else:
-                    warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
-
-            except json.JSONDecodeError as e:
-                warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
-                raise e
+        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))

     # return an updated copy
     new_response = response.model_copy(deep=True)
@@ -130,6 +101,38 @@ def unpack_inner_thoughts_from_kwargs(
     return new_response


+def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
+    message = choice.message
+    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
+        if len(message.tool_calls) > 1:
+            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+        # TODO support multiple tool calls
+        tool_call = message.tool_calls[0]
+
+        try:
+            # Sadly we need to parse the JSON since args are in string format
+            func_args = dict(json.loads(tool_call.function.arguments))
+            if inner_thoughts_key in func_args:
+                # extract the inner thoughts
+                inner_thoughts = func_args.pop(inner_thoughts_key)
+
+                # replace the kwargs
+                new_choice = choice.model_copy(deep=True)
+                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
+                # also replace the message content
+                if new_choice.message.content is not None:
+                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                new_choice.message.content = inner_thoughts
+
+                return new_choice
+            else:
+                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+
+        except json.JSONDecodeError as e:
+            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+            raise e
+
+
 def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
     """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
     from letta.utils import printd
@@ -246,15 +249,17 @@ def create(
     # TODO move to llm_config?
     # if unspecified (None), default to something we've tested
     inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+    model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd

-
+    if not model_settings:
+        from letta.settings import model_settings

-
+        model_settings = model_settings

-
+    printd(f"Using model {llm_config.model_endpoint_type}, endpoint: {llm_config.model_endpoint}")

     if function_call and not functions:
         printd("unsetting function_call because functions is None")
@@ -286,7 +291,7 @@ def create(
         ]

         # TODO do the same for Azure?
-        if
+        if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")
         if use_tool_naming:
@@ -323,7 +328,7 @@ def create(
             ), type(stream_inferface)
             response = openai_chat_completions_process_stream(
                 url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                api_key=
+                api_key=model_settings.openai_api_key,
                 chat_completion_request=data,
                 stream_inferface=stream_inferface,
             )
@@ -332,10 +337,9 @@ def create(
            if isinstance(stream_inferface, AgentChunkStreamingInterface):
                 stream_inferface.stream_start()
             try:
-
                 response = openai_chat_completions_request(
                     url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                    api_key=
+                    api_key=model_settings.openai_api_key,
                     chat_completion_request=data,
                 )
             finally:
@@ -343,7 +347,7 @@ def create(
                     stream_inferface.stream_end()

         if inner_thoughts_in_kwargs:
-            response =
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response

@@ -353,7 +357,7 @@ def create(
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")

         azure_deployment = (
-
+            model_settings.azure_deployment if model_settings.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[llm_config.model]
         )
         if use_tool_naming:
             data = dict(
@@ -374,10 +378,10 @@ def create(
                 user=str(user_id),
             )
         return azure_openai_chat_completions_request(
-            resource_name=
+            resource_name=model_settings.azure_endpoint,
             deployment_id=azure_deployment,
-            api_version=
-            api_key=
+            api_version=model_settings.azure_version,
+            api_key=model_settings.azure_key,
             data=data,
         )

@@ -400,9 +404,9 @@ def create(

         return google_ai_chat_completions_request(
             inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg,
-            service_endpoint=
+            service_endpoint=GoogleAIProvider(model_settings.gemini_api_key).service_endpoint,
             model=llm_config.model,
-            api_key=
+            api_key=model_settings.gemini_api_key,
             # see structure of payload here: https://ai.google.dev/docs/function_calling
             data=dict(
                 contents=[m.to_google_ai_dict() for m in messages],
@@ -424,7 +428,7 @@ def create(

         return anthropic_chat_completions_request(
             url=llm_config.model_endpoint,
-            api_key=
+            api_key=model_settings.anthropic_api_key,
             data=ChatCompletionRequest(
                 model=llm_config.model,
                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
@@ -455,7 +459,7 @@ def create(
             chat_completion_request=ChatCompletionRequest(
                 model="command-r-plus",  # TODO
                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-                tools=
+                tools=tools,
                 tool_choice=function_call,
                 # user=str(user_id),
                 # NOTE: max_tokens is required for Anthropic API
@@ -463,6 +467,60 @@ def create(
             ),
         )

+    elif llm_config.model_endpoint_type == "groq":
+        if stream:
+            raise NotImplementedError(f"Streaming not yet implemented for Groq.")
+
+        if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
+            # only is a problem if we are *not* using an openai proxy
+            raise ValueError(f"Groq key is missing from letta config file")
+
+        # force to true for groq, since they don't support 'content' is non-null
+        inner_thoughts_in_kwargs = True
+        if inner_thoughts_in_kwargs:
+            functions = add_inner_thoughts_to_functions(
+                functions=functions,
+                inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+            )
+
+        tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
+        data = ChatCompletionRequest(
+            model=llm_config.model,
+            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs) for m in messages],
+            tools=tools,
+            tool_choice=function_call,
+            user=str(user_id),
+        )
+
+        # https://console.groq.com/docs/openai
+        # "The following fields are currently not supported and will result in a 400 error (yikes) if they are supplied:"
+        assert data.top_logprobs is None
+        assert data.logit_bias is None
+        assert data.logprobs == False
+        assert data.n == 1
+        # They mention that none of the messages can have names, but it seems to not error out (for now)
+
+        data.stream = False
+        if isinstance(stream_inferface, AgentChunkStreamingInterface):
+            stream_inferface.stream_start()
+        try:
+            # groq uses the openai chat completions API, so this component should be reusable
+            assert model_settings.groq_api_key is not None, "Groq key is missing"
+            response = openai_chat_completions_request(
+                url=llm_config.model_endpoint,
+                api_key=model_settings.groq_api_key,
+                chat_completion_request=data,
+            )
+        finally:
+            if isinstance(stream_inferface, AgentChunkStreamingInterface):
+                stream_inferface.stream_end()
+
+        if inner_thoughts_in_kwargs:
+            response = unpack_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+        return response
+
     # local model
     else:
         if stream:
@@ -481,6 +539,6 @@ def create(
             # hint
             first_message=first_message,
             # auth-related
-            auth_type=
-            auth_key=
+            auth_type=model_settings.openllm_auth_type,
+            auth_key=model_settings.openllm_api_key,
         )
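Note: the refactor above splits the old response-level loop into a per-choice helper. Below is an illustrative standalone sketch (not part of the package diff) of the transformation that helper performs, written with plain dicts instead of the pydantic Choice/ChatCompletionResponse models; the "inner_thoughts" key name and the example payload are assumptions for demonstration only.

import json

INNER_THOUGHTS_KWARG = "inner_thoughts"  # assumed value of the constant, for illustration

def unpack_inner_thoughts_sketch(message: dict, inner_thoughts_key: str = INNER_THOUGHTS_KWARG) -> dict:
    # Pop the inner-thoughts kwarg out of the first tool call's JSON arguments
    # and move it into the assistant message content, mirroring the helper above.
    tool_call = message["tool_calls"][0]
    func_args = json.loads(tool_call["function"]["arguments"])
    if inner_thoughts_key in func_args:
        message["content"] = func_args.pop(inner_thoughts_key)
        tool_call["function"]["arguments"] = json.dumps(func_args)
    return message

example = {
    "role": "assistant",
    "content": None,
    "tool_calls": [
        {"function": {"name": "send_message", "arguments": json.dumps({"inner_thoughts": "User said hi.", "message": "Hello!"})}}
    ],
}
print(unpack_inner_thoughts_sketch(example)["content"])  # -> "User said hi."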
letta/local_llm/chat_completion_proxy.py
CHANGED
@@ -12,7 +12,6 @@ from letta.local_llm.grammars.gbnf_grammar_generator import (
     create_dynamic_model_from_function,
     generate_gbnf_grammar_and_documentation,
 )
-from letta.local_llm.groq.api import get_groq_completion
 from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
 from letta.local_llm.llamacpp.api import get_llamacpp_completion
 from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
@@ -170,8 +169,6 @@ def get_chat_completion(
         result, usage = get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
     elif endpoint_type == "vllm":
         result, usage = get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user)
-    elif endpoint_type == "groq":
-        result, usage = get_groq_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
     else:
         raise LocalLLMError(
             f"Invalid endpoint type {endpoint_type}, please set variable depending on your backend (webui, lmstudio, llamacpp, koboldcpp)"
letta/main.py
CHANGED
@@ -14,7 +14,7 @@ import letta.system as system
 # import benchmark
 from letta import create_client
 from letta.benchmark.benchmark import bench
-from letta.cli.cli import delete_agent, open_folder,
+from letta.cli.cli import delete_agent, open_folder, run, server, version
 from letta.cli.cli_config import add, add_tool, configure, delete, list, list_tools
 from letta.cli.cli_load import app as load_app
 from letta.config import LettaConfig
@@ -38,7 +38,6 @@ app.command(name="list-tools")(list_tools)
 app.command(name="delete")(delete)
 app.command(name="server")(server)
 app.command(name="folder")(open_folder)
-app.command(name="quickstart")(quickstart)
 # load data commands
 app.add_typer(load_app, name="load")
 # benchmark command
letta/metadata.py
CHANGED
@@ -151,6 +151,18 @@ class OrganizationModel(Base):
         return Organization(id=self.id, name=self.name, created_at=self.created_at)


+# TODO: eventually store providers?
+# class Provider(Base):
+#     __tablename__ = "providers"
+#     __table_args__ = {"extend_existing": True}
+#
+#     id = Column(String, primary_key=True)
+#     name = Column(String, nullable=False)
+#     created_at = Column(DateTime(timezone=True))
+#     api_key = Column(String, nullable=False)
+#     base_url = Column(String, nullable=False)
+
+
 class APIKeyModel(Base):
     """Data model for authentication tokens. One-to-many relationship with UserModel (1 User - N tokens)."""

letta/providers.py
ADDED
@@ -0,0 +1,232 @@
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+from letta.constants import LLM_MAX_TOKENS
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.llm_config import LLMConfig
+
+
+class Provider(BaseModel):
+    base_url: str
+
+    def list_llm_models(self):
+        return []
+
+    def list_embedding_models(self):
+        return []
+
+    def get_model_context_window(self, model_name: str):
+        pass
+
+
+class OpenAIProvider(Provider):
+    name: str = "openai"
+    api_key: str = Field(..., description="API key for the OpenAI API.")
+    base_url: str = "https://api.openai.com/v1"
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+        model_options = [obj["id"] for obj in response["data"]]
+
+        configs = []
+        for model_name in model_options:
+            context_window_size = self.get_model_context_window_size(model_name)
+
+            if not context_window_size:
+                continue
+            configs.append(
+                LLMConfig(model=model_name, model_endpoint_type="openai", model_endpoint=self.base_url, context_window=context_window_size)
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+
+        # TODO: actually automatically list models
+        return [
+            EmbeddingConfig(
+                embedding_model="text-embedding-ada-002",
+                embedding_endpoint_type="openai",
+                embedding_endpoint="https://api.openai.com/v1",
+                embedding_dim=1536,
+                embedding_chunk_size=300,
+            )
+        ]
+
+    def get_model_context_window_size(self, model_name: str):
+        if model_name in LLM_MAX_TOKENS:
+            return LLM_MAX_TOKENS[model_name]
+        else:
+            return None
+
+
+class AnthropicProvider(Provider):
+    name: str = "anthropic"
+    api_key: str = Field(..., description="API key for the Anthropic API.")
+    base_url: str = "https://api.anthropic.com/v1"
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.anthropic import anthropic_get_model_list
+
+        models = anthropic_get_model_list(self.base_url, api_key=self.api_key)
+
+        configs = []
+        for model in models:
+            configs.append(
+                LLMConfig(
+                    model=model["name"],
+                    model_endpoint_type="anthropic",
+                    model_endpoint=self.base_url,
+                    context_window=model["context_window"],
+                )
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        return []
+
+
+class OllamaProvider(OpenAIProvider):
+    name: str = "ollama"
+    base_url: str = Field(..., description="Base URL for the Ollama API.")
+    api_key: Optional[str] = Field(None, description="API key for the Ollama API (default: `None`).")
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        # https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+        import requests
+
+        response = requests.get(f"{self.base_url}/api/tags")
+        if response.status_code != 200:
+            raise Exception(f"Failed to list Ollama models: {response.text}")
+        response_json = response.json()
+
+        configs = []
+        for model in response_json["models"]:
+            context_window = self.get_model_context_window(model["name"])
+            configs.append(
+                LLMConfig(
+                    model=model["name"],
+                    model_endpoint_type="ollama",
+                    model_endpoint=self.base_url,
+                    context_window=context_window,
+                )
+            )
+        return configs
+
+    def get_model_context_window(self, model_name: str):
+
+        import requests
+
+        response = requests.post(f"{self.base_url}/api/show", json={"name": model_name, "verbose": True})
+        response_json = response.json()
+
+        # thank you vLLM: https://github.com/vllm-project/vllm/blob/main/vllm/config.py#L1675
+        possible_keys = [
+            # OPT
+            "max_position_embeddings",
+            # GPT-2
+            "n_positions",
+            # MPT
+            "max_seq_len",
+            # ChatGLM2
+            "seq_length",
+            # Command-R
+            "model_max_length",
+            # Others
+            "max_sequence_length",
+            "max_seq_length",
+            "seq_len",
+        ]
+
+        # max_position_embeddings
+        # parse model cards: nous, dolphon, llama
+        for key, value in response_json["model_info"].items():
+            if "context_window" in key:
+                return value
+        return None
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        # TODO: filter embedding models
+        return []
+
+
+class GroqProvider(OpenAIProvider):
+    name: str = "groq"
+    base_url: str = "https://api.groq.com/openai/v1"
+    api_key: str = Field(..., description="API key for the Groq API.")
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+        configs = []
+        for model in response["data"]:
+            if not "context_window" in model:
+                continue
+            configs.append(
+                LLMConfig(
+                    model=model["id"], model_endpoint_type="openai", model_endpoint=self.base_url, context_window=model["context_window"]
+                )
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        return []
+
+    def get_model_context_window_size(self, model_name: str):
+        raise NotImplementedError
+
+
+class GoogleAIProvider(Provider):
+    # gemini
+    api_key: str = Field(..., description="API key for the Google AI API.")
+    service_endpoint: str = "generativelanguage"
+    base_url: str = "https://generativelanguage.googleapis.com"
+
+    def list_llm_models(self):
+        from letta.llm_api.google_ai import google_ai_get_model_list
+
+        # TODO: use base_url instead
+        model_options = google_ai_get_model_list(service_endpoint=self.service_endpoint, api_key=self.api_key)
+        model_options = [str(m["name"]) for m in model_options]
+        model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+        # TODO remove manual filtering for gemini-pro
+        model_options = [mo for mo in model_options if str(mo).startswith("gemini") and "-pro" in str(mo)]
+        # TODO: add context windows
+        # model_options = ["gemini-pro"]
+
+        configs = []
+        for model in model_options:
+            configs.append(
+                LLMConfig(
+                    model=model,
+                    model_endpoint_type="google_ai",
+                    model_endpoint=self.base_url,
+                    context_window=self.get_model_context_window(model),
+                )
+            )
+        return configs
+
+    def list_embedding_models(self):
+        return []
+
+    def get_model_context_window(self, model_name: str):
+        from letta.llm_api.google_ai import google_ai_get_model_context_window
+
+        # TODO: use base_url instead
+        return google_ai_get_model_context_window(self.service_endpoint, self.api_key, model_name)
+
+
+class AzureProvider(Provider):
+    pass
+
+
+class VLLMProvider(OpenAIProvider):
+    # NOTE: vLLM only serves one model at a time (so could configure that through env variables)
+    pass
+
+
+class CohereProvider(OpenAIProvider):
+    pass
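A minimal usage sketch for the new provider classes (illustrative, not part of the package diff), assuming the package is importable, OPENAI_API_KEY is set in the environment, and an Ollama server is reachable at the usual local port; the listings depend on the live endpoints.

import os

from letta.providers import OllamaProvider, OpenAIProvider

# OpenAI: api_key is required, base_url defaults to https://api.openai.com/v1.
openai_provider = OpenAIProvider(api_key=os.environ["OPENAI_API_KEY"])
for llm_config in openai_provider.list_llm_models():
    # Each entry is an LLMConfig with model_endpoint and context_window filled in.
    print(llm_config.model, llm_config.context_window)

# Ollama: base_url is required and the API key is optional.
ollama_provider = OllamaProvider(base_url="http://localhost:11434", api_key=None)
print([config.model for config in ollama_provider.list_llm_models()])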
letta/schemas/block.py
CHANGED
@@ -53,7 +53,7 @@ class BaseBlock(LettaBase, validate_assignment=True):
         super().__setattr__(name, value)
         if name == "value":
             # run validation
-            self.__class__.
+            self.__class__.model_validate(self.model_dump(exclude_unset=True))


 class Block(BaseBlock):
letta/schemas/letta_request.py
CHANGED
@@ -2,6 +2,7 @@ from typing import List

 from pydantic import BaseModel, Field

+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.schemas.message import MessageCreate


@@ -21,3 +22,19 @@ class LettaRequest(BaseModel):
         default=False,
         description="Set True to return the raw Message object. Set False to return the Message in the format of the Letta API.",
     )
+
+    # Flags to support the use of AssistantMessage message types
+
+    use_assistant_message: bool = Field(
+        default=False,
+        description="[Only applicable if return_message_object is False] If true, returns AssistantMessage objects when the agent calls a designated message tool. If false, return FunctionCallMessage objects for all tool calls.",
+    )
+
+    assistant_message_function_name: str = Field(
+        default=DEFAULT_MESSAGE_TOOL,
+        description="[Only applicable if use_assistant_message is True] The name of the designated message tool.",
+    )
+    assistant_message_function_kwarg: str = Field(
+        default=DEFAULT_MESSAGE_TOOL_KWARG,
+        description="[Only applicable if use_assistant_message is True] The name of the message argument in the designated message tool.",
+    )
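A hedged sketch of setting the new AssistantMessage flags on a request (illustrative, not part of the package diff). The messages field and the MessageCreate(role=..., text=...) shape are assumptions inferred from the import above, and the literal defaults are spelled out only for illustration; the real defaults come from DEFAULT_MESSAGE_TOOL and DEFAULT_MESSAGE_TOOL_KWARG.

from letta.schemas.letta_request import LettaRequest
from letta.schemas.message import MessageCreate

request = LettaRequest(
    messages=[MessageCreate(role="user", text="hello")],  # assumed MessageCreate shape
    return_message_object=False,
    # Ask the server to emit AssistantMessage objects when the agent calls the
    # designated message tool, instead of generic FunctionCallMessage objects.
    use_assistant_message=True,
    assistant_message_function_name="send_message",  # assumed value of DEFAULT_MESSAGE_TOOL
    assistant_message_function_kwarg="message",      # assumed value of DEFAULT_MESSAGE_TOOL_KWARG
)
print(request.model_dump_json(indent=2))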
letta/schemas/letta_response.py
CHANGED
@@ -6,6 +6,7 @@ from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import LettaMessage
 from letta.schemas.message import Message
 from letta.schemas.usage import LettaUsageStatistics
+from letta.utils import json_dumps

 # TODO: consider moving into own file

@@ -23,6 +24,16 @@ class LettaResponse(BaseModel):
     messages: Union[List[Message], List[LettaMessage]] = Field(..., description="The messages returned by the agent.")
     usage: LettaUsageStatistics = Field(..., description="The usage statistics of the agent.")

+    def __str__(self):
+        return json_dumps(
+            {
+                "messages": [message.model_dump() for message in self.messages],
+                # Assume `Message` and `LettaMessage` have a `dict()` method
+                "usage": self.usage.model_dump(),  # Assume `LettaUsageStatistics` has a `dict()` method
+            },
+            indent=4,
+        )
+

 # The streaming response is either [DONE], [DONE_STEP], [DONE], an error, or a LettaMessage
 LettaStreamingResponse = Union[LettaMessage, MessageStreamStatus]
letta/schemas/llm_config.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Literal, Optional

 from pydantic import BaseModel, ConfigDict, Field

@@ -17,7 +17,23 @@ class LLMConfig(BaseModel):

     # TODO: 🤮 don't default to a vendor! bug city!
     model: str = Field(..., description="LLM model name. ")
-    model_endpoint_type:
+    model_endpoint_type: Literal[
+        "openai",
+        "anthropic",
+        "cohere",
+        "google_ai",
+        "azure",
+        "groq",
+        "ollama",
+        "webui",
+        "webui-legacy",
+        "lmstudio",
+        "lmstudio-legacy",
+        "llamacpp",
+        "koboldcpp",
+        "vllm",
+        "hugging-face",
+    ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: str = Field(..., description="The endpoint for the model.")
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")