letta-nightly 0.4.1.dev20241004012408__py3-none-any.whl → 0.4.1.dev20241005104008__py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release.

This version of letta-nightly might be problematic.

Files changed (34)
  1. letta/cli/cli.py +30 -365
  2. letta/cli/cli_config.py +70 -27
  3. letta/client/client.py +103 -11
  4. letta/config.py +80 -80
  5. letta/constants.py +6 -0
  6. letta/credentials.py +10 -1
  7. letta/errors.py +63 -5
  8. letta/llm_api/llm_api_tools.py +110 -52
  9. letta/local_llm/chat_completion_proxy.py +0 -3
  10. letta/main.py +1 -2
  11. letta/metadata.py +12 -0
  12. letta/providers.py +232 -0
  13. letta/schemas/block.py +1 -1
  14. letta/schemas/letta_request.py +17 -0
  15. letta/schemas/letta_response.py +11 -0
  16. letta/schemas/llm_config.py +18 -2
  17. letta/schemas/message.py +40 -13
  18. letta/server/rest_api/app.py +5 -0
  19. letta/server/rest_api/interface.py +115 -24
  20. letta/server/rest_api/routers/v1/agents.py +36 -3
  21. letta/server/rest_api/routers/v1/llms.py +6 -2
  22. letta/server/server.py +60 -87
  23. letta/server/static_files/assets/index-3ab03d5b.css +1 -0
  24. letta/server/static_files/assets/{index-4d08d8a3.js → index-9a9c449b.js} +69 -69
  25. letta/server/static_files/index.html +2 -2
  26. letta/settings.py +144 -114
  27. letta/utils.py +6 -1
  28. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/METADATA +1 -1
  29. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/RECORD +32 -32
  30. letta/local_llm/groq/api.py +0 -97
  31. letta/server/static_files/assets/index-156816da.css +0 -1
  32. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/LICENSE +0 -0
  33. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/WHEEL +0 -0
  34. {letta_nightly-0.4.1.dev20241004012408.dist-info → letta_nightly-0.4.1.dev20241005104008.dist-info}/entry_points.txt +0 -0
letta/llm_api/llm_api_tools.py CHANGED
@@ -9,7 +9,6 @@ from typing import List, Optional, Union
 import requests
 
 from letta.constants import CLI_WARNING_PREFIX, OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-from letta.credentials import LettaCredentials
 from letta.llm_api.anthropic import anthropic_chat_completions_request
 from letta.llm_api.azure_openai import (
     MODEL_TO_AZURE_ENGINE,
@@ -29,6 +28,7 @@ from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
 )
+from letta.providers import GoogleAIProvider
 from letta.schemas.enums import OptionState
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
@@ -37,14 +37,14 @@ from letta.schemas.openai.chat_completion_request import (
     Tool,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
 from letta.streaming_interface import (
     AgentChunkStreamingInterface,
     AgentRefreshStreamingInterface,
 )
 from letta.utils import json_dumps
 
-LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local"]
+LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
 
 
 # TODO update to use better types
@@ -83,7 +83,7 @@ def add_inner_thoughts_to_functions(
     return new_functions
 
 
-def unpack_inner_thoughts_from_kwargs(
+def unpack_all_inner_thoughts_from_kwargs(
     response: ChatCompletionResponse,
     inner_thoughts_key: str,
 ) -> ChatCompletionResponse:
@@ -93,36 +93,7 @@ def unpack_inner_thoughts_from_kwargs(
 
     new_choices = []
     for choice in response.choices:
-        msg = choice.message
-        if msg.role == "assistant" and msg.tool_calls and len(msg.tool_calls) >= 1:
-            if len(msg.tool_calls) > 1:
-                warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(msg.tool_calls)}) is not supported")
-            # TODO support multiple tool calls
-            tool_call = msg.tool_calls[0]
-
-            try:
-                # Sadly we need to parse the JSON since args are in string format
-                func_args = dict(json.loads(tool_call.function.arguments))
-                if inner_thoughts_key in func_args:
-                    # extract the inner thoughts
-                    inner_thoughts = func_args.pop(inner_thoughts_key)
-
-                    # replace the kwargs
-                    new_choice = choice.model_copy(deep=True)
-                    new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
-                    # also replace the message content
-                    if new_choice.message.content is not None:
-                        warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
-                    new_choice.message.content = inner_thoughts
-
-                    # save copy
-                    new_choices.append(new_choice)
-                else:
-                    warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
-
-            except json.JSONDecodeError as e:
-                warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
-                raise e
+        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
 
     # return an updated copy
     new_response = response.model_copy(deep=True)
@@ -130,6 +101,38 @@ def unpack_inner_thoughts_from_kwargs(
     return new_response
 
 
+def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
+    message = choice.message
+    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
+        if len(message.tool_calls) > 1:
+            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+        # TODO support multiple tool calls
+        tool_call = message.tool_calls[0]
+
+        try:
+            # Sadly we need to parse the JSON since args are in string format
+            func_args = dict(json.loads(tool_call.function.arguments))
+            if inner_thoughts_key in func_args:
+                # extract the inner thoughts
+                inner_thoughts = func_args.pop(inner_thoughts_key)
+
+                # replace the kwargs
+                new_choice = choice.model_copy(deep=True)
+                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
+                # also replace the message content
+                if new_choice.message.content is not None:
+                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                new_choice.message.content = inner_thoughts
+
+                return new_choice
+            else:
+                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+
+        except json.JSONDecodeError as e:
+            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+            raise e
+
+
 def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
     """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
     from letta.utils import printd
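
The refactor above extracts the per-choice logic into its own helper so that a single choice can be unpacked outside of a full response. For reference, a self-contained sketch of that flow (plain dicts instead of the pydantic `Choice` model; the payload below is hypothetical, not from the package):

```python
import json
import warnings

# Hypothetical choice payload, shaped like the OpenAI chat completion
# choices the helper above operates on.
choice = {
    "message": {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {
                "function": {
                    "name": "send_message",
                    "arguments": json.dumps({"inner_thoughts": "User said hi.", "message": "Hello!"}),
                }
            }
        ],
    }
}


def unpack(choice: dict, inner_thoughts_key: str = "inner_thoughts") -> dict:
    # Same flow as the new unpack_inner_thoughts_from_kwargs, minus the
    # pydantic models: parse the stringified arguments, pop the
    # inner-thoughts kwarg, and promote it to the message content.
    tool_call = choice["message"]["tool_calls"][0]
    func_args = json.loads(tool_call["function"]["arguments"])
    if inner_thoughts_key not in func_args:
        warnings.warn(f"Did not find inner thoughts in tool call: {tool_call}")
        return choice
    choice["message"]["content"] = func_args.pop(inner_thoughts_key)
    tool_call["function"]["arguments"] = json.dumps(func_args)
    return choice


print(unpack(choice)["message"]["content"])  # -> User said hi.
```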
@@ -246,15 +249,17 @@ def create(
     # TODO move to llm_config?
     # if unspecified (None), default to something we've tested
     inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+    model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd
 
-    printd(f"Using model {llm_config.model_endpoint_type}, endpoint: {llm_config.model_endpoint}")
+    if not model_settings:
+        from letta.settings import model_settings
 
-    # TODO eventually refactor so that credentials are passed through
+        model_settings = model_settings
 
-    credentials = LettaCredentials.load()
+    printd(f"Using model {llm_config.model_endpoint_type}, endpoint: {llm_config.model_endpoint}")
 
     if function_call and not functions:
         printd("unsetting function_call because functions is None")
@@ -286,7 +291,7 @@ def create(
         ]
 
         # TODO do the same for Azure?
-        if credentials.openai_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
+        if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")
         if use_tool_naming:
@@ -323,7 +328,7 @@ def create(
             ), type(stream_inferface)
             response = openai_chat_completions_process_stream(
                 url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                api_key=credentials.openai_key,
+                api_key=model_settings.openai_api_key,
                 chat_completion_request=data,
                 stream_inferface=stream_inferface,
             )
@@ -332,10 +337,9 @@ def create(
             if isinstance(stream_inferface, AgentChunkStreamingInterface):
                 stream_inferface.stream_start()
             try:
-
                 response = openai_chat_completions_request(
                     url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                    api_key=credentials.openai_key,
+                    api_key=model_settings.openai_api_key,
                     chat_completion_request=data,
                 )
             finally:
@@ -343,7 +347,7 @@ def create(
                     stream_inferface.stream_end()
 
         if inner_thoughts_in_kwargs:
-            response = unpack_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
         return response
 
@@ -353,7 +357,7 @@ def create(
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
 
         azure_deployment = (
-            credentials.azure_deployment if credentials.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[llm_config.model]
+            model_settings.azure_deployment if model_settings.azure_deployment is not None else MODEL_TO_AZURE_ENGINE[llm_config.model]
        )
         if use_tool_naming:
             data = dict(
@@ -374,10 +378,10 @@ def create(
                 user=str(user_id),
             )
         return azure_openai_chat_completions_request(
-            resource_name=credentials.azure_endpoint,
+            resource_name=model_settings.azure_endpoint,
             deployment_id=azure_deployment,
-            api_version=credentials.azure_version,
-            api_key=credentials.azure_key,
+            api_version=model_settings.azure_version,
+            api_key=model_settings.azure_key,
             data=data,
         )
 
@@ -400,9 +404,9 @@ def create(
 
         return google_ai_chat_completions_request(
             inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg,
-            service_endpoint=credentials.google_ai_service_endpoint,
+            service_endpoint=GoogleAIProvider(model_settings.gemini_api_key).service_endpoint,
             model=llm_config.model,
-            api_key=credentials.google_ai_key,
+            api_key=model_settings.gemini_api_key,
             # see structure of payload here: https://ai.google.dev/docs/function_calling
             data=dict(
                 contents=[m.to_google_ai_dict() for m in messages],
@@ -424,7 +428,7 @@ def create(
 
         return anthropic_chat_completions_request(
             url=llm_config.model_endpoint,
-            api_key=credentials.anthropic_key,
+            api_key=model_settings.anthropic_api_key,
             data=ChatCompletionRequest(
                 model=llm_config.model,
                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
@@ -455,7 +459,7 @@ def create(
             chat_completion_request=ChatCompletionRequest(
                 model="command-r-plus",  # TODO
                 messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-                tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+                tools=tools,
                 tool_choice=function_call,
                 # user=str(user_id),
                 # NOTE: max_tokens is required for Anthropic API
@@ -463,6 +467,60 @@ def create(
             ),
         )
 
+    elif llm_config.model_endpoint_type == "groq":
+        if stream:
+            raise NotImplementedError(f"Streaming not yet implemented for Groq.")
+
+        if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
+            # only is a problem if we are *not* using an openai proxy
+            raise ValueError(f"Groq key is missing from letta config file")
+
+        # force to true for groq, since they don't support 'content' is non-null
+        inner_thoughts_in_kwargs = True
+        if inner_thoughts_in_kwargs:
+            functions = add_inner_thoughts_to_functions(
+                functions=functions,
+                inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+            )
+
+        tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
+        data = ChatCompletionRequest(
+            model=llm_config.model,
+            messages=[m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs) for m in messages],
+            tools=tools,
+            tool_choice=function_call,
+            user=str(user_id),
+        )
+
+        # https://console.groq.com/docs/openai
+        # "The following fields are currently not supported and will result in a 400 error (yikes) if they are supplied:"
+        assert data.top_logprobs is None
+        assert data.logit_bias is None
+        assert data.logprobs == False
+        assert data.n == 1
+        # They mention that none of the messages can have names, but it seems to not error out (for now)
+
+        data.stream = False
+        if isinstance(stream_inferface, AgentChunkStreamingInterface):
+            stream_inferface.stream_start()
+        try:
+            # groq uses the openai chat completions API, so this component should be reusable
+            assert model_settings.groq_api_key is not None, "Groq key is missing"
+            response = openai_chat_completions_request(
+                url=llm_config.model_endpoint,
+                api_key=model_settings.groq_api_key,
+                chat_completion_request=data,
+            )
+        finally:
+            if isinstance(stream_inferface, AgentChunkStreamingInterface):
+                stream_inferface.stream_end()
+
+        if inner_thoughts_in_kwargs:
+            response = unpack_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+        return response
+
     # local model
     else:
         if stream:
@@ -481,6 +539,6 @@ def create(
             # hint
             first_message=first_message,
             # auth-related
-            auth_type=credentials.openllm_auth_type,
-            auth_key=credentials.openllm_key,
+            auth_type=model_settings.openllm_auth_type,
+            auth_key=model_settings.openllm_api_key,
         )
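
The new `groq` branch works because Groq exposes an OpenAI-compatible chat completions endpoint, which is why `openai_chat_completions_request` is reused verbatim. A minimal sketch of a direct call against that endpoint (assumes `GROQ_API_KEY` is set; the model id is an example, not taken from the diff):

```python
import os

import requests

resp = requests.post(
    "https://api.groq.com/openai/v1/chat/completions",
    headers={"Authorization": f"Bearer {os.environ['GROQ_API_KEY']}"},
    json={
        "model": "llama3-70b-8192",  # example Groq model id (assumption)
        "messages": [{"role": "user", "content": "Say hi."}],
        "stream": False,  # the branch above forces stream=False; streaming raises NotImplementedError
        "n": 1,  # Groq returns a 400 for n != 1, hence the asserts above
    },
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```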
letta/local_llm/chat_completion_proxy.py CHANGED
@@ -12,7 +12,6 @@ from letta.local_llm.grammars.gbnf_grammar_generator import (
     create_dynamic_model_from_function,
     generate_gbnf_grammar_and_documentation,
 )
-from letta.local_llm.groq.api import get_groq_completion
 from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
 from letta.local_llm.llamacpp.api import get_llamacpp_completion
 from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
@@ -170,8 +169,6 @@ def get_chat_completion(
         result, usage = get_ollama_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
     elif endpoint_type == "vllm":
         result, usage = get_vllm_completion(endpoint, auth_type, auth_key, model, prompt, context_window, user)
-    elif endpoint_type == "groq":
-        result, usage = get_groq_completion(endpoint, auth_type, auth_key, model, prompt, context_window)
     else:
         raise LocalLLMError(
             f"Invalid endpoint type {endpoint_type}, please set variable depending on your backend (webui, lmstudio, llamacpp, koboldcpp)"
letta/main.py CHANGED
@@ -14,7 +14,7 @@ import letta.system as system
 # import benchmark
 from letta import create_client
 from letta.benchmark.benchmark import bench
-from letta.cli.cli import delete_agent, open_folder, quickstart, run, server, version
+from letta.cli.cli import delete_agent, open_folder, run, server, version
 from letta.cli.cli_config import add, add_tool, configure, delete, list, list_tools
 from letta.cli.cli_load import app as load_app
 from letta.config import LettaConfig
@@ -38,7 +38,6 @@ app.command(name="list-tools")(list_tools)
 app.command(name="delete")(delete)
 app.command(name="server")(server)
 app.command(name="folder")(open_folder)
-app.command(name="quickstart")(quickstart)
 # load data commands
 app.add_typer(load_app, name="load")
 # benchmark command
letta/metadata.py CHANGED
@@ -151,6 +151,18 @@ class OrganizationModel(Base):
         return Organization(id=self.id, name=self.name, created_at=self.created_at)
 
 
+# TODO: eventually store providers?
+# class Provider(Base):
+#     __tablename__ = "providers"
+#     __table_args__ = {"extend_existing": True}
+#
+#     id = Column(String, primary_key=True)
+#     name = Column(String, nullable=False)
+#     created_at = Column(DateTime(timezone=True))
+#     api_key = Column(String, nullable=False)
+#     base_url = Column(String, nullable=False)
+
+
 class APIKeyModel(Base):
     """Data model for authentication tokens. One-to-many relationship with UserModel (1 User - N tokens)."""
 
letta/providers.py ADDED
@@ -0,0 +1,232 @@
+from typing import List, Optional
+
+from pydantic import BaseModel, Field
+
+from letta.constants import LLM_MAX_TOKENS
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.llm_config import LLMConfig
+
+
+class Provider(BaseModel):
+    base_url: str
+
+    def list_llm_models(self):
+        return []
+
+    def list_embedding_models(self):
+        return []
+
+    def get_model_context_window(self, model_name: str):
+        pass
+
+
+class OpenAIProvider(Provider):
+    name: str = "openai"
+    api_key: str = Field(..., description="API key for the OpenAI API.")
+    base_url: str = "https://api.openai.com/v1"
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+        model_options = [obj["id"] for obj in response["data"]]
+
+        configs = []
+        for model_name in model_options:
+            context_window_size = self.get_model_context_window_size(model_name)
+
+            if not context_window_size:
+                continue
+            configs.append(
+                LLMConfig(model=model_name, model_endpoint_type="openai", model_endpoint=self.base_url, context_window=context_window_size)
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+
+        # TODO: actually automatically list models
+        return [
+            EmbeddingConfig(
+                embedding_model="text-embedding-ada-002",
+                embedding_endpoint_type="openai",
+                embedding_endpoint="https://api.openai.com/v1",
+                embedding_dim=1536,
+                embedding_chunk_size=300,
+            )
+        ]
+
+    def get_model_context_window_size(self, model_name: str):
+        if model_name in LLM_MAX_TOKENS:
+            return LLM_MAX_TOKENS[model_name]
+        else:
+            return None
+
+
+class AnthropicProvider(Provider):
+    name: str = "anthropic"
+    api_key: str = Field(..., description="API key for the Anthropic API.")
+    base_url: str = "https://api.anthropic.com/v1"
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.anthropic import anthropic_get_model_list
+
+        models = anthropic_get_model_list(self.base_url, api_key=self.api_key)
+
+        configs = []
+        for model in models:
+            configs.append(
+                LLMConfig(
+                    model=model["name"],
+                    model_endpoint_type="anthropic",
+                    model_endpoint=self.base_url,
+                    context_window=model["context_window"],
+                )
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        return []
+
+
+class OllamaProvider(OpenAIProvider):
+    name: str = "ollama"
+    base_url: str = Field(..., description="Base URL for the Ollama API.")
+    api_key: Optional[str] = Field(None, description="API key for the Ollama API (default: `None`).")
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        # https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+        import requests
+
+        response = requests.get(f"{self.base_url}/api/tags")
+        if response.status_code != 200:
+            raise Exception(f"Failed to list Ollama models: {response.text}")
+        response_json = response.json()
+
+        configs = []
+        for model in response_json["models"]:
+            context_window = self.get_model_context_window(model["name"])
+            configs.append(
+                LLMConfig(
+                    model=model["name"],
+                    model_endpoint_type="ollama",
+                    model_endpoint=self.base_url,
+                    context_window=context_window,
+                )
+            )
+        return configs
+
+    def get_model_context_window(self, model_name: str):
+
+        import requests
+
+        response = requests.post(f"{self.base_url}/api/show", json={"name": model_name, "verbose": True})
+        response_json = response.json()
+
+        # thank you vLLM: https://github.com/vllm-project/vllm/blob/main/vllm/config.py#L1675
+        possible_keys = [
+            # OPT
+            "max_position_embeddings",
+            # GPT-2
+            "n_positions",
+            # MPT
+            "max_seq_len",
+            # ChatGLM2
+            "seq_length",
+            # Command-R
+            "model_max_length",
+            # Others
+            "max_sequence_length",
+            "max_seq_length",
+            "seq_len",
+        ]
+
+        # max_position_embeddings
+        # parse model cards: nous, dolphon, llama
+        for key, value in response_json["model_info"].items():
+            if "context_window" in key:
+                return value
+        return None
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        # TODO: filter embedding models
+        return []
+
+
+class GroqProvider(OpenAIProvider):
+    name: str = "groq"
+    base_url: str = "https://api.groq.com/openai/v1"
+    api_key: str = Field(..., description="API key for the Groq API.")
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list
+
+        response = openai_get_model_list(self.base_url, api_key=self.api_key)
+        configs = []
+        for model in response["data"]:
+            if not "context_window" in model:
+                continue
+            configs.append(
+                LLMConfig(
+                    model=model["id"], model_endpoint_type="openai", model_endpoint=self.base_url, context_window=model["context_window"]
+                )
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        return []
+
+    def get_model_context_window_size(self, model_name: str):
+        raise NotImplementedError
+
+
+class GoogleAIProvider(Provider):
+    # gemini
+    api_key: str = Field(..., description="API key for the Google AI API.")
+    service_endpoint: str = "generativelanguage"
+    base_url: str = "https://generativelanguage.googleapis.com"
+
+    def list_llm_models(self):
+        from letta.llm_api.google_ai import google_ai_get_model_list
+
+        # TODO: use base_url instead
+        model_options = google_ai_get_model_list(service_endpoint=self.service_endpoint, api_key=self.api_key)
+        model_options = [str(m["name"]) for m in model_options]
+        model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+        # TODO remove manual filtering for gemini-pro
+        model_options = [mo for mo in model_options if str(mo).startswith("gemini") and "-pro" in str(mo)]
+        # TODO: add context windows
+        # model_options = ["gemini-pro"]
+
+        configs = []
+        for model in model_options:
+            configs.append(
+                LLMConfig(
+                    model=model,
+                    model_endpoint_type="google_ai",
+                    model_endpoint=self.base_url,
+                    context_window=self.get_model_context_window(model),
+                )
+            )
+        return configs
+
+    def list_embedding_models(self):
+        return []
+
+    def get_model_context_window(self, model_name: str):
+        from letta.llm_api.google_ai import google_ai_get_model_context_window
+
+        # TODO: use base_url instead
+        return google_ai_get_model_context_window(self.service_endpoint, self.api_key, model_name)
+
+
+class AzureProvider(Provider):
+    pass
+
+
+class VLLMProvider(OpenAIProvider):
+    # NOTE: vLLM only serves one model at a time (so could configure that through env variables)
+    pass
+
+
+class CohereProvider(OpenAIProvider):
+    pass
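
A minimal sketch of how these new provider classes could be used to discover models (the classes and their fields come from the file above; the API keys and the local Ollama server are assumptions):

```python
import os

from letta.providers import OllamaProvider, OpenAIProvider

# Pydantic models take their declared fields as keyword arguments.
openai_provider = OpenAIProvider(api_key=os.environ["OPENAI_API_KEY"])
for config in openai_provider.list_llm_models():
    print(config.model, config.context_window)

# Assumes an Ollama server running on the default port.
ollama_provider = OllamaProvider(base_url="http://localhost:11434", api_key=None)
print([config.model for config in ollama_provider.list_llm_models()])
```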
letta/schemas/block.py CHANGED
@@ -53,7 +53,7 @@ class BaseBlock(LettaBase, validate_assignment=True):
         super().__setattr__(name, value)
         if name == "value":
             # run validation
-            self.__class__.validate(self.dict(exclude_unset=True))
+            self.__class__.model_validate(self.model_dump(exclude_unset=True))
 
 
 class Block(BaseBlock):
letta/schemas/letta_request.py CHANGED
@@ -2,6 +2,7 @@ from typing import List
 
 from pydantic import BaseModel, Field
 
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.schemas.message import MessageCreate
 
 
@@ -21,3 +22,19 @@ class LettaRequest(BaseModel):
         default=False,
         description="Set True to return the raw Message object. Set False to return the Message in the format of the Letta API.",
     )
+
+    # Flags to support the use of AssistantMessage message types
+
+    use_assistant_message: bool = Field(
+        default=False,
+        description="[Only applicable if return_message_object is False] If true, returns AssistantMessage objects when the agent calls a designated message tool. If false, return FunctionCallMessage objects for all tool calls.",
+    )
+
+    assistant_message_function_name: str = Field(
+        default=DEFAULT_MESSAGE_TOOL,
+        description="[Only applicable if use_assistant_message is True] The name of the designated message tool.",
+    )
+    assistant_message_function_kwarg: str = Field(
+        default=DEFAULT_MESSAGE_TOOL_KWARG,
+        description="[Only applicable if use_assistant_message is True] The name of the message argument in the designated message tool.",
+    )
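
A sketch of a request that opts into the new AssistantMessage behavior (the three flag fields are from the diff; the `messages` field and the `MessageCreate` shape are assumptions):

```python
from letta.schemas.letta_request import LettaRequest
from letta.schemas.message import MessageCreate

request = LettaRequest(
    messages=[MessageCreate(role="user", text="hello")],  # shape assumed
    return_message_object=False,
    # Emit AssistantMessage objects whenever the agent calls the designated
    # message tool; the two names below just restate the defaults.
    use_assistant_message=True,
    assistant_message_function_name="send_message",
    assistant_message_function_kwarg="message",
)
```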
letta/schemas/letta_response.py CHANGED
@@ -6,6 +6,7 @@ from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import LettaMessage
 from letta.schemas.message import Message
 from letta.schemas.usage import LettaUsageStatistics
+from letta.utils import json_dumps
 
 # TODO: consider moving into own file
 
@@ -23,6 +24,16 @@ class LettaResponse(BaseModel):
     messages: Union[List[Message], List[LettaMessage]] = Field(..., description="The messages returned by the agent.")
     usage: LettaUsageStatistics = Field(..., description="The usage statistics of the agent.")
 
+    def __str__(self):
+        return json_dumps(
+            {
+                "messages": [message.model_dump() for message in self.messages],
+                # Assume `Message` and `LettaMessage` have a `dict()` method
+                "usage": self.usage.model_dump(),  # Assume `LettaUsageStatistics` has a `dict()` method
+            },
+            indent=4,
+        )
+
 
 # The streaming response is either [DONE], [DONE_STEP], [DONE], an error, or a LettaMessage
 LettaStreamingResponse = Union[LettaMessage, MessageStreamStatus]
letta/schemas/llm_config.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Literal, Optional
 
 from pydantic import BaseModel, ConfigDict, Field
 
@@ -17,7 +17,23 @@ class LLMConfig(BaseModel):
 
     # TODO: 🤮 don't default to a vendor! bug city!
     model: str = Field(..., description="LLM model name. ")
-    model_endpoint_type: str = Field(..., description="The endpoint type for the model.")
+    model_endpoint_type: Literal[
+        "openai",
+        "anthropic",
+        "cohere",
+        "google_ai",
+        "azure",
+        "groq",
+        "ollama",
+        "webui",
+        "webui-legacy",
+        "lmstudio",
+        "lmstudio-legacy",
+        "llamacpp",
+        "koboldcpp",
+        "vllm",
+        "hugging-face",
+    ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: str = Field(..., description="The endpoint for the model.")
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
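
With `model_endpoint_type` narrowed to a `Literal`, pydantic now rejects unknown providers at construction time. A quick sketch:

```python
from pydantic import ValidationError

from letta.schemas.llm_config import LLMConfig

# Accepted: "groq" is now one of the allowed endpoint types.
config = LLMConfig(
    model="llama3-70b-8192",
    model_endpoint_type="groq",
    model_endpoint="https://api.groq.com/openai/v1",
    context_window=8192,
)

# Rejected: arbitrary strings no longer validate.
try:
    LLMConfig(
        model="gpt-4",
        model_endpoint_type="not-a-provider",
        model_endpoint="https://api.openai.com/v1",
        context_window=8192,
    )
except ValidationError as err:
    print(f"rejected with {err.error_count()} validation error(s)")
```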