letta-nightly 0.8.17.dev20250722104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl
This diff shows the content differences between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- letta/__init__.py +5 -3
- letta/agent.py +3 -2
- letta/agents/base_agent.py +4 -1
- letta/agents/voice_agent.py +1 -0
- letta/constants.py +4 -2
- letta/functions/schema_generator.py +2 -1
- letta/groups/dynamic_multi_agent.py +1 -0
- letta/helpers/converters.py +13 -5
- letta/helpers/json_helpers.py +6 -1
- letta/llm_api/anthropic.py +2 -2
- letta/llm_api/aws_bedrock.py +24 -94
- letta/llm_api/deepseek.py +1 -1
- letta/llm_api/google_ai_client.py +0 -38
- letta/llm_api/google_constants.py +6 -3
- letta/llm_api/helpers.py +1 -1
- letta/llm_api/llm_api_tools.py +4 -7
- letta/llm_api/mistral.py +12 -37
- letta/llm_api/openai.py +17 -17
- letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
- letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
- letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
- letta/local_llm/constants.py +2 -23
- letta/local_llm/json_parser.py +11 -1
- letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
- letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
- letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
- letta/local_llm/ollama/api.py +2 -2
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +33 -2
- letta/orm/files_agents.py +13 -10
- letta/orm/mixins.py +8 -0
- letta/orm/prompt.py +13 -0
- letta/orm/sqlite_functions.py +61 -17
- letta/otel/db_pool_monitoring.py +13 -12
- letta/schemas/agent.py +69 -4
- letta/schemas/agent_file.py +2 -0
- letta/schemas/block.py +11 -0
- letta/schemas/embedding_config.py +15 -3
- letta/schemas/enums.py +2 -0
- letta/schemas/file.py +1 -1
- letta/schemas/folder.py +74 -0
- letta/schemas/memory.py +12 -6
- letta/schemas/prompt.py +9 -0
- letta/schemas/providers/__init__.py +47 -0
- letta/schemas/providers/anthropic.py +78 -0
- letta/schemas/providers/azure.py +80 -0
- letta/schemas/providers/base.py +201 -0
- letta/schemas/providers/bedrock.py +78 -0
- letta/schemas/providers/cerebras.py +79 -0
- letta/schemas/providers/cohere.py +18 -0
- letta/schemas/providers/deepseek.py +63 -0
- letta/schemas/providers/google_gemini.py +102 -0
- letta/schemas/providers/google_vertex.py +54 -0
- letta/schemas/providers/groq.py +35 -0
- letta/schemas/providers/letta.py +39 -0
- letta/schemas/providers/lmstudio.py +97 -0
- letta/schemas/providers/mistral.py +41 -0
- letta/schemas/providers/ollama.py +151 -0
- letta/schemas/providers/openai.py +241 -0
- letta/schemas/providers/together.py +85 -0
- letta/schemas/providers/vllm.py +57 -0
- letta/schemas/providers/xai.py +66 -0
- letta/server/db.py +0 -5
- letta/server/rest_api/app.py +4 -3
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +152 -4
- letta/server/rest_api/routers/v1/folders.py +490 -0
- letta/server/rest_api/routers/v1/providers.py +2 -2
- letta/server/rest_api/routers/v1/sources.py +21 -26
- letta/server/rest_api/routers/v1/tools.py +90 -15
- letta/server/server.py +50 -95
- letta/services/agent_manager.py +420 -81
- letta/services/agent_serialization_manager.py +707 -0
- letta/services/block_manager.py +132 -11
- letta/services/file_manager.py +104 -29
- letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
- letta/services/file_processor/file_processor.py +75 -24
- letta/services/file_processor/parser/markitdown_parser.py +95 -0
- letta/services/files_agents_manager.py +57 -17
- letta/services/group_manager.py +7 -0
- letta/services/helpers/agent_manager_helper.py +25 -15
- letta/services/provider_manager.py +2 -2
- letta/services/source_manager.py +35 -16
- letta/services/tool_executor/files_tool_executor.py +12 -5
- letta/services/tool_manager.py +12 -0
- letta/services/tool_sandbox/e2b_sandbox.py +52 -48
- letta/settings.py +9 -6
- letta/streaming_utils.py +2 -1
- letta/utils.py +34 -1
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
letta/schemas/providers/google_gemini.py (new file)
@@ -0,0 +1,102 @@
+import asyncio
+from typing import Literal
+
+from pydantic import Field
+
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.base import Provider
+
+
+class GoogleAIProvider(Provider):
+    provider_type: Literal[ProviderType.google_ai] = Field(ProviderType.google_ai, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    api_key: str = Field(..., description="API key for the Google AI API.")
+    base_url: str = "https://generativelanguage.googleapis.com"
+
+    async def check_api_key(self):
+        from letta.llm_api.google_ai_client import google_ai_check_valid_api_key
+
+        google_ai_check_valid_api_key(self.api_key)
+
+    async def list_llm_models_async(self):
+        from letta.llm_api.google_ai_client import google_ai_get_model_list_async
+
+        # Get and filter the model list
+        model_options = await google_ai_get_model_list_async(base_url=self.base_url, api_key=self.api_key)
+        model_options = [mo for mo in model_options if "generateContent" in mo["supportedGenerationMethods"]]
+        model_options = [str(m["name"]) for m in model_options]
+
+        # filter by model names
+        model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+
+        # Add support for all gemini models
+        model_options = [mo for mo in model_options if str(mo).startswith("gemini-")]
+
+        # Prepare tasks for context window lookups in parallel
+        async def create_config(model):
+            context_window = await self.get_model_context_window_async(model)
+            return LLMConfig(
+                model=model,
+                model_endpoint_type="google_ai",
+                model_endpoint=self.base_url,
+                context_window=context_window,
+                handle=self.get_handle(model),
+                max_tokens=8192,
+                provider_name=self.name,
+                provider_category=self.provider_category,
+            )
+
+        # Execute all config creation tasks concurrently
+        configs = await asyncio.gather(*[create_config(model) for model in model_options])
+
+        return configs
+
+    async def list_embedding_models_async(self):
+        from letta.llm_api.google_ai_client import google_ai_get_model_list_async
+
+        # TODO: use base_url instead
+        model_options = await google_ai_get_model_list_async(base_url=self.base_url, api_key=self.api_key)
+        return self._list_embedding_models(model_options)
+
+    def _list_embedding_models(self, model_options):
+        # filter by 'generateContent' models
+        model_options = [mo for mo in model_options if "embedContent" in mo["supportedGenerationMethods"]]
+        model_options = [str(m["name"]) for m in model_options]
+        model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+
+        configs = []
+        for model in model_options:
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model,
+                    embedding_endpoint_type="google_ai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=768,
+                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # NOTE: max is 2048
+                    handle=self.get_handle(model, is_embedding=True),
+                    batch_size=1024,
+                )
+            )
+        return configs
+
+    def get_model_context_window(self, model_name: str) -> int | None:
+        import warnings
+
+        warnings.warn("This is deprecated, use get_model_context_window_async when possible.", DeprecationWarning)
+        from letta.llm_api.google_ai_client import google_ai_get_model_context_window
+
+        if model_name in LLM_MAX_TOKENS:
+            return LLM_MAX_TOKENS[model_name]
+        else:
+            return google_ai_get_model_context_window(self.base_url, self.api_key, model_name)
+
+    async def get_model_context_window_async(self, model_name: str) -> int | None:
+        from letta.llm_api.google_ai_client import google_ai_get_model_context_window_async
+
+        if model_name in LLM_MAX_TOKENS:
+            return LLM_MAX_TOKENS[model_name]
+        else:
+            return await google_ai_get_model_context_window_async(self.base_url, self.api_key, model_name)
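A minimal usage sketch for the new GoogleAIProvider. The required constructor fields come from the base Provider schema in letta/schemas/providers/base.py (+201 lines, not shown in this diff), so the `name` keyword and the GEMINI_API_KEY environment variable below are assumptions, not part of the package:

import asyncio
import os

from letta.schemas.providers.google_gemini import GoogleAIProvider

# Hypothetical wiring; "name" and the env var are placeholders.
provider = GoogleAIProvider(name="google_ai", api_key=os.environ["GEMINI_API_KEY"])

async def main():
    await provider.check_api_key()
    llm_configs = await provider.list_llm_models_async()  # context windows resolved concurrently via asyncio.gather
    embedding_configs = await provider.list_embedding_models_async()
    print([cfg.handle for cfg in llm_configs])
    print([cfg.handle for cfg in embedding_configs])

asyncio.run(main())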
letta/schemas/providers/google_vertex.py (new file)
@@ -0,0 +1,54 @@
+from typing import Literal
+
+from pydantic import Field
+
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.base import Provider
+
+
+# TODO (cliandy): GoogleVertexProvider uses hardcoded models vs Gemini fetches from API
+class GoogleVertexProvider(Provider):
+    provider_type: Literal[ProviderType.google_vertex] = Field(ProviderType.google_vertex, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    google_cloud_project: str = Field(..., description="GCP project ID for the Google Vertex API.")
+    google_cloud_location: str = Field(..., description="GCP region for the Google Vertex API.")
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        from letta.llm_api.google_constants import GOOGLE_MODEL_TO_CONTEXT_LENGTH
+
+        configs = []
+        for model, context_length in GOOGLE_MODEL_TO_CONTEXT_LENGTH.items():
+            configs.append(
+                LLMConfig(
+                    model=model,
+                    model_endpoint_type="google_vertex",
+                    model_endpoint=f"https://{self.google_cloud_location}-aiplatform.googleapis.com/v1/projects/{self.google_cloud_project}/locations/{self.google_cloud_location}",
+                    context_window=context_length,
+                    handle=self.get_handle(model),
+                    max_tokens=8192,
+                    provider_name=self.name,
+                    provider_category=self.provider_category,
+                )
+            )
+        return configs
+
+    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+        from letta.llm_api.google_constants import GOOGLE_EMBEDING_MODEL_TO_DIM
+
+        configs = []
+        for model, dim in GOOGLE_EMBEDING_MODEL_TO_DIM.items():
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model,
+                    embedding_endpoint_type="google_vertex",
+                    embedding_endpoint=f"https://{self.google_cloud_location}-aiplatform.googleapis.com/v1/projects/{self.google_cloud_project}/locations/{self.google_cloud_location}",
+                    embedding_dim=dim,
+                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # NOTE: max is 2048
+                    handle=self.get_handle(model, is_embedding=True),
+                    batch_size=1024,
+                )
+            )
+        return configs
letta/schemas/providers/groq.py (new file)
@@ -0,0 +1,35 @@
+from typing import Literal
+
+from pydantic import Field
+
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.openai import OpenAIProvider
+
+
+class GroqProvider(OpenAIProvider):
+    provider_type: Literal[ProviderType.groq] = Field(ProviderType.groq, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    base_url: str = "https://api.groq.com/openai/v1"
+    api_key: str = Field(..., description="API key for the Groq API.")
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list_async
+
+        response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+        configs = []
+        for model in response["data"]:
+            if "context_window" not in model:
+                continue
+            configs.append(
+                LLMConfig(
+                    model=model["id"],
+                    model_endpoint_type="groq",
+                    model_endpoint=self.base_url,
+                    context_window=model["context_window"],
+                    handle=self.get_handle(model["id"]),
+                    provider_name=self.name,
+                    provider_category=self.provider_category,
+                )
+            )
+        return configs
letta/schemas/providers/letta.py (new file)
@@ -0,0 +1,39 @@
+from typing import Literal
+
+from pydantic import Field
+
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LETTA_MODEL_ENDPOINT
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.base import Provider
+
+
+class LettaProvider(Provider):
+    provider_type: Literal[ProviderType.letta] = Field(ProviderType.letta, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        return [
+            LLMConfig(
+                model="letta-free",  # NOTE: renamed
+                model_endpoint_type="openai",
+                model_endpoint=LETTA_MODEL_ENDPOINT,
+                context_window=30000,
+                handle=self.get_handle("letta-free"),
+                provider_name=self.name,
+                provider_category=self.provider_category,
+            )
+        ]
+
+    async def list_embedding_models_async(self):
+        return [
+            EmbeddingConfig(
+                embedding_model="letta-free",  # NOTE: renamed
+                embedding_endpoint_type="hugging-face",
+                embedding_endpoint="https://embeddings.memgpt.ai",
+                embedding_dim=1024,
+                embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                handle=self.get_handle("letta-free", is_embedding=True),
+            )
+        ]
letta/schemas/providers/lmstudio.py (new file)
@@ -0,0 +1,97 @@
+import warnings
+from typing import Literal
+
+from pydantic import Field
+
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.openai import OpenAIProvider
+
+
+class LMStudioOpenAIProvider(OpenAIProvider):
+    provider_type: Literal[ProviderType.lmstudio_openai] = Field(ProviderType.lmstudio_openai, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    base_url: str = Field(..., description="Base URL for the LMStudio OpenAI API.")
+    api_key: str | None = Field(None, description="API key for the LMStudio API.")
+
+    @property
+    def model_endpoint_url(self):
+        # For LMStudio, we want to hit 'GET /api/v0/models' instead of 'GET /v1/models'
+        return f"{self.base_url.strip('/v1')}/api/v0"
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list_async
+
+        response = await openai_get_model_list_async(self.model_endpoint_url)
+
+        if "data" not in response:
+            warnings.warn(f"LMStudio OpenAI model query response missing 'data' field: {response}")
+            return []
+
+        configs = []
+        for model in response["data"]:
+            model_type = model.get("type")
+            if not model_type:
+                warnings.warn(f"LMStudio OpenAI model missing 'type' field: {model}")
+                continue
+            if model_type not in ("vlm", "llm"):
+                continue
+
+            # TODO (cliandy): previously we didn't get the backup context size, is this valid?
+            check = self._do_model_checks_for_name_and_context_size(model)
+            if check is None:
+                continue
+            model_name, context_window_size = check
+
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="openai",
+                    model_endpoint=self.base_url,
+                    context_window=context_window_size,
+                    handle=self.get_handle(model_name),
+                    provider_name=self.name,
+                    provider_category=self.provider_category,
+                )
+            )
+
+        return configs
+
+    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+        from letta.llm_api.openai import openai_get_model_list_async
+
+        response = await openai_get_model_list_async(self.model_endpoint_url)
+
+        if "data" not in response:
+            warnings.warn(f"LMStudio OpenAI model query response missing 'data' field: {response}")
+            return []
+
+        configs = []
+        for model in response["data"]:
+            model_type = model.get("type")
+            if not model_type:
+                warnings.warn(f"LMStudio OpenAI model missing 'type' field: {model}")
+                continue
+            if model_type not in ("embeddings"):
+                continue
+
+            # TODO (cliandy): previously we didn't get the backup context size, is this valid?
+            check = self._do_model_checks_for_name_and_context_size(model, length_key="max_context_length")
+            if check is None:
+                continue
+            model_name, context_window_size = check
+
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model_name,
+                    embedding_endpoint_type="openai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=768,  # Default embedding dimension, not context window
+                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # NOTE: max is 2048
+                    handle=self.get_handle(model_name),
+                ),
+            )
+
+        return configs
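A small sketch of how LMStudioOpenAIProvider's endpoint rewriting might look in practice. The host/port and the `name` keyword are placeholders; the required fields beyond base_url come from the base Provider/OpenAIProvider schemas, which are not shown in this hunk:

import asyncio

from letta.schemas.providers.lmstudio import LMStudioOpenAIProvider

# Placeholder local LM Studio server; "name" is an assumed base Provider field.
provider = LMStudioOpenAIProvider(name="lmstudio_openai", base_url="http://localhost:1234/v1")

# Model discovery is routed through LM Studio's native catalog endpoint...
print(provider.model_endpoint_url)  # e.g. http://localhost:1234/api/v0

# ...while the generated LLMConfig entries keep base_url as the chat endpoint.
print(asyncio.run(provider.list_llm_models_async()))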
letta/schemas/providers/mistral.py (new file)
@@ -0,0 +1,41 @@
+from typing import Literal
+
+from pydantic import Field
+
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.base import Provider
+
+
+class MistralProvider(Provider):
+    provider_type: Literal[ProviderType.mistral] = Field(ProviderType.mistral, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    api_key: str = Field(..., description="API key for the Mistral API.")
+    base_url: str = "https://api.mistral.ai/v1"
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        from letta.llm_api.mistral import mistral_get_model_list_async
+
+        # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
+        # See: https://openrouter.ai/docs/requests
+        response = await mistral_get_model_list_async(self.base_url, api_key=self.api_key)
+
+        assert "data" in response, f"Mistral model query response missing 'data' field: {response}"
+
+        configs = []
+        for model in response["data"]:
+            # If model has chat completions and function calling enabled
+            if model["capabilities"]["completion_chat"] and model["capabilities"]["function_calling"]:
+                configs.append(
+                    LLMConfig(
+                        model=model["id"],
+                        model_endpoint_type="openai",
+                        model_endpoint=self.base_url,
+                        context_window=model["max_context_length"],
+                        handle=self.get_handle(model["id"]),
+                        provider_name=self.name,
+                        provider_category=self.provider_category,
+                    )
+                )
+
+        return configs
letta/schemas/providers/ollama.py (new file)
@@ -0,0 +1,151 @@
+from typing import Literal
+
+import aiohttp
+import requests
+from pydantic import Field
+
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
+from letta.log import get_logger
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.openai import OpenAIProvider
+
+logger = get_logger(__name__)
+
+
+class OllamaProvider(OpenAIProvider):
+    """Ollama provider that uses the native /api/generate endpoint
+
+    See: https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-completion
+    """
+
+    provider_type: Literal[ProviderType.ollama] = Field(ProviderType.ollama, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    base_url: str = Field(..., description="Base URL for the Ollama API.")
+    api_key: str | None = Field(None, description="API key for the Ollama API (default: `None`).")
+    default_prompt_formatter: str = Field(
+        ..., description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
+    )
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        """List available LLM Models from Ollama
+
+        https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models"""
+        endpoint = f"{self.base_url}/api/tags"
+        async with aiohttp.ClientSession() as session:
+            async with session.get(endpoint) as response:
+                if response.status != 200:
+                    raise Exception(f"Failed to list Ollama models: {response.text}")
+                response_json = await response.json()
+
+        configs = []
+        for model in response_json["models"]:
+            context_window = self.get_model_context_window(model["name"])
+            if context_window is None:
+                print(f"Ollama model {model['name']} has no context window")
+                continue
+            configs.append(
+                LLMConfig(
+                    model=model["name"],
+                    model_endpoint_type="ollama",
+                    model_endpoint=self.base_url,
+                    model_wrapper=self.default_prompt_formatter,
+                    context_window=context_window,
+                    handle=self.get_handle(model["name"]),
+                    provider_name=self.name,
+                    provider_category=self.provider_category,
+                )
+            )
+        return configs
+
+    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+        """List available embedding models from Ollama
+
+        https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+        """
+        endpoint = f"{self.base_url}/api/tags"
+        async with aiohttp.ClientSession() as session:
+            async with session.get(endpoint) as response:
+                if response.status != 200:
+                    raise Exception(f"Failed to list Ollama models: {response.text}")
+                response_json = await response.json()
+
+        configs = []
+        for model in response_json["models"]:
+            embedding_dim = await self._get_model_embedding_dim_async(model["name"])
+            if not embedding_dim:
+                print(f"Ollama model {model['name']} has no embedding dimension")
+                continue
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model["name"],
+                    embedding_endpoint_type="ollama",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=embedding_dim,
+                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                    handle=self.get_handle(model["name"], is_embedding=True),
+                )
+            )
+        return configs
+
+    def get_model_context_window(self, model_name: str) -> int | None:
+        """Gets model context window for Ollama. As this can look different based on models,
+        we use the following for guidance:
+
+        "llama.context_length": 8192,
+        "llama.embedding_length": 4096,
+        source: https://github.com/ollama/ollama/blob/main/docs/api.md#show-model-information
+
+        FROM 2024-10-08
+        Notes from vLLM around keys
+        source: https://github.com/vllm-project/vllm/blob/72ad2735823e23b4e1cc79b7c73c3a5f3c093ab0/vllm/config.py#L3488
+
+        possible_keys = [
+            # OPT
+            "max_position_embeddings",
+            # GPT-2
+            "n_positions",
+            # MPT
+            "max_seq_len",
+            # ChatGLM2
+            "seq_length",
+            # Command-R
+            "model_max_length",
+            # Whisper
+            "max_target_positions",
+            # Others
+            "max_sequence_length",
+            "max_seq_length",
+            "seq_len",
+        ]
+        max_position_embeddings
+        parse model cards: nous, dolphon, llama
+        """
+        endpoint = f"{self.base_url}/api/show"
+        payload = {"name": model_name, "verbose": True}
+        response = requests.post(endpoint, json=payload)
+        if response.status_code != 200:
+            return None
+
+        try:
+            model_info = response.json()
+            # Try to extract context window from model parameters
+            if "model_info" in model_info and "llama.context_length" in model_info["model_info"]:
+                return int(model_info["model_info"]["llama.context_length"])
+        except Exception:
+            pass
+        logger.warning(f"Failed to get model context window for {model_name}")
+        return None
+
+    async def _get_model_embedding_dim_async(self, model_name: str):
+        async with aiohttp.ClientSession() as session:
+            async with session.post(f"{self.base_url}/api/show", json={"name": model_name, "verbose": True}) as response:
+                response_json = await response.json()
+
+        if "model_info" not in response_json:
+            if "error" in response_json:
+                logger.warning("Ollama fetch model info error for %s: %s", model_name, response_json["error"])
+            return None
+
+        return response_json["model_info"].get("embedding_length")
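A minimal sketch of exercising OllamaProvider against a local Ollama server. The `name` keyword, the port, and the wrapper string are placeholders; the exact set of required fields is defined by the base Provider schema in base.py, which this hunk does not include:

import asyncio

from letta.schemas.providers.ollama import OllamaProvider

provider = OllamaProvider(
    name="ollama",                        # assumed base Provider field
    base_url="http://localhost:11434",    # placeholder local Ollama endpoint
    default_prompt_formatter="chatml",    # placeholder wrapper name
)

async def main():
    llm_configs = await provider.list_llm_models_async()          # GET /api/tags, then POST /api/show per model
    embedding_configs = await provider.list_embedding_models_async()
    for cfg in llm_configs:
        print(cfg.model, cfg.context_window)

asyncio.run(main())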