letta-nightly 0.8.17.dev20250722104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +5 -3
  2. letta/agent.py +3 -2
  3. letta/agents/base_agent.py +4 -1
  4. letta/agents/voice_agent.py +1 -0
  5. letta/constants.py +4 -2
  6. letta/functions/schema_generator.py +2 -1
  7. letta/groups/dynamic_multi_agent.py +1 -0
  8. letta/helpers/converters.py +13 -5
  9. letta/helpers/json_helpers.py +6 -1
  10. letta/llm_api/anthropic.py +2 -2
  11. letta/llm_api/aws_bedrock.py +24 -94
  12. letta/llm_api/deepseek.py +1 -1
  13. letta/llm_api/google_ai_client.py +0 -38
  14. letta/llm_api/google_constants.py +6 -3
  15. letta/llm_api/helpers.py +1 -1
  16. letta/llm_api/llm_api_tools.py +4 -7
  17. letta/llm_api/mistral.py +12 -37
  18. letta/llm_api/openai.py +17 -17
  19. letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
  20. letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
  21. letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
  22. letta/local_llm/constants.py +2 -23
  23. letta/local_llm/json_parser.py +11 -1
  24. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
  25. letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
  26. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
  27. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
  28. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
  29. letta/local_llm/ollama/api.py +2 -2
  30. letta/orm/__init__.py +1 -0
  31. letta/orm/agent.py +33 -2
  32. letta/orm/files_agents.py +13 -10
  33. letta/orm/mixins.py +8 -0
  34. letta/orm/prompt.py +13 -0
  35. letta/orm/sqlite_functions.py +61 -17
  36. letta/otel/db_pool_monitoring.py +13 -12
  37. letta/schemas/agent.py +69 -4
  38. letta/schemas/agent_file.py +2 -0
  39. letta/schemas/block.py +11 -0
  40. letta/schemas/embedding_config.py +15 -3
  41. letta/schemas/enums.py +2 -0
  42. letta/schemas/file.py +1 -1
  43. letta/schemas/folder.py +74 -0
  44. letta/schemas/memory.py +12 -6
  45. letta/schemas/prompt.py +9 -0
  46. letta/schemas/providers/__init__.py +47 -0
  47. letta/schemas/providers/anthropic.py +78 -0
  48. letta/schemas/providers/azure.py +80 -0
  49. letta/schemas/providers/base.py +201 -0
  50. letta/schemas/providers/bedrock.py +78 -0
  51. letta/schemas/providers/cerebras.py +79 -0
  52. letta/schemas/providers/cohere.py +18 -0
  53. letta/schemas/providers/deepseek.py +63 -0
  54. letta/schemas/providers/google_gemini.py +102 -0
  55. letta/schemas/providers/google_vertex.py +54 -0
  56. letta/schemas/providers/groq.py +35 -0
  57. letta/schemas/providers/letta.py +39 -0
  58. letta/schemas/providers/lmstudio.py +97 -0
  59. letta/schemas/providers/mistral.py +41 -0
  60. letta/schemas/providers/ollama.py +151 -0
  61. letta/schemas/providers/openai.py +241 -0
  62. letta/schemas/providers/together.py +85 -0
  63. letta/schemas/providers/vllm.py +57 -0
  64. letta/schemas/providers/xai.py +66 -0
  65. letta/server/db.py +0 -5
  66. letta/server/rest_api/app.py +4 -3
  67. letta/server/rest_api/routers/v1/__init__.py +2 -0
  68. letta/server/rest_api/routers/v1/agents.py +152 -4
  69. letta/server/rest_api/routers/v1/folders.py +490 -0
  70. letta/server/rest_api/routers/v1/providers.py +2 -2
  71. letta/server/rest_api/routers/v1/sources.py +21 -26
  72. letta/server/rest_api/routers/v1/tools.py +90 -15
  73. letta/server/server.py +50 -95
  74. letta/services/agent_manager.py +420 -81
  75. letta/services/agent_serialization_manager.py +707 -0
  76. letta/services/block_manager.py +132 -11
  77. letta/services/file_manager.py +104 -29
  78. letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
  79. letta/services/file_processor/file_processor.py +75 -24
  80. letta/services/file_processor/parser/markitdown_parser.py +95 -0
  81. letta/services/files_agents_manager.py +57 -17
  82. letta/services/group_manager.py +7 -0
  83. letta/services/helpers/agent_manager_helper.py +25 -15
  84. letta/services/provider_manager.py +2 -2
  85. letta/services/source_manager.py +35 -16
  86. letta/services/tool_executor/files_tool_executor.py +12 -5
  87. letta/services/tool_manager.py +12 -0
  88. letta/services/tool_sandbox/e2b_sandbox.py +52 -48
  89. letta/settings.py +9 -6
  90. letta/streaming_utils.py +2 -1
  91. letta/utils.py +34 -1
  92. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
  93. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
  94. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
letta/schemas/providers/google_gemini.py
@@ -0,0 +1,102 @@
+ import asyncio
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+
+ class GoogleAIProvider(Provider):
+     provider_type: Literal[ProviderType.google_ai] = Field(ProviderType.google_ai, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     api_key: str = Field(..., description="API key for the Google AI API.")
+     base_url: str = "https://generativelanguage.googleapis.com"
+
+     async def check_api_key(self):
+         from letta.llm_api.google_ai_client import google_ai_check_valid_api_key
+
+         google_ai_check_valid_api_key(self.api_key)
+
+     async def list_llm_models_async(self):
+         from letta.llm_api.google_ai_client import google_ai_get_model_list_async
+
+         # Get and filter the model list
+         model_options = await google_ai_get_model_list_async(base_url=self.base_url, api_key=self.api_key)
+         model_options = [mo for mo in model_options if "generateContent" in mo["supportedGenerationMethods"]]
+         model_options = [str(m["name"]) for m in model_options]
+
+         # filter by model names
+         model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+
+         # Add support for all gemini models
+         model_options = [mo for mo in model_options if str(mo).startswith("gemini-")]
+
+         # Prepare tasks for context window lookups in parallel
+         async def create_config(model):
+             context_window = await self.get_model_context_window_async(model)
+             return LLMConfig(
+                 model=model,
+                 model_endpoint_type="google_ai",
+                 model_endpoint=self.base_url,
+                 context_window=context_window,
+                 handle=self.get_handle(model),
+                 max_tokens=8192,
+                 provider_name=self.name,
+                 provider_category=self.provider_category,
+             )
+
+         # Execute all config creation tasks concurrently
+         configs = await asyncio.gather(*[create_config(model) for model in model_options])
+
+         return configs
+
+     async def list_embedding_models_async(self):
+         from letta.llm_api.google_ai_client import google_ai_get_model_list_async
+
+         # TODO: use base_url instead
+         model_options = await google_ai_get_model_list_async(base_url=self.base_url, api_key=self.api_key)
+         return self._list_embedding_models(model_options)
+
+     def _list_embedding_models(self, model_options):
+         # filter by 'generateContent' models
+         model_options = [mo for mo in model_options if "embedContent" in mo["supportedGenerationMethods"]]
+         model_options = [str(m["name"]) for m in model_options]
+         model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+
+         configs = []
+         for model in model_options:
+             configs.append(
+                 EmbeddingConfig(
+                     embedding_model=model,
+                     embedding_endpoint_type="google_ai",
+                     embedding_endpoint=self.base_url,
+                     embedding_dim=768,
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # NOTE: max is 2048
+                     handle=self.get_handle(model, is_embedding=True),
+                     batch_size=1024,
+                 )
+             )
+         return configs
+
+     def get_model_context_window(self, model_name: str) -> int | None:
+         import warnings
+
+         warnings.warn("This is deprecated, use get_model_context_window_async when possible.", DeprecationWarning)
+         from letta.llm_api.google_ai_client import google_ai_get_model_context_window
+
+         if model_name in LLM_MAX_TOKENS:
+             return LLM_MAX_TOKENS[model_name]
+         else:
+             return google_ai_get_model_context_window(self.base_url, self.api_key, model_name)
+
+     async def get_model_context_window_async(self, model_name: str) -> int | None:
+         from letta.llm_api.google_ai_client import google_ai_get_model_context_window_async
+
+         if model_name in LLM_MAX_TOKENS:
+             return LLM_MAX_TOKENS[model_name]
+         else:
+             return await google_ai_get_model_context_window_async(self.base_url, self.api_key, model_name)
letta/schemas/providers/google_vertex.py
@@ -0,0 +1,54 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+
+ # TODO (cliandy): GoogleVertexProvider uses hardcoded models vs Gemini fetches from API
+ class GoogleVertexProvider(Provider):
+     provider_type: Literal[ProviderType.google_vertex] = Field(ProviderType.google_vertex, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     google_cloud_project: str = Field(..., description="GCP project ID for the Google Vertex API.")
+     google_cloud_location: str = Field(..., description="GCP region for the Google Vertex API.")
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.google_constants import GOOGLE_MODEL_TO_CONTEXT_LENGTH
+
+         configs = []
+         for model, context_length in GOOGLE_MODEL_TO_CONTEXT_LENGTH.items():
+             configs.append(
+                 LLMConfig(
+                     model=model,
+                     model_endpoint_type="google_vertex",
+                     model_endpoint=f"https://{self.google_cloud_location}-aiplatform.googleapis.com/v1/projects/{self.google_cloud_project}/locations/{self.google_cloud_location}",
+                     context_window=context_length,
+                     handle=self.get_handle(model),
+                     max_tokens=8192,
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+         return configs
+
+     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+         from letta.llm_api.google_constants import GOOGLE_EMBEDING_MODEL_TO_DIM
+
+         configs = []
+         for model, dim in GOOGLE_EMBEDING_MODEL_TO_DIM.items():
+             configs.append(
+                 EmbeddingConfig(
+                     embedding_model=model,
+                     embedding_endpoint_type="google_vertex",
+                     embedding_endpoint=f"https://{self.google_cloud_location}-aiplatform.googleapis.com/v1/projects/{self.google_cloud_project}/locations/{self.google_cloud_location}",
+                     embedding_dim=dim,
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # NOTE: max is 2048
+                     handle=self.get_handle(model, is_embedding=True),
+                     batch_size=1024,
+                 )
+             )
+         return configs
letta/schemas/providers/groq.py
@@ -0,0 +1,35 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+
+ class GroqProvider(OpenAIProvider):
+     provider_type: Literal[ProviderType.groq] = Field(ProviderType.groq, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     base_url: str = "https://api.groq.com/openai/v1"
+     api_key: str = Field(..., description="API key for the Groq API.")
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+         configs = []
+         for model in response["data"]:
+             if "context_window" not in model:
+                 continue
+             configs.append(
+                 LLMConfig(
+                     model=model["id"],
+                     model_endpoint_type="groq",
+                     model_endpoint=self.base_url,
+                     context_window=model["context_window"],
+                     handle=self.get_handle(model["id"]),
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+         return configs
letta/schemas/providers/letta.py
@@ -0,0 +1,39 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LETTA_MODEL_ENDPOINT
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+
+ class LettaProvider(Provider):
+     provider_type: Literal[ProviderType.letta] = Field(ProviderType.letta, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         return [
+             LLMConfig(
+                 model="letta-free",  # NOTE: renamed
+                 model_endpoint_type="openai",
+                 model_endpoint=LETTA_MODEL_ENDPOINT,
+                 context_window=30000,
+                 handle=self.get_handle("letta-free"),
+                 provider_name=self.name,
+                 provider_category=self.provider_category,
+             )
+         ]
+
+     async def list_embedding_models_async(self):
+         return [
+             EmbeddingConfig(
+                 embedding_model="letta-free",  # NOTE: renamed
+                 embedding_endpoint_type="hugging-face",
+                 embedding_endpoint="https://embeddings.memgpt.ai",
+                 embedding_dim=1024,
+                 embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                 handle=self.get_handle("letta-free", is_embedding=True),
+             )
+         ]
letta/schemas/providers/lmstudio.py
@@ -0,0 +1,97 @@
+ import warnings
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+
+ class LMStudioOpenAIProvider(OpenAIProvider):
+     provider_type: Literal[ProviderType.lmstudio_openai] = Field(ProviderType.lmstudio_openai, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     base_url: str = Field(..., description="Base URL for the LMStudio OpenAI API.")
+     api_key: str | None = Field(None, description="API key for the LMStudio API.")
+
+     @property
+     def model_endpoint_url(self):
+         # For LMStudio, we want to hit 'GET /api/v0/models' instead of 'GET /v1/models'
+         return f"{self.base_url.strip('/v1')}/api/v0"
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         response = await openai_get_model_list_async(self.model_endpoint_url)
+
+         if "data" not in response:
+             warnings.warn(f"LMStudio OpenAI model query response missing 'data' field: {response}")
+             return []
+
+         configs = []
+         for model in response["data"]:
+             model_type = model.get("type")
+             if not model_type:
+                 warnings.warn(f"LMStudio OpenAI model missing 'type' field: {model}")
+                 continue
+             if model_type not in ("vlm", "llm"):
+                 continue
+
+             # TODO (cliandy): previously we didn't get the backup context size, is this valid?
+             check = self._do_model_checks_for_name_and_context_size(model)
+             if check is None:
+                 continue
+             model_name, context_window_size = check
+
+             configs.append(
+                 LLMConfig(
+                     model=model_name,
+                     model_endpoint_type="openai",
+                     model_endpoint=self.base_url,
+                     context_window=context_window_size,
+                     handle=self.get_handle(model_name),
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+
+         return configs
+
+     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         response = await openai_get_model_list_async(self.model_endpoint_url)
+
+         if "data" not in response:
+             warnings.warn(f"LMStudio OpenAI model query response missing 'data' field: {response}")
+             return []
+
+         configs = []
+         for model in response["data"]:
+             model_type = model.get("type")
+             if not model_type:
+                 warnings.warn(f"LMStudio OpenAI model missing 'type' field: {model}")
+                 continue
+             if model_type not in ("embeddings"):
+                 continue
+
+             # TODO (cliandy): previously we didn't get the backup context size, is this valid?
+             check = self._do_model_checks_for_name_and_context_size(model, length_key="max_context_length")
+             if check is None:
+                 continue
+             model_name, context_window_size = check
+
+             configs.append(
+                 EmbeddingConfig(
+                     embedding_model=model_name,
+                     embedding_endpoint_type="openai",
+                     embedding_endpoint=self.base_url,
+                     embedding_dim=768,  # Default embedding dimension, not context window
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # NOTE: max is 2048
+                     handle=self.get_handle(model_name),
+                 ),
+             )
+
+         return configs
letta/schemas/providers/mistral.py
@@ -0,0 +1,41 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+
+ class MistralProvider(Provider):
+     provider_type: Literal[ProviderType.mistral] = Field(ProviderType.mistral, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     api_key: str = Field(..., description="API key for the Mistral API.")
+     base_url: str = "https://api.mistral.ai/v1"
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.mistral import mistral_get_model_list_async
+
+         # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
+         # See: https://openrouter.ai/docs/requests
+         response = await mistral_get_model_list_async(self.base_url, api_key=self.api_key)
+
+         assert "data" in response, f"Mistral model query response missing 'data' field: {response}"
+
+         configs = []
+         for model in response["data"]:
+             # If model has chat completions and function calling enabled
+             if model["capabilities"]["completion_chat"] and model["capabilities"]["function_calling"]:
+                 configs.append(
+                     LLMConfig(
+                         model=model["id"],
+                         model_endpoint_type="openai",
+                         model_endpoint=self.base_url,
+                         context_window=model["max_context_length"],
+                         handle=self.get_handle(model["id"]),
+                         provider_name=self.name,
+                         provider_category=self.provider_category,
+                     )
+                 )
+
+         return configs
letta/schemas/providers/ollama.py
@@ -0,0 +1,151 @@
+ from typing import Literal
+
+ import aiohttp
+ import requests
+ from pydantic import Field
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE
+ from letta.log import get_logger
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+ logger = get_logger(__name__)
+
+
+ class OllamaProvider(OpenAIProvider):
+     """Ollama provider that uses the native /api/generate endpoint
+
+     See: https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-completion
+     """
+
+     provider_type: Literal[ProviderType.ollama] = Field(ProviderType.ollama, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     base_url: str = Field(..., description="Base URL for the Ollama API.")
+     api_key: str | None = Field(None, description="API key for the Ollama API (default: `None`).")
+     default_prompt_formatter: str = Field(
+         ..., description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
+     )
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         """List available LLM Models from Ollama
+
+         https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models"""
+         endpoint = f"{self.base_url}/api/tags"
+         async with aiohttp.ClientSession() as session:
+             async with session.get(endpoint) as response:
+                 if response.status != 200:
+                     raise Exception(f"Failed to list Ollama models: {response.text}")
+                 response_json = await response.json()
+
+         configs = []
+         for model in response_json["models"]:
+             context_window = self.get_model_context_window(model["name"])
+             if context_window is None:
+                 print(f"Ollama model {model['name']} has no context window")
+                 continue
+             configs.append(
+                 LLMConfig(
+                     model=model["name"],
+                     model_endpoint_type="ollama",
+                     model_endpoint=self.base_url,
+                     model_wrapper=self.default_prompt_formatter,
+                     context_window=context_window,
+                     handle=self.get_handle(model["name"]),
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+         return configs
+
+     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+         """List available embedding models from Ollama
+
+         https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+         """
+         endpoint = f"{self.base_url}/api/tags"
+         async with aiohttp.ClientSession() as session:
+             async with session.get(endpoint) as response:
+                 if response.status != 200:
+                     raise Exception(f"Failed to list Ollama models: {response.text}")
+                 response_json = await response.json()
+
+         configs = []
+         for model in response_json["models"]:
+             embedding_dim = await self._get_model_embedding_dim_async(model["name"])
+             if not embedding_dim:
+                 print(f"Ollama model {model['name']} has no embedding dimension")
+                 continue
+             configs.append(
+                 EmbeddingConfig(
+                     embedding_model=model["name"],
+                     embedding_endpoint_type="ollama",
+                     embedding_endpoint=self.base_url,
+                     embedding_dim=embedding_dim,
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                     handle=self.get_handle(model["name"], is_embedding=True),
+                 )
+             )
+         return configs
+
+     def get_model_context_window(self, model_name: str) -> int | None:
+         """Gets model context window for Ollama. As this can look different based on models,
+         we use the following for guidance:
+
+         "llama.context_length": 8192,
+         "llama.embedding_length": 4096,
+         source: https://github.com/ollama/ollama/blob/main/docs/api.md#show-model-information
+
+         FROM 2024-10-08
+         Notes from vLLM around keys
+         source: https://github.com/vllm-project/vllm/blob/72ad2735823e23b4e1cc79b7c73c3a5f3c093ab0/vllm/config.py#L3488
+
+         possible_keys = [
+             # OPT
+             "max_position_embeddings",
+             # GPT-2
+             "n_positions",
+             # MPT
+             "max_seq_len",
+             # ChatGLM2
+             "seq_length",
+             # Command-R
+             "model_max_length",
+             # Whisper
+             "max_target_positions",
+             # Others
+             "max_sequence_length",
+             "max_seq_length",
+             "seq_len",
+         ]
+         max_position_embeddings
+         parse model cards: nous, dolphon, llama
+         """
+         endpoint = f"{self.base_url}/api/show"
+         payload = {"name": model_name, "verbose": True}
+         response = requests.post(endpoint, json=payload)
+         if response.status_code != 200:
+             return None
+
+         try:
+             model_info = response.json()
+             # Try to extract context window from model parameters
+             if "model_info" in model_info and "llama.context_length" in model_info["model_info"]:
+                 return int(model_info["model_info"]["llama.context_length"])
+         except Exception:
+             pass
+         logger.warning(f"Failed to get model context window for {model_name}")
+         return None
+
+     async def _get_model_embedding_dim_async(self, model_name: str):
+         async with aiohttp.ClientSession() as session:
+             async with session.post(f"{self.base_url}/api/show", json={"name": model_name, "verbose": True}) as response:
+                 response_json = await response.json()
+
+         if "model_info" not in response_json:
+             if "error" in response_json:
+                 logger.warning("Ollama fetch model info error for %s: %s", model_name, response_json["error"])
+             return None
+
+         return response_json["model_info"].get("embedding_length")
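For orientation only (not part of the package diff): a minimal sketch of how the new OllamaProvider above might be exercised against a local Ollama server. The `name` value passed to the base Provider and the "chatml" formatter string are illustrative assumptions, not taken from this diff.

    import asyncio

    from letta.schemas.providers.ollama import OllamaProvider

    async def main():
        provider = OllamaProvider(
            name="ollama",                      # assumed base-Provider field
            base_url="http://localhost:11434",  # default local Ollama endpoint
            api_key=None,
            default_prompt_formatter="chatml",  # assumed formatter name
        )
        # Uses the async listing method added in this release.
        for config in await provider.list_llm_models_async():
            print(config.handle, config.context_window)

    asyncio.run(main())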