letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agents/helpers.py +4 -0
- letta/agents/letta_agent.py +142 -5
- letta/constants.py +10 -7
- letta/data_sources/connectors.py +70 -53
- letta/embeddings.py +3 -240
- letta/errors.py +28 -0
- letta/functions/function_sets/base.py +4 -4
- letta/functions/functions.py +287 -32
- letta/functions/mcp_client/types.py +11 -0
- letta/functions/schema_validator.py +187 -0
- letta/functions/typescript_parser.py +196 -0
- letta/helpers/datetime_helpers.py +8 -4
- letta/helpers/tool_execution_helper.py +25 -2
- letta/llm_api/anthropic_client.py +23 -18
- letta/llm_api/azure_client.py +73 -0
- letta/llm_api/bedrock_client.py +8 -4
- letta/llm_api/google_vertex_client.py +14 -5
- letta/llm_api/llm_api_tools.py +2 -217
- letta/llm_api/llm_client.py +15 -1
- letta/llm_api/llm_client_base.py +32 -1
- letta/llm_api/openai.py +1 -0
- letta/llm_api/openai_client.py +18 -28
- letta/llm_api/together_client.py +55 -0
- letta/orm/provider.py +1 -0
- letta/orm/step_metrics.py +40 -1
- letta/otel/db_pool_monitoring.py +1 -1
- letta/schemas/agent.py +3 -4
- letta/schemas/agent_file.py +2 -0
- letta/schemas/block.py +11 -5
- letta/schemas/embedding_config.py +4 -5
- letta/schemas/enums.py +1 -1
- letta/schemas/job.py +2 -3
- letta/schemas/llm_config.py +79 -7
- letta/schemas/mcp.py +0 -24
- letta/schemas/message.py +0 -108
- letta/schemas/openai/chat_completion_request.py +1 -0
- letta/schemas/providers/__init__.py +0 -2
- letta/schemas/providers/anthropic.py +106 -8
- letta/schemas/providers/azure.py +102 -8
- letta/schemas/providers/base.py +10 -3
- letta/schemas/providers/bedrock.py +28 -16
- letta/schemas/providers/letta.py +3 -3
- letta/schemas/providers/ollama.py +2 -12
- letta/schemas/providers/openai.py +4 -4
- letta/schemas/providers/together.py +14 -2
- letta/schemas/sandbox_config.py +2 -1
- letta/schemas/tool.py +46 -22
- letta/server/rest_api/routers/v1/agents.py +179 -38
- letta/server/rest_api/routers/v1/folders.py +13 -8
- letta/server/rest_api/routers/v1/providers.py +10 -3
- letta/server/rest_api/routers/v1/sources.py +14 -8
- letta/server/rest_api/routers/v1/steps.py +17 -1
- letta/server/rest_api/routers/v1/tools.py +96 -5
- letta/server/rest_api/streaming_response.py +91 -45
- letta/server/server.py +27 -38
- letta/services/agent_manager.py +92 -20
- letta/services/agent_serialization_manager.py +11 -7
- letta/services/context_window_calculator/context_window_calculator.py +40 -2
- letta/services/helpers/agent_manager_helper.py +73 -12
- letta/services/mcp_manager.py +109 -15
- letta/services/passage_manager.py +28 -109
- letta/services/provider_manager.py +24 -0
- letta/services/step_manager.py +68 -0
- letta/services/summarizer/summarizer.py +1 -4
- letta/services/tool_executor/core_tool_executor.py +1 -1
- letta/services/tool_executor/sandbox_tool_executor.py +26 -9
- letta/services/tool_manager.py +82 -5
- letta/services/tool_sandbox/base.py +3 -11
- letta/services/tool_sandbox/modal_constants.py +17 -0
- letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
- letta/services/tool_sandbox/modal_sandbox.py +218 -3
- letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
- letta/services/tool_sandbox/modal_version_manager.py +273 -0
- letta/services/tool_sandbox/safe_pickle.py +193 -0
- letta/settings.py +5 -3
- letta/templates/sandbox_code_file.py.j2 +2 -4
- letta/templates/sandbox_code_file_async.py.j2 +2 -4
- letta/utils.py +1 -1
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
- letta/llm_api/anthropic.py +0 -1206
- letta/llm_api/aws_bedrock.py +0 -104
- letta/llm_api/azure_openai.py +0 -118
- letta/llm_api/azure_openai_constants.py +0 -11
- letta/llm_api/cohere.py +0 -391
- letta/schemas/providers/cohere.py +0 -18
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0
letta/llm_api/aws_bedrock.py
DELETED
@@ -1,104 +0,0 @@
-"""
-Note that this formally only supports Anthropic Bedrock.
-TODO (cliandy): determine what other providers are supported and what is needed to add support.
-"""
-
-import os
-from typing import Any, Optional
-
-from anthropic import AnthropicBedrock
-
-from letta.log import get_logger
-from letta.settings import model_settings
-
-logger = get_logger(__name__)
-
-
-def has_valid_aws_credentials() -> bool:
-    """
-    Check if AWS credentials are properly configured.
-    """
-    return all(
-        (
-            os.getenv("AWS_ACCESS_KEY_ID"),
-            os.getenv("AWS_SECRET_ACCESS_KEY"),
-            os.getenv("AWS_DEFAULT_REGION"),
-        )
-    )
-
-
-def get_bedrock_client(
-    access_key_id: Optional[str] = None,
-    secret_key: Optional[str] = None,
-    default_region: Optional[str] = None,
-):
-    """
-    Get a Bedrock client
-    """
-    import boto3
-
-    sts_client = boto3.client(
-        "sts",
-        aws_access_key_id=access_key_id or model_settings.aws_access_key_id,
-        aws_secret_access_key=secret_key or model_settings.aws_secret_access_key,
-        region_name=default_region or model_settings.aws_default_region,
-    )
-    credentials = sts_client.get_session_token()["Credentials"]
-
-    bedrock = AnthropicBedrock(
-        aws_access_key=credentials["AccessKeyId"],
-        aws_secret_key=credentials["SecretAccessKey"],
-        aws_session_token=credentials["SessionToken"],
-        aws_region=default_region or model_settings.aws_default_region,
-    )
-    return bedrock
-
-
-async def bedrock_get_model_list_async(
-    access_key_id: Optional[str] = None,
-    secret_access_key: Optional[str] = None,
-    default_region: Optional[str] = None,
-) -> list[dict]:
-    from aioboto3.session import Session
-
-    try:
-        session = Session()
-        async with session.client(
-            "bedrock",
-            aws_access_key_id=access_key_id,
-            aws_secret_access_key=secret_access_key,
-            region_name=default_region,
-        ) as bedrock:
-            response = await bedrock.list_inference_profiles()
-            return response["inferenceProfileSummaries"]
-    except Exception as e:
-        logger.error(f"Error getting model list for bedrock: %s", e)
-        raise e
-
-
-def bedrock_get_model_details(region_name: str, model_id: str) -> dict[str, Any]:
-    """
-    Get details for a specific model from Bedrock.
-    """
-    import boto3
-    from botocore.exceptions import ClientError
-
-    try:
-        bedrock = boto3.client("bedrock", region_name=region_name)
-        response = bedrock.get_foundation_model(modelIdentifier=model_id)
-        return response["modelDetails"]
-    except ClientError as e:
-        logger.exception(f"Error getting model details: {str(e)}")
-        raise e
-
-
-def bedrock_get_model_context_window(model_id: str) -> int:
-    """
-    Get context window size for a specific model.
-    """
-    # Bedrock doesn't provide this via API, so we maintain a mapping
-    # 200k for anthropic: https://aws.amazon.com/bedrock/anthropic/
-    if model_id.startswith("anthropic"):
-        return 200_000
-    else:
-        return 100_000  # default to 100k if unknown
letta/llm_api/azure_openai.py
DELETED
@@ -1,118 +0,0 @@
-from collections import defaultdict
-
-import requests
-from openai import AzureOpenAI
-
-from letta.llm_api.openai import prepare_openai_payload
-from letta.schemas.llm_config import LLMConfig
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
-from letta.schemas.openai.chat_completions import ChatCompletionRequest
-from letta.settings import ModelSettings
-
-
-def get_azure_chat_completions_endpoint(base_url: str, model: str, api_version: str):
-    return f"{base_url}/openai/deployments/{model}/chat/completions?api-version={api_version}"
-
-
-def get_azure_embeddings_endpoint(base_url: str, model: str, api_version: str):
-    return f"{base_url}/openai/deployments/{model}/embeddings?api-version={api_version}"
-
-
-def get_azure_model_list_endpoint(base_url: str, api_version: str):
-    return f"{base_url}/openai/models?api-version={api_version}"
-
-
-def get_azure_deployment_list_endpoint(base_url: str):
-    # Please note that it has to be 2023-03-15-preview
-    # That's the only api version that works with this deployments endpoint
-    # TODO: Use the Azure Client library here instead
-    return f"{base_url}/openai/deployments?api-version=2023-03-15-preview"
-
-
-def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_version: str) -> list:
-    """https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP"""
-
-    client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=base_url)
-
-    try:
-        models_list = client.models.list()
-    except Exception:
-        return []
-
-    all_available_models = [model.to_dict() for model in models_list.data]
-
-    # https://xxx.openai.azure.com/openai/models?api-version=xxx
-    headers = {"Content-Type": "application/json"}
-    if api_key is not None:
-        headers["api-key"] = f"{api_key}"
-
-    # 2. Get all the deployed models
-    url = get_azure_deployment_list_endpoint(base_url)
-    try:
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()
-    except requests.RequestException as e:
-        raise RuntimeError(f"Failed to retrieve model list: {e}")
-
-    deployed_models = response.json().get("data", [])
-    deployed_model_names = set([m["id"] for m in deployed_models])
-
-    # 3. Only return the models in available models if they have been deployed
-    deployed_models = [m for m in all_available_models if m["id"] in deployed_model_names]
-
-    # 4. Remove redundant deployments, only include the ones with the latest deployment
-    # Create a dictionary to store the latest model for each ID
-    latest_models = defaultdict()
-
-    # Iterate through the models and update the dictionary with the most recent model
-    for model in deployed_models:
-        model_id = model["id"]
-        updated_at = model["created_at"]
-
-        # If the model ID is new or the current model has a more recent created_at, update the dictionary
-        if model_id not in latest_models or updated_at > latest_models[model_id]["created_at"]:
-            latest_models[model_id] = model
-
-    # Extract the unique models
-    return list(latest_models.values())
-
-
-def azure_openai_get_chat_completion_model_list(base_url: str, api_key: str, api_version: str) -> list:
-    model_list = azure_openai_get_deployed_model_list(base_url, api_key, api_version)
-    # Extract models that support text generation
-    model_options = [m for m in model_list if m.get("capabilities").get("chat_completion") == True]
-    return model_options
-
-
-def azure_openai_get_embeddings_model_list(base_url: str, api_key: str, api_version: str, require_embedding_in_name: bool = True) -> list:
-    def valid_embedding_model(m: dict):
-        valid_name = True
-        if require_embedding_in_name:
-            valid_name = "embedding" in m["id"]
-
-        return m.get("capabilities").get("embeddings") == True and valid_name
-
-    model_list = azure_openai_get_deployed_model_list(base_url, api_key, api_version)
-    # Extract models that support embeddings
-
-    model_options = [m for m in model_list if valid_embedding_model(m)]
-
-    return model_options
-
-
-def azure_openai_chat_completions_request(
-    model_settings: ModelSettings, llm_config: LLMConfig, chat_completion_request: ChatCompletionRequest
-) -> ChatCompletionResponse:
-    """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions"""
-
-    assert model_settings.azure_api_key is not None, "Missing required api key field when calling Azure OpenAI"
-    assert model_settings.azure_api_version is not None, "Missing required api version field when calling Azure OpenAI"
-    assert model_settings.azure_base_url is not None, "Missing required base url field when calling Azure OpenAI"
-
-    data = prepare_openai_payload(chat_completion_request)
-    client = AzureOpenAI(
-        api_key=model_settings.azure_api_key, api_version=model_settings.azure_api_version, azure_endpoint=model_settings.azure_base_url
-    )
-    chat_completion = client.chat.completions.create(**data)
-
-    return ChatCompletionResponse(**chat_completion.model_dump())
letta/llm_api/azure_openai_constants.py
DELETED
@@ -1,11 +0,0 @@
-AZURE_MODEL_TO_CONTEXT_LENGTH = {
-    "babbage-002": 16384,
-    "davinci-002": 16384,
-    "gpt-35-turbo-0613": 4096,
-    "gpt-35-turbo-1106": 16385,
-    "gpt-35-turbo-0125": 16385,
-    "gpt-4-0613": 8192,
-    "gpt-4o-mini-2024-07-18": 128000,
-    "gpt-4o-mini": 128000,
-    "gpt-4o": 128000,
-}
letta/llm_api/cohere.py
DELETED
@@ -1,391 +0,0 @@
-import json
-import uuid
-from typing import List, Optional, Union
-
-import requests
-
-from letta.helpers.datetime_helpers import get_utc_time_int
-from letta.helpers.json_helpers import json_dumps
-from letta.local_llm.utils import count_tokens
-from letta.schemas.message import Message
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
-from letta.schemas.openai.chat_completion_response import (
-    Message as ChoiceMessage,  # NOTE: avoid conflict with our own Letta Message datatype
-)
-from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
-from letta.utils import get_tool_call_id, smart_urljoin
-
-BASE_URL = "https://api.cohere.ai/v1"
-
-# models that we know will work with Letta
-COHERE_VALID_MODEL_LIST = [
-    "command-r-plus",
-]
-
-
-def cohere_get_model_details(url: str, api_key: Union[str, None], model: str) -> int:
-    """https://docs.cohere.com/reference/get-model"""
-    from letta.utils import printd
-
-    url = smart_urljoin(url, "models")
-    url = smart_urljoin(url, model)
-    headers = {
-        "accept": "application/json",
-        "authorization": f"bearer {api_key}",
-    }
-
-    printd(f"Sending request to {url}")
-    try:
-        response = requests.get(url, headers=headers)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
-
-
-def cohere_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int:
-    model_details = cohere_get_model_details(url=url, api_key=api_key, model=model)
-    return model_details["context_length"]
-
-
-def cohere_get_model_list(url: str, api_key: Union[str, None]) -> dict:
-    """https://docs.cohere.com/reference/list-models"""
-    from letta.utils import printd
-
-    url = smart_urljoin(url, "models")
-    headers = {
-        "accept": "application/json",
-        "authorization": f"bearer {api_key}",
-    }
-
-    printd(f"Sending request to {url}")
-    try:
-        response = requests.get(url, headers=headers)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        return response["models"]
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
-
-
-def remap_finish_reason(finish_reason: str) -> str:
-    """Remap Cohere's 'finish_reason' to OpenAI 'finish_reason'
-
-    OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
-    see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
-
-    Cohere finish_reason is different but undocumented ???
-    """
-    if finish_reason == "COMPLETE":
-        return "stop"
-    elif finish_reason == "MAX_TOKENS":
-        return "length"
-    # elif stop_reason == "tool_use":
-    #     return "function_call"
-    else:
-        raise ValueError(f"Unexpected stop_reason: {finish_reason}")
-
-
-def convert_cohere_response_to_chatcompletion(
-    response_json: dict,  # REST response from API
-    model: str,  # Required since not returned
-    inner_thoughts_in_kwargs: Optional[bool] = True,
-) -> ChatCompletionResponse:
-    """
-    Example response from command-r-plus:
-    response.json = {
-        'response_id': '28c47751-acce-41cd-8c89-c48a15ac33cf',
-        'text': '',
-        'generation_id': '84209c9e-2868-4984-82c5-063b748b7776',
-        'chat_history': [
-            {
-                'role': 'CHATBOT',
-                'message': 'Bootup sequence complete. Persona activated. Testing messaging functionality.'
-            },
-            {
-                'role': 'SYSTEM',
-                'message': '{"status": "OK", "message": null, "time": "2024-04-11 11:22:36 PM PDT-0700"}'
-            }
-        ],
-        'finish_reason': 'COMPLETE',
-        'meta': {
-            'api_version': {'version': '1'},
-            'billed_units': {'input_tokens': 692, 'output_tokens': 20},
-            'tokens': {'output_tokens': 20}
-        },
-        'tool_calls': [
-            {
-                'name': 'send_message',
-                'parameters': {
-                    'message': "Hello Chad, it's Sam. How are you feeling today?"
-                }
-            }
-        ]
-    }
-    """
-    if "billed_units" in response_json["meta"]:
-        prompt_tokens = response_json["meta"]["billed_units"]["input_tokens"]
-        completion_tokens = response_json["meta"]["billed_units"]["output_tokens"]
-    else:
-        # For some reason input_tokens not included in 'meta' 'tokens' dict?
-        prompt_tokens = count_tokens(json_dumps(response_json["chat_history"]))  # NOTE: this is a very rough approximation
-        completion_tokens = response_json["meta"]["tokens"]["output_tokens"]
-
-    finish_reason = remap_finish_reason(response_json["finish_reason"])
-
-    if "tool_calls" in response_json and response_json["tool_calls"] is not None:
-        inner_thoughts = []
-        tool_calls = []
-        for tool_call_response in response_json["tool_calls"]:
-            function_name = tool_call_response["name"]
-            function_args = tool_call_response["parameters"]
-            if inner_thoughts_in_kwargs:
-                from letta.local_llm.constants import INNER_THOUGHTS_KWARG
-
-                assert INNER_THOUGHTS_KWARG in function_args
-                # NOTE:
-                inner_thoughts.append(function_args.pop(INNER_THOUGHTS_KWARG))
-
-            tool_calls.append(
-                ToolCall(
-                    id=get_tool_call_id(),
-                    type="function",
-                    function=FunctionCall(
-                        name=function_name,
-                        arguments=json.dumps(function_args),
-                    ),
-                )
-            )
-
-        # NOTE: no multi-call support for now
-        assert len(tool_calls) == 1, tool_calls
-        content = inner_thoughts[0]
-
-    else:
-        # raise NotImplementedError(f"Expected a tool call response from Cohere API")
-        content = response_json["text"]
-        tool_calls = None
-
-    # In Cohere API empty string == null
-    content = None if content == "" else content
-    assert content is not None or tool_calls is not None, "Response message must have either content or tool_calls"
-
-    choice = Choice(
-        index=0,
-        finish_reason=finish_reason,
-        message=ChoiceMessage(
-            role="assistant",
-            content=content,
-            tool_calls=tool_calls,
-        ),
-    )
-
-    return ChatCompletionResponse(
-        id=response_json["response_id"],
-        choices=[choice],
-        created=get_utc_time_int(),
-        model=model,
-        usage=UsageStatistics(
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-            total_tokens=prompt_tokens + completion_tokens,
-        ),
-    )
-
-
-def convert_tools_to_cohere_format(tools: List[Tool], inner_thoughts_in_kwargs: Optional[bool] = True) -> List[dict]:
-    """See: https://docs.cohere.com/reference/chat
-
-    OpenAI style:
-      "tools": [{
-        "type": "function",
-        "function": {
-            "name": "find_movies",
-            "description": "find ....",
-            "parameters": {
-              "type": "object",
-              "properties": {
-                 PARAM: {
-                   "type": PARAM_TYPE,  # eg "string"
-                   "description": PARAM_DESCRIPTION,
-                 },
-                 ...
-              },
-              "required": List[str],
-            }
-        }
-      }]
-
-    Cohere style:
-      "tools": [{
-        "name": "find_movies",
-        "description": "find ....",
-        "parameter_definitions": {
-          PARAM_NAME: {
-            "description": PARAM_DESCRIPTION,
-            "type": PARAM_TYPE,  # eg "string"
-            "required": <boolean>,
-          }
-        },
-      }
-    }]
-    """
-    tools_dict_list = []
-    for tool in tools:
-        tools_dict_list.append(
-            {
-                "name": tool.function.name,
-                "description": tool.function.description,
-                "parameter_definitions": {
-                    p_name: {
-                        "description": p_fields["description"],
-                        "type": p_fields["type"],
-                        "required": p_name in tool.function.parameters["required"],
-                    }
-                    for p_name, p_fields in tool.function.parameters["properties"].items()
-                },
-            }
-        )
-
-    if inner_thoughts_in_kwargs:
-        # NOTE: since Cohere doesn't allow "text" in the response when a tool call happens, if we want
-        # a simultaneous CoT + tool call we need to put it inside a kwarg
-        from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
-
-        for cohere_tool in tools_dict_list:
-            cohere_tool["parameter_definitions"][INNER_THOUGHTS_KWARG] = {
-                "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
-                "type": "string",
-                "required": True,
-            }
-
-    return tools_dict_list
-
-
-def cohere_chat_completions_request(
-    url: str,
-    api_key: str,
-    chat_completion_request: ChatCompletionRequest,
-) -> ChatCompletionResponse:
-    """https://docs.cohere.com/docs/multi-step-tool-use"""
-    from letta.utils import printd
-
-    url = smart_urljoin(url, "chat")
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": f"bearer {api_key}",
-    }
-
-    # convert the tools
-    cohere_tools = None if chat_completion_request.tools is None else convert_tools_to_cohere_format(chat_completion_request.tools)
-
-    # pydantic -> dict
-    data = chat_completion_request.model_dump(exclude_none=True)
-
-    if "functions" in data:
-        raise ValueError("'functions' unexpected in Anthropic API payload")
-
-    # If tools == None, strip from the payload
-    if "tools" in data and data["tools"] is None:
-        data.pop("tools")
-        data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")
-
-    # Convert messages to Cohere format
-    msg_objs = [Message.dict_to_message(agent_id=uuid.uuid4(), openai_message_dict=m) for m in data["messages"]]
-
-    # System message 0 should instead be a "preamble"
-    # See: https://docs.cohere.com/reference/chat
-    # The chat_history parameter should not be used for SYSTEM messages in most cases. Instead, to add a SYSTEM role message at the beginning of a conversation, the preamble parameter should be used.
-    assert msg_objs[0].role == "system", msg_objs[0]
-    preamble = msg_objs[0].content[0].text
-
-    # data["messages"] = [m.to_cohere_dict() for m in msg_objs[1:]]
-    data["messages"] = []
-    for m in msg_objs[1:]:
-        ms = m.to_cohere_dict()  # NOTE: returns List[dict]
-        data["messages"].extend(ms)
-
-    assert data["messages"][-1]["role"] == "USER", data["messages"][-1]
-    data = {
-        "preamble": preamble,
-        "chat_history": data["messages"][:-1],
-        "message": data["messages"][-1]["message"],
-        "tools": cohere_tools,
-    }
-
-    # Move 'system' to the top level
-    # 'messages: Unexpected role "system". The Messages API accepts a top-level `system` parameter, not "system" as an input message role.'
-    # assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
-    # data["system"] = data["messages"][0]["content"]
-    # data["messages"] = data["messages"][1:]
-
-    # Convert to Anthropic format
-    # msg_objs = [Message.dict_to_message(user_id=uuid.uuid4(), agent_id=uuid.uuid4(), openai_message_dict=m) for m in data["messages"]]
-    # data["messages"] = [m.to_anthropic_dict(inner_thoughts_xml_tag=inner_thoughts_xml_tag) for m in msg_objs]
-
-    # Handling Anthropic special requirement for 'user' message in front
-    # messages: first message must use the "user" role'
-    # if data["messages"][0]["role"] != "user":
-    #     data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
-
-    # Handle Anthropic's restriction on alternating user/assistant messages
-    # data["messages"] = merge_tool_results_into_user_messages(data["messages"])
-
-    # Anthropic also wants max_tokens in the input
-    # It's also part of ChatCompletions
-    # assert "max_tokens" in data, data
-
-    # Remove extra fields used by OpenAI but not Anthropic
-    # data.pop("frequency_penalty", None)
-    # data.pop("logprobs", None)
-    # data.pop("n", None)
-    # data.pop("top_p", None)
-    # data.pop("presence_penalty", None)
-    # data.pop("user", None)
-    # data.pop("tool_choice", None)
-
-    printd(f"Sending request to {url}")
-    try:
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        response = convert_cohere_response_to_chatcompletion(response_json=response, model=chat_completion_request.model)
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
letta/schemas/providers/cohere.py
DELETED
@@ -1,18 +0,0 @@
-from typing import Literal
-
-from pydantic import Field
-
-from letta.schemas.enums import ProviderCategory, ProviderType
-from letta.schemas.llm_config import LLMConfig
-from letta.schemas.providers.openai import OpenAIProvider
-
-
-# TODO (cliandy): this needs to be implemented
-class CohereProvider(OpenAIProvider):
-    provider_type: Literal[ProviderType.cohere] = Field(ProviderType.cohere, description="The type of the provider.")
-    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
-    base_url: str = ""
-    api_key: str = Field(..., description="API key for the Cohere API.")
-
-    async def list_llm_models_async(self) -> list[LLMConfig]:
-        raise NotImplementedError