letta-nightly 0.4.1.dev20241007104134__py3-none-any.whl → 0.4.1.dev20241009104130__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly has been flagged as possibly problematic by the registry.

Files changed (35)
  1. letta/agent.py +36 -10
  2. letta/client/client.py +8 -1
  3. letta/credentials.py +3 -3
  4. letta/errors.py +1 -1
  5. letta/functions/schema_generator.py +1 -1
  6. letta/llm_api/anthropic.py +3 -24
  7. letta/llm_api/azure_openai.py +53 -108
  8. letta/llm_api/azure_openai_constants.py +10 -0
  9. letta/llm_api/google_ai.py +39 -64
  10. letta/llm_api/helpers.py +208 -0
  11. letta/llm_api/llm_api_tools.py +43 -218
  12. letta/llm_api/openai.py +74 -50
  13. letta/main.py +1 -1
  14. letta/metadata.py +2 -0
  15. letta/providers.py +144 -31
  16. letta/schemas/agent.py +14 -0
  17. letta/schemas/llm_config.py +2 -2
  18. letta/schemas/openai/chat_completion_response.py +3 -0
  19. letta/schemas/tool.py +3 -3
  20. letta/server/rest_api/admin/tools.py +0 -1
  21. letta/server/rest_api/app.py +1 -17
  22. letta/server/rest_api/routers/openai/assistants/threads.py +10 -7
  23. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +5 -3
  24. letta/server/rest_api/routers/v1/agents.py +23 -13
  25. letta/server/rest_api/routers/v1/blocks.py +5 -3
  26. letta/server/rest_api/routers/v1/jobs.py +5 -3
  27. letta/server/rest_api/routers/v1/sources.py +25 -13
  28. letta/server/rest_api/routers/v1/tools.py +12 -7
  29. letta/server/server.py +33 -37
  30. letta/settings.py +5 -113
  31. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/METADATA +1 -1
  32. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/RECORD +35 -33
  33. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/LICENSE +0 -0
  34. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/WHEEL +0 -0
  35. {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/entry_points.txt +0 -0
letta/agent.py CHANGED
@@ -18,7 +18,7 @@ from letta.constants import (
     MESSAGE_SUMMARY_WARNING_FRAC,
 )
 from letta.interface import AgentInterface
-from letta.llm_api.llm_api_tools import create, is_context_overflow_error
+from letta.llm_api.llm_api_tools import create
 from letta.memory import ArchivalMemory, RecallMemory, summarize_messages
 from letta.metadata import MetadataStore
 from letta.persistence_manager import LocalStateManager
@@ -56,6 +56,7 @@ from letta.utils import (
 )
 
 from .errors import LLMError
+from .llm_api.helpers import is_context_overflow_error
 
 
 def compile_memory_metadata_block(
@@ -207,7 +208,7 @@ class BaseAgent(ABC):
         recreate_message_timestamp: bool = True,  # if True, when input is a Message type, recreated the 'created_at' field
         stream: bool = False,  # TODO move to config?
         timestamp: Optional[datetime.datetime] = None,
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """
@@ -223,7 +224,7 @@ class BaseAgent(ABC):
 class Agent(BaseAgent):
     def __init__(
         self,
-        interface: AgentInterface,
+        interface: Optional[AgentInterface],
         # agents can be created from providing agent_state
         agent_state: AgentState,
         tools: List[Tool],
@@ -238,6 +239,7 @@ class Agent(BaseAgent):
         assert isinstance(self.agent_state.memory, Memory), f"Memory object is not of type Memory: {type(self.agent_state.memory)}"
 
         # link tools
+        self.tools = tools
         self.link_tools(tools)
 
         # gpt-4, gpt-3.5-turbo, ...
@@ -337,6 +339,9 @@ class Agent(BaseAgent):
         for tool_name in self.agent_state.tools:
             assert tool_name in [tool.name for tool in tools], f"Tool name {tool_name} not included in agent tool list"
 
+        # Update tools
+        self.tools = tools
+
         # Store the functions schemas (this is passed as an argument to ChatCompletion)
         self.functions = []
         self.functions_python = {}
@@ -460,7 +465,7 @@ class Agent(BaseAgent):
         function_call: str = "auto",
         first_message: bool = False,  # hint
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     ) -> ChatCompletionResponse:
         """Get response from LLM API"""
         try:
@@ -478,10 +483,10 @@
                 stream=stream,
                 stream_inferface=self.interface,
                 # putting inner thoughts in func args or not
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )
 
-            if len(response.choices) == 0:
+            if len(response.choices) == 0 or response.choices[0] is None:
                 raise Exception(f"API call didn't return a message: {response}")
 
             # special case for 'length'
@@ -551,15 +556,20 @@
            )  # extend conversation with assistant's reply
            printd(f"Function call message: {messages[-1]}")
 
+           nonnull_content = False
            if response_message.content:
                # The content if then internal monologue, not chat
                self.interface.internal_monologue(response_message.content, msg_obj=messages[-1])
+               # Flag to avoid printing a duplicate if inner thoughts get popped from the function call
+               nonnull_content = True
 
            # Step 3: call the function
            # Note: the JSON response may not always be valid; be sure to handle errors
            function_call = (
                response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
            )
+
+           # Get the name of the function
            function_name = function_call.name
            printd(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
@@ -608,9 +618,21 @@
                self.interface.function_message(f"Error: {error_msg}", msg_obj=messages[-1])
                return messages, False, True  # force a heartbeat to allow agent to handle error
 
+           # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
+           if "inner_thoughts" in function_args:
+               response_message.content = function_args.pop("inner_thoughts")
+               # The content if then internal monologue, not chat
+               if response_message.content and not nonnull_content:
+                   self.interface.internal_monologue(response_message.content, msg_obj=messages[-1])
+
            # (Still parsing function args)
            # Handle requests for immediate heartbeat
            heartbeat_request = function_args.pop("request_heartbeat", None)
+
+           # Edge case: heartbeat_request is returned as a stringified boolean, we will attempt to parse:
+           if isinstance(heartbeat_request, str) and heartbeat_request.lower().strip() == "true":
+               heartbeat_request = True
+
            if not isinstance(heartbeat_request, bool) or heartbeat_request is None:
                printd(
                    f"{CLI_WARNING_PREFIX}'request_heartbeat' arg parsed was not a bool or None, type={type(heartbeat_request)}, value={heartbeat_request}"
@@ -716,7 +738,7 @@
         recreate_message_timestamp: bool = True,  # if True, when input is a Message type, recreated the 'created_at' field
         stream: bool = False,  # TODO move to config?
         timestamp: Optional[datetime.datetime] = None,
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """Top-level event message handler for the Letta agent"""
@@ -795,7 +817,7 @@
                     message_sequence=input_message_sequence,
                     first_message=True,  # passed through to the prompt formatter
                     stream=stream,
-                    inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 )
                 if verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono):
                     break
@@ -808,7 +830,7 @@
             response = self._get_ai_reply(
                 message_sequence=input_message_sequence,
                 stream=stream,
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )
 
         # Step 3: check if LLM wanted to call a function
@@ -892,7 +914,7 @@
                 recreate_message_timestamp=recreate_message_timestamp,
                 stream=stream,
                 timestamp=timestamp,
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 ms=ms,
             )
 
@@ -1343,6 +1365,10 @@ def save_agent(agent: Agent, ms: MetadataStore):
     else:
         ms.create_agent(agent_state)
 
+    for tool in agent.tools:
+        if ms.get_tool(tool_name=tool.name, user_id=tool.user_id) is None:
+            ms.create_tool(tool)
+
     agent.agent_state = ms.get_agent(agent_id=agent_id)
     assert isinstance(agent.agent_state.memory, Memory), f"Memory is not a Memory object: {type(agent_state.memory)}"
 
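Note on the heartbeat change above: it guards against providers that return "true" as a string rather than a JSON boolean. Distilled into a standalone function, the new logic looks roughly like this (an illustrative sketch, not the exact agent code; the None return mirrors the warning path in the patch):

    from typing import Any, Optional

    def parse_heartbeat_request(function_args: dict) -> Optional[bool]:
        # Pop the request_heartbeat arg; tolerate a stringified boolean such as "true"
        heartbeat_request: Any = function_args.pop("request_heartbeat", None)
        if isinstance(heartbeat_request, str) and heartbeat_request.lower().strip() == "true":
            heartbeat_request = True
        if not isinstance(heartbeat_request, bool):
            # The agent logs a warning here and proceeds as if no heartbeat was requested
            return None
        return heartbeat_request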
letta/client/client.py CHANGED
@@ -9,7 +9,7 @@ from letta.constants import BASE_TOOLS, DEFAULT_HUMAN, DEFAULT_PERSONA
 from letta.data_sources.connectors import DataConnector
 from letta.functions.functions import parse_source_code
 from letta.memory import get_memory_functions
-from letta.schemas.agent import AgentState, CreateAgent, UpdateAgentState
+from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgentState
 from letta.schemas.block import (
     Block,
     CreateBlock,
@@ -68,6 +68,7 @@ class AbstractClient(object):
     def create_agent(
         self,
         name: Optional[str] = None,
+        agent_type: Optional[AgentType] = AgentType.memgpt_agent,
         embedding_config: Optional[EmbeddingConfig] = None,
         llm_config: Optional[LLMConfig] = None,
         memory: Memory = ChatMemory(human=get_human_text(DEFAULT_HUMAN), persona=get_persona_text(DEFAULT_PERSONA)),
@@ -319,6 +320,8 @@ class RESTClient(AbstractClient):
     def create_agent(
         self,
         name: Optional[str] = None,
+        # agent config
+        agent_type: Optional[AgentType] = AgentType.memgpt_agent,
         # model configs
         embedding_config: EmbeddingConfig = None,
         llm_config: LLMConfig = None,
@@ -381,6 +384,7 @@ class RESTClient(AbstractClient):
             memory=memory,
             tools=tool_names,
             system=system,
+            agent_type=agent_type,
             llm_config=llm_config if llm_config else self._default_llm_config,
             embedding_config=embedding_config if embedding_config else self._default_embedding_config,
         )
@@ -1462,6 +1466,8 @@ class LocalClient(AbstractClient):
     def create_agent(
         self,
         name: Optional[str] = None,
+        # agent config
+        agent_type: Optional[AgentType] = AgentType.memgpt_agent,
         # model configs
         embedding_config: EmbeddingConfig = None,
         llm_config: LLMConfig = None,
@@ -1524,6 +1530,7 @@ class LocalClient(AbstractClient):
             memory=memory,
             tools=tool_names,
             system=system,
+            agent_type=agent_type,
             llm_config=llm_config if llm_config else self._default_llm_config,
             embedding_config=embedding_config if embedding_config else self._default_embedding_config,
         ),
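With agent_type now threaded through both client implementations, creating an agent with an explicit type looks roughly like this (a sketch; the imports match this diff, but the surrounding setup is assumed):

    from letta.client.client import LocalClient
    from letta.schemas.agent import AgentType

    client = LocalClient()
    # agent_type defaults to AgentType.memgpt_agent; shown explicitly here
    agent_state = client.create_agent(
        name="my-agent",
        agent_type=AgentType.memgpt_agent,
    )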
letta/credentials.py CHANGED
@@ -30,7 +30,7 @@ class LettaCredentials:
 
     # azure config
     azure_auth_type: str = "api_key"
-    azure_key: Optional[str] = None
+    azure_key: Optional[str] = os.getenv("AZURE_OPENAI_API_KEY")
 
     # groq config
     groq_key: Optional[str] = os.getenv("GROQ_API_KEY")
@@ -76,7 +76,7 @@
             "azure_embedding_deployment": get_field(config, "azure", "embedding_deployment"),
             # gemini
             "google_ai_key": get_field(config, "google_ai", "key"),
-            "google_ai_service_endpoint": get_field(config, "google_ai", "service_endpoint"),
+            # "google_ai_service_endpoint": get_field(config, "google_ai", "service_endpoint"),
             # anthropic
             "anthropic_key": get_field(config, "anthropic", "key"),
             # cohere
@@ -117,7 +117,7 @@
 
         # gemini
         set_field(config, "google_ai", "key", self.google_ai_key)
-        set_field(config, "google_ai", "service_endpoint", self.google_ai_service_endpoint)
+        # set_field(config, "google_ai", "service_endpoint", self.google_ai_service_endpoint)
 
         # anthropic
         set_field(config, "anthropic", "key", self.anthropic_key)
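Because azure_key now defaults to os.getenv("AZURE_OPENAI_API_KEY") as a class-level field default, the environment variable has to be set before letta.credentials is imported (field defaults evaluate once, at import time). A minimal sketch, assuming LettaCredentials keeps the plain field default shown in the diff:

    import os

    # Hypothetical key for illustration; set before the import below
    os.environ["AZURE_OPENAI_API_KEY"] = "my-azure-key"

    from letta.credentials import LettaCredentials

    creds = LettaCredentials()
    assert creds.azure_key == "my-azure-key"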
letta/errors.py CHANGED
@@ -56,7 +56,7 @@ class LettaMessageError(LettaError):
             error_msg += f" (Explanation: {explanation})"
 
         # Pretty print out message JSON
-        message_json = json.dumps([message.model_dump_json(indent=4) for message in messages], indent=4)
+        message_json = json.dumps([message.model_dump() for message in messages], indent=4)
         return f"{error_msg}\n\n{message_json}"
 
 
letta/functions/schema_generator.py CHANGED
@@ -130,7 +130,7 @@ def generate_schema(function, name: Optional[str] = None, description: Optional[
     if function.__name__ not in ["send_message", "pause_heartbeats"]:
         schema["parameters"]["properties"]["request_heartbeat"] = {
             "type": "boolean",
-            "description": "Request an immediate heartbeat after function execution. Set to 'true' if you want to send a follow-up message or run a follow-up function.",
+            "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function.",
         }
         schema["parameters"]["required"].append("request_heartbeat")
 
letta/llm_api/anthropic.py CHANGED
@@ -2,8 +2,7 @@ import json
 import re
 from typing import List, Optional, Union
 
-import requests
-
+from letta.llm_api.helpers import make_post_request
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
 from letta.schemas.openai.chat_completion_response import (
@@ -295,7 +294,6 @@ def anthropic_chat_completions_request(
     inner_thoughts_xml_tag: Optional[str] = "thinking",
 ) -> ChatCompletionResponse:
     """https://docs.anthropic.com/claude/docs/tool-use"""
-    from letta.utils import printd
 
     url = smart_urljoin(url, "messages")
     headers = {
@@ -360,24 +358,5 @@
     data.pop("user", None)
     data.pop("tool_choice", None)
 
-    printd(f"Sending request to {url}")
-    try:
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        response = convert_anthropic_response_to_chatcompletion(response_json=response, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    response_json = make_post_request(url, headers, data)
+    return convert_anthropic_response_to_chatcompletion(response_json=response_json, inner_thoughts_xml_tag=inner_thoughts_xml_tag)
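The repeated try/except scaffolding around requests.post across providers is consolidated into make_post_request in the new letta/llm_api/helpers.py (+208 lines; its body is not shown in this diff). Judging from the call sites, a helper along these lines would suffice — a sketch under that assumption, not the actual implementation:

    import requests

    def make_post_request(url: str, headers: dict, data: dict) -> dict:
        # POST the JSON payload; raise on 4XX/5XX and hand back the parsed body
        response = requests.post(url, headers=headers, json=data)
        response.raise_for_status()
        return response.json()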
letta/llm_api/azure_openai.py CHANGED
@@ -1,90 +1,74 @@
-from typing import Union
-
 import requests
 
+from letta.llm_api.helpers import make_post_request
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completions import ChatCompletionRequest
 from letta.schemas.openai.embedding_response import EmbeddingResponse
-from letta.utils import smart_urljoin
+from letta.settings import ModelSettings
+
+
+def get_azure_chat_completions_endpoint(base_url: str, model: str, api_version: str):
+    return f"{base_url}/openai/deployments/{model}/chat/completions?api-version={api_version}"
+
 
-MODEL_TO_AZURE_ENGINE = {
-    "gpt-4-1106-preview": "gpt-4",
-    "gpt-4": "gpt-4",
-    "gpt-4-32k": "gpt-4-32k",
-    "gpt-3.5": "gpt-35-turbo",
-    "gpt-3.5-turbo": "gpt-35-turbo",
-    "gpt-3.5-turbo-16k": "gpt-35-turbo-16k",
-}
+def get_azure_embeddings_endpoint(base_url: str, model: str, api_version: str):
+    return f"{base_url}/openai/deployments/{model}/embeddings?api-version={api_version}"
 
 
-def clean_azure_endpoint(raw_endpoint_name: str) -> str:
-    """Make sure the endpoint is of format 'https://YOUR_RESOURCE_NAME.openai.azure.com'"""
-    if raw_endpoint_name is None:
-        raise ValueError(raw_endpoint_name)
-    endpoint_address = raw_endpoint_name.strip("/").replace(".openai.azure.com", "")
-    endpoint_address = endpoint_address.replace("http://", "")
-    endpoint_address = endpoint_address.replace("https://", "")
-    return endpoint_address
+def get_azure_model_list_endpoint(base_url: str, api_version: str):
+    return f"{base_url}/openai/models?api-version={api_version}"
 
 
-def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version: str) -> dict:
+def azure_openai_get_model_list(base_url: str, api_key: str, api_version: str) -> list:
     """https://learn.microsoft.com/en-us/rest/api/azureopenai/models/list?view=rest-azureopenai-2023-05-15&tabs=HTTP"""
-    from letta.utils import printd
 
     # https://xxx.openai.azure.com/openai/models?api-version=xxx
-    url = smart_urljoin(url, "openai")
-    url = smart_urljoin(url, f"models?api-version={api_version}")
-
     headers = {"Content-Type": "application/json"}
     if api_key is not None:
         headers["api-key"] = f"{api_key}"
 
-    printd(f"Sending request to {url}")
+    url = get_azure_model_list_endpoint(base_url, api_version)
     try:
         response = requests.get(url, headers=headers)
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response = {response}")
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got HTTPError, exception={http_err}, response={response}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got RequestException, exception={req_err}, response={response}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        try:
-            response = response.json()
-        except:
-            pass
-        printd(f"Got unknown Exception, exception={e}, response={response}")
-        raise e
+        response.raise_for_status()
+    except requests.RequestException as e:
+        raise RuntimeError(f"Failed to retrieve model list: {e}")
+
+    return response.json().get("data", [])
+
+
+def azure_openai_get_chat_completion_model_list(base_url: str, api_key: str, api_version: str) -> list:
+    model_list = azure_openai_get_model_list(base_url, api_key, api_version)
+    # Extract models that support text generation
+    model_options = [m for m in model_list if m.get("capabilities").get("chat_completion") == True]
+    return model_options
+
+
+def azure_openai_get_embeddings_model_list(base_url: str, api_key: str, api_version: str, require_embedding_in_name: bool = True) -> list:
+    def valid_embedding_model(m: dict):
+        valid_name = True
+        if require_embedding_in_name:
+            valid_name = "embedding" in m["id"]
+
+        return m.get("capabilities").get("embeddings") == True and valid_name
+
+    model_list = azure_openai_get_model_list(base_url, api_key, api_version)
+    # Extract models that support embeddings
+
+    model_options = [m for m in model_list if valid_embedding_model(m)]
+    return model_options
 
 
 def azure_openai_chat_completions_request(
-    resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict
+    model_settings: ModelSettings, llm_config: LLMConfig, api_key: str, chat_completion_request: ChatCompletionRequest
 ) -> ChatCompletionResponse:
     """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions"""
-    from letta.utils import printd
 
-    assert resource_name is not None, "Missing required field when calling Azure OpenAI"
-    assert deployment_id is not None, "Missing required field when calling Azure OpenAI"
-    assert api_version is not None, "Missing required field when calling Azure OpenAI"
     assert api_key is not None, "Missing required field when calling Azure OpenAI"
 
-    resource_name = clean_azure_endpoint(resource_name)
-    url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/chat/completions?api-version={api_version}"
     headers = {"Content-Type": "application/json", "api-key": f"{api_key}"}
+    data = chat_completion_request.model_dump(exclude_none=True)
 
     # If functions == None, strip from the payload
     if "functions" in data and data["functions"] is None:
@@ -95,61 +79,22 @@ def azure_openai_chat_completions_request(
         data.pop("tools")
         data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")
 
-    printd(f"Sending request to {url}")
-    try:
-        data["messages"] = [i.to_openai_dict() for i in data["messages"]]
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it
-        if "content" not in response["choices"][0].get("message"):
-            response["choices"][0]["message"]["content"] = None
-        response = ChatCompletionResponse(**response)  # convert to 'dot-dict' style which is the openai python client default
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    url = get_azure_chat_completions_endpoint(model_settings.azure_base_url, llm_config.model, model_settings.api_version)
+    response_json = make_post_request(url, headers, data)
+    # NOTE: azure openai does not include "content" in the response when it is None, so we need to add it
+    if "content" not in response_json["choices"][0].get("message"):
+        response_json["choices"][0]["message"]["content"] = None
+    response = ChatCompletionResponse(**response_json)  # convert to 'dot-dict' style which is the openai python client default
+    return response
 
 
 def azure_openai_embeddings_request(
     resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict
 ) -> EmbeddingResponse:
     """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings"""
-    from letta.utils import printd
 
-    resource_name = clean_azure_endpoint(resource_name)
     url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/embeddings?api-version={api_version}"
     headers = {"Content-Type": "application/json", "api-key": f"{api_key}"}
 
-    printd(f"Sending request to {url}")
-    try:
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        response = EmbeddingResponse(**response)  # convert to 'dot-dict' style which is the openai python client default
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    response_json = make_post_request(url, headers, data)
+    return EmbeddingResponse(**response_json)
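The new endpoint builders make the Azure URL scheme explicit. For example (hypothetical resource name and API version):

    base_url = "https://my-resource.openai.azure.com"
    url = get_azure_chat_completions_endpoint(base_url, "gpt-4o-mini-2024-07-18", "2023-05-15")
    # -> https://my-resource.openai.azure.com/openai/deployments/gpt-4o-mini-2024-07-18/chat/completions?api-version=2023-05-15

Note that azure_openai_embeddings_request still builds its URL from resource_name directly rather than going through get_azure_embeddings_endpoint.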
letta/llm_api/azure_openai_constants.py ADDED
@@ -0,0 +1,10 @@
+AZURE_MODEL_TO_CONTEXT_LENGTH = {
+    "babbage-002": 16384,
+    "davinci-002": 16384,
+    "gpt-35-turbo-0613": 4096,
+    "gpt-35-turbo-1106": 16385,
+    "gpt-35-turbo-0125": 16385,
+    "gpt-4-0613": 8192,
+    "gpt-4o-mini-2024-07-18": 128000,
+    "gpt-4o-2024-08-06": 128000,
+}
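A plausible use of this table is a context-window lookup with a conservative fallback (a sketch; the helper name and fallback value are illustrative assumptions, not part of this diff):

    from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH

    DEFAULT_CONTEXT_LENGTH = 4096  # assumed fallback, not from the diff

    def get_azure_context_window(model: str) -> int:
        # Look up the deployment's context window, defaulting when the model is unlisted
        return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model, DEFAULT_CONTEXT_LENGTH)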