langroid 0.53.6__py3-none-any.whl → 0.53.8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
@@ -502,6 +502,17 @@ class ChatAgent(Agent):
         idx = self.nth_message_idx_with_role(role, n_role_msgs)
         return self.message_history[idx]

+    def last_message_idx_with_role(self, role: Role) -> int:
+        """Index of last message in message_history, with specified role.
+        Return -1 if not found. Index = 0 is the first message in the history.
+        """
+        indices_with_role = [
+            i for i, m in enumerate(self.message_history) if m.role == role
+        ]
+        if len(indices_with_role) == 0:
+            return -1
+        return indices_with_role[-1]
+
     def nth_message_idx_with_role(self, role: Role, n: int) -> int:
         """Index of `n`th message in message_history, with specified role.
         (n is assumed to be 1-based, i.e. 1 is the first message with that role).
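The new helper complements `nth_message_idx_with_role` by scanning for the latest occurrence of a role. A minimal standalone sketch of the same lookup logic, using plain role strings instead of langroid's `Role` enum (the history values are illustrative):

    def last_idx_with_role(roles: list[str], role: str) -> int:
        """Return index of the last occurrence of `role`, or -1 if absent."""
        indices = [i for i, r in enumerate(roles) if r == role]
        return indices[-1] if indices else -1

    history_roles = ["system", "user", "assistant", "user"]
    assert last_idx_with_role(history_roles, "user") == 3       # last user msg
    assert last_idx_with_role(history_roles, "function") == -1  # role not present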
@@ -1229,9 +1240,18 @@ class ChatAgent(Agent):
         idx: int,
         tokens: int = 5,
         warning: str = "...[Contents truncated!]",
+        inplace: bool = True,
     ) -> LLMMessage:
-        """Truncate message at idx in msg history to `tokens` tokens"""
-        llm_msg = self.message_history[idx]
+        """
+        Truncate message at idx in msg history to `tokens` tokens.
+
+        If inplace is True, the message is truncated in place, else
+        it LEAVES the original message INTACT and returns a new message
+        """
+        if inplace:
+            llm_msg = self.message_history[idx]
+        else:
+            llm_msg = copy.deepcopy(self.message_history[idx])
         orig_content = llm_msg.content
         new_content = (
             self.parser.truncate_tokens(orig_content, tokens)
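A brief usage sketch of the new `inplace` flag, assuming an existing `ChatAgent` instance named `agent` (the index and token count are illustrative):

    # inplace=False: message_history[1] is left intact; a deep-copied,
    # truncated LLMMessage is returned for the caller to use.
    shortened = agent.truncate_message(1, tokens=5, inplace=False)

    # inplace=True (the default): message_history[1] itself is shortened
    # to roughly `tokens` tokens plus the warning suffix.
    agent.truncate_message(1, tokens=5)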
@@ -1463,6 +1483,10 @@ class ChatAgent(Agent):
         """
         Prepare messages to be sent to self.llm_response_messages,
         which is the main method that calls the LLM API to get a response.
+        If desired output tokens + message history exceeds the model context length,
+        then first the max output tokens is reduced to fit, and if that is not
+        possible, older messages may be truncated to accommodate at least
+        self.config.llm.min_output_tokens of output.

         Returns:
             Tuple[List[LLMMessage], int]: (messages, output_len)
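A worked example of the policy described in the docstring, with hypothetical numbers (the 300-token margin corresponds to the `CHAT_HISTORY_BUFFER` introduced below):

    context_len = 16_000   # model context length (hypothetical)
    hist_tokens = 14_000   # tokens currently in the message history (hypothetical)
    buffer = 300           # safety margin
    min_output = 100       # config.llm.min_output_tokens (hypothetical)

    output_len = context_len - hist_tokens - buffer  # 1_700
    # 1_700 > min_output, so the output is merely capped; no history truncation needed.
    # Only if output_len fell below min_output would older messages be truncated.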
@@ -1530,17 +1554,42 @@ class ChatAgent(Agent):
             truncate
             and output_len > self.llm.chat_context_length() - self.chat_num_tokens(hist)
         ):
+            CHAT_HISTORY_BUFFER = 300
             # chat + output > max context length,
             # so first try to shorten requested output len to fit;
-            # use an extra margin of 300 tokens in case our calcs are off
+            # use an extra margin of CHAT_HISTORY_BUFFER tokens
+            # in case our calcs are off (and to allow for some extra tokens)
             output_len = (
-                self.llm.chat_context_length() - self.chat_num_tokens(hist) - 300
+                self.llm.chat_context_length()
+                - self.chat_num_tokens(hist)
+                - CHAT_HISTORY_BUFFER
             )
-            if output_len < self.config.llm.min_output_tokens:
-                # unacceptably small output len, so drop early parts of conv history
-                # if output_len is still too long, then drop early parts of conv history
+            if output_len > self.config.llm.min_output_tokens:
+                logger.warning(
+                    f"""
+                    Chat Model context length is {self.llm.chat_context_length()},
+                    but the current message history is {self.chat_num_tokens(hist)}
+                    tokens long, which does not allow
+                    {self.config.llm.model_max_output_tokens} output tokens.
+                    Therefore we reduced `max_output_tokens` to {output_len} tokens,
+                    so they can fit within the model's context length
+                    """
+                )
+            else:
+                # unacceptably small output len, so compress early parts of conv
+                # history if output_len is still too long.
                 # TODO we should really be doing summarization or other types of
                 # prompt-size reduction
+                msg_idx_to_compress = 1  # don't touch system msg
+                # we will try compressing msg indices up to but not including
+                # last user msg
+                last_msg_idx_to_compress = (
+                    self.last_message_idx_with_role(
+                        role=Role.USER,
+                    )
+                    - 1
+                )
+                n_truncated = 0
                 while (
                     self.chat_num_tokens(hist)
                     > self.llm.chat_context_length() - self.config.llm.min_output_tokens
@@ -1548,14 +1597,14 @@ class ChatAgent(Agent):
                     # try dropping early parts of conv history
                     # TODO we should really be doing summarization or other types of
                     # prompt-size reduction
-                    if len(hist) <= 2:
+                    if msg_idx_to_compress > last_msg_idx_to_compress:
                         # We want to preserve the first message (typically system msg)
                         # and last message (user msg).
                         raise ValueError(
                             """
                             The (message history + max_output_tokens) is longer than the
                             max chat context length of this model, and we have tried
-                            reducing the requested max output tokens, as well as dropping
+                            reducing the requested max output tokens, as well as truncating
                             early parts of the message history, to accommodate the model
                             context length, but we have run out of msgs to drop.

@@ -1566,51 +1615,59 @@ class ChatAgent(Agent):
                             - decreasing `max_output_tokens`
                             """
                         )
-                    # drop the second message, i.e. first msg after the sys msg
-                    # (typically user msg).
-                    ChatDocument.delete_id(hist[1].chat_document_id)
-                    hist = hist[:1] + hist[2:]
+                    n_truncated += 1
+                    # compress the msg at idx `msg_idx_to_compress`
+                    hist[msg_idx_to_compress] = self.truncate_message(
+                        msg_idx_to_compress,
+                        tokens=30,
+                        warning="... [Contents truncated!]",
+                    )

-                if len(hist) < len(self.message_history):
+                    msg_idx_to_compress += 1
+
+                output_len = min(
+                    self.config.llm.model_max_output_tokens,
+                    self.llm.chat_context_length()
+                    - self.chat_num_tokens(hist)
+                    - CHAT_HISTORY_BUFFER,
+                )
+                if output_len < self.config.llm.min_output_tokens:
+                    raise ValueError(
+                        f"""
+                        Tried to shorten prompt history for chat mode
+                        but even after truncating all messages except system msg and
+                        last (user) msg,
+                        the history token len {self.chat_num_tokens(hist)} is
+                        too long to accommodate the desired minimum output tokens
+                        {self.config.llm.min_output_tokens} within the
+                        model's context length {self.llm.chat_context_length()}.
+                        Please try shortening the system msg or user prompts,
+                        or adjust `config.llm.min_output_tokens` to be smaller.
+                        """
+                    )
+                else:
+                    # we MUST have truncated at least one msg
                     msg_tokens = self.chat_num_tokens()
                     logger.warning(
                         f"""
                         Chat Model context length is {self.llm.chat_context_length()}
-                        tokens, but the current message history is {msg_tokens} tokens long.
-                        Dropped the {len(self.message_history) - len(hist)} messages
-                        from early in the conversation history so that history token
-                        length is {self.chat_num_tokens(hist)}.
-                        This may still not be low enough to allow minimum output length of
-                        {self.config.llm.min_output_tokens} tokens.
+                        tokens, but the current message history is {msg_tokens} tokens long,
+                        which does not allow {self.config.llm.model_max_output_tokens}
+                        output tokens.
+                        Therefore we truncated the first {n_truncated} messages
+                        in the conversation history so that history token
+                        length is reduced to {self.chat_num_tokens(hist)}, and
+                        we use `max_output_tokens = {output_len}`,
+                        so they can fit within the model's context length
+                        of {self.llm.chat_context_length()} tokens.
                         """
                     )

-        if output_len < 0:
-            raise ValueError(
-                f"""
-                Tried to shorten prompt history for chat mode
-                but even after dropping all messages except system msg and last (
-                user) msg, the history token len {self.chat_num_tokens(hist)} is longer
-                than the model's max context length {self.llm.chat_context_length()}.
-                Please try shortening the system msg or user prompts.
-                """
-            )
-        if output_len < self.config.llm.min_output_tokens:
-            logger.warning(
-                f"""
-                Tried to shorten prompt history for chat mode
-                but the feasible output length {output_len} is still
-                less than the minimum output length {self.config.llm.min_output_tokens}.
-                Your chat history is too long for this model,
-                and the response may be truncated.
-                """
-            )
         if isinstance(message, ChatDocument):
             # record the position of the corresponding LLMMessage in
             # the message_history
             message.metadata.msg_idx = len(hist) - 1
             message.metadata.agent_id = self.id
-
         return hist, output_len

     def _function_args(
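The hunks above replace the old drop-whole-messages strategy with per-message truncation: starting just after the system message, each message up to (but not including) the last user message is squeezed to about 30 tokens until the history leaves room for at least `min_output_tokens`. A simplified, self-contained sketch of that strategy (not langroid code; a whitespace split stands in for the real tokenizer):

    from typing import List, Tuple

    def compress_history(
        hist: List[str],        # toy "messages" as plain strings
        last_user_idx: int,     # index of the last user message
        ctx_len: int,           # model context length
        min_output: int,        # minimum acceptable output tokens
        buffer: int = 300,      # safety margin, like CHAT_HISTORY_BUFFER
    ) -> Tuple[List[str], int]:
        def n_tokens(msgs: List[str]) -> int:
            return sum(len(m.split()) for m in msgs)  # crude stand-in tokenizer

        idx = 1  # never touch the system msg at index 0
        while n_tokens(hist) > ctx_len - min_output:
            if idx >= last_user_idx:  # ran out of messages we may compress
                raise ValueError("history too long even after truncating messages")
            hist[idx] = " ".join(hist[idx].split()[:30]) + " ...[Contents truncated!]"
            idx += 1
        return hist, ctx_len - n_tokens(hist) - buffer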
@@ -620,33 +620,31 @@ class LanguageModel(ABC):
     def __call__(self, prompt: str, max_tokens: int) -> LLMResponse:
         return self.generate(prompt, max_tokens)

+    @staticmethod
+    def _fallback_model_names(model: str) -> List[str]:
+        parts = model.split("/")
+        fallbacks = []
+        for i in range(1, len(parts)):
+            fallbacks.append("/".join(parts[i:]))
+        return fallbacks
+
     def info(self) -> ModelInfo:
         """Info of relevant chat model"""
-        model = (
-            self.config.completion_model
-            if self.config.use_completion_for_chat
-            else self.config.chat_model
-        )
         orig_model = (
             self.config.completion_model
             if self.config.use_completion_for_chat
             else self.chat_model_orig
         )
-        return get_model_info(orig_model, model)
+        return get_model_info(orig_model, self._fallback_model_names(orig_model))

     def completion_info(self) -> ModelInfo:
         """Info of relevant completion model"""
-        model = (
-            self.config.chat_model
-            if self.config.use_chat_for_completion
-            else self.config.completion_model
-        )
         orig_model = (
             self.chat_model_orig
             if self.config.use_chat_for_completion
             else self.config.completion_model
         )
-        return get_model_info(orig_model, model)
+        return get_model_info(orig_model, self._fallback_model_names(orig_model))

     def supports_functions_or_tools(self) -> bool:
         """
@@ -0,0 +1,128 @@
+"""
+An API for an Agent in an MCP Server to use for chat-completions
+"""
+
+from typing import Awaitable, Callable, Dict, List, Optional, Union
+
+from fastmcp.server import Context
+
+import langroid.language_models as lm
+from langroid.language_models import LLMResponse
+from langroid.language_models.base import (
+    LanguageModel,
+    LLMConfig,
+    OpenAIJsonSchemaSpec,
+    OpenAIToolSpec,
+    ToolChoiceTypes,
+)
+from langroid.utils.types import to_string
+
+
+def none_fn(x: str) -> None | str:
+    return None
+
+
+class MCPClientLMConfig(LLMConfig):
+    """
+    Mock Language Model Configuration.
+
+    Attributes:
+        response_dict (Dict[str, str]): A "response rule-book", in the form of a
+            dictionary; if last msg in dialog is x,then respond with response_dict[x]
+    """
+
+    response_dict: Dict[str, str] = {}
+    response_fn: Callable[[str], None | str] = none_fn
+    response_fn_async: Optional[Callable[[str], Awaitable[Optional[str]]]] = None
+    default_response: str = "Mock response"
+
+    type: str = "mock"
+
+
+class MockLM(LanguageModel):
+
+    def __init__(self, config: MockLMConfig = MockLMConfig()):
+        super().__init__(config)
+        self.config: MockLMConfig = config
+
+    def _response(self, msg: str) -> LLMResponse:
+        # response is based on this fallback order:
+        # - response_dict
+        # - response_fn
+        # - default_response
+        mapped_response = self.config.response_dict.get(
+            msg, self.config.response_fn(msg) or self.config.default_response
+        )
+        return lm.LLMResponse(
+            message=to_string(mapped_response),
+            cached=False,
+        )
+
+    async def _response_async(self, msg: str) -> LLMResponse:
+        # response is based on this fallback order:
+        # - response_dict
+        # - response_fn_async
+        # - response_fn
+        # - default_response
+        if self.config.response_fn_async is not None:
+            response = await self.config.response_fn_async(msg)
+        else:
+            response = self.config.response_fn(msg)
+
+        mapped_response = self.config.response_dict.get(
+            msg, response or self.config.default_response
+        )
+        return lm.LLMResponse(
+            message=to_string(mapped_response),
+            cached=False,
+        )
+
+    def chat(
+        self,
+        messages: Union[str, List[lm.LLMMessage]],
+        max_tokens: int = 200,
+        tools: Optional[List[OpenAIToolSpec]] = None,
+        tool_choice: ToolChoiceTypes | Dict[str, str | Dict[str, str]] = "auto",
+        functions: Optional[List[lm.LLMFunctionSpec]] = None,
+        function_call: str | Dict[str, str] = "auto",
+        response_format: Optional[OpenAIJsonSchemaSpec] = None,
+    ) -> lm.LLMResponse:
+        """
+        Mock chat function for testing
+        """
+        last_msg = messages[-1].content if isinstance(messages, list) else messages
+        return self._response(last_msg)
+
+    async def achat(
+        self,
+        messages: Union[str, List[lm.LLMMessage]],
+        max_tokens: int = 200,
+        tools: Optional[List[OpenAIToolSpec]] = None,
+        tool_choice: ToolChoiceTypes | Dict[str, str | Dict[str, str]] = "auto",
+        functions: Optional[List[lm.LLMFunctionSpec]] = None,
+        function_call: str | Dict[str, str] = "auto",
+        response_format: Optional[OpenAIJsonSchemaSpec] = None,
+    ) -> lm.LLMResponse:
+        """
+        Mock chat function for testing
+        """
+        last_msg = messages[-1].content if isinstance(messages, list) else messages
+        return await self._response_async(last_msg)
+
+    def generate(self, prompt: str, max_tokens: int = 200) -> lm.LLMResponse:
+        """
+        Mock generate function for testing
+        """
+        return self._response(prompt)
+
+    async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
+        """
+        Mock generate function for testing
+        """
+        return await self._response_async(prompt)
+
+    def get_stream(self) -> bool:
+        return False
+
+    def set_stream(self, stream: bool) -> bool:
+        return False
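The `_response` fallback order in this new module (response_dict, then response_fn / response_fn_async, then default_response) mirrors langroid's existing mock LM. A small usage sketch against that established `MockLM`/`MockLMConfig` API, with illustrative values:

    from langroid.language_models.mock_lm import MockLM, MockLMConfig

    mock = MockLM(
        MockLMConfig(
            response_dict={"ping": "pong"},      # exact-match "rule book"
            default_response="no rule matched",  # used when no dict entry or fn applies
        )
    )
    print(mock.chat("ping").message)   # -> "pong"
    print(mock.chat("hello").message)  # -> "no rule matched"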
@@ -27,6 +27,7 @@ class MockLMConfig(LLMConfig):
             dictionary; if last msg in dialog is x,then respond with response_dict[x]
     """

+    chat_context_length: int = 1_000_000_000  # infinite
     response_dict: Dict[str, str] = {}
     response_fn: Callable[[str], None | str] = none_fn
     response_fn_async: Optional[Callable[[str], Awaitable[Optional[str]]]] = None
@@ -406,10 +406,21 @@ MODEL_INFO: Dict[str, ModelInfo] = {

 def get_model_info(
     model: str | ModelName,
-    fallback_model: str | ModelName = "",
+    fallback_models: List[str] = [],
 ) -> ModelInfo:
     """Get model information by name or enum value"""
-    return _get_model_info(model) or _get_model_info(fallback_model) or ModelInfo()
+    # Sequence of models to try, starting with the primary model
+    models_to_try = [model] + fallback_models
+
+    # Find the first model in the sequence that has info defined using next()
+    # on a generator expression that filters out None results from _get_model_info
+    found_info = next(
+        (info for m in models_to_try if (info := _get_model_info(m)) is not None),
+        None,  # Default value if the iterator is exhausted (no valid info found)
+    )
+
+    # Return the found info, or a default ModelInfo if none was found
+    return found_info or ModelInfo()


 def _get_model_info(model: str | ModelName) -> ModelInfo | None:
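Combined with `_fallback_model_names` in base.py above, lookup now walks a list of progressively shorter names instead of a single fallback. An illustrative call (the unprefixed "gpt-4o" is the form expected to have an entry in MODEL_INFO):

    from langroid.language_models.model_info import get_model_info

    # Tries "openrouter/openai/gpt-4o", then "openai/gpt-4o", then "gpt-4o",
    # returning info for the first name with an entry (else a default ModelInfo).
    info = get_model_info("openrouter/openai/gpt-4o", ["openai/gpt-4o", "gpt-4o"])
    print(info)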
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.53.6
+Version: 0.53.8
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
@@ -5,7 +5,7 @@ langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
 langroid/agent/base.py,sha256=zHwhNU403H-ZvogH4QhKTzaZn5_jt0ZdPHzSEmycDoc,80035
 langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
-langroid/agent/chat_agent.py,sha256=igo7wl3tOig7yae8NokEEqXS5AYuAeWJGq1YZhpzOho,85739
+langroid/agent/chat_agent.py,sha256=2HIYzYxkrGkRIS97ioKfIqjaW3RbX89M39LjzBobBEY,88381
 langroid/agent/chat_document.py,sha256=6O20Fp4QrquykaF2jFtwNHkvcoDte1LLwVZNk9mVH9c,18057
 langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
 langroid/agent/task.py,sha256=HB6N-Jn80HFqCf0ZYOC1v3Bn3oO7NLjShHQJJFwW0q4,90557
@@ -71,10 +71,11 @@ langroid/embedding_models/protoc/embeddings_pb2.pyi,sha256=UkNy7BrNsmQm0vLb3NtGX
 langroid/embedding_models/protoc/embeddings_pb2_grpc.py,sha256=9dYQqkW3JPyBpSEjeGXTNpSqAkC-6FPtBHyteVob2Y8,2452
 langroid/language_models/__init__.py,sha256=3aD2qC1lz8v12HX4B-dilv27gNxYdGdeu1QvDlkqqHs,1095
 langroid/language_models/azure_openai.py,sha256=SW0Fp_y6HpERr9l6TtF6CYsKgKwjUf_hSL_2mhTV4wI,5034
-langroid/language_models/base.py,sha256=Axj8U9o9r7ovpCYqhNJ4SaVYLvufLRQXnr51IyIYJKY,28493
+langroid/language_models/base.py,sha256=253xcwXZ0yxSQ1W4SR50tAPZKCDc35yyU1o35EqB9b8,28484
 langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
-langroid/language_models/mock_lm.py,sha256=5BgHKDVRWFbUwDT_PFgTZXz9-k8wJSA2e3PZmyDgQ1k,4022
-langroid/language_models/model_info.py,sha256=7Fv5YByZjsRXKhkaa6okOM8jhDVpWZu6xlYAN3WTSCk,14453
+langroid/language_models/mcp_client_lm.py,sha256=wyDvlc26E_En5u_ZNZxajCHm8KBNi4jzG-dL76QCdt4,4098
+langroid/language_models/mock_lm.py,sha256=tA9JpURznsMZ59iRhFYMmaYQzAc0D0BT-PiJIV58sAk,4079
+langroid/language_models/model_info.py,sha256=0e011vJZMi7XU9OkKT6doxlybrNJfMlP54klLDDNgFg,14939
 langroid/language_models/openai_gpt.py,sha256=F28jqTEerN32m14q3K0oc3vnvBT8J7Q9xqXGZNKUjKU,85938
 langroid/language_models/utils.py,sha256=n55Oe2_V_4VNGhytvPWLYC-0tFS07RTjN83KWl-p_MI,6032
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
132
133
  langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
133
134
  langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
134
135
  langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
135
- langroid-0.53.6.dist-info/METADATA,sha256=kOJSlrle7MZXPeosRhCuqg25rdUJFF21wif68zBUkcQ,64945
136
- langroid-0.53.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
137
- langroid-0.53.6.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
138
- langroid-0.53.6.dist-info/RECORD,,
136
+ langroid-0.53.8.dist-info/METADATA,sha256=e4tCH-lXJE0OYlybGv2EIE84o68OQEc3HIxAmYj7BSc,64945
137
+ langroid-0.53.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
138
+ langroid-0.53.8.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
139
+ langroid-0.53.8.dist-info/RECORD,,