langroid 0.39.0__tar.gz → 0.39.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. {langroid-0.39.0 → langroid-0.39.2}/PKG-INFO +1 -1
  2. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/base.py +3 -3
  3. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/chat_agent.py +26 -15
  4. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/doc_chat_agent.py +1 -1
  5. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/__init__.py +4 -3
  6. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/base.py +8 -1
  7. langroid-0.39.2/langroid/language_models/model_info.py +307 -0
  8. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/openai_gpt.py +45 -153
  9. {langroid-0.39.0 → langroid-0.39.2}/langroid/mytypes.py +2 -2
  10. {langroid-0.39.0 → langroid-0.39.2}/pyproject.toml +1 -1
  11. {langroid-0.39.0 → langroid-0.39.2}/.gitignore +0 -0
  12. {langroid-0.39.0 → langroid-0.39.2}/LICENSE +0 -0
  13. {langroid-0.39.0 → langroid-0.39.2}/README.md +0 -0
  14. {langroid-0.39.0 → langroid-0.39.2}/langroid/__init__.py +0 -0
  15. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/__init__.py +0 -0
  16. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/batch.py +0 -0
  17. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/callbacks/__init__.py +0 -0
  18. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/callbacks/chainlit.py +0 -0
  19. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/chat_document.py +0 -0
  20. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/openai_assistant.py +0 -0
  21. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/__init__.py +0 -0
  22. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/arangodb/__init__.py +0 -0
  23. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  24. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/arangodb/system_messages.py +0 -0
  25. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/arangodb/tools.py +0 -0
  26. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/arangodb/utils.py +0 -0
  27. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  28. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/lance_rag/__init__.py +0 -0
  29. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  30. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  31. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  32. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/lance_tools.py +0 -0
  33. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/neo4j/__init__.py +0 -0
  34. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  35. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  36. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/neo4j/system_messages.py +0 -0
  37. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/neo4j/tools.py +0 -0
  38. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  39. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/retriever_agent.py +0 -0
  40. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/sql/__init__.py +0 -0
  41. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  42. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/sql/utils/__init__.py +0 -0
  43. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  44. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  45. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/sql/utils/system_message.py +0 -0
  46. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/sql/utils/tools.py +0 -0
  47. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/table_chat_agent.py +0 -0
  48. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/task.py +0 -0
  49. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tool_message.py +0 -0
  50. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/__init__.py +0 -0
  51. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  52. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/file_tools.py +0 -0
  53. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/google_search_tool.py +0 -0
  54. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  55. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/orchestration.py +0 -0
  56. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/recipient_tool.py +0 -0
  57. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/retrieval_tool.py +0 -0
  58. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/rewind_tool.py +0 -0
  59. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/tools/segment_extract_tool.py +0 -0
  60. {langroid-0.39.0 → langroid-0.39.2}/langroid/agent/xml_tool_message.py +0 -0
  61. {langroid-0.39.0 → langroid-0.39.2}/langroid/cachedb/__init__.py +0 -0
  62. {langroid-0.39.0 → langroid-0.39.2}/langroid/cachedb/base.py +0 -0
  63. {langroid-0.39.0 → langroid-0.39.2}/langroid/cachedb/momento_cachedb.py +0 -0
  64. {langroid-0.39.0 → langroid-0.39.2}/langroid/cachedb/redis_cachedb.py +0 -0
  65. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/__init__.py +0 -0
  66. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/base.py +0 -0
  67. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/models.py +0 -0
  68. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/protoc/__init__.py +0 -0
  69. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  70. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  71. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  72. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  73. {langroid-0.39.0 → langroid-0.39.2}/langroid/embedding_models/remote_embeds.py +0 -0
  74. {langroid-0.39.0 → langroid-0.39.2}/langroid/exceptions.py +0 -0
  75. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/azure_openai.py +0 -0
  76. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/config.py +0 -0
  77. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/mock_lm.py +0 -0
  78. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  79. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/prompt_formatter/base.py +0 -0
  80. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  81. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  82. {langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/utils.py +0 -0
  83. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/__init__.py +0 -0
  84. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/agent_chats.py +0 -0
  85. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/code_parser.py +0 -0
  86. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/document_parser.py +0 -0
  87. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/para_sentence_split.py +0 -0
  88. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/parse_json.py +0 -0
  89. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/parser.py +0 -0
  90. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/pdf_utils.py +0 -0
  91. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/repo_loader.py +0 -0
  92. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/routing.py +0 -0
  93. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/search.py +0 -0
  94. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/spider.py +0 -0
  95. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/table_loader.py +0 -0
  96. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/url_loader.py +0 -0
  97. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/urls.py +0 -0
  98. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/utils.py +0 -0
  99. {langroid-0.39.0 → langroid-0.39.2}/langroid/parsing/web_search.py +0 -0
  100. {langroid-0.39.0 → langroid-0.39.2}/langroid/prompts/__init__.py +0 -0
  101. {langroid-0.39.0 → langroid-0.39.2}/langroid/prompts/dialog.py +0 -0
  102. {langroid-0.39.0 → langroid-0.39.2}/langroid/prompts/prompts_config.py +0 -0
  103. {langroid-0.39.0 → langroid-0.39.2}/langroid/prompts/templates.py +0 -0
  104. {langroid-0.39.0 → langroid-0.39.2}/langroid/py.typed +0 -0
  105. {langroid-0.39.0 → langroid-0.39.2}/langroid/pydantic_v1/__init__.py +0 -0
  106. {langroid-0.39.0 → langroid-0.39.2}/langroid/pydantic_v1/main.py +0 -0
  107. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/__init__.py +0 -0
  108. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/algorithms/__init__.py +0 -0
  109. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/algorithms/graph.py +0 -0
  110. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/configuration.py +0 -0
  111. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/constants.py +0 -0
  112. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/git_utils.py +0 -0
  113. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/globals.py +0 -0
  114. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/logging.py +0 -0
  115. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/object_registry.py +0 -0
  116. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/output/__init__.py +0 -0
  117. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/output/citations.py +0 -0
  118. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/output/printing.py +0 -0
  119. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/output/status.py +0 -0
  120. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/pandas_utils.py +0 -0
  121. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/pydantic_utils.py +0 -0
  122. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/system.py +0 -0
  123. {langroid-0.39.0 → langroid-0.39.2}/langroid/utils/types.py +0 -0
  124. {langroid-0.39.0 → langroid-0.39.2}/langroid/vector_store/__init__.py +0 -0
  125. {langroid-0.39.0 → langroid-0.39.2}/langroid/vector_store/base.py +0 -0
  126. {langroid-0.39.0 → langroid-0.39.2}/langroid/vector_store/chromadb.py +0 -0
  127. {langroid-0.39.0 → langroid-0.39.2}/langroid/vector_store/lancedb.py +0 -0
  128. {langroid-0.39.0 → langroid-0.39.2}/langroid/vector_store/meilisearch.py +0 -0
  129. {langroid-0.39.0 → langroid-0.39.2}/langroid/vector_store/momento.py +0 -0
  130. {langroid-0.39.0 → langroid-0.39.2}/langroid/vector_store/qdrantdb.py +0 -0
  131. {langroid-0.39.0 → langroid-0.39.2}/langroid/vector_store/weaviatedb.py +0 -0
{langroid-0.39.0 → langroid-0.39.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.39.0
+Version: 0.39.2
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
{langroid-0.39.0 → langroid-0.39.2}/langroid/agent/base.py

@@ -917,7 +917,7 @@ class Agent(ABC):
         else:
             prompt = message
 
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if self.num_tokens(prompt) + output_len > self.llm.completion_context_length():
             output_len = self.llm.completion_context_length() - self.num_tokens(prompt)
             if output_len < self.config.llm.min_output_tokens:
@@ -986,7 +986,7 @@ class Agent(ABC):
             # show rich spinner only if not streaming!
             cm = status("LLM responding to message...")
             stack.enter_context(cm)
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if (
             self.num_tokens(prompt) + output_len
             > self.llm.completion_context_length()
@@ -1871,7 +1871,7 @@ class Agent(ABC):
         cumul_cost = format(tot_cost, ".4f")
         assert isinstance(self.llm, LanguageModel)
         context_length = self.llm.chat_context_length()
-        max_out = self.config.llm.max_output_tokens
+        max_out = self.config.llm.model_max_output_tokens
 
         llm_model = (
             "no-LLM" if self.config.llm is None else self.llm.config.chat_model
{langroid-0.39.0 → langroid-0.39.2}/langroid/agent/chat_agent.py

@@ -31,7 +31,7 @@ from langroid.language_models.base import (
     ToolChoiceTypes,
 )
 from langroid.language_models.openai_gpt import OpenAIGPT
-from langroid.mytypes import Entity, Routing
+from langroid.mytypes import Entity, NonToolAction
 from langroid.pydantic_v1 import BaseModel, ValidationError
 from langroid.utils.configuration import settings
 from langroid.utils.object_registry import ObjectRegistry
@@ -46,6 +46,7 @@ logger = logging.getLogger(__name__)
 class ChatAgentConfig(AgentConfig):
     """
     Configuration for ChatAgent
+
     Attributes:
         system_message: system message to include in message sequence
             (typically defines role and task of agent).
@@ -53,7 +54,8 @@ class ChatAgentConfig(AgentConfig):
         user_message: user message to include in message sequence.
             Used only if `task` is not specified in the constructor.
         use_tools: whether to use our own ToolMessages mechanism
-        non_tool_routing (Routing|str): routing when LLM generates non-tool msg.
+        handle_llm_no_tool (Any): desired agent_response when
+            LLM generates non-tool msg.
         use_functions_api: whether to use functions/tools native to the LLM API
             (e.g. OpenAI's `function_call` or `tool_call` mechanism)
         use_tools_api: When `use_functions_api` is True, if this is also True,
@@ -86,7 +88,7 @@ class ChatAgentConfig(AgentConfig):
 
     system_message: str = "You are a helpful assistant."
     user_message: Optional[str] = None
-    non_tool_routing: Routing | None = None
+    handle_llm_no_tool: Any = None
     use_tools: bool = False
     use_functions_api: bool = True
     use_tools_api: bool = False
@@ -596,16 +598,25 @@ class ChatAgent(Agent):
         Returns:
             Any: The result of the handler method
         """
-        if self.config.non_tool_routing is None:
+        if self.config.handle_llm_no_tool is None:
             return None
         if isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM:
             from langroid.agent.tools.orchestration import AgentDoneTool, ForwardTool
 
-            match self.config.non_tool_routing:
-                case Routing.FORWARD_USER:
-                    return ForwardTool(agent="User")
-                case Routing.DONE:
-                    return AgentDoneTool(content=msg.content, tools=msg.tool_messages)
+            no_tool_option = self.config.handle_llm_no_tool
+            if no_tool_option in list(NonToolAction):
+                # in case the `no_tool_option` is one of the special NonToolAction vals
+                match self.config.handle_llm_no_tool:
+                    case NonToolAction.FORWARD_USER:
+                        return ForwardTool(agent="User")
+                    case NonToolAction.DONE:
+                        return AgentDoneTool(
+                            content=msg.content, tools=msg.tool_messages
+                        )
+            # Otherwise just return `no_tool_option` as is:
+            # This can be any string, such as a specific nudge/reminder to the LLM,
+            # or even something like ResultTool etc.
+            return no_tool_option
 
     def unhandled_tools(self) -> set[str]:
         """The set of tools that are known but not handled.
@@ -1488,11 +1499,11 @@ class ChatAgent(Agent):
             self.message_history.extend(llm_msgs)
 
         hist = self.message_history
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if (
             truncate
             and self.chat_num_tokens(hist)
-            > self.llm.chat_context_length() - self.config.llm.max_output_tokens
+            > self.llm.chat_context_length() - self.config.llm.model_max_output_tokens
         ):
             # chat + output > max context length,
             # so first try to shorten requested output len to fit.
@@ -1517,7 +1528,7 @@ class ChatAgent(Agent):
                 The message history is longer than the max chat context
                 length allowed, and we have run out of messages to drop.
                 HINT: In your `OpenAIGPTConfig` object, try increasing
-                `chat_context_length` or decreasing `max_output_tokens`.
+                `chat_context_length` or decreasing `model_max_output_tokens`.
                 """
             )
         # drop the second message, i.e. first msg after the sys msg
@@ -1666,12 +1677,12 @@ class ChatAgent(Agent):
         Args:
             messages: seq of messages (with role, content fields) sent to LLM
             output_len: max number of tokens expected in response.
-                If None, use the LLM's default max_output_tokens.
+                If None, use the LLM's default model_max_output_tokens.
         Returns:
             Document (i.e. with fields "content", "metadata")
         """
         assert self.config.llm is not None and self.llm is not None
-        output_len = output_len or self.config.llm.max_output_tokens
+        output_len = output_len or self.config.llm.model_max_output_tokens
         streamer = noop_fn
         if self.llm.get_stream():
             streamer = self.callbacks.start_llm_stream()
@@ -1741,7 +1752,7 @@ class ChatAgent(Agent):
         Async version of `llm_response_messages`. See there for details.
         """
         assert self.config.llm is not None and self.llm is not None
-        output_len = output_len or self.config.llm.max_output_tokens
+        output_len = output_len or self.config.llm.model_max_output_tokens
         functions, fun_call, tools, force_tool, output_format = self._function_args()
         assert self.llm is not None
 
{langroid-0.39.0 → langroid-0.39.2}/langroid/agent/special/doc_chat_agent.py

@@ -1565,7 +1565,7 @@ class DocChatAgent(ChatAgent):
         tot_tokens = self.parser.num_tokens(full_text)
         MAX_INPUT_TOKENS = (
             self.llm.completion_context_length()
-            - self.config.llm.max_output_tokens
+            - self.config.llm.model_max_output_tokens
             - 100
         )
         if tot_tokens > MAX_INPUT_TOKENS:
{langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/__init__.py

@@ -15,14 +15,13 @@ from .base import (
     LLMTokenUsage,
     LLMResponse,
 )
-from .openai_gpt import (
+from .model_info import (
     OpenAIChatModel,
     AnthropicModel,
     GeminiModel,
     OpenAICompletionModel,
-    OpenAIGPTConfig,
-    OpenAIGPT,
 )
+from .openai_gpt import OpenAIGPTConfig, OpenAIGPT, OpenAICallParams
 from .mock_lm import MockLM, MockLMConfig
 from .azure_openai import AzureConfig, AzureGPT
 
@@ -32,6 +31,7 @@ __all__ = [
     "config",
     "base",
     "openai_gpt",
+    "model_info",
     "azure_openai",
     "prompt_formatter",
     "StreamEventType",
@@ -48,6 +48,7 @@ __all__ = [
     "OpenAICompletionModel",
     "OpenAIGPTConfig",
     "OpenAIGPT",
+    "OpenAICallParams",
     "AzureConfig",
     "AzureGPT",
     "MockLM",
{langroid-0.39.0 → langroid-0.39.2}/langroid/language_models/base.py

@@ -19,6 +19,7 @@ from typing import (
 
 from langroid.cachedb.base import CacheDBConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
+from langroid.language_models.model_info import get_model_info
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.parse_json import parse_imperfect_json, top_level_json_field
 from langroid.prompts.dialog import collate_chat_history
@@ -60,6 +61,7 @@ class LLMConfig(BaseSettings):
     streamer_async: Optional[Callable[..., Awaitable[None]]] = async_noop_fn
     api_base: str | None = None
     formatter: None | str = None
+    max_output_tokens: int | None = 8192  # specify None to use model_max_output_tokens
     timeout: int = 20  # timeout for API requests
     chat_model: str = ""
     completion_model: str = ""
@@ -67,7 +69,6 @@ class LLMConfig(BaseSettings):
     chat_context_length: int = 8000
     async_stream_quiet: bool = True  # suppress streaming output in async mode?
     completion_context_length: int = 8000
-    max_output_tokens: int = 1024  # generate at most this many tokens
     # if input length + max_output_tokens > context length of model,
     # we will try shortening requested output
     min_output_tokens: int = 64
@@ -84,6 +85,12 @@ class LLMConfig(BaseSettings):
     chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
     completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
 
+    @property
+    def model_max_output_tokens(self) -> int:
+        return (
+            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens
+        )
+
 
 class LLMFunctionCall(BaseModel):
     """
langroid-0.39.2/langroid/language_models/model_info.py (new file)

@@ -0,0 +1,307 @@
+from enum import Enum
+from typing import Dict, List, Optional
+
+from langroid.pydantic_v1 import BaseModel
+
+
+class ModelProvider(str, Enum):
+    """Enum for model providers"""
+
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    DEEPSEEK = "deepseek"
+    GOOGLE = "google"
+    UNKNOWN = "unknown"
+
+
+class ModelName(str, Enum):
+    """Parent class for all model name enums"""
+
+    pass
+
+
+class OpenAIChatModel(ModelName):
+    """Enum for OpenAI Chat models"""
+
+    GPT3_5_TURBO = "gpt-3.5-turbo-1106"
+    GPT4 = "gpt-4"
+    GPT4_TURBO = "gpt-4-turbo"
+    GPT4o = "gpt-4o"
+    GPT4o_MINI = "gpt-4o-mini"
+    O1 = "o1"
+    O1_MINI = "o1-mini"
+    O3_MINI = "o3-mini"
+
+
+class OpenAICompletionModel(str, Enum):
+    """Enum for OpenAI Completion models"""
+
+    DAVINCI = "davinci-002"
+    BABBAGE = "babbage-002"
+
+
+class AnthropicModel(ModelName):
+    """Enum for Anthropic models"""
+
+    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_OPUS = "claude-3-opus-latest"
+    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
+    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+
+
+class DeepSeekModel(ModelName):
+    """Enum for DeepSeek models direct from DeepSeek API"""
+
+    DEEPSEEK = "deepseek/deepseek-chat"
+    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
+
+
+class GeminiModel(ModelName):
+    """Enum for Gemini models"""
+
+    GEMINI_1_5_FLASH = "gemini/gemini-1.5-flash"
+    GEMINI_1_5_FLASH_8B = "gemini/gemini-1.5-flash-8b"
+    GEMINI_1_5_PRO = "gemini/gemini-1.5-pro"
+    GEMINI_2_FLASH = "gemini/gemini-2.0-flash-exp"
+    GEMINI_2_FLASH_THINKING = "gemini/gemini-2.0-flash-thinking-exp"
+
+
+class ModelInfo(BaseModel):
+    """
+    Consolidated information about LLM, related to capacity, cost and API
+    idiosyncrasies. Reasonable defaults for all params in case there's no
+    specific info available.
+    """
+
+    name: str = "unknown"
+    provider: ModelProvider = ModelProvider.UNKNOWN
+    context_length: int = 16_000
+    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
+    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
+    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
+    allows_streaming: bool = True  # Whether model supports streaming output
+    allows_system_message: bool = True  # Whether model supports system messages
+    rename_params: Dict[str, str] = {}  # Rename parameters for OpenAI API
+    unsupported_params: List[str] = []
+    has_structured_output: bool = False  # Does model API support structured output?
+    has_tools: bool = True  # Does model API support tools/function-calling?
+    needs_first_user_message: bool = False  # Does API need first msg to be from user?
+    description: Optional[str] = None
+
+
+# Model information registry
+MODEL_INFO: Dict[str, ModelInfo] = {
+    # OpenAI Models
+    OpenAICompletionModel.DAVINCI.value: ModelInfo(
+        name=OpenAICompletionModel.DAVINCI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=2.0,
+        output_cost_per_million=2.0,
+        description="Davinci-002",
+    ),
+    OpenAICompletionModel.BABBAGE.value: ModelInfo(
+        name=OpenAICompletionModel.BABBAGE.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=0.40,
+        output_cost_per_million=0.40,
+        description="Babbage-002",
+    ),
+    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT3_5_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=16_385,
+        max_output_tokens=4096,
+        input_cost_per_million=0.50,
+        output_cost_per_million=1.50,
+        description="GPT-3.5 Turbo",
+    ),
+    OpenAIChatModel.GPT4.value: ModelInfo(
+        name=OpenAIChatModel.GPT4.value,
+        provider=ModelProvider.OPENAI,
+        context_length=8192,
+        max_output_tokens=8192,
+        input_cost_per_million=30.0,
+        output_cost_per_million=60.0,
+        description="GPT-4 (8K context)",
+    ),
+    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=4096,
+        input_cost_per_million=10.0,
+        output_cost_per_million=30.0,
+        description="GPT-4 Turbo",
+    ),
+    OpenAIChatModel.GPT4o.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=2.5,
+        output_cost_per_million=10.0,
+        has_structured_output=True,
+        description="GPT-4o (128K context)",
+    ),
+    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=0.15,
+        output_cost_per_million=0.60,
+        has_structured_output=True,
+        description="GPT-4o Mini",
+    ),
+    OpenAIChatModel.O1.value: ModelInfo(
+        name=OpenAIChatModel.O1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=15.0,
+        output_cost_per_million=60.0,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.1,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O3_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O3_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.1,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    # Anthropic Models
+    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=8192,
+        input_cost_per_million=3.0,
+        output_cost_per_million=15.0,
+        description="Claude 3.5 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_OPUS.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=15.0,
+        output_cost_per_million=75.0,
+        description="Claude 3 Opus",
+    ),
+    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=3.0,
+        output_cost_per_million=15.0,
+        description="Claude 3 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_HAIKU.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=0.25,
+        output_cost_per_million=1.25,
+        description="Claude 3 Haiku",
+    ),
+    # DeepSeek Models
+    DeepSeekModel.DEEPSEEK.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.27,
+        output_cost_per_million=1.10,
+        description="DeepSeek Chat",
+    ),
+    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK_R1.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.55,
+        output_cost_per_million=2.19,
+        description="DeepSeek-R1 Reasoning LM",
+    ),
+    # Gemini Models
+    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash 8B",
+    ),
+    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=64_000,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Thinking",
+    ),
+}
+
+
+def get_model_info(model: str | ModelName) -> ModelInfo:
+    """Get model information by name or enum value"""
+    if isinstance(model, str):
+        return MODEL_INFO.get(model) or ModelInfo()
+    return MODEL_INFO.get(model.value) or ModelInfo()
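A quick usage sketch of the new registry (expected values read off the MODEL_INFO entries above):

from langroid.language_models.model_info import OpenAIChatModel, get_model_info

info = get_model_info(OpenAIChatModel.GPT4o)  # lookup by enum
print(info.context_length, info.max_output_tokens)  # 128000 16384

info = get_model_info("claude-3-5-sonnet-latest")  # lookup by plain string
print(info.output_cost_per_million)  # 15.0

info = get_model_info("some-unknown-model")  # unknown models get defaults
print(info.context_length)  # 16000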