langroid 0.39.0__tar.gz → 0.39.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. {langroid-0.39.0 → langroid-0.39.1}/PKG-INFO +1 -1
  2. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/base.py +3 -3
  3. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/chat_agent.py +13 -13
  4. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/doc_chat_agent.py +1 -1
  5. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/__init__.py +4 -3
  6. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/base.py +8 -1
  7. langroid-0.39.1/langroid/language_models/model_info.py +307 -0
  8. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/openai_gpt.py +45 -153
  9. {langroid-0.39.0 → langroid-0.39.1}/langroid/mytypes.py +1 -1
  10. {langroid-0.39.0 → langroid-0.39.1}/pyproject.toml +1 -1
  11. {langroid-0.39.0 → langroid-0.39.1}/.gitignore +0 -0
  12. {langroid-0.39.0 → langroid-0.39.1}/LICENSE +0 -0
  13. {langroid-0.39.0 → langroid-0.39.1}/README.md +0 -0
  14. {langroid-0.39.0 → langroid-0.39.1}/langroid/__init__.py +0 -0
  15. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/__init__.py +0 -0
  16. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/batch.py +0 -0
  17. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/callbacks/__init__.py +0 -0
  18. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/callbacks/chainlit.py +0 -0
  19. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/chat_document.py +0 -0
  20. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/openai_assistant.py +0 -0
  21. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/__init__.py +0 -0
  22. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/__init__.py +0 -0
  23. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
  24. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/system_messages.py +0 -0
  25. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/tools.py +0 -0
  26. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/utils.py +0 -0
  27. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
  28. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/__init__.py +0 -0
  29. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
  30. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
  31. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
  32. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_tools.py +0 -0
  33. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/__init__.py +0 -0
  34. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
  35. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
  36. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/system_messages.py +0 -0
  37. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/tools.py +0 -0
  38. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/relevance_extractor_agent.py +0 -0
  39. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/retriever_agent.py +0 -0
  40. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/__init__.py +0 -0
  41. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
  42. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/__init__.py +0 -0
  43. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
  44. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
  45. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/system_message.py +0 -0
  46. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/tools.py +0 -0
  47. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/table_chat_agent.py +0 -0
  48. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/task.py +0 -0
  49. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tool_message.py +0 -0
  50. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/__init__.py +0 -0
  51. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
  52. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/file_tools.py +0 -0
  53. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/google_search_tool.py +0 -0
  54. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/metaphor_search_tool.py +0 -0
  55. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/orchestration.py +0 -0
  56. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/recipient_tool.py +0 -0
  57. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/retrieval_tool.py +0 -0
  58. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/rewind_tool.py +0 -0
  59. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/segment_extract_tool.py +0 -0
  60. {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/xml_tool_message.py +0 -0
  61. {langroid-0.39.0 → langroid-0.39.1}/langroid/cachedb/__init__.py +0 -0
  62. {langroid-0.39.0 → langroid-0.39.1}/langroid/cachedb/base.py +0 -0
  63. {langroid-0.39.0 → langroid-0.39.1}/langroid/cachedb/momento_cachedb.py +0 -0
  64. {langroid-0.39.0 → langroid-0.39.1}/langroid/cachedb/redis_cachedb.py +0 -0
  65. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/__init__.py +0 -0
  66. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/base.py +0 -0
  67. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/models.py +0 -0
  68. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/__init__.py +0 -0
  69. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings.proto +0 -0
  70. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
  71. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
  72. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
  73. {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/remote_embeds.py +0 -0
  74. {langroid-0.39.0 → langroid-0.39.1}/langroid/exceptions.py +0 -0
  75. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/azure_openai.py +0 -0
  76. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/config.py +0 -0
  77. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/mock_lm.py +0 -0
  78. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/__init__.py +0 -0
  79. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/base.py +0 -0
  80. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
  81. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
  82. {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/utils.py +0 -0
  83. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/__init__.py +0 -0
  84. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/agent_chats.py +0 -0
  85. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/code_parser.py +0 -0
  86. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/document_parser.py +0 -0
  87. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/para_sentence_split.py +0 -0
  88. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/parse_json.py +0 -0
  89. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/parser.py +0 -0
  90. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/pdf_utils.py +0 -0
  91. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/repo_loader.py +0 -0
  92. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/routing.py +0 -0
  93. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/search.py +0 -0
  94. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/spider.py +0 -0
  95. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/table_loader.py +0 -0
  96. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/url_loader.py +0 -0
  97. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/urls.py +0 -0
  98. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/utils.py +0 -0
  99. {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/web_search.py +0 -0
  100. {langroid-0.39.0 → langroid-0.39.1}/langroid/prompts/__init__.py +0 -0
  101. {langroid-0.39.0 → langroid-0.39.1}/langroid/prompts/dialog.py +0 -0
  102. {langroid-0.39.0 → langroid-0.39.1}/langroid/prompts/prompts_config.py +0 -0
  103. {langroid-0.39.0 → langroid-0.39.1}/langroid/prompts/templates.py +0 -0
  104. {langroid-0.39.0 → langroid-0.39.1}/langroid/py.typed +0 -0
  105. {langroid-0.39.0 → langroid-0.39.1}/langroid/pydantic_v1/__init__.py +0 -0
  106. {langroid-0.39.0 → langroid-0.39.1}/langroid/pydantic_v1/main.py +0 -0
  107. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/__init__.py +0 -0
  108. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/algorithms/__init__.py +0 -0
  109. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/algorithms/graph.py +0 -0
  110. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/configuration.py +0 -0
  111. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/constants.py +0 -0
  112. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/git_utils.py +0 -0
  113. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/globals.py +0 -0
  114. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/logging.py +0 -0
  115. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/object_registry.py +0 -0
  116. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/output/__init__.py +0 -0
  117. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/output/citations.py +0 -0
  118. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/output/printing.py +0 -0
  119. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/output/status.py +0 -0
  120. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/pandas_utils.py +0 -0
  121. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/pydantic_utils.py +0 -0
  122. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/system.py +0 -0
  123. {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/types.py +0 -0
  124. {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/__init__.py +0 -0
  125. {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/base.py +0 -0
  126. {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/chromadb.py +0 -0
  127. {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/lancedb.py +0 -0
  128. {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/meilisearch.py +0 -0
  129. {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/momento.py +0 -0
  130. {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/qdrantdb.py +0 -0
  131. {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/weaviatedb.py +0 -0
--- langroid-0.39.0/PKG-INFO
+++ langroid-0.39.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.39.0
+Version: 0.39.1
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
--- langroid-0.39.0/langroid/agent/base.py
+++ langroid-0.39.1/langroid/agent/base.py
@@ -917,7 +917,7 @@ class Agent(ABC):
         else:
             prompt = message
 
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if self.num_tokens(prompt) + output_len > self.llm.completion_context_length():
             output_len = self.llm.completion_context_length() - self.num_tokens(prompt)
             if output_len < self.config.llm.min_output_tokens:
@@ -986,7 +986,7 @@ class Agent(ABC):
             # show rich spinner only if not streaming!
             cm = status("LLM responding to message...")
             stack.enter_context(cm)
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if (
             self.num_tokens(prompt) + output_len
             > self.llm.completion_context_length()
@@ -1871,7 +1871,7 @@ class Agent(ABC):
         cumul_cost = format(tot_cost, ".4f")
         assert isinstance(self.llm, LanguageModel)
         context_length = self.llm.chat_context_length()
-        max_out = self.config.llm.max_output_tokens
+        max_out = self.config.llm.model_max_output_tokens
 
         llm_model = (
             "no-LLM" if self.config.llm is None else self.llm.config.chat_model
--- langroid-0.39.0/langroid/agent/chat_agent.py
+++ langroid-0.39.1/langroid/agent/chat_agent.py
@@ -31,7 +31,7 @@ from langroid.language_models.base import (
     ToolChoiceTypes,
 )
 from langroid.language_models.openai_gpt import OpenAIGPT
-from langroid.mytypes import Entity, Routing
+from langroid.mytypes import Entity, NonToolAction
 from langroid.pydantic_v1 import BaseModel, ValidationError
 from langroid.utils.configuration import settings
 from langroid.utils.object_registry import ObjectRegistry
@@ -53,7 +53,7 @@ class ChatAgentConfig(AgentConfig):
         user_message: user message to include in message sequence.
             Used only if `task` is not specified in the constructor.
         use_tools: whether to use our own ToolMessages mechanism
-        non_tool_routing (Routing|str): routing when LLM generates non-tool msg.
+        handle_llm_no_tool (NonToolAction|str): routing when LLM generates non-tool msg.
         use_functions_api: whether to use functions/tools native to the LLM API
             (e.g. OpenAI's `function_call` or `tool_call` mechanism)
         use_tools_api: When `use_functions_api` is True, if this is also True,
@@ -86,7 +86,7 @@ class ChatAgentConfig(AgentConfig):
 
     system_message: str = "You are a helpful assistant."
     user_message: Optional[str] = None
-    non_tool_routing: Routing | None = None
+    handle_llm_no_tool: NonToolAction | None = None
     use_tools: bool = False
     use_functions_api: bool = True
     use_tools_api: bool = False
@@ -596,15 +596,15 @@ class ChatAgent(Agent):
         Returns:
             Any: The result of the handler method
         """
-        if self.config.non_tool_routing is None:
+        if self.config.handle_llm_no_tool is None:
             return None
         if isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM:
             from langroid.agent.tools.orchestration import AgentDoneTool, ForwardTool
 
-            match self.config.non_tool_routing:
-                case Routing.FORWARD_USER:
+            match self.config.handle_llm_no_tool:
+                case NonToolAction.FORWARD_USER:
                     return ForwardTool(agent="User")
-                case Routing.DONE:
+                case NonToolAction.DONE:
                     return AgentDoneTool(content=msg.content, tools=msg.tool_messages)
 
     def unhandled_tools(self) -> set[str]:
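
Note: to illustrate the renamed option, a minimal usage sketch (hypothetical snippet, assuming only the `ChatAgentConfig` API shown in the hunks above):

    import langroid as lr
    from langroid.mytypes import NonToolAction

    # Forward plain (non-tool) LLM responses to the user;
    # NonToolAction.DONE would instead end the task with the LLM's content.
    config = lr.ChatAgentConfig(
        handle_llm_no_tool=NonToolAction.FORWARD_USER,
    )
    agent = lr.ChatAgent(config)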
@@ -1488,11 +1488,11 @@ class ChatAgent(Agent):
         self.message_history.extend(llm_msgs)
 
         hist = self.message_history
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if (
             truncate
             and self.chat_num_tokens(hist)
-            > self.llm.chat_context_length() - self.config.llm.max_output_tokens
+            > self.llm.chat_context_length() - self.config.llm.model_max_output_tokens
         ):
             # chat + output > max context length,
             # so first try to shorten requested output len to fit.
@@ -1517,7 +1517,7 @@ class ChatAgent(Agent):
                 The message history is longer than the max chat context
                 length allowed, and we have run out of messages to drop.
                 HINT: In your `OpenAIGPTConfig` object, try increasing
-                `chat_context_length` or decreasing `max_output_tokens`.
+                `chat_context_length` or decreasing `model_max_output_tokens`.
                 """
             )
             # drop the second message, i.e. first msg after the sys msg
@@ -1666,12 +1666,12 @@ class ChatAgent(Agent):
         Args:
             messages: seq of messages (with role, content fields) sent to LLM
             output_len: max number of tokens expected in response.
-                If None, use the LLM's default max_output_tokens.
+                If None, use the LLM's default model_max_output_tokens.
         Returns:
             Document (i.e. with fields "content", "metadata")
         """
         assert self.config.llm is not None and self.llm is not None
-        output_len = output_len or self.config.llm.max_output_tokens
+        output_len = output_len or self.config.llm.model_max_output_tokens
         streamer = noop_fn
         if self.llm.get_stream():
             streamer = self.callbacks.start_llm_stream()
@@ -1741,7 +1741,7 @@ class ChatAgent(Agent):
         Async version of `llm_response_messages`. See there for details.
         """
         assert self.config.llm is not None and self.llm is not None
-        output_len = output_len or self.config.llm.max_output_tokens
+        output_len = output_len or self.config.llm.model_max_output_tokens
         functions, fun_call, tools, force_tool, output_format = self._function_args()
         assert self.llm is not None
 
--- langroid-0.39.0/langroid/agent/special/doc_chat_agent.py
+++ langroid-0.39.1/langroid/agent/special/doc_chat_agent.py
@@ -1565,7 +1565,7 @@ class DocChatAgent(ChatAgent):
         tot_tokens = self.parser.num_tokens(full_text)
         MAX_INPUT_TOKENS = (
             self.llm.completion_context_length()
-            - self.config.llm.max_output_tokens
+            - self.config.llm.model_max_output_tokens
             - 100
         )
         if tot_tokens > MAX_INPUT_TOKENS:
--- langroid-0.39.0/langroid/language_models/__init__.py
+++ langroid-0.39.1/langroid/language_models/__init__.py
@@ -15,14 +15,13 @@ from .base import (
    LLMTokenUsage,
    LLMResponse,
)
-from .openai_gpt import (
+from .model_info import (
    OpenAIChatModel,
    AnthropicModel,
    GeminiModel,
    OpenAICompletionModel,
-    OpenAIGPTConfig,
-    OpenAIGPT,
)
+from .openai_gpt import OpenAIGPTConfig, OpenAIGPT, OpenAICallParams
 from .mock_lm import MockLM, MockLMConfig
 from .azure_openai import AzureConfig, AzureGPT
 
@@ -32,6 +31,7 @@ __all__ = [
     "config",
     "base",
     "openai_gpt",
+    "model_info",
     "azure_openai",
     "prompt_formatter",
     "StreamEventType",
@@ -48,6 +48,7 @@ __all__ = [
     "OpenAICompletionModel",
     "OpenAIGPTConfig",
     "OpenAIGPT",
+    "OpenAICallParams",
     "AzureConfig",
     "AzureGPT",
     "MockLM",
--- langroid-0.39.0/langroid/language_models/base.py
+++ langroid-0.39.1/langroid/language_models/base.py
@@ -19,6 +19,7 @@ from typing import (
 
 from langroid.cachedb.base import CacheDBConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
+from langroid.language_models.model_info import get_model_info
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.parse_json import parse_imperfect_json, top_level_json_field
 from langroid.prompts.dialog import collate_chat_history
@@ -60,6 +61,7 @@ class LLMConfig(BaseSettings):
     streamer_async: Optional[Callable[..., Awaitable[None]]] = async_noop_fn
     api_base: str | None = None
     formatter: None | str = None
+    max_output_tokens: int | None = 8192  # specify None to use model_max_output_tokens
     timeout: int = 20  # timeout for API requests
     chat_model: str = ""
     completion_model: str = ""
@@ -67,7 +69,6 @@ class LLMConfig(BaseSettings):
     chat_context_length: int = 8000
     async_stream_quiet: bool = True  # suppress streaming output in async mode?
     completion_context_length: int = 8000
-    max_output_tokens: int = 1024  # generate at most this many tokens
     # if input length + max_output_tokens > context length of model,
     # we will try shortening requested output
     min_output_tokens: int = 64
@@ -84,6 +85,12 @@ class LLMConfig(BaseSettings):
     chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
     completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
 
+    @property
+    def model_max_output_tokens(self) -> int:
+        return (
+            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens
+        )
+
 
 class LLMFunctionCall(BaseModel):
     """
--- /dev/null
+++ langroid-0.39.1/langroid/language_models/model_info.py
@@ -0,0 +1,307 @@
+from enum import Enum
+from typing import Dict, List, Optional
+
+from langroid.pydantic_v1 import BaseModel
+
+
+class ModelProvider(str, Enum):
+    """Enum for model providers"""
+
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    DEEPSEEK = "deepseek"
+    GOOGLE = "google"
+    UNKNOWN = "unknown"
+
+
+class ModelName(str, Enum):
+    """Parent class for all model name enums"""
+
+    pass
+
+
+class OpenAIChatModel(ModelName):
+    """Enum for OpenAI Chat models"""
+
+    GPT3_5_TURBO = "gpt-3.5-turbo-1106"
+    GPT4 = "gpt-4"
+    GPT4_TURBO = "gpt-4-turbo"
+    GPT4o = "gpt-4o"
+    GPT4o_MINI = "gpt-4o-mini"
+    O1 = "o1"
+    O1_MINI = "o1-mini"
+    O3_MINI = "o3-mini"
+
+
+class OpenAICompletionModel(str, Enum):
+    """Enum for OpenAI Completion models"""
+
+    DAVINCI = "davinci-002"
+    BABBAGE = "babbage-002"
+
+
+class AnthropicModel(ModelName):
+    """Enum for Anthropic models"""
+
+    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_OPUS = "claude-3-opus-latest"
+    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
+    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+
+
+class DeepSeekModel(ModelName):
+    """Enum for DeepSeek models direct from DeepSeek API"""
+
+    DEEPSEEK = "deepseek/deepseek-chat"
+    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
+
+
+class GeminiModel(ModelName):
+    """Enum for Gemini models"""
+
+    GEMINI_1_5_FLASH = "gemini/gemini-1.5-flash"
+    GEMINI_1_5_FLASH_8B = "gemini/gemini-1.5-flash-8b"
+    GEMINI_1_5_PRO = "gemini/gemini-1.5-pro"
+    GEMINI_2_FLASH = "gemini/gemini-2.0-flash-exp"
+    GEMINI_2_FLASH_THINKING = "gemini/gemini-2.0-flash-thinking-exp"
+
+
+class ModelInfo(BaseModel):
+    """
+    Consolidated information about LLM, related to capacity, cost and API
+    idiosyncrasies. Reasonable defaults for all params in case there's no
+    specific info available.
+    """
+
+    name: str = "unknown"
+    provider: ModelProvider = ModelProvider.UNKNOWN
+    context_length: int = 16_000
+    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
+    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
+    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
+    allows_streaming: bool = True  # Whether model supports streaming output
+    allows_system_message: bool = True  # Whether model supports system messages
+    rename_params: Dict[str, str] = {}  # Rename parameters for OpenAI API
+    unsupported_params: List[str] = []
+    has_structured_output: bool = False  # Does model API support structured output?
+    has_tools: bool = True  # Does model API support tools/function-calling?
+    needs_first_user_message: bool = False  # Does API need first msg to be from user?
+    description: Optional[str] = None
+
+
+# Model information registry
+MODEL_INFO: Dict[str, ModelInfo] = {
+    # OpenAI Models
+    OpenAICompletionModel.DAVINCI.value: ModelInfo(
+        name=OpenAICompletionModel.DAVINCI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=2.0,
+        output_cost_per_million=2.0,
+        description="Davinci-002",
+    ),
+    OpenAICompletionModel.BABBAGE.value: ModelInfo(
+        name=OpenAICompletionModel.BABBAGE.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=0.40,
+        output_cost_per_million=0.40,
+        description="Babbage-002",
+    ),
+    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT3_5_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=16_385,
+        max_output_tokens=4096,
+        input_cost_per_million=0.50,
+        output_cost_per_million=1.50,
+        description="GPT-3.5 Turbo",
+    ),
+    OpenAIChatModel.GPT4.value: ModelInfo(
+        name=OpenAIChatModel.GPT4.value,
+        provider=ModelProvider.OPENAI,
+        context_length=8192,
+        max_output_tokens=8192,
+        input_cost_per_million=30.0,
+        output_cost_per_million=60.0,
+        description="GPT-4 (8K context)",
+    ),
+    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=4096,
+        input_cost_per_million=10.0,
+        output_cost_per_million=30.0,
+        description="GPT-4 Turbo",
+    ),
+    OpenAIChatModel.GPT4o.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=2.5,
+        output_cost_per_million=10.0,
+        has_structured_output=True,
+        description="GPT-4o (128K context)",
+    ),
+    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=0.15,
+        output_cost_per_million=0.60,
+        has_structured_output=True,
+        description="GPT-4o Mini",
+    ),
+    OpenAIChatModel.O1.value: ModelInfo(
+        name=OpenAIChatModel.O1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=15.0,
+        output_cost_per_million=60.0,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.1,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O3_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O3_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.1,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    # Anthropic Models
+    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=8192,
+        input_cost_per_million=3.0,
+        output_cost_per_million=15.0,
+        description="Claude 3.5 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_OPUS.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=15.0,
+        output_cost_per_million=75.0,
+        description="Claude 3 Opus",
+    ),
+    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=3.0,
+        output_cost_per_million=15.0,
+        description="Claude 3 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_HAIKU.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=0.25,
+        output_cost_per_million=1.25,
+        description="Claude 3 Haiku",
+    ),
+    # DeepSeek Models
+    DeepSeekModel.DEEPSEEK.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.27,
+        output_cost_per_million=1.10,
+        description="DeepSeek Chat",
+    ),
+    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK_R1.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.55,
+        output_cost_per_million=2.19,
+        description="DeepSeek-R1 Reasoning LM",
+    ),
+    # Gemini Models
+    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash 8B",
+    ),
+    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=64_000,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Thinking",
+    ),
+}
+
+
+def get_model_info(model: str | ModelName) -> ModelInfo:
+    """Get model information by name or enum value"""
+    if isinstance(model, str):
+        return MODEL_INFO.get(model) or ModelInfo()
+    return MODEL_INFO.get(model.value) or ModelInfo()
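
Usage sketch for the new registry (values taken from the entries above; unknown model names fall back to the `ModelInfo()` defaults):

    from langroid.language_models.model_info import OpenAIChatModel, get_model_info

    info = get_model_info(OpenAIChatModel.O1)    # str-valued enum, so dict lookup works
    assert info.context_length == 200_000
    assert info.rename_params == {"max_tokens": "max_completion_tokens"}

    info = get_model_info("some-unknown-model")  # not in MODEL_INFO -> defaults
    assert info.max_output_tokens == 8192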