langroid 0.39.0__tar.gz → 0.39.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.39.0 → langroid-0.39.1}/PKG-INFO +1 -1
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/base.py +3 -3
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/chat_agent.py +13 -13
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/doc_chat_agent.py +1 -1
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/__init__.py +4 -3
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/base.py +8 -1
- langroid-0.39.1/langroid/language_models/model_info.py +307 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/openai_gpt.py +45 -153
- {langroid-0.39.0 → langroid-0.39.1}/langroid/mytypes.py +1 -1
- {langroid-0.39.0 → langroid-0.39.1}/pyproject.toml +1 -1
- {langroid-0.39.0 → langroid-0.39.1}/.gitignore +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/LICENSE +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/README.md +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/batch.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/callbacks/chainlit.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/chat_document.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/openai_assistant.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/system_messages.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/tools.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/arangodb/utils.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/system_messages.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/neo4j/tools.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/task.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tool_message.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/file_tools.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/orchestration.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/retrieval_tool.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/rewind_tool.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/agent/xml_tool_message.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/cachedb/base.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/base.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/models.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings.proto +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/embedding_models/remote_embeds.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/exceptions.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/config.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/mock_lm.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/utils.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/document_parser.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/parse_json.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/parser.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/pdf_utils.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/repo_loader.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/routing.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/search.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/spider.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/url_loader.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/urls.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/utils.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/parsing/web_search.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/prompts/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/prompts/dialog.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/prompts/templates.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/py.typed +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/pydantic_v1/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/pydantic_v1/main.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/configuration.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/constants.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/git_utils.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/globals.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/logging.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/object_registry.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/output/citations.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/output/printing.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/output/status.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/system.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/utils/types.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/base.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/chromadb.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/momento.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/qdrantdb.py +0 -0
- {langroid-0.39.0 → langroid-0.39.1}/langroid/vector_store/weaviatedb.py +0 -0
{langroid-0.39.0 → langroid-0.39.1}/langroid/agent/base.py
@@ -917,7 +917,7 @@ class Agent(ABC):
         else:
             prompt = message

-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if self.num_tokens(prompt) + output_len > self.llm.completion_context_length():
             output_len = self.llm.completion_context_length() - self.num_tokens(prompt)
             if output_len < self.config.llm.min_output_tokens:

@@ -986,7 +986,7 @@ class Agent(ABC):
                 # show rich spinner only if not streaming!
                 cm = status("LLM responding to message...")
                 stack.enter_context(cm)
-            output_len = self.config.llm.max_output_tokens
+            output_len = self.config.llm.model_max_output_tokens
             if (
                 self.num_tokens(prompt) + output_len
                 > self.llm.completion_context_length()

@@ -1871,7 +1871,7 @@ class Agent(ABC):
         cumul_cost = format(tot_cost, ".4f")
         assert isinstance(self.llm, LanguageModel)
         context_length = self.llm.chat_context_length()
-        max_out = self.config.llm.max_output_tokens
+        max_out = self.config.llm.model_max_output_tokens

         llm_model = (
             "no-LLM" if self.config.llm is None else self.llm.config.chat_model

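All three hunks above make the same substitution: direct reads of the `max_output_tokens` config field become reads of the new `model_max_output_tokens` property (added to `LLMConfig` in `langroid/language_models/base.py`, shown later in this diff), which falls back to per-model limits when the config field is `None`. As a rough illustration of the token-budget clamp these values feed into, here is a standalone sketch with made-up numbers (not langroid's actual code):

    # Sketch of the clamp around base.py line 921, with hypothetical values
    context_length = 16_385   # e.g. llm.completion_context_length()
    prompt_tokens = 14_000    # e.g. num_tokens(prompt)
    output_len = 8_192        # e.g. config.llm.model_max_output_tokens
    min_output_tokens = 64    # config.llm.min_output_tokens

    if prompt_tokens + output_len > context_length:
        # shrink the requested output to whatever room the context window leaves
        output_len = context_length - prompt_tokens  # -> 2_385
    assert output_len >= min_output_tokens  # below this, the agent gives up
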
{langroid-0.39.0 → langroid-0.39.1}/langroid/agent/chat_agent.py
@@ -31,7 +31,7 @@ from langroid.language_models.base import (
     ToolChoiceTypes,
 )
 from langroid.language_models.openai_gpt import OpenAIGPT
-from langroid.mytypes import Entity,
+from langroid.mytypes import Entity, NonToolAction
 from langroid.pydantic_v1 import BaseModel, ValidationError
 from langroid.utils.configuration import settings
 from langroid.utils.object_registry import ObjectRegistry

@@ -53,7 +53,7 @@ class ChatAgentConfig(AgentConfig):
         user_message: user message to include in message sequence.
             Used only if `task` is not specified in the constructor.
         use_tools: whether to use our own ToolMessages mechanism
-
+        handle_llm_no_tool (NonToolAction|str): routing when LLM generates non-tool msg.
         use_functions_api: whether to use functions/tools native to the LLM API
             (e.g. OpenAI's `function_call` or `tool_call` mechanism)
         use_tools_api: When `use_functions_api` is True, if this is also True,

@@ -86,7 +86,7 @@ class ChatAgentConfig(AgentConfig):

     system_message: str = "You are a helpful assistant."
     user_message: Optional[str] = None
-
+    handle_llm_no_tool: NonToolAction | None = None
     use_tools: bool = False
     use_functions_api: bool = True
     use_tools_api: bool = False

@@ -596,15 +596,15 @@ class ChatAgent(Agent):
         Returns:
             Any: The result of the handler method
         """
-        if self.config.
+        if self.config.handle_llm_no_tool is None:
             return None
         if isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM:
             from langroid.agent.tools.orchestration import AgentDoneTool, ForwardTool

-            match self.config.
-                case
+            match self.config.handle_llm_no_tool:
+                case NonToolAction.FORWARD_USER:
                     return ForwardTool(agent="User")
-                case
+                case NonToolAction.DONE:
                     return AgentDoneTool(content=msg.content, tools=msg.tool_messages)

     def unhandled_tools(self) -> set[str]:

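Combined with the new `handle_llm_no_tool` config field above, this match statement gives declarative routing for LLM responses that contain no tool call. A hedged usage sketch (the `NonToolAction` members are taken from the match arms above; the enum itself lives in `langroid/mytypes.py`, whose body this diff does not show):

    from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
    from langroid.mytypes import NonToolAction

    # Forward any non-tool LLM message to the user instead of looping
    config = ChatAgentConfig(
        handle_llm_no_tool=NonToolAction.FORWARD_USER,
    )
    agent = ChatAgent(config)
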
@@ -1488,11 +1488,11 @@ class ChatAgent(Agent):
             self.message_history.extend(llm_msgs)

         hist = self.message_history
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if (
             truncate
             and self.chat_num_tokens(hist)
-            > self.llm.chat_context_length() - self.config.llm.max_output_tokens
+            > self.llm.chat_context_length() - self.config.llm.model_max_output_tokens
         ):
             # chat + output > max context length,
             # so first try to shorten requested output len to fit.

@@ -1517,7 +1517,7 @@ class ChatAgent(Agent):
                     The message history is longer than the max chat context
                     length allowed, and we have run out of messages to drop.
                     HINT: In your `OpenAIGPTConfig` object, try increasing
-                    `chat_context_length` or decreasing `max_output_tokens`.
+                    `chat_context_length` or decreasing `model_max_output_tokens`.
                     """
                 )
                 # drop the second message, i.e. first msg after the sys msg

@@ -1666,12 +1666,12 @@ class ChatAgent(Agent):
         Args:
             messages: seq of messages (with role, content fields) sent to LLM
             output_len: max number of tokens expected in response.
-                If None, use the LLM's default max_output_tokens.
+                If None, use the LLM's default model_max_output_tokens.
         Returns:
             Document (i.e. with fields "content", "metadata")
         """
         assert self.config.llm is not None and self.llm is not None
-        output_len = output_len or self.config.llm.max_output_tokens
+        output_len = output_len or self.config.llm.model_max_output_tokens
         streamer = noop_fn
         if self.llm.get_stream():
             streamer = self.callbacks.start_llm_stream()

@@ -1741,7 +1741,7 @@ class ChatAgent(Agent):
         Async version of `llm_response_messages`. See there for details.
         """
         assert self.config.llm is not None and self.llm is not None
-        output_len = output_len or self.config.llm.max_output_tokens
+        output_len = output_len or self.config.llm.model_max_output_tokens
         functions, fun_call, tools, force_tool, output_format = self._function_args()
         assert self.llm is not None

{langroid-0.39.0 → langroid-0.39.1}/langroid/agent/special/doc_chat_agent.py
@@ -1565,7 +1565,7 @@ class DocChatAgent(ChatAgent):
         tot_tokens = self.parser.num_tokens(full_text)
         MAX_INPUT_TOKENS = (
             self.llm.completion_context_length()
-            - self.config.llm.max_output_tokens
+            - self.config.llm.model_max_output_tokens
             - 100
         )
         if tot_tokens > MAX_INPUT_TOKENS:

{langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/__init__.py
@@ -15,14 +15,13 @@ from .base import (
     LLMTokenUsage,
     LLMResponse,
 )
-from .openai_gpt import (
+from .model_info import (
     OpenAIChatModel,
     AnthropicModel,
     GeminiModel,
     OpenAICompletionModel,
-    OpenAIGPTConfig,
-    OpenAIGPT,
 )
+from .openai_gpt import OpenAIGPTConfig, OpenAIGPT, OpenAICallParams
 from .mock_lm import MockLM, MockLMConfig
 from .azure_openai import AzureConfig, AzureGPT

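Note that the model enums move to `model_info` but remain re-exported from the package root, so existing user imports should keep working; a quick sanity sketch:

    # Both paths should resolve to the same class object after this change
    from langroid.language_models import OpenAIChatModel as A
    from langroid.language_models.model_info import OpenAIChatModel as B

    assert A is B
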
@@ -32,6 +31,7 @@ __all__ = [
     "config",
     "base",
     "openai_gpt",
+    "model_info",
     "azure_openai",
     "prompt_formatter",
     "StreamEventType",

@@ -48,6 +48,7 @@ __all__ = [
     "OpenAICompletionModel",
     "OpenAIGPTConfig",
     "OpenAIGPT",
+    "OpenAICallParams",
     "AzureConfig",
     "AzureGPT",
     "MockLM",

{langroid-0.39.0 → langroid-0.39.1}/langroid/language_models/base.py
@@ -19,6 +19,7 @@ from typing import (

 from langroid.cachedb.base import CacheDBConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
+from langroid.language_models.model_info import get_model_info
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.parse_json import parse_imperfect_json, top_level_json_field
 from langroid.prompts.dialog import collate_chat_history

@@ -60,6 +61,7 @@ class LLMConfig(BaseSettings):
     streamer_async: Optional[Callable[..., Awaitable[None]]] = async_noop_fn
     api_base: str | None = None
     formatter: None | str = None
+    max_output_tokens: int | None = 8192  # specify None to use model_max_output_tokens
     timeout: int = 20  # timeout for API requests
     chat_model: str = ""
     completion_model: str = ""

@@ -67,7 +69,6 @@ class LLMConfig(BaseSettings):
     chat_context_length: int = 8000
     async_stream_quiet: bool = True  # suppress streaming output in async mode?
     completion_context_length: int = 8000
-    max_output_tokens: int = 1024  # generate at most this many tokens
     # if input length + max_output_tokens > context length of model,
     # we will try shortening requested output
     min_output_tokens: int = 64

@@ -84,6 +85,12 @@ class LLMConfig(BaseSettings):
     chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
     completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)

+    @property
+    def model_max_output_tokens(self) -> int:
+        return (
+            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens
+        )
+

 class LLMFunctionCall(BaseModel):
     """

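The net effect: `max_output_tokens` is now an optional explicit override (defaulting to 8192), and setting it to `None` defers to the per-model limit from the registry in the new `model_info` module. A sketch of the two paths, assuming `LLMConfig` is instantiated directly (in practice one would typically use a subclass such as `OpenAIGPTConfig`):

    from langroid.language_models.base import LLMConfig

    cfg = LLMConfig(chat_model="gpt-4o", max_output_tokens=None)
    print(cfg.model_max_output_tokens)  # 16384, from MODEL_INFO["gpt-4o"]

    cfg = LLMConfig(chat_model="gpt-4o", max_output_tokens=1024)
    print(cfg.model_max_output_tokens)  # 1024, the explicit override wins
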
langroid-0.39.1/langroid/language_models/model_info.py
@@ -0,0 +1,307 @@
+from enum import Enum
+from typing import Dict, List, Optional
+
+from langroid.pydantic_v1 import BaseModel
+
+
+class ModelProvider(str, Enum):
+    """Enum for model providers"""
+
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    DEEPSEEK = "deepseek"
+    GOOGLE = "google"
+    UNKNOWN = "unknown"
+
+
+class ModelName(str, Enum):
+    """Parent class for all model name enums"""
+
+    pass
+
+
+class OpenAIChatModel(ModelName):
+    """Enum for OpenAI Chat models"""
+
+    GPT3_5_TURBO = "gpt-3.5-turbo-1106"
+    GPT4 = "gpt-4"
+    GPT4_TURBO = "gpt-4-turbo"
+    GPT4o = "gpt-4o"
+    GPT4o_MINI = "gpt-4o-mini"
+    O1 = "o1"
+    O1_MINI = "o1-mini"
+    O3_MINI = "o3-mini"
+
+
+class OpenAICompletionModel(str, Enum):
+    """Enum for OpenAI Completion models"""
+
+    DAVINCI = "davinci-002"
+    BABBAGE = "babbage-002"
+
+
+class AnthropicModel(ModelName):
+    """Enum for Anthropic models"""
+
+    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_OPUS = "claude-3-opus-latest"
+    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
+    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+
+
+class DeepSeekModel(ModelName):
+    """Enum for DeepSeek models direct from DeepSeek API"""
+
+    DEEPSEEK = "deepseek/deepseek-chat"
+    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
+
+
+class GeminiModel(ModelName):
+    """Enum for Gemini models"""
+
+    GEMINI_1_5_FLASH = "gemini/gemini-1.5-flash"
+    GEMINI_1_5_FLASH_8B = "gemini/gemini-1.5-flash-8b"
+    GEMINI_1_5_PRO = "gemini/gemini-1.5-pro"
+    GEMINI_2_FLASH = "gemini/gemini-2.0-flash-exp"
+    GEMINI_2_FLASH_THINKING = "gemini/gemini-2.0-flash-thinking-exp"
+
+
+class ModelInfo(BaseModel):
+    """
+    Consolidated information about LLM, related to capacity, cost and API
+    idiosyncrasies. Reasonable defaults for all params in case there's no
+    specific info available.
+    """
+
+    name: str = "unknown"
+    provider: ModelProvider = ModelProvider.UNKNOWN
+    context_length: int = 16_000
+    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
+    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
+    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
+    allows_streaming: bool = True  # Whether model supports streaming output
+    allows_system_message: bool = True  # Whether model supports system messages
+    rename_params: Dict[str, str] = {}  # Rename parameters for OpenAI API
+    unsupported_params: List[str] = []
+    has_structured_output: bool = False  # Does model API support structured output?
+    has_tools: bool = True  # Does model API support tools/function-calling?
+    needs_first_user_message: bool = False  # Does API need first msg to be from user?
+    description: Optional[str] = None
+
+
+# Model information registry
+MODEL_INFO: Dict[str, ModelInfo] = {
+    # OpenAI Models
+    OpenAICompletionModel.DAVINCI.value: ModelInfo(
+        name=OpenAICompletionModel.DAVINCI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=2.0,
+        output_cost_per_million=2.0,
+        description="Davinci-002",
+    ),
+    OpenAICompletionModel.BABBAGE.value: ModelInfo(
+        name=OpenAICompletionModel.BABBAGE.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=0.40,
+        output_cost_per_million=0.40,
+        description="Babbage-002",
+    ),
+    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT3_5_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=16_385,
+        max_output_tokens=4096,
+        input_cost_per_million=0.50,
+        output_cost_per_million=1.50,
+        description="GPT-3.5 Turbo",
+    ),
+    OpenAIChatModel.GPT4.value: ModelInfo(
+        name=OpenAIChatModel.GPT4.value,
+        provider=ModelProvider.OPENAI,
+        context_length=8192,
+        max_output_tokens=8192,
+        input_cost_per_million=30.0,
+        output_cost_per_million=60.0,
+        description="GPT-4 (8K context)",
+    ),
+    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=4096,
+        input_cost_per_million=10.0,
+        output_cost_per_million=30.0,
+        description="GPT-4 Turbo",
+    ),
+    OpenAIChatModel.GPT4o.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=2.5,
+        output_cost_per_million=10.0,
+        has_structured_output=True,
+        description="GPT-4o (128K context)",
+    ),
+    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=0.15,
+        output_cost_per_million=0.60,
+        has_structured_output=True,
+        description="GPT-4o Mini",
+    ),
+    OpenAIChatModel.O1.value: ModelInfo(
+        name=OpenAIChatModel.O1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=15.0,
+        output_cost_per_million=60.0,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.1,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O3_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O3_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.1,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    # Anthropic Models
+    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=8192,
+        input_cost_per_million=3.0,
+        output_cost_per_million=15.0,
+        description="Claude 3.5 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_OPUS.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=15.0,
+        output_cost_per_million=75.0,
+        description="Claude 3 Opus",
+    ),
+    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=3.0,
+        output_cost_per_million=15.0,
+        description="Claude 3 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_HAIKU.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=0.25,
+        output_cost_per_million=1.25,
+        description="Claude 3 Haiku",
+    ),
+    # DeepSeek Models
+    DeepSeekModel.DEEPSEEK.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.27,
+        output_cost_per_million=1.10,
+        description="DeepSeek Chat",
+    ),
+    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK_R1.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.55,
+        output_cost_per_million=2.19,
+        description="DeepSeek-R1 Reasoning LM",
+    ),
+    # Gemini Models
+    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash 8B",
+    ),
+    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=64_000,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Thinking",
+    ),
+}
+
+
+def get_model_info(model: str | ModelName) -> ModelInfo:
+    """Get model information by name or enum value"""
+    if isinstance(model, str):
+        return MODEL_INFO.get(model) or ModelInfo()
+    return MODEL_INFO.get(model.value) or ModelInfo()

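A short usage sketch of the new registry, with expected values read off the MODEL_INFO table above:

    from langroid.language_models.model_info import (
        OpenAIChatModel,
        get_model_info,
    )

    info = get_model_info("gpt-4o")  # lookup by raw model-name string
    print(info.context_length)       # 128000
    print(info.max_output_tokens)    # 16384

    info = get_model_info(OpenAIChatModel.O1)  # lookup by enum member
    print(info.rename_params)        # {'max_tokens': 'max_completion_tokens'}

    info = get_model_info("some-unknown-model")  # unknown -> default ModelInfo
    print(info.provider)             # ModelProvider.UNKNOWN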