langroid 0.38.0__tar.gz → 0.39.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {langroid-0.38.0 → langroid-0.39.1}/PKG-INFO +1 -1
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/base.py +8 -3
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/chat_agent.py +35 -7
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/doc_chat_agent.py +1 -1
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/__init__.py +4 -3
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/base.py +8 -1
- langroid-0.39.1/langroid/language_models/model_info.py +307 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/openai_gpt.py +45 -153
- {langroid-0.38.0 → langroid-0.39.1}/langroid/mytypes.py +9 -0
- {langroid-0.38.0 → langroid-0.39.1}/pyproject.toml +1 -1
- {langroid-0.38.0 → langroid-0.39.1}/.gitignore +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/LICENSE +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/README.md +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/batch.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/callbacks/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/callbacks/chainlit.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/chat_document.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/openai_assistant.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/arangodb/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/arangodb/arangodb_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/arangodb/system_messages.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/arangodb/tools.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/arangodb/utils.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/lance_doc_chat_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/critic_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/lance_rag_task.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/lance_rag/query_planner_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/lance_tools.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/neo4j/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/neo4j/csv_kg_chat.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/neo4j/neo4j_chat_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/neo4j/system_messages.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/neo4j/tools.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/relevance_extractor_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/retriever_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/sql/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/sql/sql_chat_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/description_extractors.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/populate_metadata.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/system_message.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/sql/utils/tools.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/special/table_chat_agent.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/task.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tool_message.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/duckduckgo_search_tool.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/file_tools.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/google_search_tool.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/metaphor_search_tool.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/orchestration.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/recipient_tool.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/retrieval_tool.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/rewind_tool.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/tools/segment_extract_tool.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/agent/xml_tool_message.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/cachedb/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/cachedb/base.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/cachedb/momento_cachedb.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/cachedb/redis_cachedb.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/base.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/models.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/protoc/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings.proto +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2.pyi +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/protoc/embeddings_pb2_grpc.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/embedding_models/remote_embeds.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/exceptions.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/azure_openai.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/config.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/mock_lm.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/base.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/hf_formatter.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/prompt_formatter/llama2_formatter.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/language_models/utils.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/agent_chats.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/code_parser.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/document_parser.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/para_sentence_split.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/parse_json.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/parser.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/pdf_utils.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/repo_loader.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/routing.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/search.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/spider.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/table_loader.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/url_loader.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/urls.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/utils.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/parsing/web_search.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/prompts/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/prompts/dialog.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/prompts/prompts_config.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/prompts/templates.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/py.typed +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/pydantic_v1/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/pydantic_v1/main.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/algorithms/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/algorithms/graph.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/configuration.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/constants.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/git_utils.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/globals.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/logging.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/object_registry.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/output/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/output/citations.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/output/printing.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/output/status.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/pandas_utils.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/pydantic_utils.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/system.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/utils/types.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/vector_store/__init__.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/vector_store/base.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/vector_store/chromadb.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/vector_store/lancedb.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/vector_store/meilisearch.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/vector_store/momento.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/vector_store/qdrantdb.py +0 -0
- {langroid-0.38.0 → langroid-0.39.1}/langroid/vector_store/weaviatedb.py +0 -0
langroid/agent/base.py
@@ -333,6 +333,11 @@ class Agent(ABC):
         if hasattr(message_class, "handle_message_fallback") and (
             inspect.isfunction(message_class.handle_message_fallback)
         ):
+            # When a ToolMessage has a `handle_message_fallback` method,
+            # we inject it into the agent as a method, overriding the default
+            # `handle_message_fallback` method (which does nothing).
+            # It's possible multiple tool messages have a `handle_message_fallback`,
+            # in which case, the last one inserted will be used.
             setattr(
                 self,
                 "handle_message_fallback",
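A minimal sketch of what this injection enables (the `ProbeTool` class and its handler body are hypothetical illustrations, and the fallback signature is assumed; `ToolMessage`, `ChatAgent`, and `enable_message` are existing langroid APIs):

from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.agent.tool_message import ToolMessage


class ProbeTool(ToolMessage):  # hypothetical tool, for illustration only
    request: str = "probe"
    purpose: str = "To report a <number> to probe"
    number: int

    @staticmethod
    def handle_message_fallback(agent, msg) -> str:
        # Runs when the LLM's response contains no recognized tool;
        # per the comments above, this function is injected as the
        # agent's own `handle_message_fallback`. (Signature assumed.)
        return "You forgot to use the `probe` tool; please try again."


agent = ChatAgent(ChatAgentConfig(name="Prober"))
agent.enable_message(ProbeTool)  # triggers the setattr(...) injection above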
@@ -912,7 +917,7 @@ class Agent(ABC):
         else:
             prompt = message
 
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if self.num_tokens(prompt) + output_len > self.llm.completion_context_length():
             output_len = self.llm.completion_context_length() - self.num_tokens(prompt)
             if output_len < self.config.llm.min_output_tokens:
@@ -981,7 +986,7 @@ class Agent(ABC):
             # show rich spinner only if not streaming!
             cm = status("LLM responding to message...")
             stack.enter_context(cm)
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if (
             self.num_tokens(prompt) + output_len
             > self.llm.completion_context_length()
@@ -1866,7 +1871,7 @@ class Agent(ABC):
         cumul_cost = format(tot_cost, ".4f")
         assert isinstance(self.llm, LanguageModel)
         context_length = self.llm.chat_context_length()
-        max_out = self.config.llm.max_output_tokens
+        max_out = self.config.llm.model_max_output_tokens
 
         llm_model = (
             "no-LLM" if self.config.llm is None else self.llm.config.chat_model
langroid/agent/chat_agent.py
@@ -5,7 +5,7 @@ import logging
 import textwrap
 from contextlib import ExitStack
 from inspect import isclass
-from typing import Dict, List, Optional, Self, Set, Tuple, Type, Union, cast
+from typing import Any, Dict, List, Optional, Self, Set, Tuple, Type, Union, cast
 
 import openai
 from rich import print
@@ -31,6 +31,7 @@ from langroid.language_models.base import (
     ToolChoiceTypes,
 )
 from langroid.language_models.openai_gpt import OpenAIGPT
+from langroid.mytypes import Entity, NonToolAction
 from langroid.pydantic_v1 import BaseModel, ValidationError
 from langroid.utils.configuration import settings
 from langroid.utils.object_registry import ObjectRegistry
@@ -52,6 +53,7 @@ class ChatAgentConfig(AgentConfig):
         user_message: user message to include in message sequence.
             Used only if `task` is not specified in the constructor.
         use_tools: whether to use our own ToolMessages mechanism
+        handle_llm_no_tool (NonToolAction|str): routing when LLM generates non-tool msg.
         use_functions_api: whether to use functions/tools native to the LLM API
             (e.g. OpenAI's `function_call` or `tool_call` mechanism)
         use_tools_api: When `use_functions_api` is True, if this is also True,
@@ -84,6 +86,7 @@ class ChatAgentConfig(AgentConfig):
     system_message: str = "You are a helpful assistant."
     user_message: Optional[str] = None
+    handle_llm_no_tool: NonToolAction | None = None
     use_tools: bool = False
     use_functions_api: bool = True
     use_tools_api: bool = False
@@ -579,6 +582,31 @@ class ChatAgent(Agent):
         # remove leading and trailing newlines and other whitespace
         return LLMMessage(role=Role.SYSTEM, content=content.strip())
 
+    def handle_message_fallback(self, msg: str | ChatDocument) -> Any:
+        """
+        Fallback method for the "no-tools" scenario.
+        Uses `self.config.handle_llm_no_tool` to determine the action to take.
+
+        This method can be overridden by subclasses, e.g.,
+        to create a "reminder" message when a tool is expected but the LLM "forgot"
+        to generate one.
+
+        Args:
+            msg (str | ChatDocument): The input msg to handle
+        Returns:
+            Any: The result of the handler method
+        """
+        if self.config.handle_llm_no_tool is None:
+            return None
+        if isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM:
+            from langroid.agent.tools.orchestration import AgentDoneTool, ForwardTool
+
+            match self.config.handle_llm_no_tool:
+                case NonToolAction.FORWARD_USER:
+                    return ForwardTool(agent="User")
+                case NonToolAction.DONE:
+                    return AgentDoneTool(content=msg.content, tools=msg.tool_messages)
+
     def unhandled_tools(self) -> set[str]:
         """The set of tools that are known but not handled.
         Useful in task flow: an agent can refuse to accept an incoming msg
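Together with the `handle_llm_no_tool` config option added above, this default fallback gives declarative routing for plain-text LLM replies. A minimal sketch, assuming `NonToolAction` (added to mytypes.py in this release) exposes the `FORWARD_USER` and `DONE` members used in the `match` statement:

from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.mytypes import NonToolAction

# If the LLM replies with plain text instead of a tool call,
# forward that message to the user rather than leaving it unhandled.
config = ChatAgentConfig(
    name="Assistant",
    handle_llm_no_tool=NonToolAction.FORWARD_USER,
)
agent = ChatAgent(config)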
@@ -1460,11 +1488,11 @@ class ChatAgent(Agent):
         self.message_history.extend(llm_msgs)
 
         hist = self.message_history
-        output_len = self.config.llm.max_output_tokens
+        output_len = self.config.llm.model_max_output_tokens
         if (
             truncate
             and self.chat_num_tokens(hist)
-            > self.llm.chat_context_length() - self.config.llm.max_output_tokens
+            > self.llm.chat_context_length() - self.config.llm.model_max_output_tokens
         ):
             # chat + output > max context length,
             # so first try to shorten requested output len to fit.
@@ -1489,7 +1517,7 @@ class ChatAgent(Agent):
                 The message history is longer than the max chat context
                 length allowed, and we have run out of messages to drop.
                 HINT: In your `OpenAIGPTConfig` object, try increasing
-                `chat_context_length` or decreasing `max_output_tokens`.
+                `chat_context_length` or decreasing `model_max_output_tokens`.
                 """
             )
             # drop the second message, i.e. first msg after the sys msg
@@ -1638,12 +1666,12 @@ class ChatAgent(Agent):
         Args:
             messages: seq of messages (with role, content fields) sent to LLM
             output_len: max number of tokens expected in response.
-                If None, use the LLM's default max_output_tokens.
+                If None, use the LLM's default model_max_output_tokens.
         Returns:
             Document (i.e. with fields "content", "metadata")
         """
         assert self.config.llm is not None and self.llm is not None
-        output_len = output_len or self.config.llm.max_output_tokens
+        output_len = output_len or self.config.llm.model_max_output_tokens
         streamer = noop_fn
         if self.llm.get_stream():
             streamer = self.callbacks.start_llm_stream()
@@ -1713,7 +1741,7 @@ class ChatAgent(Agent):
         Async version of `llm_response_messages`. See there for details.
         """
         assert self.config.llm is not None and self.llm is not None
-        output_len = output_len or self.config.llm.max_output_tokens
+        output_len = output_len or self.config.llm.model_max_output_tokens
         functions, fun_call, tools, force_tool, output_format = self._function_args()
         assert self.llm is not None
 
langroid/agent/special/doc_chat_agent.py
@@ -1565,7 +1565,7 @@ class DocChatAgent(ChatAgent):
         tot_tokens = self.parser.num_tokens(full_text)
         MAX_INPUT_TOKENS = (
             self.llm.completion_context_length()
-            - self.config.llm.max_output_tokens
+            - self.config.llm.model_max_output_tokens
             - 100
         )
         if tot_tokens > MAX_INPUT_TOKENS:
langroid/language_models/__init__.py
@@ -15,14 +15,13 @@ from .base import (
     LLMTokenUsage,
     LLMResponse,
 )
-from .openai_gpt import (
+from .model_info import (
     OpenAIChatModel,
     AnthropicModel,
     GeminiModel,
     OpenAICompletionModel,
-    OpenAIGPTConfig,
-    OpenAIGPT,
 )
+from .openai_gpt import OpenAIGPTConfig, OpenAIGPT, OpenAICallParams
 from .mock_lm import MockLM, MockLMConfig
 from .azure_openai import AzureConfig, AzureGPT
 
@@ -32,6 +31,7 @@ __all__ = [
     "config",
     "base",
     "openai_gpt",
+    "model_info",
     "azure_openai",
     "prompt_formatter",
     "StreamEventType",
@@ -48,6 +48,7 @@ __all__ = [
     "OpenAICompletionModel",
     "OpenAIGPTConfig",
     "OpenAIGPT",
+    "OpenAICallParams",
     "AzureConfig",
     "AzureGPT",
     "MockLM",
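After this reorganization the model enums live in model_info.py while the client classes stay in openai_gpt.py, and both remain importable from the package root; for example:

from langroid.language_models import (
    OpenAIChatModel,   # now defined in model_info.py
    OpenAIGPT,
    OpenAIGPTConfig,
    OpenAICallParams,  # newly re-exported
)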
langroid/language_models/base.py
@@ -19,6 +19,7 @@ from typing import (
 
 from langroid.cachedb.base import CacheDBConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
+from langroid.language_models.model_info import get_model_info
 from langroid.parsing.agent_chats import parse_message
 from langroid.parsing.parse_json import parse_imperfect_json, top_level_json_field
 from langroid.prompts.dialog import collate_chat_history
@@ -60,6 +61,7 @@ class LLMConfig(BaseSettings):
     streamer_async: Optional[Callable[..., Awaitable[None]]] = async_noop_fn
     api_base: str | None = None
     formatter: None | str = None
+    max_output_tokens: int | None = 8192  # specify None to use model_max_output_tokens
     timeout: int = 20  # timeout for API requests
     chat_model: str = ""
     completion_model: str = ""
@@ -67,7 +69,6 @@ class LLMConfig(BaseSettings):
     chat_context_length: int = 8000
     async_stream_quiet: bool = True  # suppress streaming output in async mode?
     completion_context_length: int = 8000
-    max_output_tokens: int = 1024  # generate at most this many tokens
     # if input length + max_output_tokens > context length of model,
     # we will try shortening requested output
     min_output_tokens: int = 64
@@ -84,6 +85,12 @@ class LLMConfig(BaseSettings):
     chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
     completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
 
+    @property
+    def model_max_output_tokens(self) -> int:
+        return (
+            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens
+        )
+
 
 class LLMFunctionCall(BaseModel):
     """
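The net effect of these LLMConfig changes: `max_output_tokens` becomes an optional user override, and the new `model_max_output_tokens` property resolves the effective cap, falling back to the per-model registry. A small sketch (using `OpenAIGPTConfig`, which inherits these fields from `LLMConfig`; the registry values are those in model_info.py below):

from langroid.language_models import OpenAIGPTConfig

# An explicit override wins:
cfg = OpenAIGPTConfig(chat_model="gpt-4o", max_output_tokens=2048)
assert cfg.model_max_output_tokens == 2048

# With max_output_tokens=None, the cap comes from MODEL_INFO
# (16_384 for "gpt-4o"):
cfg = OpenAIGPTConfig(chat_model="gpt-4o", max_output_tokens=None)
assert cfg.model_max_output_tokens == 16_384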
langroid-0.39.1/langroid/language_models/model_info.py (new file)
@@ -0,0 +1,307 @@
+from enum import Enum
+from typing import Dict, List, Optional
+
+from langroid.pydantic_v1 import BaseModel
+
+
+class ModelProvider(str, Enum):
+    """Enum for model providers"""
+
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    DEEPSEEK = "deepseek"
+    GOOGLE = "google"
+    UNKNOWN = "unknown"
+
+
+class ModelName(str, Enum):
+    """Parent class for all model name enums"""
+
+    pass
+
+
+class OpenAIChatModel(ModelName):
+    """Enum for OpenAI Chat models"""
+
+    GPT3_5_TURBO = "gpt-3.5-turbo-1106"
+    GPT4 = "gpt-4"
+    GPT4_TURBO = "gpt-4-turbo"
+    GPT4o = "gpt-4o"
+    GPT4o_MINI = "gpt-4o-mini"
+    O1 = "o1"
+    O1_MINI = "o1-mini"
+    O3_MINI = "o3-mini"
+
+
+class OpenAICompletionModel(str, Enum):
+    """Enum for OpenAI Completion models"""
+
+    DAVINCI = "davinci-002"
+    BABBAGE = "babbage-002"
+
+
+class AnthropicModel(ModelName):
+    """Enum for Anthropic models"""
+
+    CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_OPUS = "claude-3-opus-latest"
+    CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
+    CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
+
+
+class DeepSeekModel(ModelName):
+    """Enum for DeepSeek models direct from DeepSeek API"""
+
+    DEEPSEEK = "deepseek/deepseek-chat"
+    DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
+
+
+class GeminiModel(ModelName):
+    """Enum for Gemini models"""
+
+    GEMINI_1_5_FLASH = "gemini/gemini-1.5-flash"
+    GEMINI_1_5_FLASH_8B = "gemini/gemini-1.5-flash-8b"
+    GEMINI_1_5_PRO = "gemini/gemini-1.5-pro"
+    GEMINI_2_FLASH = "gemini/gemini-2.0-flash-exp"
+    GEMINI_2_FLASH_THINKING = "gemini/gemini-2.0-flash-thinking-exp"
+
+
+class ModelInfo(BaseModel):
+    """
+    Consolidated information about LLM, related to capacity, cost and API
+    idiosyncrasies. Reasonable defaults for all params in case there's no
+    specific info available.
+    """
+
+    name: str = "unknown"
+    provider: ModelProvider = ModelProvider.UNKNOWN
+    context_length: int = 16_000
+    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
+    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
+    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
+    allows_streaming: bool = True  # Whether model supports streaming output
+    allows_system_message: bool = True  # Whether model supports system messages
+    rename_params: Dict[str, str] = {}  # Rename parameters for OpenAI API
+    unsupported_params: List[str] = []
+    has_structured_output: bool = False  # Does model API support structured output?
+    has_tools: bool = True  # Does model API support tools/function-calling?
+    needs_first_user_message: bool = False  # Does API need first msg to be from user?
+    description: Optional[str] = None
+
+
+# Model information registry
+MODEL_INFO: Dict[str, ModelInfo] = {
+    # OpenAI Models
+    OpenAICompletionModel.DAVINCI.value: ModelInfo(
+        name=OpenAICompletionModel.DAVINCI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=2.0,
+        output_cost_per_million=2.0,
+        description="Davinci-002",
+    ),
+    OpenAICompletionModel.BABBAGE.value: ModelInfo(
+        name=OpenAICompletionModel.BABBAGE.value,
+        provider=ModelProvider.OPENAI,
+        context_length=4096,
+        max_output_tokens=4096,
+        input_cost_per_million=0.40,
+        output_cost_per_million=0.40,
+        description="Babbage-002",
+    ),
+    OpenAIChatModel.GPT3_5_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT3_5_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=16_385,
+        max_output_tokens=4096,
+        input_cost_per_million=0.50,
+        output_cost_per_million=1.50,
+        description="GPT-3.5 Turbo",
+    ),
+    OpenAIChatModel.GPT4.value: ModelInfo(
+        name=OpenAIChatModel.GPT4.value,
+        provider=ModelProvider.OPENAI,
+        context_length=8192,
+        max_output_tokens=8192,
+        input_cost_per_million=30.0,
+        output_cost_per_million=60.0,
+        description="GPT-4 (8K context)",
+    ),
+    OpenAIChatModel.GPT4_TURBO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_TURBO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=4096,
+        input_cost_per_million=10.0,
+        output_cost_per_million=30.0,
+        description="GPT-4 Turbo",
+    ),
+    OpenAIChatModel.GPT4o.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=2.5,
+        output_cost_per_million=10.0,
+        has_structured_output=True,
+        description="GPT-4o (128K context)",
+    ),
+    OpenAIChatModel.GPT4o_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4o_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=16_384,
+        input_cost_per_million=0.15,
+        output_cost_per_million=0.60,
+        has_structured_output=True,
+        description="GPT-4o Mini",
+    ),
+    OpenAIChatModel.O1.value: ModelInfo(
+        name=OpenAIChatModel.O1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=15.0,
+        output_cost_per_million=60.0,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Reasoning LM",
+    ),
+    OpenAIChatModel.O1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=128_000,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.1,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O1 Mini Reasoning LM",
+    ),
+    OpenAIChatModel.O3_MINI.value: ModelInfo(
+        name=OpenAIChatModel.O3_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=200_000,
+        max_output_tokens=100_000,
+        input_cost_per_million=1.1,
+        output_cost_per_million=4.4,
+        allows_streaming=False,
+        allows_system_message=False,
+        unsupported_params=["temperature", "stream"],
+        rename_params={"max_tokens": "max_completion_tokens"},
+        has_tools=False,
+        description="O3 Mini Reasoning LM",
+    ),
+    # Anthropic Models
+    AnthropicModel.CLAUDE_3_5_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_5_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=8192,
+        input_cost_per_million=3.0,
+        output_cost_per_million=15.0,
+        description="Claude 3.5 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_OPUS.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_OPUS.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=15.0,
+        output_cost_per_million=75.0,
+        description="Claude 3 Opus",
+    ),
+    AnthropicModel.CLAUDE_3_SONNET.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_SONNET.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=3.0,
+        output_cost_per_million=15.0,
+        description="Claude 3 Sonnet",
+    ),
+    AnthropicModel.CLAUDE_3_HAIKU.value: ModelInfo(
+        name=AnthropicModel.CLAUDE_3_HAIKU.value,
+        provider=ModelProvider.ANTHROPIC,
+        context_length=200_000,
+        max_output_tokens=4096,
+        input_cost_per_million=0.25,
+        output_cost_per_million=1.25,
+        description="Claude 3 Haiku",
+    ),
+    # DeepSeek Models
+    DeepSeekModel.DEEPSEEK.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.27,
+        output_cost_per_million=1.10,
+        description="DeepSeek Chat",
+    ),
+    DeepSeekModel.DEEPSEEK_R1.value: ModelInfo(
+        name=DeepSeekModel.DEEPSEEK_R1.value,
+        provider=ModelProvider.DEEPSEEK,
+        context_length=64_000,
+        max_output_tokens=8_000,
+        input_cost_per_million=0.55,
+        output_cost_per_million=2.19,
+        description="DeepSeek-R1 Reasoning LM",
+    ),
+    # Gemini Models
+    GeminiModel.GEMINI_2_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_056_768,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash",
+    ),
+    GeminiModel.GEMINI_1_5_FLASH_8B.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_FLASH_8B.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Flash 8B",
+    ),
+    GeminiModel.GEMINI_1_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_1_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=2_000_000,
+        max_output_tokens=8192,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 1.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_FLASH_THINKING.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_FLASH_THINKING.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_000_000,
+        max_output_tokens=64_000,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.0 Flash Thinking",
+    ),
+}
+
+
+def get_model_info(model: str | ModelName) -> ModelInfo:
+    """Get model information by name or enum value"""
+    if isinstance(model, str):
+        return MODEL_INFO.get(model) or ModelInfo()
+    return MODEL_INFO.get(model.value) or ModelInfo()