langroid 0.56.11__py3-none-any.whl → 0.56.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +9 -3
- langroid/agent/task.py +27 -11
- langroid/agent/tools/task_tool.py +71 -13
- langroid/language_models/base.py +25 -19
- langroid/language_models/model_info.py +57 -3
- langroid/language_models/openai_gpt.py +45 -14
- {langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/METADATA +1 -1
- {langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/RECORD +10 -10
- {langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/WHEEL +0 -0
- {langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/licenses/LICENSE +0 -0
langroid/agent/base.py
CHANGED
@@ -137,6 +137,7 @@ class Agent(ABC):
 
     def __init__(self, config: AgentConfig = AgentConfig()):
         self.config = config
+        self.id = ObjectRegistry.new_id()  # Initialize agent ID
         self.lock = asyncio.Lock()  # for async access to update self.llm.usage_cost
         self.dialog: List[Tuple[str, str]] = []  # seq of LLM (prompt, response) tuples
         self.llm_tools_map: Dict[str, Type[ToolMessage]] = {}
@@ -685,6 +686,7 @@ class Agent(ABC):
             results.metadata.tool_ids = (
                 [] if msg is None or isinstance(msg, str) else msg.metadata.tool_ids
            )
+            results.metadata.agent_id = self.id
            return results
        sender_name = self.config.name
        if isinstance(msg, ChatDocument) and msg.function_call is not None:
@@ -703,6 +705,7 @@ class Agent(ABC):
            metadata=ChatDocMetaData(
                source=Entity.AGENT,
                sender=Entity.AGENT,
+                agent_id=self.id,
                sender_name=sender_name,
                oai_tool_id=oai_tool_id,
                # preserve trail of tool_ids for OpenAI Assistant fn-calls
@@ -967,6 +970,7 @@ class Agent(ABC):
        return ChatDocument(
            content=user_msg,
            metadata=ChatDocMetaData(
+                agent_id=self.id,
                source=source,
                sender=sender,
                # preserve trail of tool_ids for OpenAI Assistant fn-calls
@@ -2142,7 +2146,7 @@ class Agent(ABC):
        completion_tokens = self.num_tokens(response.message)
        if response.function_call is not None:
            completion_tokens += self.num_tokens(str(response.function_call))
-        cost = self.compute_token_cost(prompt_tokens, completion_tokens)
+        cost = self.compute_token_cost(prompt_tokens, 0, completion_tokens)
        response.usage = LLMTokenUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
@@ -2166,9 +2170,11 @@ class Agent(ABC):
        if print_response_stats:
            print(self.indent + self.token_stats_str)
 
-    def compute_token_cost(self, prompt: int, completion: int) -> float:
+    def compute_token_cost(self, prompt: int, cached: int, completion: int) -> float:
        price = cast(LanguageModel, self.llm).chat_cost()
-        return (
+        return (
+            price[0] * (prompt - cached) + price[1] * cached + price[2] * completion
+        ) / 1000
 
    def ask_agent(
        self,
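With this change, cached prompt tokens are billed at their own (typically discounted) rate. Below is a minimal sketch of the arithmetic behind the new compute_token_cost / _cost_chat_model formula, using the GPT-4o per-million prices that appear later in this diff (2.5 input / 1.25 cached / 10.0 output); the token counts are made-up illustrative values.

# Sketch of the new 3-part cost formula.
# Prices are USD per 1000 tokens: (input, cached-input, output); the GPT-4o entry
# in this diff (2.5 / 1.25 / 10.0 USD per million) gives (0.0025, 0.00125, 0.01).
price = (0.0025, 0.00125, 0.01)
prompt, cached, completion = 1000, 400, 200  # cached is a subset of the prompt tokens

cost = (
    price[0] * (prompt - cached)   # 600 uncached prompt tokens at the full input rate
    + price[1] * cached            # 400 prompt tokens served from the provider cache
    + price[2] * completion        # 200 generated tokens
) / 1000
print(cost)  # 0.004 (USD)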
langroid/agent/task.py
CHANGED
@@ -615,7 +615,10 @@ class Task:
        if isinstance(msg, ChatDocument):
            # carefully deep-copy: fresh metadata.id, register
            # as new obj in registry
+            original_parent_id = msg.metadata.parent_id
            self.pending_message = ChatDocument.deepcopy(msg)
+            # Preserve the parent pointer from the original message
+            self.pending_message.metadata.parent_id = original_parent_id
        if self.pending_message is not None and self.caller is not None:
            # msg may have come from `caller`, so we pretend this is from
            # the CURRENT task's USER entity
@@ -623,7 +626,11 @@ class Task:
        # update parent, child, agent pointers
        if msg is not None:
            msg.metadata.child_id = self.pending_message.metadata.id
-
+            # Only override parent_id if it wasn't already set in the
+            # original message. This preserves parent chains from TaskTool
+            if not msg.metadata.parent_id:
+                self.pending_message.metadata.parent_id = msg.metadata.id
+        if self.pending_message is not None:
            self.pending_message.metadata.agent_id = self.agent.id
 
        self._show_pending_message_if_debug()
@@ -2250,24 +2257,33 @@ class Task:
    def _get_message_chain(
        self, msg: ChatDocument | None, max_depth: Optional[int] = None
    ) -> List[ChatDocument]:
-        """Get the chain of messages
+        """Get the chain of messages using agent's message history."""
        if max_depth is None:
            # Get max depth needed from all sequences
            max_depth = 50  # default fallback
            if self._parsed_done_sequences:
                max_depth = max(len(seq.events) for seq in self._parsed_done_sequences)
 
-
-
-
+        # Get chat document IDs from message history
+        doc_ids = [
+            m.chat_document_id for m in self.agent.message_history if m.chat_document_id
+        ]
+
+        # Add current message ID if it exists and is not already the last one
+        if msg:
+            msg_id = msg.id()
+            if not doc_ids or doc_ids[-1] != msg_id:
+                doc_ids.append(msg_id)
 
-
-
-            current = current.parent
-            depth += 1
+        # Take only the last max_depth elements
+        relevant_ids = doc_ids[-max_depth:]
 
-        #
-        return
+        # Convert IDs to ChatDocuments and filter out None values
+        return [
+            doc
+            for doc_id in relevant_ids
+            if (doc := ChatDocument.from_id(doc_id)) is not None
+        ]
 
    def _matches_event(self, actual: AgentEvent, expected: AgentEvent) -> bool:
        """Check if an actual event matches an expected event pattern."""
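The rewritten _get_message_chain no longer walks parent pointers; it collects chat_document_ids from the agent's message history, appends the in-flight message, keeps only the last max_depth ids, and drops ids that no longer resolve. Below is a self-contained sketch of that pattern, with a plain dict and a from_id function standing in for langroid's real ChatDocument registry (both are hypothetical stand-ins).

from typing import Dict, List, Optional

# Hypothetical registry: doc-id -> document (here just strings for brevity)
REGISTRY: Dict[str, str] = {"d1": "doc-1", "d2": "doc-2", "d4": "doc-4"}

def from_id(doc_id: str) -> Optional[str]:
    # May return None if the id was never registered (like ChatDocument.from_id)
    return REGISTRY.get(doc_id)

def message_chain(doc_ids: List[str], current_id: Optional[str], max_depth: int) -> List[str]:
    if current_id and (not doc_ids or doc_ids[-1] != current_id):
        doc_ids = doc_ids + [current_id]      # append the in-flight message id
    relevant_ids = doc_ids[-max_depth:]       # keep only the most recent max_depth ids
    # resolve ids and silently drop the ones that no longer exist
    return [doc for i in relevant_ids if (doc := from_id(i)) is not None]

print(message_chain(["d1", "d2", "d3"], "d4", max_depth=2))  # ['doc-4']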
langroid/agent/tools/task_tool.py
CHANGED
@@ -38,10 +38,13 @@ class TaskTool(ToolMessage):
    system_message: Optional[str] = Field(
        ...,
        description="""
-            Optional system message to configure the sub-agent's general behavior
+            Optional system message to configure the sub-agent's general behavior and
+            to specify the task and its context.
            A good system message will have these components:
            - Inform the sub-agent of its role, e.g. "You are a financial analyst."
-            - Clear spec of the task
+            - Clear spec of the task, with sufficient context for the sub-agent to
+              understand what it needs to do, since the sub-agent does
+              NOT have access to your conversation history!
            - Any additional general context needed for the task, such as a
              (part of a) document, or data items, etc.
            - Specify when to use certain tools, e.g.
@@ -73,9 +76,10 @@ class TaskTool(ToolMessage):
            A list of tool names to enable for the sub-agent.
            This must be a list of strings referring to the names of tools
            that are known to you.
-            If you want to enable all tools, you
-
-
+            If you want to enable all tools, or you do not have any preference
+            on what tools are enabled for the sub-agent, you can set
+            this field to a singleton list ['ALL']
+            To disable all tools, set it to a singleton list ['NONE']
            """,
    )
    # TODO: ensure valid model name
@@ -113,11 +117,20 @@ class TaskTool(ToolMessage):
        # TODO: Maybe we just copy the parent agent's config and override chat_model?
        # -- but what if parent agent has a MockLMConfig?
        llm_config = lm.OpenAIGPTConfig(
-            chat_model=self.model or
+            chat_model=self.model or lm.OpenAIChatModel.GPT4_1_MINI,
        )
        config = ChatAgentConfig(
            name=agent_name,
            llm=llm_config,
+            handle_llm_no_tool=f"""
+                You forgot to use one of your TOOLs! Remember that you must either:
+                - use a tool, or a sequence of tools, to complete your task, OR
+                - if you are done with your task, use the `{DoneTool.name()}` tool
+                  to return the result.
+
+                As a reminder, this was your task:
+                {self.prompt}
+                """,
            system_message=f"""
                {self.system_message}
 
@@ -138,7 +151,9 @@ class TaskTool(ToolMessage):
            tool_classes = [
                agent.llm_tools_map[t]
                for t in agent.llm_tools_known
-                if t in agent.llm_tools_map
+                if t in agent.llm_tools_map
+                and t != self.request
+                and agent.llm_tools_map[t]._allow_llm_use
                # Exclude the TaskTool itself!
            ]
        elif self.tools == ["NONE"]:
@@ -150,6 +165,7 @@ class TaskTool(ToolMessage):
                agent.llm_tools_map[tool_name]
                for tool_name in self.tools
                if tool_name in agent.llm_tools_map
+                and agent.llm_tools_map[tool_name]._allow_llm_use
            ]
 
        # always enable the DoneTool to signal task completion
@@ -160,7 +176,9 @@ class TaskTool(ToolMessage):
 
        return task
 
-    def handle(
+    def handle(
+        self, agent: ChatAgent, chat_doc: Optional[ChatDocument] = None
+    ) -> Optional[ChatDocument]:
        """
 
        Handle the TaskTool by creating a sub-agent with specified tools
@@ -168,24 +186,64 @@ class TaskTool(ToolMessage):
 
        Args:
            agent: The parent ChatAgent that is handling this tool
+            chat_doc: The ChatDocument containing this tool message
        """
 
        task = self._set_up_task(agent)
-
-
+
+        # Create a ChatDocument for the prompt with parent pointer
+        prompt_doc = None
+        if chat_doc is not None:
+            from langroid.agent.chat_document import ChatDocMetaData
+
+            prompt_doc = ChatDocument(
+                content=self.prompt,
+                metadata=ChatDocMetaData(
+                    parent_id=chat_doc.id(),
+                    agent_id=agent.id,
+                    sender=chat_doc.metadata.sender,
+                ),
+            )
+            # Set bidirectional parent-child relationship
+            chat_doc.metadata.child_id = prompt_doc.id()
+
+        # Run the task with the ChatDocument or string prompt
+        result = task.run(prompt_doc or self.prompt, turns=self.max_iterations or 10)
        return result
 
-    async def handle_async(
+    async def handle_async(
+        self, agent: ChatAgent, chat_doc: Optional[ChatDocument] = None
+    ) -> Optional[ChatDocument]:
        """
        Async method to handle the TaskTool by creating a sub-agent with specified tools
        and running the task non-interactively.
 
        Args:
            agent: The parent ChatAgent that is handling this tool
+            chat_doc: The ChatDocument containing this tool message
        """
        task = self._set_up_task(agent)
-
+
+        # Create a ChatDocument for the prompt with parent pointer
+        prompt_doc = None
+        if chat_doc is not None:
+            from langroid.agent.chat_document import ChatDocMetaData
+
+            prompt_doc = ChatDocument(
+                content=self.prompt,
+                metadata=ChatDocMetaData(
+                    parent_id=chat_doc.id(),
+                    agent_id=agent.id,
+                    sender=chat_doc.metadata.sender,
+                ),
+            )
+            # Set bidirectional parent-child relationship
+            chat_doc.metadata.child_id = prompt_doc.id()
+
+        # Run the task with the ChatDocument or string prompt
        # TODO eventually allow the various task setup configs,
        # including termination conditions
-        result = await task.run_async(
+        result = await task.run_async(
+            prompt_doc or self.prompt, turns=self.max_iterations or 10
+        )
        return result
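Both handle and handle_async now wrap the prompt in a ChatDocument whose parent_id points at the incoming tool-call document, and set child_id on the original, so lineage is traceable across the parent agent and the sub-task. Below is a minimal sketch of that bidirectional linking using hypothetical stand-in classes (not langroid's real ChatDocument API).

from dataclasses import dataclass, field
from itertools import count
from typing import Optional

_ids = count(1)

@dataclass
class Meta:
    parent_id: Optional[str] = None
    child_id: Optional[str] = None

@dataclass
class Doc:  # hypothetical stand-in for a ChatDocument
    content: str
    metadata: Meta = field(default_factory=Meta)
    id: str = field(default_factory=lambda: f"doc-{next(_ids)}")

def link(parent: Doc, child: Doc) -> None:
    """Record the bidirectional parent/child pointers, as TaskTool.handle now does."""
    child.metadata.parent_id = parent.id
    parent.metadata.child_id = child.id

tool_call_doc = Doc("original tool-call message from the parent agent")
prompt_doc = Doc("prompt handed to the sub-agent's Task")
link(tool_call_doc, prompt_doc)
assert prompt_doc.metadata.parent_id == tool_call_doc.id
assert tool_call_doc.metadata.child_id == prompt_doc.id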
langroid/language_models/base.py
CHANGED
@@ -91,10 +91,6 @@ class LLMConfig(BaseSettings):
    # reasoning output from reasoning models
    cache_config: None | CacheDBConfig = RedisCacheConfig()
    thought_delimiters: Tuple[str, str] = ("<think>", "</think>")
-
-    # Dict of model -> (input/prompt cost, output/completion cost)
-    chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
-    completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
    retry_params: RetryParams = RetryParams()
 
    @property
@@ -131,7 +127,7 @@ class LLMFunctionCall(BaseModel):
        if not isinstance(dict_or_list, dict):
            raise ValueError(
                f"""
-                Invalid function args: {fun_args_str}
+                Invalid function args: {fun_args_str}
                parsed as {dict_or_list},
                which is not a valid dict.
                """
@@ -224,12 +220,14 @@ class LLMTokenUsage(BaseModel):
    """
 
    prompt_tokens: int = 0
+    cached_tokens: int = 0
    completion_tokens: int = 0
    cost: float = 0.0
    calls: int = 0  # how many API calls - not used as of 2025-04-04
 
    def reset(self) -> None:
        self.prompt_tokens = 0
+        self.cached_tokens = 0
        self.completion_tokens = 0
        self.cost = 0.0
        self.calls = 0
@@ -237,7 +235,8 @@ class LLMTokenUsage(BaseModel):
    def __str__(self) -> str:
        return (
            f"Tokens = "
-            f"(prompt {self.prompt_tokens},
+            f"(prompt {self.prompt_tokens}, cached {self.cached_tokens}, "
+            f"completion {self.completion_tokens}), "
            f"Cost={self.cost}, Calls={self.calls}"
        )
 
@@ -462,9 +461,9 @@ class LanguageModel(ABC):
        if type(config) is LLMConfig:
            raise ValueError(
                """
-                Cannot create a Language Model object from LLMConfig.
-                Please specify a specific subclass of LLMConfig e.g.,
-                OpenAIGPTConfig. If you are creating a ChatAgent from
+                Cannot create a Language Model object from LLMConfig.
+                Please specify a specific subclass of LLMConfig e.g.,
+                OpenAIGPTConfig. If you are creating a ChatAgent from
                a ChatAgentConfig, please specify the `llm` field of this config
                as a specific subclass of LLMConfig, e.g., OpenAIGPTConfig.
                """
@@ -666,8 +665,15 @@ class LanguageModel(ABC):
    def completion_context_length(self) -> int:
        return self.config.completion_context_length or DEFAULT_CONTEXT_LENGTH
 
-    def chat_cost(self) -> Tuple[float, float]:
-
+    def chat_cost(self) -> Tuple[float, float, float]:
+        """
+        Return the cost per 1000 tokens for chat completions.
+
+        Returns:
+            Tuple[float, float, float]: (input_cost, cached_cost, output_cost)
+                per 1000 tokens
+        """
+        return (0.0, 0.0, 0.0)
 
    def reset_usage_cost(self) -> None:
        for mdl in [self.config.chat_model, self.config.completion_model]:
@@ -754,18 +760,18 @@ class LanguageModel(ABC):
 
        prompt = f"""
        You are an expert at understanding a CHAT HISTORY between an AI Assistant
-        and a User, and you are highly skilled in rephrasing the User's FOLLOW-UP
-        QUESTION/REQUEST as a STANDALONE QUESTION/REQUEST that can be understood
+        and a User, and you are highly skilled in rephrasing the User's FOLLOW-UP
+        QUESTION/REQUEST as a STANDALONE QUESTION/REQUEST that can be understood
        WITHOUT the context of the chat history.
-
-        Below is the CHAT HISTORY. When the User asks you to rephrase a
-        FOLLOW-UP QUESTION/REQUEST, your ONLY task is to simply return the
-        question REPHRASED as a STANDALONE QUESTION/REQUEST, without any additional
+
+        Below is the CHAT HISTORY. When the User asks you to rephrase a
+        FOLLOW-UP QUESTION/REQUEST, your ONLY task is to simply return the
+        question REPHRASED as a STANDALONE QUESTION/REQUEST, without any additional
        text or context.
-
+
        <CHAT_HISTORY>
        {history}
-        </CHAT_HISTORY>
+        </CHAT_HISTORY>
        """.strip()
 
        follow_up_question = f"""
langroid/language_models/model_info.py
CHANGED
@@ -69,7 +69,9 @@ class GeminiModel(ModelName):
    GEMINI_1_5_FLASH = "gemini-1.5-flash"
    GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
    GEMINI_1_5_PRO = "gemini-1.5-pro"
-    GEMINI_2_5_PRO = "gemini-2.5-pro
+    GEMINI_2_5_PRO = "gemini-2.5-pro"
+    GEMINI_2_5_FLASH = "gemini-2.5-flash"
+    GEMINI_2_5_FLASH_LITE_PREVIEW = "gemini-2.5-flash-lite-preview-06-17"
    GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
    GEMINI_2_FLASH = "gemini-2.0-flash"
    GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
@@ -108,6 +110,7 @@ class ModelInfo(BaseModel):
    max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
    max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
    input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
+    cached_cost_per_million: float = 0.0  # Cost in USD per million cached tokens
    output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
    allows_streaming: bool = True  # Whether model supports streaming output
    allows_system_message: bool = True  # Whether model supports system messages
@@ -173,6 +176,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        description="GPT-4.1",
    ),
@@ -182,6 +186,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=0.40,
+        cached_cost_per_million=0.10,
        output_cost_per_million=1.60,
        description="GPT-4.1 Mini",
    ),
@@ -191,6 +196,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=1_047_576,
        max_output_tokens=32_768,
        input_cost_per_million=2.00,
+        cached_cost_per_million=0.50,
        output_cost_per_million=8.00,
        description="GPT-4.1",
    ),
@@ -200,6 +206,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=128_000,
        max_output_tokens=16_384,
        input_cost_per_million=2.5,
+        cached_cost_per_million=1.25,
        output_cost_per_million=10.0,
        has_structured_output=True,
        description="GPT-4o (128K context)",
@@ -210,6 +217,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=128_000,
        max_output_tokens=16_384,
        input_cost_per_million=0.15,
+        cached_cost_per_million=0.075,
        output_cost_per_million=0.60,
        has_structured_output=True,
        description="GPT-4o Mini",
@@ -220,6 +228,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=15.0,
+        cached_cost_per_million=7.50,
        output_cost_per_million=60.0,
        allows_streaming=True,
        allows_system_message=False,
@@ -233,8 +242,9 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        provider=ModelProvider.OPENAI,
        context_length=200_000,
        max_output_tokens=100_000,
-        input_cost_per_million=
-
+        input_cost_per_million=2.0,
+        cached_cost_per_million=0.50,
+        output_cost_per_million=8.0,
        allows_streaming=True,
        allows_system_message=False,
        unsupported_params=["temperature"],
@@ -248,6 +258,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=128_000,
        max_output_tokens=65_536,
        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
        output_cost_per_million=4.4,
        allows_streaming=False,
        allows_system_message=False,
@@ -262,6 +273,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=1.1,
+        cached_cost_per_million=0.55,
        output_cost_per_million=4.4,
        allows_streaming=False,
        allows_system_message=False,
@@ -276,6 +288,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=100_000,
        input_cost_per_million=1.10,
+        cached_cost_per_million=0.275,
        output_cost_per_million=4.40,
        allows_streaming=False,
        allows_system_message=False,
@@ -291,6 +304,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=8192,
        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
        output_cost_per_million=15.0,
        description="Claude 3.5 Sonnet",
    ),
@@ -300,6 +314,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=15.0,
+        cached_cost_per_million=1.50,
        output_cost_per_million=75.0,
        description="Claude 3 Opus",
    ),
@@ -309,6 +324,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=3.0,
+        cached_cost_per_million=0.30,
        output_cost_per_million=15.0,
        description="Claude 3 Sonnet",
    ),
@@ -318,6 +334,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=200_000,
        max_output_tokens=4096,
        input_cost_per_million=0.25,
+        cached_cost_per_million=0.03,
        output_cost_per_million=1.25,
        description="Claude 3 Haiku",
    ),
@@ -328,6 +345,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=64_000,
        max_output_tokens=8_000,
        input_cost_per_million=0.27,
+        cached_cost_per_million=0.07,
        output_cost_per_million=1.10,
        description="DeepSeek Chat",
    ),
@@ -337,6 +355,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=64_000,
        max_output_tokens=8_000,
        input_cost_per_million=0.55,
+        cached_cost_per_million=0.14,
        output_cost_per_million=2.19,
        description="DeepSeek-R1 Reasoning LM",
    ),
@@ -347,6 +366,7 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        context_length=1_056_768,
        max_output_tokens=8192,
        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
        output_cost_per_million=0.40,
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash",
@@ -401,6 +421,40 @@ MODEL_INFO: Dict[str, ModelInfo] = {
        rename_params={"max_tokens": "max_completion_tokens"},
        description="Gemini 2.0 Flash Thinking",
    ),
+    # Gemini 2.5 Models
+    GeminiModel.GEMINI_2_5_PRO.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_PRO.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=1.25,
+        cached_cost_per_million=0.31,
+        output_cost_per_million=10.0,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Pro",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=1_048_576,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.30,
+        cached_cost_per_million=0.075,
+        output_cost_per_million=2.50,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash",
+    ),
+    GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value: ModelInfo(
+        name=GeminiModel.GEMINI_2_5_FLASH_LITE_PREVIEW.value,
+        provider=ModelProvider.GOOGLE,
+        context_length=65_536,
+        max_output_tokens=65_536,
+        input_cost_per_million=0.10,
+        cached_cost_per_million=0.025,
+        output_cost_per_million=0.40,
+        rename_params={"max_tokens": "max_completion_tokens"},
+        description="Gemini 2.5 Flash Lite Preview",
+    ),
 }
 
 
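The new cached_cost_per_million fields are what chat_cost() (changed below in openai_gpt.py) converts into its per-1K price triple. A quick sanity check on the Gemini 2.5 Flash entry above; this is just arithmetic on the numbers in this diff, not a call into langroid.

# Per-million prices from the Gemini 2.5 Flash entry above
input_cost_per_million = 0.30
cached_cost_per_million = 0.075
output_cost_per_million = 2.50

# chat_cost() reports USD per 1000 tokens as (input, cached, output)
price = (
    input_cost_per_million / 1000,   # 0.0003
    cached_cost_per_million / 1000,  # 0.000075
    output_cost_per_million / 1000,  # 0.0025
)
print(price)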
langroid/language_models/openai_gpt.py
CHANGED
@@ -766,14 +766,21 @@ class OpenAIGPT(LanguageModel):
            or self.completion_info().context_length
        )
 
-    def chat_cost(self) -> Tuple[float, float]:
+    def chat_cost(self) -> Tuple[float, float, float]:
        """
-        (Prompt, Generation) cost per 1000 tokens, for chat-completion
+        (Prompt, Cached, Generation) cost per 1000 tokens, for chat-completion
        models/endpoints.
        Get it from the dict, otherwise fail-over to general method
        """
        info = self.info()
-
+        cached_cost_per_million = info.cached_cost_per_million
+        if not cached_cost_per_million:
+            cached_cost_per_million = info.input_cost_per_million
+        return (
+            info.input_cost_per_million / 1000,
+            cached_cost_per_million / 1000,
+            info.output_cost_per_million / 1000,
+        )
 
    def set_stream(self, stream: bool) -> bool:
        """Enable or disable streaming output from API.
@@ -1429,6 +1436,16 @@ class OpenAIGPT(LanguageModel):
        # and the reasoning may be included in the message content
        # within delimiters like <think> ... </think>
        reasoning, completion = self.get_reasoning_final(completion)
+
+        prompt_tokens = usage.get("prompt_tokens", 0)
+        prompt_tokens_details: Any = usage.get("prompt_tokens_details", {})
+        cached_tokens = (
+            prompt_tokens_details.get("cached_tokens", 0)
+            if isinstance(prompt_tokens_details, dict)
+            else 0
+        )
+        completion_tokens = usage.get("completion_tokens", 0)
+
        return (
            LLMResponse(
                message=completion,
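The stream-completion path above now reads cached-token counts out of the OpenAI-style usage payload, guarding against prompt_tokens_details being missing or not a dict. Below is a standalone sketch of that defensive parsing on a sample usage dict; the shape mirrors OpenAI's chat-completions usage object, the numbers are made up.

from typing import Any, Dict

usage: Dict[str, Any] = {
    "prompt_tokens": 1200,
    "completion_tokens": 150,
    "prompt_tokens_details": {"cached_tokens": 1024},  # may be absent or not a dict
}

prompt_tokens = usage.get("prompt_tokens", 0)
prompt_tokens_details: Any = usage.get("prompt_tokens_details", {})
cached_tokens = (
    prompt_tokens_details.get("cached_tokens", 0)
    if isinstance(prompt_tokens_details, dict)
    else 0
)
completion_tokens = usage.get("completion_tokens", 0)
print(prompt_tokens, cached_tokens, completion_tokens)  # 1200 1024 150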
@@ -1438,11 +1455,13 @@ class OpenAIGPT(LanguageModel):
                oai_tool_calls=tool_calls or None if len(tool_deltas) > 0 else None,
                function_call=function_call if has_function else None,
                usage=LLMTokenUsage(
-                    prompt_tokens=
-
+                    prompt_tokens=prompt_tokens,
+                    cached_tokens=cached_tokens,
+                    completion_tokens=completion_tokens,
                    cost=self._cost_chat_model(
-
-
+                        prompt_tokens,
+                        cached_tokens,
+                        completion_tokens,
                    ),
                ),
            ),
@@ -1479,9 +1498,11 @@ class OpenAIGPT(LanguageModel):
            return hashed_key, None
        return hashed_key, cached_val
 
-    def _cost_chat_model(self, prompt: int, completion: int) -> float:
+    def _cost_chat_model(self, prompt: int, cached: int, completion: int) -> float:
        price = self.chat_cost()
-        return (
+        return (
+            price[0] * (prompt - cached) + price[1] * cached + price[2] * completion
+        ) / 1000
 
    def _get_non_stream_token_usage(
        self, cached: bool, response: Dict[str, Any]
@@ -1499,14 +1520,24 @@ class OpenAIGPT(LanguageModel):
        """
        cost = 0.0
        prompt_tokens = 0
+        cached_tokens = 0
        completion_tokens = 0
-
-
-
-
+
+        usage = response.get("usage")
+        if not cached and not self.get_stream() and usage is not None:
+            prompt_tokens = usage.get("prompt_tokens") or 0
+            prompt_tokens_details = usage.get("prompt_tokens_details", {})
+            cached_tokens = prompt_tokens_details.get("cached_tokens") or 0
+            completion_tokens = usage.get("completion_tokens") or 0
+            cost = self._cost_chat_model(
+                prompt_tokens, cached_tokens, completion_tokens
+            )
 
        return LLMTokenUsage(
-            prompt_tokens=prompt_tokens,
+            prompt_tokens=prompt_tokens,
+            cached_tokens=cached_tokens,
+            completion_tokens=completion_tokens,
+            cost=cost,
        )
 
    def generate(self, prompt: str, max_tokens: int = 200) -> LLMResponse:
{langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/RECORD
CHANGED
@@ -3,13 +3,13 @@ langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
 langroid/mytypes.py,sha256=HIcYAqGeA9OK0Hlscym2FI5Oax9QFljDZoVgRlomhRk,4014
 langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
-langroid/agent/base.py,sha256=
+langroid/agent/base.py,sha256=exiOhO0L1StZ8ziPBnunHYiKFvEnRyaLnMpBrc8tyMw,86263
 langroid/agent/batch.py,sha256=wpE9RqCNDVDhAXkCB7wEqfCIEAi6qKcrhaZ-Zr9T4C0,21375
 langroid/agent/chat_agent.py,sha256=pBnLGlAA6d2MK_1qa4GyhFZHnDf_RrUDli7__PKRRz4,88956
 langroid/agent/chat_document.py,sha256=0e6zYkqIorMIVbCsxOul9ziwAPPOWDsBsRV9E8ux-WI,18055
 langroid/agent/done_sequence_parser.py,sha256=oUPzQCkkAo-5qos3ndSV47Lre7O_LoGWwTybjE9sCwc,4381
 langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
-langroid/agent/task.py,sha256=
+langroid/agent/task.py,sha256=Fzqu4TbKKIO8CZr9eyppYjhRomMVkppb81ig98U4iHs,102170
 langroid/agent/tool_message.py,sha256=BhjP-_TfQ2tgxuY4Yo_JHLOwwt0mJ4BwjPnREvEY4vk,14744
 langroid/agent/xml_tool_message.py,sha256=oeBKnJNoGaKdtz39XoWGMTNlVyXew2MWH5lgtYeh8wQ,15496
 langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -54,7 +54,7 @@ langroid/agent/tools/recipient_tool.py,sha256=dr0yTxgNEIoxUYxH6TtaExC4G_8WdJ0xGo
 langroid/agent/tools/retrieval_tool.py,sha256=zcAV20PP_6VzSd-UE-IJcabaBseFL_QNz59Bnig8-lE,946
 langroid/agent/tools/rewind_tool.py,sha256=XAXL3BpNhCmBGYq_qi_sZfHJuIw7NY2jp4wnojJ7WRs,5606
 langroid/agent/tools/segment_extract_tool.py,sha256=__srZ_VGYLVOdPrITUM8S0HpmX4q7r5FHWMDdHdEv8w,1440
-langroid/agent/tools/task_tool.py,sha256=
+langroid/agent/tools/task_tool.py,sha256=Z56QzELMNhU5TWGzI4MgxyYnw__6e75ZhCzBJ0lveqA,9686
 langroid/agent/tools/tavily_search_tool.py,sha256=soI-j0HdgVQLf09wRQScaEK4b5RpAX9C4cwOivRFWWI,1903
 langroid/agent/tools/mcp/__init__.py,sha256=DJNM0VeFnFS3pJKCyFGggT8JVjVu0rBzrGzasT1HaSM,387
 langroid/agent/tools/mcp/decorators.py,sha256=h7dterhsmvWJ8q4mp_OopmuG2DF71ty8cZwOyzdDZuk,1127
@@ -73,12 +73,12 @@ langroid/embedding_models/protoc/embeddings_pb2.pyi,sha256=UkNy7BrNsmQm0vLb3NtGX
 langroid/embedding_models/protoc/embeddings_pb2_grpc.py,sha256=9dYQqkW3JPyBpSEjeGXTNpSqAkC-6FPtBHyteVob2Y8,2452
 langroid/language_models/__init__.py,sha256=3aD2qC1lz8v12HX4B-dilv27gNxYdGdeu1QvDlkqqHs,1095
 langroid/language_models/azure_openai.py,sha256=SW0Fp_y6HpERr9l6TtF6CYsKgKwjUf_hSL_2mhTV4wI,5034
-langroid/language_models/base.py,sha256=
+langroid/language_models/base.py,sha256=r0MckcZGmuv_opKR2xvjzOz94mmWCzn9LJKgqyBjJ7c,28559
 langroid/language_models/client_cache.py,sha256=YtGcpalYkS_ckMU12J7VmUOGmVv1wzuLUBxgIagcpmA,6896
 langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
 langroid/language_models/mock_lm.py,sha256=tA9JpURznsMZ59iRhFYMmaYQzAc0D0BT-PiJIV58sAk,4079
-langroid/language_models/model_info.py,sha256=
-langroid/language_models/openai_gpt.py,sha256=
+langroid/language_models/model_info.py,sha256=LzRfZsWmOm7WF6KGJfcN0aVdRqk0URNuDGMMz6cFt50,17121
+langroid/language_models/openai_gpt.py,sha256=FFiJa9_j_bTiA8SzBv7xssuc7LGxT_TI7Pcg8XLJnzE,89230
 langroid/language_models/provider_params.py,sha256=fX25NAmYUIc1-nliMKpmTGZO6D6RpyTXtSDdZCZdb5w,5464
 langroid/language_models/utils.py,sha256=n55Oe2_V_4VNGhytvPWLYC-0tFS07RTjN83KWl-p_MI,6032
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
@@ -138,7 +138,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
 langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
 langroid/vector_store/qdrantdb.py,sha256=ZYrT9mxoUCx_67Qzb5xnkWuFG12rfe30yAg4NgG2ueA,19168
 langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
-langroid-0.56.
-langroid-0.56.
-langroid-0.56.
-langroid-0.56.
+langroid-0.56.13.dist-info/METADATA,sha256=W_sS_4htB4FnkLzrVk0KYhaRSpkK-rUEnFY1f-t0bAo,65745
+langroid-0.56.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.56.13.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.56.13.dist-info/RECORD,,
{langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/WHEEL
File without changes
{langroid-0.56.11.dist-info → langroid-0.56.13.dist-info}/licenses/LICENSE
File without changes