langroid 0.50.2__py3-none-any.whl → 0.50.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/base.py +9 -5
- langroid/agent/special/doc_chat_task.py +0 -0
- langroid/language_models/base.py +1 -1
- langroid/language_models/openai_gpt.py +109 -32
- {langroid-0.50.2.dist-info → langroid-0.50.3.dist-info}/METADATA +1 -1
- {langroid-0.50.2.dist-info → langroid-0.50.3.dist-info}/RECORD +8 -7
- {langroid-0.50.2.dist-info → langroid-0.50.3.dist-info}/WHEEL +0 -0
- {langroid-0.50.2.dist-info → langroid-0.50.3.dist-info}/licenses/LICENSE +0 -0
langroid/agent/base.py
CHANGED
@@ -1929,10 +1929,13 @@ class Agent(ABC):
         print_response_stats: bool = True,
     ) -> None:
         """
-        Updates `response.usage` obj (token usage and cost fields)
-
-
-
+        Updates `response.usage` obj (token usage and cost fields) if needed.
+        An update is needed only if:
+        - stream is True (i.e. streaming was enabled), and
+        - the response was NOT obtained from cached, and
+        - the API did NOT provide the usage/cost fields during streaming
+        (As of Sep 2024, the OpenAI API started providing these; for other APIs
+        this may not necessarily be the case).

         Args:
             response (LLMResponse): LLMResponse object
@@ -1945,10 +1948,11 @@ class Agent(ABC):
         if response is None or self.llm is None:
             return

+        no_usage_info = response.usage is None or response.usage.prompt_tokens == 0
         # Note: If response was not streamed, then
         # `response.usage` would already have been set by the API,
         # so we only need to update in the stream case.
-        if stream:
+        if stream and no_usage_info:
             # usage, cost = 0 when response is from cache
             prompt_tokens = 0
             completion_tokens = 0
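The net effect of the two hunks above: token usage is now recomputed only when it is actually missing. A minimal standalone sketch of that guard (the `Usage`/`Response` stand-in classes are invented for illustration; only the `no_usage_info` logic comes from the diff):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Usage:
    prompt_tokens: int = 0
    completion_tokens: int = 0
    cost: float = 0.0


@dataclass
class Response:
    usage: Optional[Usage] = None


def needs_usage_update(response: Response, stream: bool) -> bool:
    # mirrors the new guard in Agent.update_token_usage:
    # recompute usage only if streaming was on AND neither the API
    # nor the cache already supplied usage numbers
    no_usage_info = response.usage is None or response.usage.prompt_tokens == 0
    return stream and no_usage_info


# streamed response whose API already reported usage: no recompute needed
assert not needs_usage_update(Response(Usage(prompt_tokens=12)), stream=True)
# streamed response with no usage info (e.g. older APIs): recompute
assert needs_usage_update(Response(), stream=True)
```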
langroid/agent/special/doc_chat_task.py
ADDED (empty file)
File without changes
langroid/language_models/base.py
CHANGED
@@ -216,7 +216,7 @@ class LLMTokenUsage(BaseModel):
     prompt_tokens: int = 0
     completion_tokens: int = 0
     cost: float = 0.0
-    calls: int = 0  # how many API calls
+    calls: int = 0  # how many API calls - not used as of 2025-04-04

     def reset(self) -> None:
         self.prompt_tokens = 0
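For context, `LLMTokenUsage` is a small Pydantic model; a re-sketch of it (field list taken from the diff; the body of `reset()` beyond its first line is an assumption, since the diff truncates there):

```python
from pydantic import BaseModel


class LLMTokenUsage(BaseModel):
    prompt_tokens: int = 0
    completion_tokens: int = 0
    cost: float = 0.0
    calls: int = 0  # how many API calls - not used as of 2025-04-04

    def reset(self) -> None:
        # assumption: reset() zeroes all counters (only the first
        # line of the real method is visible in the diff)
        self.prompt_tokens = 0
        self.completion_tokens = 0
        self.cost = 0.0
        self.calls = 0


usage = LLMTokenUsage(prompt_tokens=100, completion_tokens=50, cost=0.0015)
usage.reset()
assert usage.prompt_tokens == 0 and usage.cost == 0.0
```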
langroid/language_models/openai_gpt.py
CHANGED

@@ -780,22 +780,39 @@ class OpenAIGPT(LanguageModel):
         reasoning: str = "",
         function_args: str = "",
         function_name: str = "",
-    ) -> Tuple[bool, bool, str, str]:
+    ) -> Tuple[bool, bool, str, str, Dict[str, int]]:
         """Process state vars while processing a streaming API response.
             Returns a tuple consisting of:
             - is_break: whether to break out of the loop
             - has_function: whether the response contains a function_call
             - function_name: name of the function
             - function_args: args of the function
+            - completion: completion text
+            - reasoning: reasoning text
+            - usage: usage dict
         """
         # convert event obj (of type ChatCompletionChunk) to dict so rest of code,
         # which expects dicts, works as it did before switching to openai v1.x
         if not isinstance(event, dict):
             event = event.model_dump()

+        usage = event.get("usage", {}) or {}
         choices = event.get("choices", [{}])
-        if len(choices) == 0:
+        if choices is None or len(choices) == 0:
             choices = [{}]
+        if len(usage) > 0 and len(choices[0]) == 0:
+            # we have a "usage" chunk, and empty choices, so we're done
+            # ASSUMPTION: a usage chunk ONLY arrives AFTER all normal completion text!
+            # If any API does not follow this, we need to change this code.
+            return (
+                True,
+                has_function,
+                function_name,
+                function_args,
+                completion,
+                reasoning,
+                usage,
+            )
         event_args = ""
         event_fn_name = ""
         event_tool_deltas: Optional[List[Dict[str, Any]]] = None
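The new early return handles how OpenAI reports usage in stream mode: when `stream_options={"include_usage": True}` is set, the final chunk carries a populated `usage` object and an empty `choices` list. A standalone sketch of that detection (the sample chunk dicts are illustrative, not captured API output):

```python
from typing import Any, Dict


def is_usage_chunk(event: Dict[str, Any]) -> bool:
    # mirrors the diff: a chunk with usage info and empty choices
    # signals the end of the stream
    usage = event.get("usage", {}) or {}
    choices = event.get("choices", [{}])
    if choices is None or len(choices) == 0:
        choices = [{}]
    return len(usage) > 0 and len(choices[0]) == 0


text_chunk = {"choices": [{"delta": {"content": "Hi"}}], "usage": None}
final_chunk = {
    "choices": [],
    "usage": {"prompt_tokens": 12, "completion_tokens": 3, "total_tokens": 15},
}
assert not is_usage_chunk(text_chunk)
assert is_usage_chunk(final_chunk)
```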
@@ -876,23 +893,23 @@ class OpenAIGPT(LanguageModel):
                     self.config.streamer(tool_fn_args, StreamEventType.TOOL_ARGS)

         # show this delta in the stream
-
+        is_break = finish_reason in [
             "stop",
             "function_call",
             "tool_calls",
-        ]
-
-
-
-
-
-
-
-
-
-
-
-
+        ]
+        # for function_call, finish_reason does not necessarily
+        # contain "function_call" as mentioned in the docs.
+        # So we check for "stop" or "function_call" here.
+        return (
+            is_break,
+            has_function,
+            function_name,
+            function_args,
+            completion,
+            reasoning,
+            usage,
+        )

     @no_type_check
     async def _process_stream_event_async(
@@ -912,15 +929,30 @@ class OpenAIGPT(LanguageModel):
             - has_function: whether the response contains a function_call
             - function_name: name of the function
             - function_args: args of the function
+            - completion: completion text
+            - reasoning: reasoning text
+            - usage: usage dict
         """
         # convert event obj (of type ChatCompletionChunk) to dict so rest of code,
         # which expects dicts, works as it did before switching to openai v1.x
         if not isinstance(event, dict):
             event = event.model_dump()

+        usage = event.get("usage", {}) or {}
         choices = event.get("choices", [{}])
         if len(choices) == 0:
             choices = [{}]
+        if len(usage) > 0 and len(choices[0]) == 0:
+            # we got usage chunk, and empty choices, so we're done
+            return (
+                True,
+                has_function,
+                function_name,
+                function_args,
+                completion,
+                reasoning,
+                usage,
+            )
         event_args = ""
         event_fn_name = ""
         event_tool_deltas: Optional[List[Dict[str, Any]]] = None
@@ -996,23 +1028,23 @@ class OpenAIGPT(LanguageModel):
                     )

         # show this delta in the stream
-
+        is_break = choices[0].get("finish_reason", "") in [
             "stop",
             "function_call",
             "tool_calls",
-        ]
-
-
-
-
-
-
-
-
-
-
-
-
+        ]
+        # for function_call, finish_reason does not necessarily
+        # contain "function_call" as mentioned in the docs.
+        # So we check for "stop" or "function_call" here.
+        return (
+            is_break,
+            has_function,
+            function_name,
+            function_args,
+            completion,
+            reasoning,
+            usage,
+        )

     @retry_with_exponential_backoff
     def _stream_response(  # type: ignore
@@ -1038,6 +1070,8 @@ class OpenAIGPT(LanguageModel):
            sys.stdout.flush()
         has_function = False
         tool_deltas: List[Dict[str, Any]] = []
+        token_usage: Dict[str, int] = {}
+        done: bool = False
         try:
             for event in response:
                 (
@@ -1047,6 +1081,7 @@ class OpenAIGPT(LanguageModel):
                     function_args,
                     completion,
                     reasoning,
+                    usage,
                 ) = self._process_stream_event(
                     event,
                     chat=chat,
@@ -1057,8 +1092,17 @@ class OpenAIGPT(LanguageModel):
                     function_args=function_args,
                     function_name=function_name,
                 )
+                if len(usage) > 0:
+                    # capture the token usage when non-empty
+                    token_usage = usage
                 if is_break:
-                    break
+                    if not self.get_stream() or done:
+                        # if not streaming, then we don't wait for last "usage" chunk
+                        break
+                    else:
+                        # mark done, so we quit after the last "usage" chunk
+                        done = True
+
         except Exception as e:
             logging.warning("Error while processing stream response: %s", str(e))
@@ -1073,6 +1117,7 @@ class OpenAIGPT(LanguageModel):
             reasoning=reasoning,
             function_args=function_args,
             function_name=function_name,
+            usage=token_usage,
         )

     @async_retry_with_exponential_backoff
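The `done` flag is the subtle part of this change: `finish_reason` arrives one chunk before the usage chunk, so breaking immediately (the old behavior) would discard the usage data. A self-contained sketch of the two-phase exit (the `events` list and `streaming` flag are stand-ins for the real stream and `self.get_stream()`):

```python
from typing import Any, Dict, List

events: List[Dict[str, Any]] = [
    {"choices": [{"delta": {"content": "Hello"}}]},
    {"choices": [{"delta": {}, "finish_reason": "stop"}]},  # is_break fires here
    {"choices": [], "usage": {"prompt_tokens": 5, "completion_tokens": 1}},
]

token_usage: Dict[str, int] = {}
done = False
streaming = True  # stands in for self.get_stream()

for event in events:
    usage = event.get("usage", {}) or {}
    if len(usage) > 0:
        token_usage = usage  # capture usage when non-empty
    choices = event.get("choices") or [{}]
    is_break = len(usage) > 0 or choices[0].get("finish_reason", "") in (
        "stop", "function_call", "tool_calls",
    )
    if is_break:
        if not streaming or done:
            break  # non-stream mode: no trailing usage chunk to wait for
        done = True  # keep consuming for one more chunk: the usage chunk

print(token_usage)  # {'prompt_tokens': 5, 'completion_tokens': 1}
```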
@@ -1100,6 +1145,8 @@ class OpenAIGPT(LanguageModel):
            sys.stdout.flush()
         has_function = False
         tool_deltas: List[Dict[str, Any]] = []
+        token_usage: Dict[str, int] = {}
+        done: bool = False
         try:
             async for event in response:
                 (
@@ -1109,6 +1156,7 @@ class OpenAIGPT(LanguageModel):
                     function_args,
                     completion,
                     reasoning,
+                    usage,
                 ) = await self._process_stream_event_async(
                     event,
                     chat=chat,
@@ -1119,8 +1167,17 @@ class OpenAIGPT(LanguageModel):
                     function_args=function_args,
                     function_name=function_name,
                 )
+                if len(usage) > 0:
+                    # capture the token usage when non-empty
+                    token_usage = usage
                 if is_break:
-                    break
+                    if not self.get_stream() or done:
+                        # if not streaming, then we don't wait for last "usage" chunk
+                        break
+                    else:
+                        # mark done, so we quit after the next "usage" chunk
+                        done = True
+
         except Exception as e:
             logging.warning("Error while processing stream response: %s", str(e))
@@ -1135,6 +1192,7 @@ class OpenAIGPT(LanguageModel):
             reasoning=reasoning,
             function_args=function_args,
             function_name=function_name,
+            usage=token_usage,
         )

     @staticmethod
@@ -1272,6 +1330,7 @@ class OpenAIGPT(LanguageModel):
         reasoning: str = "",
         function_args: str = "",
         function_name: str = "",
+        usage: Dict[str, int] = {},
     ) -> Tuple[LLMResponse, Dict[str, Any]]:
         """
         Create an LLMResponse object from the streaming API response.
@@ -1281,8 +1340,10 @@ class OpenAIGPT(LanguageModel):
             tool_deltas: list of tool deltas received from streaming API
             has_function: whether the response contains a function_call
             completion: completion text
+            reasoning: reasoning text
             function_args: string representing function args
             function_name: name of the function
+            usage: token usage dict
         Returns:
             Tuple consisting of:
                 LLMResponse object (with message, usage),
@@ -1347,6 +1408,14 @@ class OpenAIGPT(LanguageModel):
                 # don't allow empty list [] here
                 oai_tool_calls=tool_calls or None if len(tool_deltas) > 0 else None,
                 function_call=function_call if has_function else None,
+                usage=LLMTokenUsage(
+                    prompt_tokens=usage.get("prompt_tokens", 0),
+                    completion_tokens=usage.get("completion_tokens", 0),
+                    cost=self._cost_chat_model(
+                        usage.get("prompt_tokens", 0),
+                        usage.get("completion_tokens", 0),
+                    ),
+                ),
             ),
             openai_response.dict(),
         )
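`_cost_chat_model` is langroid's internal per-model pricing helper; the arithmetic is the usual per-token rate multiply. A sketch with made-up rates (the real rates come from langroid's model-info tables, so treat these numbers as placeholders):

```python
# hypothetical per-million-token rates; real values live in
# langroid's model_info tables, not here
INPUT_COST_PER_MILLION = 2.50
OUTPUT_COST_PER_MILLION = 10.00


def cost_chat_model(prompt_tokens: int, completion_tokens: int) -> float:
    return (
        prompt_tokens * INPUT_COST_PER_MILLION
        + completion_tokens * OUTPUT_COST_PER_MILLION
    ) / 1_000_000


usage = {"prompt_tokens": 1200, "completion_tokens": 300}
print(cost_chat_model(usage.get("prompt_tokens", 0),
                      usage.get("completion_tokens", 0)))
# 0.006, i.e. 1200 * 2.5/1e6 + 300 * 10/1e6
```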
@@ -1833,6 +1902,14 @@ class OpenAIGPT(LanguageModel):
             max_tokens=max_tokens,
             stream=self.get_stream(),
         )
+        if self.get_stream():
+            args.update(
+                dict(
+                    # get token-usage numbers in stream mode from OpenAI API,
+                    # and possibly other OpenAI-compatible APIs.
+                    stream_options=dict(include_usage=True),
+                )
+            )
         args.update(self._openai_api_call_params(args))
         # only include functions-related args if functions are provided
         # since the OpenAI API will throw an error if `functions` is None or []
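`stream_options=dict(include_usage=True)` is the documented OpenAI chat-completions option behind this change. Outside langroid, the same behavior can be exercised directly with the `openai` v1 client (the model name is illustrative):

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

stream = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative model choice
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in stream:
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="")
    elif chunk.usage:  # final chunk: empty choices, populated usage
        print("\n", chunk.usage.prompt_tokens, chunk.usage.completion_tokens)
```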
{langroid-0.50.2.dist-info → langroid-0.50.3.dist-info}/RECORD
CHANGED

@@ -3,7 +3,7 @@ langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
 langroid/mytypes.py,sha256=HIcYAqGeA9OK0Hlscym2FI5Oax9QFljDZoVgRlomhRk,4014
 langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
-langroid/agent/base.py,sha256=
+langroid/agent/base.py,sha256=bs5OLCf534mhsdR7Rgf27GqVNuSV2bOVbD46Y86mGFA,79829
 langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
 langroid/agent/chat_agent.py,sha256=Z53oleOUcOXVs_UL90spttGoAooe0mrx3tDtOuhKVms,85214
 langroid/agent/chat_document.py,sha256=xzMtrPbaW-Y-BnF7kuhr2dorsD-D5rMWzfOqJ8HAoo8,17885
@@ -15,6 +15,7 @@ langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 langroid/agent/callbacks/chainlit.py,sha256=UHB6P_J40vsVnssosqkpkOVWRf9NK4TOY0_G2g_Arsg,20900
 langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
 langroid/agent/special/doc_chat_agent.py,sha256=dOL9Y0xAslkwepCdKU8Dc1m5Vk8qgk-gLbU4JzsmTII,65234
+langroid/agent/special/doc_chat_task.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
 langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
 langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
@@ -68,11 +69,11 @@ langroid/embedding_models/protoc/embeddings_pb2.pyi,sha256=UkNy7BrNsmQm0vLb3NtGX
 langroid/embedding_models/protoc/embeddings_pb2_grpc.py,sha256=9dYQqkW3JPyBpSEjeGXTNpSqAkC-6FPtBHyteVob2Y8,2452
 langroid/language_models/__init__.py,sha256=3aD2qC1lz8v12HX4B-dilv27gNxYdGdeu1QvDlkqqHs,1095
 langroid/language_models/azure_openai.py,sha256=SW0Fp_y6HpERr9l6TtF6CYsKgKwjUf_hSL_2mhTV4wI,5034
-langroid/language_models/base.py,sha256=
+langroid/language_models/base.py,sha256=aCEHqmxNM2CD5mt3SyMi7Mf8R4IjkyFwGX-IAUqjxmM,26277
 langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
 langroid/language_models/mock_lm.py,sha256=5BgHKDVRWFbUwDT_PFgTZXz9-k8wJSA2e3PZmyDgQ1k,4022
 langroid/language_models/model_info.py,sha256=tfBBxL0iUf2mVN6CjcvqflzFUVg2oZqOJZexZ8jHTYA,12216
-langroid/language_models/openai_gpt.py,sha256=
+langroid/language_models/openai_gpt.py,sha256=yNfiWxhH5BxA_mKiw69D3L4Bu__agI6WVg80IF3P5UI,85785
 langroid/language_models/utils.py,sha256=L4_CbihDMTGcsg0TOG1Yd5JFEto46--h7CX_14m89sQ,5016
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
 langroid/language_models/prompt_formatter/base.py,sha256=eDS1sgRNZVnoajwV_ZIha6cba5Dt8xjgzdRbPITwx3Q,1221
@@ -128,7 +129,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
 langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
 langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
 langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
-langroid-0.50.
-langroid-0.50.
-langroid-0.50.
-langroid-0.50.
+langroid-0.50.3.dist-info/METADATA,sha256=5c4f7md0dqoJqMQuCBZwh3HBvpUS-_rz1liE3LeoPKM,63641
+langroid-0.50.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.50.3.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.50.3.dist-info/RECORD,,
{langroid-0.50.2.dist-info → langroid-0.50.3.dist-info}/WHEEL
File without changes

{langroid-0.50.2.dist-info → langroid-0.50.3.dist-info}/licenses/LICENSE
File without changes