llama-index-llms-openai 0.1.29__tar.gz → 0.1.31__tar.gz

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: llama-index-llms-openai
- Version: 0.1.29
+ Version: 0.1.31
  Summary: llama-index llms openai integration
  License: MIT
  Author: llama-index
@@ -390,7 +390,13 @@ class OpenAI(FunctionCallingLLM):
                  base_kwargs["top_logprobs"] = self.top_logprobs
              else:
                  base_kwargs["logprobs"] = self.top_logprobs  # int in this case
-         return {**base_kwargs, **self.additional_kwargs}
+
+         # can't send stream_options to the API when not streaming
+         all_kwargs = {**base_kwargs, **self.additional_kwargs}
+         if "stream" not in all_kwargs and "stream_options" in all_kwargs:
+             del all_kwargs["stream_options"]
+
+         return all_kwargs

      @llm_retry_decorator
      def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
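
The `_get_model_kwargs` change above lets callers pass `stream=True` through the kwargs builder (as the later hunks do) so that `stream_options` can be dropped whenever the request is not a streaming one. A minimal sketch of that filtering, using a hypothetical `build_request_kwargs` helper rather than the package's own method:

    # Sketch only: mirrors the stream_options filtering above, not llama-index code.
    def build_request_kwargs(base_kwargs: dict, additional_kwargs: dict) -> dict:
        all_kwargs = {**base_kwargs, **additional_kwargs}
        # The OpenAI API only accepts stream_options (e.g. {"include_usage": True})
        # when stream=True is also part of the request.
        if "stream" not in all_kwargs and "stream_options" in all_kwargs:
            del all_kwargs["stream_options"]
        return all_kwargs

    # Non-streaming call: stream_options is silently removed.
    kwargs = build_request_kwargs(
        {"model": "gpt-3.5-turbo", "temperature": 0.1},
        {"stream_options": {"include_usage": True}},
    )
    assert "stream_options" not in kwargs

    # Streaming call: stream_options is kept.
    kwargs = build_request_kwargs(
        {"model": "gpt-3.5-turbo", "stream": True},
        {"stream_options": {"include_usage": True}},
    )
    assert "stream_options" in kwargs
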
@@ -486,8 +492,7 @@ class OpenAI(FunctionCallingLLM):
              is_function = False
              for response in client.chat.completions.create(
                  messages=message_dicts,
-                 stream=True,
-                 **self._get_model_kwargs(**kwargs),
+                 **self._get_model_kwargs(stream=True, **kwargs),
              ):
                  response = cast(ChatCompletionChunk, response)
                  if len(response.choices) > 0:
@@ -561,15 +566,14 @@ class OpenAI(FunctionCallingLLM):
      @llm_retry_decorator
      def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
          client = self._get_client()
-         all_kwargs = self._get_model_kwargs(**kwargs)
+         all_kwargs = self._get_model_kwargs(stream=True, **kwargs)
          self._update_max_tokens(all_kwargs, prompt)

          def gen() -> CompletionResponseGen:
              text = ""
              for response in client.completions.create(
                  prompt=prompt,
-                 stream=True,
-                 **all_kwargs,
+                 **kwargs,
              ):
                  if len(response.choices) > 0:
                      delta = response.choices[0].text
@@ -604,18 +608,29 @@ class OpenAI(FunctionCallingLLM):

      def _get_response_token_counts(self, raw_response: Any) -> dict:
          """Get the token usage reported by the response."""
-         if not isinstance(raw_response, dict):
-             return {}
-
-         usage = raw_response.get("usage", {})
-         # NOTE: other model providers that use the OpenAI client may not report usage
-         if usage is None:
+         if hasattr(raw_response, "usage"):
+             try:
+                 prompt_tokens = raw_response.usage.prompt_tokens
+                 completion_tokens = raw_response.usage.completion_tokens
+                 total_tokens = raw_response.usage.total_tokens
+             except AttributeError:
+                 return {}
+         elif isinstance(raw_response, dict):
+             usage = raw_response.get("usage", {})
+             # NOTE: other model providers that use the OpenAI client may not report usage
+             if usage is None:
+                 return {}
+             # Backwards compatibility with old dict type
+             prompt_tokens = usage.get("prompt_tokens", 0)
+             completion_tokens = usage.get("completion_tokens", 0)
+             total_tokens = usage.get("total_tokens", 0)
+         else:
              return {}

          return {
-             "prompt_tokens": usage.get("prompt_tokens", 0),
-             "completion_tokens": usage.get("completion_tokens", 0),
-             "total_tokens": usage.get("total_tokens", 0),
+             "prompt_tokens": prompt_tokens,
+             "completion_tokens": completion_tokens,
+             "total_tokens": total_tokens,
          }

      # ===== Async Endpoints =====
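
The `_get_response_token_counts` rewrite above reads usage from the OpenAI SDK's typed response objects first and falls back to the older dict shape. A self-contained sketch of the same fallback logic, with a hypothetical `token_counts` helper that is not part of the package:

    # Sketch only: same object-then-dict fallback as the hunk above.
    from types import SimpleNamespace
    from typing import Any

    def token_counts(raw_response: Any) -> dict:
        if hasattr(raw_response, "usage"):
            try:
                return {
                    "prompt_tokens": raw_response.usage.prompt_tokens,
                    "completion_tokens": raw_response.usage.completion_tokens,
                    "total_tokens": raw_response.usage.total_tokens,
                }
            except AttributeError:
                return {}
        if isinstance(raw_response, dict):
            usage = raw_response.get("usage", {})
            if usage is None:  # some OpenAI-compatible providers omit usage
                return {}
            return {
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "total_tokens": usage.get("total_tokens", 0),
            }
        return {}

    # Typed response object (as returned by the openai client)...
    obj = SimpleNamespace(usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15))
    print(token_counts(obj))  # {'prompt_tokens': 10, 'completion_tokens': 5, 'total_tokens': 15}
    # ...and dict-shaped responses, including providers that report no usage.
    print(token_counts({"usage": None}))  # {}
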
@@ -717,8 +732,7 @@ class OpenAI(FunctionCallingLLM):
              first_chat_chunk = True
              async for response in await aclient.chat.completions.create(
                  messages=message_dicts,
-                 stream=True,
-                 **self._get_model_kwargs(**kwargs),
+                 **self._get_model_kwargs(stream=True, **kwargs),
              ):
                  response = cast(ChatCompletionChunk, response)
                  if len(response.choices) > 0:
@@ -804,14 +818,13 @@ class OpenAI(FunctionCallingLLM):
          self, prompt: str, **kwargs: Any
      ) -> CompletionResponseAsyncGen:
          aclient = self._get_aclient()
-         all_kwargs = self._get_model_kwargs(**kwargs)
+         all_kwargs = self._get_model_kwargs(stream=True, **kwargs)
          self._update_max_tokens(all_kwargs, prompt)

          async def gen() -> CompletionResponseAsyncGen:
              text = ""
              async for response in await aclient.completions.create(
                  prompt=prompt,
-                 stream=True,
                  **all_kwargs,
              ):
                  if len(response.choices) > 0:
@@ -29,7 +29,7 @@ exclude = ["**/BUILD"]
  license = "MIT"
  name = "llama-index-llms-openai"
  readme = "README.md"
- version = "0.1.29"
+ version = "0.1.31"

  [tool.poetry.dependencies]
  python = ">=3.8.1,<4.0"