llama-index-llms-openai 0.1.29__tar.gz → 0.1.31__tar.gz

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: llama-index-llms-openai
- Version: 0.1.29
+ Version: 0.1.31
  Summary: llama-index llms openai integration
  License: MIT
  Author: llama-index
@@ -390,7 +390,13 @@ class OpenAI(FunctionCallingLLM):
                  base_kwargs["top_logprobs"] = self.top_logprobs
              else:
                  base_kwargs["logprobs"] = self.top_logprobs  # int in this case
-         return {**base_kwargs, **self.additional_kwargs}
+
+         # can't send stream_options to the API when not streaming
+         all_kwargs = {**base_kwargs, **self.additional_kwargs}
+         if "stream" not in all_kwargs and "stream_options" in all_kwargs:
+             del all_kwargs["stream_options"]
+
+         return all_kwargs

      @llm_retry_decorator
      def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
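
The `_get_model_kwargs` change above lets callers pass `stream=True` through the kwargs builder (as the later hunks do) so that `stream_options` can be dropped whenever the request is not a streaming one. A minimal sketch of that filtering, using a hypothetical `build_request_kwargs` helper rather than the package's own method:

    # Sketch only: mirrors the stream_options filtering above, not llama-index code.
    def build_request_kwargs(base_kwargs: dict, additional_kwargs: dict) -> dict:
        all_kwargs = {**base_kwargs, **additional_kwargs}
        # The OpenAI API only accepts stream_options (e.g. {"include_usage": True})
        # when stream=True is also part of the request.
        if "stream" not in all_kwargs and "stream_options" in all_kwargs:
            del all_kwargs["stream_options"]
        return all_kwargs

    # Non-streaming call: stream_options is silently removed.
    kwargs = build_request_kwargs(
        {"model": "gpt-3.5-turbo", "temperature": 0.1},
        {"stream_options": {"include_usage": True}},
    )
    assert "stream_options" not in kwargs

    # Streaming call: stream_options is kept.
    kwargs = build_request_kwargs(
        {"model": "gpt-3.5-turbo", "stream": True},
        {"stream_options": {"include_usage": True}},
    )
    assert "stream_options" in kwargs
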
@@ -486,8 +492,7 @@ class OpenAI(FunctionCallingLLM):
              is_function = False
              for response in client.chat.completions.create(
                  messages=message_dicts,
-                 stream=True,
-                 **self._get_model_kwargs(**kwargs),
+                 **self._get_model_kwargs(stream=True, **kwargs),
              ):
                  response = cast(ChatCompletionChunk, response)
                  if len(response.choices) > 0:
@@ -561,15 +566,14 @@ class OpenAI(FunctionCallingLLM):
      @llm_retry_decorator
      def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
          client = self._get_client()
-         all_kwargs = self._get_model_kwargs(**kwargs)
+         all_kwargs = self._get_model_kwargs(stream=True, **kwargs)
          self._update_max_tokens(all_kwargs, prompt)

          def gen() -> CompletionResponseGen:
              text = ""
              for response in client.completions.create(
                  prompt=prompt,
-                 stream=True,
-                 **all_kwargs,
+                 **kwargs,
              ):
                  if len(response.choices) > 0:
                      delta = response.choices[0].text
@@ -604,18 +608,29 @@ class OpenAI(FunctionCallingLLM):

      def _get_response_token_counts(self, raw_response: Any) -> dict:
          """Get the token usage reported by the response."""
-         if not isinstance(raw_response, dict):
-             return {}
-
-         usage = raw_response.get("usage", {})
-         # NOTE: other model providers that use the OpenAI client may not report usage
-         if usage is None:
+         if hasattr(raw_response, "usage"):
+             try:
+                 prompt_tokens = raw_response.usage.prompt_tokens
+                 completion_tokens = raw_response.usage.completion_tokens
+                 total_tokens = raw_response.usage.total_tokens
+             except AttributeError:
+                 return {}
+         elif isinstance(raw_response, dict):
+             usage = raw_response.get("usage", {})
+             # NOTE: other model providers that use the OpenAI client may not report usage
+             if usage is None:
+                 return {}
+             # Backwards compatibility with old dict type
+             prompt_tokens = usage.get("prompt_tokens", 0)
+             completion_tokens = usage.get("completion_tokens", 0)
+             total_tokens = usage.get("total_tokens", 0)
+         else:
              return {}

          return {
-             "prompt_tokens": usage.get("prompt_tokens", 0),
-             "completion_tokens": usage.get("completion_tokens", 0),
-             "total_tokens": usage.get("total_tokens", 0),
+             "prompt_tokens": prompt_tokens,
+             "completion_tokens": completion_tokens,
+             "total_tokens": total_tokens,
          }

      # ===== Async Endpoints =====
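
The `_get_response_token_counts` rewrite above reads usage from the OpenAI SDK's typed response objects first and falls back to the older dict shape. A self-contained sketch of the same fallback logic, with a hypothetical `token_counts` helper that is not part of the package:

    # Sketch only: same object-then-dict fallback as the hunk above.
    from types import SimpleNamespace
    from typing import Any

    def token_counts(raw_response: Any) -> dict:
        if hasattr(raw_response, "usage"):
            try:
                return {
                    "prompt_tokens": raw_response.usage.prompt_tokens,
                    "completion_tokens": raw_response.usage.completion_tokens,
                    "total_tokens": raw_response.usage.total_tokens,
                }
            except AttributeError:
                return {}
        if isinstance(raw_response, dict):
            usage = raw_response.get("usage", {})
            if usage is None:  # some OpenAI-compatible providers omit usage
                return {}
            return {
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "total_tokens": usage.get("total_tokens", 0),
            }
        return {}

    # Typed response object (as returned by the openai client)...
    obj = SimpleNamespace(usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15))
    print(token_counts(obj))  # {'prompt_tokens': 10, 'completion_tokens': 5, 'total_tokens': 15}
    # ...and dict-shaped responses, including providers that report no usage.
    print(token_counts({"usage": None}))  # {}
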
@@ -717,8 +732,7 @@ class OpenAI(FunctionCallingLLM):
              first_chat_chunk = True
              async for response in await aclient.chat.completions.create(
                  messages=message_dicts,
-                 stream=True,
-                 **self._get_model_kwargs(**kwargs),
+                 **self._get_model_kwargs(stream=True, **kwargs),
              ):
                  response = cast(ChatCompletionChunk, response)
                  if len(response.choices) > 0:
@@ -804,14 +818,13 @@ class OpenAI(FunctionCallingLLM):
          self, prompt: str, **kwargs: Any
      ) -> CompletionResponseAsyncGen:
          aclient = self._get_aclient()
-         all_kwargs = self._get_model_kwargs(**kwargs)
+         all_kwargs = self._get_model_kwargs(stream=True, **kwargs)
          self._update_max_tokens(all_kwargs, prompt)

          async def gen() -> CompletionResponseAsyncGen:
              text = ""
              async for response in await aclient.completions.create(
                  prompt=prompt,
-                 stream=True,
                  **all_kwargs,
              ):
                  if len(response.choices) > 0:
@@ -29,7 +29,7 @@ exclude = ["**/BUILD"]
  license = "MIT"
  name = "llama-index-llms-openai"
  readme = "README.md"
- version = "0.1.29"
+ version = "0.1.31"

  [tool.poetry.dependencies]
  python = ">=3.8.1,<4.0"