llama-index-llms-openai 0.1.29__tar.gz → 0.1.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/PKG-INFO +1 -1
- {llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/llama_index/llms/openai/base.py +32 -19
- {llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/pyproject.toml +1 -1
- {llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/README.md +0 -0
- {llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/llama_index/llms/openai/__init__.py +0 -0
- {llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/llama_index/llms/openai/utils.py +0 -0
{llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/llama_index/llms/openai/base.py
RENAMED
@@ -390,7 +390,13 @@ class OpenAI(FunctionCallingLLM):
                 base_kwargs["top_logprobs"] = self.top_logprobs
             else:
                 base_kwargs["logprobs"] = self.top_logprobs  # int in this case
-        return {**base_kwargs, **self.additional_kwargs}
+
+        # can't send stream_options to the API when not streaming
+        all_kwargs = {**base_kwargs, **self.additional_kwargs}
+        if "stream" not in all_kwargs and "stream_options" in all_kwargs:
+            del all_kwargs["stream_options"]
+
+        return all_kwargs
 
     @llm_retry_decorator
     def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
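In effect, _get_model_kwargs now merges the per-request kwargs with additional_kwargs and then drops stream_options whenever no "stream" key is present, since the OpenAI API rejects stream_options on non-streaming requests. A standalone sketch of that filtering step (the function name and values here are illustrative, not the package's internals):

from typing import Any, Dict

def merge_model_kwargs(base_kwargs: Dict[str, Any], additional_kwargs: Dict[str, Any]) -> Dict[str, Any]:
    # Mirror of the new merge-and-filter step: stream_options is only valid
    # on streaming requests, so drop it when "stream" is absent.
    all_kwargs = {**base_kwargs, **additional_kwargs}
    if "stream" not in all_kwargs and "stream_options" in all_kwargs:
        del all_kwargs["stream_options"]
    return all_kwargs

extras = {"stream_options": {"include_usage": True}}
print(merge_model_kwargs({"model": "gpt-3.5-turbo"}, extras))                  # stream_options dropped
print(merge_model_kwargs({"model": "gpt-3.5-turbo", "stream": True}, extras))  # stream_options kept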
@@ -486,8 +492,7 @@ class OpenAI(FunctionCallingLLM):
             is_function = False
             for response in client.chat.completions.create(
                 messages=message_dicts,
-                stream=True,
-                **self._get_model_kwargs(**kwargs),
+                **self._get_model_kwargs(stream=True, **kwargs),
             ):
                 response = cast(ChatCompletionChunk, response)
                 if len(response.choices) > 0:
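The streaming chat path no longer passes stream=True directly to create(); it routes it through _get_model_kwargs(...) so the filter above can see the "stream" key and keep stream_options for streaming requests. A toy stand-in (not the real method signature) showing why the call shape matters:

from typing import Any, Dict

def get_model_kwargs(**kwargs: Any) -> Dict[str, Any]:
    # Toy stand-in for OpenAI._get_model_kwargs, with fixed additional_kwargs.
    additional_kwargs = {"stream_options": {"include_usage": True}}
    all_kwargs: Dict[str, Any] = {"model": "gpt-3.5-turbo", **kwargs, **additional_kwargs}
    if "stream" not in all_kwargs and "stream_options" in all_kwargs:
        del all_kwargs["stream_options"]
    return all_kwargs

# If stream=True were still passed straight to create() (the old call shape),
# the kwarg builder would see no "stream" key and strip stream_options even
# for streaming requests.
print(get_model_kwargs())             # no "stream" key -> stream_options removed
print(get_model_kwargs(stream=True))  # "stream" present -> stream_options kept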
@@ -561,15 +566,14 @@ class OpenAI(FunctionCallingLLM):
     @llm_retry_decorator
     def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
         client = self._get_client()
-        all_kwargs = self._get_model_kwargs(**kwargs)
+        all_kwargs = self._get_model_kwargs(stream=True, **kwargs)
         self._update_max_tokens(all_kwargs, prompt)
 
         def gen() -> CompletionResponseGen:
             text = ""
             for response in client.completions.create(
                 prompt=prompt,
-                stream=True,
-                **all_kwargs,
+                **kwargs,
             ):
                 if len(response.choices) > 0:
                     delta = response.choices[0].text
@@ -604,18 +608,29 @@ class OpenAI(FunctionCallingLLM):
 
     def _get_response_token_counts(self, raw_response: Any) -> dict:
         """Get the token usage reported by the response."""
-        if not isinstance(raw_response, dict):
-            return {}
-
-        usage = raw_response.get("usage", {})
-        # NOTE: other model providers that use the OpenAI client may not report usage
-        if usage is None:
+        if hasattr(raw_response, "usage"):
+            try:
+                prompt_tokens = raw_response.usage.prompt_tokens
+                completion_tokens = raw_response.usage.completion_tokens
+                total_tokens = raw_response.usage.total_tokens
+            except AttributeError:
+                return {}
+        elif isinstance(raw_response, dict):
+            usage = raw_response.get("usage", {})
+            # NOTE: other model providers that use the OpenAI client may not report usage
+            if usage is None:
+                return {}
+            # Backwards compatibility with old dict type
+            prompt_tokens = usage.get("prompt_tokens", 0)
+            completion_tokens = usage.get("completion_tokens", 0)
+            total_tokens = usage.get("total_tokens", 0)
+        else:
             return {}
 
         return {
-            "prompt_tokens": usage.get("prompt_tokens", 0),
-            "completion_tokens": usage.get("completion_tokens", 0),
-            "total_tokens": usage.get("total_tokens", 0),
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens,
         }
 
     # ===== Async Endpoints =====
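_get_response_token_counts used to assume a dict-shaped response. It now reads the usage attribute of the OpenAI SDK's response objects first and falls back to the old dict handling, returning {} when a provider reports no usage at all. A self-contained sketch of the same branching, using a stand-in response type rather than the real SDK classes:

from dataclasses import dataclass
from typing import Any

@dataclass
class Usage:
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int

@dataclass
class FakeResponse:
    usage: Usage

def response_token_counts(raw_response: Any) -> dict:
    # Object-style responses (OpenAI SDK) take priority; dicts are the legacy path.
    if hasattr(raw_response, "usage"):
        try:
            prompt_tokens = raw_response.usage.prompt_tokens
            completion_tokens = raw_response.usage.completion_tokens
            total_tokens = raw_response.usage.total_tokens
        except AttributeError:
            return {}
    elif isinstance(raw_response, dict):
        usage = raw_response.get("usage", {})
        if usage is None:  # some OpenAI-compatible providers omit usage
            return {}
        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)
        total_tokens = usage.get("total_tokens", 0)
    else:
        return {}
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": total_tokens,
    }

print(response_token_counts(FakeResponse(Usage(10, 5, 15))))    # SDK-style object
print(response_token_counts({"usage": {"prompt_tokens": 10}}))  # legacy dict response
print(response_token_counts({"usage": None}))                   # no usage reported -> {}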
@@ -717,8 +732,7 @@ class OpenAI(FunctionCallingLLM):
             first_chat_chunk = True
             async for response in await aclient.chat.completions.create(
                 messages=message_dicts,
-                stream=True,
-                **self._get_model_kwargs(**kwargs),
+                **self._get_model_kwargs(stream=True, **kwargs),
             ):
                 response = cast(ChatCompletionChunk, response)
                 if len(response.choices) > 0:
@@ -804,14 +818,13 @@ class OpenAI(FunctionCallingLLM):
         self, prompt: str, **kwargs: Any
     ) -> CompletionResponseAsyncGen:
         aclient = self._get_aclient()
-        all_kwargs = self._get_model_kwargs(**kwargs)
+        all_kwargs = self._get_model_kwargs(stream=True, **kwargs)
         self._update_max_tokens(all_kwargs, prompt)
 
         async def gen() -> CompletionResponseAsyncGen:
             text = ""
             async for response in await aclient.completions.create(
                 prompt=prompt,
-                stream=True,
                 **all_kwargs,
             ):
                 if len(response.choices) > 0:
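Taken together, these changes make it safe to configure stream_options once on the LLM: streaming calls forward it (so the API can append a final chunk with token usage when include_usage is set), while non-streaming calls drop it before the request is sent. A hedged usage sketch, assuming an OPENAI_API_KEY in the environment; the model name and options are illustrative:

from llama_index.core.llms import ChatMessage
from llama_index.llms.openai import OpenAI

# stream_options is only legal on streaming requests; 0.1.31 strips it from
# non-streaming calls instead of letting the API reject them.
llm = OpenAI(
    model="gpt-3.5-turbo",
    additional_kwargs={"stream_options": {"include_usage": True}},
)

# Streaming: stream_options is forwarded to the API.
for chunk in llm.stream_chat([ChatMessage(role="user", content="Say hi")]):
    print(chunk.delta, end="")

# Non-streaming: stream_options is removed before the request goes out.
print(llm.complete("Say hi").text)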
{llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/README.md
RENAMED
File without changes

{llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/llama_index/llms/openai/__init__.py
RENAMED
File without changes

{llama_index_llms_openai-0.1.29 → llama_index_llms_openai-0.1.31}/llama_index/llms/openai/utils.py
RENAMED
File without changes