llm-ie 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to their respective public registries. It is provided for informational purposes only.
llm_ie/engines.py CHANGED
@@ -1,7 +1,7 @@
  import abc
  import warnings
  import importlib
- from typing import List, Dict, Union
+ from typing import List, Dict, Union, Generator
 
 
  class InferenceEngine:
@@ -15,7 +15,8 @@ class InferenceEngine:
 
 
  @abc.abstractmethod
- def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+ def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0,
+ verbose:bool=False, stream:bool=False, **kwrs) -> Union[str, Generator[str, None, None]]:
  """
  This method inputs chat messages and outputs LLM generated text.
 
@@ -27,8 +28,10 @@ class InferenceEngine:
  the max number of new tokens LLM can generate.
  temperature : float, Optional
  the temperature for token sampling.
+ verbose : bool, Optional
+ if True, LLM generated text will be printed in terminal in real-time.
  stream : bool, Optional
- if True, LLM generated text will be printed in terminal in real-time.
+ if True, returns a generator that yields the output in real-time.
  """
  return NotImplemented
 
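The signature above is the core behavioral change in 1.0.0: stream=True now makes chat() return a generator of text chunks, while the old print-to-terminal behavior moves to the new verbose flag. A minimal sketch of the contract from the caller's side, using a toy engine rather than a real backend (the EchoEngine class and its canned chunks are illustrative only, not part of llm-ie):

    from typing import Dict, Generator, List, Union

    class EchoEngine:
        # Toy stand-in that follows the new chat() contract; no model is loaded.
        def chat(self, messages: List[Dict[str, str]], max_new_tokens: int = 2048,
                 temperature: float = 0.0, verbose: bool = False,
                 stream: bool = False, **kwrs) -> Union[str, Generator[str, None, None]]:
            chunks = ["Hello", ", ", "world", "!"]
            if stream:
                return (c for c in chunks)   # generator of text chunks, yielded lazily
            text = "".join(chunks)
            if verbose:
                print(text, flush=True)      # a real engine prints chunk-by-chunk
            return text

    engine = EchoEngine()
    msgs = [{"role": "user", "content": "Say hello."}]
    for piece in engine.chat(msgs, stream=True):   # stream mode: caller consumes the generator
        print(piece, end="", flush=True)
    print()
    print(engine.chat(msgs, verbose=True))         # verbose mode: prints, then returns the full str
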
@@ -71,7 +74,7 @@ class LlamaCppInferenceEngine(InferenceEngine):
  del self.model
 
 
- def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+ def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, verbose:bool=False, **kwrs) -> str:
  """
  This method inputs chat messages and outputs LLM generated text.
 
@@ -83,18 +86,18 @@ class LlamaCppInferenceEngine(InferenceEngine):
  the max number of new tokens LLM can generate.
  temperature : float, Optional
  the temperature for token sampling.
- stream : bool, Optional
+ verbose : bool, Optional
  if True, LLM generated text will be printed in terminal in real-time.
  """
  response = self.model.create_chat_completion(
  messages=messages,
  max_tokens=max_new_tokens,
  temperature=temperature,
- stream=stream,
+ stream=verbose,
  **kwrs
  )
 
- if stream:
+ if verbose:
  res = ''
  for chunk in response:
  out_dict = chunk['choices'][0]['delta']
@@ -107,9 +110,6 @@ class LlamaCppInferenceEngine(InferenceEngine):
  return response['choices'][0]['message']['content']
 
 
-
-
-
  class OllamaInferenceEngine(InferenceEngine):
  def __init__(self, model_name:str, num_ctx:int=4096, keep_alive:int=300, **kwrs):
  """
@@ -133,39 +133,68 @@ class OllamaInferenceEngine(InferenceEngine):
  self.model_name = model_name
  self.num_ctx = num_ctx
  self.keep_alive = keep_alive
-
- def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+
+ def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0,
+ verbose:bool=False, stream:bool=False, **kwrs) -> Union[str, Generator[str, None, None]]:
  """
- This method inputs chat messages and outputs LLM generated text.
+ This method inputs chat messages and outputs VLM generated text.
 
  Parameters:
  ----------
  messages : List[Dict[str,str]]
  a list of dict with role and content. role must be one of {"system", "user", "assistant"}
  max_new_tokens : str, Optional
- the max number of new tokens LLM can generate.
+ the max number of new tokens VLM can generate.
  temperature : float, Optional
  the temperature for token sampling.
+ verbose : bool, Optional
+ if True, VLM generated text will be printed in terminal in real-time.
  stream : bool, Optional
- if True, LLM generated text will be printed in terminal in real-time.
+ if True, returns a generator that yields the output in real-time.
  """
- response = self.client.chat(
+ options={'temperature':temperature, 'num_ctx': self.num_ctx, 'num_predict': max_new_tokens, **kwrs}
+ if stream:
+ def _stream_generator():
+ response_stream = self.client.chat(
+ model=self.model_name,
+ messages=messages,
+ options=options,
+ stream=True,
+ keep_alive=self.keep_alive
+ )
+ for chunk in response_stream:
+ content_chunk = chunk.get('message', {}).get('content')
+ if content_chunk:
+ yield content_chunk
+
+ return _stream_generator()
+
+ elif verbose:
+ response = self.client.chat(
  model=self.model_name,
  messages=messages,
- options={'temperature':temperature, 'num_ctx': self.num_ctx, 'num_predict': max_new_tokens, **kwrs},
- stream=stream,
+ options=options,
+ stream=True,
  keep_alive=self.keep_alive
  )
- if stream:
+
  res = ''
  for chunk in response:
- res += chunk['message']['content']
- print(chunk['message']['content'], end='', flush=True)
+ content_chunk = chunk.get('message', {}).get('content')
+ print(content_chunk, end='', flush=True)
+ res += content_chunk
  print('\n')
  return res
 
- return response['message']['content']
-
+ else:
+ response = self.client.chat(
+ model=self.model_name,
+ messages=messages,
+ options=options,
+ stream=False,
+ keep_alive=self.keep_alive
+ )
+ return response.get('message', {}).get('content')
 
  async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
  """
@@ -195,7 +224,8 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
  self.client = InferenceClient(model=model, token=token, base_url=base_url, api_key=api_key, **kwrs)
  self.client_async = AsyncInferenceClient(model=model, token=token, base_url=base_url, api_key=api_key, **kwrs)
 
- def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+ def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0,
+ verbose:bool=False, stream:bool=False, **kwrs) -> Union[str, Generator[str, None, None]]:
  """
  This method inputs chat messages and outputs LLM generated text.
 
@@ -207,25 +237,53 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
  the max number of new tokens LLM can generate.
  temperature : float, Optional
  the temperature for token sampling.
+ verbose : bool, Optional
+ if True, VLM generated text will be printed in terminal in real-time.
  stream : bool, Optional
- if True, LLM generated text will be printed in terminal in real-time.
+ if True, returns a generator that yields the output in real-time.
  """
- response = self.client.chat.completions.create(
- messages=messages,
- max_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- **kwrs
- )
-
  if stream:
+ def _stream_generator():
+ response_stream = self.client.chat.completions.create(
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=True,
+ **kwrs
+ )
+ for chunk in response_stream:
+ content_chunk = chunk.get('choices')[0].get('delta').get('content')
+ if content_chunk:
+ yield content_chunk
+
+ return _stream_generator()
+
+ elif verbose:
+ response = self.client.chat.completions.create(
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=True,
+ **kwrs
+ )
+
  res = ''
  for chunk in response:
- res += chunk.choices[0].delta.content
- print(chunk.choices[0].delta.content, end='', flush=True)
+ content_chunk = chunk.get('choices')[0].get('delta').get('content')
+ if content_chunk:
+ res += content_chunk
+ print(content_chunk, end='', flush=True)
  return res
 
- return response.choices[0].message.content
+ else:
+ response = self.client.chat.completions.create(
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=False,
+ **kwrs
+ )
+ return response.choices[0].message.content
 
  async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
  """
@@ -267,7 +325,8 @@ class OpenAIInferenceEngine(InferenceEngine):
  self.model = model
  self.reasoning_model = reasoning_model
 
- def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+ def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0,
+ verbose:bool=False, stream:bool=False, **kwrs) -> Union[str, Generator[str, None, None]]:
  """
  This method inputs chat messages and outputs LLM generated text.
 
@@ -279,60 +338,145 @@ class OpenAIInferenceEngine(InferenceEngine):
  the max number of new tokens LLM can generate.
  temperature : float, Optional
  the temperature for token sampling.
+ verbose : bool, Optional
+ if True, VLM generated text will be printed in terminal in real-time.
  stream : bool, Optional
- if True, LLM generated text will be printed in terminal in real-time.
+ if True, returns a generator that yields the output in real-time.
  """
+ # For reasoning models
  if self.reasoning_model:
+ # Reasoning models do not support temperature parameter
  if temperature != 0.0:
  warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
+
+ # Reasoning models do not support system prompts
+ if any(msg['role'] == 'system' for msg in messages):
+ warnings.warn("Reasoning models do not support system prompts. Will be ignored.", UserWarning)
+ messages = [msg for msg in messages if msg['role'] != 'system']
 
- response = self.client.chat.completions.create(
- model=self.model,
- messages=messages,
- max_completion_tokens=max_new_tokens,
- stream=stream,
- **kwrs
- )
 
- else:
- response = self.client.chat.completions.create(
- model=self.model,
- messages=messages,
- max_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- **kwrs
- )
+ if stream:
+ def _stream_generator():
+ response_stream = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ max_completion_tokens=max_new_tokens,
+ stream=True,
+ **kwrs
+ )
+ for chunk in response_stream:
+ if len(chunk.choices) > 0:
+ if chunk.choices[0].delta.content is not None:
+ yield chunk.choices[0].delta.content
+ if chunk.choices[0].finish_reason == "length":
+ warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+ if self.reasoning_model:
+ warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+ return _stream_generator()
+
+ elif verbose:
+ response = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ max_completion_tokens=max_new_tokens,
+ stream=True,
+ **kwrs
+ )
+ res = ''
+ for chunk in response:
+ if len(chunk.choices) > 0:
+ if chunk.choices[0].delta.content is not None:
+ res += chunk.choices[0].delta.content
+ print(chunk.choices[0].delta.content, end="", flush=True)
+ if chunk.choices[0].finish_reason == "length":
+ warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+ if self.reasoning_model:
+ warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+
+ print('\n')
+ return res
+ else:
+ response = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ max_completion_tokens=max_new_tokens,
+ stream=False,
+ **kwrs
+ )
+ return response.choices[0].message.content
 
- if stream:
- res = ''
- for chunk in response:
- if len(chunk.choices) > 0:
- if chunk.choices[0].delta.content is not None:
- res += chunk.choices[0].delta.content
- print(chunk.choices[0].delta.content, end="", flush=True)
- if chunk.choices[0].finish_reason == "length":
- warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
- if self.reasoning_model:
- warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
- return res
-
- if response.choices[0].finish_reason == "length":
- warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
- if self.reasoning_model:
- warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+ # For non-reasoning models
+ else:
+ if stream:
+ def _stream_generator():
+ response_stream = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=True,
+ **kwrs
+ )
+ for chunk in response_stream:
+ if len(chunk.choices) > 0:
+ if chunk.choices[0].delta.content is not None:
+ yield chunk.choices[0].delta.content
+ if chunk.choices[0].finish_reason == "length":
+ warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+ if self.reasoning_model:
+ warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+ return _stream_generator()
 
- return response.choices[0].message.content
+ elif verbose:
+ response = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=True,
+ **kwrs
+ )
+ res = ''
+ for chunk in response:
+ if len(chunk.choices) > 0:
+ if chunk.choices[0].delta.content is not None:
+ res += chunk.choices[0].delta.content
+ print(chunk.choices[0].delta.content, end="", flush=True)
+ if chunk.choices[0].finish_reason == "length":
+ warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
+ if self.reasoning_model:
+ warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
+
+ print('\n')
+ return res
+
+ else:
+ response = self.client.chat.completions.create(
+ model=self.model,
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=False,
+ **kwrs
+ )
+
+ return response.choices[0].message.content
 
 
- async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+ async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=4096, temperature:float=0.0, **kwrs) -> str:
  """
  Async version of chat method. Streaming is not supported.
  """
  if self.reasoning_model:
+ # Reasoning models do not support temperature parameter
  if temperature != 0.0:
  warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
-
+
+ # Reasoning models do not support system prompts
+ if any(msg['role'] == 'system' for msg in messages):
+ warnings.warn("Reasoning models do not support system prompts. Will be ignored.", UserWarning)
+ messages = [msg for msg in messages if msg['role'] != 'system']
+
  response = await self.async_client.chat.completions.create(
  model=self.model,
  messages=messages,
@@ -340,6 +484,7 @@ class OpenAIInferenceEngine:
  stream=False,
  **kwrs
  )
+
  else:
  response = await self.async_client.chat.completions.create(
  model=self.model,
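In both chat() and chat_async(), the reasoning-model branch now also strips system messages instead of sending them through. Taken in isolation, the added guard behaves like this self-contained snippet (the messages are made up for illustration):

    import warnings

    messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "Extract the diagnosis from the note."},
    ]

    # Mirrors the added guard: warn, then drop system messages before the request is sent.
    if any(msg["role"] == "system" for msg in messages):
        warnings.warn("Reasoning models do not support system prompts. Will be ignored.", UserWarning)
        messages = [msg for msg in messages if msg["role"] != "system"]

    print(messages)  # only the user message remains
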
@@ -358,7 +503,7 @@ class OpenAIInferenceEngine(InferenceEngine):
  return response.choices[0].message.content
 
 
- class AzureOpenAIInferenceEngine(InferenceEngine):
+ class AzureOpenAIInferenceEngine(OpenAIInferenceEngine):
  def __init__(self, model:str, api_version:str, reasoning_model:bool=False, **kwrs):
  """
  The Azure OpenAI API inference engine.
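Because AzureOpenAIInferenceEngine now subclasses OpenAIInferenceEngine, and its duplicated chat()/chat_async() overrides are deleted in the next hunk, it inherits the verbose/stream behavior from the parent class. A usage sketch, under the assumption that Azure credentials come from the environment variables the OpenAI SDK reads (AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT); the deployment name and API version are placeholders:

    from llm_ie.engines import AzureOpenAIInferenceEngine

    # Assumes AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT are set in the environment.
    engine = AzureOpenAIInferenceEngine(model="my-gpt-4o-deployment",
                                        api_version="2024-02-01")

    # chat() is inherited from OpenAIInferenceEngine, so the same flags apply.
    reply = engine.chat([{"role": "user", "content": "Hello"}], verbose=True)
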
@@ -387,96 +532,6 @@ class AzureOpenAIInferenceEngine(InferenceEngine):
  **kwrs)
  self.reasoning_model = reasoning_model
 
- def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
- """
- This method inputs chat messages and outputs LLM generated text.
-
- Parameters:
- ----------
- messages : List[Dict[str,str]]
- a list of dict with role and content. role must be one of {"system", "user", "assistant"}
- max_new_tokens : str, Optional
- the max number of new tokens LLM can generate.
- temperature : float, Optional
- the temperature for token sampling.
- stream : bool, Optional
- if True, LLM generated text will be printed in terminal in real-time.
- """
- if self.reasoning_model:
- if temperature != 0.0:
- warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
-
- response = self.client.chat.completions.create(
- model=self.model,
- messages=messages,
- max_completion_tokens=max_new_tokens,
- stream=stream,
- **kwrs
- )
-
- else:
- response = self.client.chat.completions.create(
- model=self.model,
- messages=messages,
- max_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- **kwrs
- )
-
- if stream:
- res = ''
- for chunk in response:
- if len(chunk.choices) > 0:
- if chunk.choices[0].delta.content is not None:
- res += chunk.choices[0].delta.content
- print(chunk.choices[0].delta.content, end="", flush=True)
- if chunk.choices[0].finish_reason == "length":
- warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
- if self.reasoning_model:
- warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
- return res
-
- if response.choices[0].finish_reason == "length":
- warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
- if self.reasoning_model:
- warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
-
- return response.choices[0].message.content
-
-
- async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
- """
- Async version of chat method. Streaming is not supported.
- """
- if self.reasoning_model:
- if temperature != 0.0:
- warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
-
- response = await self.async_client.chat.completions.create(
- model=self.model,
- messages=messages,
- max_completion_tokens=max_new_tokens,
- stream=False,
- **kwrs
- )
- else:
- response = await self.async_client.chat.completions.create(
- model=self.model,
- messages=messages,
- max_tokens=max_new_tokens,
- temperature=temperature,
- stream=False,
- **kwrs
- )
-
- if response.choices[0].finish_reason == "length":
- warnings.warn("Model stopped generating due to context length limit.", RuntimeWarning)
- if self.reasoning_model:
- warnings.warn("max_new_tokens includes reasoning tokens and output tokens.", UserWarning)
-
- return response.choices[0].message.content
-
 
  class LiteLLMInferenceEngine(InferenceEngine):
  def __init__(self, model:str=None, base_url:str=None, api_key:str=None):
@@ -502,7 +557,8 @@ class LiteLLMInferenceEngine(InferenceEngine):
  self.base_url = base_url
  self.api_key = api_key
 
- def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+ def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0,
+ verbose:bool=False, stream:bool=False, **kwrs) -> Union[str, Generator[str, None, None]]:
  """
  This method inputs chat messages and outputs LLM generated text.
 
@@ -514,29 +570,64 @@ class LiteLLMInferenceEngine(InferenceEngine):
  the max number of new tokens LLM can generate.
  temperature : float, Optional
  the temperature for token sampling.
+ verbose : bool, Optional
+ if True, VLM generated text will be printed in terminal in real-time.
  stream : bool, Optional
- if True, LLM generated text will be printed in terminal in real-time.
+ if True, returns a generator that yields the output in real-time.
  """
- response = self.litellm.completion(
- model=self.model,
- messages=messages,
- max_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- base_url=self.base_url,
- api_key=self.api_key,
- **kwrs
- )
-
  if stream:
+ def _stream_generator():
+ response_stream = self.litellm.completion(
+ model=self.model,
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=True,
+ base_url=self.base_url,
+ api_key=self.api_key,
+ **kwrs
+ )
+
+ for chunk in response_stream:
+ chunk_content = chunk.get('choices')[0].get('delta').get('content')
+ if chunk_content:
+ yield chunk_content
+
+ return _stream_generator()
+
+ elif verbose:
+ response = self.litellm.completion(
+ model=self.model,
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=True,
+ base_url=self.base_url,
+ api_key=self.api_key,
+ **kwrs
+ )
+
  res = ''
  for chunk in response:
- if chunk.choices[0].delta.content is not None:
- res += chunk.choices[0].delta.content
- print(chunk.choices[0].delta.content, end="", flush=True)
+ chunk_content = chunk.get('choices')[0].get('delta').get('content')
+ if chunk_content:
+ res += chunk_content
+ print(chunk_content, end='', flush=True)
+
  return res
 
- return response.choices[0].message.content
+ else:
+ response = self.litellm.completion(
+ model=self.model,
+ messages=messages,
+ max_tokens=max_new_tokens,
+ temperature=temperature,
+ stream=False,
+ base_url=self.base_url,
+ api_key=self.api_key,
+ **kwrs
+ )
+ return response.choices[0].message.content
 
  async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
  """
@@ -553,4 +644,4 @@ class LiteLLMInferenceEngine(InferenceEngine):
  **kwrs
  )
 
- return response.choices[0].message.content
+ return response.get('choices')[0].get('message').get('content')
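LiteLLMInferenceEngine follows the same stream/verbose pattern, and chat_async() now reads the response through dictionary-style .get() access. A closing usage sketch, assuming litellm is installed and provider credentials are available in the environment; the model string is illustrative:

    import asyncio
    from llm_ie.engines import LiteLLMInferenceEngine

    engine = LiteLLMInferenceEngine(model="openai/gpt-4o-mini")

    # stream=True: chat() returns a generator of text chunks.
    for piece in engine.chat([{"role": "user", "content": "One-line summary of HTTP."}],
                             stream=True):
        print(piece, end="", flush=True)
    print()

    # chat_async(): non-streaming; returns the full reply as a string.
    text = asyncio.run(engine.chat_async([{"role": "user", "content": "Hi"}]))
    print(text)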