llm-ie 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_ie/__init__.py +9 -0
- llm_ie/engines.py +151 -9
- llm_ie/extractors.py +545 -151
- llm_ie/prompt_editor.py +17 -2
- {llm_ie-0.3.5.dist-info → llm_ie-0.4.0.dist-info}/METADATA +341 -103
- {llm_ie-0.3.5.dist-info → llm_ie-0.4.0.dist-info}/RECORD +7 -7
- {llm_ie-0.3.5.dist-info → llm_ie-0.4.0.dist-info}/WHEEL +0 -0
llm_ie/__init__.py
CHANGED
```diff
@@ -0,0 +1,9 @@
+from .data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
+from .engines import LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, LiteLLMInferenceEngine
+from .extractors import BasicFrameExtractor, ReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, SentenceCoTFrameExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
+from .prompt_editor import PromptEditor
+
+__all__ = ["LLMInformationExtractionFrame", "LLMInformationExtractionDocument",
+           "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "LiteLLMInferenceEngine",
+           "BasicFrameExtractor", "ReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "SentenceCoTFrameExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
+           "PromptEditor"]
```
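With these exports, the 0.4.0 engines and extractors are importable directly from the package root. A minimal sketch of the new import path, assuming a locally served Ollama model (the model tag and prompt below are illustrative, not part of the diff):

```python
from llm_ie import OllamaInferenceEngine

# Constructor and chat() signatures as added in llm_ie/engines.py below;
# "llama3.1:8b" is a placeholder model tag and assumes a running Ollama server.
engine = OllamaInferenceEngine(model_name="llama3.1:8b", num_ctx=4096, keep_alive=300)

reply = engine.chat(
    messages=[{"role": "user", "content": "Extract the medication names from: aspirin 81 mg daily."}],
    max_new_tokens=256,
    temperature=0.0,
)
print(reply)
```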
llm_ie/engines.py
CHANGED
```diff
@@ -1,5 +1,7 @@
 import abc
-
+import importlib
+from typing import List, Dict, Union
+
 
 class InferenceEngine:
     @abc.abstractmethod
@@ -104,6 +106,9 @@ class LlamaCppInferenceEngine(InferenceEngine):
         return response['choices'][0]['message']['content']
 
 
+
+
+
 class OllamaInferenceEngine(InferenceEngine):
     def __init__(self, model_name:str, num_ctx:int=4096, keep_alive:int=300, **kwrs):
         """
@@ -118,8 +123,12 @@ class OllamaInferenceEngine(InferenceEngine):
         keep_alive : int, Optional
             seconds to hold the LLM after the last API call.
         """
-
-
+        if importlib.util.find_spec("ollama") is None:
+            raise ImportError("ollama-python not found. Please install ollama-python (```pip install ollama```).")
+
+        from ollama import Client, AsyncClient
+        self.client = Client(**kwrs)
+        self.async_client = AsyncClient(**kwrs)
         self.model_name = model_name
         self.num_ctx = num_ctx
         self.keep_alive = keep_alive
@@ -139,7 +148,7 @@ class OllamaInferenceEngine(InferenceEngine):
         stream : bool, Optional
             if True, LLM generated text will be printed in terminal in real-time.
         """
-        response = self.
+        response = self.client.chat(
             model=self.model_name,
             messages=messages,
             options={'temperature':temperature, 'num_ctx': self.num_ctx, 'num_predict': max_new_tokens, **kwrs},
@@ -155,16 +164,35 @@ class OllamaInferenceEngine(InferenceEngine):
             return res
 
         return response['message']['content']
+
+
+    async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+        """
+        Async version of chat method. Streaming is not supported.
+        """
+        response = await self.async_client.chat(
+            model=self.model_name,
+            messages=messages,
+            options={'temperature':temperature, 'num_ctx': self.num_ctx, 'num_predict': max_new_tokens, **kwrs},
+            stream=False,
+            keep_alive=self.keep_alive
+        )
+
+        return response['message']['content']
 
 
 class HuggingFaceHubInferenceEngine(InferenceEngine):
-    def __init__(self, **kwrs):
+    def __init__(self, model:str=None, token:Union[str, bool]=None, base_url:str=None, api_key:str=None, **kwrs):
         """
         The Huggingface_hub InferenceClient inference engine.
         For parameters and documentation, refer to https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client
         """
-
-
+        if importlib.util.find_spec("huggingface_hub") is None:
+            raise ImportError("huggingface-hub not found. Please install huggingface-hub (```pip install huggingface-hub```).")
+
+        from huggingface_hub import InferenceClient, AsyncInferenceClient
+        self.client = InferenceClient(model=model, token=token, base_url=base_url, api_key=api_key, **kwrs)
+        self.client_async = AsyncInferenceClient(model=model, token=token, base_url=base_url, api_key=api_key, **kwrs)
 
     def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
         """
```
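The hunks above wire OllamaInferenceEngine to ollama's Client/AsyncClient and add a non-streaming chat_async method. A concurrency sketch under those assumptions (placeholder model tag and prompts; requires a reachable Ollama server):

```python
import asyncio
from llm_ie import OllamaInferenceEngine

# chat_async() signature as added above; model tag and prompts are placeholders.
engine = OllamaInferenceEngine(model_name="llama3.1:8b")

async def run_batch(prompts):
    # Issue several non-streaming requests concurrently through AsyncClient.
    tasks = [
        engine.chat_async(messages=[{"role": "user", "content": p}], max_new_tokens=128)
        for p in prompts
    ]
    return await asyncio.gather(*tasks)

print(asyncio.run(run_batch(["Summarize sentence A.", "Summarize sentence B."])))
```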
```diff
@@ -197,12 +225,29 @@ class HuggingFaceHubInferenceEngine(InferenceEngine):
             return res
 
         return response.choices[0].message.content
+
+    async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+        """
+        Async version of chat method. Streaming is not supported.
+        """
+        response = await self.client_async.chat.completions.create(
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=False,
+            **kwrs
+        )
+
+        return response.choices[0].message.content
 
 
 class OpenAIInferenceEngine(InferenceEngine):
     def __init__(self, model:str, **kwrs):
         """
-        The OpenAI API inference engine.
+        The OpenAI API inference engine. Supports OpenAI models and OpenAI compatible servers:
+        - vLLM OpenAI compatible server (https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html)
+        - Llama.cpp OpenAI compatible server (https://llama-cpp-python.readthedocs.io/en/latest/server/)
+
         For parameters and documentation, refer to https://platform.openai.com/docs/api-reference/introduction
 
         Parameters:
@@ -210,8 +255,12 @@ class OpenAIInferenceEngine(InferenceEngine):
         model_name : str
             model name as described in https://platform.openai.com/docs/models
         """
-
+        if importlib.util.find_spec("openai") is None:
+            raise ImportError("OpenAI Python API library not found. Please install OpanAI (```pip install openai```).")
+
+        from openai import OpenAI, AsyncOpenAI
         self.client = OpenAI(**kwrs)
+        self.async_client = AsyncOpenAI(**kwrs)
         self.model = model
 
     def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
```
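Because OpenAIInferenceEngine forwards **kwrs to openai.OpenAI / AsyncOpenAI, the OpenAI-compatible servers named in the new docstring can be reached by passing base_url and api_key. A sketch assuming a vLLM or llama.cpp server at a placeholder address:

```python
from llm_ie import OpenAIInferenceEngine

# base_url/api_key pass through **kwrs to openai.OpenAI(); the endpoint,
# key, and model name are placeholders for a local OpenAI-compatible server.
engine = OpenAIInferenceEngine(
    model="meta-llama/Llama-3.1-8B-Instruct",
    base_url="http://localhost:8000/v1",
    api_key="EMPTY",
)

print(engine.chat(messages=[{"role": "user", "content": "Say hello."}], max_new_tokens=64))
```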
```diff
@@ -246,4 +295,97 @@ class OpenAIInferenceEngine(InferenceEngine):
                     print(chunk.choices[0].delta.content, end="", flush=True)
             return res
 
+        return response.choices[0].message.content
+
+
+    async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+        """
+        Async version of chat method. Streaming is not supported.
+        """
+        response = await self.async_client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=False,
+            **kwrs
+        )
+
+        return response.choices[0].message.content
+
+
+class LiteLLMInferenceEngine(InferenceEngine):
+    def __init__(self, model:str=None, base_url:str=None, api_key:str=None):
+        """
+        The LiteLLM inference engine.
+        For parameters and documentation, refer to https://github.com/BerriAI/litellm?tab=readme-ov-file
+
+        Parameters:
+        ----------
+        model : str
+            the model name
+        base_url : str, Optional
+            the base url for the LLM server
+        api_key : str, Optional
+            the API key for the LLM server
+        """
+        if importlib.util.find_spec("litellm") is None:
+            raise ImportError("litellm not found. Please install litellm (```pip install litellm```).")
+
+        import litellm
+        self.litellm = litellm
+        self.model = model
+        self.base_url = base_url
+        self.api_key = api_key
+
+    def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+        """
+        This method inputs chat messages and outputs LLM generated text.
+
+        Parameters:
+        ----------
+        messages : List[Dict[str,str]]
+            a list of dict with role and content. role must be one of {"system", "user", "assistant"}
+        max_new_tokens : str, Optional
+            the max number of new tokens LLM can generate.
+        temperature : float, Optional
+            the temperature for token sampling.
+        stream : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
+        """
+        response = self.litellm.completion(
+            model=self.model,
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=stream,
+            base_url=self.base_url,
+            api_key=self.api_key,
+            **kwrs
+        )
+
+        if stream:
+            res = ''
+            for chunk in response:
+                if chunk.choices[0].delta.content is not None:
+                    res += chunk.choices[0].delta.content
+                    print(chunk.choices[0].delta.content, end="", flush=True)
+            return res
+
+        return response.choices[0].message.content
+
+    async def chat_async(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, **kwrs) -> str:
+        """
+        Async version of chat method. Streaming is not supported.
+        """
+        response = await self.litellm.acompletion(
+            model=self.model,
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=False,
+            base_url=self.base_url,
+            api_key=self.api_key,
+            **kwrs
+        )
+
         return response.choices[0].message.content
```
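The new LiteLLMInferenceEngine routes chat and chat_async through litellm.completion / litellm.acompletion, so any provider litellm supports can be used behind the same interface. A minimal async sketch (the "openai/gpt-4o-mini" model string is a placeholder and assumes the relevant provider API key is set in the environment):

```python
import asyncio
from llm_ie import LiteLLMInferenceEngine

# Model string follows litellm's provider/model convention; it is a placeholder
# and assumes OPENAI_API_KEY (or the relevant provider key) is exported.
engine = LiteLLMInferenceEngine(model="openai/gpt-4o-mini")

async def main():
    return await engine.chat_async(
        messages=[{"role": "user", "content": "List three symptoms of influenza."}],
        max_new_tokens=128,
        temperature=0.0,
    )

print(asyncio.run(main()))
```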