llm-ie 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/extractors.py CHANGED
@@ -17,7 +17,7 @@ from colorama import Fore, Style
 
 
 class Extractor:
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
 """
 This is the abstract class for (frame and relation) extractors.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -172,7 +172,7 @@ class Extractor:
 class FrameExtractor(Extractor):
 from nltk.tokenize import RegexpTokenizer
 def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker,
- prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None, **kwrs):
+ prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None):
 """
 This is the abstract class for frame extraction.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -192,8 +192,7 @@ class FrameExtractor(Extractor):
 """
 super().__init__(inference_engine=inference_engine,
 prompt_template=prompt_template,
- system_prompt=system_prompt,
- **kwrs)
+ system_prompt=system_prompt)
 
 self.unit_chunker = unit_chunker
 if context_chunker is None:
@@ -332,7 +331,7 @@ class FrameExtractor(Extractor):
 return entity_spans
 
 @abc.abstractmethod
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, return_messages_log:bool=False, **kwrs) -> str:
+ def extract(self, text_content:Union[str, Dict[str,str]], return_messages_log:bool=False, **kwrs) -> str:
 """
 This method inputs text content and outputs a string generated by LLM
 
@@ -342,8 +341,6 @@ class FrameExtractor(Extractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : str, Optional
- the max number of new tokens LLM can generate.
 return_messages_log : bool, Optional
 if True, a list of messages will be returned.
 
@@ -354,7 +351,7 @@ class FrameExtractor(Extractor):
 
 
 @abc.abstractmethod
- def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=2048,
+ def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str,
 document_key:str=None, return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
 """
 This method inputs text content and outputs a list of LLMInformationExtractionFrame
@@ -368,8 +365,6 @@ class FrameExtractor(Extractor):
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
 entity_key : str
 the key (in ouptut JSON) for entity text. Any extraction that does not include entity key will be dropped.
- max_new_tokens : str, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
@@ -384,7 +379,7 @@ class FrameExtractor(Extractor):
 
 class DirectFrameExtractor(FrameExtractor):
 def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker,
- prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None, **kwrs):
+ prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None):
 """
 This class is for general unit-context frame extraction.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -406,12 +401,11 @@ class DirectFrameExtractor(FrameExtractor):
 unit_chunker=unit_chunker,
 prompt_template=prompt_template,
 system_prompt=system_prompt,
- context_chunker=context_chunker,
- **kwrs)
+ context_chunker=context_chunker)
 
 
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048,
- document_key:str=None, temperature:float=0.0, verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
+ def extract(self, text_content:Union[str, Dict[str,str]],
+ document_key:str=None, verbose:bool=False, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
 """
 This method inputs a text and outputs a list of outputs per unit.
 
@@ -421,13 +415,9 @@ class DirectFrameExtractor(FrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 return_messages_log : bool, Optional
@@ -491,27 +481,12 @@ class DirectFrameExtractor(FrameExtractor):
 
 print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
 
- response_stream = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
- )
-
- gen_text = ""
- for chunk in response_stream:
- gen_text += chunk
- print(chunk, end='', flush=True)
 
- else:
- gen_text = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=False,
- **kwrs
- )
+ gen_text = self.inference_engine.chat(
+ messages=messages,
+ verbose=verbose,
+ stream=False
+ )
 
 if return_messages_log:
 messages.append({"role": "assistant", "content": gen_text})
@@ -530,8 +505,8 @@ class DirectFrameExtractor(FrameExtractor):
 
 return output
 
- def stream(self, text_content: Union[str, Dict[str, str]], max_new_tokens: int = 2048, document_key: str = None,
- temperature: float = 0.0, **kwrs) -> Generator[Dict[str, Any], None, List[FrameExtractionUnitResult]]:
+ def stream(self, text_content: Union[str, Dict[str, str]],
+ document_key: str = None) -> Generator[Dict[str, Any], None, List[FrameExtractionUnitResult]]:
 """
 Streams LLM responses per unit with structured event types,
 and returns collected data for post-processing.
@@ -542,7 +517,8 @@ class DirectFrameExtractor(FrameExtractor):
 - {"type": "info", "data": str_message}: General informational messages.
 - {"type": "unit", "data": dict_unit_info}: Signals start of a new unit. dict_unit_info contains {'id', 'text', 'start', 'end'}
 - {"type": "context", "data": str_context}: Context string for the current unit.
- - {"type": "llm_chunk", "data": str_chunk}: A raw chunk from the LLM.
+ - {"type": "reasoning", "data": str_chunk}: A reasoning model thinking chunk from the LLM.
+ - {"type": "response", "data": str_chunk}: A response/answer chunk from the LLM.
 
 Returns:
 --------
@@ -601,13 +577,10 @@ class DirectFrameExtractor(FrameExtractor):
 
 response_stream = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
+ stream=True
 )
 for chunk in response_stream:
- yield {"type": "llm_chunk", "data": chunk}
+ yield chunk
 current_gen_text += chunk
 
 # Store the result for this unit
@@ -622,8 +595,8 @@ class DirectFrameExtractor(FrameExtractor):
 yield {"type": "info", "data": "All units processed by LLM."}
 return collected_results
 
- async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None, temperature:float=0.0,
- concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
+ async def extract_async(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
 """
 This is the asynchronous version of the extract() method.
 
@@ -633,13 +606,9 @@ class DirectFrameExtractor(FrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 concurrent_batch_size : int, Optional
 the batch size for concurrent processing.
 return_messages_log : bool, Optional
@@ -701,17 +670,14 @@ class DirectFrameExtractor(FrameExtractor):
 # Process units concurrently with asyncio.Semaphore
 semaphore = asyncio.Semaphore(concurrent_batch_size)
 
- async def semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
+ async def semaphore_helper(task_data: Dict, **kwrs):
 unit = task_data["unit"]
 messages = task_data["messages"]
 original_index = task_data["original_index"]
 
 async with semaphore:
 gen_text = await self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 return {"original_index": original_index, "unit": unit, "gen_text": gen_text, "messages": messages}
 
@@ -719,10 +685,7 @@ class DirectFrameExtractor(FrameExtractor):
 tasks = []
 for task_inp in tasks_input:
 task = asyncio.create_task(semaphore_helper(
- task_inp,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ task_inp
 ))
 tasks.append(task)
 
@@ -759,11 +722,10 @@ class DirectFrameExtractor(FrameExtractor):
 return output
 
 
- def extract_frames(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
- document_key:str=None, temperature:float=0.0, verbose:bool=False,
- concurrent:bool=False, concurrent_batch_size:int=32,
+ def extract_frames(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+ verbose:bool=False, concurrent:bool=False, concurrent_batch_size:int=32,
 case_sensitive:bool=False, fuzzy_match:bool=True, fuzzy_buffer_size:float=0.2, fuzzy_score_cutoff:float=0.8,
- allow_overlap_entities:bool=False, return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
+ allow_overlap_entities:bool=False, return_messages_log:bool=False) -> List[LLMInformationExtractionFrame]:
 """
 This method inputs a text and outputs a list of LLMInformationExtractionFrame
 It use the extract() method and post-process outputs into frames.
@@ -774,13 +736,9 @@ class DirectFrameExtractor(FrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : str, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 concurrent : bool, Optional
@@ -812,21 +770,15 @@ class DirectFrameExtractor(FrameExtractor):
 
 nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
 extraction_results = asyncio.run(self.extract_async(text_content=text_content,
- max_new_tokens=max_new_tokens,
 document_key=document_key,
- temperature=temperature,
 concurrent_batch_size=concurrent_batch_size,
- return_messages_log=return_messages_log,
- **kwrs)
+ return_messages_log=return_messages_log)
 )
 else:
 extraction_results = self.extract(text_content=text_content,
- max_new_tokens=max_new_tokens,
 document_key=document_key,
- temperature=temperature,
 verbose=verbose,
- return_messages_log=return_messages_log,
- **kwrs)
+ return_messages_log=return_messages_log)
 
 llm_output_results, messages_log = extraction_results if return_messages_log else (extraction_results, None)
 
@@ -869,8 +821,8 @@ class DirectFrameExtractor(FrameExtractor):
 
 
 class ReviewFrameExtractor(DirectFrameExtractor):
- def __init__(self, unit_chunker:UnitChunker, context_chunker:ContextChunker,
- inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None, **kwrs):
+ def __init__(self, unit_chunker:UnitChunker, context_chunker:ContextChunker, inference_engine:InferenceEngine,
+ prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None):
 """
 This class add a review step after the DirectFrameExtractor.
 The Review process asks LLM to review its output and:
@@ -901,8 +853,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 unit_chunker=unit_chunker,
 prompt_template=prompt_template,
 system_prompt=system_prompt,
- context_chunker=context_chunker,
- **kwrs)
+ context_chunker=context_chunker)
 # check review mode
 if review_mode not in {"addition", "revision"}:
 raise ValueError('review_mode must be one of {"addition", "revision"}.')
@@ -939,8 +890,8 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 if self.review_prompt is None:
 raise ValueError(f"Cannot find review prompt for {self.__class__.__name__} in the package. Please provide a review_prompt.")
 
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None,
- temperature:float=0.0, verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
+ def extract(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+ verbose:bool=False, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
 """
 This method inputs a text and outputs a list of outputs per unit.
 
@@ -950,13 +901,9 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 return_messages_log : bool, Optional
@@ -1020,28 +967,13 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
 
 print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
-
- response_stream = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
- )
-
- initial = ""
- for chunk in response_stream:
- initial += chunk
- print(chunk, end='', flush=True)
 
- else:
- initial = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=False,
- **kwrs
- )
+
+ initial = self.inference_engine.chat(
+ messages=messages,
+ verbose=verbose,
+ stream=False
+ )
 
 if return_messages_log:
 messages.append({"role": "assistant", "content": initial})
@@ -1053,29 +985,12 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 messages.append({'role': 'assistant', 'content': initial})
 messages.append({'role': 'user', 'content': self.review_prompt})
-
- if verbose:
- response_stream = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
- )
-
- review = ""
- for chunk in response_stream:
- review += chunk
- print(chunk, end='', flush=True)
 
- else:
- review = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=False,
- **kwrs
- )
+ review = self.inference_engine.chat(
+ messages=messages,
+ verbose=verbose,
+ stream=False
+ )
 
 # Output
 if self.review_mode == "revision":
@@ -1101,8 +1016,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 return output
 
 
- def stream(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048,
- document_key:str=None, temperature:float=0.0, **kwrs) -> Generator[str, None, None]:
+ def stream(self, text_content:Union[str, Dict[str,str]], document_key:str=None) -> Generator[str, None, None]:
 """
 This method inputs a text and outputs a list of outputs per unit.
 
@@ -1112,13 +1026,9 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 
 Return : List[FrameExtractionUnitResult]
 the output from LLM for each unit. Contains the start, end, text, and generated text.
@@ -1176,10 +1086,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 response_stream = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
+ stream=True
 )
 
 initial = ""
@@ -1195,16 +1102,13 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 response_stream = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
+ stream=True
 )
 
 for chunk in response_stream:
 yield chunk
 
- async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None, temperature:float=0.0,
+ async def extract_async(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
 concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
 """
 This is the asynchronous version of the extract() method with the review step.
@@ -1215,13 +1119,9 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 concurrent_batch_size : int, Optional
 the batch size for concurrent processing.
 return_messages_log : bool, Optional
@@ -1282,17 +1182,14 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 semaphore = asyncio.Semaphore(concurrent_batch_size)
 
- async def initial_semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
+ async def initial_semaphore_helper(task_data: Dict):
 unit = task_data["unit"]
 messages = task_data["messages"]
 original_index = task_data["original_index"]
 
 async with semaphore:
 gen_text = await self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 # Return initial generation result along with the messages used and the unit
 return {"original_index": original_index, "unit": unit, "initial_gen_text": gen_text, "initial_messages": messages}
@@ -1300,10 +1197,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 # Create and gather initial generation tasks
 initial_tasks = [
 asyncio.create_task(initial_semaphore_helper(
- task_inp,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ task_inp
 ))
 for task_inp in initial_tasks_input
 ]
@@ -1333,16 +1227,13 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 })
 
 
- async def review_semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
+ async def review_semaphore_helper(task_data: Dict, **kwrs):
 messages = task_data["messages"]
 original_index = task_data["original_index"]
 
 async with semaphore:
 review_gen_text = await self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 # Combine initial and review results
 task_data["review_gen_text"] = review_gen_text
@@ -1354,10 +1245,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 # Create and gather review tasks
 review_tasks = [
 asyncio.create_task(review_semaphore_helper(
- task_inp,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ task_inp
 ))
 for task_inp in review_tasks_input
 ]
@@ -1405,7 +1293,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 
 class BasicFrameExtractor(DirectFrameExtractor):
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
 """
 This class diretly prompt LLM for frame extraction.
 Input system prompt (optional), prompt template (with instruction, few-shot examples),
@@ -1424,11 +1312,10 @@ class BasicFrameExtractor(DirectFrameExtractor):
 unit_chunker=WholeDocumentUnitChunker(),
 prompt_template=prompt_template,
 system_prompt=system_prompt,
- context_chunker=NoContextChunker(),
- **kwrs)
+ context_chunker=NoContextChunker())
 
 class BasicReviewFrameExtractor(ReviewFrameExtractor):
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None, **kwrs):
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None):
 """
 This class add a review step after the BasicFrameExtractor.
 The Review process asks LLM to review its output and:
@@ -1457,13 +1344,12 @@ class BasicReviewFrameExtractor(ReviewFrameExtractor):
 review_mode=review_mode,
 review_prompt=review_prompt,
 system_prompt=system_prompt,
- context_chunker=NoContextChunker(),
- **kwrs)
+ context_chunker=NoContextChunker())
 
 
 class SentenceFrameExtractor(DirectFrameExtractor):
 def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
- context_sentences:Union[str, int]="all", **kwrs):
+ context_sentences:Union[str, int]="all"):
 """
 This class performs sentence-by-sentence information extraction.
 The process is as follows:
@@ -1507,14 +1393,13 @@ class SentenceFrameExtractor(DirectFrameExtractor):
 unit_chunker=SentenceUnitChunker(),
 prompt_template=prompt_template,
 system_prompt=system_prompt,
- context_chunker=context_chunker,
- **kwrs)
+ context_chunker=context_chunker)
 
 
 class SentenceReviewFrameExtractor(ReviewFrameExtractor):
 def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
 review_mode:str, review_prompt:str=None, system_prompt:str=None,
- context_sentences:Union[str, int]="all", **kwrs):
+ context_sentences:Union[str, int]="all"):
 """
 This class adds a review step after the SentenceFrameExtractor.
 For each sentence, the review process asks LLM to review its output and:
@@ -1561,12 +1446,11 @@ class SentenceReviewFrameExtractor(ReviewFrameExtractor):
 review_mode=review_mode,
 review_prompt=review_prompt,
 system_prompt=system_prompt,
- context_chunker=context_chunker,
- **kwrs)
+ context_chunker=context_chunker)
 
 
 class RelationExtractor(Extractor):
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
 """
 This is the abstract class for relation extraction.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1582,8 +1466,7 @@ class RelationExtractor(Extractor):
 """
 super().__init__(inference_engine=inference_engine,
 prompt_template=prompt_template,
- system_prompt=system_prompt,
- **kwrs)
+ system_prompt=system_prompt)
 
 def _get_ROI(self, frame_1:LLMInformationExtractionFrame, frame_2:LLMInformationExtractionFrame,
 text:str, buffer_size:int=100) -> str:
@@ -1659,7 +1542,7 @@ class RelationExtractor(Extractor):
 
 class BinaryRelationExtractor(RelationExtractor):
 def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_func: Callable,
- system_prompt:str=None, **kwrs):
+ system_prompt:str=None):
 """
 This class extracts binary (yes/no) relations between two entities.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1677,8 +1560,7 @@ class BinaryRelationExtractor(RelationExtractor):
 """
 super().__init__(inference_engine=inference_engine,
 prompt_template=prompt_template,
- system_prompt=system_prompt,
- **kwrs)
+ system_prompt=system_prompt)
 
 if possible_relation_func:
 # Check if possible_relation_func is a function
@@ -1718,8 +1600,8 @@ class BinaryRelationExtractor(RelationExtractor):
 return False
 
 
- def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, verbose:bool=False,
+ return_messages_log:bool=False) -> List[Dict]:
 """
 This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
 Outputs pairs that are related.
@@ -1730,11 +1612,7 @@ class BinaryRelationExtractor(RelationExtractor):
 a document with frames.
 buffer_size : int, Optional
 the number of characters before and after the two frames in the ROI text.
- max_new_tokens : str, Optional
- the max number of new tokens LLM should generate.
- temperature : float, Optional
- the temperature for token sampling.
- stream : bool, Optional
+ verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 return_messages_log : bool, Optional
 if True, a list of messages will be returned.
@@ -1753,7 +1631,7 @@ class BinaryRelationExtractor(RelationExtractor):
 
 if pos_rel:
 roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
- if stream:
+ if verbose:
 print(f"\n\n{Fore.GREEN}ROI text:{Style.RESET_ALL} \n{roi_text}\n")
 print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
 messages = []
@@ -1767,10 +1645,7 @@ class BinaryRelationExtractor(RelationExtractor):
 
 gen_text = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- **kwrs
+ verbose=verbose
 )
 rel_json = self._extract_json(gen_text)
 if self._post_process(rel_json):
@@ -1785,8 +1660,8 @@ class BinaryRelationExtractor(RelationExtractor):
 return output
 
 
- async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[Dict]:
 """
 This is the asynchronous version of the extract() method.
 
@@ -1841,10 +1716,7 @@ class BinaryRelationExtractor(RelationExtractor):
 
 task = asyncio.create_task(
 self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 )
 tasks.append(task)
@@ -1866,9 +1738,9 @@ class BinaryRelationExtractor(RelationExtractor):
 return output
 
 
- def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32,
- stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
+ concurrent:bool=False, concurrent_batch_size:int=32, verbose:bool=False,
+ return_messages_log:bool=False) -> List[Dict]:
 """
 This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
 
@@ -1878,15 +1750,11 @@ class BinaryRelationExtractor(RelationExtractor):
 a document with frames.
 buffer_size : int, Optional
 the number of characters before and after the two frames in the ROI text.
- max_new_tokens : str, Optional
- the max number of new tokens LLM should generate.
- temperature : float, Optional
- the temperature for token sampling.
 concurrent: bool, Optional
 if True, the extraction will be done in concurrent.
 concurrent_batch_size : int, Optional
 the number of frame pairs to process in concurrent.
- stream : bool, Optional
+ verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 return_messages_log : bool, Optional
 if True, a list of messages will be returned.
@@ -1901,31 +1769,25 @@ class BinaryRelationExtractor(RelationExtractor):
 raise ValueError("All frame_ids in the input document must be unique.")
 
 if concurrent:
- if stream:
+ if verbose:
 warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
 
 nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
 return asyncio.run(self.extract_async(doc=doc,
 buffer_size=buffer_size,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
 concurrent_batch_size=concurrent_batch_size,
- return_messages_log=return_messages_log,
- **kwrs)
+ return_messages_log=return_messages_log)
 )
 else:
 return self.extract(doc=doc,
 buffer_size=buffer_size,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- return_messages_log=return_messages_log,
- **kwrs)
+ verbose=verbose,
+ return_messages_log=return_messages_log)
 
 
 class MultiClassRelationExtractor(RelationExtractor):
 def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_types_func: Callable,
- system_prompt:str=None, **kwrs):
+ system_prompt:str=None):
 """
 This class extracts relations with relation types.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1944,8 +1806,7 @@ class MultiClassRelationExtractor(RelationExtractor):
 """
 super().__init__(inference_engine=inference_engine,
 prompt_template=prompt_template,
- system_prompt=system_prompt,
- **kwrs)
+ system_prompt=system_prompt)
 
 if possible_relation_types_func:
 # Check if possible_relation_types_func is a function
@@ -1992,8 +1853,7 @@ class MultiClassRelationExtractor(RelationExtractor):
 return None
 
 
- def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, verbose:bool=False, return_messages_log:bool=False) -> List[Dict]:
 """
 This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
 
@@ -2026,7 +1886,7 @@ class MultiClassRelationExtractor(RelationExtractor):
 
 if pos_rel_types:
 roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
- if stream:
+ if verbose:
 print(f"\n\n{Fore.GREEN}ROI text:{Style.RESET_ALL} \n{roi_text}\n")
 print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
 messages = []
@@ -2041,10 +1901,8 @@ class MultiClassRelationExtractor(RelationExtractor):
 
 gen_text = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- **kwrs
+ stream=False,
+ verbose=verbose
 )
 
 if return_messages_log:
@@ -2061,8 +1919,8 @@ class MultiClassRelationExtractor(RelationExtractor):
 return output
 
 
- async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[Dict]:
 """
 This is the asynchronous version of the extract() method.
 
@@ -2117,10 +1975,7 @@ class MultiClassRelationExtractor(RelationExtractor):
 )})
 task = asyncio.create_task(
 self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 )
 tasks.append(task)
@@ -2143,9 +1998,9 @@ class MultiClassRelationExtractor(RelationExtractor):
 return output
 
 
- def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32,
- stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
+ concurrent:bool=False, concurrent_batch_size:int=32,
+ verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
 """
 This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
 
@@ -2178,24 +2033,18 @@ class MultiClassRelationExtractor(RelationExtractor):
 raise ValueError("All frame_ids in the input document must be unique.")
 
 if concurrent:
- if stream:
+ if verbose:
 warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
 
 nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
 return asyncio.run(self.extract_async(doc=doc,
 buffer_size=buffer_size,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
 concurrent_batch_size=concurrent_batch_size,
- return_messages_log=return_messages_log,
- **kwrs)
+ return_messages_log=return_messages_log)
 )
 else:
 return self.extract(doc=doc,
 buffer_size=buffer_size,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- return_messages_log=return_messages_log,
- **kwrs)
+ verbose=verbose,
+ return_messages_log=return_messages_log)
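
Net effect of the 1.0.0 → 1.1.0 changes above: extractor methods no longer accept per-call max_new_tokens, temperature, or **kwrs, the relation extractors' stream= flag becomes verbose=, and DirectFrameExtractor.stream() yields "reasoning"/"response" chunk events instead of "llm_chunk". A minimal usage sketch of the new call pattern follows; the engine and chunker wiring is hypothetical, and the idea that sampling settings now live on the InferenceEngine rather than on each call is an assumption, not something this diff shows.

# Hypothetical wiring; only the 1.1.0 method signatures below come from this diff.
from llm_ie.extractors import DirectFrameExtractor

def extract_note(engine, unit_chunker, prompt_template, note_text):
    # `engine` (an InferenceEngine), `unit_chunker`, and `prompt_template` are
    # assumed to be pre-built; generation settings such as temperature are
    # presumably configured on the engine itself in 1.1.0.
    extractor = DirectFrameExtractor(inference_engine=engine,
                                     unit_chunker=unit_chunker,
                                     prompt_template=prompt_template)

    # 1.1.0: no max_new_tokens / temperature / **kwrs keyword arguments here.
    frames = extractor.extract_frames(text_content=note_text,
                                      concurrent=True,
                                      concurrent_batch_size=32)

    # stream() yields typed events; "reasoning" and "response" replace the
    # old "llm_chunk" event type (per the docstring change above).
    for event in extractor.stream(text_content=note_text):
        if event["type"] == "response":
            print(event["data"], end="", flush=True)

    return frames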