llm-ie 0.4.7__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/extractors.py CHANGED
@@ -8,14 +8,16 @@ import warnings
8
8
  import itertools
9
9
  import asyncio
10
10
  import nest_asyncio
11
- from typing import Set, List, Dict, Tuple, Union, Callable
12
- from llm_ie.data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
11
+ from typing import Any, Set, List, Dict, Tuple, Union, Callable, Generator, Optional
12
+ from llm_ie.data_types import FrameExtractionUnit, FrameExtractionUnitResult, LLMInformationExtractionFrame, LLMInformationExtractionDocument
13
+ from llm_ie.chunkers import UnitChunker, WholeDocumentUnitChunker, SentenceUnitChunker
14
+ from llm_ie.chunkers import ContextChunker, NoContextChunker, WholeDocumentContextChunker, SlideWindowContextChunker
13
15
  from llm_ie.engines import InferenceEngine
14
- from colorama import Fore, Style
16
+ from colorama import Fore, Style
15
17
 
16
18
 
17
19
  class Extractor:
18
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
20
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
19
21
  """
20
22
  This is the abstract class for (frame and relation) extractors.
21
23
  Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -38,15 +40,46 @@ class Extractor:
38
40
  def get_prompt_guide(cls) -> str:
39
41
  """
40
42
  This method returns the pre-defined prompt guideline for the extractor from the package asset.
43
+ It searches for a guide specific to the current class first; if not found, it searches
44
+ for the guide in its ancestors by traversing the class's method resolution order (MRO).
41
45
  """
42
- # Check if the prompt guide is available
43
- file_path = importlib.resources.files('llm_ie.asset.prompt_guide').joinpath(f"{cls.__name__}_prompt_guide.txt")
44
- try:
45
- with open(file_path, 'r', encoding="utf-8") as f:
46
- return f.read()
47
- except FileNotFoundError:
48
- warnings.warn(f"Prompt guide for {cls.__name__} is not available. Is it a customed extractor?", UserWarning)
49
- return None
46
+ original_class_name = cls.__name__
47
+
48
+ for current_class_in_mro in cls.__mro__:
49
+ if current_class_in_mro is object:
50
+ continue
51
+
52
+ current_class_name = current_class_in_mro.__name__
53
+
54
+ try:
55
+ file_path_obj = importlib.resources.files('llm_ie.asset.prompt_guide').joinpath(f"{current_class_name}_prompt_guide.txt")
56
+
57
+ with open(file_path_obj, 'r', encoding="utf-8") as f:
58
+ prompt_content = f.read()
59
+ # If the guide was found for an ancestor, not the original class, issue a warning.
60
+ if cls is not current_class_in_mro:
61
+ warnings.warn(
62
+ f"Prompt guide for '{original_class_name}' not found. "
63
+ f"Using guide from ancestor: '{current_class_name}_prompt_guide.txt'.",
64
+ UserWarning
65
+ )
66
+ return prompt_content
67
+ except FileNotFoundError:
68
+ pass
69
+
70
+ except Exception as e:
71
+ warnings.warn(
72
+ f"Error attempting to read prompt guide for '{current_class_name}' "
73
+ f"from '{str(file_path_obj)}': {e}. Trying next in MRO.",
74
+ UserWarning
75
+ )
76
+ continue
77
+
78
+ # If the loop completes, no prompt guide was found for the original class or any of its ancestors.
79
+ raise FileNotFoundError(
80
+ f"Prompt guide for '{original_class_name}' not found in the package asset. "
81
+ f"Is it a custom extractor?"
82
+ )
50
83
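As an illustration of the new lookup behavior (not itself part of the diff): a custom subclass that ships no guide file of its own now inherits its parent's guide instead of returning None. A minimal sketch, assuming DirectFrameExtractor (defined later in this file) is importable from llm_ie.extractors and ships a packaged guide:

from llm_ie.extractors import DirectFrameExtractor

class MyCustomExtractor(DirectFrameExtractor):
    # no MyCustomExtractor_prompt_guide.txt exists in llm_ie.asset.prompt_guide
    pass

# Resolves to the ancestor's guide and emits a UserWarning naming the file used;
# only if no class in the MRO has a guide does it raise FileNotFoundError.
guide = MyCustomExtractor.get_prompt_guide()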
 
51
84
  def _get_user_prompt(self, text_content:Union[str, Dict[str,str]]) -> str:
52
85
  """
@@ -138,7 +171,8 @@ class Extractor:
138
171
 
139
172
  class FrameExtractor(Extractor):
140
173
  from nltk.tokenize import RegexpTokenizer
141
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
174
+ def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker,
175
+ prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None):
142
176
  """
143
177
  This is the abstract class for frame extraction.
144
178
  Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -147,15 +181,24 @@ class FrameExtractor(Extractor):
147
181
  ----------
148
182
  inference_engine : InferenceEngine
149
183
  the LLM inference engine object. Must implement the chat() method.
184
+ unit_chunker : UnitChunker
185
+ the unit chunker object that determines how to chunk the document text into units.
150
186
  prompt_template : str
151
187
  prompt template with "{{<placeholder name>}}" placeholder.
152
188
  system_prompt : str, Optional
153
189
  system prompt.
190
+ context_chunker : ContextChunker
191
+ the context chunker object that determines how to get context for each unit.
154
192
  """
155
193
  super().__init__(inference_engine=inference_engine,
156
194
  prompt_template=prompt_template,
157
- system_prompt=system_prompt,
158
- **kwrs)
195
+ system_prompt=system_prompt)
196
+
197
+ self.unit_chunker = unit_chunker
198
+ if context_chunker is None:
199
+ self.context_chunker = NoContextChunker()
200
+ else:
201
+ self.context_chunker = context_chunker
159
202
 
160
203
  self.tokenizer = self.RegexpTokenizer(r'\w+|[^\w\s]')
161
204
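As an illustration of the new constructor (not itself part of the diff): the unit/context chunker pair replaces the old sentence-based configuration. A minimal sketch of wiring it up, assuming DirectFrameExtractor from this file, an already-constructed InferenceEngine named engine, and a prompt_template string; the SlideWindowContextChunker argument is a guess, since its signature lives in llm_ie.chunkers and is not shown here:

from llm_ie.chunkers import SentenceUnitChunker, SlideWindowContextChunker
from llm_ie.extractors import DirectFrameExtractor

extractor = DirectFrameExtractor(
    inference_engine=engine,                       # any InferenceEngine with chat()/chat_async()
    unit_chunker=SentenceUnitChunker(),            # one LLM call per sentence
    context_chunker=SlideWindowContextChunker(2),  # neighboring units as context (constructor args assumed)
    prompt_template=prompt_template,
    system_prompt="You are an information extraction assistant.",
)
# Omitting context_chunker falls back to NoContextChunker().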
 
@@ -288,7 +331,7 @@ class FrameExtractor(Extractor):
288
331
  return entity_spans
289
332
 
290
333
  @abc.abstractmethod
291
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, return_messages_log:bool=False, **kwrs) -> str:
334
+ def extract(self, text_content:Union[str, Dict[str,str]], return_messages_log:bool=False, **kwrs) -> str:
292
335
  """
293
336
  This method inputs text content and outputs a string generated by LLM
294
337
 
@@ -298,8 +341,6 @@ class FrameExtractor(Extractor):
298
341
  the input text content to put in prompt template.
299
342
  If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
300
343
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
301
- max_new_tokens : str, Optional
302
- the max number of new tokens LLM can generate.
303
344
  return_messages_log : bool, Optional
304
345
  if True, a list of messages will be returned.
305
346
 
@@ -310,7 +351,7 @@ class FrameExtractor(Extractor):
310
351
 
311
352
 
312
353
  @abc.abstractmethod
313
- def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=2048,
354
+ def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str,
314
355
  document_key:str=None, return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
315
356
  """
316
357
  This method inputs text content and outputs a list of LLMInformationExtractionFrame
@@ -324,8 +365,6 @@ class FrameExtractor(Extractor):
324
365
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
325
366
  entity_key : str
326
367
  the key (in output JSON) for entity text. Any extraction that does not include the entity key will be dropped.
327
- max_new_tokens : str, Optional
328
- the max number of new tokens LLM should generate.
329
368
  document_key : str, Optional
330
369
  specify the key in text_content where document text is.
331
370
  If text_content is str, this parameter will be ignored.
@@ -338,209 +377,37 @@ class FrameExtractor(Extractor):
338
377
  return NotImplemented
339
378
 
340
379
 
341
- class BasicFrameExtractor(FrameExtractor):
342
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
343
- """
344
- This class diretly prompt LLM for frame extraction.
345
- Input system prompt (optional), prompt template (with instruction, few-shot examples),
346
- and specify a LLM.
347
-
348
- Parameters:
349
- ----------
350
- inference_engine : InferenceEngine
351
- the LLM inferencing engine object. Must implements the chat() method.
352
- prompt_template : str
353
- prompt template with "{{<placeholder name>}}" placeholder.
354
- system_prompt : str, Optional
355
- system prompt.
356
- """
357
- super().__init__(inference_engine=inference_engine,
358
- prompt_template=prompt_template,
359
- system_prompt=system_prompt,
360
- **kwrs)
361
-
362
-
363
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048,
364
- temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> str:
365
- """
366
- This method inputs a text and outputs a string generated by LLM.
367
-
368
- Parameters:
369
- ----------
370
- text_content : Union[str, Dict[str,str]]
371
- the input text content to put in prompt template.
372
- If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
373
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
374
- max_new_tokens : str, Optional
375
- the max number of new tokens LLM can generate.
376
- temperature : float, Optional
377
- the temperature for token sampling.
378
- stream : bool, Optional
379
- if True, LLM generated text will be printed in terminal in real-time.
380
- return_messages_log : bool, Optional
381
- if True, a list of messages will be returned.
382
-
383
- Return : str
384
- the output from LLM. Need post-processing.
385
- """
386
- messages = []
387
- if self.system_prompt:
388
- messages.append({'role': 'system', 'content': self.system_prompt})
389
-
390
- messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
391
- response = self.inference_engine.chat(
392
- messages=messages,
393
- max_new_tokens=max_new_tokens,
394
- temperature=temperature,
395
- stream=stream,
396
- **kwrs
397
- )
398
-
399
- if return_messages_log:
400
- messages.append({"role": "assistant", "content": response})
401
- messages_log = [messages]
402
- return response, messages_log
403
-
404
- return response
405
-
406
-
407
- def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=2048,
408
- temperature:float=0.0, document_key:str=None, stream:bool=False,
409
- case_sensitive:bool=False, fuzzy_match:bool=True, fuzzy_buffer_size:float=0.2,
410
- fuzzy_score_cutoff:float=0.8, allow_overlap_entities:bool=False,
411
- return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
412
- """
413
- This method inputs a text and outputs a list of LLMInformationExtractionFrame
414
- It use the extract() method and post-process outputs into frames.
415
-
416
- Parameters:
417
- ----------
418
- text_content : Union[str, Dict[str,str]]
419
- the input text content to put in prompt template.
420
- If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
421
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
422
- entity_key : str
423
- the key (in ouptut JSON) for entity text. Any extraction that does not include entity key will be dropped.
424
- max_new_tokens : str, Optional
425
- the max number of new tokens LLM should generate.
426
- temperature : float, Optional
427
- the temperature for token sampling.
428
- document_key : str, Optional
429
- specify the key in text_content where document text is.
430
- If text_content is str, this parameter will be ignored.
431
- stream : bool, Optional
432
- if True, LLM generated text will be printed in terminal in real-time.
433
- case_sensitive : bool, Optional
434
- if True, entity text matching will be case-sensitive.
435
- fuzzy_match : bool, Optional
436
- if True, fuzzy matching will be applied to find entity text.
437
- fuzzy_buffer_size : float, Optional
438
- the buffer size for fuzzy matching. Default is 20% of entity text length.
439
- fuzzy_score_cutoff : float, Optional
440
- the Jaccard score cutoff for fuzzy matching.
441
- Matched entity text must have a score higher than this value or a None will be returned.
442
- allow_overlap_entities : bool, Optional
443
- if True, entities can overlap in the text.
444
- Note that this can cause multiple frames to be generated on the same entity span if they have same entity text.
445
- return_messages_log : bool, Optional
446
- if True, a list of messages will be returned.
447
-
448
- Return : str
449
- a list of frames.
450
- """
451
- if isinstance(text_content, str):
452
- text = text_content
453
- elif isinstance(text_content, dict):
454
- if document_key is None:
455
- raise ValueError("document_key must be provided when text_content is dict.")
456
- text = text_content[document_key]
457
-
458
- frame_list = []
459
- extraction_results = self.extract(text_content=text_content,
460
- max_new_tokens=max_new_tokens,
461
- temperature=temperature,
462
- stream=stream,
463
- return_messages_log=return_messages_log,
464
- **kwrs)
465
- gen_text, messages_log = extraction_results if return_messages_log else (extraction_results, None)
466
-
467
- entity_json = []
468
- for entity in self._extract_json(gen_text=gen_text):
469
- if entity_key in entity:
470
- entity_json.append(entity)
471
- else:
472
- warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{entity_key}"). This frame will be dropped.', RuntimeWarning)
473
-
474
- spans = self._find_entity_spans(text=text,
475
- entities=[e[entity_key] for e in entity_json],
476
- case_sensitive=case_sensitive,
477
- fuzzy_match=fuzzy_match,
478
- fuzzy_buffer_size=fuzzy_buffer_size,
479
- fuzzy_score_cutoff=fuzzy_score_cutoff,
480
- allow_overlap_entities=allow_overlap_entities)
481
-
482
- for i, (ent, span) in enumerate(zip(entity_json, spans)):
483
- if span is not None:
484
- start, end = span
485
- frame = LLMInformationExtractionFrame(frame_id=f"{i}",
486
- start=start,
487
- end=end,
488
- entity_text=text[start:end],
489
- attr={k: v for k, v in ent.items() if k != entity_key and v != ""})
490
- frame_list.append(frame)
491
-
492
- if return_messages_log:
493
- return frame_list, messages_log
494
-
495
- return frame_list
496
-
497
-
498
- class ReviewFrameExtractor(BasicFrameExtractor):
499
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
500
- review_mode:str, review_prompt:str=None,system_prompt:str=None, **kwrs):
380
+ class DirectFrameExtractor(FrameExtractor):
381
+ def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker,
382
+ prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None):
501
383
  """
502
- This class add a review step after the BasicFrameExtractor.
503
- The Review process asks LLM to review its output and:
504
- 1. add more frames while keep current. This is efficient for boosting recall.
505
- 2. or, regenerate frames (add new and delete existing).
506
- Use the review_mode parameter to specify. Note that the review_prompt should instruct LLM accordingly.
384
+ This class is for general unit-context frame extraction.
385
+ Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
507
386
 
508
387
  Parameters:
509
388
  ----------
510
389
  inference_engine : InferenceEngine
511
390
  the LLM inference engine object. Must implement the chat() method.
391
+ unit_chunker : UnitChunker
392
+ the unit chunker object that determines how to chunk the document text into units.
512
393
  prompt_template : str
513
394
  prompt template with "{{<placeholder name>}}" placeholder.
514
- review_prompt : str: Optional
515
- the prompt text that ask LLM to review. Specify addition or revision in the instruction.
516
- if not provided, a default review prompt will be used.
517
- review_mode : str
518
- review mode. Must be one of {"addition", "revision"}
519
- addition mode only ask LLM to add new frames, while revision mode ask LLM to regenerate.
520
395
  system_prompt : str, Optional
521
396
  system prompt.
397
+ context_chunker : ContextChunker
398
+ the context chunker object that determines how to get context for each unit.
522
399
  """
523
- super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
524
- system_prompt=system_prompt, **kwrs)
525
- if review_mode not in {"addition", "revision"}:
526
- raise ValueError('review_mode must be one of {"addition", "revision"}.')
527
- self.review_mode = review_mode
528
-
529
- if review_prompt:
530
- self.review_prompt = review_prompt
531
- else:
532
- file_path = importlib.resources.files('llm_ie.asset.default_prompts').\
533
- joinpath(f"{self.__class__.__name__}_{self.review_mode}_review_prompt.txt")
534
- with open(file_path, 'r', encoding="utf-8") as f:
535
- self.review_prompt = f.read()
536
-
537
- warnings.warn(f'Custom review prompt not provided. The default review prompt is used:\n"{self.review_prompt}"', UserWarning)
400
+ super().__init__(inference_engine=inference_engine,
401
+ unit_chunker=unit_chunker,
402
+ prompt_template=prompt_template,
403
+ system_prompt=system_prompt,
404
+ context_chunker=context_chunker)
538
405
 
539
406
 
540
407
  def extract(self, text_content:Union[str, Dict[str,str]],
541
- max_new_tokens:int=4096, temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> str:
408
+ document_key:str=None, verbose:bool=False, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
542
409
  """
543
- This method inputs a text and outputs a string generated by LLM.
410
+ This method inputs a text and outputs a list of outputs per unit.
544
411
 
545
412
  Parameters:
546
413
  ----------
@@ -548,249 +415,190 @@ class ReviewFrameExtractor(BasicFrameExtractor):
548
415
  the input text content to put in prompt template.
549
416
  If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
550
417
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
551
- max_new_tokens : str, Optional
552
- the max number of new tokens LLM can generate.
553
- temperature : float, Optional
554
- the temperature for token sampling.
555
- stream : bool, Optional
556
- if True, LLM generated text will be printed in terminal in real-time.
557
- return_messages_log : bool, Optional
558
- if True, a list of messages will be returned.
559
-
560
- Return : str
561
- the output from LLM. Need post-processing.
562
- """
563
- messages = []
564
- if self.system_prompt:
565
- messages.append({'role': 'system', 'content': self.system_prompt})
566
-
567
- messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
568
- # Initial output
569
- if stream:
570
- print(f"{Fore.BLUE}Initial Output:{Style.RESET_ALL}")
571
-
572
- initial = self.inference_engine.chat(
573
- messages=messages,
574
- max_new_tokens=max_new_tokens,
575
- temperature=temperature,
576
- stream=stream,
577
- **kwrs
578
- )
579
-
580
- # Review
581
- messages.append({'role': 'assistant', 'content': initial})
582
- messages.append({'role': 'user', 'content': self.review_prompt})
583
-
584
- if stream:
585
- print(f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}")
586
- review = self.inference_engine.chat(
587
- messages=messages,
588
- max_new_tokens=max_new_tokens,
589
- temperature=temperature,
590
- stream=stream,
591
- **kwrs
592
- )
593
-
594
- # Output
595
- output_text = ""
596
- if self.review_mode == "revision":
597
- output_text = review
598
- elif self.review_mode == "addition":
599
- output_text = initial + '\n' + review
600
-
601
- if return_messages_log:
602
- messages.append({"role": "assistant", "content": review})
603
- messages_log = [messages]
604
- return output_text, messages_log
605
-
606
- return output_text
607
-
608
-
609
- class SentenceFrameExtractor(FrameExtractor):
610
- from nltk.tokenize.punkt import PunktSentenceTokenizer
611
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
612
- context_sentences:Union[str, int]="all", **kwrs):
613
- """
614
- This class performs sentence-by-sentence information extraction.
615
- The process is as follows:
616
- 1. system prompt (optional)
617
- 2. user prompt with instructions (schema, background, full text, few-shot example...)
618
- 3. feed a sentence (start with first sentence)
619
- 4. LLM extract entities and attributes from the sentence
620
- 5. repeat #3 and #4
621
-
622
- Input system prompt (optional), prompt template (with user instructions),
623
- and specify a LLM.
624
-
625
- Parameters:
626
- ----------
627
- inference_engine : InferenceEngine
628
- the LLM inferencing engine object. Must implements the chat() method.
629
- prompt_template : str
630
- prompt template with "{{<placeholder name>}}" placeholder.
631
- system_prompt : str, Optional
632
- system prompt.
633
- context_sentences : Union[str, int], Optional
634
- number of sentences before and after the given sentence to provide additional context.
635
- if "all", the full text will be provided in the prompt as context.
636
- if 0, no additional context will be provided.
637
- This is good for tasks that does not require context beyond the given sentence.
638
- if > 0, the number of sentences before and after the given sentence to provide as context.
639
- This is good for tasks that require context beyond the given sentence.
640
- """
641
- super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
642
- system_prompt=system_prompt, **kwrs)
643
-
644
- if not isinstance(context_sentences, int) and context_sentences != "all":
645
- raise ValueError('context_sentences must be an integer (>= 0) or "all".')
646
-
647
- if isinstance(context_sentences, int) and context_sentences < 0:
648
- raise ValueError("context_sentences must be a positive integer.")
649
-
650
- self.context_sentences =context_sentences
651
-
652
-
653
- def _get_sentences(self, text:str) -> List[Dict[str,str]]:
654
- """
655
- This method sentence tokenize the input text into a list of sentences
656
- as dict of {start, end, sentence_text}
657
-
658
- Parameters:
659
- ----------
660
- text : str
661
- text to sentence tokenize.
662
-
663
- Returns : List[Dict[str,str]]
664
- a list of sentences as dict with keys: {"sentence_text", "start", "end"}.
665
- """
666
- sentences = []
667
- for start, end in self.PunktSentenceTokenizer().span_tokenize(text):
668
- sentences.append({"sentence_text": text[start:end],
669
- "start": start,
670
- "end": end})
671
- return sentences
672
-
673
-
674
- def _get_context_sentences(self, text_content, i:int, sentences:List[Dict[str, str]], document_key:str=None) -> str:
675
- """
676
- This function returns the context sentences for the current sentence of interest (i).
677
- """
678
- if self.context_sentences == "all":
679
- context = text_content if isinstance(text_content, str) else text_content[document_key]
680
- elif self.context_sentences == 0:
681
- context = ""
682
- else:
683
- start = max(0, i - self.context_sentences)
684
- end = min(i + 1 + self.context_sentences, len(sentences))
685
- context = " ".join([s['sentence_text'] for s in sentences[start:end]])
686
- return context
687
-
688
-
689
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
690
- document_key:str=None, temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict[str,str]]:
691
- """
692
- This method inputs a text and outputs a list of outputs per sentence.
693
-
694
- Parameters:
695
- ----------
696
- text_content : Union[str, Dict[str,str]]
697
- the input text content to put in prompt template.
698
- If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
699
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
700
- max_new_tokens : str, Optional
701
- the max number of new tokens LLM should generate.
702
418
  document_key : str, Optional
703
419
  specify the key in text_content where document text is.
704
420
  If text_content is str, this parameter will be ignored.
705
- temperature : float, Optional
706
- the temperature for token sampling.
707
- stream : bool, Optional
421
+ verbose : bool, Optional
708
422
  if True, LLM generated text will be printed in terminal in real-time.
709
423
  return_messages_log : bool, Optional
710
424
  if True, a list of messages will be returned.
711
425
 
712
- Return : str
713
- the output from LLM. Need post-processing.
426
+ Return : List[FrameExtractionUnitResult]
427
+ the output from LLM for each unit. Contains the start, end, text, and generated text.
714
428
  """
715
429
  # define output
716
430
  output = []
717
- # sentence tokenization
431
+ # unit chunking
718
432
  if isinstance(text_content, str):
719
- sentences = self._get_sentences(text_content)
433
+ doc_text = text_content
434
+
720
435
  elif isinstance(text_content, dict):
721
436
  if document_key is None:
722
437
  raise ValueError("document_key must be provided when text_content is dict.")
723
- sentences = self._get_sentences(text_content[document_key])
724
-
438
+ doc_text = text_content[document_key]
439
+
440
+ units = self.unit_chunker.chunk(doc_text)
441
+ # context chunker init
442
+ self.context_chunker.fit(doc_text, units)
443
+ # messages log
725
444
  if return_messages_log:
726
445
  messages_log = []
727
446
 
728
- # generate sentence by sentence
729
- for i, sent in enumerate(sentences):
447
+ # generate unit by unit
448
+ for i, unit in enumerate(units):
730
449
  # construct chat messages
731
450
  messages = []
732
451
  if self.system_prompt:
733
452
  messages.append({'role': 'system', 'content': self.system_prompt})
734
453
 
735
- context = self._get_context_sentences(text_content, i, sentences, document_key)
454
+ context = self.context_chunker.chunk(unit)
736
455
 
737
- if self.context_sentences == 0:
738
- # no context, just place sentence of interest
456
+ if context == "":
457
+ # no context, just place unit in user prompt
739
458
  if isinstance(text_content, str):
740
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
459
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
741
460
  else:
742
- sentence_content = text_content.copy()
743
- sentence_content[document_key] = sent['sentence_text']
744
- messages.append({'role': 'user', 'content': self._get_user_prompt(sentence_content)})
461
+ unit_content = text_content.copy()
462
+ unit_content[document_key] = unit.text
463
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
745
464
  else:
746
- # insert context
465
+ # insert context to user prompt
747
466
  if isinstance(text_content, str):
748
467
  messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
749
468
  else:
750
469
  context_content = text_content.copy()
751
470
  context_content[document_key] = context
752
471
  messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
753
- # simulate conversation
754
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
755
- # place sentence of interest
756
- messages.append({'role': 'user', 'content': sent['sentence_text']})
757
-
758
- if stream:
759
- print(f"\n\n{Fore.GREEN}Sentence {i}:{Style.RESET_ALL}\n{sent['sentence_text']}\n")
760
- if isinstance(self.context_sentences, int) and self.context_sentences > 0:
472
+ # simulate conversation where assistant confirms
473
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
474
+ # place unit of interest
475
+ messages.append({'role': 'user', 'content': unit.text})
476
+
477
+ if verbose:
478
+ print(f"\n\n{Fore.GREEN}Unit {i}:{Style.RESET_ALL}\n{unit.text}\n")
479
+ if context != "":
761
480
  print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
762
481
 
763
482
  print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
764
483
 
484
+
765
485
  gen_text = self.inference_engine.chat(
766
486
  messages=messages,
767
- max_new_tokens=max_new_tokens,
768
- temperature=temperature,
769
- stream=stream,
770
- **kwrs
487
+ verbose=verbose,
488
+ stream=False
771
489
  )
772
-
490
+
773
491
  if return_messages_log:
774
492
  messages.append({"role": "assistant", "content": gen_text})
775
493
  messages_log.append(messages)
776
494
 
777
495
  # add to output
778
- output.append({'sentence_start': sent['start'],
779
- 'sentence_end': sent['end'],
780
- 'sentence_text': sent['sentence_text'],
781
- 'gen_text': gen_text})
496
+ result = FrameExtractionUnitResult(
497
+ start=unit.start,
498
+ end=unit.end,
499
+ text=unit.text,
500
+ gen_text=gen_text)
501
+ output.append(result)
782
502
 
783
503
  if return_messages_log:
784
504
  return output, messages_log
785
505
 
786
506
  return output
787
507
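As an illustration (not itself part of the diff): extract() now returns FrameExtractionUnitResult objects rather than per-sentence dicts. A minimal sketch of consuming them, assuming the extractor and note_text names from the earlier sketch:

results = extractor.extract(text_content=note_text, verbose=True)
for res in results:
    # each result keeps the unit's character span plus the raw LLM generation
    print(res.start, res.end)
    print(res.gen_text)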
 
508
+ def stream(self, text_content: Union[str, Dict[str, str]],
509
+ document_key: str = None) -> Generator[Dict[str, Any], None, List[FrameExtractionUnitResult]]:
510
+ """
511
+ Streams LLM responses per unit with structured event types,
512
+ and returns collected data for post-processing.
513
+
514
+ Yields:
515
+ -------
516
+ Dict[str, Any]: (type, data)
517
+ - {"type": "info", "data": str_message}: General informational messages.
518
+ - {"type": "unit", "data": dict_unit_info}: Signals start of a new unit. dict_unit_info contains {'id', 'text', 'start', 'end'}
519
+ - {"type": "context", "data": str_context}: Context string for the current unit.
520
+ - {"type": "reasoning", "data": str_chunk}: A reasoning model thinking chunk from the LLM.
521
+ - {"type": "response", "data": str_chunk}: A response/answer chunk from the LLM.
522
+
523
+ Returns:
524
+ --------
525
+ List[FrameExtractionUnitResult]:
526
+ A list of FrameExtractionUnitResult objects, each containing the
527
+ original unit details and the fully accumulated 'gen_text' from the LLM.
528
+ """
529
+ collected_results: List[FrameExtractionUnitResult] = []
530
+
531
+ if isinstance(text_content, str):
532
+ doc_text = text_content
533
+ elif isinstance(text_content, dict):
534
+ if document_key is None:
535
+ raise ValueError("document_key must be provided when text_content is dict.")
536
+ if document_key not in text_content:
537
+ raise ValueError(f"document_key '{document_key}' not found in text_content.")
538
+ doc_text = text_content[document_key]
539
+ else:
540
+ raise TypeError("text_content must be a string or a dictionary.")
541
+
542
+ units: List[FrameExtractionUnit] = self.unit_chunker.chunk(doc_text)
543
+ self.context_chunker.fit(doc_text, units)
544
+
545
+ yield {"type": "info", "data": f"Starting LLM processing for {len(units)} units."}
788
546
 
789
- async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
790
- document_key:str=None, temperature:float=0.0, concurrent_batch_size:int=32,
791
- return_messages_log:bool=False, **kwrs) -> List[Dict[str,str]]:
547
+ for i, unit in enumerate(units):
548
+ unit_info_payload = {"id": i, "text": unit.text, "start": unit.start, "end": unit.end}
549
+ yield {"type": "unit", "data": unit_info_payload}
550
+
551
+ messages = []
552
+ if self.system_prompt:
553
+ messages.append({'role': 'system', 'content': self.system_prompt})
554
+
555
+ context_str = self.context_chunker.chunk(unit)
556
+
557
+ # Construct prompt input based on whether text_content was str or dict
558
+ if context_str:
559
+ yield {"type": "context", "data": context_str}
560
+ prompt_input_for_context = context_str
561
+ if isinstance(text_content, dict):
562
+ context_content_dict = text_content.copy()
563
+ context_content_dict[document_key] = context_str
564
+ prompt_input_for_context = context_content_dict
565
+ messages.append({'role': 'user', 'content': self._get_user_prompt(prompt_input_for_context)})
566
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
567
+ messages.append({'role': 'user', 'content': unit.text})
568
+ else: # No context
569
+ prompt_input_for_unit = unit.text
570
+ if isinstance(text_content, dict):
571
+ unit_content_dict = text_content.copy()
572
+ unit_content_dict[document_key] = unit.text
573
+ prompt_input_for_unit = unit_content_dict
574
+ messages.append({'role': 'user', 'content': self._get_user_prompt(prompt_input_for_unit)})
575
+
576
+ current_gen_text = ""
577
+
578
+ response_stream = self.inference_engine.chat(
579
+ messages=messages,
580
+ stream=True
581
+ )
582
+ for chunk in response_stream:
583
+ yield chunk
584
+ current_gen_text += chunk
585
+
586
+ # Store the result for this unit
587
+ result_for_unit = FrameExtractionUnitResult(
588
+ start=unit.start,
589
+ end=unit.end,
590
+ text=unit.text,
591
+ gen_text=current_gen_text
592
+ )
593
+ collected_results.append(result_for_unit)
594
+
595
+ yield {"type": "info", "data": "All units processed by LLM."}
596
+ return collected_results
597
+
598
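As an illustration (not itself part of the diff): stream() is a generator, so its return value arrives through StopIteration when the generator is driven manually. A sketch of consuming the event stream, assuming the event types documented in the Yields section above:

gen = extractor.stream(text_content=note_text)
try:
    while True:
        event = next(gen)
        if event["type"] == "unit":
            print(f"\n--- unit {event['data']['id']} ---")
        elif event["type"] in ("reasoning", "response"):
            print(event["data"], end="", flush=True)
except StopIteration as stop:
    collected = stop.value   # List[FrameExtractionUnitResult]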
+ async def extract_async(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
599
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
792
600
  """
793
- The asynchronous version of the extract() method.
601
+ This is the asynchronous version of the extract() method.
794
602
 
795
603
  Parameters:
796
604
  ----------
@@ -798,109 +606,126 @@ class SentenceFrameExtractor(FrameExtractor):
798
606
  the input text content to put in prompt template.
799
607
  If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
800
608
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
801
- max_new_tokens : str, Optional
802
- the max number of new tokens LLM should generate.
803
609
  document_key : str, Optional
804
610
  specify the key in text_content where document text is.
805
611
  If text_content is str, this parameter will be ignored.
806
- temperature : float, Optional
807
- the temperature for token sampling.
808
612
  concurrent_batch_size : int, Optional
809
- the number of sentences to process in concurrent.
613
+ the batch size for concurrent processing.
810
614
  return_messages_log : bool, Optional
811
615
  if True, a list of messages will be returned.
812
616
 
813
- Return : str
814
- the output from LLM. Need post-processing.
617
+ Return : List[FrameExtractionUnitResult]
618
+ the output from LLM for each unit. Contains the start, end, text, and generated text.
815
619
  """
816
- # Check if self.inference_engine.chat_async() is implemented
817
- if not hasattr(self.inference_engine, 'chat_async'):
818
- raise NotImplementedError(f"{self.inference_engine.__class__.__name__} does not have chat_async() method.")
819
-
820
- # define output
821
- output = []
822
- # sentence tokenization
823
620
  if isinstance(text_content, str):
824
- sentences = self._get_sentences(text_content)
621
+ doc_text = text_content
825
622
  elif isinstance(text_content, dict):
826
623
  if document_key is None:
827
624
  raise ValueError("document_key must be provided when text_content is dict.")
828
- sentences = self._get_sentences(text_content[document_key])
625
+ if document_key not in text_content:
626
+ raise ValueError(f"document_key '{document_key}' not found in text_content dictionary.")
627
+ doc_text = text_content[document_key]
628
+ else:
629
+ raise TypeError("text_content must be a string or a dictionary.")
829
630
 
830
- if return_messages_log:
831
- messages_log = []
631
+ units = self.unit_chunker.chunk(doc_text)
832
632
 
833
- # generate sentence by sentence
834
- for i in range(0, len(sentences), concurrent_batch_size):
835
- tasks = []
836
- batch = sentences[i:i + concurrent_batch_size]
837
- batch_messages = []
838
- for j, sent in enumerate(batch):
839
- # construct chat messages
840
- messages = []
841
- if self.system_prompt:
842
- messages.append({'role': 'system', 'content': self.system_prompt})
633
+ # context chunker init
634
+ self.context_chunker.fit(doc_text, units)
843
635
 
844
- context = self._get_context_sentences(text_content, i + j, sentences, document_key)
845
-
846
- if self.context_sentences == 0:
847
- # no context, just place sentence of interest
848
- if isinstance(text_content, str):
849
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
850
- else:
851
- sentence_content = text_content.copy()
852
- sentence_content[document_key] = sent['sentence_text']
853
- messages.append({'role': 'user', 'content': self._get_user_prompt(sentence_content)})
636
+ # Prepare inputs for all units first
637
+ tasks_input = []
638
+ for i, unit in enumerate(units):
639
+ # construct chat messages
640
+ messages = []
641
+ if self.system_prompt:
642
+ messages.append({'role': 'system', 'content': self.system_prompt})
643
+
644
+ context = self.context_chunker.chunk(unit)
645
+
646
+ if context == "":
647
+ # no context, just place unit in user prompt
648
+ if isinstance(text_content, str):
649
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
854
650
  else:
855
- # insert context
856
- if isinstance(text_content, str):
857
- messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
858
- else:
859
- context_content = text_content.copy()
860
- context_content[document_key] = context
861
- messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
862
- # simulate conversation
863
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
864
- # place sentence of interest
865
- messages.append({'role': 'user', 'content': sent['sentence_text']})
866
-
867
- # add to tasks
868
- task = asyncio.create_task(
869
- self.inference_engine.chat_async(
870
- messages=messages,
871
- max_new_tokens=max_new_tokens,
872
- temperature=temperature,
873
- **kwrs
874
- )
875
- )
876
- tasks.append(task)
877
- batch_messages.append(messages)
651
+ unit_content = text_content.copy()
652
+ unit_content[document_key] = unit.text
653
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
654
+ else:
655
+ # insert context to user prompt
656
+ if isinstance(text_content, str):
657
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
658
+ else:
659
+ context_content = text_content.copy()
660
+ context_content[document_key] = context
661
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
662
+ # simulate conversation where assistant confirms
663
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
664
+ # place unit of interest
665
+ messages.append({'role': 'user', 'content': unit.text})
878
666
 
879
- # Wait until the batch is done, collect results and move on to next batch
880
- responses = await asyncio.gather(*tasks)
667
+ # Store unit and messages together for the task
668
+ tasks_input.append({"unit": unit, "messages": messages, "original_index": i})
881
669
 
882
- # Collect outputs
883
- for gen_text, sent, messages in zip(responses, batch, batch_messages):
884
- if return_messages_log:
885
- messages.append({"role": "assistant", "content": gen_text})
886
- messages_log.append(messages)
670
+ # Process units concurrently with asyncio.Semaphore
671
+ semaphore = asyncio.Semaphore(concurrent_batch_size)
672
+
673
+ async def semaphore_helper(task_data: Dict, **kwrs):
674
+ unit = task_data["unit"]
675
+ messages = task_data["messages"]
676
+ original_index = task_data["original_index"]
677
+
678
+ async with semaphore:
679
+ gen_text = await self.inference_engine.chat_async(
680
+ messages=messages
681
+ )
682
+ return {"original_index": original_index, "unit": unit, "gen_text": gen_text, "messages": messages}
683
+
684
+ # Create and gather tasks
685
+ tasks = []
686
+ for task_inp in tasks_input:
687
+ task = asyncio.create_task(semaphore_helper(
688
+ task_inp
689
+ ))
690
+ tasks.append(task)
691
+
692
+ results_raw = await asyncio.gather(*tasks)
693
+
694
+ # Sort results back into original order using the index stored
695
+ results_raw.sort(key=lambda x: x["original_index"])
696
+
697
+ # Restructure the results
698
+ output: List[FrameExtractionUnitResult] = []
699
+ messages_log: Optional[List[List[Dict[str, str]]]] = [] if return_messages_log else None
700
+
701
+ for result_data in results_raw:
702
+ unit = result_data["unit"]
703
+ gen_text = result_data["gen_text"]
704
+
705
+ # Create result object
706
+ result = FrameExtractionUnitResult(
707
+ start=unit.start,
708
+ end=unit.end,
709
+ text=unit.text,
710
+ gen_text=gen_text
711
+ )
712
+ output.append(result)
713
+
714
+ # Append to messages log if requested
715
+ if return_messages_log:
716
+ final_messages = result_data["messages"] + [{"role": "assistant", "content": gen_text}]
717
+ messages_log.append(final_messages)
887
718
 
888
- output.append({'sentence_start': sent['start'],
889
- 'sentence_end': sent['end'],
890
- 'sentence_text': sent['sentence_text'],
891
- 'gen_text': gen_text})
892
-
893
719
  if return_messages_log:
894
720
  return output, messages_log
895
-
896
- return output
897
-
721
+ else:
722
+ return output
898
723
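As an illustration (not itself part of the diff): the async path now bounds concurrency with a semaphore instead of fixed batches, so concurrent_batch_size caps how many units are in flight at once and results come back in the original unit order. A minimal sketch, assuming the engine implements chat_async():

import asyncio

async def main():
    return await extractor.extract_async(
        text_content=note_text,
        concurrent_batch_size=8,   # at most 8 units awaiting the LLM at any time
    )

results = asyncio.run(main())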
 
899
- def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=512,
900
- document_key:str=None, temperature:float=0.0, stream:bool=False,
901
- concurrent:bool=False, concurrent_batch_size:int=32,
724
+
725
+ def extract_frames(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
726
+ verbose:bool=False, concurrent:bool=False, concurrent_batch_size:int=32,
902
727
  case_sensitive:bool=False, fuzzy_match:bool=True, fuzzy_buffer_size:float=0.2, fuzzy_score_cutoff:float=0.8,
903
- allow_overlap_entities:bool=False, return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
728
+ allow_overlap_entities:bool=False, return_messages_log:bool=False) -> List[LLMInformationExtractionFrame]:
904
729
  """
905
730
  This method inputs a text and outputs a list of LLMInformationExtractionFrame
906
731
  It uses the extract() method and post-processes its outputs into frames.
@@ -911,16 +736,10 @@ class SentenceFrameExtractor(FrameExtractor):
911
736
  the input text content to put in prompt template.
912
737
  If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
913
738
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
914
- entity_key : str
915
- the key (in ouptut JSON) for entity text.
916
- max_new_tokens : str, Optional
917
- the max number of new tokens LLM should generate.
918
739
  document_key : str, Optional
919
740
  specify the key in text_content where document text is.
920
741
  If text_content is str, this parameter will be ignored.
921
- temperature : float, Optional
922
- the temperature for token sampling.
923
- stream : bool, Optional
742
+ verbose : bool, Optional
924
743
  if True, LLM generated text will be printed in terminal in real-time.
925
744
  concurrent : bool, Optional
926
745
  if True, the sentences will be extracted concurrently.
@@ -944,41 +763,36 @@ class SentenceFrameExtractor(FrameExtractor):
944
763
  Return : List[LLMInformationExtractionFrame]
945
764
  a list of frames.
946
765
  """
766
+ ENTITY_KEY = "entity_text"
947
767
  if concurrent:
948
- if stream:
949
- warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
768
+ if verbose:
769
+ warnings.warn("verbose=True is not supported in concurrent mode.", RuntimeWarning)
950
770
 
951
771
  nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
952
772
  extraction_results = asyncio.run(self.extract_async(text_content=text_content,
953
- max_new_tokens=max_new_tokens,
954
773
  document_key=document_key,
955
- temperature=temperature,
956
774
  concurrent_batch_size=concurrent_batch_size,
957
- return_messages_log=return_messages_log,
958
- **kwrs)
775
+ return_messages_log=return_messages_log)
959
776
  )
960
777
  else:
961
778
  extraction_results = self.extract(text_content=text_content,
962
- max_new_tokens=max_new_tokens,
963
- document_key=document_key,
964
- temperature=temperature,
965
- stream=stream,
966
- return_messages_log=return_messages_log,
967
- **kwrs)
779
+ document_key=document_key,
780
+ verbose=verbose,
781
+ return_messages_log=return_messages_log)
968
782
 
969
- llm_output_sentences, messages_log = extraction_results if return_messages_log else (extraction_results, None)
783
+ llm_output_results, messages_log = extraction_results if return_messages_log else (extraction_results, None)
970
784
 
971
785
  frame_list = []
972
- for sent in llm_output_sentences:
786
+ for res in llm_output_results:
973
787
  entity_json = []
974
- for entity in self._extract_json(gen_text=sent['gen_text']):
975
- if entity_key in entity:
788
+ for entity in self._extract_json(gen_text=res.gen_text):
789
+ if ENTITY_KEY in entity:
976
790
  entity_json.append(entity)
977
791
  else:
978
- warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{entity_key}"). This frame will be dropped.', RuntimeWarning)
792
+ warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{ENTITY_KEY}"). This frame will be dropped.', RuntimeWarning)
979
793
 
980
- spans = self._find_entity_spans(text=sent['sentence_text'],
981
- entities=[e[entity_key] for e in entity_json],
794
+ spans = self._find_entity_spans(text=res.text,
795
+ entities=[e[ENTITY_KEY] for e in entity_json],
982
796
  case_sensitive=case_sensitive,
983
797
  fuzzy_match=fuzzy_match,
984
798
  fuzzy_buffer_size=fuzzy_buffer_size,
@@ -987,34 +801,41 @@ class SentenceFrameExtractor(FrameExtractor):
987
801
  for ent, span in zip(entity_json, spans):
988
802
  if span is not None:
989
803
  start, end = span
990
- entity_text = sent['sentence_text'][start:end]
991
- start += sent['sentence_start']
992
- end += sent['sentence_start']
804
+ entity_text = res.text[start:end]
805
+ start += res.start
806
+ end += res.start
807
+ attr = {}
808
+ if "attr" in ent and ent["attr"] is not None:
809
+ attr = ent["attr"]
810
+
993
811
  frame = LLMInformationExtractionFrame(frame_id=f"{len(frame_list)}",
994
812
  start=start,
995
813
  end=end,
996
814
  entity_text=entity_text,
997
- attr={k: v for k, v in ent.items() if k != entity_key and v != ""})
815
+ attr=attr)
998
816
  frame_list.append(frame)
999
817
 
1000
818
  if return_messages_log:
1001
819
  return frame_list, messages_log
1002
820
  return frame_list
821
+
1003
822
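As an illustration (not itself part of the diff): extract_frames() no longer takes entity_key or max_new_tokens; the entity key is fixed to "entity_text" and frame attributes are read from an "attr" field in the LLM's JSON. A minimal sketch:

frames = extractor.extract_frames(
    text_content=note_text,
    concurrent=True,        # routes through extract_async() under the hood
    fuzzy_match=True,
    case_sensitive=False,
)
for frame in frames:
    print(frame.frame_id, frame.entity_text, frame.start, frame.end, frame.attr)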
 
1004
-
1005
- class SentenceReviewFrameExtractor(SentenceFrameExtractor):
1006
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
1007
- review_mode:str, review_prompt:str=None, system_prompt:str=None,
1008
- context_sentences:Union[str, int]="all", **kwrs):
823
+ class ReviewFrameExtractor(DirectFrameExtractor):
824
+ def __init__(self, unit_chunker:UnitChunker, context_chunker:ContextChunker, inference_engine:InferenceEngine,
825
+ prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None):
1009
826
  """
1010
- This class adds a review step after the SentenceFrameExtractor.
1011
- For each sentence, the review process asks LLM to review its output and:
1012
- 1. add more frames while keeping current. This is efficient for boosting recall.
827
+ This class adds a review step after the DirectFrameExtractor.
828
+ The review process asks the LLM to review its output and:
829
+ 1. add more frames while keeping the current ones. This is efficient for boosting recall.
1013
830
  2. or, regenerate frames (add new and delete existing).
1014
831
  Use the review_mode parameter to specify. Note that the review_prompt should instruct the LLM accordingly.
1015
832
 
1016
833
  Parameters:
1017
834
  ----------
835
+ unit_chunker : UnitChunker
836
+ the unit chunker object that determines how to chunk the document text into units.
837
+ context_chunker : ContextChunker
838
+ the context chunker object that determines how to get context for each unit.
1018
839
  inference_engine : InferenceEngine
1019
840
  the LLM inference engine object. Must implement the chat() method.
1020
841
  prompt_template : str
@@ -1027,36 +848,52 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
1027
848
  addition mode only asks the LLM to add new frames, while revision mode asks the LLM to regenerate.
1028
849
  system_prompt : str, Optional
1029
850
  system prompt.
1030
- context_sentences : Union[str, int], Optional
1031
- number of sentences before and after the given sentence to provide additional context.
1032
- if "all", the full text will be provided in the prompt as context.
1033
- if 0, no additional context will be provided.
1034
- This is good for tasks that does not require context beyond the given sentence.
1035
- if > 0, the number of sentences before and after the given sentence to provide as context.
1036
- This is good for tasks that require context beyond the given sentence.
1037
851
  """
1038
- super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
1039
- system_prompt=system_prompt, context_sentences=context_sentences, **kwrs)
1040
-
852
+ super().__init__(inference_engine=inference_engine,
853
+ unit_chunker=unit_chunker,
854
+ prompt_template=prompt_template,
855
+ system_prompt=system_prompt,
856
+ context_chunker=context_chunker)
857
+ # check review mode
1041
858
  if review_mode not in {"addition", "revision"}:
1042
859
  raise ValueError('review_mode must be one of {"addition", "revision"}.')
1043
860
  self.review_mode = review_mode
1044
-
861
+ # assign review prompt
1045
862
  if review_prompt:
1046
863
  self.review_prompt = review_prompt
1047
864
  else:
1048
- file_path = importlib.resources.files('llm_ie.asset.default_prompts').\
1049
- joinpath(f"{self.__class__.__name__}_{self.review_mode}_review_prompt.txt")
1050
- with open(file_path, 'r', encoding="utf-8") as f:
1051
- self.review_prompt = f.read()
1052
-
1053
- warnings.warn(f'Custom review prompt not provided. The default review prompt is used:\n"{self.review_prompt}"', UserWarning)
1054
-
865
+ self.review_prompt = None
866
+ original_class_name = self.__class__.__name__
867
+
868
+ current_class_name = original_class_name
869
+ for current_class_in_mro in self.__class__.__mro__:
870
+ if current_class_in_mro is object:
871
+ continue
872
+
873
+ current_class_name = current_class_in_mro.__name__
874
+ try:
875
+ file_path = importlib.resources.files('llm_ie.asset.default_prompts').\
876
+ joinpath(f"{self.__class__.__name__}_{self.review_mode}_review_prompt.txt")
877
+ with open(file_path, 'r', encoding="utf-8") as f:
878
+ self.review_prompt = f.read()
879
+ except FileNotFoundError:
880
+ pass
881
+
882
+ except Exception as e:
883
+ warnings.warn(
884
+ f"Error attempting to read default review prompt for '{current_class_name}' "
885
+ f"from '{str(file_path)}': {e}. Trying next in MRO.",
886
+ UserWarning
887
+ )
888
+ continue
889
+
890
+ if self.review_prompt is None:
891
+ raise ValueError(f"Cannot find review prompt for {self.__class__.__name__} in the package. Please provide a review_prompt.")
1055
892
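As an illustration (not itself part of the diff): ReviewFrameExtractor now takes the chunkers as well, and falls back to a packaged default review prompt for the chosen mode when review_prompt is omitted. A minimal sketch, assuming a default "addition" prompt ships with the package and reusing the engine/prompt_template names from the earlier sketches:

from llm_ie.chunkers import SentenceUnitChunker, WholeDocumentContextChunker
from llm_ie.extractors import ReviewFrameExtractor

reviewer = ReviewFrameExtractor(
    unit_chunker=SentenceUnitChunker(),
    context_chunker=WholeDocumentContextChunker(),
    inference_engine=engine,
    prompt_template=prompt_template,
    review_mode="addition",   # keep the first pass and ask for more frames; "revision" replaces it
)
frames = reviewer.extract_frames(text_content=note_text)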
 
1056
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
1057
- document_key:str=None, temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict[str,str]]:
893
+ def extract(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
894
+ verbose:bool=False, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
1058
895
  """
1059
- This method inputs a text and outputs a list of outputs per sentence.
896
+ This method inputs a text and outputs a list of outputs per unit.
1060
897
 
1061
898
  Parameters:
1062
899
  ----------
@@ -1064,281 +901,468 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
1064
901
  the input text content to put in prompt template.
1065
902
  If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
1066
903
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
1067
- max_new_tokens : str, Optional
1068
- the max number of new tokens LLM should generate.
1069
904
  document_key : str, Optional
1070
905
  specify the key in text_content where document text is.
1071
906
  If text_content is str, this parameter will be ignored.
1072
- temperature : float, Optional
1073
- the temperature for token sampling.
1074
- stream : bool, Optional
907
+ verbose : bool, Optional
1075
908
  if True, LLM generated text will be printed in terminal in real-time.
1076
909
  return_messages_log : bool, Optional
1077
910
  if True, a list of messages will be returned.
1078
911
 
1079
- Return : str
1080
- the output from LLM. Need post-processing.
912
+ Return : List[FrameExtractionUnitResult]
913
+ the output from LLM for each unit. Contains the start, end, text, and generated text.
1081
914
  """
1082
915
  # define output
1083
916
  output = []
1084
- # sentence tokenization
917
+ # unit chunking
918
+ if isinstance(text_content, str):
919
+ doc_text = text_content
920
+
921
+ elif isinstance(text_content, dict):
922
+ if document_key is None:
923
+ raise ValueError("document_key must be provided when text_content is dict.")
924
+ doc_text = text_content[document_key]
925
+
926
+ units = self.unit_chunker.chunk(doc_text)
927
+ # context chunker init
928
+ self.context_chunker.fit(doc_text, units)
929
+ # messages log
930
+ if return_messages_log:
931
+ messages_log = []
932
+
933
+ # generate unit by unit
934
+ for i, unit in enumerate(units):
935
+ # <--- Initial generation step --->
936
+ # construct chat messages
937
+ messages = []
938
+ if self.system_prompt:
939
+ messages.append({'role': 'system', 'content': self.system_prompt})
940
+
941
+ context = self.context_chunker.chunk(unit)
942
+
943
+ if context == "":
944
+ # no context, just place unit in user prompt
945
+ if isinstance(text_content, str):
946
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
947
+ else:
948
+ unit_content = text_content.copy()
949
+ unit_content[document_key] = unit.text
950
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
951
+ else:
952
+ # insert context to user prompt
953
+ if isinstance(text_content, str):
954
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
955
+ else:
956
+ context_content = text_content.copy()
957
+ context_content[document_key] = context
958
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
959
+ # simulate conversation where assistant confirms
960
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
961
+ # place unit of interest
962
+ messages.append({'role': 'user', 'content': unit.text})
963
+
964
+ if verbose:
965
+ print(f"\n\n{Fore.GREEN}Unit {i}:{Style.RESET_ALL}\n{unit.text}\n")
966
+ if context != "":
967
+ print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
968
+
969
+ print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
970
+
971
+
972
+ initial = self.inference_engine.chat(
973
+ messages=messages,
974
+ verbose=verbose,
975
+ stream=False
976
+ )
977
+
978
+ if return_messages_log:
979
+ messages.append({"role": "assistant", "content": initial})
980
+ messages_log.append(messages)
981
+
982
+ # <--- Review step --->
983
+ if verbose:
984
+ print(f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}")
985
+
986
+ messages.append({'role': 'assistant', 'content': initial})
987
+ messages.append({'role': 'user', 'content': self.review_prompt})
988
+
989
+ review = self.inference_engine.chat(
990
+ messages=messages,
991
+ verbose=verbose,
992
+ stream=False
993
+ )
994
+
995
+ # Output
996
+ if self.review_mode == "revision":
997
+ gen_text = review
998
+ elif self.review_mode == "addition":
999
+ gen_text = initial + '\n' + review
1000
+
1001
+ if return_messages_log:
1002
+ messages.append({"role": "assistant", "content": review})
1003
+ messages_log.append(messages)
1004
+
1005
+ # add to output
1006
+ result = FrameExtractionUnitResult(
1007
+ start=unit.start,
1008
+ end=unit.end,
1009
+ text=unit.text,
1010
+ gen_text=gen_text)
1011
+ output.append(result)
1012
+
1013
+ if return_messages_log:
1014
+ return output, messages_log
1015
+
1016
+ return output
1017
+
1018
+
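# Illustrative usage sketch for the review-style extract() above (assumptions:
# `extractor` is an already-constructed review frame extractor and `note_text` is the
# input document text; both names are placeholders, not package API claims).
results = extractor.extract(text_content=note_text, verbose=True)
for r in results:
    # Each FrameExtractionUnitResult keeps the unit's character offsets, the unit text,
    # and the raw LLM generation, which still needs post-processing into frames.
    print(f"[{r.start}:{r.end}] {r.gen_text}")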
1019
+ def stream(self, text_content:Union[str, Dict[str,str]], document_key:str=None) -> Generator[str, None, None]:
1020
+ """
1021
+ This method inputs a text and streams the LLM output unit by unit as it is generated.
1022
+
1023
+ Parameters:
1024
+ ----------
1025
+ text_content : Union[str, Dict[str,str]]
1026
+ the input text content to put in prompt template.
1027
+ If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
1028
+ If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
1029
+ document_key : str, Optional
1030
+ specify the key in text_content where document text is.
1031
+ If text_content is str, this parameter will be ignored.
1032
+
1033
+ Return : Generator[str, None, None]
1034
+ yields section headers and chunks of the LLM-generated text as they are produced.
1035
+ """
1036
+ # unit chunking
1085
1037
  if isinstance(text_content, str):
1086
- sentences = self._get_sentences(text_content)
1038
+ doc_text = text_content
1039
+
1087
1040
  elif isinstance(text_content, dict):
1088
1041
  if document_key is None:
1089
1042
  raise ValueError("document_key must be provided when text_content is dict.")
1090
- sentences = self._get_sentences(text_content[document_key])
1043
+ doc_text = text_content[document_key]
1091
1044
 
1092
- if return_messages_log:
1093
- messages_log = []
1045
+ units = self.unit_chunker.chunk(doc_text)
1046
+ # context chunker init
1047
+ self.context_chunker.fit(doc_text, units)
1094
1048
 
1095
- # generate sentence by sentence
1096
- for i, sent in enumerate(sentences):
1049
+ # generate unit by unit
1050
+ for i, unit in enumerate(units):
1051
+ # <--- Initial generation step --->
1097
1052
  # construct chat messages
1098
1053
  messages = []
1099
1054
  if self.system_prompt:
1100
1055
  messages.append({'role': 'system', 'content': self.system_prompt})
1101
1056
 
1102
- context = self._get_context_sentences(text_content, i, sentences, document_key)
1057
+ context = self.context_chunker.chunk(unit)
1103
1058
 
1104
- if self.context_sentences == 0:
1105
- # no context, just place sentence of interest
1059
+ if context == "":
1060
+ # no context, just place unit in user prompt
1106
1061
  if isinstance(text_content, str):
1107
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
1062
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
1108
1063
  else:
1109
- sentence_content = text_content.copy()
1110
- sentence_content[document_key] = sent['sentence_text']
1111
- messages.append({'role': 'user', 'content': self._get_user_prompt(sentence_content)})
1064
+ unit_content = text_content.copy()
1065
+ unit_content[document_key] = unit.text
1066
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
1112
1067
  else:
1113
- # insert context
1068
+ # insert context to user prompt
1114
1069
  if isinstance(text_content, str):
1115
1070
  messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1116
1071
  else:
1117
1072
  context_content = text_content.copy()
1118
1073
  context_content[document_key] = context
1119
1074
  messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
1120
- # simulate conversation
1121
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
1122
- # place sentence of interest
1123
- messages.append({'role': 'user', 'content': sent['sentence_text']})
1124
-
1125
- if stream:
1126
- print(f"\n\n{Fore.GREEN}Sentence {i}: {Style.RESET_ALL}\n{sent['sentence_text']}\n")
1127
- if isinstance(self.context_sentences, int) and self.context_sentences > 0:
1128
- print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
1129
- print(f"{Fore.BLUE}Initial Output:{Style.RESET_ALL}")
1075
+ # simulate conversation where assistant confirms
1076
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
1077
+ # place unit of interest
1078
+ messages.append({'role': 'user', 'content': unit.text})
1130
1079
 
1131
- initial = self.inference_engine.chat(
1080
+
1081
+ yield f"\n\n{Fore.GREEN}Unit {i}:{Style.RESET_ALL}\n{unit.text}\n"
1082
+ if context != "":
1083
+ yield f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n"
1084
+
1085
+ yield f"{Fore.BLUE}Extraction:{Style.RESET_ALL}\n"
1086
+
1087
+ response_stream = self.inference_engine.chat(
1132
1088
  messages=messages,
1133
- max_new_tokens=max_new_tokens,
1134
- temperature=temperature,
1135
- stream=stream,
1136
- **kwrs
1089
+ stream=True
1137
1090
  )
1138
1091
 
1139
- # Review
1140
- if stream:
1141
- print(f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}")
1092
+ initial = ""
1093
+ for chunk in response_stream:
1094
+ initial += chunk
1095
+ yield chunk
1096
+
1097
+ # <--- Review step --->
1098
+ yield f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}"
1142
1099
 
1143
1100
  messages.append({'role': 'assistant', 'content': initial})
1144
1101
  messages.append({'role': 'user', 'content': self.review_prompt})
1145
1102
 
1146
- review = self.inference_engine.chat(
1103
+ response_stream = self.inference_engine.chat(
1147
1104
  messages=messages,
1148
- max_new_tokens=max_new_tokens,
1149
- temperature=temperature,
1150
- stream=stream,
1151
- **kwrs
1105
+ stream=True
1152
1106
  )
1153
1107
 
1154
- # Output
1155
- if self.review_mode == "revision":
1156
- gen_text = review
1157
- elif self.review_mode == "addition":
1158
- gen_text = initial + '\n' + review
1108
+ for chunk in response_stream:
1109
+ yield chunk
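# Illustrative consumption sketch for stream() (assumptions: `extractor` and `note_text`
# are placeholders as above). stream() is a generator that yields colored section
# headers and LLM text chunks as they arrive, so callers can render progress live.
for chunk in extractor.stream(text_content=note_text):
    print(chunk, end="", flush=True)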
1159
1110
 
1160
- if return_messages_log:
1161
- messages.append({"role": "assistant", "content": review})
1162
- messages_log.append(messages)
1163
-
1164
- # add to output
1165
- output.append({'sentence_start': sent['start'],
1166
- 'sentence_end': sent['end'],
1167
- 'sentence_text': sent['sentence_text'],
1168
- 'gen_text': gen_text})
1169
-
1170
- if return_messages_log:
1171
- return output, messages_log
1172
-
1173
- return output
1174
-
1175
- async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
1176
- document_key:str=None, temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict[str,str]]:
1111
+ async def extract_async(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
1112
+ concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
1177
1113
  """
1178
- The asynchronous version of the extract() method.
1114
+ This is the asynchronous version of the extract() method with the review step.
1179
1115
 
1180
1116
  Parameters:
1181
1117
  ----------
1182
1118
  text_content : Union[str, Dict[str,str]]
1183
- the input text content to put in prompt template.
1119
+ the input text content to put in prompt template.
1184
1120
  If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
1185
1121
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
1186
- max_new_tokens : str, Optional
1187
- the max number of new tokens LLM should generate.
1188
1122
  document_key : str, Optional
1189
- specify the key in text_content where document text is.
1123
+ specify the key in text_content where document text is.
1190
1124
  If text_content is str, this parameter will be ignored.
1191
- temperature : float, Optional
1192
- the temperature for token sampling.
1193
1125
  concurrent_batch_size : int, Optional
1194
- the number of sentences to process in concurrent.
1126
+ the batch size for concurrent processing.
1195
1127
  return_messages_log : bool, Optional
1196
- if True, a list of messages will be returned.
1128
+ if True, a list of messages will be returned, including review steps.
1197
1129
 
1198
- Return : str
1199
- the output from LLM. Need post-processing.
1130
+ Return : List[FrameExtractionUnitResult]
1131
+ the output from LLM for each unit after review. Contains the start, end, text, and generated text.
1200
1132
  """
1201
- # Check if self.inference_engine.chat_async() is implemented
1202
- if not hasattr(self.inference_engine, 'chat_async'):
1203
- raise NotImplementedError(f"{self.inference_engine.__class__.__name__} does not have chat_async() method.")
1204
-
1205
- # define output
1206
- output = []
1207
- # sentence tokenization
1208
1133
  if isinstance(text_content, str):
1209
- sentences = self._get_sentences(text_content)
1134
+ doc_text = text_content
1210
1135
  elif isinstance(text_content, dict):
1211
1136
  if document_key is None:
1212
1137
  raise ValueError("document_key must be provided when text_content is dict.")
1213
- sentences = self._get_sentences(text_content[document_key])
1138
+ if document_key not in text_content:
1139
+ raise ValueError(f"document_key '{document_key}' not found in text_content dictionary.")
1140
+ doc_text = text_content[document_key]
1141
+ else:
1142
+ raise TypeError("text_content must be a string or a dictionary.")
1214
1143
 
1215
- if return_messages_log:
1216
- messages_log = []
1144
+ units = self.unit_chunker.chunk(doc_text)
1217
1145
 
1218
- # generate initial outputs sentence by sentence
1219
- for i in range(0, len(sentences), concurrent_batch_size):
1220
- messages_list = []
1221
- init_tasks = []
1222
- review_tasks = []
1223
- batch = sentences[i:i + concurrent_batch_size]
1224
- for j, sent in enumerate(batch):
1225
- # construct chat messages
1226
- messages = []
1227
- if self.system_prompt:
1228
- messages.append({'role': 'system', 'content': self.system_prompt})
1146
+ # context chunker init
1147
+ self.context_chunker.fit(doc_text, units)
1229
1148
 
1230
- context = self._get_context_sentences(text_content, i + j, sentences, document_key)
1231
-
1232
- if self.context_sentences == 0:
1233
- # no context, just place sentence of interest
1234
- if isinstance(text_content, str):
1235
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
1236
- else:
1237
- sentence_content = text_content.copy()
1238
- sentence_content[document_key] = sent['sentence_text']
1239
- messages.append({'role': 'user', 'content': self._get_user_prompt(sentence_content)})
1149
+ # <--- Initial generation step --->
1150
+ initial_tasks_input = []
1151
+ for i, unit in enumerate(units):
1152
+ # construct chat messages for initial generation
1153
+ messages = []
1154
+ if self.system_prompt:
1155
+ messages.append({'role': 'system', 'content': self.system_prompt})
1156
+
1157
+ context = self.context_chunker.chunk(unit)
1158
+
1159
+ if context == "":
1160
+ # no context, just place unit in user prompt
1161
+ if isinstance(text_content, str):
1162
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
1240
1163
  else:
1241
- # insert context
1242
- if isinstance(text_content, str):
1243
- messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1244
- else:
1245
- context_content = text_content.copy()
1246
- context_content[document_key] = context
1247
- messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
1248
- # simulate conversation
1249
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
1250
- # place sentence of interest
1251
- messages.append({'role': 'user', 'content': sent['sentence_text']})
1252
-
1253
- messages_list.append(messages)
1254
-
1255
- task = asyncio.create_task(
1256
- self.inference_engine.chat_async(
1257
- messages=messages,
1258
- max_new_tokens=max_new_tokens,
1259
- temperature=temperature,
1260
- **kwrs
1261
- )
1164
+ unit_content = text_content.copy()
1165
+ unit_content[document_key] = unit.text
1166
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
1167
+ else:
1168
+ # insert context to user prompt
1169
+ if isinstance(text_content, str):
1170
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1171
+ else:
1172
+ context_content = text_content.copy()
1173
+ context_content[document_key] = context
1174
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
1175
+ # simulate conversation where assistant confirms
1176
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
1177
+ # place unit of interest
1178
+ messages.append({'role': 'user', 'content': unit.text})
1179
+
1180
+ # Store unit and messages together for the initial task
1181
+ initial_tasks_input.append({"unit": unit, "messages": messages, "original_index": i})
1182
+
1183
+ semaphore = asyncio.Semaphore(concurrent_batch_size)
1184
+
1185
+ async def initial_semaphore_helper(task_data: Dict):
1186
+ unit = task_data["unit"]
1187
+ messages = task_data["messages"]
1188
+ original_index = task_data["original_index"]
1189
+
1190
+ async with semaphore:
1191
+ gen_text = await self.inference_engine.chat_async(
1192
+ messages=messages
1262
1193
  )
1263
- init_tasks.append(task)
1264
-
1265
- # Wait until the batch is done, collect results and move on to next batch
1266
- init_responses = await asyncio.gather(*init_tasks)
1267
- # Collect initials
1268
- initials = []
1269
- for gen_text, sent, messages in zip(init_responses, batch, messages_list):
1270
- initials.append({'sentence_start': sent['start'],
1271
- 'sentence_end': sent['end'],
1272
- 'sentence_text': sent['sentence_text'],
1273
- 'gen_text': gen_text,
1274
- 'messages': messages})
1275
-
1276
- # Review
1277
- for init in initials:
1278
- messages = init["messages"]
1279
- initial = init["gen_text"]
1280
- messages.append({'role': 'assistant', 'content': initial})
1281
- messages.append({'role': 'user', 'content': self.review_prompt})
1282
- task = asyncio.create_task(
1283
- self.inference_engine.chat_async(
1284
- messages=messages,
1285
- max_new_tokens=max_new_tokens,
1286
- temperature=temperature,
1287
- **kwrs
1288
- )
1289
- )
1290
- review_tasks.append(task)
1291
-
1292
- review_responses = await asyncio.gather(*review_tasks)
1293
-
1294
- # Collect reviews
1295
- reviews = []
1296
- for gen_text, sent in zip(review_responses, batch):
1297
- reviews.append({'sentence_start': sent['start'],
1298
- 'sentence_end': sent['end'],
1299
- 'sentence_text': sent['sentence_text'],
1300
- 'gen_text': gen_text})
1301
-
1302
- for init, rev in zip(initials, reviews):
1303
- if self.review_mode == "revision":
1304
- gen_text = rev['gen_text']
1305
- elif self.review_mode == "addition":
1306
- gen_text = init['gen_text'] + '\n' + rev['gen_text']
1194
+ # Return initial generation result along with the messages used and the unit
1195
+ return {"original_index": original_index, "unit": unit, "initial_gen_text": gen_text, "initial_messages": messages}
1196
+
1197
+ # Create and gather initial generation tasks
1198
+ initial_tasks = [
1199
+ asyncio.create_task(initial_semaphore_helper(
1200
+ task_inp
1201
+ ))
1202
+ for task_inp in initial_tasks_input
1203
+ ]
1204
+
1205
+ initial_results_raw = await asyncio.gather(*initial_tasks)
1206
+
1207
+ # Sort initial results back into original order
1208
+ initial_results_raw.sort(key=lambda x: x["original_index"])
1209
+
1210
+ # <--- Review step --->
1211
+ review_tasks_input = []
1212
+ for result_data in initial_results_raw:
1213
+ # Prepare messages for the review step
1214
+ initial_messages = result_data["initial_messages"]
1215
+ initial_gen_text = result_data["initial_gen_text"]
1216
+ review_messages = initial_messages + [
1217
+ {'role': 'assistant', 'content': initial_gen_text},
1218
+ {'role': 'user', 'content': self.review_prompt}
1219
+ ]
1220
+ # Store data needed for review task
1221
+ review_tasks_input.append({
1222
+ "unit": result_data["unit"],
1223
+ "initial_gen_text": initial_gen_text,
1224
+ "messages": review_messages,
1225
+ "original_index": result_data["original_index"],
1226
+ "full_initial_log": initial_messages + [{'role': 'assistant', 'content': initial_gen_text}] if return_messages_log else None # Log up to initial generation
1227
+ })
1228
+
1229
+
1230
+ async def review_semaphore_helper(task_data: Dict, **kwrs):
1231
+ messages = task_data["messages"]
1232
+ original_index = task_data["original_index"]
1233
+
1234
+ async with semaphore:
1235
+ review_gen_text = await self.inference_engine.chat_async(
1236
+ messages=messages
1237
+ )
1238
+ # Combine initial and review results
1239
+ task_data["review_gen_text"] = review_gen_text
1240
+ if return_messages_log:
1241
+ # Log for the review call itself
1242
+ task_data["full_review_log"] = messages + [{'role': 'assistant', 'content': review_gen_text}]
1243
+ return task_data # Return the augmented dictionary
1307
1244
 
1308
- if return_messages_log:
1309
- messages = init["messages"]
1310
- messages.append({"role": "assistant", "content": rev['gen_text']})
1311
- messages_log.append(messages)
1245
+ # Create and gather review tasks
1246
+ review_tasks = [
1247
+ asyncio.create_task(review_semaphore_helper(
1248
+ task_inp
1249
+ ))
1250
+ for task_inp in review_tasks_input
1251
+ ]
1312
1252
 
1313
- # add to output
1314
- output.append({'sentence_start': init['sentence_start'],
1315
- 'sentence_end': init['sentence_end'],
1316
- 'sentence_text': init['sentence_text'],
1317
- 'gen_text': gen_text})
1318
-
1319
- if return_messages_log:
1253
+ final_results_raw = await asyncio.gather(*review_tasks)
1254
+
1255
+ # Sort final results back into original order (although gather might preserve order for tasks added sequentially)
1256
+ final_results_raw.sort(key=lambda x: x["original_index"])
1257
+
1258
+ # <--- Process final results --->
1259
+ output: List[FrameExtractionUnitResult] = []
1260
+ messages_log: Optional[List[List[Dict[str, str]]]] = [] if return_messages_log else None
1261
+
1262
+ for result_data in final_results_raw:
1263
+ unit = result_data["unit"]
1264
+ initial_gen = result_data["initial_gen_text"]
1265
+ review_gen = result_data["review_gen_text"]
1266
+
1267
+ # Combine based on review mode
1268
+ if self.review_mode == "revision":
1269
+ final_gen_text = review_gen
1270
+ elif self.review_mode == "addition":
1271
+ final_gen_text = initial_gen + '\n' + review_gen
1272
+ else: # Should not happen due to init check
1273
+ final_gen_text = review_gen # Default to revision if mode is somehow invalid
1274
+
1275
+ # Create final result object
1276
+ result = FrameExtractionUnitResult(
1277
+ start=unit.start,
1278
+ end=unit.end,
1279
+ text=unit.text,
1280
+ gen_text=final_gen_text # Use the combined/reviewed text
1281
+ )
1282
+ output.append(result)
1283
+
1284
+ # Append full conversation log if requested
1285
+ if return_messages_log:
1286
+ full_log_for_unit = result_data.get("full_initial_log", []) + [{'role': 'user', 'content': self.review_prompt}] + [{'role': 'assistant', 'content': review_gen}]
1287
+ messages_log.append(full_log_for_unit)
1288
+
1289
+ if return_messages_log:
1320
1290
  return output, messages_log
1321
- return output
1291
+ else:
1292
+ return output
1293
+
1294
+
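# Illustrative driver sketch for extract_async() (assumptions: `extractor` and
# `note_text` are placeholders, and the configured engine implements chat_async()).
# Concurrency is bounded by an asyncio.Semaphore of size concurrent_batch_size,
# first for the initial generations and then for the review calls.
import asyncio

results = asyncio.run(
    extractor.extract_async(
        text_content=note_text,
        concurrent_batch_size=8,  # at most 8 LLM calls in flight per step
    )
)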
1295
+ class BasicFrameExtractor(DirectFrameExtractor):
1296
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
1297
+ """
1298
+ This class directly prompts the LLM for frame extraction.
1299
+ Input system prompt (optional), prompt template (with instruction, few-shot examples),
1300
+ and specify a LLM.
1301
+
1302
+ Parameters:
1303
+ ----------
1304
+ inference_engine : InferenceEngine
1305
+ the LLM inferencing engine object. Must implement the chat() method.
1306
+ prompt_template : str
1307
+ prompt template with "{{<placeholder name>}}" placeholder.
1308
+ system_prompt : str, Optional
1309
+ system prompt.
1310
+ """
1311
+ super().__init__(inference_engine=inference_engine,
1312
+ unit_chunker=WholeDocumentUnitChunker(),
1313
+ prompt_template=prompt_template,
1314
+ system_prompt=system_prompt,
1315
+ context_chunker=NoContextChunker())
1316
+
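# Illustrative construction sketch (assumptions: `engine` is any concrete
# InferenceEngine implementation and the prompt template is a made-up example,
# not the package's recommended prompt). BasicFrameExtractor is DirectFrameExtractor
# wired to WholeDocumentUnitChunker and NoContextChunker, so the whole document is
# prompted as a single unit with no additional context.
from llm_ie.extractors import BasicFrameExtractor

extractor = BasicFrameExtractor(
    inference_engine=engine,
    prompt_template="Extract diagnosis frames as JSON from:\n{{text}}",
    system_prompt="You are an information extraction assistant.",
)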
1317
+ class BasicReviewFrameExtractor(ReviewFrameExtractor):
1318
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None):
1319
+ """
1320
+ This class adds a review step after the BasicFrameExtractor.
1321
+ The review process asks the LLM to review its output and:
1322
+ 1. add more frames while keeping the current ones. This is efficient for boosting recall.
1323
+ 2. or, regenerate frames (add new and delete existing).
1324
+ Use the review_mode parameter to specify. Note that the review_prompt should instruct the LLM accordingly.
1325
+
1326
+ Parameters:
1327
+ ----------
1328
+ inference_engine : InferenceEngine
1329
+ the LLM inferencing engine object. Must implement the chat() method.
1330
+ prompt_template : str
1331
+ prompt template with "{{<placeholder name>}}" placeholder.
1332
+ review_prompt : str, Optional
1333
+ the prompt text that asks the LLM to review. Specify addition or revision in the instruction.
1334
+ if not provided, a default review prompt will be used.
1335
+ review_mode : str
1336
+ review mode. Must be one of {"addition", "revision"}
1337
+ addition mode only asks the LLM to add new frames, while revision mode asks the LLM to regenerate.
1338
+ system_prompt : str, Optional
1339
+ system prompt.
1340
+ """
1341
+ super().__init__(inference_engine=inference_engine,
1342
+ unit_chunker=WholeDocumentUnitChunker(),
1343
+ prompt_template=prompt_template,
1344
+ review_mode=review_mode,
1345
+ review_prompt=review_prompt,
1346
+ system_prompt=system_prompt,
1347
+ context_chunker=NoContextChunker())
1322
1348
 
1323
1349
 
1324
- class SentenceCoTFrameExtractor(SentenceFrameExtractor):
1325
- from nltk.tokenize.punkt import PunktSentenceTokenizer
1326
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
1327
- context_sentences:Union[str, int]="all", **kwrs):
1350
+ class SentenceFrameExtractor(DirectFrameExtractor):
1351
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
1352
+ context_sentences:Union[str, int]="all"):
1328
1353
  """
1329
- This class performs sentence-based Chain-of-thoughts (CoT) information extraction.
1330
- A simulated chat follows this process:
1354
+ This class performs sentence-by-sentence information extraction.
1355
+ The process is as follows:
1331
1356
  1. system prompt (optional)
1332
- 2. user instructions (schema, background, full text, few-shot example...)
1333
- 3. user input first sentence
1334
- 4. assistant analyze the sentence
1335
- 5. assistant extract outputs
1336
- 6. repeat #3, #4, #5
1357
+ 2. user prompt with instructions (schema, background, full text, few-shot example...)
1358
+ 3. feed a sentence (starting with the first sentence)
1359
+ 4. the LLM extracts entities and attributes from the sentence
1360
+ 5. iterate to the next sentence and repeat steps 3-4 until all sentences are processed.
1337
1361
 
1338
1362
  Input system prompt (optional), prompt template (with user instructions),
1339
1363
  and specify a LLM.
1340
1364
 
1341
- Parameters
1365
+ Parameters:
1342
1366
  ----------
1343
1367
  inference_engine : InferenceEngine
1344
1368
  the LLM inferencing engine object. Must implements the chat() method.
@@ -1354,108 +1378,79 @@ class SentenceCoTFrameExtractor(SentenceFrameExtractor):
1354
1378
  if > 0, the number of sentences before and after the given sentence to provide as context.
1355
1379
  This is good for tasks that require context beyond the given sentence.
1356
1380
  """
1357
- super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
1358
- system_prompt=system_prompt, context_sentences=context_sentences, **kwrs)
1381
+ if not isinstance(context_sentences, int) and context_sentences != "all":
1382
+ raise ValueError('context_sentences must be an integer (>= 0) or "all".')
1383
+
1384
+ if isinstance(context_sentences, int) and context_sentences < 0:
1385
+ raise ValueError("context_sentences must be a non-negative integer.")
1386
+
1387
+ if isinstance(context_sentences, int):
1388
+ context_chunker = SlideWindowContextChunker(window_size=context_sentences)
1389
+ elif context_sentences == "all":
1390
+ context_chunker = WholeDocumentContextChunker()
1391
+
1392
+ super().__init__(inference_engine=inference_engine,
1393
+ unit_chunker=SentenceUnitChunker(),
1394
+ prompt_template=prompt_template,
1395
+ system_prompt=system_prompt,
1396
+ context_chunker=context_chunker)
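# Illustrative construction sketch (assumptions: `engine` and the template are
# placeholders). context_sentences selects the context chunker built above: an
# integer n becomes SlideWindowContextChunker(window_size=n) (n sentences on each
# side of the unit), while "all" becomes WholeDocumentContextChunker.
from llm_ie.extractors import SentenceFrameExtractor

extractor = SentenceFrameExtractor(
    inference_engine=engine,
    prompt_template="Extract medication frames as JSON from:\n{{text}}",
    context_sentences=2,  # two sentences of context before and after each sentence
)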
1359
1397
 
1360
1398
 
1361
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
1362
- document_key:str=None, temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict[str,str]]:
1399
+ class SentenceReviewFrameExtractor(ReviewFrameExtractor):
1400
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
1401
+ review_mode:str, review_prompt:str=None, system_prompt:str=None,
1402
+ context_sentences:Union[str, int]="all"):
1363
1403
  """
1364
- This method inputs a text and outputs a list of outputs per sentence.
1404
+ This class adds a review step after the SentenceFrameExtractor.
1405
+ For each sentence, the review process asks the LLM to review its output and:
1406
+ 1. add more frames while keeping the current ones. This is efficient for boosting recall.
1407
+ 2. or, regenerate frames (add new and delete existing).
1408
+ Use the review_mode parameter to specify. Note that the review_prompt should instruct the LLM accordingly.
1365
1409
 
1366
1410
  Parameters:
1367
1411
  ----------
1368
- text_content : Union[str, Dict[str,str]]
1369
- the input text content to put in prompt template.
1370
- If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
1371
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
1372
- max_new_tokens : str, Optional
1373
- the max number of new tokens LLM should generate.
1374
- document_key : str, Optional
1375
- specify the key in text_content where document text is.
1376
- If text_content is str, this parameter will be ignored.
1377
- temperature : float, Optional
1378
- the temperature for token sampling.
1379
- stream : bool, Optional
1380
- if True, LLM generated text will be printed in terminal in real-time.
1381
- return_messages_log : bool, Optional
1382
- if True, a list of messages will be returned.
1383
-
1384
- Return : str
1385
- the output from LLM. Need post-processing.
1412
+ inference_engine : InferenceEngine
1413
+ the LLM inferencing engine object. Must implement the chat() method.
1414
+ prompt_template : str
1415
+ prompt template with "{{<placeholder name>}}" placeholder.
1416
+ review_prompt : str, Optional
1417
+ the prompt text that asks the LLM to review. Specify addition or revision in the instruction.
1418
+ if not provided, a default review prompt will be used.
1419
+ review_mode : str
1420
+ review mode. Must be one of {"addition", "revision"}
1421
+ addition mode only asks the LLM to add new frames, while revision mode asks the LLM to regenerate.
1422
+ system_prompt : str, Optional
1423
+ system prompt.
1424
+ context_sentences : Union[str, int], Optional
1425
+ number of sentences before and after the given sentence to provide additional context.
1426
+ if "all", the full text will be provided in the prompt as context.
1427
+ if 0, no additional context will be provided.
1428
+ This is good for tasks that do not require context beyond the given sentence.
1429
+ if > 0, the number of sentences before and after the given sentence to provide as context.
1430
+ This is good for tasks that require context beyond the given sentence.
1386
1431
  """
1387
- # define output
1388
- output = []
1389
- # sentence tokenization
1390
- if isinstance(text_content, str):
1391
- sentences = self._get_sentences(text_content)
1392
- elif isinstance(text_content, dict):
1393
- sentences = self._get_sentences(text_content[document_key])
1394
-
1395
- if return_messages_log:
1396
- messages_log = []
1397
-
1398
- # generate sentence by sentence
1399
- for i, sent in enumerate(sentences):
1400
- # construct chat messages
1401
- messages = []
1402
- if self.system_prompt:
1403
- messages.append({'role': 'system', 'content': self.system_prompt})
1404
-
1405
- context = self._get_context_sentences(text_content, i, sentences, document_key)
1406
-
1407
- if self.context_sentences == 0:
1408
- # no context, just place sentence of interest
1409
- if isinstance(text_content, str):
1410
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
1411
- else:
1412
- sentence_content = text_content.copy()
1413
- sentence_content[document_key] = sent['sentence_text']
1414
- messages.append({'role': 'user', 'content': self._get_user_prompt(sentence_content)})
1415
- else:
1416
- # insert context
1417
- if isinstance(text_content, str):
1418
- messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1419
- else:
1420
- context_content = text_content.copy()
1421
- context_content[document_key] = context
1422
- messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
1423
- # simulate conversation
1424
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
1425
- # place sentence of interest
1426
- messages.append({'role': 'user', 'content': sent['sentence_text']})
1427
-
1428
- if stream:
1429
- print(f"\n\n{Fore.GREEN}Sentence: {Style.RESET_ALL}\n{sent['sentence_text']}\n")
1430
- if isinstance(self.context_sentences, int) and self.context_sentences > 0:
1431
- print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
1432
- print(f"{Fore.BLUE}CoT:{Style.RESET_ALL}")
1433
-
1434
- gen_text = self.inference_engine.chat(
1435
- messages=messages,
1436
- max_new_tokens=max_new_tokens,
1437
- temperature=temperature,
1438
- stream=stream,
1439
- **kwrs
1440
- )
1441
-
1442
- if return_messages_log:
1443
- messages.append({"role": "assistant", "content": gen_text})
1444
- messages_log.append(messages)
1445
-
1446
- # add to output
1447
- output.append({'sentence_start': sent['start'],
1448
- 'sentence_end': sent['end'],
1449
- 'sentence_text': sent['sentence_text'],
1450
- 'gen_text': gen_text})
1432
+ if not isinstance(context_sentences, int) and context_sentences != "all":
1433
+ raise ValueError('context_sentences must be an integer (>= 0) or "all".')
1451
1434
 
1452
- if return_messages_log:
1453
- return output, messages_log
1454
- return output
1435
+ if isinstance(context_sentences, int) and context_sentences < 0:
1436
+ raise ValueError("context_sentences must be a non-negative integer.")
1437
+
1438
+ if isinstance(context_sentences, int):
1439
+ context_chunker = SlideWindowContextChunker(window_size=context_sentences)
1440
+ elif context_sentences == "all":
1441
+ context_chunker = WholeDocumentContextChunker()
1442
+
1443
+ super().__init__(inference_engine=inference_engine,
1444
+ unit_chunker=SentenceUnitChunker(),
1445
+ prompt_template=prompt_template,
1446
+ review_mode=review_mode,
1447
+ review_prompt=review_prompt,
1448
+ system_prompt=system_prompt,
1449
+ context_chunker=context_chunker)
1455
1450
 
1456
1451
 
1457
1452
  class RelationExtractor(Extractor):
1458
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
1453
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
1459
1454
  """
1460
1455
  This is the abstract class for relation extraction.
1461
1456
  Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1471,8 +1466,7 @@ class RelationExtractor(Extractor):
1471
1466
  """
1472
1467
  super().__init__(inference_engine=inference_engine,
1473
1468
  prompt_template=prompt_template,
1474
- system_prompt=system_prompt,
1475
- **kwrs)
1469
+ system_prompt=system_prompt)
1476
1470
 
1477
1471
  def _get_ROI(self, frame_1:LLMInformationExtractionFrame, frame_2:LLMInformationExtractionFrame,
1478
1472
  text:str, buffer_size:int=100) -> str:
@@ -1548,7 +1542,7 @@ class RelationExtractor(Extractor):
1548
1542
 
1549
1543
  class BinaryRelationExtractor(RelationExtractor):
1550
1544
  def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_func: Callable,
1551
- system_prompt:str=None, **kwrs):
1545
+ system_prompt:str=None):
1552
1546
  """
1553
1547
  This class extracts binary (yes/no) relations between two entities.
1554
1548
  Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1566,8 +1560,7 @@ class BinaryRelationExtractor(RelationExtractor):
1566
1560
  """
1567
1561
  super().__init__(inference_engine=inference_engine,
1568
1562
  prompt_template=prompt_template,
1569
- system_prompt=system_prompt,
1570
- **kwrs)
1563
+ system_prompt=system_prompt)
1571
1564
 
1572
1565
  if possible_relation_func:
1573
1566
  # Check if possible_relation_func is a function
@@ -1607,8 +1600,8 @@ class BinaryRelationExtractor(RelationExtractor):
1607
1600
  return False
1608
1601
 
1609
1602
 
1610
- def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1611
- temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1603
+ def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, verbose:bool=False,
1604
+ return_messages_log:bool=False) -> List[Dict]:
1612
1605
  """
1613
1606
  This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
1614
1607
  Outputs pairs that are related.
@@ -1619,11 +1612,7 @@ class BinaryRelationExtractor(RelationExtractor):
1619
1612
  a document with frames.
1620
1613
  buffer_size : int, Optional
1621
1614
  the number of characters before and after the two frames in the ROI text.
1622
- max_new_tokens : str, Optional
1623
- the max number of new tokens LLM should generate.
1624
- temperature : float, Optional
1625
- the temperature for token sampling.
1626
- stream : bool, Optional
1615
+ verbose : bool, Optional
1627
1616
  if True, LLM generated text will be printed in terminal in real-time.
1628
1617
  return_messages_log : bool, Optional
1629
1618
  if True, a list of messages will be returned.
@@ -1642,7 +1631,7 @@ class BinaryRelationExtractor(RelationExtractor):
1642
1631
 
1643
1632
  if pos_rel:
1644
1633
  roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
1645
- if stream:
1634
+ if verbose:
1646
1635
  print(f"\n\n{Fore.GREEN}ROI text:{Style.RESET_ALL} \n{roi_text}\n")
1647
1636
  print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
1648
1637
  messages = []
@@ -1656,10 +1645,7 @@ class BinaryRelationExtractor(RelationExtractor):
1656
1645
 
1657
1646
  gen_text = self.inference_engine.chat(
1658
1647
  messages=messages,
1659
- max_new_tokens=max_new_tokens,
1660
- temperature=temperature,
1661
- stream=stream,
1662
- **kwrs
1648
+ verbose=verbose
1663
1649
  )
1664
1650
  rel_json = self._extract_json(gen_text)
1665
1651
  if self._post_process(rel_json):
@@ -1674,8 +1660,8 @@ class BinaryRelationExtractor(RelationExtractor):
1674
1660
  return output
1675
1661
 
1676
1662
 
1677
- async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1678
- temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1663
+ async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
1664
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[Dict]:
1679
1665
  """
1680
1666
  This is the asynchronous version of the extract() method.
1681
1667
 
@@ -1730,10 +1716,7 @@ class BinaryRelationExtractor(RelationExtractor):
1730
1716
 
1731
1717
  task = asyncio.create_task(
1732
1718
  self.inference_engine.chat_async(
1733
- messages=messages,
1734
- max_new_tokens=max_new_tokens,
1735
- temperature=temperature,
1736
- **kwrs
1719
+ messages=messages
1737
1720
  )
1738
1721
  )
1739
1722
  tasks.append(task)
@@ -1755,9 +1738,9 @@ class BinaryRelationExtractor(RelationExtractor):
1755
1738
  return output
1756
1739
 
1757
1740
 
1758
- def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1759
- temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32,
1760
- stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1741
+ def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
1742
+ concurrent:bool=False, concurrent_batch_size:int=32, verbose:bool=False,
1743
+ return_messages_log:bool=False) -> List[Dict]:
1761
1744
  """
1762
1745
  This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
1763
1746
 
@@ -1767,15 +1750,11 @@ class BinaryRelationExtractor(RelationExtractor):
1767
1750
  a document with frames.
1768
1751
  buffer_size : int, Optional
1769
1752
  the number of characters before and after the two frames in the ROI text.
1770
- max_new_tokens : str, Optional
1771
- the max number of new tokens LLM should generate.
1772
- temperature : float, Optional
1773
- the temperature for token sampling.
1774
1753
  concurrent: bool, Optional
1775
1754
  if True, the extraction will be done in concurrent.
1776
1755
  concurrent_batch_size : int, Optional
1777
1756
  the number of frame pairs to process in concurrent.
1778
- stream : bool, Optional
1757
+ verbose : bool, Optional
1779
1758
  if True, LLM generated text will be printed in terminal in real-time.
1780
1759
  return_messages_log : bool, Optional
1781
1760
  if True, a list of messages will be returned.
@@ -1790,31 +1769,25 @@ class BinaryRelationExtractor(RelationExtractor):
1790
1769
  raise ValueError("All frame_ids in the input document must be unique.")
1791
1770
 
1792
1771
  if concurrent:
1793
- if stream:
1772
+ if verbose:
1794
1773
  warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
1795
1774
 
1796
1775
  nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
1797
1776
  return asyncio.run(self.extract_async(doc=doc,
1798
1777
  buffer_size=buffer_size,
1799
- max_new_tokens=max_new_tokens,
1800
- temperature=temperature,
1801
1778
  concurrent_batch_size=concurrent_batch_size,
1802
- return_messages_log=return_messages_log,
1803
- **kwrs)
1779
+ return_messages_log=return_messages_log)
1804
1780
  )
1805
1781
  else:
1806
1782
  return self.extract(doc=doc,
1807
1783
  buffer_size=buffer_size,
1808
- max_new_tokens=max_new_tokens,
1809
- temperature=temperature,
1810
- stream=stream,
1811
- return_messages_log=return_messages_log,
1812
- **kwrs)
1784
+ verbose=verbose,
1785
+ return_messages_log=return_messages_log)
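# Illustrative usage sketch (assumptions: `engine`, `relation_prompt`, and `doc` are
# placeholders; `doc` is an LLMInformationExtractionDocument that already has frames).
# extract_relations() enumerates frame pairs, filters them with possible_relation_func,
# builds an ROI of +/- buffer_size characters around each surviving pair, and asks the
# LLM for a yes/no decision. The filter below is only an assumed example.
from llm_ie.extractors import BinaryRelationExtractor

def may_relate(frame_1, frame_2) -> bool:
    # assumed example filter: allow every distinct pair; a real filter would inspect
    # frame attributes such as entity type
    return frame_1.frame_id != frame_2.frame_id

rel_extractor = BinaryRelationExtractor(
    inference_engine=engine,
    prompt_template=relation_prompt,
    possible_relation_func=may_relate,
)
related_pairs = rel_extractor.extract_relations(doc, buffer_size=100, concurrent=True)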
1813
1786
 
1814
1787
 
1815
1788
  class MultiClassRelationExtractor(RelationExtractor):
1816
1789
  def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_types_func: Callable,
1817
- system_prompt:str=None, **kwrs):
1790
+ system_prompt:str=None):
1818
1791
  """
1819
1792
  This class extracts relations with relation types.
1820
1793
  Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1833,8 +1806,7 @@ class MultiClassRelationExtractor(RelationExtractor):
1833
1806
  """
1834
1807
  super().__init__(inference_engine=inference_engine,
1835
1808
  prompt_template=prompt_template,
1836
- system_prompt=system_prompt,
1837
- **kwrs)
1809
+ system_prompt=system_prompt)
1838
1810
 
1839
1811
  if possible_relation_types_func:
1840
1812
  # Check if possible_relation_types_func is a function
@@ -1881,8 +1853,7 @@ class MultiClassRelationExtractor(RelationExtractor):
1881
1853
  return None
1882
1854
 
1883
1855
 
1884
- def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1885
- temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1856
+ def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, verbose:bool=False, return_messages_log:bool=False) -> List[Dict]:
1886
1857
  """
1887
1858
  This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
1888
1859
 
@@ -1915,7 +1886,7 @@ class MultiClassRelationExtractor(RelationExtractor):
1915
1886
 
1916
1887
  if pos_rel_types:
1917
1888
  roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
1918
- if stream:
1889
+ if verbose:
1919
1890
  print(f"\n\n{Fore.GREEN}ROI text:{Style.RESET_ALL} \n{roi_text}\n")
1920
1891
  print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
1921
1892
  messages = []
@@ -1930,10 +1901,8 @@ class MultiClassRelationExtractor(RelationExtractor):
1930
1901
 
1931
1902
  gen_text = self.inference_engine.chat(
1932
1903
  messages=messages,
1933
- max_new_tokens=max_new_tokens,
1934
- temperature=temperature,
1935
- stream=stream,
1936
- **kwrs
1904
+ stream=False,
1905
+ verbose=verbose
1937
1906
  )
1938
1907
 
1939
1908
  if return_messages_log:
@@ -1950,8 +1919,8 @@ class MultiClassRelationExtractor(RelationExtractor):
1950
1919
  return output
1951
1920
 
1952
1921
 
1953
- async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1954
- temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1922
+ async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
1923
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[Dict]:
1955
1924
  """
1956
1925
  This is the asynchronous version of the extract() method.
1957
1926
 
@@ -2006,10 +1975,7 @@ class MultiClassRelationExtractor(RelationExtractor):
2006
1975
  )})
2007
1976
  task = asyncio.create_task(
2008
1977
  self.inference_engine.chat_async(
2009
- messages=messages,
2010
- max_new_tokens=max_new_tokens,
2011
- temperature=temperature,
2012
- **kwrs
1978
+ messages=messages
2013
1979
  )
2014
1980
  )
2015
1981
  tasks.append(task)
@@ -2032,9 +1998,9 @@ class MultiClassRelationExtractor(RelationExtractor):
2032
1998
  return output
2033
1999
 
2034
2000
 
2035
- def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
2036
- temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32,
2037
- stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
2001
+ def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
2002
+ concurrent:bool=False, concurrent_batch_size:int=32,
2003
+ verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
2038
2004
  """
2039
2005
  This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
2040
2006
 
@@ -2067,24 +2033,18 @@ class MultiClassRelationExtractor(RelationExtractor):
2067
2033
  raise ValueError("All frame_ids in the input document must be unique.")
2068
2034
 
2069
2035
  if concurrent:
2070
- if stream:
2036
+ if verbose:
2071
2037
  warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
2072
2038
 
2073
2039
  nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
2074
2040
  return asyncio.run(self.extract_async(doc=doc,
2075
2041
  buffer_size=buffer_size,
2076
- max_new_tokens=max_new_tokens,
2077
- temperature=temperature,
2078
2042
  concurrent_batch_size=concurrent_batch_size,
2079
- return_messages_log=return_messages_log,
2080
- **kwrs)
2043
+ return_messages_log=return_messages_log)
2081
2044
  )
2082
2045
  else:
2083
2046
  return self.extract(doc=doc,
2084
2047
  buffer_size=buffer_size,
2085
- max_new_tokens=max_new_tokens,
2086
- temperature=temperature,
2087
- stream=stream,
2088
- return_messages_log=return_messages_log,
2089
- **kwrs)
2048
+ verbose=verbose,
2049
+ return_messages_log=return_messages_log)
2090
2050
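# Illustrative usage sketch (assumptions: `engine`, `relation_prompt`, and `doc` are
# placeholders as above). MultiClassRelationExtractor follows the same pairwise flow,
# except possible_relation_types_func returns the candidate relation types for a frame
# pair (an empty list meaning the pair is skipped) and the LLM selects among them.
# The function body below is an assumed example, not the package's reference filter.
from llm_ie.extractors import MultiClassRelationExtractor

def possible_types(frame_1, frame_2):
    # assumed example: every pair may hold one of these two relation types
    return ["Strength-Drug", "Frequency-Drug"]

mc_extractor = MultiClassRelationExtractor(
    inference_engine=engine,
    prompt_template=relation_prompt,
    possible_relation_types_func=possible_types,
)
relations = mc_extractor.extract_relations(doc, buffer_size=100, concurrent=True)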