llm-ie 0.4.6__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/extractors.py CHANGED
@@ -8,10 +8,12 @@ import warnings
8
8
  import itertools
9
9
  import asyncio
10
10
  import nest_asyncio
11
- from typing import Set, List, Dict, Tuple, Union, Callable
12
- from llm_ie.data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
11
+ from typing import Any, Set, List, Dict, Tuple, Union, Callable, Generator, Optional
12
+ from llm_ie.data_types import FrameExtractionUnit, FrameExtractionUnitResult, LLMInformationExtractionFrame, LLMInformationExtractionDocument
13
+ from llm_ie.chunkers import UnitChunker, WholeDocumentUnitChunker, SentenceUnitChunker
14
+ from llm_ie.chunkers import ContextChunker, NoContextChunker, WholeDocumentContextChunker, SlideWindowContextChunker
13
15
  from llm_ie.engines import InferenceEngine
14
- from colorama import Fore, Style
16
+ from colorama import Fore, Style
15
17
 
16
18
 
17
19
  class Extractor:
@@ -38,15 +40,46 @@ class Extractor:
38
40
  def get_prompt_guide(cls) -> str:
39
41
  """
40
42
  This method returns the pre-defined prompt guideline for the extractor from the package asset.
43
+ It searches for a guide specific to the current class first; if none is found, it searches
44
+ for the guide in its ancestors by traversing the class's method resolution order (MRO).
41
45
  """
42
- # Check if the prompt guide is available
43
- file_path = importlib.resources.files('llm_ie.asset.prompt_guide').joinpath(f"{cls.__name__}_prompt_guide.txt")
44
- try:
45
- with open(file_path, 'r', encoding="utf-8") as f:
46
- return f.read()
47
- except FileNotFoundError:
48
- warnings.warn(f"Prompt guide for {cls.__name__} is not available. Is it a customed extractor?", UserWarning)
49
- return None
46
+ original_class_name = cls.__name__
47
+
48
+ for current_class_in_mro in cls.__mro__:
49
+ if current_class_in_mro is object:
50
+ continue
51
+
52
+ current_class_name = current_class_in_mro.__name__
53
+
54
+ try:
55
+ file_path_obj = importlib.resources.files('llm_ie.asset.prompt_guide').joinpath(f"{current_class_name}_prompt_guide.txt")
56
+
57
+ with open(file_path_obj, 'r', encoding="utf-8") as f:
58
+ prompt_content = f.read()
59
+ # If the guide was found for an ancestor, not the original class, issue a warning.
60
+ if cls is not current_class_in_mro:
61
+ warnings.warn(
62
+ f"Prompt guide for '{original_class_name}' not found. "
63
+ f"Using guide from ancestor: '{current_class_name}_prompt_guide.txt'.",
64
+ UserWarning
65
+ )
66
+ return prompt_content
67
+ except FileNotFoundError:
68
+ pass
69
+
70
+ except Exception as e:
71
+ warnings.warn(
72
+ f"Error attempting to read prompt guide for '{current_class_name}' "
73
+ f"from '{str(file_path_obj)}': {e}. Trying next in MRO.",
74
+ UserWarning
75
+ )
76
+ continue
77
+
78
+ # If the loop completes, no prompt guide was found for the original class or any of its ancestors.
79
+ raise FileNotFoundError(
80
+ f"Prompt guide for '{original_class_name}' not found in the package asset. "
81
+ f"Is it a custom extractor?"
82
+ )
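In practice, the rewrite changes what a subclass without its own packaged guide gets back from get_prompt_guide(): instead of a warning plus None, it now inherits the nearest ancestor's guide and only raises FileNotFoundError when no class in the MRO ships one. A minimal sketch of that behavior, assuming get_prompt_guide() remains callable on the class (as the cls signature suggests) and that a guide is packaged for DirectFrameExtractor; the subclass is hypothetical:

    from llm_ie.extractors import DirectFrameExtractor

    class MyCustomExtractor(DirectFrameExtractor):
        # Hypothetical subclass that ships no MyCustomExtractor_prompt_guide.txt asset.
        pass

    # 0.4.6 behavior: warn and return None.
    # 1.0.0 behavior: return DirectFrameExtractor's guide with a fallback UserWarning;
    # raise FileNotFoundError only if no ancestor has a packaged guide.
    guide = MyCustomExtractor.get_prompt_guide()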
50
83
 
51
84
  def _get_user_prompt(self, text_content:Union[str, Dict[str,str]]) -> str:
52
85
  """
@@ -138,7 +171,8 @@ class Extractor:
138
171
 
139
172
  class FrameExtractor(Extractor):
140
173
  from nltk.tokenize import RegexpTokenizer
141
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
174
+ def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker,
175
+ prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None, **kwrs):
142
176
  """
143
177
  This is the abstract class for frame extraction.
144
178
  Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -147,15 +181,25 @@ class FrameExtractor(Extractor):
147
181
  ----------
148
182
  inference_engine : InferenceEngine
149
183
  the LLM inferencing engine object. Must implements the chat() method.
184
+ unit_chunker : UnitChunker
185
+ the unit chunker object that determines how to chunk the document text into units.
150
186
  prompt_template : str
151
187
  prompt template with "{{<placeholder name>}}" placeholder.
152
188
  system_prompt : str, Optional
153
189
  system prompt.
190
+ context_chunker : ContextChunker
191
+ the context chunker object that determines how to get context for each unit.
154
192
  """
155
193
  super().__init__(inference_engine=inference_engine,
156
194
  prompt_template=prompt_template,
157
195
  system_prompt=system_prompt,
158
196
  **kwrs)
197
+
198
+ self.unit_chunker = unit_chunker
199
+ if context_chunker is None:
200
+ self.context_chunker = NoContextChunker()
201
+ else:
202
+ self.context_chunker = context_chunker
159
203
 
160
204
  self.tokenizer = self.RegexpTokenizer(r'\w+|[^\w\s]')
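A rough sketch of how the reworked constructor is wired, using chunkers imported at the top of this file; the zero-argument chunker constructors and the pre-built engine and prompt_template objects are assumptions, not signatures taken from this diff:

    from llm_ie.chunkers import SentenceUnitChunker, WholeDocumentContextChunker
    from llm_ie.extractors import DirectFrameExtractor

    # engine: any concrete InferenceEngine implementation (construction omitted).
    # prompt_template: a template string containing a {{...}} placeholder.
    extractor = DirectFrameExtractor(
        inference_engine=engine,
        unit_chunker=SentenceUnitChunker(),             # one LLM call per sentence
        context_chunker=WholeDocumentContextChunker(),  # whole document as context
        prompt_template=prompt_template,
        system_prompt="You are an information extraction assistant.",
    )
    # Omitting context_chunker falls back to NoContextChunker (no extra context).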
161
205
 
@@ -288,7 +332,7 @@ class FrameExtractor(Extractor):
288
332
  return entity_spans
289
333
 
290
334
  @abc.abstractmethod
291
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, **kwrs) -> str:
335
+ def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, return_messages_log:bool=False, **kwrs) -> str:
292
336
  """
293
337
  This method inputs text content and outputs a string generated by LLM
294
338
 
@@ -300,6 +344,8 @@ class FrameExtractor(Extractor):
300
344
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
301
345
  max_new_tokens : str, Optional
302
346
  the max number of new tokens LLM can generate.
347
+ return_messages_log : bool, Optional
348
+ if True, a list of messages will be returned.
303
349
 
304
350
  Return : str
305
351
  the output from LLM. Need post-processing.
@@ -309,7 +355,7 @@ class FrameExtractor(Extractor):
309
355
 
310
356
  @abc.abstractmethod
311
357
  def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=2048,
312
- document_key:str=None, **kwrs) -> List[LLMInformationExtractionFrame]:
358
+ document_key:str=None, return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
313
359
  """
314
360
  This method inputs text content and outputs a list of LLMInformationExtractionFrame
315
361
  It use the extract() method and post-process outputs into frames.
@@ -327,6 +373,8 @@ class FrameExtractor(Extractor):
327
373
  document_key : str, Optional
328
374
  specify the key in text_content where document text is.
329
375
  If text_content is str, this parameter will be ignored.
376
+ return_messages_log : bool, Optional
377
+ if True, a list of messages will be returned.
330
378
 
331
379
  Return : str
332
380
  a list of frames.
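Both abstract methods now accept return_messages_log. With the concrete DirectFrameExtractor defined further below (where entity_key is no longer a parameter), setting it makes the call return the per-unit chat transcripts alongside the frames; the frame attribute names follow the LLMInformationExtractionFrame constructor arguments used in this file, and extractor and note_text are assumed to exist:

    frames, messages_log = extractor.extract_frames(
        text_content=note_text,
        return_messages_log=True,
    )
    for frame in frames:
        print(frame.frame_id, frame.start, frame.end, frame.entity_text, frame.attr)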
@@ -334,332 +382,38 @@ class FrameExtractor(Extractor):
334
382
  return NotImplemented
335
383
 
336
384
 
337
- class BasicFrameExtractor(FrameExtractor):
338
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
385
+ class DirectFrameExtractor(FrameExtractor):
386
+ def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker,
387
+ prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None, **kwrs):
339
388
  """
340
- This class diretly prompt LLM for frame extraction.
341
- Input system prompt (optional), prompt template (with instruction, few-shot examples),
342
- and specify a LLM.
389
+ This class is for general unit-context frame extraction.
390
+ Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
343
391
 
344
392
  Parameters:
345
393
  ----------
346
394
  inference_engine : InferenceEngine
347
395
  the LLM inferencing engine object. Must implements the chat() method.
396
+ unit_chunker : UnitChunker
397
+ the unit chunker object that determines how to chunk the document text into units.
348
398
  prompt_template : str
349
399
  prompt template with "{{<placeholder name>}}" placeholder.
350
400
  system_prompt : str, Optional
351
401
  system prompt.
402
+ context_chunker : ContextChunker
403
+ the context chunker object that determines how to get context for each unit.
352
404
  """
353
- super().__init__(inference_engine=inference_engine,
354
- prompt_template=prompt_template,
355
- system_prompt=system_prompt,
405
+ super().__init__(inference_engine=inference_engine,
406
+ unit_chunker=unit_chunker,
407
+ prompt_template=prompt_template,
408
+ system_prompt=system_prompt,
409
+ context_chunker=context_chunker,
356
410
  **kwrs)
357
-
358
-
359
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048,
360
- temperature:float=0.0, stream:bool=False, **kwrs) -> str:
361
- """
362
- This method inputs a text and outputs a string generated by LLM.
363
-
364
- Parameters:
365
- ----------
366
- text_content : Union[str, Dict[str,str]]
367
- the input text content to put in prompt template.
368
- If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
369
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
370
- max_new_tokens : str, Optional
371
- the max number of new tokens LLM can generate.
372
- temperature : float, Optional
373
- the temperature for token sampling.
374
- stream : bool, Optional
375
- if True, LLM generated text will be printed in terminal in real-time.
376
-
377
- Return : str
378
- the output from LLM. Need post-processing.
379
- """
380
- messages = []
381
- if self.system_prompt:
382
- messages.append({'role': 'system', 'content': self.system_prompt})
383
-
384
- messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
385
- response = self.inference_engine.chat(
386
- messages=messages,
387
- max_new_tokens=max_new_tokens,
388
- temperature=temperature,
389
- stream=stream,
390
- **kwrs
391
- )
392
-
393
- return response
394
-
395
-
396
- def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=2048,
397
- temperature:float=0.0, document_key:str=None, stream:bool=False,
398
- case_sensitive:bool=False, fuzzy_match:bool=True, fuzzy_buffer_size:float=0.2,
399
- fuzzy_score_cutoff:float=0.8, allow_overlap_entities:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
400
- """
401
- This method inputs a text and outputs a list of LLMInformationExtractionFrame
402
- It use the extract() method and post-process outputs into frames.
403
-
404
- Parameters:
405
- ----------
406
- text_content : Union[str, Dict[str,str]]
407
- the input text content to put in prompt template.
408
- If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
409
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
410
- entity_key : str
411
- the key (in ouptut JSON) for entity text. Any extraction that does not include entity key will be dropped.
412
- max_new_tokens : str, Optional
413
- the max number of new tokens LLM should generate.
414
- temperature : float, Optional
415
- the temperature for token sampling.
416
- document_key : str, Optional
417
- specify the key in text_content where document text is.
418
- If text_content is str, this parameter will be ignored.
419
- stream : bool, Optional
420
- if True, LLM generated text will be printed in terminal in real-time.
421
- case_sensitive : bool, Optional
422
- if True, entity text matching will be case-sensitive.
423
- fuzzy_match : bool, Optional
424
- if True, fuzzy matching will be applied to find entity text.
425
- fuzzy_buffer_size : float, Optional
426
- the buffer size for fuzzy matching. Default is 20% of entity text length.
427
- fuzzy_score_cutoff : float, Optional
428
- the Jaccard score cutoff for fuzzy matching.
429
- Matched entity text must have a score higher than this value or a None will be returned.
430
- allow_overlap_entities : bool, Optional
431
- if True, entities can overlap in the text.
432
- Note that this can cause multiple frames to be generated on the same entity span if they have same entity text.
433
-
434
- Return : str
435
- a list of frames.
436
- """
437
- if isinstance(text_content, str):
438
- text = text_content
439
- elif isinstance(text_content, dict):
440
- if document_key is None:
441
- raise ValueError("document_key must be provided when text_content is dict.")
442
- text = text_content[document_key]
443
-
444
- frame_list = []
445
- gen_text = self.extract(text_content=text_content,
446
- max_new_tokens=max_new_tokens,
447
- temperature=temperature,
448
- stream=stream,
449
- **kwrs)
450
-
451
- entity_json = []
452
- for entity in self._extract_json(gen_text=gen_text):
453
- if entity_key in entity:
454
- entity_json.append(entity)
455
- else:
456
- warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{entity_key}"). This frame will be dropped.', RuntimeWarning)
457
-
458
- spans = self._find_entity_spans(text=text,
459
- entities=[e[entity_key] for e in entity_json],
460
- case_sensitive=case_sensitive,
461
- fuzzy_match=fuzzy_match,
462
- fuzzy_buffer_size=fuzzy_buffer_size,
463
- fuzzy_score_cutoff=fuzzy_score_cutoff,
464
- allow_overlap_entities=allow_overlap_entities)
465
-
466
- for i, (ent, span) in enumerate(zip(entity_json, spans)):
467
- if span is not None:
468
- start, end = span
469
- frame = LLMInformationExtractionFrame(frame_id=f"{i}",
470
- start=start,
471
- end=end,
472
- entity_text=text[start:end],
473
- attr={k: v for k, v in ent.items() if k != entity_key and v != ""})
474
- frame_list.append(frame)
475
- return frame_list
476
-
477
411
 
478
- class ReviewFrameExtractor(BasicFrameExtractor):
479
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
480
- review_mode:str, review_prompt:str=None,system_prompt:str=None, **kwrs):
481
- """
482
- This class add a review step after the BasicFrameExtractor.
483
- The Review process asks LLM to review its output and:
484
- 1. add more frames while keep current. This is efficient for boosting recall.
485
- 2. or, regenerate frames (add new and delete existing).
486
- Use the review_mode parameter to specify. Note that the review_prompt should instruct LLM accordingly.
487
-
488
- Parameters:
489
- ----------
490
- inference_engine : InferenceEngine
491
- the LLM inferencing engine object. Must implements the chat() method.
492
- prompt_template : str
493
- prompt template with "{{<placeholder name>}}" placeholder.
494
- review_prompt : str: Optional
495
- the prompt text that ask LLM to review. Specify addition or revision in the instruction.
496
- if not provided, a default review prompt will be used.
497
- review_mode : str
498
- review mode. Must be one of {"addition", "revision"}
499
- addition mode only ask LLM to add new frames, while revision mode ask LLM to regenerate.
500
- system_prompt : str, Optional
501
- system prompt.
502
- """
503
- super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
504
- system_prompt=system_prompt, **kwrs)
505
- if review_mode not in {"addition", "revision"}:
506
- raise ValueError('review_mode must be one of {"addition", "revision"}.')
507
- self.review_mode = review_mode
508
-
509
- if review_prompt:
510
- self.review_prompt = review_prompt
511
- else:
512
- file_path = importlib.resources.files('llm_ie.asset.default_prompts').\
513
- joinpath(f"{self.__class__.__name__}_{self.review_mode}_review_prompt.txt")
514
- with open(file_path, 'r', encoding="utf-8") as f:
515
- self.review_prompt = f.read()
516
-
517
- warnings.warn(f'Custom review prompt not provided. The default review prompt is used:\n"{self.review_prompt}"', UserWarning)
518
-
519
-
520
- def extract(self, text_content:Union[str, Dict[str,str]],
521
- max_new_tokens:int=4096, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
522
- """
523
- This method inputs a text and outputs a string generated by LLM.
524
-
525
- Parameters:
526
- ----------
527
- text_content : Union[str, Dict[str,str]]
528
- the input text content to put in prompt template.
529
- If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
530
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
531
- max_new_tokens : str, Optional
532
- the max number of new tokens LLM can generate.
533
- temperature : float, Optional
534
- the temperature for token sampling.
535
- stream : bool, Optional
536
- if True, LLM generated text will be printed in terminal in real-time.
537
-
538
- Return : str
539
- the output from LLM. Need post-processing.
540
- """
541
- messages = []
542
- if self.system_prompt:
543
- messages.append({'role': 'system', 'content': self.system_prompt})
544
-
545
- messages.append({'role': 'user', 'content': self._get_user_prompt(text_content)})
546
- # Initial output
547
- if stream:
548
- print(f"{Fore.BLUE}Initial Output:{Style.RESET_ALL}")
549
-
550
- initial = self.inference_engine.chat(
551
- messages=messages,
552
- max_new_tokens=max_new_tokens,
553
- temperature=temperature,
554
- stream=stream,
555
- **kwrs
556
- )
557
-
558
- # Review
559
- messages.append({'role': 'assistant', 'content': initial})
560
- messages.append({'role': 'user', 'content': self.review_prompt})
561
-
562
- if stream:
563
- print(f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}")
564
- review = self.inference_engine.chat(
565
- messages=messages,
566
- max_new_tokens=max_new_tokens,
567
- temperature=temperature,
568
- stream=stream,
569
- **kwrs
570
- )
571
-
572
- # Output
573
- if self.review_mode == "revision":
574
- return review
575
- elif self.review_mode == "addition":
576
- return initial + '\n' + review
577
-
578
-
579
- class SentenceFrameExtractor(FrameExtractor):
580
- from nltk.tokenize.punkt import PunktSentenceTokenizer
581
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
582
- context_sentences:Union[str, int]="all", **kwrs):
583
- """
584
- This class performs sentence-by-sentence information extraction.
585
- The process is as follows:
586
- 1. system prompt (optional)
587
- 2. user prompt with instructions (schema, background, full text, few-shot example...)
588
- 3. feed a sentence (start with first sentence)
589
- 4. LLM extract entities and attributes from the sentence
590
- 5. repeat #3 and #4
591
-
592
- Input system prompt (optional), prompt template (with user instructions),
593
- and specify a LLM.
594
-
595
- Parameters:
596
- ----------
597
- inference_engine : InferenceEngine
598
- the LLM inferencing engine object. Must implements the chat() method.
599
- prompt_template : str
600
- prompt template with "{{<placeholder name>}}" placeholder.
601
- system_prompt : str, Optional
602
- system prompt.
603
- context_sentences : Union[str, int], Optional
604
- number of sentences before and after the given sentence to provide additional context.
605
- if "all", the full text will be provided in the prompt as context.
606
- if 0, no additional context will be provided.
607
- This is good for tasks that does not require context beyond the given sentence.
608
- if > 0, the number of sentences before and after the given sentence to provide as context.
609
- This is good for tasks that require context beyond the given sentence.
610
- """
611
- super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
612
- system_prompt=system_prompt, **kwrs)
613
-
614
- if not isinstance(context_sentences, int) and context_sentences != "all":
615
- raise ValueError('context_sentences must be an integer (>= 0) or "all".')
616
-
617
- if isinstance(context_sentences, int) and context_sentences < 0:
618
- raise ValueError("context_sentences must be a positive integer.")
619
-
620
- self.context_sentences =context_sentences
621
-
622
-
623
- def _get_sentences(self, text:str) -> List[Dict[str,str]]:
624
- """
625
- This method sentence tokenize the input text into a list of sentences
626
- as dict of {start, end, sentence_text}
627
-
628
- Parameters:
629
- ----------
630
- text : str
631
- text to sentence tokenize.
632
-
633
- Returns : List[Dict[str,str]]
634
- a list of sentences as dict with keys: {"sentence_text", "start", "end"}.
635
- """
636
- sentences = []
637
- for start, end in self.PunktSentenceTokenizer().span_tokenize(text):
638
- sentences.append({"sentence_text": text[start:end],
639
- "start": start,
640
- "end": end})
641
- return sentences
642
-
643
412
 
644
- def _get_context_sentences(self, text_content, i:int, sentences:List[Dict[str, str]], document_key:str=None) -> str:
645
- """
646
- This function returns the context sentences for the current sentence of interest (i).
647
- """
648
- if self.context_sentences == "all":
649
- context = text_content if isinstance(text_content, str) else text_content[document_key]
650
- elif self.context_sentences == 0:
651
- context = ""
652
- else:
653
- start = max(0, i - self.context_sentences)
654
- end = min(i + 1 + self.context_sentences, len(sentences))
655
- context = " ".join([s['sentence_text'] for s in sentences[start:end]])
656
- return context
657
-
658
-
659
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
660
- document_key:str=None, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
413
+ def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048,
414
+ document_key:str=None, temperature:float=0.0, verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
661
415
  """
662
- This method inputs a text and outputs a list of outputs per sentence.
416
+ This method inputs a text and outputs a list of outputs per unit.
663
417
 
664
418
  Parameters:
665
419
  ----------
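The hunk above removes BasicFrameExtractor, the old ReviewFrameExtractor, and SentenceFrameExtractor in favour of DirectFrameExtractor composed with the new chunkers. The sketch below is a plausible migration mapping rather than something stated in this diff; in particular, the SlideWindowContextChunker argument name (window_size) is an assumption:

    from llm_ie.chunkers import (WholeDocumentUnitChunker, SentenceUnitChunker,
                                 SlideWindowContextChunker)
    from llm_ie.extractors import DirectFrameExtractor

    # 0.4.6: BasicFrameExtractor(engine, prompt_template)
    whole_document = DirectFrameExtractor(
        inference_engine=engine,
        unit_chunker=WholeDocumentUnitChunker(),   # single prompt over the full text
        prompt_template=prompt_template,
    )

    # 0.4.6: SentenceFrameExtractor(engine, prompt_template, context_sentences=2)
    per_sentence = DirectFrameExtractor(
        inference_engine=engine,
        unit_chunker=SentenceUnitChunker(),
        context_chunker=SlideWindowContextChunker(window_size=2),  # parameter name assumed
        prompt_template=prompt_template,
    )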
@@ -667,77 +421,211 @@ class SentenceFrameExtractor(FrameExtractor):
667
421
  the input text content to put in prompt template.
668
422
  If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
669
423
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
670
- max_new_tokens : str, Optional
424
+ max_new_tokens : int, Optional
671
425
  the max number of new tokens LLM should generate.
672
426
  document_key : str, Optional
673
427
  specify the key in text_content where document text is.
674
428
  If text_content is str, this parameter will be ignored.
675
429
  temperature : float, Optional
676
430
  the temperature for token sampling.
677
- stream : bool, Optional
431
+ verbose : bool, Optional
678
432
  if True, LLM generated text will be printed in terminal in real-time.
433
+ return_messages_log : bool, Optional
434
+ if True, a list of messages will be returned.
679
435
 
680
- Return : str
681
- the output from LLM. Need post-processing.
436
+ Return : List[FrameExtractionUnitResult]
437
+ the output from LLM for each unit. Contains the start, end, text, and generated text.
682
438
  """
683
439
  # define output
684
440
  output = []
685
- # sentence tokenization
441
+ # unit chunking
686
442
  if isinstance(text_content, str):
687
- sentences = self._get_sentences(text_content)
443
+ doc_text = text_content
444
+
688
445
  elif isinstance(text_content, dict):
689
446
  if document_key is None:
690
447
  raise ValueError("document_key must be provided when text_content is dict.")
691
- sentences = self._get_sentences(text_content[document_key])
692
-
693
- # generate sentence by sentence
694
- for i, sent in enumerate(sentences):
448
+ doc_text = text_content[document_key]
449
+
450
+ units = self.unit_chunker.chunk(doc_text)
451
+ # context chunker init
452
+ self.context_chunker.fit(doc_text, units)
453
+ # messages log
454
+ if return_messages_log:
455
+ messages_log = []
456
+
457
+ # generate unit by unit
458
+ for i, unit in enumerate(units):
695
459
  # construct chat messages
696
460
  messages = []
697
461
  if self.system_prompt:
698
462
  messages.append({'role': 'system', 'content': self.system_prompt})
699
463
 
700
- context = self._get_context_sentences(text_content, i, sentences, document_key)
464
+ context = self.context_chunker.chunk(unit)
701
465
 
702
- if self.context_sentences == 0:
703
- # no context, just place sentence of interest
704
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
466
+ if context == "":
467
+ # no context, just place unit in user prompt
468
+ if isinstance(text_content, str):
469
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
470
+ else:
471
+ unit_content = text_content.copy()
472
+ unit_content[document_key] = unit.text
473
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
705
474
  else:
706
- # insert context
707
- messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
708
- # simulate conversation
709
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
710
- # place sentence of interest
711
- messages.append({'role': 'user', 'content': sent['sentence_text']})
712
-
713
- if stream:
714
- print(f"\n\n{Fore.GREEN}Sentence {i}:{Style.RESET_ALL}\n{sent['sentence_text']}\n")
715
- if isinstance(self.context_sentences, int) and self.context_sentences > 0:
475
+ # insert context to user prompt
476
+ if isinstance(text_content, str):
477
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
478
+ else:
479
+ context_content = text_content.copy()
480
+ context_content[document_key] = context
481
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
482
+ # simulate conversation where assistant confirms
483
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
484
+ # place unit of interest
485
+ messages.append({'role': 'user', 'content': unit.text})
486
+
487
+ if verbose:
488
+ print(f"\n\n{Fore.GREEN}Unit {i}:{Style.RESET_ALL}\n{unit.text}\n")
489
+ if context != "":
716
490
  print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
717
491
 
718
492
  print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
719
493
 
720
- gen_text = self.inference_engine.chat(
721
- messages=messages,
722
- max_new_tokens=max_new_tokens,
723
- temperature=temperature,
724
- stream=stream,
725
- **kwrs
726
- )
494
+ response_stream = self.inference_engine.chat(
495
+ messages=messages,
496
+ max_new_tokens=max_new_tokens,
497
+ temperature=temperature,
498
+ stream=True,
499
+ **kwrs
500
+ )
501
+
502
+ gen_text = ""
503
+ for chunk in response_stream:
504
+ gen_text += chunk
505
+ print(chunk, end='', flush=True)
506
+
507
+ else:
508
+ gen_text = self.inference_engine.chat(
509
+ messages=messages,
510
+ max_new_tokens=max_new_tokens,
511
+ temperature=temperature,
512
+ stream=False,
513
+ **kwrs
514
+ )
515
+
516
+ if return_messages_log:
517
+ messages.append({"role": "assistant", "content": gen_text})
518
+ messages_log.append(messages)
727
519
 
728
520
  # add to output
729
- output.append({'sentence_start': sent['start'],
730
- 'sentence_end': sent['end'],
731
- 'sentence_text': sent['sentence_text'],
732
- 'gen_text': gen_text})
521
+ result = FrameExtractionUnitResult(
522
+ start=unit.start,
523
+ end=unit.end,
524
+ text=unit.text,
525
+ gen_text=gen_text)
526
+ output.append(result)
733
527
 
528
+ if return_messages_log:
529
+ return output, messages_log
530
+
734
531
  return output
735
532
 
533
+ def stream(self, text_content: Union[str, Dict[str, str]], max_new_tokens: int = 2048, document_key: str = None,
534
+ temperature: float = 0.0, **kwrs) -> Generator[Dict[str, Any], None, List[FrameExtractionUnitResult]]:
535
+ """
536
+ Streams LLM responses per unit with structured event types,
537
+ and returns collected data for post-processing.
538
+
539
+ Yields:
540
+ -------
541
+ Dict[str, Any]: (type, data)
542
+ - {"type": "info", "data": str_message}: General informational messages.
543
+ - {"type": "unit", "data": dict_unit_info}: Signals start of a new unit. dict_unit_info contains {'id', 'text', 'start', 'end'}
544
+ - {"type": "context", "data": str_context}: Context string for the current unit.
545
+ - {"type": "llm_chunk", "data": str_chunk}: A raw chunk from the LLM.
736
546
 
737
- async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
738
- document_key:str=None, temperature:float=0.0, concurrent_batch_size:int=32, **kwrs) -> List[Dict[str,str]]:
547
+ Returns:
548
+ --------
549
+ List[FrameExtractionUnitResult]:
550
+ A list of FrameExtractionUnitResult objects, each containing the
551
+ original unit details and the fully accumulated 'gen_text' from the LLM.
739
552
  """
740
- The asynchronous version of the extract() method.
553
+ collected_results: List[FrameExtractionUnitResult] = []
554
+
555
+ if isinstance(text_content, str):
556
+ doc_text = text_content
557
+ elif isinstance(text_content, dict):
558
+ if document_key is None:
559
+ raise ValueError("document_key must be provided when text_content is dict.")
560
+ if document_key not in text_content:
561
+ raise ValueError(f"document_key '{document_key}' not found in text_content.")
562
+ doc_text = text_content[document_key]
563
+ else:
564
+ raise TypeError("text_content must be a string or a dictionary.")
565
+
566
+ units: List[FrameExtractionUnit] = self.unit_chunker.chunk(doc_text)
567
+ self.context_chunker.fit(doc_text, units)
568
+
569
+ yield {"type": "info", "data": f"Starting LLM processing for {len(units)} units."}
570
+
571
+ for i, unit in enumerate(units):
572
+ unit_info_payload = {"id": i, "text": unit.text, "start": unit.start, "end": unit.end}
573
+ yield {"type": "unit", "data": unit_info_payload}
574
+
575
+ messages = []
576
+ if self.system_prompt:
577
+ messages.append({'role': 'system', 'content': self.system_prompt})
578
+
579
+ context_str = self.context_chunker.chunk(unit)
580
+
581
+ # Construct prompt input based on whether text_content was str or dict
582
+ if context_str:
583
+ yield {"type": "context", "data": context_str}
584
+ prompt_input_for_context = context_str
585
+ if isinstance(text_content, dict):
586
+ context_content_dict = text_content.copy()
587
+ context_content_dict[document_key] = context_str
588
+ prompt_input_for_context = context_content_dict
589
+ messages.append({'role': 'user', 'content': self._get_user_prompt(prompt_input_for_context)})
590
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
591
+ messages.append({'role': 'user', 'content': unit.text})
592
+ else: # No context
593
+ prompt_input_for_unit = unit.text
594
+ if isinstance(text_content, dict):
595
+ unit_content_dict = text_content.copy()
596
+ unit_content_dict[document_key] = unit.text
597
+ prompt_input_for_unit = unit_content_dict
598
+ messages.append({'role': 'user', 'content': self._get_user_prompt(prompt_input_for_unit)})
599
+
600
+ current_gen_text = ""
601
+
602
+ response_stream = self.inference_engine.chat(
603
+ messages=messages,
604
+ max_new_tokens=max_new_tokens,
605
+ temperature=temperature,
606
+ stream=True,
607
+ **kwrs
608
+ )
609
+ for chunk in response_stream:
610
+ yield {"type": "llm_chunk", "data": chunk}
611
+ current_gen_text += chunk
612
+
613
+ # Store the result for this unit
614
+ result_for_unit = FrameExtractionUnitResult(
615
+ start=unit.start,
616
+ end=unit.end,
617
+ text=unit.text,
618
+ gen_text=current_gen_text
619
+ )
620
+ collected_results.append(result_for_unit)
621
+
622
+ yield {"type": "info", "data": "All units processed by LLM."}
623
+ return collected_results
624
+
625
+ async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None, temperature:float=0.0,
626
+ concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
627
+ """
628
+ This is the asynchronous version of the extract() method.
741
629
 
742
630
  Parameters:
743
631
  ----------
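The stream() generator above yields typed events and, as with any Python generator, hands the collected per-unit results back through StopIteration.value. A minimal consumer, assuming an already-constructed extractor and a note_text string:

    stream = extractor.stream(text_content=note_text)
    while True:
        try:
            event = next(stream)
        except StopIteration as stop:
            unit_results = stop.value           # List[FrameExtractionUnitResult]
            break
        if event["type"] == "unit":
            print(f"\n--- unit {event['data']['id']} ---")
        elif event["type"] == "llm_chunk":
            print(event["data"], end="", flush=True)
        elif event["type"] in ("info", "context"):
            print(event["data"])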
@@ -745,7 +633,7 @@ class SentenceFrameExtractor(FrameExtractor):
745
633
  the input text content to put in prompt template.
746
634
  If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
747
635
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
748
- max_new_tokens : str, Optional
636
+ max_new_tokens : int, Optional
749
637
  the max number of new tokens LLM should generate.
750
638
  document_key : str, Optional
751
639
  specify the key in text_content where document text is.
@@ -753,73 +641,129 @@ class SentenceFrameExtractor(FrameExtractor):
753
641
  temperature : float, Optional
754
642
  the temperature for token sampling.
755
643
  concurrent_batch_size : int, Optional
756
- the number of sentences to process in concurrent.
757
- """
758
- # Check if self.inference_engine.chat_async() is implemented
759
- if not hasattr(self.inference_engine, 'chat_async'):
760
- raise NotImplementedError(f"{self.inference_engine.__class__.__name__} does not have chat_async() method.")
644
+ the batch size for concurrent processing.
645
+ return_messages_log : bool, Optional
646
+ if True, a list of messages will be returned.
761
647
 
762
- # define output
763
- output = []
764
- # sentence tokenization
648
+ Return : List[FrameExtractionUnitResult]
649
+ the output from LLM for each unit. Contains the start, end, text, and generated text.
650
+ """
765
651
  if isinstance(text_content, str):
766
- sentences = self._get_sentences(text_content)
652
+ doc_text = text_content
767
653
  elif isinstance(text_content, dict):
768
654
  if document_key is None:
769
655
  raise ValueError("document_key must be provided when text_content is dict.")
770
- sentences = self._get_sentences(text_content[document_key])
656
+ if document_key not in text_content:
657
+ raise ValueError(f"document_key '{document_key}' not found in text_content dictionary.")
658
+ doc_text = text_content[document_key]
659
+ else:
660
+ raise TypeError("text_content must be a string or a dictionary.")
771
661
 
772
- # generate sentence by sentence
773
- for i in range(0, len(sentences), concurrent_batch_size):
774
- tasks = []
775
- batch = sentences[i:i + concurrent_batch_size]
776
- for j, sent in enumerate(batch):
777
- # construct chat messages
778
- messages = []
779
- if self.system_prompt:
780
- messages.append({'role': 'system', 'content': self.system_prompt})
662
+ units = self.unit_chunker.chunk(doc_text)
781
663
 
782
- context = self._get_context_sentences(text_content, i + j, sentences, document_key)
783
-
784
- if self.context_sentences == 0:
785
- # no context, just place sentence of interest
786
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
664
+ # context chunker init
665
+ self.context_chunker.fit(doc_text, units)
666
+
667
+ # Prepare inputs for all units first
668
+ tasks_input = []
669
+ for i, unit in enumerate(units):
670
+ # construct chat messages
671
+ messages = []
672
+ if self.system_prompt:
673
+ messages.append({'role': 'system', 'content': self.system_prompt})
674
+
675
+ context = self.context_chunker.chunk(unit)
676
+
677
+ if context == "":
678
+ # no context, just place unit in user prompt
679
+ if isinstance(text_content, str):
680
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
787
681
  else:
788
- # insert context
682
+ unit_content = text_content.copy()
683
+ unit_content[document_key] = unit.text
684
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
685
+ else:
686
+ # insert context to user prompt
687
+ if isinstance(text_content, str):
789
688
  messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
790
- # simulate conversation
791
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
792
- # place sentence of interest
793
- messages.append({'role': 'user', 'content': sent['sentence_text']})
794
-
795
- # add to tasks
796
- task = asyncio.create_task(
797
- self.inference_engine.chat_async(
798
- messages=messages,
799
- max_new_tokens=max_new_tokens,
800
- temperature=temperature,
801
- **kwrs
802
- )
689
+ else:
690
+ context_content = text_content.copy()
691
+ context_content[document_key] = context
692
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
693
+ # simulate conversation where assistant confirms
694
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
695
+ # place unit of interest
696
+ messages.append({'role': 'user', 'content': unit.text})
697
+
698
+ # Store unit and messages together for the task
699
+ tasks_input.append({"unit": unit, "messages": messages, "original_index": i})
700
+
701
+ # Process units concurrently with asyncio.Semaphore
702
+ semaphore = asyncio.Semaphore(concurrent_batch_size)
703
+
704
+ async def semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
705
+ unit = task_data["unit"]
706
+ messages = task_data["messages"]
707
+ original_index = task_data["original_index"]
708
+
709
+ async with semaphore:
710
+ gen_text = await self.inference_engine.chat_async(
711
+ messages=messages,
712
+ max_new_tokens=max_new_tokens,
713
+ temperature=temperature,
714
+ **kwrs
803
715
  )
804
- tasks.append(task)
805
-
806
- # Wait until the batch is done, collect results and move on to next batch
807
- responses = await asyncio.gather(*tasks)
716
+ return {"original_index": original_index, "unit": unit, "gen_text": gen_text, "messages": messages}
717
+
718
+ # Create and gather tasks
719
+ tasks = []
720
+ for task_inp in tasks_input:
721
+ task = asyncio.create_task(semaphore_helper(
722
+ task_inp,
723
+ max_new_tokens=max_new_tokens,
724
+ temperature=temperature,
725
+ **kwrs
726
+ ))
727
+ tasks.append(task)
728
+
729
+ results_raw = await asyncio.gather(*tasks)
730
+
731
+ # Sort results back into original order using the index stored
732
+ results_raw.sort(key=lambda x: x["original_index"])
733
+
734
+ # Restructure the results
735
+ output: List[FrameExtractionUnitResult] = []
736
+ messages_log: Optional[List[List[Dict[str, str]]]] = [] if return_messages_log else None
737
+
738
+ for result_data in results_raw:
739
+ unit = result_data["unit"]
740
+ gen_text = result_data["gen_text"]
741
+
742
+ # Create result object
743
+ result = FrameExtractionUnitResult(
744
+ start=unit.start,
745
+ end=unit.end,
746
+ text=unit.text,
747
+ gen_text=gen_text
748
+ )
749
+ output.append(result)
750
+
751
+ # Append to messages log if requested
752
+ if return_messages_log:
753
+ final_messages = result_data["messages"] + [{"role": "assistant", "content": gen_text}]
754
+ messages_log.append(final_messages)
755
+
756
+ if return_messages_log:
757
+ return output, messages_log
758
+ else:
759
+ return output
808
760
 
809
- # Collect outputs
810
- for gen_text, sent in zip(responses, batch):
811
- output.append({'sentence_start': sent['start'],
812
- 'sentence_end': sent['end'],
813
- 'sentence_text': sent['sentence_text'],
814
- 'gen_text': gen_text})
815
- return output
816
-
817
761
 
818
- def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=512,
819
- document_key:str=None, temperature:float=0.0, stream:bool=False,
762
+ def extract_frames(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
763
+ document_key:str=None, temperature:float=0.0, verbose:bool=False,
820
764
  concurrent:bool=False, concurrent_batch_size:int=32,
821
765
  case_sensitive:bool=False, fuzzy_match:bool=True, fuzzy_buffer_size:float=0.2, fuzzy_score_cutoff:float=0.8,
822
- allow_overlap_entities:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
766
+ allow_overlap_entities:bool=False, return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
823
767
  """
824
768
  This method inputs a text and outputs a list of LLMInformationExtractionFrame
825
769
  It use the extract() method and post-process outputs into frames.
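extract_async() now builds one task per unit and throttles them with an asyncio.Semaphore of size concurrent_batch_size, then restores the original unit order before returning. Most callers reach it through extract_frames(concurrent=True); note_text is an assumed document string and the engine must implement chat_async():

    # Concurrent path; verbose output is not supported in this mode.
    frames = extractor.extract_frames(
        text_content=note_text,
        concurrent=True,
        concurrent_batch_size=16,
    )

    # Or, inside an async application, await the raw per-unit results directly:
    # unit_results = await extractor.extract_async(text_content=note_text,
    #                                              concurrent_batch_size=16)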
@@ -830,8 +774,6 @@ class SentenceFrameExtractor(FrameExtractor):
830
774
  the input text content to put in prompt template.
831
775
  If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
832
776
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
833
- entity_key : str
834
- the key (in ouptut JSON) for entity text.
835
777
  max_new_tokens : str, Optional
836
778
  the max number of new tokens LLM should generate.
837
779
  document_key : str, Optional
@@ -839,7 +781,7 @@ class SentenceFrameExtractor(FrameExtractor):
839
781
  If text_content is str, this parameter will be ignored.
840
782
  temperature : float, Optional
841
783
  the temperature for token sampling.
842
- stream : bool, Optional
784
+ verbose : bool, Optional
843
785
  if True, LLM generated text will be printed in terminal in real-time.
844
786
  concurrent : bool, Optional
845
787
  if True, the sentences will be extracted in concurrent.
@@ -857,40 +799,48 @@ class SentenceFrameExtractor(FrameExtractor):
857
799
  allow_overlap_entities : bool, Optional
858
800
  if True, entities can overlap in the text.
859
801
  Note that this can cause multiple frames to be generated on the same entity span if they have same entity text.
802
+ return_messages_log : bool, Optional
803
+ if True, a list of messages will be returned.
860
804
 
861
805
  Return : str
862
806
  a list of frames.
863
807
  """
808
+ ENTITY_KEY = "entity_text"
864
809
  if concurrent:
865
- if stream:
866
- warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
810
+ if verbose:
811
+ warnings.warn("verbose=True is not supported in concurrent mode.", RuntimeWarning)
867
812
 
868
813
  nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
869
- llm_output_sentences = asyncio.run(self.extract_async(text_content=text_content,
870
- max_new_tokens=max_new_tokens,
871
- document_key=document_key,
872
- temperature=temperature,
873
- concurrent_batch_size=concurrent_batch_size,
874
- **kwrs)
875
- )
814
+ extraction_results = asyncio.run(self.extract_async(text_content=text_content,
815
+ max_new_tokens=max_new_tokens,
816
+ document_key=document_key,
817
+ temperature=temperature,
818
+ concurrent_batch_size=concurrent_batch_size,
819
+ return_messages_log=return_messages_log,
820
+ **kwrs)
821
+ )
876
822
  else:
877
- llm_output_sentences = self.extract(text_content=text_content,
878
- max_new_tokens=max_new_tokens,
879
- document_key=document_key,
880
- temperature=temperature,
881
- stream=stream,
882
- **kwrs)
823
+ extraction_results = self.extract(text_content=text_content,
824
+ max_new_tokens=max_new_tokens,
825
+ document_key=document_key,
826
+ temperature=temperature,
827
+ verbose=verbose,
828
+ return_messages_log=return_messages_log,
829
+ **kwrs)
830
+
831
+ llm_output_results, messages_log = extraction_results if return_messages_log else (extraction_results, None)
832
+
883
833
  frame_list = []
884
- for sent in llm_output_sentences:
834
+ for res in llm_output_results:
885
835
  entity_json = []
886
- for entity in self._extract_json(gen_text=sent['gen_text']):
887
- if entity_key in entity:
836
+ for entity in self._extract_json(gen_text=res.gen_text):
837
+ if ENTITY_KEY in entity:
888
838
  entity_json.append(entity)
889
839
  else:
890
- warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{entity_key}"). This frame will be dropped.', RuntimeWarning)
840
+ warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{ENTITY_KEY}"). This frame will be dropped.', RuntimeWarning)
891
841
 
892
- spans = self._find_entity_spans(text=sent['sentence_text'],
893
- entities=[e[entity_key] for e in entity_json],
842
+ spans = self._find_entity_spans(text=res.text,
843
+ entities=[e[ENTITY_KEY] for e in entity_json],
894
844
  case_sensitive=case_sensitive,
895
845
  fuzzy_match=fuzzy_match,
896
846
  fuzzy_buffer_size=fuzzy_buffer_size,
@@ -899,31 +849,41 @@ class SentenceFrameExtractor(FrameExtractor):
899
849
  for ent, span in zip(entity_json, spans):
900
850
  if span is not None:
901
851
  start, end = span
902
- entity_text = sent['sentence_text'][start:end]
903
- start += sent['sentence_start']
904
- end += sent['sentence_start']
852
+ entity_text = res.text[start:end]
853
+ start += res.start
854
+ end += res.start
855
+ attr = {}
856
+ if "attr" in ent and ent["attr"] is not None:
857
+ attr = ent["attr"]
858
+
905
859
  frame = LLMInformationExtractionFrame(frame_id=f"{len(frame_list)}",
906
860
  start=start,
907
861
  end=end,
908
862
  entity_text=entity_text,
909
- attr={k: v for k, v in ent.items() if k != entity_key and v != ""})
863
+ attr=attr)
910
864
  frame_list.append(frame)
911
- return frame_list
912
865
 
866
+ if return_messages_log:
867
+ return frame_list, messages_log
868
+ return frame_list
869
+
913
870
 
914
- class SentenceReviewFrameExtractor(SentenceFrameExtractor):
915
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
916
- review_mode:str, review_prompt:str=None, system_prompt:str=None,
917
- context_sentences:Union[str, int]="all", **kwrs):
871
+ class ReviewFrameExtractor(DirectFrameExtractor):
872
+ def __init__(self, unit_chunker:UnitChunker, context_chunker:ContextChunker,
873
+ inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None, **kwrs):
918
874
  """
919
- This class adds a review step after the SentenceFrameExtractor.
920
- For each sentence, the review process asks LLM to review its output and:
921
- 1. add more frames while keeping current. This is efficient for boosting recall.
875
+ This class adds a review step after the DirectFrameExtractor.
876
+ The review process asks the LLM to review its output and:
877
+ 1. add more frames while keeping the current ones. This is efficient for boosting recall.
922
878
  2. or, regenerate frames (add new and delete existing).
923
879
  Use the review_mode parameter to specify. Note that the review_prompt should instruct LLM accordingly.
924
880
 
925
881
  Parameters:
926
882
  ----------
883
+ unit_chunker : UnitChunker
884
+ the unit chunker object that determines how to chunk the document text into units.
885
+ context_chunker : ContextChunker
886
+ the context chunker object that determines how to get context for each unit.
927
887
  inference_engine : InferenceEngine
928
888
  the LLM inferencing engine object. Must implements the chat() method.
929
889
  prompt_template : str
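A consequence of the post-processing above is that the output schema is now fixed: each extracted JSON object must carry an "entity_text" key, and attributes live under an optional nested "attr" object instead of extra top-level keys. An illustrative (made-up) comparison of the shapes the two versions parse:

    # 1.0.0 expects, per frame:
    new_style = {"entity_text": "metformin", "attr": {"dosage": "500 mg"}}

    # 0.4.6 accepted an arbitrary entity_key plus flat attributes, e.g. with entity_key="Drug":
    old_style = {"Drug": "metformin", "dosage": "500 mg"}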
@@ -936,36 +896,215 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
936
896
  addition mode only ask LLM to add new frames, while revision mode ask LLM to regenerate.
937
897
  system_prompt : str, Optional
938
898
  system prompt.
939
- context_sentences : Union[str, int], Optional
940
- number of sentences before and after the given sentence to provide additional context.
941
- if "all", the full text will be provided in the prompt as context.
942
- if 0, no additional context will be provided.
943
- This is good for tasks that does not require context beyond the given sentence.
944
- if > 0, the number of sentences before and after the given sentence to provide as context.
945
- This is good for tasks that require context beyond the given sentence.
946
899
  """
947
- super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
948
- system_prompt=system_prompt, context_sentences=context_sentences, **kwrs)
949
-
900
+ super().__init__(inference_engine=inference_engine,
901
+ unit_chunker=unit_chunker,
902
+ prompt_template=prompt_template,
903
+ system_prompt=system_prompt,
904
+ context_chunker=context_chunker,
905
+ **kwrs)
906
+ # check review mode
950
907
  if review_mode not in {"addition", "revision"}:
951
908
  raise ValueError('review_mode must be one of {"addition", "revision"}.')
952
909
  self.review_mode = review_mode
910
+ # assign review prompt
911
+ if review_prompt:
912
+ self.review_prompt = review_prompt
913
+ else:
914
+ self.review_prompt = None
915
+ original_class_name = self.__class__.__name__
916
+
917
+ current_class_name = original_class_name
918
+ for current_class_in_mro in self.__class__.__mro__:
919
+ if current_class_in_mro is object:
920
+ continue
921
+
922
+ current_class_name = current_class_in_mro.__name__
923
+ try:
924
+ file_path = importlib.resources.files('llm_ie.asset.default_prompts').\
925
+ joinpath(f"{current_class_name}_{self.review_mode}_review_prompt.txt")
926
+ with open(file_path, 'r', encoding="utf-8") as f:
927
+ self.review_prompt = f.read()
928
+ except FileNotFoundError:
929
+ pass
930
+
931
+ except Exception as e:
932
+ warnings.warn(
933
+ f"Error attempting to read default review prompt for '{current_class_name}' "
934
+ f"from '{str(file_path)}': {e}. Trying next in MRO.",
935
+ UserWarning
936
+ )
937
+ continue
938
+
939
+ if self.review_prompt is None:
940
+ raise ValueError(f"Cannot find review prompt for {self.__class__.__name__} in the package. Please provide a review_prompt.")
941
+
942
+ def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None,
943
+ temperature:float=0.0, verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
944
+ """
945
+ This method inputs a text and outputs a list of outputs per unit.
946
+
947
+ Parameters:
948
+ ----------
949
+ text_content : Union[str, Dict[str,str]]
950
+ the input text content to put in prompt template.
951
+ If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
952
+ If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
953
+ max_new_tokens : int, Optional
954
+ the max number of new tokens LLM should generate.
955
+ document_key : str, Optional
956
+ specify the key in text_content where document text is.
957
+ If text_content is str, this parameter will be ignored.
958
+ temperature : float, Optional
959
+ the temperature for token sampling.
960
+ verbose : bool, Optional
961
+ if True, LLM generated text will be printed in terminal in real-time.
962
+ return_messages_log : bool, Optional
963
+ if True, a list of messages will be returned.
964
+
965
+ Return : List[FrameExtractionUnitResult]
966
+ the output from LLM for each unit. Contains the start, end, text, and generated text.
967
+ """
968
+ # define output
969
+ output = []
970
+ # unit chunking
971
+ if isinstance(text_content, str):
972
+ doc_text = text_content
973
+
974
+ elif isinstance(text_content, dict):
975
+ if document_key is None:
976
+ raise ValueError("document_key must be provided when text_content is dict.")
977
+ doc_text = text_content[document_key]
978
+
979
+ units = self.unit_chunker.chunk(doc_text)
980
+ # context chunker init
981
+ self.context_chunker.fit(doc_text, units)
982
+ # messages log
983
+ if return_messages_log:
984
+ messages_log = []
985
+
986
+ # generate unit by unit
987
+ for i, unit in enumerate(units):
988
+ # <--- Initial generation step --->
989
+ # construct chat messages
990
+ messages = []
991
+ if self.system_prompt:
992
+ messages.append({'role': 'system', 'content': self.system_prompt})
993
+
994
+ context = self.context_chunker.chunk(unit)
995
+
996
+ if context == "":
997
+ # no context, just place unit in user prompt
998
+ if isinstance(text_content, str):
999
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
1000
+ else:
1001
+ unit_content = text_content.copy()
1002
+ unit_content[document_key] = unit.text
1003
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
1004
+ else:
1005
+ # insert context to user prompt
1006
+ if isinstance(text_content, str):
1007
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1008
+ else:
1009
+ context_content = text_content.copy()
1010
+ context_content[document_key] = context
1011
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
1012
+ # simulate conversation where assistant confirms
1013
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
1014
+ # place unit of interest
1015
+ messages.append({'role': 'user', 'content': unit.text})
1016
+
1017
+ if verbose:
1018
+ print(f"\n\n{Fore.GREEN}Unit {i}:{Style.RESET_ALL}\n{unit.text}\n")
1019
+ if context != "":
1020
+ print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
1021
+
1022
+ print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
1023
+
1024
+ response_stream = self.inference_engine.chat(
1025
+ messages=messages,
1026
+ max_new_tokens=max_new_tokens,
1027
+ temperature=temperature,
1028
+ stream=True,
1029
+ **kwrs
1030
+ )
1031
+
1032
+ initial = ""
1033
+ for chunk in response_stream:
1034
+ initial += chunk
1035
+ print(chunk, end='', flush=True)
1036
+
1037
+ else:
1038
+ initial = self.inference_engine.chat(
1039
+ messages=messages,
1040
+ max_new_tokens=max_new_tokens,
1041
+ temperature=temperature,
1042
+ stream=False,
1043
+ **kwrs
1044
+ )
1045
+
1046
+ if return_messages_log:
1047
+ messages.append({"role": "assistant", "content": initial})
1048
+ messages_log.append(messages)
1049
+
1050
+ # <--- Review step --->
1051
+ if verbose:
1052
+ print(f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}")
1053
+
1054
+ messages.append({'role': 'assistant', 'content': initial})
1055
+ messages.append({'role': 'user', 'content': self.review_prompt})
1056
+
1057
+ if verbose:
1058
+ response_stream = self.inference_engine.chat(
1059
+ messages=messages,
1060
+ max_new_tokens=max_new_tokens,
1061
+ temperature=temperature,
1062
+ stream=True,
1063
+ **kwrs
1064
+ )
1065
+
1066
+ review = ""
1067
+ for chunk in response_stream:
1068
+ review += chunk
1069
+ print(chunk, end='', flush=True)
1070
+
1071
+ else:
1072
+ review = self.inference_engine.chat(
1073
+ messages=messages,
1074
+ max_new_tokens=max_new_tokens,
1075
+ temperature=temperature,
1076
+ stream=False,
1077
+ **kwrs
1078
+ )
953
1079
 
954
- if review_prompt:
955
- self.review_prompt = review_prompt
956
- else:
957
- file_path = importlib.resources.files('llm_ie.asset.default_prompts').\
958
- joinpath(f"{self.__class__.__name__}_{self.review_mode}_review_prompt.txt")
959
- with open(file_path, 'r', encoding="utf-8") as f:
960
- self.review_prompt = f.read()
1080
+ # Output
1081
+ if self.review_mode == "revision":
1082
+ gen_text = review
1083
+ elif self.review_mode == "addition":
1084
+ gen_text = initial + '\n' + review
1085
+
1086
+ if return_messages_log:
1087
+ messages.append({"role": "assistant", "content": review})
1088
+ messages_log.append(messages)
961
1089
 
962
- warnings.warn(f'Custom review prompt not provided. The default review prompt is used:\n"{self.review_prompt}"', UserWarning)
1090
+ # add to output
1091
+ result = FrameExtractionUnitResult(
1092
+ start=unit.start,
1093
+ end=unit.end,
1094
+ text=unit.text,
1095
+ gen_text=gen_text)
1096
+ output.append(result)
1097
+
1098
+ if return_messages_log:
1099
+ return output, messages_log
1100
+
1101
+ return output
963
1102
 
964
1103
 
965
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
966
- document_key:str=None, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
1104
+ def stream(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048,
1105
+ document_key:str=None, temperature:float=0.0, **kwrs) -> Generator[str, None, None]:
967
1106
  """
968
- This method inputs a text and outputs a list of outputs per sentence.
1107
+ This method inputs a text and outputs a list of outputs per unit.
969
1108
 
970
1109
  Parameters:
971
1110
  ----------
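Putting the review flow together: the rewritten ReviewFrameExtractor sits on top of DirectFrameExtractor and takes the chunkers explicitly. "addition" appends the reviewed output to the initial extraction, "revision" keeps only the reviewed output, and leaving review_prompt unset loads the packaged default (a ValueError is raised if none is found). A sketch, assuming the engine, prompt_template, and note_text objects exist and that the chunker constructors take no arguments:

    from llm_ie.chunkers import SentenceUnitChunker, WholeDocumentContextChunker
    from llm_ie.extractors import ReviewFrameExtractor

    reviewer = ReviewFrameExtractor(
        unit_chunker=SentenceUnitChunker(),
        context_chunker=WholeDocumentContextChunker(),
        inference_engine=engine,
        prompt_template=prompt_template,
        review_mode="addition",          # or "revision"
        # review_prompt=None -> the packaged default review prompt is used
    )
    frames = reviewer.extract_frames(text_content=note_text)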
@@ -973,234 +1112,371 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
973
1112
  the input text content to put in prompt template.
974
1113
  If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
975
1114
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
976
- max_new_tokens : str, Optional
1115
+ max_new_tokens : int, Optional
977
1116
  the max number of new tokens LLM should generate.
978
1117
  document_key : str, Optional
979
1118
  specify the key in text_content where document text is.
980
1119
  If text_content is str, this parameter will be ignored.
981
1120
  temperature : float, Optional
982
1121
  the temperature for token sampling.
983
- stream : bool, Optional
984
- if True, LLM generated text will be printed in terminal in real-time.
985
1122
 
986
- Return : str
987
- the output from LLM. Need post-processing.
1123
+ Return : List[FrameExtractionUnitResult]
1124
+ the output from LLM for each unit. Contains the start, end, text, and generated text.
988
1125
  """
989
- # define output
990
- output = []
991
- # sentence tokenization
1126
+ # unit chunking
992
1127
  if isinstance(text_content, str):
993
- sentences = self._get_sentences(text_content)
1128
+ doc_text = text_content
1129
+
994
1130
  elif isinstance(text_content, dict):
995
1131
  if document_key is None:
996
1132
  raise ValueError("document_key must be provided when text_content is dict.")
997
- sentences = self._get_sentences(text_content[document_key])
1133
+ doc_text = text_content[document_key]
998
1134
 
999
- # generate sentence by sentence
1000
- for i, sent in enumerate(sentences):
1135
+ units = self.unit_chunker.chunk(doc_text)
1136
+ # context chunker init
1137
+ self.context_chunker.fit(doc_text, units)
1138
+
1139
+ # generate unit by unit
1140
+ for i, unit in enumerate(units):
1141
+ # <--- Initial generation step --->
1001
1142
  # construct chat messages
1002
1143
  messages = []
1003
1144
  if self.system_prompt:
1004
1145
  messages.append({'role': 'system', 'content': self.system_prompt})
1005
1146
 
1006
- context = self._get_context_sentences(text_content, i, sentences, document_key)
1147
+ context = self.context_chunker.chunk(unit)
1007
1148
 
1008
- if self.context_sentences == 0:
1009
- # no context, just place sentence of interest
1010
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
1149
+ if context == "":
1150
+ # no context, just place unit in user prompt
1151
+ if isinstance(text_content, str):
1152
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
1153
+ else:
1154
+ unit_content = text_content.copy()
1155
+ unit_content[document_key] = unit.text
1156
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
1011
1157
  else:
1012
- # insert context
1013
- messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1014
- # simulate conversation
1015
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
1016
- # place sentence of interest
1017
- messages.append({'role': 'user', 'content': sent['sentence_text']})
1018
-
1019
- if stream:
1020
- print(f"\n\n{Fore.GREEN}Sentence {i}: {Style.RESET_ALL}\n{sent['sentence_text']}\n")
1021
- if isinstance(self.context_sentences, int) and self.context_sentences > 0:
1022
- print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
1023
- print(f"{Fore.BLUE}Initial Output:{Style.RESET_ALL}")
1158
+ # insert context to user prompt
1159
+ if isinstance(text_content, str):
1160
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1161
+ else:
1162
+ context_content = text_content.copy()
1163
+ context_content[document_key] = context
1164
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
1165
+ # simulate conversation where assistant confirms
1166
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
1167
+ # place unit of interest
1168
+ messages.append({'role': 'user', 'content': unit.text})
1169
+
1170
+
1171
+ yield f"\n\n{Fore.GREEN}Unit {i}:{Style.RESET_ALL}\n{unit.text}\n"
1172
+ if context != "":
1173
+ yield f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n"
1174
+
1175
+ yield f"{Fore.BLUE}Extraction:{Style.RESET_ALL}\n"
1024
1176
 
1025
- initial = self.inference_engine.chat(
1177
+ response_stream = self.inference_engine.chat(
1026
1178
  messages=messages,
1027
1179
  max_new_tokens=max_new_tokens,
1028
1180
  temperature=temperature,
1029
- stream=stream,
1181
+ stream=True,
1030
1182
  **kwrs
1031
1183
  )
1032
1184
 
1033
- # Review
1034
- if stream:
1035
- print(f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}")
1185
+ initial = ""
1186
+ for chunk in response_stream:
1187
+ initial += chunk
1188
+ yield chunk
1189
+
1190
+ # <--- Review step --->
1191
+ yield f"\n{Fore.YELLOW}Review:{Style.RESET_ALL}"
1192
+
1036
1193
  messages.append({'role': 'assistant', 'content': initial})
1037
1194
  messages.append({'role': 'user', 'content': self.review_prompt})
1038
1195
 
1039
- review = self.inference_engine.chat(
1196
+ response_stream = self.inference_engine.chat(
1040
1197
  messages=messages,
1041
1198
  max_new_tokens=max_new_tokens,
1042
1199
  temperature=temperature,
1043
- stream=stream,
1200
+ stream=True,
1044
1201
  **kwrs
1045
1202
  )
1046
1203
 
1047
- # Output
1048
- if self.review_mode == "revision":
1049
- gen_text = review
1050
- elif self.review_mode == "addition":
1051
- gen_text = initial + '\n' + review
1204
+ for chunk in response_stream:
1205
+ yield chunk
1052
1206
 
1053
- # add to output
1054
- output.append({'sentence_start': sent['start'],
1055
- 'sentence_end': sent['end'],
1056
- 'sentence_text': sent['sentence_text'],
1057
- 'gen_text': gen_text})
1058
- return output
1059
-
1060
- async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
1061
- document_key:str=None, temperature:float=0.0, concurrent_batch_size:int=32, **kwrs) -> List[Dict[str,str]]:
1207
+ async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None, temperature:float=0.0,
1208
+ concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
1062
1209
  """
1063
- The asynchronous version of the extract() method.
1210
+ This is the asynchronous version of the extract() method with the review step.
1064
1211
 
1065
1212
  Parameters:
1066
1213
  ----------
1067
1214
  text_content : Union[str, Dict[str,str]]
1068
- the input text content to put in prompt template.
1215
+ the input text content to put in prompt template.
1069
1216
  If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
1070
1217
  If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
1071
- max_new_tokens : str, Optional
1072
- the max number of new tokens LLM should generate.
1218
+ max_new_tokens : int, Optional
1219
+ the max number of new tokens LLM should generate.
1073
1220
  document_key : str, Optional
1074
- specify the key in text_content where document text is.
1221
+ specify the key in text_content where document text is.
1075
1222
  If text_content is str, this parameter will be ignored.
1076
1223
  temperature : float, Optional
1077
1224
  the temperature for token sampling.
1078
1225
  concurrent_batch_size : int, Optional
1079
- the number of sentences to process in concurrent.
1226
+ the batch size for concurrent processing.
1227
+ return_messages_log : bool, Optional
1228
+ if True, a list of messages will be returned, including review steps.
1080
1229
 
1081
- Return : str
1082
- the output from LLM. Need post-processing.
1230
+ Return : List[FrameExtractionUnitResult]
1231
+ the output from LLM for each unit after review. Contains the start, end, text, and generated text.
1083
1232
  """
1084
- # Check if self.inference_engine.chat_async() is implemented
1085
- if not hasattr(self.inference_engine, 'chat_async'):
1086
- raise NotImplementedError(f"{self.inference_engine.__class__.__name__} does not have chat_async() method.")
1087
-
1088
- # define output
1089
- output = []
1090
- # sentence tokenization
1091
1233
  if isinstance(text_content, str):
1092
- sentences = self._get_sentences(text_content)
1234
+ doc_text = text_content
1093
1235
  elif isinstance(text_content, dict):
1094
1236
  if document_key is None:
1095
1237
  raise ValueError("document_key must be provided when text_content is dict.")
1096
- sentences = self._get_sentences(text_content[document_key])
1097
-
1098
- # generate initial outputs sentence by sentence
1099
- for i in range(0, len(sentences), concurrent_batch_size):
1100
- messages_list = []
1101
- init_tasks = []
1102
- review_tasks = []
1103
- batch = sentences[i:i + concurrent_batch_size]
1104
- for j, sent in enumerate(batch):
1105
- # construct chat messages
1106
- messages = []
1107
- if self.system_prompt:
1108
- messages.append({'role': 'system', 'content': self.system_prompt})
1238
+ if document_key not in text_content:
1239
+ raise ValueError(f"document_key '{document_key}' not found in text_content dictionary.")
1240
+ doc_text = text_content[document_key]
1241
+ else:
1242
+ raise TypeError("text_content must be a string or a dictionary.")
1109
1243
 
1110
- context = self._get_context_sentences(text_content, i + j, sentences, document_key)
1111
-
1112
- if self.context_sentences == 0:
1113
- # no context, just place sentence of interest
1114
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
1244
+ units = self.unit_chunker.chunk(doc_text)
1245
+
1246
+ # context chunker init
1247
+ self.context_chunker.fit(doc_text, units)
1248
+
1249
+ # <--- Initial generation step --->
1250
+ initial_tasks_input = []
1251
+ for i, unit in enumerate(units):
1252
+ # construct chat messages for initial generation
1253
+ messages = []
1254
+ if self.system_prompt:
1255
+ messages.append({'role': 'system', 'content': self.system_prompt})
1256
+
1257
+ context = self.context_chunker.chunk(unit)
1258
+
1259
+ if context == "":
1260
+ # no context, just place unit in user prompt
1261
+ if isinstance(text_content, str):
1262
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
1115
1263
  else:
1116
- # insert context
1264
+ unit_content = text_content.copy()
1265
+ unit_content[document_key] = unit.text
1266
+ messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
1267
+ else:
1268
+ # insert context to user prompt
1269
+ if isinstance(text_content, str):
1117
1270
  messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1118
- # simulate conversation
1119
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
1120
- # place sentence of interest
1121
- messages.append({'role': 'user', 'content': sent['sentence_text']})
1271
+ else:
1272
+ context_content = text_content.copy()
1273
+ context_content[document_key] = context
1274
+ messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
1275
+ # simulate conversation where assistant confirms
1276
+ messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
1277
+ # place unit of interest
1278
+ messages.append({'role': 'user', 'content': unit.text})
1279
+
1280
+ # Store unit and messages together for the initial task
1281
+ initial_tasks_input.append({"unit": unit, "messages": messages, "original_index": i})
1282
+
1283
+ semaphore = asyncio.Semaphore(concurrent_batch_size)
1284
+
1285
+ async def initial_semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
1286
+ unit = task_data["unit"]
1287
+ messages = task_data["messages"]
1288
+ original_index = task_data["original_index"]
1289
+
1290
+ async with semaphore:
1291
+ gen_text = await self.inference_engine.chat_async(
1292
+ messages=messages,
1293
+ max_new_tokens=max_new_tokens,
1294
+ temperature=temperature,
1295
+ **kwrs
1296
+ )
1297
+ # Return initial generation result along with the messages used and the unit
1298
+ return {"original_index": original_index, "unit": unit, "initial_gen_text": gen_text, "initial_messages": messages}
1299
+
1300
+ # Create and gather initial generation tasks
1301
+ initial_tasks = [
1302
+ asyncio.create_task(initial_semaphore_helper(
1303
+ task_inp,
1304
+ max_new_tokens=max_new_tokens,
1305
+ temperature=temperature,
1306
+ **kwrs
1307
+ ))
1308
+ for task_inp in initial_tasks_input
1309
+ ]
1310
+
1311
+ initial_results_raw = await asyncio.gather(*initial_tasks)
1312
+
1313
+ # Sort initial results back into original order
1314
+ initial_results_raw.sort(key=lambda x: x["original_index"])
1315
+
1316
+ # <--- Review step --->
1317
+ review_tasks_input = []
1318
+ for result_data in initial_results_raw:
1319
+ # Prepare messages for the review step
1320
+ initial_messages = result_data["initial_messages"]
1321
+ initial_gen_text = result_data["initial_gen_text"]
1322
+ review_messages = initial_messages + [
1323
+ {'role': 'assistant', 'content': initial_gen_text},
1324
+ {'role': 'user', 'content': self.review_prompt}
1325
+ ]
1326
+ # Store data needed for review task
1327
+ review_tasks_input.append({
1328
+ "unit": result_data["unit"],
1329
+ "initial_gen_text": initial_gen_text,
1330
+ "messages": review_messages,
1331
+ "original_index": result_data["original_index"],
1332
+ "full_initial_log": initial_messages + [{'role': 'assistant', 'content': initial_gen_text}] if return_messages_log else None # Log up to initial generation
1333
+ })
1334
+
1335
+
1336
+ async def review_semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
1337
+ messages = task_data["messages"]
1338
+ original_index = task_data["original_index"]
1339
+
1340
+ async with semaphore:
1341
+ review_gen_text = await self.inference_engine.chat_async(
1342
+ messages=messages,
1343
+ max_new_tokens=max_new_tokens,
1344
+ temperature=temperature,
1345
+ **kwrs
1346
+ )
1347
+ # Combine initial and review results
1348
+ task_data["review_gen_text"] = review_gen_text
1349
+ if return_messages_log:
1350
+ # Log for the review call itself
1351
+ task_data["full_review_log"] = messages + [{'role': 'assistant', 'content': review_gen_text}]
1352
+ return task_data # Return the augmented dictionary
1353
+
1354
+ # Create and gather review tasks
1355
+ review_tasks = [
1356
+ asyncio.create_task(review_semaphore_helper(
1357
+ task_inp,
1358
+ max_new_tokens=max_new_tokens,
1359
+ temperature=temperature,
1360
+ **kwrs
1361
+ ))
1362
+ for task_inp in review_tasks_input
1363
+ ]
1364
+
1365
+ final_results_raw = await asyncio.gather(*review_tasks)
1366
+
1367
+ # Sort final results back into original order (although gather might preserve order for tasks added sequentially)
1368
+ final_results_raw.sort(key=lambda x: x["original_index"])
1369
+
1370
+ # <--- Process final results --->
1371
+ output: List[FrameExtractionUnitResult] = []
1372
+ messages_log: Optional[List[List[Dict[str, str]]]] = [] if return_messages_log else None
1373
+
1374
+ for result_data in final_results_raw:
1375
+ unit = result_data["unit"]
1376
+ initial_gen = result_data["initial_gen_text"]
1377
+ review_gen = result_data["review_gen_text"]
1378
+
1379
+ # Combine based on review mode
1380
+ if self.review_mode == "revision":
1381
+ final_gen_text = review_gen
1382
+ elif self.review_mode == "addition":
1383
+ final_gen_text = initial_gen + '\n' + review_gen
1384
+ else: # Should not happen due to init check
1385
+ final_gen_text = review_gen # Default to revision if mode is somehow invalid
1386
+
1387
+ # Create final result object
1388
+ result = FrameExtractionUnitResult(
1389
+ start=unit.start,
1390
+ end=unit.end,
1391
+ text=unit.text,
1392
+ gen_text=final_gen_text # Use the combined/reviewed text
1393
+ )
1394
+ output.append(result)
1395
+
1396
+ # Append full conversation log if requested
1397
+ if return_messages_log:
1398
+ full_log_for_unit = result_data.get("full_initial_log", []) + [{'role': 'user', 'content': self.review_prompt}] + [{'role': 'assistant', 'content': review_gen}]
1399
+ messages_log.append(full_log_for_unit)
1400
+
1401
+ if return_messages_log:
1402
+ return output, messages_log
1403
+ else:
1404
+ return output
1122
1405
 
1123
- messages_list.append(messages)
1124
1406
 
1125
- task = asyncio.create_task(
1126
- self.inference_engine.chat_async(
1127
- messages=messages,
1128
- max_new_tokens=max_new_tokens,
1129
- temperature=temperature,
1130
- **kwrs
1131
- )
1132
- )
1133
- init_tasks.append(task)
1134
-
1135
- # Wait until the batch is done, collect results and move on to next batch
1136
- init_responses = await asyncio.gather(*init_tasks)
1137
- # Collect initials
1138
- initials = []
1139
- for gen_text, sent, messages in zip(init_responses, batch, messages_list):
1140
- initials.append({'sentence_start': sent['start'],
1141
- 'sentence_end': sent['end'],
1142
- 'sentence_text': sent['sentence_text'],
1143
- 'gen_text': gen_text,
1144
- 'messages': messages})
1145
-
1146
- # Review
1147
- for init in initials:
1148
- messages = init["messages"]
1149
- initial = init["gen_text"]
1150
- messages.append({'role': 'assistant', 'content': initial})
1151
- messages.append({'role': 'user', 'content': self.review_prompt})
1152
- task = asyncio.create_task(
1153
- self.inference_engine.chat_async(
1154
- messages=messages,
1155
- max_new_tokens=max_new_tokens,
1156
- temperature=temperature,
1157
- **kwrs
1158
- )
1159
- )
1160
- review_tasks.append(task)
1161
-
1162
- review_responses = await asyncio.gather(*review_tasks)
1163
-
1164
- # Collect reviews
1165
- reviews = []
1166
- for gen_text, sent in zip(review_responses, batch):
1167
- reviews.append({'sentence_start': sent['start'],
1168
- 'sentence_end': sent['end'],
1169
- 'sentence_text': sent['sentence_text'],
1170
- 'gen_text': gen_text})
1171
-
1172
- for init, rev in zip(initials, reviews):
1173
- if self.review_mode == "revision":
1174
- gen_text = rev['gen_text']
1175
- elif self.review_mode == "addition":
1176
- gen_text = init['gen_text'] + '\n' + rev['gen_text']
1177
-
1178
- # add to output
1179
- output.append({'sentence_start': init['sentence_start'],
1180
- 'sentence_end': init['sentence_end'],
1181
- 'sentence_text': init['sentence_text'],
1182
- 'gen_text': gen_text})
1183
- return output
1407
+ class BasicFrameExtractor(DirectFrameExtractor):
1408
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
1409
+ """
1410
+ This class diretly prompt LLM for frame extraction.
1411
+ Input system prompt (optional), prompt template (with instruction, few-shot examples),
1412
+ and specify a LLM.
1413
+
1414
+ Parameters:
1415
+ ----------
1416
+ inference_engine : InferenceEngine
1417
+ the LLM inferencing engine object. Must implements the chat() method.
1418
+ prompt_template : str
1419
+ prompt template with "{{<placeholder name>}}" placeholder.
1420
+ system_prompt : str, Optional
1421
+ system prompt.
1422
+ """
1423
+ super().__init__(inference_engine=inference_engine,
1424
+ unit_chunker=WholeDocumentUnitChunker(),
1425
+ prompt_template=prompt_template,
1426
+ system_prompt=system_prompt,
1427
+ context_chunker=NoContextChunker(),
1428
+ **kwrs)
1429
+
1430
+ class BasicReviewFrameExtractor(ReviewFrameExtractor):
1431
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None, **kwrs):
1432
+ """
1433
+ This class add a review step after the BasicFrameExtractor.
1434
+ The Review process asks LLM to review its output and:
1435
+ 1. add more frames while keep current. This is efficient for boosting recall.
1436
+ 2. or, regenerate frames (add new and delete existing).
1437
+ Use the review_mode parameter to specify. Note that the review_prompt should instruct LLM accordingly.
1438
+
1439
+ Parameters:
1440
+ ----------
1441
+ inference_engine : InferenceEngine
1442
+ the LLM inferencing engine object. Must implements the chat() method.
1443
+ prompt_template : str
1444
+ prompt template with "{{<placeholder name>}}" placeholder.
1445
+ review_prompt : str: Optional
1446
+ the prompt text that ask LLM to review. Specify addition or revision in the instruction.
1447
+ if not provided, a default review prompt will be used.
1448
+ review_mode : str
1449
+ review mode. Must be one of {"addition", "revision"}
1450
+ addition mode only ask LLM to add new frames, while revision mode ask LLM to regenerate.
1451
+ system_prompt : str, Optional
1452
+ system prompt.
1453
+ """
1454
+ super().__init__(inference_engine=inference_engine,
1455
+ unit_chunker=WholeDocumentUnitChunker(),
1456
+ prompt_template=prompt_template,
1457
+ review_mode=review_mode,
1458
+ review_prompt=review_prompt,
1459
+ system_prompt=system_prompt,
1460
+ context_chunker=NoContextChunker(),
1461
+ **kwrs)
1184
1462
 
1185
1463
 
1186
- class SentenceCoTFrameExtractor(SentenceFrameExtractor):
1187
- from nltk.tokenize.punkt import PunktSentenceTokenizer
1188
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
1464
+ class SentenceFrameExtractor(DirectFrameExtractor):
1465
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
1189
1466
  context_sentences:Union[str, int]="all", **kwrs):
1190
1467
  """
1191
- This class performs sentence-based Chain-of-thoughts (CoT) information extraction.
1192
- A simulated chat follows this process:
1468
+ This class performs sentence-by-sentence information extraction.
1469
+ The process is as follows:
1193
1470
  1. system prompt (optional)
1194
- 2. user instructions (schema, background, full text, few-shot example...)
1195
- 3. user input first sentence
1196
- 4. assistant analyze the sentence
1197
- 5. assistant extract outputs
1198
- 6. repeat #3, #4, #5
1471
+ 2. user prompt with instructions (schema, background, full text, few-shot example...)
1472
+ 3. feed a sentence (start with first sentence)
1473
+ 4. LLM extract entities and attributes from the sentence
1474
+ 5. iterate to the next sentence and repeat steps 3-4 until all sentences are processed.
1199
1475
 
1200
1476
  Input system prompt (optional), prompt template (with user instructions),
1201
1477
  and specify a LLM.
1202
1478
 
1203
- Parameters
1479
+ Parameters:
1204
1480
  ----------
1205
1481
  inference_engine : InferenceEngine
1206
1482
  the LLM inferencing engine object. Must implements the chat() method.
@@ -1216,82 +1492,77 @@ class SentenceCoTFrameExtractor(SentenceFrameExtractor):
1216
1492
  if > 0, the number of sentences before and after the given sentence to provide as context.
1217
1493
  This is good for tasks that require context beyond the given sentence.
1218
1494
  """
1219
- super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
1220
- system_prompt=system_prompt, context_sentences=context_sentences, **kwrs)
1495
+ if not isinstance(context_sentences, int) and context_sentences != "all":
1496
+ raise ValueError('context_sentences must be an integer (>= 0) or "all".')
1497
+
1498
+ if isinstance(context_sentences, int) and context_sentences < 0:
1499
+ raise ValueError("context_sentences must be a positive integer.")
1500
+
1501
+ if isinstance(context_sentences, int):
1502
+ context_chunker = SlideWindowContextChunker(window_size=context_sentences)
1503
+ elif context_sentences == "all":
1504
+ context_chunker = WholeDocumentContextChunker()
1505
+
1506
+ super().__init__(inference_engine=inference_engine,
1507
+ unit_chunker=SentenceUnitChunker(),
1508
+ prompt_template=prompt_template,
1509
+ system_prompt=system_prompt,
1510
+ context_chunker=context_chunker,
1511
+ **kwrs)
1221
1512
 
1222
1513
 
1223
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
1224
- document_key:str=None, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
1514
+ class SentenceReviewFrameExtractor(ReviewFrameExtractor):
1515
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
1516
+ review_mode:str, review_prompt:str=None, system_prompt:str=None,
1517
+ context_sentences:Union[str, int]="all", **kwrs):
1225
1518
  """
1226
- This method inputs a text and outputs a list of outputs per sentence.
1519
+ This class adds a review step after the SentenceFrameExtractor.
1520
+ For each sentence, the review process asks LLM to review its output and:
1521
+ 1. add more frames while keeping current. This is efficient for boosting recall.
1522
+ 2. or, regenerate frames (add new and delete existing).
1523
+ Use the review_mode parameter to specify. Note that the review_prompt should instruct LLM accordingly.
1227
1524
 
1228
1525
  Parameters:
1229
1526
  ----------
1230
- text_content : Union[str, Dict[str,str]]
1231
- the input text content to put in prompt template.
1232
- If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
1233
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
1234
- max_new_tokens : str, Optional
1235
- the max number of new tokens LLM should generate.
1236
- document_key : str, Optional
1237
- specify the key in text_content where document text is.
1238
- If text_content is str, this parameter will be ignored.
1239
- temperature : float, Optional
1240
- the temperature for token sampling.
1241
- stream : bool, Optional
1242
- if True, LLM generated text will be printed in terminal in real-time.
1243
-
1244
- Return : str
1245
- the output from LLM. Need post-processing.
1527
+ inference_engine : InferenceEngine
1528
+ the LLM inferencing engine object. Must implements the chat() method.
1529
+ prompt_template : str
1530
+ prompt template with "{{<placeholder name>}}" placeholder.
1531
+ review_prompt : str: Optional
1532
+ the prompt text that ask LLM to review. Specify addition or revision in the instruction.
1533
+ if not provided, a default review prompt will be used.
1534
+ review_mode : str
1535
+ review mode. Must be one of {"addition", "revision"}
1536
+ addition mode only ask LLM to add new frames, while revision mode ask LLM to regenerate.
1537
+ system_prompt : str, Optional
1538
+ system prompt.
1539
+ context_sentences : Union[str, int], Optional
1540
+ number of sentences before and after the given sentence to provide additional context.
1541
+ if "all", the full text will be provided in the prompt as context.
1542
+ if 0, no additional context will be provided.
1543
+ This is good for tasks that does not require context beyond the given sentence.
1544
+ if > 0, the number of sentences before and after the given sentence to provide as context.
1545
+ This is good for tasks that require context beyond the given sentence.
1246
1546
  """
1247
- # define output
1248
- output = []
1249
- # sentence tokenization
1250
- if isinstance(text_content, str):
1251
- sentences = self._get_sentences(text_content)
1252
- elif isinstance(text_content, dict):
1253
- sentences = self._get_sentences(text_content[document_key])
1254
-
1255
- # generate sentence by sentence
1256
- for i, sent in enumerate(sentences):
1257
- # construct chat messages
1258
- messages = []
1259
- if self.system_prompt:
1260
- messages.append({'role': 'system', 'content': self.system_prompt})
1261
-
1262
- context = self._get_context_sentences(text_content, i, sentences, document_key)
1263
-
1264
- if self.context_sentences == 0:
1265
- # no context, just place sentence of interest
1266
- messages.append({'role': 'user', 'content': self._get_user_prompt(sent['sentence_text'])})
1267
- else:
1268
- # insert context
1269
- messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
1270
- # simulate conversation
1271
- messages.append({'role': 'assistant', 'content': 'Sure, please provide the sentence of interest.'})
1272
- # place sentence of interest
1273
- messages.append({'role': 'user', 'content': sent['sentence_text']})
1274
-
1275
- if stream:
1276
- print(f"\n\n{Fore.GREEN}Sentence: {Style.RESET_ALL}\n{sent['sentence_text']}\n")
1277
- if isinstance(self.context_sentences, int) and self.context_sentences > 0:
1278
- print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
1279
- print(f"{Fore.BLUE}CoT:{Style.RESET_ALL}")
1280
-
1281
- gen_text = self.inference_engine.chat(
1282
- messages=messages,
1283
- max_new_tokens=max_new_tokens,
1284
- temperature=temperature,
1285
- stream=stream,
1286
- **kwrs
1287
- )
1547
+ if not isinstance(context_sentences, int) and context_sentences != "all":
1548
+ raise ValueError('context_sentences must be an integer (>= 0) or "all".')
1549
+
1550
+ if isinstance(context_sentences, int) and context_sentences < 0:
1551
+ raise ValueError("context_sentences must be a positive integer.")
1552
+
1553
+ if isinstance(context_sentences, int):
1554
+ context_chunker = SlideWindowContextChunker(window_size=context_sentences)
1555
+ elif context_sentences == "all":
1556
+ context_chunker = WholeDocumentContextChunker()
1288
1557
 
1289
- # add to output
1290
- output.append({'sentence_start': sent['start'],
1291
- 'sentence_end': sent['end'],
1292
- 'sentence_text': sent['sentence_text'],
1293
- 'gen_text': gen_text})
1294
- return output
1558
+ super().__init__(inference_engine=inference_engine,
1559
+ unit_chunker=SentenceUnitChunker(),
1560
+ prompt_template=prompt_template,
1561
+ review_mode=review_mode,
1562
+ review_prompt=review_prompt,
1563
+ system_prompt=system_prompt,
1564
+ context_chunker=context_chunker,
1565
+ **kwrs)
1295
1566
 
1296
1567
 
1297
1568
  class RelationExtractor(Extractor):
@@ -1361,7 +1632,7 @@ class RelationExtractor(Extractor):
1361
1632
 
1362
1633
  @abc.abstractmethod
1363
1634
  def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1364
- temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict]:
1635
+ temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1365
1636
  """
1366
1637
  This method considers all combinations of two frames.
1367
1638
 
@@ -1377,6 +1648,8 @@ class RelationExtractor(Extractor):
1377
1648
  the temperature for token sampling.
1378
1649
  stream : bool, Optional
1379
1650
  if True, LLM generated text will be printed in terminal in real-time.
1651
+ return_messages_log : bool, Optional
1652
+ if True, a list of messages will be returned.
1380
1653
 
1381
1654
  Return : List[Dict]
1382
1655
  a list of dict with {"frame_1", "frame_2"} for all relations.
@@ -1446,7 +1719,7 @@ class BinaryRelationExtractor(RelationExtractor):
1446
1719
 
1447
1720
 
1448
1721
  def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1449
- temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict]:
1722
+ temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1450
1723
  """
1451
1724
  This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
1452
1725
  Outputs pairs that are related.
@@ -1463,11 +1736,17 @@ class BinaryRelationExtractor(RelationExtractor):
1463
1736
  the temperature for token sampling.
1464
1737
  stream : bool, Optional
1465
1738
  if True, LLM generated text will be printed in terminal in real-time.
1739
+ return_messages_log : bool, Optional
1740
+ if True, a list of messages will be returned.
1466
1741
 
1467
1742
  Return : List[Dict]
1468
1743
  a list of dict with {"frame_1_id", "frame_2_id"}.
1469
1744
  """
1470
1745
  pairs = itertools.combinations(doc.frames, 2)
1746
+
1747
+ if return_messages_log:
1748
+ messages_log = []
1749
+
1471
1750
  output = []
1472
1751
  for frame_1, frame_2 in pairs:
1473
1752
  pos_rel = self.possible_relation_func(frame_1, frame_2)
@@ -1495,13 +1774,19 @@ class BinaryRelationExtractor(RelationExtractor):
1495
1774
  )
1496
1775
  rel_json = self._extract_json(gen_text)
1497
1776
  if self._post_process(rel_json):
1498
- output.append({'frame_1':frame_1.frame_id, 'frame_2':frame_2.frame_id})
1777
+ output.append({'frame_1_id':frame_1.frame_id, 'frame_2_id':frame_2.frame_id})
1499
1778
 
1779
+ if return_messages_log:
1780
+ messages.append({"role": "assistant", "content": gen_text})
1781
+ messages_log.append(messages)
1782
+
1783
+ if return_messages_log:
1784
+ return output, messages_log
1500
1785
  return output
1501
1786
 
1502
1787
 
1503
1788
  async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1504
- temperature:float=0.0, concurrent_batch_size:int=32, **kwrs) -> List[Dict]:
1789
+ temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1505
1790
  """
1506
1791
  This is the asynchronous version of the extract() method.
1507
1792
 
@@ -1517,6 +1802,8 @@ class BinaryRelationExtractor(RelationExtractor):
1517
1802
  the temperature for token sampling.
1518
1803
  concurrent_batch_size : int, Optional
1519
1804
  the number of frame pairs to process in concurrent.
1805
+ return_messages_log : bool, Optional
1806
+ if True, a list of messages will be returned.
1520
1807
 
1521
1808
  Return : List[Dict]
1522
1809
  a list of dict with {"frame_1", "frame_2"}.
@@ -1526,12 +1813,17 @@ class BinaryRelationExtractor(RelationExtractor):
1526
1813
  raise NotImplementedError(f"{self.inference_engine.__class__.__name__} does not have chat_async() method.")
1527
1814
 
1528
1815
  pairs = itertools.combinations(doc.frames, 2)
1816
+ if return_messages_log:
1817
+ messages_log = []
1818
+
1529
1819
  n_frames = len(doc.frames)
1530
1820
  num_pairs = (n_frames * (n_frames-1)) // 2
1531
- rel_pair_list = []
1532
- tasks = []
1821
+ output = []
1533
1822
  for i in range(0, num_pairs, concurrent_batch_size):
1823
+ rel_pair_list = []
1824
+ tasks = []
1534
1825
  batch = list(itertools.islice(pairs, concurrent_batch_size))
1826
+ batch_messages = []
1535
1827
  for frame_1, frame_2 in batch:
1536
1828
  pos_rel = self.possible_relation_func(frame_1, frame_2)
1537
1829
 
@@ -1546,6 +1838,7 @@ class BinaryRelationExtractor(RelationExtractor):
1546
1838
  "frame_1": str(frame_1.to_dict()),
1547
1839
  "frame_2": str(frame_2.to_dict())}
1548
1840
  )})
1841
+
1549
1842
  task = asyncio.create_task(
1550
1843
  self.inference_engine.chat_async(
1551
1844
  messages=messages,
@@ -1555,20 +1848,27 @@ class BinaryRelationExtractor(RelationExtractor):
1555
1848
  )
1556
1849
  )
1557
1850
  tasks.append(task)
1851
+ batch_messages.append(messages)
1558
1852
 
1559
1853
  responses = await asyncio.gather(*tasks)
1560
1854
 
1561
- output = []
1562
- for d, response in zip(rel_pair_list, responses):
1563
- rel_json = self._extract_json(response)
1564
- if self._post_process(rel_json):
1565
- output.append(d)
1855
+ for d, response, messages in zip(rel_pair_list, responses, batch_messages):
1856
+ if return_messages_log:
1857
+ messages.append({"role": "assistant", "content": response})
1858
+ messages_log.append(messages)
1859
+
1860
+ rel_json = self._extract_json(response)
1861
+ if self._post_process(rel_json):
1862
+ output.append(d)
1566
1863
 
1864
+ if return_messages_log:
1865
+ return output, messages_log
1567
1866
  return output
1568
1867
 
1569
1868
 
1570
1869
  def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1571
- temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32, stream:bool=False, **kwrs) -> List[Dict]:
1870
+ temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32,
1871
+ stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1572
1872
  """
1573
1873
  This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
1574
1874
 
@@ -1588,6 +1888,8 @@ class BinaryRelationExtractor(RelationExtractor):
1588
1888
  the number of frame pairs to process in concurrent.
1589
1889
  stream : bool, Optional
1590
1890
  if True, LLM generated text will be printed in terminal in real-time.
1891
+ return_messages_log : bool, Optional
1892
+ if True, a list of messages will be returned.
1591
1893
 
1592
1894
  Return : List[Dict]
1593
1895
  a list of dict with {"frame_1", "frame_2"} for all relations.
@@ -1608,6 +1910,7 @@ class BinaryRelationExtractor(RelationExtractor):
1608
1910
  max_new_tokens=max_new_tokens,
1609
1911
  temperature=temperature,
1610
1912
  concurrent_batch_size=concurrent_batch_size,
1913
+ return_messages_log=return_messages_log,
1611
1914
  **kwrs)
1612
1915
  )
1613
1916
  else:
@@ -1616,6 +1919,7 @@ class BinaryRelationExtractor(RelationExtractor):
1616
1919
  max_new_tokens=max_new_tokens,
1617
1920
  temperature=temperature,
1618
1921
  stream=stream,
1922
+ return_messages_log=return_messages_log,
1619
1923
  **kwrs)
1620
1924
 
1621
1925
 
@@ -1689,7 +1993,7 @@ class MultiClassRelationExtractor(RelationExtractor):
1689
1993
 
1690
1994
 
1691
1995
  def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1692
- temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict]:
1996
+ temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1693
1997
  """
1694
1998
  This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
1695
1999
 
@@ -1705,11 +2009,17 @@ class MultiClassRelationExtractor(RelationExtractor):
1705
2009
  the temperature for token sampling.
1706
2010
  stream : bool, Optional
1707
2011
  if True, LLM generated text will be printed in terminal in real-time.
2012
+ return_messages_log : bool, Optional
2013
+ if True, a list of messages will be returned.
1708
2014
 
1709
2015
  Return : List[Dict]
1710
- a list of dict with {"frame_1", "frame_2", "relation"} for all frame pairs.
2016
+ a list of dict with {"frame_1_id", "frame_2_id", "relation"} for all frame pairs.
1711
2017
  """
1712
2018
  pairs = itertools.combinations(doc.frames, 2)
2019
+
2020
+ if return_messages_log:
2021
+ messages_log = []
2022
+
1713
2023
  output = []
1714
2024
  for frame_1, frame_2 in pairs:
1715
2025
  pos_rel_types = self.possible_relation_types_func(frame_1, frame_2)
@@ -1736,16 +2046,23 @@ class MultiClassRelationExtractor(RelationExtractor):
1736
2046
  stream=stream,
1737
2047
  **kwrs
1738
2048
  )
2049
+
2050
+ if return_messages_log:
2051
+ messages.append({"role": "assistant", "content": gen_text})
2052
+ messages_log.append(messages)
2053
+
1739
2054
  rel_json = self._extract_json(gen_text)
1740
2055
  rel = self._post_process(rel_json, pos_rel_types)
1741
2056
  if rel:
1742
- output.append({'frame_1':frame_1.frame_id, 'frame_2':frame_2.frame_id, 'relation':rel})
2057
+ output.append({'frame_1_id':frame_1.frame_id, 'frame_2_id':frame_2.frame_id, 'relation':rel})
1743
2058
 
2059
+ if return_messages_log:
2060
+ return output, messages_log
1744
2061
  return output
1745
2062
 
1746
2063
 
1747
2064
  async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1748
- temperature:float=0.0, concurrent_batch_size:int=32, **kwrs) -> List[Dict]:
2065
+ temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1749
2066
  """
1750
2067
  This is the asynchronous version of the extract() method.
1751
2068
 
@@ -1761,21 +2078,28 @@ class MultiClassRelationExtractor(RelationExtractor):
1761
2078
  the temperature for token sampling.
1762
2079
  concurrent_batch_size : int, Optional
1763
2080
  the number of frame pairs to process in concurrent.
2081
+ return_messages_log : bool, Optional
2082
+ if True, a list of messages will be returned.
1764
2083
 
1765
2084
  Return : List[Dict]
1766
- a list of dict with {"frame_1", "frame_2", "relation"} for all frame pairs.
2085
+ a list of dict with {"frame_1_id", "frame_2_id", "relation"} for all frame pairs.
1767
2086
  """
1768
2087
  # Check if self.inference_engine.chat_async() is implemented
1769
2088
  if not hasattr(self.inference_engine, 'chat_async'):
1770
2089
  raise NotImplementedError(f"{self.inference_engine.__class__.__name__} does not have chat_async() method.")
1771
2090
 
1772
2091
  pairs = itertools.combinations(doc.frames, 2)
2092
+ if return_messages_log:
2093
+ messages_log = []
2094
+
1773
2095
  n_frames = len(doc.frames)
1774
2096
  num_pairs = (n_frames * (n_frames-1)) // 2
1775
- rel_pair_list = []
1776
- tasks = []
2097
+ output = []
1777
2098
  for i in range(0, num_pairs, concurrent_batch_size):
2099
+ rel_pair_list = []
2100
+ tasks = []
1778
2101
  batch = list(itertools.islice(pairs, concurrent_batch_size))
2102
+ batch_messages = []
1779
2103
  for frame_1, frame_2 in batch:
1780
2104
  pos_rel_types = self.possible_relation_types_func(frame_1, frame_2)
1781
2105
 
@@ -1800,21 +2124,28 @@ class MultiClassRelationExtractor(RelationExtractor):
1800
2124
  )
1801
2125
  )
1802
2126
  tasks.append(task)
2127
+ batch_messages.append(messages)
1803
2128
 
1804
2129
  responses = await asyncio.gather(*tasks)
1805
2130
 
1806
- output = []
1807
- for d, response in zip(rel_pair_list, responses):
1808
- rel_json = self._extract_json(response)
1809
- rel = self._post_process(rel_json, d['pos_rel_types'])
1810
- if rel:
1811
- output.append({'frame_1':d['frame_1'], 'frame_2':d['frame_2'], 'relation':rel})
2131
+ for d, response, messages in zip(rel_pair_list, responses, batch_messages):
2132
+ if return_messages_log:
2133
+ messages.append({"role": "assistant", "content": response})
2134
+ messages_log.append(messages)
2135
+
2136
+ rel_json = self._extract_json(response)
2137
+ rel = self._post_process(rel_json, d['pos_rel_types'])
2138
+ if rel:
2139
+ output.append({'frame_1_id':d['frame_1'], 'frame_2_id':d['frame_2'], 'relation':rel})
1812
2140
 
2141
+ if return_messages_log:
2142
+ return output, messages_log
1813
2143
  return output
1814
2144
 
1815
2145
 
1816
2146
  def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
1817
- temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32, stream:bool=False, **kwrs) -> List[Dict]:
2147
+ temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32,
2148
+ stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
1818
2149
  """
1819
2150
  This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
1820
2151
 
@@ -1834,6 +2165,8 @@ class MultiClassRelationExtractor(RelationExtractor):
1834
2165
  the number of frame pairs to process in concurrent.
1835
2166
  stream : bool, Optional
1836
2167
  if True, LLM generated text will be printed in terminal in real-time.
2168
+ return_messages_log : bool, Optional
2169
+ if True, a list of messages will be returned.
1837
2170
 
1838
2171
  Return : List[Dict]
1839
2172
  a list of dict with {"frame_1", "frame_2", "relation"} for all relations.
@@ -1854,6 +2187,7 @@ class MultiClassRelationExtractor(RelationExtractor):
1854
2187
  max_new_tokens=max_new_tokens,
1855
2188
  temperature=temperature,
1856
2189
  concurrent_batch_size=concurrent_batch_size,
2190
+ return_messages_log=return_messages_log,
1857
2191
  **kwrs)
1858
2192
  )
1859
2193
  else:
@@ -1862,5 +2196,6 @@ class MultiClassRelationExtractor(RelationExtractor):
1862
2196
  max_new_tokens=max_new_tokens,
1863
2197
  temperature=temperature,
1864
2198
  stream=stream,
2199
+ return_messages_log=return_messages_log,
1865
2200
  **kwrs)
1866
2201