llm-ie 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_ie/__init__.py +2 -2
- llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt +52 -0
- llm_ie/extractors.py +409 -460
- {llm_ie-1.1.0.dist-info → llm_ie-1.2.0.dist-info}/METADATA +1 -1
- {llm_ie-1.1.0.dist-info → llm_ie-1.2.0.dist-info}/RECORD +6 -5
- {llm_ie-1.1.0.dist-info → llm_ie-1.2.0.dist-info}/WHEEL +0 -0
llm_ie/__init__.py
CHANGED
@@ -1,11 +1,11 @@
 from .data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
 from .engines import BasicLLMConfig, Qwen3LLMConfig, OpenAIReasoningLLMConfig, LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, AzureOpenAIInferenceEngine, LiteLLMInferenceEngine
-from .extractors import DirectFrameExtractor, ReviewFrameExtractor, BasicFrameExtractor, BasicReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
+from .extractors import DirectFrameExtractor, ReviewFrameExtractor, BasicFrameExtractor, BasicReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, AttributeExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
 from .chunkers import UnitChunker, WholeDocumentUnitChunker, SentenceUnitChunker, TextLineUnitChunker, ContextChunker, NoContextChunker, WholeDocumentContextChunker, SlideWindowContextChunker
 from .prompt_editor import PromptEditor

 __all__ = ["LLMInformationExtractionFrame", "LLMInformationExtractionDocument",
            "BasicLLMConfig", "Qwen3LLMConfig", "OpenAIReasoningLLMConfig", "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "AzureOpenAIInferenceEngine", "LiteLLMInferenceEngine",
-           "DirectFrameExtractor", "ReviewFrameExtractor", "BasicFrameExtractor", "BasicReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
+           "DirectFrameExtractor", "ReviewFrameExtractor", "BasicFrameExtractor", "BasicReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "AttributeExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
            "UnitChunker", "WholeDocumentUnitChunker", "SentenceUnitChunker", "TextLineUnitChunker", "ContextChunker", "NoContextChunker", "WholeDocumentContextChunker", "SlideWindowContextChunker",
            "PromptEditor"]
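The practical effect of this change is that the new AttributeExtractor is importable from the package root. A minimal sketch of the 1.2.0 import surface; the engine construction is hypothetical (its parameters are not part of this diff) and the template is a stub that merely satisfies the placeholder check:

from llm_ie import AttributeExtractor, OllamaInferenceEngine

# Both placeholders are mandatory; the AttributeExtractor constructor raises
# ValueError when either is missing (see the extractors.py diff below).
template = (
    "This is an attribute extraction task.\n"
    "### Entity\n{{frame}}\n"
    "### Context\n{{context}}"
)
engine = OllamaInferenceEngine()  # hypothetical construction; see llm_ie/engines.py for the real parameters
extractor = AttributeExtractor(inference_engine=engine, prompt_template=template)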
llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt
ADDED
@@ -0,0 +1,52 @@
+Prompt Template Design:
+
+1. Task Description:
+    Provide a detailed description of the task, including the background and the type of task (e.g., attribute extraction task).
+
+2. Schema Definition:
+    List the attributes to extract, and provide clear definitions for each one.
+
+3. Output Format Definition:
+    The output should be a JSON list, where each attribute is a key. The values could be any structure (e.g., str, int, List[str]).
+
+4. Optional: Hints:
+    Provide itemized hints for the information extractors to guide the extraction process. Remind the prompted agent to be truthful. Emphasize that the prompted agent is supposed to perform the task itself, instead of writing code or instructing other agents to do it.
+
+5. Optional: Examples:
+    Include examples in the format:
+    Input: ...
+    Output: ...
+
+6. Entity:
+    The template must include a placeholder {{frame}} for the entity.
+
+7. Context:
+    The template must include a placeholder {{context}} for the context. Explain to the prompted agent that <Entity> tags are used to mark the entity in the context.
+
+
+Example:
+
+### Task description
+This is an attribute extraction task. Given a diagnosis entity and the context, you need to generate attributes for the entity.
+
+### Schema definition
+"Date" which is the date when the diagnosis was made in MM/DD/YYYY format,
+"Status" which is the current status of the diagnosis (e.g. active, resolved, etc.)
+
+### Output format definition
+Your output should follow the JSON format:
+{"Date": "<MM/DD/YYYY>", "Status": "<status>"}
+
+I am only interested in the content between []. Do not explain your answer.
+
+### Hints
+- If the date is not complete, use the first available date in the context. For example, if the date is 01/2023, you should return 01/01/2023.
+- If the status is not available, you should return "not specified".
+
+### Entity
+Information about the entity to extract attributes from:
+{{frame}}
+
+### Context
+Context for the entity. The <Entity> tags are used to mark the entity in the context.
+{{context}}
llm_ie/extractors.py
CHANGED
@@ -1449,11 +1449,11 @@ class SentenceReviewFrameExtractor(ReviewFrameExtractor):
                                   context_chunker=context_chunker)


-class
+class AttributeExtractor(Extractor):
     def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
         """
-        This is
-
+        This class is for attribute extraction for frames. Though FrameExtractors can also extract attributes, when
+        the number of attribute increases, it is more efficient to use a dedicated AttributeExtractor.

         Parameters
         ----------
@@ -1467,322 +1467,469 @@ class RelationExtractor(Extractor):
         super().__init__(inference_engine=inference_engine,
                          prompt_template=prompt_template,
                          system_prompt=system_prompt)
+        # validate prompt template
+        if "{{context}}" not in self.prompt_template or "{{frame}}" not in self.prompt_template:
+            raise ValueError("prompt_template must contain both {{context}} and {{frame}} placeholders.")

-    def
-        text:str, buffer_size:int=100) -> str:
+    def _get_context(self, frame:LLMInformationExtractionFrame, text:str, context_size:int=256) -> str:
         """
-        This method returns the
-        The returned text has the
+        This method returns the context that covers the frame. Leaves a context_size of characters before and after.
+        The returned text has the frame inline annotated with <entity>.

         Parameters:
         -----------
-
+        frame : LLMInformationExtractionFrame
             a frame
-        frame_2 : LLMInformationExtractionFrame
-            the other frame
         text : str
             the entire document text
-
-            the number of characters before and after the
+        context_size : int, Optional
+            the number of characters before and after the frame in the context text.

         Return : str
-            the
+            the context text with the frame inline annotated with <entity>.
         """
-
-
-
+        start = max(frame.start - context_size, 0)
+        end = min(frame.end + context_size, len(text))
+        context = text[start:end]

-
-
-
-
-
-                        f'<{left_frame_name}>' + \
-                        roi[left_frame.start - start:left_frame.end - start] + \
-                        f"</{left_frame_name}>" + \
-                        roi[left_frame.end - start:right_frame.start - start] + \
-                        f'<{right_frame_name}>' + \
-                        roi[right_frame.start - start:right_frame.end - start] + \
-                        f"</{right_frame_name}>" + \
-                        roi[right_frame.end - start:end - start]
+        context_annotated = context[0:frame.start - start] + \
+                            f"<entity> " + \
+                            context[frame.start - start:frame.end - start] + \
+                            f" </entity>" + \
+                            context[frame.end - start:end - start]

         if start > 0:
-
+            context_annotated = "..." + context_annotated
         if end < len(text):
-
-        return
+            context_annotated = context_annotated + "..."
+        return context_annotated

-
-
-    def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
-                          temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+    def _extract_from_frame(self, frame:LLMInformationExtractionFrame, text:str,
+                            context_size:int=256, verbose:bool=False, return_messages_log:bool=False) -> Dict[str, Any]:
         """
-        This method
+        This method extracts attributes from a single frame.

         Parameters:
         -----------
-
-            a
-
-            the
-
-            the
-
-
-        stream : bool, Optional
-            if True, LLM generated text will be printed in terminal in real-time.
+        frame : LLMInformationExtractionFrame
+            a frame to extract attributes from.
+        text : str
+            the entire document text.
+        context_size : int, Optional
+            the number of characters before and after the frame in the context text.
+        verbose : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
         return_messages_log : bool, Optional
             if True, a list of messages will be returned.

-        Return :
-            a
+        Return : Dict[str, Any]
+            a dictionary of attributes extracted from the frame.
+            If return_messages_log is True, a list of messages will be returned as well.
         """
-
-
+        # construct chat messages
+        messages = []
+        if self.system_prompt:
+            messages.append({'role': 'system', 'content': self.system_prompt})

-
-
-                 system_prompt:str=None):
-        """
-        This class extracts binary (yes/no) relations between two entities.
-        Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
+        context = self._get_context(frame, text, context_size)
+        messages.append({'role': 'user', 'content': self._get_user_prompt({"context": context, "frame": str(frame.to_dict())})})

-
-
-
-
-        prompt_template : str
-            prompt template with "{{<placeholder name>}}" placeholder.
-        possible_relation_func : Callable, Optional
-            a function that inputs 2 frames and returns a bool indicating possible relations between them.
-        system_prompt : str, Optional
-            system prompt.
-        """
-        super().__init__(inference_engine=inference_engine,
-                         prompt_template=prompt_template,
-                         system_prompt=system_prompt)
-
-        if possible_relation_func:
-            # Check if possible_relation_func is a function
-            if not callable(possible_relation_func):
-                raise TypeError(f"Expect possible_relation_func as a function, received {type(possible_relation_func)} instead.")
+        if verbose:
+            print(f"\n\n{Fore.GREEN}Frame: {frame.frame_id}{Style.RESET_ALL}\n{frame.to_dict()}\n")
+            if context != "":
+                print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")

-
-            # Check if frame_1, frame_2 are in input parameters
-            if len(sig.parameters) != 2:
-                raise ValueError("The possible_relation_func must have exactly frame_1 and frame_2 as parameters.")
-            if "frame_1" not in sig.parameters.keys():
-                raise ValueError("The possible_relation_func is missing frame_1 as a parameter.")
-            if "frame_2" not in sig.parameters.keys():
-                raise ValueError("The possible_relation_func is missing frame_2 as a parameter.")
-            # Check if output is a bool
-            if sig.return_annotation != bool:
-                raise ValueError(f"Expect possible_relation_func to output a bool, current type hint suggests {sig.return_annotation} instead.")
-
-        self.possible_relation_func = possible_relation_func
-
-
-    def _post_process(self, rel_json:str) -> bool:
-        if len(rel_json) > 0:
-            if "Relation" in rel_json[0]:
-                rel = rel_json[0]["Relation"]
-                if isinstance(rel, bool):
-                    return rel
-                elif isinstance(rel, str) and rel in {"True", "False"}:
-                    return eval(rel)
-                else:
-                    warnings.warn('Extractor output JSON "Relation" key does not have bool or {"True", "False"} as value.' + \
-                                  'Following default, relation = False.', RuntimeWarning)
-            else:
-                warnings.warn('Extractor output JSON without "Relation" key. Following default, relation = False.', RuntimeWarning)
-        else:
-            warnings.warn('Extractor did not output a JSON list. Following default, relation = False.', RuntimeWarning)
-        return False
-
+            print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")

-
-
+        get_text = self.inference_engine.chat(
+            messages=messages,
+            verbose=verbose,
+            stream=False
+        )
+        if return_messages_log:
+            messages.append({"role": "assistant", "content": get_text})
+
+        attribute_list = self._extract_json(gen_text=get_text)
+        if isinstance(attribute_list, list) and len(attribute_list) > 0:
+            attributes = attribute_list[0]
+        if return_messages_log:
+            return attributes, messages
+        return attributes
+
+
+    def extract(self, frames:List[LLMInformationExtractionFrame], text:str, context_size:int=256, verbose:bool=False,
+                return_messages_log:bool=False, inplace:bool=True) -> Union[None, List[LLMInformationExtractionFrame]]:
         """
-        This method
-        Outputs pairs that are related.
+        This method extracts attributes from the document.

         Parameters:
         -----------
-
-            a
-
-            the
+        frames : List[LLMInformationExtractionFrame]
+            a list of frames to extract attributes from.
+        text : str
+            the entire document text.
+        context_size : int, Optional
+            the number of characters before and after the frame in the context text.
         verbose : bool, Optional
             if True, LLM generated text will be printed in terminal in real-time.
         return_messages_log : bool, Optional
             if True, a list of messages will be returned.
+        inplace : bool, Optional
+            if True, the method will modify the frames in-place.
+
+        Return : Union[None, List[LLMInformationExtractionFrame]]
+            if inplace is True, the method will modify the frames in-place.
+            if inplace is False, the method will return a list of frames with attributes extracted.
+        """
+        for frame in frames:
+            if not isinstance(frame, LLMInformationExtractionFrame):
+                raise TypeError(f"Expect frame as LLMInformationExtractionFrame, received {type(frame)} instead.")
+        if not isinstance(text, str):
+            raise TypeError(f"Expect text as str, received {type(text)} instead.")
+
+        new_frames = []
+        messages_log = [] if return_messages_log else None

-
-
-
-
+        for frame in frames:
+            if return_messages_log:
+                attr, messages = self._extract_from_frame(frame=frame, text=text, context_size=context_size,
+                                                          verbose=verbose, return_messages_log=return_messages_log)
+                messages_log.append(messages)
+            else:
+                attr = self._extract_from_frame(frame=frame, text=text, context_size=context_size,
+                                                verbose=verbose, return_messages_log=return_messages_log)
+
+            if inplace:
+                frame.attr.update(attr)
+            else:
+                new_frame = frame.copy()
+                new_frame.attr.update(attr)
+                new_frames.append(new_frame)

-        if
-        messages_log
+        if inplace:
+            return messages_log if return_messages_log else None
+        else:
+            return (new_frames, messages_log) if return_messages_log else new_frames

-        output = []
-        for frame_1, frame_2 in pairs:
-            pos_rel = self.possible_relation_func(frame_1, frame_2)

-
-
-
-
-
+    async def extract_async(self, frames:List[LLMInformationExtractionFrame], text:str, context_size:int=256,
+                            concurrent_batch_size:int=32, inplace:bool=True, return_messages_log:bool=False) -> Union[None, List[LLMInformationExtractionFrame]]:
+        """
+        This method extracts attributes from the document asynchronously.
+
+        Parameters:
+        -----------
+        frames : List[LLMInformationExtractionFrame]
+            a list of frames to extract attributes from.
+        text : str
+            the entire document text.
+        context_size : int, Optional
+            the number of characters before and after the frame in the context text.
+        concurrent_batch_size : int, Optional
+            the batch size for concurrent processing.
+        inplace : bool, Optional
+            if True, the method will modify the frames in-place.
+        return_messages_log : bool, Optional
+            if True, a list of messages will be returned.
+
+        Return : Union[None, List[LLMInformationExtractionFrame]]
+            if inplace is True, the method will modify the frames in-place.
+            if inplace is False, the method will return a list of frames with attributes extracted.
+        """
+        # validation
+        for frame in frames:
+            if not isinstance(frame, LLMInformationExtractionFrame):
+                raise TypeError(f"Expect frame as LLMInformationExtractionFrame, received {type(frame)} instead.")
+        if not isinstance(text, str):
+            raise TypeError(f"Expect text as str, received {type(text)} instead.")
+
+        # async helper
+        semaphore = asyncio.Semaphore(concurrent_batch_size)
+
+        async def semaphore_helper(frame:LLMInformationExtractionFrame, text:str, context_size:int) -> dict:
+            async with semaphore:
                 messages = []
                 if self.system_prompt:
                     messages.append({'role': 'system', 'content': self.system_prompt})

-
-
-                                 "frame_2": str(frame_2.to_dict())}
-                                 )})
-
-                gen_text = self.inference_engine.chat(
-                    messages=messages,
-                    verbose=verbose
-                )
-                rel_json = self._extract_json(gen_text)
-                if self._post_process(rel_json):
-                    output.append({'frame_1_id':frame_1.frame_id, 'frame_2_id':frame_2.frame_id})
+                context = self._get_context(frame, text, context_size)
+                messages.append({'role': 'user', 'content': self._get_user_prompt({"context": context, "frame": str(frame.to_dict())})})

+                gen_text = await self.inference_engine.chat_async(messages=messages)
+
                 if return_messages_log:
                     messages.append({"role": "assistant", "content": gen_text})
-                    messages_log.append(messages)

-
-
-
-
-
-
-
+                attribute_list = self._extract_json(gen_text=gen_text)
+                attributes = attribute_list[0] if isinstance(attribute_list, list) and len(attribute_list) > 0 else {}
+                return {"frame": frame, "attributes": attributes, "messages": messages}
+
+        # create tasks
+        tasks = [asyncio.create_task(semaphore_helper(frame, text, context_size)) for frame in frames]
+        results = await asyncio.gather(*tasks)
+
+        # process results
+        new_frames = []
+        messages_log = [] if return_messages_log else None
+
+        for result in results:
+            if return_messages_log:
+                messages_log.append(result["messages"])
+
+            if inplace:
+                result["frame"].attr.update(result["attributes"])
+            else:
+                new_frame = result["frame"].copy()
+                new_frame.attr.update(result["attributes"])
+                new_frames.append(new_frame)
+
+        # output
+        if inplace:
+            return messages_log if return_messages_log else None
+        else:
+            return (new_frames, messages_log) if return_messages_log else new_frames
+
+    def extract_attributes(self, frames:List[LLMInformationExtractionFrame], text:str, context_size:int=256,
+                           concurrent:bool=False, concurrent_batch_size:int=32, verbose:bool=False,
+                           return_messages_log:bool=False, inplace:bool=True) -> Union[None, List[LLMInformationExtractionFrame]]:
         """
-        This
+        This method extracts attributes from the document.

         Parameters:
         -----------
-
-            a
-
-            the
-
-            the
-
-            the
+        frames : List[LLMInformationExtractionFrame]
+            a list of frames to extract attributes from.
+        text : str
+            the entire document text.
+        context_size : int, Optional
+            the number of characters before and after the frame in the context text.
+        concurrent : bool, Optional
+            if True, the method will run in concurrent mode with batch size concurrent_batch_size.
         concurrent_batch_size : int, Optional
-            the
+            the batch size for concurrent processing.
+        verbose : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
         return_messages_log : bool, Optional
             if True, a list of messages will be returned.
-
-
-            a list of dict with {"frame_1", "frame_2"}.
-        """
-        # Check if self.inference_engine.chat_async() is implemented
-        if not hasattr(self.inference_engine, 'chat_async'):
-            raise NotImplementedError(f"{self.inference_engine.__class__.__name__} does not have chat_async() method.")
+        inplace : bool, Optional
+            if True, the method will modify the frames in-place.

-
-
-
-
-
-
-
-        for i in range(0, num_pairs, concurrent_batch_size):
-            rel_pair_list = []
-            tasks = []
-            batch = list(itertools.islice(pairs, concurrent_batch_size))
-            batch_messages = []
-            for frame_1, frame_2 in batch:
-                pos_rel = self.possible_relation_func(frame_1, frame_2)
-
-                if pos_rel:
-                    rel_pair_list.append({'frame_1_id':frame_1.frame_id, 'frame_2_id':frame_2.frame_id})
-                    roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
-                    messages = []
-                    if self.system_prompt:
-                        messages.append({'role': 'system', 'content': self.system_prompt})
-
-                    messages.append({'role': 'user', 'content': self._get_user_prompt(text_content={"roi_text":roi_text,
-                                                                                                    "frame_1": str(frame_1.to_dict()),
-                                                                                                    "frame_2": str(frame_2.to_dict())}
-                                                                                      )})
-
-                    task = asyncio.create_task(
-                        self.inference_engine.chat_async(
-                            messages=messages
-                        )
-                    )
-                    tasks.append(task)
-                    batch_messages.append(messages)
-
-            responses = await asyncio.gather(*tasks)
+        Return : Union[None, List[LLMInformationExtractionFrame]]
+            if inplace is True, the method will modify the frames in-place.
+            if inplace is False, the method will return a list of frames with attributes extracted.
+        """
+        if concurrent:
+            if verbose:
+                warnings.warn("verbose=True is not supported in concurrent mode.", RuntimeWarning)

-
-            if return_messages_log:
-                messages.append({"role": "assistant", "content": response})
-                messages_log.append(messages)
+            nest_asyncio.apply()  # For Jupyter notebook. Terminal does not need this.

-
-
-
+            return asyncio.run(self.extract_async(frames=frames, text=text, context_size=context_size,
+                                                  concurrent_batch_size=concurrent_batch_size,
+                                                  inplace=inplace, return_messages_log=return_messages_log))
+        else:
+            return self.extract(frames=frames, text=text, context_size=context_size,
+                                verbose=verbose, return_messages_log=return_messages_log, inplace=inplace)

-        if return_messages_log:
-            return output, messages_log
-        return output

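The AttributeExtractor above exposes one public entry point, extract_attributes(), which dispatches to extract() or extract_async(). A hedged usage sketch, assuming engine, template, frames (produced earlier by one of the FrameExtractors), and doc_text already exist:

# Sequential, in-place: each frame's attr dict is updated with the extracted attributes.
extractor = AttributeExtractor(inference_engine=engine, prompt_template=template)
extractor.extract_attributes(frames=frames, text=doc_text, context_size=256, inplace=True)

# Concurrent mode: up to 32 frames in flight via chat_async; with inplace=False,
# enriched copies are returned and the original frames are left untouched.
enriched = extractor.extract_attributes(frames=frames, text=doc_text,
                                        concurrent=True, concurrent_batch_size=32,
                                        inplace=False)

The inplace flag mirrors the method's return contract: in-place runs return None (or only the messages log), while inplace=False returns new frame copies.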
+class RelationExtractor(Extractor):
+    def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
+        """
+        This is the abstract class for relation extraction.
+        Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).

-
-
-
+        Parameters
+        ----------
+        inference_engine : InferenceEngine
+            the LLM inferencing engine object. Must implements the chat() method.
+        prompt_template : str
+            prompt template with "{{<placeholder name>}}" placeholder.
+        system_prompt : str, Optional
+            system prompt.
         """
-
+        super().__init__(inference_engine=inference_engine,
+                         prompt_template=prompt_template,
+                         system_prompt=system_prompt)
+
+    def _get_ROI(self, frame_1:LLMInformationExtractionFrame, frame_2:LLMInformationExtractionFrame,
+                 text:str, buffer_size:int=128) -> str:
+        """
+        This method returns the Region of Interest (ROI) that covers the two frames. Leaves a buffer_size of characters before and after.
+        The returned text has the two frames inline annotated with <entity_1>, <entity_2>.

         Parameters:
         -----------
-
-            a
+        frame_1 : LLMInformationExtractionFrame
+            a frame
+        frame_2 : LLMInformationExtractionFrame
+            the other frame
+        text : str
+            the entire document text
         buffer_size : int, Optional
             the number of characters before and after the two frames in the ROI text.
-        concurrent: bool, Optional
-            if True, the extraction will be done in concurrent.
-        concurrent_batch_size : int, Optional
-            the number of frame pairs to process in concurrent.
-        verbose : bool, Optional
-            if True, LLM generated text will be printed in terminal in real-time.
-        return_messages_log : bool, Optional
-            if True, a list of messages will be returned.

-        Return :
-
+        Return : str
+            the ROI text with the two frames inline annotated with <entity_1>, <entity_2>.
         """
+        left_frame, right_frame = sorted([frame_1, frame_2], key=lambda f: f.start)
+        left_frame_name = "entity_1" if left_frame.frame_id == frame_1.frame_id else "entity_2"
+        right_frame_name = "entity_1" if right_frame.frame_id == frame_1.frame_id else "entity_2"
+
+        start = max(left_frame.start - buffer_size, 0)
+        end = min(right_frame.end + buffer_size, len(text))
+        roi = text[start:end]
+
+        roi_annotated = roi[0:left_frame.start - start] + \
+                        f"<{left_frame_name}> " + \
+                        roi[left_frame.start - start:left_frame.end - start] + \
+                        f" </{left_frame_name}>" + \
+                        roi[left_frame.end - start:right_frame.start - start] + \
+                        f"<{right_frame_name}> " + \
+                        roi[right_frame.start - start:right_frame.end - start] + \
+                        f" </{right_frame_name}>" + \
+                        roi[right_frame.end - start:end - start]
+
+        if start > 0:
+            roi_annotated = "..." + roi_annotated
+        if end < len(text):
+            roi_annotated = roi_annotated + "..."
+        return roi_annotated
+
+    @abc.abstractmethod
+    def _get_task_if_possible(self, frame_1: LLMInformationExtractionFrame, frame_2: LLMInformationExtractionFrame,
+                              text: str, buffer_size: int) -> Optional[Dict[str, Any]]:
+        """Checks if a relation is possible and constructs the task payload."""
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def _post_process_result(self, gen_text: str, pair_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        """Processes the LLM output for a single pair and returns the final relation dictionary."""
+        raise NotImplementedError
+
+    def _extract(self, doc: LLMInformationExtractionDocument, buffer_size: int = 128, verbose: bool = False,
+                 return_messages_log: bool = False) -> Union[List[Dict], Tuple[List[Dict], List]]:
+        pairs = itertools.combinations(doc.frames, 2)
+        relations = []
+        messages_log = [] if return_messages_log else None
+
+        for frame_1, frame_2 in pairs:
+            task_payload = self._get_task_if_possible(frame_1, frame_2, doc.text, buffer_size)
+            if task_payload:
+                if verbose:
+                    print(f"\n\n{Fore.GREEN}Evaluating pair:{Style.RESET_ALL} ({frame_1.frame_id}, {frame_2.frame_id})")
+                    print(f"{Fore.YELLOW}ROI Text:{Style.RESET_ALL}\n{task_payload['roi_text']}\n")
+                    print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
+
+                gen_text = self.inference_engine.chat(
+                    messages=task_payload['messages'],
+                    verbose=verbose
+                )
+                relation = self._post_process_result(gen_text, task_payload)
+                if relation:
+                    relations.append(relation)
+
+                if return_messages_log:
+                    task_payload['messages'].append({"role": "assistant", "content": gen_text})
+                    messages_log.append(task_payload['messages'])
+
+        return (relations, messages_log) if return_messages_log else relations
+
+    async def _extract_async(self, doc: LLMInformationExtractionDocument, buffer_size: int = 128, concurrent_batch_size: int = 32, return_messages_log: bool = False) -> Union[List[Dict], Tuple[List[Dict], List]]:
+        pairs = list(itertools.combinations(doc.frames, 2))
+        tasks_input = [self._get_task_if_possible(f1, f2, doc.text, buffer_size) for f1, f2 in pairs]
+        # Filter out impossible pairs
+        tasks_input = [task for task in tasks_input if task is not None]
+
+        relations = []
+        messages_log = [] if return_messages_log else None
+        semaphore = asyncio.Semaphore(concurrent_batch_size)
+
+        async def semaphore_helper(task_payload: Dict):
+            async with semaphore:
+                gen_text = await self.inference_engine.chat_async(messages=task_payload['messages'])
+                return gen_text, task_payload
+
+        tasks = [asyncio.create_task(semaphore_helper(payload)) for payload in tasks_input]
+        results = await asyncio.gather(*tasks)
+
+        for gen_text, task_payload in results:
+            relation = self._post_process_result(gen_text, task_payload)
+            if relation:
+                relations.append(relation)
+
+            if return_messages_log:
+                task_payload['messages'].append({"role": "assistant", "content": gen_text})
+                messages_log.append(task_payload['messages'])
+
+        return (relations, messages_log) if return_messages_log else relations
+
+    def extract_relations(self, doc: LLMInformationExtractionDocument, buffer_size: int = 128, concurrent: bool = False, concurrent_batch_size: int = 32, verbose: bool = False, return_messages_log: bool = False) -> List[Dict]:
         if not doc.has_frame():
             raise ValueError("Input document must have frames.")
-
         if doc.has_duplicate_frame_ids():
             raise ValueError("All frame_ids in the input document must be unique.")

         if concurrent:
             if verbose:
-                warnings.warn("
-
-
-            return asyncio.run(self.extract_async(doc=doc,
-                                                  buffer_size=buffer_size,
-                                                  concurrent_batch_size=concurrent_batch_size,
-                                                  return_messages_log=return_messages_log)
-                               )
+                warnings.warn("verbose=True is not supported in concurrent mode.", RuntimeWarning)
+            nest_asyncio.apply()
+            return asyncio.run(self._extract_async(doc, buffer_size, concurrent_batch_size, return_messages_log))
         else:
-            return self.
-
-
-
+            return self._extract(doc, buffer_size, verbose, return_messages_log)
+
+
+class BinaryRelationExtractor(RelationExtractor):
+    def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_func: Callable,
+                 system_prompt:str=None):
+        """
+        This class extracts binary (yes/no) relations between two entities.
+        Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
+
+        Parameters
+        ----------
+        inference_engine : InferenceEngine
+            the LLM inferencing engine object. Must implements the chat() method.
+        prompt_template : str
+            prompt template with "{{<placeholder name>}}" placeholder.
+        possible_relation_func : Callable, Optional
+            a function that inputs 2 frames and returns a bool indicating possible relations between them.
+        system_prompt : str, Optional
+            system prompt.
+        """
+        super().__init__(inference_engine, prompt_template, system_prompt)
+        if not callable(possible_relation_func):
+            raise TypeError(f"Expect possible_relation_func as a function, received {type(possible_relation_func)} instead.")
+
+        sig = inspect.signature(possible_relation_func)
+        if len(sig.parameters) != 2:
+            raise ValueError("The possible_relation_func must have exactly two parameters.")
+
+        if sig.return_annotation not in {bool, inspect.Signature.empty}:
+            warnings.warn(f"Expected possible_relation_func return annotation to be bool, but got {sig.return_annotation}.")
+
+        self.possible_relation_func = possible_relation_func
+
+    def _get_task_if_possible(self, frame_1: LLMInformationExtractionFrame, frame_2: LLMInformationExtractionFrame,
+                              text: str, buffer_size: int) -> Optional[Dict[str, Any]]:
+        if self.possible_relation_func(frame_1, frame_2):
+            roi_text = self._get_ROI(frame_1, frame_2, text, buffer_size)
+            messages = []
+            if self.system_prompt:
+                messages.append({'role': 'system', 'content': self.system_prompt})
+
+            messages.append({'role': 'user', 'content': self._get_user_prompt(
+                text_content={"roi_text": roi_text, "frame_1": str(frame_1.to_dict()), "frame_2": str(frame_2.to_dict())}
+            )})
+            return {"frame_1": frame_1, "frame_2": frame_2, "messages": messages, "roi_text": roi_text}
+        return None
+
+    def _post_process_result(self, gen_text: str, pair_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        rel_json = self._extract_json(gen_text)
+        if len(rel_json) > 0 and "Relation" in rel_json[0]:
+            rel = rel_json[0]["Relation"]
+            if (isinstance(rel, bool) and rel) or (isinstance(rel, str) and rel.lower() == 'true'):
+                return {'frame_1_id': pair_data['frame_1'].frame_id, 'frame_2_id': pair_data['frame_2'].frame_id}
+        return None


 class MultiClassRelationExtractor(RelationExtractor):
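Under this refactor, each concrete extractor only supplies _get_task_if_possible and _post_process_result; pair iteration, batching, and message logging live in RelationExtractor. A hedged sketch of a BinaryRelationExtractor setup, assuming engine, relation_template, and doc exist; the 500-character threshold is arbitrary, and the template needs {{roi_text}}, {{frame_1}}, and {{frame_2}} placeholders to match the keys passed to _get_user_prompt above:

# Sketch: a distance-based pair filter. Only frame attributes visible in this
# diff (start, frame_id) are used.
def possible_relation_func(frame_1, frame_2) -> bool:
    # Skip pairs that are far apart in the document; the threshold is arbitrary.
    return abs(frame_1.start - frame_2.start) < 500

extractor = BinaryRelationExtractor(
    inference_engine=engine,                       # assumed to exist
    prompt_template=relation_template,             # {{roi_text}}, {{frame_1}}, {{frame_2}}
    possible_relation_func=possible_relation_func,
)
relations = extractor.extract_relations(doc, buffer_size=128)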
@@ -1828,223 +1975,25 @@ class MultiClassRelationExtractor(RelationExtractor):
         self.possible_relation_types_func = possible_relation_types_func


-    def
-
-
-
-
-
-
-
-
-
-
-
-            the relation type (str) or None for no relation.
-        """
-        if len(rel_json) > 0:
-            if "RelationType" in rel_json[0]:
-                if rel_json[0]["RelationType"] in pos_rel_types:
-                    return rel_json[0]["RelationType"]
-            else:
-                warnings.warn('Extractor output JSON without "RelationType" key. Following default, relation = "No Relation".', RuntimeWarning)
-        else:
-            warnings.warn('Extractor did not output a JSON. Following default, relation = "No Relation".', RuntimeWarning)
+    def _get_task_if_possible(self, frame_1: LLMInformationExtractionFrame, frame_2: LLMInformationExtractionFrame,
+                              text: str, buffer_size: int) -> Optional[Dict[str, Any]]:
+        pos_rel_types = self.possible_relation_types_func(frame_1, frame_2)
+        if pos_rel_types:
+            roi_text = self._get_ROI(frame_1, frame_2, text, buffer_size)
+            messages = []
+            if self.system_prompt:
+                messages.append({'role': 'system', 'content': self.system_prompt})
+            messages.append({'role': 'user', 'content': self._get_user_prompt(
+                text_content={"roi_text": roi_text, "frame_1": str(frame_1.to_dict()), "frame_2": str(frame_2.to_dict()), "pos_rel_types": str(pos_rel_types)}
+            )})
+            return {"frame_1": frame_1, "frame_2": frame_2, "messages": messages, "pos_rel_types": pos_rel_types, "roi_text": roi_text}
         return None
-

-    def
-
-
-
-
-
-
-
-        buffer_size : int, Optional
-            the number of characters before and after the two frames in the ROI text.
-        max_new_tokens : str, Optional
-            the max number of new tokens LLM should generate.
-        temperature : float, Optional
-            the temperature for token sampling.
-        stream : bool, Optional
-            if True, LLM generated text will be printed in terminal in real-time.
-        return_messages_log : bool, Optional
-            if True, a list of messages will be returned.
-
-        Return : List[Dict]
-            a list of dict with {"frame_1_id", "frame_2_id", "relation"} for all frame pairs.
-        """
-        pairs = itertools.combinations(doc.frames, 2)
-
-        if return_messages_log:
-            messages_log = []
-
-        output = []
-        for frame_1, frame_2 in pairs:
-            pos_rel_types = self.possible_relation_types_func(frame_1, frame_2)
-
-            if pos_rel_types:
-                roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
-                if verbose:
-                    print(f"\n\n{Fore.GREEN}ROI text:{Style.RESET_ALL} \n{roi_text}\n")
-                    print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
-                messages = []
-                if self.system_prompt:
-                    messages.append({'role': 'system', 'content': self.system_prompt})
-
-                messages.append({'role': 'user', 'content': self._get_user_prompt(text_content={"roi_text":roi_text,
-                                                                                                "frame_1": str(frame_1.to_dict()),
-                                                                                                "frame_2": str(frame_2.to_dict()),
-                                                                                                "pos_rel_types":str(pos_rel_types)}
-                                                                                  )})
-
-                gen_text = self.inference_engine.chat(
-                    messages=messages,
-                    stream=False,
-                    verbose=verbose
-                )
-
-                if return_messages_log:
-                    messages.append({"role": "assistant", "content": gen_text})
-                    messages_log.append(messages)
-
-                rel_json = self._extract_json(gen_text)
-                rel = self._post_process(rel_json, pos_rel_types)
-                if rel:
-                    output.append({'frame_1_id':frame_1.frame_id, 'frame_2_id':frame_2.frame_id, 'relation':rel})
-
-        if return_messages_log:
-            return output, messages_log
-        return output
-
-
-    async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
-                            concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[Dict]:
-        """
-        This is the asynchronous version of the extract() method.
-
-        Parameters:
-        -----------
-        doc : LLMInformationExtractionDocument
-            a document with frames.
-        buffer_size : int, Optional
-            the number of characters before and after the two frames in the ROI text.
-        max_new_tokens : str, Optional
-            the max number of new tokens LLM should generate.
-        temperature : float, Optional
-            the temperature for token sampling.
-        concurrent_batch_size : int, Optional
-            the number of frame pairs to process in concurrent.
-        return_messages_log : bool, Optional
-            if True, a list of messages will be returned.
-
-        Return : List[Dict]
-            a list of dict with {"frame_1_id", "frame_2_id", "relation"} for all frame pairs.
-        """
-        # Check if self.inference_engine.chat_async() is implemented
-        if not hasattr(self.inference_engine, 'chat_async'):
-            raise NotImplementedError(f"{self.inference_engine.__class__.__name__} does not have chat_async() method.")
-
-        pairs = itertools.combinations(doc.frames, 2)
-        if return_messages_log:
-            messages_log = []
-
-        n_frames = len(doc.frames)
-        num_pairs = (n_frames * (n_frames-1)) // 2
-        output = []
-        for i in range(0, num_pairs, concurrent_batch_size):
-            rel_pair_list = []
-            tasks = []
-            batch = list(itertools.islice(pairs, concurrent_batch_size))
-            batch_messages = []
-            for frame_1, frame_2 in batch:
-                pos_rel_types = self.possible_relation_types_func(frame_1, frame_2)
-
-                if pos_rel_types:
-                    rel_pair_list.append({'frame_1':frame_1.frame_id, 'frame_2':frame_2.frame_id, 'pos_rel_types':pos_rel_types})
-                    roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
-                    messages = []
-                    if self.system_prompt:
-                        messages.append({'role': 'system', 'content': self.system_prompt})
-
-                    messages.append({'role': 'user', 'content': self._get_user_prompt(text_content={"roi_text":roi_text,
-                                                                                                    "frame_1": str(frame_1.to_dict()),
-                                                                                                    "frame_2": str(frame_2.to_dict()),
-                                                                                                    "pos_rel_types":str(pos_rel_types)}
-                                                                                      )})
-                    task = asyncio.create_task(
-                        self.inference_engine.chat_async(
-                            messages=messages
-                        )
-                    )
-                    tasks.append(task)
-                    batch_messages.append(messages)
-
-            responses = await asyncio.gather(*tasks)
-
-            for d, response, messages in zip(rel_pair_list, responses, batch_messages):
-                if return_messages_log:
-                    messages.append({"role": "assistant", "content": response})
-                    messages_log.append(messages)
-
-                rel_json = self._extract_json(response)
-                rel = self._post_process(rel_json, d['pos_rel_types'])
-                if rel:
-                    output.append({'frame_1_id':d['frame_1'], 'frame_2_id':d['frame_2'], 'relation':rel})
-
-        if return_messages_log:
-            return output, messages_log
-        return output
-
-
-    def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
-                          concurrent:bool=False, concurrent_batch_size:int=32,
-                          verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
-        """
-        This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
-
-        Parameters:
-        -----------
-        doc : LLMInformationExtractionDocument
-            a document with frames.
-        buffer_size : int, Optional
-            the number of characters before and after the two frames in the ROI text.
-        max_new_tokens : str, Optional
-            the max number of new tokens LLM should generate.
-        temperature : float, Optional
-            the temperature for token sampling.
-        concurrent: bool, Optional
-            if True, the extraction will be done in concurrent.
-        concurrent_batch_size : int, Optional
-            the number of frame pairs to process in concurrent.
-        stream : bool, Optional
-            if True, LLM generated text will be printed in terminal in real-time.
-        return_messages_log : bool, Optional
-            if True, a list of messages will be returned.
-
-        Return : List[Dict]
-            a list of dict with {"frame_1", "frame_2", "relation"} for all relations.
-        """
-        if not doc.has_frame():
-            raise ValueError("Input document must have frames.")
-
-        if doc.has_duplicate_frame_ids():
-            raise ValueError("All frame_ids in the input document must be unique.")
-
-        if concurrent:
-            if verbose:
-                warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
-
-            nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
-            return asyncio.run(self.extract_async(doc=doc,
-                                                  buffer_size=buffer_size,
-                                                  concurrent_batch_size=concurrent_batch_size,
-                                                  return_messages_log=return_messages_log)
-                               )
-        else:
-            return self.extract(doc=doc,
-                                buffer_size=buffer_size,
-                                verbose=verbose,
-                                return_messages_log=return_messages_log)
-
+    def _post_process_result(self, gen_text: str, pair_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+        rel_json = self._extract_json(gen_text)
+        pos_rel_types = pair_data['pos_rel_types']
+        if len(rel_json) > 0 and "RelationType" in rel_json[0]:
+            rel_type = rel_json[0]["RelationType"]
+            if rel_type in pos_rel_types:
+                return {'frame_1_id': pair_data['frame_1'].frame_id, 'frame_2_id': pair_data['frame_2'].frame_id, 'relation': rel_type}
+        return None
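For MultiClassRelationExtractor, the filter function returns candidate relation types rather than a bool: an empty result skips the pair entirely in _get_task_if_possible, and _post_process_result only keeps a RelationType that appears in the candidate list. A hedged sketch; the "EntityType" attribute key is hypothetical and should be adapted to the frame schema in use:

# Sketch: a candidate-type function for MultiClassRelationExtractor.
def possible_relation_types_func(frame_1, frame_2) -> list:
    types = {frame_1.attr.get("EntityType"), frame_2.attr.get("EntityType")}  # hypothetical key
    if types == {"Diagnosis", "Date"}:
        return ["DiagnosisDate"]
    return []  # falsy -> the pair is never sent to the LLM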
{llm_ie-1.1.0.dist-info → llm_ie-1.2.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-ie
-Version: 1.1.0
+Version: 1.2.0
 Summary: A comprehensive toolkit that provides building blocks for LLM-based named entity recognition, attribute extraction, and relation extraction pipelines.
 License: MIT
 Author: Enshuo (David) Hsu

{llm_ie-1.1.0.dist-info → llm_ie-1.2.0.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-llm_ie/__init__.py,sha256=
+llm_ie/__init__.py,sha256=rLP01qXkIisX0WLzZOv6y494Braw89g5JLmA6ZyrGGA,1590
 llm_ie/asset/PromptEditor_prompts/chat.txt,sha256=Fq62voV0JQ8xBRcxS1Nmdd7DkHs1fGYb-tmNwctZZK0,118
 llm_ie/asset/PromptEditor_prompts/comment.txt,sha256=C_lxx-dlOlFJ__jkHKosZ8HsNAeV1aowh2B36nIipBY,159
 llm_ie/asset/PromptEditor_prompts/rewrite.txt,sha256=JAwY9vm1jSmKf2qcLBYUvrSmME2EJH36bALmkwZDWYQ,178
@@ -9,6 +9,7 @@ llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt,sha
 llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt,sha256=lGGjdeFpzZEc56w-EtQDMyYFs7A3DQAM32sT42Nf_08,293
 llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt,sha256=Of11LFuXLB249oekFelzlIeoAB0cATReqWgFTvhNz_8,329
 llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt,sha256=kNJQK7NdoCx13TXGY8HYGrW_v4SEaErK8j9qIzd70CM,291
+llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt,sha256=w2amKipinuJtCiyPsgWsjaJRwTpS1qOBDuPPtPCMeQA,2120
 llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt,sha256=-Cli7rwu4wM4vSmkG0nInNkpStUhRqKESQ3oqD38pbE,10395
 llm_ie/asset/prompt_guide/BasicReviewFrameExtractor_prompt_guide.txt,sha256=-Cli7rwu4wM4vSmkG0nInNkpStUhRqKESQ3oqD38pbE,10395
 llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt,sha256=Z6Yc2_QRqroWcJ13owNJbo78I0wpS4XXDsOjXFR-aPk,2166
@@ -20,8 +21,8 @@ llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=9
 llm_ie/chunkers.py,sha256=24h9l-Ldyx3EgfYicFqGhV_b-XofUS3yovC1nBWdDoo,5143
 llm_ie/data_types.py,sha256=72-3bzzYpo7KZpD9bjoroWT2eiM0zmWyDkBr2nHoBV0,18559
 llm_ie/engines.py,sha256=uE5sag1YeKBYBFF4gY7rYZK9e1ttatf9T7bV_xSg9Pk,36075
-llm_ie/extractors.py,sha256=
+llm_ie/extractors.py,sha256=aCRqKhjSoKTAWZ3WhX_O6V-S_rIvYhPsk78nZLDpQw8,95149
 llm_ie/prompt_editor.py,sha256=zh7Es5Ta2qSTgHtfF9Y9ZKXs4DMue6XlyRt9O6_Uk6c,10962
-llm_ie-1.
-llm_ie-1.
-llm_ie-1.
+llm_ie-1.2.0.dist-info/METADATA,sha256=X9zsMDwBAq1QzIkX8SSbmwLsEFiiAVeNeA0GTiNkAkQ,728
+llm_ie-1.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+llm_ie-1.2.0.dist-info/RECORD,,

{llm_ie-1.1.0.dist-info → llm_ie-1.2.0.dist-info}/WHEEL
File without changes