llm-ie 1.2.3__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_ie/__init__.py +6 -6
- llm_ie/asset/default_prompts/LLMUnitChunker_user_prompt.txt +129 -0
- llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt +2 -2
- llm_ie/asset/prompt_guide/StructExtractor_prompt_guide.txt +53 -0
- llm_ie/chunkers.py +104 -4
- llm_ie/data_types.py +72 -44
- llm_ie/engines.py +44 -0
- llm_ie/extractors.py +421 -73
- llm_ie/prompt_editor.py +9 -32
- llm_ie/utils.py +95 -0
- {llm_ie-1.2.3.dist-info → llm_ie-1.3.0.dist-info}/METADATA +1 -1
- {llm_ie-1.2.3.dist-info → llm_ie-1.3.0.dist-info}/RECORD +13 -10
- {llm_ie-1.2.3.dist-info → llm_ie-1.3.0.dist-info}/WHEEL +0 -0
llm_ie/extractors.py
CHANGED
@@ -1,7 +1,5 @@
 import abc
 import re
-import json
-import json_repair
 import inspect
 import importlib.resources
 import warnings
@@ -10,6 +8,7 @@ import asyncio
 import nest_asyncio
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Set, List, Dict, Tuple, Union, Callable, Generator, Optional, AsyncGenerator
+from llm_ie.utils import extract_json, apply_prompt_template
 from llm_ie.data_types import FrameExtractionUnit, LLMInformationExtractionFrame, LLMInformationExtractionDocument
 from llm_ie.chunkers import UnitChunker, WholeDocumentUnitChunker, SentenceUnitChunker
 from llm_ie.chunkers import ContextChunker, NoContextChunker, WholeDocumentContextChunker, SlideWindowContextChunker
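Note: `json` and `json_repair` are no longer imported here; JSON parsing moves into the new `llm_ie/utils.py` (+95 lines in this release) and is pulled in as `extract_json`. The utils implementation is not part of this diff. A minimal sketch of such a helper, assuming it keeps the brace-scanning approach of the removed `_find_dict_strings`/`_extract_json` methods (visible in the next hunk) and still relies on `json_repair`:

# Hypothetical sketch only -- the actual llm_ie.utils.extract_json is not shown in this diff.
from typing import Any, Dict, List

import json_repair  # assumed dependency, mirroring the import removed above


def extract_json(gen_text: str) -> List[Dict[str, Any]]:
    """Find balanced {...} spans in LLM output and parse each one leniently."""
    candidates, start, open_brace = [], -1, 0
    for i, char in enumerate(gen_text):
        if char == '{':
            if open_brace == 0:
                start = i
            open_brace += 1
        elif char == '}' and open_brace > 0:
            open_brace -= 1
            if open_brace == 0 and start != -1:
                candidates.append(gen_text[start:i + 1])
                start = -1

    json_objects = []
    for candidate in candidates:
        obj = json_repair.loads(candidate)  # tolerant of minor formatting errors
        if isinstance(obj, dict):
            json_objects.append(obj)
    return json_objects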
@@ -96,79 +95,428 @@ class Extractor:
         Returns : str
             a user prompt.
         """
-
+        return apply_prompt_template(self.prompt_template, text_content)
+
+
+class StructExtractor(Extractor):
+    def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker, prompt_template:str,
+                 system_prompt:str=None, context_chunker:ContextChunker=None, aggregation_func:Callable=None):
+        """
+        This class is for unanchored structured information extraction.
+        Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
+
+        Parameters:
+        ----------
+        inference_engine : InferenceEngine
+            the LLM inferencing engine object. Must implements the chat() method.
+        unit_chunker : UnitChunker
+            the unit chunker object that determines how to chunk the document text into units.
+        prompt_template : str
+            prompt template with "{{<placeholder name>}}" placeholder.
+        system_prompt : str, Optional
+            system prompt.
+        context_chunker : ContextChunker
+            the context chunker object that determines how to get context for each unit.
+        aggregation_func : Callable
+            a function that inputs a list of structured information (dict)
+            and outputs an aggregated structured information (dict).
+            if not specified, the default is to merge all dicts by updating keys and overwriting values sequentially.
+        """
+        super().__init__(inference_engine=inference_engine,
+                         prompt_template=prompt_template,
+                         system_prompt=system_prompt)
+
+        self.unit_chunker = unit_chunker
+        self.context_chunker = context_chunker
+        self.aggregation_func = aggregation_func
+
+
+    def extract(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+                verbose:bool=False, return_messages_log:bool=False) -> List[FrameExtractionUnit]:
+        """
+        This method inputs text content and outputs a string generated by LLM
+
+        Parameters:
+        ----------
+        text_content : Union[str, Dict[str,str]]
+            the input text content to put in prompt template.
+            If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
+            If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
+        return_messages_log : bool, Optional
+            if True, a list of messages will be returned.
+
+        Return : List[FrameExtractionUnit]
+            the output from LLM. Need post-processing.
+        """
+        # unit chunking
         if isinstance(text_content, str):
-
-            if len(matches) != 1:
-                raise ValueError("When text_content is str, the prompt template must has exactly 1 placeholder {{<placeholder name>}}.")
-            text = re.sub(r'\\', r'\\\\', text_content)
-            prompt = pattern.sub(text, self.prompt_template)
+            doc_text = text_content

         elif isinstance(text_content, dict):
-
-
-
-
-
-
-
-
-
-
-
-
-
+            if document_key is None:
+                raise ValueError("document_key must be provided when text_content is dict.")
+            doc_text = text_content[document_key]
+
+        units = self.unit_chunker.chunk(doc_text)
+        # context chunker init
+        self.context_chunker.fit(doc_text, units)
+
+        # messages log
+        messages_logger = MessagesLogger() if return_messages_log else None
+
+        # generate unit by unit
+        for i, unit in enumerate(units):
+            try:
+                # construct chat messages
+                messages = []
+                if self.system_prompt:
+                    messages.append({'role': 'system', 'content': self.system_prompt})
+
+                context = self.context_chunker.chunk(unit)
+
+                if context == "":
+                    # no context, just place unit in user prompt
+                    if isinstance(text_content, str):
+                        messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
+                    else:
+                        unit_content = text_content.copy()
+                        unit_content[document_key] = unit.text
+                        messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
+                else:
+                    # insert context to user prompt
+                    if isinstance(text_content, str):
+                        messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
+                    else:
+                        context_content = text_content.copy()
+                        context_content[document_key] = context
+                        messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
+                    # simulate conversation where assistant confirms
+                    messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
+                    # place unit of interest
+                    messages.append({'role': 'user', 'content': unit.text})
+
+                if verbose:
+                    print(f"\n\n{Fore.GREEN}Unit {i + 1}/{len(units)}:{Style.RESET_ALL}\n{unit.text}\n")
+                    if context != "":
+                        print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
+
+                    print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
+
+
+                gen_text = self.inference_engine.chat(
+                    messages=messages,
+                    verbose=verbose,
+                    stream=False,
+                    messages_logger=messages_logger
+                )
+
+                # add generated text to unit
+                unit.set_generated_text(gen_text["response"])
+                unit.set_status("success")
+            except Exception as e:
+                unit.set_status("fail")
+                warnings.warn(f"LLM inference failed for unit {i} ({unit.start}, {unit.end}): {e}", RuntimeWarning)
+
+        if return_messages_log:
+            return units, messages_logger.get_messages_log()
+
+        return units

-    def
+    def stream(self, text_content: Union[str, Dict[str, str]],
+               document_key: str = None) -> Generator[Dict[str, Any], None, List[FrameExtractionUnit]]:
         """
-
+        Streams LLM responses per unit with structured event types,
+        and returns collected data for post-processing.
+
+        Yields:
+        -------
+        Dict[str, Any]: (type, data)
+            - {"type": "info", "data": str_message}: General informational messages.
+            - {"type": "unit", "data": dict_unit_info}: Signals start of a new unit. dict_unit_info contains {'id', 'text', 'start', 'end'}
+            - {"type": "context", "data": str_context}: Context string for the current unit.
+            - {"type": "reasoning", "data": str_chunk}: A reasoning model thinking chunk from the LLM.
+            - {"type": "response", "data": str_chunk}: A response/answer chunk from the LLM.
+
+        Returns:
+        --------
+        List[FrameExtractionUnit]:
+            A list of FrameExtractionUnit objects, each containing the
+            original unit details and the fully accumulated 'gen_text' from the LLM.
+        """
+        if isinstance(text_content, str):
+            doc_text = text_content
+        elif isinstance(text_content, dict):
+            if document_key is None:
+                raise ValueError("document_key must be provided when text_content is dict.")
+            if document_key not in text_content:
+                raise ValueError(f"document_key '{document_key}' not found in text_content.")
+            doc_text = text_content[document_key]
+        else:
+            raise TypeError("text_content must be a string or a dictionary.")
+
+        units: List[FrameExtractionUnit] = self.unit_chunker.chunk(doc_text)
+        self.context_chunker.fit(doc_text, units)
+
+        yield {"type": "info", "data": f"Starting LLM processing for {len(units)} units."}
+
+        for i, unit in enumerate(units):
+            unit_info_payload = {"id": i, "text": unit.text, "start": unit.start, "end": unit.end}
+            yield {"type": "unit", "data": unit_info_payload}
+
+            messages = []
+            if self.system_prompt:
+                messages.append({'role': 'system', 'content': self.system_prompt})
+
+            context_str = self.context_chunker.chunk(unit)
+
+            # Construct prompt input based on whether text_content was str or dict
+            if context_str:
+                yield {"type": "context", "data": context_str}
+                prompt_input_for_context = context_str
+                if isinstance(text_content, dict):
+                    context_content_dict = text_content.copy()
+                    context_content_dict[document_key] = context_str
+                    prompt_input_for_context = context_content_dict
+                messages.append({'role': 'user', 'content': self._get_user_prompt(prompt_input_for_context)})
+                messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
+                messages.append({'role': 'user', 'content': unit.text})
+            else: # No context
+                prompt_input_for_unit = unit.text
+                if isinstance(text_content, dict):
+                    unit_content_dict = text_content.copy()
+                    unit_content_dict[document_key] = unit.text
+                    prompt_input_for_unit = unit_content_dict
+                messages.append({'role': 'user', 'content': self._get_user_prompt(prompt_input_for_unit)})
+
+            current_gen_text = ""
+
+            response_stream = self.inference_engine.chat(
+                messages=messages,
+                stream=True
+            )
+            for chunk in response_stream:
+                yield chunk
+                if chunk["type"] == "response":
+                    current_gen_text += chunk["data"]
+
+            # Store the result for this unit
+            unit.set_generated_text(current_gen_text)
+            unit.set_status("success")
+
+        yield {"type": "info", "data": "All units processed by LLM."}
+        return units
+
+    async def extract_async(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+                            concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[FrameExtractionUnit]:
+        """
+        This is the asynchronous version of the extract() method.

         Parameters:
-
-
-            the input text
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    start = i
-                open_brace += 1
-            elif char == '}':
-                open_brace -= 1
-                if open_brace == 0 and start != -1:
-                    json_objects.append(text[start:i + 1])
-                    start = -1
-
-        return json_objects
-
-
-    def _extract_json(self, gen_text:str) -> List[Dict[str, str]]:
-        """
-        This method inputs a generated text and output a JSON of information tuples
+        ----------
+        text_content : Union[str, Dict[str,str]]
+            the input text content to put in prompt template.
+            If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
+            If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
+        document_key : str, Optional
+            specify the key in text_content where document text is.
+            If text_content is str, this parameter will be ignored.
+        concurrent_batch_size : int, Optional
+            the batch size for concurrent processing.
+        return_messages_log : bool, Optional
+            if True, a list of messages will be returned.
+
+        Return : List[FrameExtractionUnit]
+            the output from LLM for each unit. Contains the start, end, text, and generated text.
         """
-
-
-
-
-
-
-
-
-
-
-
+        if isinstance(text_content, str):
+            doc_text = text_content
+        elif isinstance(text_content, dict):
+            if document_key is None:
+                raise ValueError("document_key must be provided when text_content is dict.")
+            if document_key not in text_content:
+                raise ValueError(f"document_key '{document_key}' not found in text_content dictionary.")
+            doc_text = text_content[document_key]
+        else:
+            raise TypeError("text_content must be a string or a dictionary.")
+
+        units = self.unit_chunker.chunk(doc_text)
+
+        # context chunker init
+        self.context_chunker.fit(doc_text, units)
+
+        # messages logger init
+        messages_logger = MessagesLogger() if return_messages_log else None
+
+        # Prepare inputs for all units first
+        tasks_input = []
+        for i, unit in enumerate(units):
+            # construct chat messages
+            messages = []
+            if self.system_prompt:
+                messages.append({'role': 'system', 'content': self.system_prompt})
+
+            context = self.context_chunker.chunk(unit)
+
+            if context == "":
+                # no context, just place unit in user prompt
+                if isinstance(text_content, str):
+                    messages.append({'role': 'user', 'content': self._get_user_prompt(unit.text)})
+                else:
+                    unit_content = text_content.copy()
+                    unit_content[document_key] = unit.text
+                    messages.append({'role': 'user', 'content': self._get_user_prompt(unit_content)})
+            else:
+                # insert context to user prompt
+                if isinstance(text_content, str):
+                    messages.append({'role': 'user', 'content': self._get_user_prompt(context)})
                 else:
-
-
+                    context_content = text_content.copy()
+                    context_content[document_key] = context
+                    messages.append({'role': 'user', 'content': self._get_user_prompt(context_content)})
+                # simulate conversation where assistant confirms
+                messages.append({'role': 'assistant', 'content': 'Sure, please provide the unit text (e.g., sentence, line, chunk) of interest.'})
+                # place unit of interest
+                messages.append({'role': 'user', 'content': unit.text})
+
+            # Store unit and messages together for the task
+            tasks_input.append({"unit": unit, "messages": messages, "original_index": i})
+
+        # Process units concurrently with asyncio.Semaphore
+        semaphore = asyncio.Semaphore(concurrent_batch_size)
+
+        async def semaphore_helper(task_data: Dict, **kwrs):
+            unit = task_data["unit"]
+            messages = task_data["messages"]
+
+            async with semaphore:
+                gen_text = await self.inference_engine.chat_async(
+                    messages=messages,
+                    messages_logger=messages_logger
+                )
+
+            unit.set_generated_text(gen_text["response"])
+            unit.set_status("success")
+
+        # Create and gather tasks
+        tasks = []
+        for task_inp in tasks_input:
+            task = asyncio.create_task(semaphore_helper(
+                task_inp
+            ))
+            tasks.append(task)
+
+        await asyncio.gather(*tasks)
+
+        # Return units
+        if return_messages_log:
+            return units, messages_logger.get_messages_log()
+        else:
+            return units
+
+    def _default_struct_aggregate(self, structs: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """
+        Given a list of structured information (dict), aggregate them into a single dict by seqentially updating keys
+        and overwriting values.
+        """
+        aggregated_struct = {}
+        for struct in structs:
+            aggregated_struct.update(struct)
+        return aggregated_struct
+

+    def extract_struct(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+                       verbose:bool=False, concurrent:bool=False, concurrent_batch_size:int=32,
+                       return_messages_log:bool=False) -> List[Dict[str, Any]]:
+        """
+        This method inputs a document text and outputs a list of LLMInformationExtractionFrame
+        It use the extract() method and post-process outputs into frames.
+
+        Parameters:
+        ----------
+        text_content : Union[str, Dict[str,str]]
+            the input text content to put in prompt template.
+            If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
+            If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
+        document_key : str, Optional
+            specify the key in text_content where document text is.
+            If text_content is str, this parameter will be ignored.
+        verbose : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
+        concurrent : bool, Optional
+            if True, the sentences will be extracted in concurrent.
+        concurrent_batch_size : int, Optional
+            the number of sentences to process in concurrent. Only used when `concurrent` is True.
+        return_messages_log : bool, Optional
+            if True, a list of messages will be returned.
+
+        Return : List[Dict[str, Any]]
+            a list of unanchored structured information.
+        """
+        if concurrent:
+            if verbose:
+                warnings.warn("verbose=True is not supported in concurrent mode.", RuntimeWarning)
+
+            nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
+            extraction_results = asyncio.run(self.extract_async(text_content=text_content,
+                                                                document_key=document_key,
+                                                                concurrent_batch_size=concurrent_batch_size,
+                                                                return_messages_log=return_messages_log)
+                                             )
+        else:
+            extraction_results = self.extract(text_content=text_content,
+                                              document_key=document_key,
+                                              verbose=verbose,
+                                              return_messages_log=return_messages_log)
+
+        units, messages_log = extraction_results if return_messages_log else (extraction_results, None)
+
+        struct_json = []
+        for unit in units:
+            if unit.status != "success":
+                continue
+            try:
+                unit_struct_json = extract_json(unit.get_generated_text())
+                struct_json.extend(unit_struct_json)
+            except Exception as e:
+                unit.set_status("fail")
+                warnings.warn(f"Struct extraction failed for unit ({unit.start}, {unit.end}): {e}", RuntimeWarning)
+
+        if self.aggregation_func is None:
+            struct = self._default_struct_aggregate(struct_json)
+        else:
+            struct = self.aggregation_func(struct_json)
+
+        if return_messages_log:
+            return struct, messages_log
+        return struct
+
+
+class BasicStructExtractor(StructExtractor):
+    def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
+                 system_prompt:str=None, aggregation_func:Callable=None):
+        """
+        This class prompts the LLM with the whole document at once for structured information extraction.
+        Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
+
+        Parameters:
+        ----------
+        inference_engine : InferenceEngine
+            the LLM inferencing engine object. Must implements the chat() method.
+        prompt_template : str
+            prompt template with "{{<placeholder name>}}" placeholder.
+        system_prompt : str, Optional
+            system prompt.
+        aggregation_func : Callable
+            a function that inputs a list of structured information (dict)
+            and outputs an aggregated structured information (dict).
+            if not specified, the default is to merge all dicts by updating keys and overwriting values sequentially.
+        """
+        super().__init__(inference_engine=inference_engine,
+                         unit_chunker=WholeDocumentUnitChunker(),
+                         prompt_template=prompt_template,
+                         system_prompt=system_prompt,
+                         context_chunker=WholeDocumentContextChunker())
+

 class FrameExtractor(Extractor):
     from nltk.tokenize import RegexpTokenizer
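The StructExtractor / BasicStructExtractor classes added above run the prompt unit by unit, parse each generated text with extract_json(), and aggregate the resulting dicts (by default, later keys overwrite earlier values). A usage sketch under stated assumptions: the engine class and its constructor arguments, the prompt, and the input text are illustrative only, and the chunkers are assumed to take no required constructor arguments.

# Illustrative sketch only -- engine, prompt, and input text are placeholders.
from llm_ie.engines import OpenAIInferenceEngine  # assumed engine; any InferenceEngine with chat()/chat_async() works
from llm_ie.chunkers import SentenceUnitChunker, NoContextChunker
from llm_ie.extractors import StructExtractor


def collect_all_values(structs):
    # Custom aggregation_func: keep every extracted value per key instead of
    # the default behavior of overwriting values sequentially.
    merged = {}
    for struct in structs:
        for key, value in struct.items():
            merged.setdefault(key, []).append(value)
    return merged


extractor = StructExtractor(inference_engine=OpenAIInferenceEngine(model="gpt-4o-mini"),
                            unit_chunker=SentenceUnitChunker(),
                            context_chunker=NoContextChunker(),
                            prompt_template="Return a JSON object with any medications mentioned.\n\n{{document}}",
                            aggregation_func=collect_all_values)

structured = extractor.extract_struct(text_content="Started metformin. Continue lisinopril.",
                                      concurrent=True)
print(structured)  # e.g. {"medication": ["metformin", "lisinopril"]}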
@@ -372,7 +720,7 @@ class FrameExtractor(Extractor):
         return_messages_log : bool, Optional
             if True, a list of messages will be returned.

-        Return :
+        Return : List[LLMInformationExtractionFrame]
             a list of frames.
         """
         return NotImplemented
@@ -731,7 +1079,7 @@ class DirectFrameExtractor(FrameExtractor):
         return_messages_log : bool, Optional
             if True, a list of messages will be returned.

-        Return :
+        Return : List[LLMInformationExtractionFrame]
             a list of frames.
         """
         ENTITY_KEY = "entity_text"
@@ -759,7 +1107,7 @@ class DirectFrameExtractor(FrameExtractor):
             if unit.status != "success":
                 warnings.warn(f"Skipping failed unit ({unit.start}, {unit.end}): {unit.text}", RuntimeWarning)
                 continue
-            for entity in
+            for entity in extract_json(gen_text=unit.gen_text):
                 if ENTITY_KEY in entity:
                     entity_json.append(entity)
                 else:
@@ -963,7 +1311,7 @@ class DirectFrameExtractor(FrameExtractor):
         frame_list = []
         for res in sorted(doc_results['units'], key=lambda r: r.start):
             entity_json = []
-            for entity in
+            for entity in extract_json(gen_text=res.gen_text):
                 if ENTITY_KEY in entity:
                     entity_json.append(entity)
                 else:
@@ -1712,7 +2060,7 @@ class AttributeExtractor(Extractor):
             messages_logger=messages_logger
         )

-        attribute_list =
+        attribute_list = extract_json(gen_text=gen_text["response"])
         if isinstance(attribute_list, list) and len(attribute_list) > 0:
             attributes = attribute_list[0]
         if return_messages_log:
@@ -1822,7 +2170,7 @@ class AttributeExtractor(Extractor):
         messages.append({'role': 'user', 'content': self._get_user_prompt({"context": context, "frame": str(frame.to_dict())})})

         gen_text = await self.inference_engine.chat_async(messages=messages, messages_logger=messages_logger)
-        attribute_list =
+        attribute_list = extract_json(gen_text=gen_text["response"])
         attributes = attribute_list[0] if isinstance(attribute_list, list) and len(attribute_list) > 0 else {}
         return {"frame": frame, "attributes": attributes, "messages": messages}

@@ -2075,7 +2423,7 @@ class BinaryRelationExtractor(RelationExtractor):
             return None

     def _post_process_result(self, gen_text: str, pair_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-        rel_json =
+        rel_json = extract_json(gen_text)
         if len(rel_json) > 0 and "Relation" in rel_json[0]:
             rel = rel_json[0]["Relation"]
             if (isinstance(rel, bool) and rel) or (isinstance(rel, str) and rel.lower() == 'true'):
@@ -2141,7 +2489,7 @@ class MultiClassRelationExtractor(RelationExtractor):
             return None

     def _post_process_result(self, gen_text: str, pair_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-        rel_json =
+        rel_json = extract_json(gen_text)
         pos_rel_types = pair_data['pos_rel_types']
         if len(rel_json) > 0 and "RelationType" in rel_json[0]:
             rel_type = rel_json[0]["RelationType"]
llm_ie/prompt_editor.py
CHANGED
@@ -2,6 +2,7 @@ import sys
 import warnings
 from typing import List, Dict, Generator
 import importlib.resources
+from llm_ie.utils import apply_prompt_template
 from llm_ie.engines import InferenceEngine
 from llm_ie.extractors import FrameExtractor
 import re
@@ -45,30 +46,6 @@ class PromptEditor:

         # internal memory (history messages) for the `chat` method
         self.messages = []
-
-    def _apply_prompt_template(self, text_content:Dict[str,str], prompt_template:str) -> str:
-        """
-        This method applies text_content to prompt_template and returns a prompt.
-
-        Parameters
-        ----------
-        text_content : Dict[str,str]
-            the input text content to put in prompt template.
-            all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
-
-        Returns : str
-            a prompt.
-        """
-        pattern = re.compile(r'{{(.*?)}}')
-        placeholders = pattern.findall(prompt_template)
-        if len(placeholders) != len(text_content):
-            raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
-        if not all([k in placeholders for k, _ in text_content.items()]):
-            raise ValueError(f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders}).")
-
-        prompt = pattern.sub(lambda match: re.sub(r'\\', r'\\\\', text_content[match.group(1)]), prompt_template)
-
-        return prompt


     def rewrite(self, draft:str) -> str:
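The inline _apply_prompt_template removed here (together with the inline template logic dropped from Extractor._get_user_prompt in extractors.py) is consolidated into apply_prompt_template in the new llm_ie/utils.py. That implementation is not part of this diff; a minimal sketch modeled on the removed method, with the single-placeholder string case described in the extractor docstrings, might look like:

import re
from typing import Dict, Union

# Hypothetical stand-in for llm_ie.utils.apply_prompt_template -- modeled on the
# _apply_prompt_template method removed above; the real utils.py code is not shown here.
def apply_prompt_template(prompt_template: str, text_content: Union[str, Dict[str, str]]) -> str:
    pattern = re.compile(r'{{(.*?)}}')
    placeholders = pattern.findall(prompt_template)

    if isinstance(text_content, str):
        # str input is allowed only when the template has exactly one placeholder.
        if len(placeholders) != 1:
            raise ValueError("When text_content is str, the prompt template must have exactly 1 placeholder.")
        text_content = {placeholders[0]: text_content}

    if len(placeholders) != len(text_content):
        raise ValueError("text_content and prompt template placeholders must have equal size.")
    if not all(key in placeholders for key in text_content):
        raise ValueError("All keys in text_content must match placeholders in the prompt template.")

    # Backslash doubling mirrors the removed method's handling of substituted text.
    return pattern.sub(lambda match: re.sub(r'\\', r'\\\\', text_content[match.group(1)]), prompt_template)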
@@ -80,8 +57,8 @@
         with open(file_path, 'r') as f:
             rewrite_prompt_template = f.read()

-        prompt =
-
+        prompt = apply_prompt_template(prompt_template=rewrite_prompt_template,
+                                       text_content={"draft": draft, "prompt_guideline": self.prompt_guide})
         messages = [{"role": "system", "content": self.system_prompt},
                     {"role": "user", "content": prompt}]
         res = self.inference_engine.chat(messages, verbose=True)
@@ -96,8 +73,8 @@ class PromptEditor:
         with open(file_path, 'r') as f:
             comment_prompt_template = f.read()

-        prompt =
-
+        prompt = apply_prompt_template(prompt_template=comment_prompt_template,
+                                       text_content={"draft": draft, "prompt_guideline": self.prompt_guide})
         messages = [{"role": "system", "content": self.system_prompt},
                     {"role": "user", "content": prompt}]
         res = self.inference_engine.chat(messages, verbose=True)
@@ -254,8 +231,8 @@ class PromptEditor:
         with open(file_path, 'r') as f:
             chat_prompt_template = f.read()

-        guideline =
-
+        guideline = apply_prompt_template(prompt_template=chat_prompt_template,
+                                          text_content={"prompt_guideline": self.prompt_guide})

         self.messages = [{"role": "system", "content": self.system_prompt + guideline}]

@@ -288,8 +265,8 @@ class PromptEditor:
         with open(file_path, 'r') as f:
             chat_prompt_template = f.read()

-        guideline =
-
+        guideline = apply_prompt_template(prompt_template=chat_prompt_template,
+                                          text_content={"prompt_guideline": self.prompt_guide})

         messages = [{"role": "system", "content": self.system_prompt + guideline}] + messages
