llm_ie-1.2.2-py3-none-any.whl → llm_ie-1.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_ie/__init__.py +5 -4
- llm_ie/asset/default_prompts/LLMUnitChunker_user_prompt.txt +129 -0
- llm_ie/chunkers.py +145 -6
- llm_ie/data_types.py +23 -37
- llm_ie/engines.py +621 -61
- llm_ie/extractors.py +341 -297
- llm_ie/prompt_editor.py +9 -32
- llm_ie/utils.py +95 -0
- {llm_ie-1.2.2.dist-info → llm_ie-1.2.4.dist-info}/METADATA +1 -1
- {llm_ie-1.2.2.dist-info → llm_ie-1.2.4.dist-info}/RECORD +11 -9
- {llm_ie-1.2.2.dist-info → llm_ie-1.2.4.dist-info}/WHEEL +0 -0
llm_ie/prompt_editor.py
CHANGED
@@ -2,6 +2,7 @@ import sys
 import warnings
 from typing import List, Dict, Generator
 import importlib.resources
+from llm_ie.utils import apply_prompt_template
 from llm_ie.engines import InferenceEngine
 from llm_ie.extractors import FrameExtractor
 import re

@@ -45,30 +46,6 @@ class PromptEditor:
 
         # internal memory (history messages) for the `chat` method
         self.messages = []
-
-    def _apply_prompt_template(self, text_content:Dict[str,str], prompt_template:str) -> str:
-        """
-        This method applies text_content to prompt_template and returns a prompt.
-
-        Parameters
-        ----------
-        text_content : Dict[str,str]
-            the input text content to put in prompt template.
-            all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
-
-        Returns : str
-            a prompt.
-        """
-        pattern = re.compile(r'{{(.*?)}}')
-        placeholders = pattern.findall(prompt_template)
-        if len(placeholders) != len(text_content):
-            raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
-        if not all([k in placeholders for k, _ in text_content.items()]):
-            raise ValueError(f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders}).")
-
-        prompt = pattern.sub(lambda match: re.sub(r'\\', r'\\\\', text_content[match.group(1)]), prompt_template)
-
-        return prompt
 
 
     def rewrite(self, draft:str) -> str:

@@ -80,8 +57,8 @@ class PromptEditor:
         with open(file_path, 'r') as f:
             rewrite_prompt_template = f.read()
 
-        prompt = 
-
+        prompt = apply_prompt_template(prompt_template=rewrite_prompt_template,
+                                       text_content={"draft": draft, "prompt_guideline": self.prompt_guide})
         messages = [{"role": "system", "content": self.system_prompt},
                     {"role": "user", "content": prompt}]
         res = self.inference_engine.chat(messages, verbose=True)

@@ -96,8 +73,8 @@ class PromptEditor:
         with open(file_path, 'r') as f:
             comment_prompt_template = f.read()
 
-        prompt = 
-
+        prompt = apply_prompt_template(prompt_template=comment_prompt_template,
+                                       text_content={"draft": draft, "prompt_guideline": self.prompt_guide})
         messages = [{"role": "system", "content": self.system_prompt},
                     {"role": "user", "content": prompt}]
         res = self.inference_engine.chat(messages, verbose=True)

@@ -254,8 +231,8 @@ class PromptEditor:
         with open(file_path, 'r') as f:
             chat_prompt_template = f.read()
 
-        guideline = 
-
+        guideline = apply_prompt_template(prompt_template=chat_prompt_template,
+                                          text_content={"prompt_guideline": self.prompt_guide})
 
         self.messages = [{"role": "system", "content": self.system_prompt + guideline}]
 

@@ -288,8 +265,8 @@ class PromptEditor:
         with open(file_path, 'r') as f:
             chat_prompt_template = f.read()
 
-        guideline = 
-
+        guideline = apply_prompt_template(prompt_template=chat_prompt_template,
+                                          text_content={"prompt_guideline": self.prompt_guide})
 
         messages = [{"role": "system", "content": self.system_prompt + guideline}] + messages
 
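The net effect of these hunks: the private PromptEditor._apply_prompt_template method is removed, and its four call sites now use the shared llm_ie.utils.apply_prompt_template helper. A minimal sketch of the new call pattern, with a made-up template (the real templates ship as .txt assets under llm_ie/asset/):

    from llm_ie.utils import apply_prompt_template

    # Hypothetical template for illustration; not one of the packaged assets.
    template = "Rewrite the draft:\n{{draft}}\n\nFollow this guideline:\n{{prompt_guideline}}"

    # The dict keys must match the {{...}} placeholders one-to-one,
    # otherwise apply_prompt_template raises ValueError.
    prompt = apply_prompt_template(prompt_template=template,
                                   text_content={"draft": "Extract all diagnoses.",
                                                 "prompt_guideline": "Be concise."})
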
llm_ie/utils.py
ADDED
@@ -0,0 +1,95 @@
+from typing import List, Dict, Union
+import re
+import json
+import warnings
+import json_repair
+
+def _find_dict_strings(text: str) -> List[str]:
+    """
+    Extracts balanced JSON-like dictionaries from a string, even if nested.
+
+    Parameters:
+    -----------
+    text : str
+        the input text containing JSON-like structures.
+
+    Returns : List[str]
+        A list of valid JSON-like strings representing dictionaries.
+    """
+    open_brace = 0
+    start = -1
+    json_objects = []
+
+    for i, char in enumerate(text):
+        if char == '{':
+            if open_brace == 0:
+                # start of a new JSON object
+                start = i
+            open_brace += 1
+        elif char == '}':
+            open_brace -= 1
+            if open_brace == 0 and start != -1:
+                json_objects.append(text[start:i + 1])
+                start = -1
+
+    return json_objects
+
+
+def extract_json(gen_text:str) -> List[Dict[str, str]]:
+    """
+    This method inputs a generated text and output a JSON of information tuples
+    """
+    out = []
+    dict_str_list = _find_dict_strings(gen_text)
+    for dict_str in dict_str_list:
+        try:
+            dict_obj = json.loads(dict_str)
+            out.append(dict_obj)
+        except json.JSONDecodeError:
+            dict_obj = json_repair.repair_json(dict_str, skip_json_loads=True, return_objects=True)
+            if dict_obj:
+                warnings.warn(f'JSONDecodeError detected, fixed with repair_json:\n{dict_str}', RuntimeWarning)
+                out.append(dict_obj)
+            else:
+                warnings.warn(f'JSONDecodeError could not be fixed:\n{dict_str}', RuntimeWarning)
+    return out
+
+
+def apply_prompt_template(prompt_template:str, text_content:Union[str, Dict[str,str]]) -> str:
+    """
+    This method applies text_content to prompt_template and returns a prompt.
+
+    Parameters:
+    ----------
+    prompt_template : str
+        the prompt template with placeholders {{<placeholder name>}}.
+    text_content : Union[str, Dict[str,str]]
+        the input text content to put in prompt template.
+        If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
+        If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}. All values must be str.
+
+    Returns : str
+        a user prompt.
+    """
+    pattern = re.compile(r'{{(.*?)}}')
+    if isinstance(text_content, str):
+        matches = pattern.findall(prompt_template)
+        if len(matches) != 1:
+            raise ValueError("When text_content is str, the prompt template must has exactly 1 placeholder {{<placeholder name>}}.")
+        text = re.sub(r'\\', r'\\\\', text_content)
+        prompt = pattern.sub(text, prompt_template)
+
+    elif isinstance(text_content, dict):
+        # Check if all values are str
+        if not all([isinstance(v, str) for v in text_content.values()]):
+            raise ValueError("All values in text_content must be str.")
+        # Check if all keys are in the prompt template
+        placeholders = pattern.findall(prompt_template)
+        if len(placeholders) != len(text_content):
+            raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
+        if not all([k in placeholders for k, _ in text_content.items()]):
+            raise ValueError(f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders}).")
+
+        prompt = pattern.sub(lambda match: re.sub(r'\\', r'\\\\', text_content[match.group(1)]), prompt_template)
+
+    return prompt
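For reference, a usage sketch of the new extract_json helper; the model output below is made up, and json_repair (imported at the top of the file) is assumed to be installed:

    from llm_ie.utils import extract_json

    # Typical LLM output: prose mixed with JSON; the second object has a trailing comma.
    gen_text = '''Extracted frames:
    {"entity_text": "hypertension", "attr": {"certainty": "confirmed"}}
    {"entity_text": "aspirin", "attr": {"dose": "81 mg",}}'''

    # _find_dict_strings() collects balanced {...} spans, including nested braces.
    # The malformed second object fails json.loads(), is recovered by
    # json_repair.repair_json(), and a RuntimeWarning is emitted.
    frames = extract_json(gen_text)
    # -> [{'entity_text': 'hypertension', 'attr': {'certainty': 'confirmed'}},
    #     {'entity_text': 'aspirin', 'attr': {'dose': '81 mg'}}]
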
{llm_ie-1.2.2.dist-info → llm_ie-1.2.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-ie
-Version: 1.2.2
+Version: 1.2.4
 Summary: A comprehensive toolkit that provides building blocks for LLM-based named entity recognition, attribute extraction, and relation extraction pipelines.
 License: MIT
 Author: Enshuo (David) Hsu
{llm_ie-1.2.2.dist-info → llm_ie-1.2.4.dist-info}/RECORD
CHANGED

@@ -1,10 +1,11 @@
-llm_ie/__init__.py,sha256=
+llm_ie/__init__.py,sha256=9a0bTN2ol5k_rCEidhnqIwJCnVTfit7TbTtbWG4hj1s,1881
 llm_ie/asset/PromptEditor_prompts/chat.txt,sha256=Fq62voV0JQ8xBRcxS1Nmdd7DkHs1fGYb-tmNwctZZK0,118
 llm_ie/asset/PromptEditor_prompts/comment.txt,sha256=C_lxx-dlOlFJ__jkHKosZ8HsNAeV1aowh2B36nIipBY,159
 llm_ie/asset/PromptEditor_prompts/rewrite.txt,sha256=JAwY9vm1jSmKf2qcLBYUvrSmME2EJH36bALmkwZDWYQ,178
 llm_ie/asset/PromptEditor_prompts/system.txt,sha256=QwGTIJvp-5u2P8CkGt_rabttlN1puHQwIBNquUm1ZHo,730
 llm_ie/asset/default_prompts/BasicReviewFrameExtractor_addition_review_prompt.txt,sha256=pKes8BOAoJJgmo_IQh2ISKiMh_rDPl_rDUU_VgDQ4o4,273
 llm_ie/asset/default_prompts/BasicReviewFrameExtractor_revision_review_prompt.txt,sha256=9Nwkr2U_3ZSk01xDtgiFJVABi6FkC8Izdq7zrzFfLRg,235
+llm_ie/asset/default_prompts/LLMUnitChunker_user_prompt.txt,sha256=tf9tu9FvNFpp26J7S39bJLuiI5R47bapDdEplvvbJU4,4203
 llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt,sha256=NLEtnmx1aOsnwifAsXr65pX9WdrIWdx-MJ7aMtNKi8c,331
 llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt,sha256=lGGjdeFpzZEc56w-EtQDMyYFs7A3DQAM32sT42Nf_08,293
 llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt,sha256=Of11LFuXLB249oekFelzlIeoAB0cATReqWgFTvhNz_8,329

@@ -18,11 +19,12 @@ llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt,sha256=EQ
 llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=rBRIXg8JQWUHTRdoluTS0zkbTkBAacEtHHvr3lZaQCw,10437
 llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=97_-y_vHMLG4Kb8fLsGgibLxB-3mest8k3LHfLo5h-I,10465
 llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=97_-y_vHMLG4Kb8fLsGgibLxB-3mest8k3LHfLo5h-I,10465
-llm_ie/chunkers.py,sha256=
-llm_ie/data_types.py,sha256=
-llm_ie/engines.py,sha256=
-llm_ie/extractors.py,sha256=
-llm_ie/prompt_editor.py,sha256=
-llm_ie
-llm_ie-1.2.
-llm_ie-1.2.
+llm_ie/chunkers.py,sha256=b4APRwaLMU40QXVEhOK8m1DZi_jr-VCHAFwbMjqVBgA,11308
+llm_ie/data_types.py,sha256=6vefyGTgZcJBYgiuyfcbJN1ZKK4tNvOZf6HFpxFZngY,17792
+llm_ie/engines.py,sha256=K4Zgb1dYiuopBeTLcgSAseI-VXgwtTeWf9O4EK9SQqE,63901
+llm_ie/extractors.py,sha256=f-TUZFprJZ_ftrnKbi-g-au4KoJwtciCCawXHWzmDtU,100792
+llm_ie/prompt_editor.py,sha256=Hqukm2HMgsoGpXV3vZ__7CGgfMhd-UUIwTKGnfSDltM,12055
+llm_ie/utils.py,sha256=k6M4l8GsKOMcmO6UwONQ353Zk-TeoBj6HXGjlAn-JE0,3679
+llm_ie-1.2.4.dist-info/METADATA,sha256=dl0JyDkgjEbk12N5I1fZg-jh7gEvTpuJ1Ox1_mHo_6Q,728
+llm_ie-1.2.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+llm_ie-1.2.4.dist-info/RECORD,,
{llm_ie-1.2.2.dist-info → llm_ie-1.2.4.dist-info}/WHEEL
File without changes