llm-ie 1.2.2__py3-none-any.whl → 1.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/prompt_editor.py CHANGED
@@ -2,6 +2,7 @@ import sys
2
2
  import warnings
3
3
  from typing import List, Dict, Generator
4
4
  import importlib.resources
5
+ from llm_ie.utils import apply_prompt_template
5
6
  from llm_ie.engines import InferenceEngine
6
7
  from llm_ie.extractors import FrameExtractor
7
8
  import re
@@ -45,30 +46,6 @@ class PromptEditor:
45
46
 
46
47
  # internal memory (history messages) for the `chat` method
47
48
  self.messages = []
48
-
49
- def _apply_prompt_template(self, text_content:Dict[str,str], prompt_template:str) -> str:
50
- """
51
- This method applies text_content to prompt_template and returns a prompt.
52
-
53
- Parameters
54
- ----------
55
- text_content : Dict[str,str]
56
- the input text content to put in prompt template.
57
- all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
58
-
59
- Returns : str
60
- a prompt.
61
- """
62
- pattern = re.compile(r'{{(.*?)}}')
63
- placeholders = pattern.findall(prompt_template)
64
- if len(placeholders) != len(text_content):
65
- raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
66
- if not all([k in placeholders for k, _ in text_content.items()]):
67
- raise ValueError(f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders}).")
68
-
69
- prompt = pattern.sub(lambda match: re.sub(r'\\', r'\\\\', text_content[match.group(1)]), prompt_template)
70
-
71
- return prompt
72
49
 
73
50
 
74
51
  def rewrite(self, draft:str) -> str:
@@ -80,8 +57,8 @@ class PromptEditor:
80
57
  with open(file_path, 'r') as f:
81
58
  rewrite_prompt_template = f.read()
82
59
 
83
- prompt = self._apply_prompt_template(text_content={"draft": draft, "prompt_guideline": self.prompt_guide},
84
- prompt_template=rewrite_prompt_template)
60
+ prompt = apply_prompt_template(prompt_template=rewrite_prompt_template,
61
+ text_content={"draft": draft, "prompt_guideline": self.prompt_guide})
85
62
  messages = [{"role": "system", "content": self.system_prompt},
86
63
  {"role": "user", "content": prompt}]
87
64
  res = self.inference_engine.chat(messages, verbose=True)
@@ -96,8 +73,8 @@ class PromptEditor:
96
73
  with open(file_path, 'r') as f:
97
74
  comment_prompt_template = f.read()
98
75
 
99
- prompt = self._apply_prompt_template(text_content={"draft": draft, "prompt_guideline": self.prompt_guide},
100
- prompt_template=comment_prompt_template)
76
+ prompt = apply_prompt_template(prompt_template=comment_prompt_template,
77
+ text_content={"draft": draft, "prompt_guideline": self.prompt_guide})
101
78
  messages = [{"role": "system", "content": self.system_prompt},
102
79
  {"role": "user", "content": prompt}]
103
80
  res = self.inference_engine.chat(messages, verbose=True)
@@ -254,8 +231,8 @@ class PromptEditor:
254
231
  with open(file_path, 'r') as f:
255
232
  chat_prompt_template = f.read()
256
233
 
257
- guideline = self._apply_prompt_template(text_content={"prompt_guideline": self.prompt_guide},
258
- prompt_template=chat_prompt_template)
234
+ guideline = apply_prompt_template(prompt_template=chat_prompt_template,
235
+ text_content={"prompt_guideline": self.prompt_guide})
259
236
 
260
237
  self.messages = [{"role": "system", "content": self.system_prompt + guideline}]
261
238
 
@@ -288,8 +265,8 @@ class PromptEditor:
288
265
  with open(file_path, 'r') as f:
289
266
  chat_prompt_template = f.read()
290
267
 
291
- guideline = self._apply_prompt_template(text_content={"prompt_guideline": self.prompt_guide},
292
- prompt_template=chat_prompt_template)
268
+ guideline = apply_prompt_template(prompt_template=chat_prompt_template,
269
+ text_content={"prompt_guideline": self.prompt_guide})
293
270
 
294
271
  messages = [{"role": "system", "content": self.system_prompt + guideline}] + messages
295
272
 
llm_ie/utils.py ADDED
@@ -0,0 +1,95 @@
1
+ from typing import List, Dict, Union
2
+ import re
3
+ import json
4
+ import warnings
5
+ import json_repair
6
+
7
+ def _find_dict_strings(text: str) -> List[str]:
8
+ """
9
+ Extracts balanced JSON-like dictionaries from a string, even if nested.
10
+
11
+ Parameters:
12
+ -----------
13
+ text : str
14
+ the input text containing JSON-like structures.
15
+
16
+ Returns : List[str]
17
+ A list of valid JSON-like strings representing dictionaries.
18
+ """
19
+ open_brace = 0
20
+ start = -1
21
+ json_objects = []
22
+
23
+ for i, char in enumerate(text):
24
+ if char == '{':
25
+ if open_brace == 0:
26
+ # start of a new JSON object
27
+ start = i
28
+ open_brace += 1
29
+ elif char == '}':
30
+ open_brace -= 1
31
+ if open_brace == 0 and start != -1:
32
+ json_objects.append(text[start:i + 1])
33
+ start = -1
34
+
35
+ return json_objects
36
+
37
+
38
def extract_json(gen_text:str) -> List[Dict[str, str]]:
    """
    Parse every JSON-like dictionary found in a generated text.

    Each candidate substring is first parsed strictly with ``json.loads``;
    on failure, ``json_repair`` attempts a fix. Both the repaired and the
    unrepairable cases emit a ``RuntimeWarning``.

    Parameters:
    -----------
    gen_text : str
        LLM-generated text that may embed JSON dictionaries.

    Returns : List[Dict[str, str]]
        the successfully parsed (or repaired) dictionaries, in order.
    """
    results = []
    for candidate in _find_dict_strings(gen_text):
        try:
            results.append(json.loads(candidate))
            continue
        except json.JSONDecodeError:
            pass

        # Strict parsing failed — try a best-effort repair.
        repaired = json_repair.repair_json(candidate, skip_json_loads=True, return_objects=True)
        if repaired:
            warnings.warn(f'JSONDecodeError detected, fixed with repair_json:\n{candidate}', RuntimeWarning)
            results.append(repaired)
        else:
            warnings.warn(f'JSONDecodeError could not be fixed:\n{candidate}', RuntimeWarning)
    return results
56
+
57
+
58
def apply_prompt_template(prompt_template:str, text_content:Union[str, Dict[str,str]]) -> str:
    """
    This method applies text_content to prompt_template and returns a prompt.

    Parameters:
    ----------
    prompt_template : str
        the prompt template with placeholders {{<placeholder name>}}.
    text_content : Union[str, Dict[str,str]]
        the input text content to put in prompt template.
        If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
        If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}. All values must be str.

    Returns : str
        a user prompt with placeholders substituted verbatim (backslashes in
        values are preserved as-is, not doubled).

    Raises
    ------
    ValueError
        if placeholders and text_content do not match as described above.
    TypeError
        if text_content is neither str nor dict.
    """
    pattern = re.compile(r'{{(.*?)}}')
    placeholders = pattern.findall(prompt_template)

    # NOTE: the replacement is always a callable. When re.sub receives a
    # callable, its return value is inserted LITERALLY (no backslash escape
    # processing), so values containing backslashes need no pre-escaping.
    # The previous implementation escaped inside the lambda, which doubled
    # every backslash in dict values.
    if isinstance(text_content, str):
        if len(placeholders) != 1:
            raise ValueError("When text_content is str, the prompt template must has exactly 1 placeholder {{<placeholder name>}}.")
        return pattern.sub(lambda _match: text_content, prompt_template)

    if isinstance(text_content, dict):
        # Check if all values are str
        if not all(isinstance(v, str) for v in text_content.values()):
            raise ValueError("All values in text_content must be str.")
        # Check if all keys are in the prompt template
        if len(placeholders) != len(text_content):
            raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
        if not all(k in placeholders for k in text_content):
            raise ValueError(f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders}).")

        return pattern.sub(lambda match: text_content[match.group(1)], prompt_template)

    # Previously this path fell through to an opaque UnboundLocalError.
    raise TypeError(f"text_content must be str or dict, got {type(text_content).__name__}.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llm-ie
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: A comprehensive toolkit that provides building blocks for LLM-based named entity recognition, attribute extraction, and relation extraction pipelines.
5
5
  License: MIT
6
6
  Author: Enshuo (David) Hsu
@@ -1,10 +1,11 @@
1
- llm_ie/__init__.py,sha256=wNyek7i90UlQRylV3hSG9RlzMZ4MVzZSe_uhQYTQab4,1632
1
+ llm_ie/__init__.py,sha256=9a0bTN2ol5k_rCEidhnqIwJCnVTfit7TbTtbWG4hj1s,1881
2
2
  llm_ie/asset/PromptEditor_prompts/chat.txt,sha256=Fq62voV0JQ8xBRcxS1Nmdd7DkHs1fGYb-tmNwctZZK0,118
3
3
  llm_ie/asset/PromptEditor_prompts/comment.txt,sha256=C_lxx-dlOlFJ__jkHKosZ8HsNAeV1aowh2B36nIipBY,159
4
4
  llm_ie/asset/PromptEditor_prompts/rewrite.txt,sha256=JAwY9vm1jSmKf2qcLBYUvrSmME2EJH36bALmkwZDWYQ,178
5
5
  llm_ie/asset/PromptEditor_prompts/system.txt,sha256=QwGTIJvp-5u2P8CkGt_rabttlN1puHQwIBNquUm1ZHo,730
6
6
  llm_ie/asset/default_prompts/BasicReviewFrameExtractor_addition_review_prompt.txt,sha256=pKes8BOAoJJgmo_IQh2ISKiMh_rDPl_rDUU_VgDQ4o4,273
7
7
  llm_ie/asset/default_prompts/BasicReviewFrameExtractor_revision_review_prompt.txt,sha256=9Nwkr2U_3ZSk01xDtgiFJVABi6FkC8Izdq7zrzFfLRg,235
8
+ llm_ie/asset/default_prompts/LLMUnitChunker_user_prompt.txt,sha256=tf9tu9FvNFpp26J7S39bJLuiI5R47bapDdEplvvbJU4,4203
8
9
  llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt,sha256=NLEtnmx1aOsnwifAsXr65pX9WdrIWdx-MJ7aMtNKi8c,331
9
10
  llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt,sha256=lGGjdeFpzZEc56w-EtQDMyYFs7A3DQAM32sT42Nf_08,293
10
11
  llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt,sha256=Of11LFuXLB249oekFelzlIeoAB0cATReqWgFTvhNz_8,329
@@ -18,11 +19,12 @@ llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt,sha256=EQ
18
19
  llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=rBRIXg8JQWUHTRdoluTS0zkbTkBAacEtHHvr3lZaQCw,10437
19
20
  llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=97_-y_vHMLG4Kb8fLsGgibLxB-3mest8k3LHfLo5h-I,10465
20
21
  llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=97_-y_vHMLG4Kb8fLsGgibLxB-3mest8k3LHfLo5h-I,10465
21
- llm_ie/chunkers.py,sha256=jXmUk3beF3EZWqDN_ArtoeerXObRKVCDIdUsv3loO80,6100
22
- llm_ie/data_types.py,sha256=72-3bzzYpo7KZpD9bjoroWT2eiM0zmWyDkBr2nHoBV0,18559
23
- llm_ie/engines.py,sha256=Ofrbcu8j2dp2X25oMQ3Xg7FGPynHse_liQ8oFTEdeHA,38418
24
- llm_ie/extractors.py,sha256=5y4vuB53R2EAyHGH3wVZ3M1DvN3fPJHdypsTbzbK78s,96889
25
- llm_ie/prompt_editor.py,sha256=nAgCJQY5kVWTAhmrngdWRG-JKxCCPBh0dyaUcIk_-c0,13198
26
- llm_ie-1.2.2.dist-info/METADATA,sha256=or9H0YdfLVgjqftn3zg4nlRHmGHcK4hxBYR6R-1otuE,728
27
- llm_ie-1.2.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
- llm_ie-1.2.2.dist-info/RECORD,,
22
+ llm_ie/chunkers.py,sha256=b4APRwaLMU40QXVEhOK8m1DZi_jr-VCHAFwbMjqVBgA,11308
23
+ llm_ie/data_types.py,sha256=6vefyGTgZcJBYgiuyfcbJN1ZKK4tNvOZf6HFpxFZngY,17792
24
+ llm_ie/engines.py,sha256=K4Zgb1dYiuopBeTLcgSAseI-VXgwtTeWf9O4EK9SQqE,63901
25
+ llm_ie/extractors.py,sha256=f-TUZFprJZ_ftrnKbi-g-au4KoJwtciCCawXHWzmDtU,100792
26
+ llm_ie/prompt_editor.py,sha256=Hqukm2HMgsoGpXV3vZ__7CGgfMhd-UUIwTKGnfSDltM,12055
27
+ llm_ie/utils.py,sha256=k6M4l8GsKOMcmO6UwONQ353Zk-TeoBj6HXGjlAn-JE0,3679
28
+ llm_ie-1.2.4.dist-info/METADATA,sha256=dl0JyDkgjEbk12N5I1fZg-jh7gEvTpuJ1Ox1_mHo_6Q,728
29
+ llm_ie-1.2.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
30
+ llm_ie-1.2.4.dist-info/RECORD,,
File without changes