llm-ie 1.2.3__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_ie/__init__.py +6 -6
- llm_ie/asset/default_prompts/LLMUnitChunker_user_prompt.txt +129 -0
- llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt +2 -2
- llm_ie/asset/prompt_guide/StructExtractor_prompt_guide.txt +53 -0
- llm_ie/chunkers.py +104 -4
- llm_ie/data_types.py +72 -44
- llm_ie/engines.py +44 -0
- llm_ie/extractors.py +421 -73
- llm_ie/prompt_editor.py +9 -32
- llm_ie/utils.py +95 -0
- {llm_ie-1.2.3.dist-info → llm_ie-1.3.0.dist-info}/METADATA +1 -1
- {llm_ie-1.2.3.dist-info → llm_ie-1.3.0.dist-info}/RECORD +13 -10
- {llm_ie-1.2.3.dist-info → llm_ie-1.3.0.dist-info}/WHEEL +0 -0
llm_ie/utils.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from typing import List, Dict, Union
|
|
2
|
+
import re
|
|
3
|
+
import json
|
|
4
|
+
import warnings
|
|
5
|
+
import json_repair
|
|
6
|
+
|
|
7
|
+
def _find_dict_strings(text: str) -> List[str]:
|
|
8
|
+
"""
|
|
9
|
+
Extracts balanced JSON-like dictionaries from a string, even if nested.
|
|
10
|
+
|
|
11
|
+
Parameters:
|
|
12
|
+
-----------
|
|
13
|
+
text : str
|
|
14
|
+
the input text containing JSON-like structures.
|
|
15
|
+
|
|
16
|
+
Returns : List[str]
|
|
17
|
+
A list of valid JSON-like strings representing dictionaries.
|
|
18
|
+
"""
|
|
19
|
+
open_brace = 0
|
|
20
|
+
start = -1
|
|
21
|
+
json_objects = []
|
|
22
|
+
|
|
23
|
+
for i, char in enumerate(text):
|
|
24
|
+
if char == '{':
|
|
25
|
+
if open_brace == 0:
|
|
26
|
+
# start of a new JSON object
|
|
27
|
+
start = i
|
|
28
|
+
open_brace += 1
|
|
29
|
+
elif char == '}':
|
|
30
|
+
open_brace -= 1
|
|
31
|
+
if open_brace == 0 and start != -1:
|
|
32
|
+
json_objects.append(text[start:i + 1])
|
|
33
|
+
start = -1
|
|
34
|
+
|
|
35
|
+
return json_objects
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def extract_json(gen_text:str) -> List[Dict[str, str]]:
    """
    Parses every dictionary-like span of a generated text into Python objects.

    Candidate spans come from _find_dict_strings. Each one is first parsed
    with json.loads; when that raises json.JSONDecodeError the span is handed
    to json_repair.repair_json instead.

    Parameters:
    -----------
    gen_text : str
        the generated text to search for JSON-like dictionaries.

    Returns : List[Dict[str, str]]
        the parsed objects, in order of appearance. A span that needed repair
        is kept (with a RuntimeWarning) when the repair result is truthy, and
        dropped (with a RuntimeWarning) when it is falsy.
    """
    parsed = []
    for candidate in _find_dict_strings(gen_text):
        try:
            obj = json.loads(candidate)
        except json.JSONDecodeError:
            # Strict parsing failed; fall back to best-effort repair.
            obj = json_repair.repair_json(candidate, skip_json_loads=True, return_objects=True)
            if not obj:
                warnings.warn(f'JSONDecodeError could not be fixed:\n{candidate}', RuntimeWarning)
                continue
            warnings.warn(f'JSONDecodeError detected, fixed with repair_json:\n{candidate}', RuntimeWarning)
        parsed.append(obj)
    return parsed
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def apply_prompt_template(prompt_template:str, text_content:Union[str, Dict[str,str]]) -> str:
    """
    This method applies text_content to prompt_template and returns a prompt.

    Parameters:
    ----------
    prompt_template : str
        the prompt template with placeholders {{<placeholder name>}}.
    text_content : Union[str, Dict[str,str]]
        the input text content to put in prompt template.
        If str, the prompt template must have exactly 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
        If dict, the keys must match the prompt template placeholders {{<placeholder name>}} one-to-one. All values must be str.

    Returns : str
        a user prompt, with every placeholder replaced by its content verbatim
        (backslashes and other characters in the content are not altered).

    Raises:
    ------
    ValueError
        if the number of placeholders does not fit text_content, if a dict key
        is not a placeholder in the template, or if a dict value is not a str.
    TypeError
        if text_content is neither a str nor a dict.
    """
    pattern = re.compile(r'{{(.*?)}}')
    placeholders = pattern.findall(prompt_template)

    if isinstance(text_content, str):
        if len(placeholders) != 1:
            raise ValueError("When text_content is str, the prompt template must has exactly 1 placeholder {{<placeholder name>}}.")
        # A callable replacement is inserted verbatim by re.sub, so no
        # backslash escaping of the content is needed (or wanted).
        return pattern.sub(lambda _match: text_content, prompt_template)

    if isinstance(text_content, dict):
        # Check if all values are str
        if not all(isinstance(v, str) for v in text_content.values()):
            raise ValueError("All values in text_content must be str.")
        # Check if all keys are in the prompt template
        if len(placeholders) != len(text_content):
            raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
        if not all(k in placeholders for k in text_content):
            raise ValueError(f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders}).")

        # The looked-up value is returned as-is: doubling backslashes here
        # would leak the extra backslashes into the prompt, because re.sub
        # does not process escapes in the result of a callable replacement.
        return pattern.sub(lambda match: text_content[match.group(1)], prompt_template)

    raise TypeError(f"text_content must be str or dict, got {type(text_content).__name__}.")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llm-ie
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: A comprehensive toolkit that provides building blocks for LLM-based named entity recognition, attribute extraction, and relation extraction pipelines.
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Enshuo (David) Hsu
|
|
@@ -1,15 +1,16 @@
|
|
|
1
|
-
llm_ie/__init__.py,sha256=
|
|
1
|
+
llm_ie/__init__.py,sha256=Rtdra_fAGPXORFvTd2qjSG08q9LBLXX5J1C8tz2SMwk,1963
|
|
2
2
|
llm_ie/asset/PromptEditor_prompts/chat.txt,sha256=Fq62voV0JQ8xBRcxS1Nmdd7DkHs1fGYb-tmNwctZZK0,118
|
|
3
3
|
llm_ie/asset/PromptEditor_prompts/comment.txt,sha256=C_lxx-dlOlFJ__jkHKosZ8HsNAeV1aowh2B36nIipBY,159
|
|
4
4
|
llm_ie/asset/PromptEditor_prompts/rewrite.txt,sha256=JAwY9vm1jSmKf2qcLBYUvrSmME2EJH36bALmkwZDWYQ,178
|
|
5
5
|
llm_ie/asset/PromptEditor_prompts/system.txt,sha256=QwGTIJvp-5u2P8CkGt_rabttlN1puHQwIBNquUm1ZHo,730
|
|
6
6
|
llm_ie/asset/default_prompts/BasicReviewFrameExtractor_addition_review_prompt.txt,sha256=pKes8BOAoJJgmo_IQh2ISKiMh_rDPl_rDUU_VgDQ4o4,273
|
|
7
7
|
llm_ie/asset/default_prompts/BasicReviewFrameExtractor_revision_review_prompt.txt,sha256=9Nwkr2U_3ZSk01xDtgiFJVABi6FkC8Izdq7zrzFfLRg,235
|
|
8
|
+
llm_ie/asset/default_prompts/LLMUnitChunker_user_prompt.txt,sha256=tf9tu9FvNFpp26J7S39bJLuiI5R47bapDdEplvvbJU4,4203
|
|
8
9
|
llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt,sha256=NLEtnmx1aOsnwifAsXr65pX9WdrIWdx-MJ7aMtNKi8c,331
|
|
9
10
|
llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt,sha256=lGGjdeFpzZEc56w-EtQDMyYFs7A3DQAM32sT42Nf_08,293
|
|
10
11
|
llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt,sha256=Of11LFuXLB249oekFelzlIeoAB0cATReqWgFTvhNz_8,329
|
|
11
12
|
llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt,sha256=kNJQK7NdoCx13TXGY8HYGrW_v4SEaErK8j9qIzd70CM,291
|
|
12
|
-
llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt,sha256=
|
|
13
|
+
llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt,sha256=blr_fx4RI8NRQvSKNenYZWApLeWtjIX2xFPJfz0Mb9k,2115
|
|
13
14
|
llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt,sha256=-Cli7rwu4wM4vSmkG0nInNkpStUhRqKESQ3oqD38pbE,10395
|
|
14
15
|
llm_ie/asset/prompt_guide/BasicReviewFrameExtractor_prompt_guide.txt,sha256=-Cli7rwu4wM4vSmkG0nInNkpStUhRqKESQ3oqD38pbE,10395
|
|
15
16
|
llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt,sha256=Z6Yc2_QRqroWcJ13owNJbo78I0wpS4XXDsOjXFR-aPk,2166
|
|
@@ -18,11 +19,13 @@ llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt,sha256=EQ
|
|
|
18
19
|
llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=rBRIXg8JQWUHTRdoluTS0zkbTkBAacEtHHvr3lZaQCw,10437
|
|
19
20
|
llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=97_-y_vHMLG4Kb8fLsGgibLxB-3mest8k3LHfLo5h-I,10465
|
|
20
21
|
llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=97_-y_vHMLG4Kb8fLsGgibLxB-3mest8k3LHfLo5h-I,10465
|
|
21
|
-
llm_ie/
|
|
22
|
-
llm_ie/
|
|
23
|
-
llm_ie/
|
|
24
|
-
llm_ie/
|
|
25
|
-
llm_ie/
|
|
26
|
-
llm_ie
|
|
27
|
-
llm_ie
|
|
28
|
-
llm_ie-1.
|
|
22
|
+
llm_ie/asset/prompt_guide/StructExtractor_prompt_guide.txt,sha256=x8L4n_LVl6ofQu6cDE9YP4SB2FSQ4GrTee8y1XKwwwc,1922
|
|
23
|
+
llm_ie/chunkers.py,sha256=b4APRwaLMU40QXVEhOK8m1DZi_jr-VCHAFwbMjqVBgA,11308
|
|
24
|
+
llm_ie/data_types.py,sha256=iG_jdqhpBi33xnsfFQYayCXNBK-2N-8u1xIhoKfJzRI,18294
|
|
25
|
+
llm_ie/engines.py,sha256=K4Zgb1dYiuopBeTLcgSAseI-VXgwtTeWf9O4EK9SQqE,63901
|
|
26
|
+
llm_ie/extractors.py,sha256=Voexzc_sYQ3jBGkvLybazt9zVsLnnrMbsUswKciBS4I,120933
|
|
27
|
+
llm_ie/prompt_editor.py,sha256=Hqukm2HMgsoGpXV3vZ__7CGgfMhd-UUIwTKGnfSDltM,12055
|
|
28
|
+
llm_ie/utils.py,sha256=k6M4l8GsKOMcmO6UwONQ353Zk-TeoBj6HXGjlAn-JE0,3679
|
|
29
|
+
llm_ie-1.3.0.dist-info/METADATA,sha256=GrgKPwzTXtHIBsEThNsJ6i7Z43Ghb2I5Y47mRYbSIAo,728
|
|
30
|
+
llm_ie-1.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
31
|
+
llm_ie-1.3.0.dist-info/RECORD,,
|
|
File without changes
|