llm-ie 1.2.3__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/utils.py ADDED
@@ -0,0 +1,95 @@
1
+ from typing import List, Dict, Union
2
+ import re
3
+ import json
4
+ import warnings
5
+ import json_repair
6
+
7
+ def _find_dict_strings(text: str) -> List[str]:
8
+ """
9
+ Extracts balanced JSON-like dictionaries from a string, even if nested.
10
+
11
+ Parameters:
12
+ -----------
13
+ text : str
14
+ the input text containing JSON-like structures.
15
+
16
+ Returns : List[str]
17
+ A list of valid JSON-like strings representing dictionaries.
18
+ """
19
+ open_brace = 0
20
+ start = -1
21
+ json_objects = []
22
+
23
+ for i, char in enumerate(text):
24
+ if char == '{':
25
+ if open_brace == 0:
26
+ # start of a new JSON object
27
+ start = i
28
+ open_brace += 1
29
+ elif char == '}':
30
+ open_brace -= 1
31
+ if open_brace == 0 and start != -1:
32
+ json_objects.append(text[start:i + 1])
33
+ start = -1
34
+
35
+ return json_objects
36
+
37
+
38
def extract_json(gen_text:str) -> List[Dict[str, str]]:
    """
    Parses every dictionary-like substring found in a generated text.

    Each candidate returned by _find_dict_strings is first parsed with
    json.loads. If that raises JSONDecodeError, the candidate is handed to
    json_repair.repair_json; a truthy repair result is kept (with a
    RuntimeWarning), a falsy one is dropped (also with a RuntimeWarning).

    Parameters:
    -----------
    gen_text : str
        the generated text to search for dictionaries.

    Returns : List[Dict[str, str]]
        the parsed objects, in the order their source strings appear.
    """
    results = []
    for candidate in _find_dict_strings(gen_text):
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            # Strict parsing failed; fall back to the repair library.
            parsed = json_repair.repair_json(candidate, skip_json_loads=True, return_objects=True)
            if not parsed:
                warnings.warn(f'JSONDecodeError could not be fixed:\n{candidate}', RuntimeWarning)
                continue
            warnings.warn(f'JSONDecodeError detected, fixed with repair_json:\n{candidate}', RuntimeWarning)
        results.append(parsed)
    return results
56
+
57
+
58
def apply_prompt_template(prompt_template:str, text_content:Union[str, Dict[str,str]]) -> str:
    """
    This method applies text_content to prompt_template and returns a prompt.

    Parameters:
    ----------
    prompt_template : str
        the prompt template with placeholders {{<placeholder name>}}.
    text_content : Union[str, Dict[str,str]]
        the input text content to put in prompt template.
        If str, the prompt template must have exactly 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
        If dict, the keys must be exactly the distinct placeholder names used in the prompt template.
        A placeholder may appear more than once; every occurrence is filled with the same value. All values must be str.

    Returns : str
        a user prompt.

    Raises:
    -------
    ValueError
        if the placeholders in the template do not line up with text_content,
        or if a dict value is not a str.
    TypeError
        if text_content is neither a str nor a dict.
    """
    pattern = re.compile(r'{{(.*?)}}')
    placeholders = pattern.findall(prompt_template)

    if isinstance(text_content, str):
        if len(placeholders) != 1:
            raise ValueError("When text_content is str, the prompt template must has exactly 1 placeholder {{<placeholder name>}}.")
        # A callable replacement inserts the text literally, so backslashes
        # and group references in text_content need no manual escaping.
        return pattern.sub(lambda _match: text_content, prompt_template)

    if isinstance(text_content, dict):
        # Check if all values are str
        if not all(isinstance(v, str) for v in text_content.values()):
            raise ValueError("All values in text_content must be str.")
        # Compare against distinct placeholder names so that a template may
        # reuse the same placeholder several times.
        unique_placeholders = set(placeholders)
        if len(unique_placeholders) != len(text_content):
            raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(unique_placeholders)}) to have equal size.")
        if not all(k in unique_placeholders for k in text_content):
            raise ValueError(f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders}).")

        return pattern.sub(lambda match: text_content[match.group(1)], prompt_template)

    # Neither branch applies: fail explicitly instead of hitting an unbound
    # local variable at the return statement.
    raise TypeError(f"text_content must be str or dict, received {type(text_content).__name__}.")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llm-ie
3
- Version: 1.2.3
3
+ Version: 1.3.0
4
4
  Summary: A comprehensive toolkit that provides building blocks for LLM-based named entity recognition, attribute extraction, and relation extraction pipelines.
5
5
  License: MIT
6
6
  Author: Enshuo (David) Hsu
@@ -1,15 +1,16 @@
1
- llm_ie/__init__.py,sha256=zG23MsRXnjShG-LhUl1tOSaEECXuhfZrS6Xt35XtrMw,1799
1
+ llm_ie/__init__.py,sha256=Rtdra_fAGPXORFvTd2qjSG08q9LBLXX5J1C8tz2SMwk,1963
2
2
  llm_ie/asset/PromptEditor_prompts/chat.txt,sha256=Fq62voV0JQ8xBRcxS1Nmdd7DkHs1fGYb-tmNwctZZK0,118
3
3
  llm_ie/asset/PromptEditor_prompts/comment.txt,sha256=C_lxx-dlOlFJ__jkHKosZ8HsNAeV1aowh2B36nIipBY,159
4
4
  llm_ie/asset/PromptEditor_prompts/rewrite.txt,sha256=JAwY9vm1jSmKf2qcLBYUvrSmME2EJH36bALmkwZDWYQ,178
5
5
  llm_ie/asset/PromptEditor_prompts/system.txt,sha256=QwGTIJvp-5u2P8CkGt_rabttlN1puHQwIBNquUm1ZHo,730
6
6
  llm_ie/asset/default_prompts/BasicReviewFrameExtractor_addition_review_prompt.txt,sha256=pKes8BOAoJJgmo_IQh2ISKiMh_rDPl_rDUU_VgDQ4o4,273
7
7
  llm_ie/asset/default_prompts/BasicReviewFrameExtractor_revision_review_prompt.txt,sha256=9Nwkr2U_3ZSk01xDtgiFJVABi6FkC8Izdq7zrzFfLRg,235
8
+ llm_ie/asset/default_prompts/LLMUnitChunker_user_prompt.txt,sha256=tf9tu9FvNFpp26J7S39bJLuiI5R47bapDdEplvvbJU4,4203
8
9
  llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt,sha256=NLEtnmx1aOsnwifAsXr65pX9WdrIWdx-MJ7aMtNKi8c,331
9
10
  llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt,sha256=lGGjdeFpzZEc56w-EtQDMyYFs7A3DQAM32sT42Nf_08,293
10
11
  llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt,sha256=Of11LFuXLB249oekFelzlIeoAB0cATReqWgFTvhNz_8,329
11
12
  llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt,sha256=kNJQK7NdoCx13TXGY8HYGrW_v4SEaErK8j9qIzd70CM,291
12
- llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt,sha256=w2amKipinuJtCiyPsgWsjaJRwTpS1qOBDuPPtPCMeQA,2120
13
+ llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt,sha256=blr_fx4RI8NRQvSKNenYZWApLeWtjIX2xFPJfz0Mb9k,2115
13
14
  llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt,sha256=-Cli7rwu4wM4vSmkG0nInNkpStUhRqKESQ3oqD38pbE,10395
14
15
  llm_ie/asset/prompt_guide/BasicReviewFrameExtractor_prompt_guide.txt,sha256=-Cli7rwu4wM4vSmkG0nInNkpStUhRqKESQ3oqD38pbE,10395
15
16
  llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt,sha256=Z6Yc2_QRqroWcJ13owNJbo78I0wpS4XXDsOjXFR-aPk,2166
@@ -18,11 +19,13 @@ llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt,sha256=EQ
18
19
  llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=rBRIXg8JQWUHTRdoluTS0zkbTkBAacEtHHvr3lZaQCw,10437
19
20
  llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=97_-y_vHMLG4Kb8fLsGgibLxB-3mest8k3LHfLo5h-I,10465
20
21
  llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=97_-y_vHMLG4Kb8fLsGgibLxB-3mest8k3LHfLo5h-I,10465
21
- llm_ie/chunkers.py,sha256=BTtqj2ZbAzl5s4nk9y5T12x7iTljSLfIio_u4s6Dq0Q,7611
22
- llm_ie/data_types.py,sha256=6vefyGTgZcJBYgiuyfcbJN1ZKK4tNvOZf6HFpxFZngY,17792
23
- llm_ie/engines.py,sha256=L28e4X6Qx0pcOzMMoRW1ecqudUbemF-KHrRLsYy9FRU,61987
24
- llm_ie/extractors.py,sha256=MRZEF9YSnFAkDCYWYeUysIaRxBSN71MmUpiW3M0Jejk,103890
25
- llm_ie/prompt_editor.py,sha256=nAgCJQY5kVWTAhmrngdWRG-JKxCCPBh0dyaUcIk_-c0,13198
26
- llm_ie-1.2.3.dist-info/METADATA,sha256=ww7UImUpSUWKHjzM3UJpylgO00h_glXttMykdoRNc0g,728
27
- llm_ie-1.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
28
- llm_ie-1.2.3.dist-info/RECORD,,
22
+ llm_ie/asset/prompt_guide/StructExtractor_prompt_guide.txt,sha256=x8L4n_LVl6ofQu6cDE9YP4SB2FSQ4GrTee8y1XKwwwc,1922
23
+ llm_ie/chunkers.py,sha256=b4APRwaLMU40QXVEhOK8m1DZi_jr-VCHAFwbMjqVBgA,11308
24
+ llm_ie/data_types.py,sha256=iG_jdqhpBi33xnsfFQYayCXNBK-2N-8u1xIhoKfJzRI,18294
25
+ llm_ie/engines.py,sha256=K4Zgb1dYiuopBeTLcgSAseI-VXgwtTeWf9O4EK9SQqE,63901
26
+ llm_ie/extractors.py,sha256=Voexzc_sYQ3jBGkvLybazt9zVsLnnrMbsUswKciBS4I,120933
27
+ llm_ie/prompt_editor.py,sha256=Hqukm2HMgsoGpXV3vZ__7CGgfMhd-UUIwTKGnfSDltM,12055
28
+ llm_ie/utils.py,sha256=k6M4l8GsKOMcmO6UwONQ353Zk-TeoBj6HXGjlAn-JE0,3679
29
+ llm_ie-1.3.0.dist-info/METADATA,sha256=GrgKPwzTXtHIBsEThNsJ6i7Z43Ghb2I5Y47mRYbSIAo,728
30
+ llm_ie-1.3.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
31
+ llm_ie-1.3.0.dist-info/RECORD,,
File without changes