llm-ie 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ Prompt template design:
2
+ 1. Task description (mention binary relation extraction and ROI)
3
+ 2. Schema definition (defines relation)
4
+ 3. Output format definition (must use the key "Relation")
5
+ 4. Hints
6
+ 5. Input placeholders (must include "roi_text", "frame_1", and "frame_2" placeholders)
7
+
8
+
9
+ Example:
10
+
11
+ # Task description
12
+ This is a binary relation extraction task. Given a region of interest (ROI) text and two entities from a medical note, indicate the relation existence between the two entities.
13
+
14
+ # Schema definition
15
+ True: if there is a relationship between a medication name (one of the entities) and its strength or frequency (the other entity).
16
+ False: Otherwise.
17
+
18
+ # Output format definition
19
+ Your output should follow the JSON format:
20
+ {"Relation": "<True or False>"}
21
+
22
+ I am only interested in the content between {}. Do not explain your answer.
23
+
24
+ # Hints
25
+ 1. Your input always contains one medication entity and 1) one strength entity or 2) one frequency entity.
26
+ 2. Pay attention to the medication entity and see if the strength or frequency is for it.
27
+ 3. If the strength or frequency is for another medication, output False.
28
+ 4. If the strength or frequency is for the same medication but at a different location (span), output False.
29
+
30
+ # Input placeholders
31
+ ROI Text with the two entities annotated with <entity_1> and <entity_2>:
32
+ "{{roi_text}}"
33
+
34
+ Entity 1 full information:
35
+ {{frame_1}}
36
+
37
+ Entity 2 full information:
38
+ {{frame_2}}
@@ -0,0 +1,46 @@
1
+ Prompt template design:
2
+ 1. Task description (mention multi-class relation extraction and ROI)
3
+ 2. Schema definition (defines relation types)
4
+ 3. Output format definition (must use the key "RelationType" and include the "pos_rel_types" placeholder)
5
+ 4. Input placeholders (must include "roi_text", "frame_1", and "frame_2" placeholders)
6
+
7
+
8
+ Example:
9
+
10
+ # Task description
11
+ This is a multi-class relation extraction task. Given a region of interest (ROI) text and two frames from a medical note, classify the relation types between the two frames.
12
+
13
+ # Schema definition
14
+ Strength-Drug: this is a relationship between the drug strength and its name.
15
+ Dosage-Drug: this is a relationship between the drug dosage and its name.
16
+ Duration-Drug: this is a relationship between a drug duration and its name.
17
+ Frequency-Drug: this is a relationship between a drug frequency and its name.
18
+ Form-Drug: this is a relationship between a drug form and its name.
19
+ Route-Drug: this is a relationship between the route of administration for a drug and its name.
20
+ Reason-Drug: this is a relationship between the reason for which a drug was administered (e.g., symptoms, diseases, etc.) and a drug name.
21
+ ADE-Drug: this is a relationship between an adverse drug event (ADE) and a drug name.
22
+
23
+ # Output format definition
24
+ Choose one of the relation types listed below or choose "No Relation":
25
+ {{pos_rel_types}}
26
+
27
+ Your output should follow the JSON format:
28
+ {"RelationType": "<relation type or No Relation>"}
29
+
30
+ I am only interested in the content between {}. Do not explain your answer.
31
+
32
+ # Hints
33
+ 1. Your input always contains one medication entity and 1) one strength entity or 2) one frequency entity.
34
+ 2. Pay attention to the medication entity and see if the strength or frequency is for it.
35
+ 3. If the strength or frequency is for another medication, output "No Relation".
36
+ 4. If the strength or frequency is for the same medication but at a different location (span), output "No Relation".
37
+
38
+ # Input placeholders
39
+ ROI Text with the two entities annotated with <entity_1> and <entity_2>:
40
+ "{{roi_text}}"
41
+
42
+ Entity 1 full information:
43
+ {{frame_1}}
44
+
45
+ Entity 2 full information:
46
+ {{frame_2}}
llm_ie/data_types.py CHANGED
@@ -1,5 +1,5 @@
1
- from typing import List, Dict
2
- import yaml
1
+ from typing import List, Dict, Iterable
2
+ import json
3
3
 
4
4
 
5
5
  class LLMInformationExtractionFrame:
@@ -22,7 +22,8 @@ class LLMInformationExtractionFrame:
22
22
  attr : Dict[str,str], Optional
23
23
  dict of attributes
24
24
  """
25
- assert isinstance(frame_id, str), "frame_id must be a string."
25
+ if not isinstance(frame_id, str):
26
+ raise TypeError("frame_id must be a string.")
26
27
  self.frame_id = frame_id
27
28
  self.start = start
28
29
  self.end = end
@@ -78,7 +79,8 @@ class LLMInformationExtractionFrame:
78
79
 
79
80
 
80
81
  class LLMInformationExtractionDocument:
81
- def __init__(self, doc_id:str=None, filename:str=None, text:str=None, frames:List[LLMInformationExtractionFrame]=None):
82
+ def __init__(self, doc_id:str=None, filename:str=None, text:str=None,
83
+ frames:List[LLMInformationExtractionFrame]=None, relations:List[Dict[str,str]]=None):
82
84
  """
83
85
  This class holds LLM-extracted frames, handles save/ load.
84
86
 
@@ -92,30 +94,79 @@ class LLMInformationExtractionDocument:
92
94
  document text
93
95
  frames : List[LLMInformationExtractionFrame], Optional
94
96
  a list of LLMInformationExtractionFrame
97
+ relations : List[Dict[str,str]], Optional
98
+ a list of dictionary of {"frame_1", "frame_2", "relation"}.
99
+ If binary relation (no relation type), there is no "relation" key.
95
100
  """
96
- assert doc_id or filename, "Either doc_id (create from raw inputs) or filename (create from file) must be provided."
101
+ if doc_id is None and filename is None:
102
+ raise ValueError("Either doc_id (create from raw inputs) or filename (create from file) must be provided.")
97
103
  # if create object from file
98
104
  if filename:
99
- with open(filename) as yaml_file:
100
- llm_ie = yaml.safe_load(yaml_file)
105
+ with open(filename) as json_file:
106
+ llm_ie = json.load(json_file)
101
107
  if 'doc_id' in llm_ie.keys():
102
108
  self.doc_id = llm_ie['doc_id']
103
109
  if 'text' in llm_ie.keys():
104
110
  self.text = llm_ie['text']
105
111
  if 'frames' in llm_ie.keys():
106
112
  self.frames = [LLMInformationExtractionFrame.from_dict(d) for d in llm_ie['frames']]
113
+ if 'relations' in llm_ie.keys():
114
+ self.relations = llm_ie['relations']
107
115
 
108
116
  # create object from raw inputs
109
117
  else:
110
- assert isinstance(doc_id, str), "doc_id must be a string."
118
+ if not isinstance(doc_id, str):
119
+ raise TypeError("doc_id must be a string.")
111
120
  self.doc_id = doc_id
112
121
  self.text = text
113
122
  self.frames = frames.copy() if frames is not None else []
123
+ self.relations = relations.copy() if relations is not None else []
114
124
 
115
125
 
116
126
def has_frame(self) -> bool:
    """
    Report whether this document holds at least one frame.

    Returns : bool
        True if self.frames is non-empty, False otherwise.
    """
    return True if self.frames else False
118
131
 
132
def has_relation(self) -> bool:
    """
    Report whether this document holds at least one relation.

    Returns : bool
        True if self.relations is non-empty, False otherwise.
    """
    return True if self.relations else False
137
+
138
def has_duplicate_frame_ids(self) -> bool:
    """
    Report whether two or more frames in this document share a frame_id.

    Returns : bool
        True if at least one frame_id occurs more than once, False otherwise
        (including when there are no frames).
    """
    frame_ids = [frame.frame_id for frame in self.frames]
    # A set drops repeats, so it is shorter than the list exactly when an id repeats.
    return len(set(frame_ids)) < len(frame_ids)
149
+
150
def get_frame_by_id(self, frame_id:str) -> LLMInformationExtractionFrame:
    """
    Look up a frame by its frame_id.
    If several frames share the frame_id, the first one in self.frames is returned.

    Parameters:
    -----------
    frame_id : str
        frame id to retrieve

    Returns : LLMInformationExtractionFrame
        the matching frame, or None when no frame has this frame_id.
    """
    matching_frames = (frame for frame in self.frames if frame.frame_id == frame_id)
    return next(matching_frames, None)
168
+
169
+
119
170
  def add_frame(self, frame:LLMInformationExtractionFrame, valid_mode:str=None, create_id:bool=False) -> bool:
120
171
  """
121
172
  This method add a new frame to the frames (list).
@@ -132,7 +183,11 @@ class LLMInformationExtractionDocument:
132
183
  create_id : bool, Optional
133
184
  Assign a sequential frame ID.
134
185
  """
135
- assert valid_mode in {None, "span", "attr"}, 'valid_mode must be one of {None, "span", "attr"}'
186
+ if not isinstance(frame, LLMInformationExtractionFrame):
187
+ raise TypeError(f"Expect frame to be LLMInformationExtractionFrame, received {type(frame)} instead.")
188
+
189
+ if valid_mode not in {None, "span", "attr"}:
190
+ raise ValueError(f'Expect valid_mode to be one of {{None, "span", "attr"}}, received {valid_mode}')
136
191
 
137
192
  if valid_mode == "span":
138
193
  for exist_frame in self.frames:
@@ -153,18 +208,76 @@ class LLMInformationExtractionDocument:
153
208
  return True
154
209
 
155
210
 
211
def add_frames(self, frames:List[LLMInformationExtractionFrame], valid_mode:str=None, create_id:bool=False):
    """
    This method adds several frames, one at a time, through add_frame().
    The return value of each add_frame() call is not collected; this method returns None.

    Parameters:
    -----------
    frames : List[LLMInformationExtractionFrame]
        the frames to add, in order. Any iterable is accepted.
    valid_mode : str, Optional
        passed unchanged to add_frame() for every frame.
    create_id : bool, Optional
        passed unchanged to add_frame() for every frame.

    Raises : TypeError
        if frames is not iterable.
    """
    if not isinstance(frames, Iterable):
        raise TypeError("frames must be a list or Iterable.")

    for frame in frames:
        self.add_frame(frame=frame, valid_mode=valid_mode, create_id=create_id)
220
+
221
+
222
def add_relation(self, relation:Dict[str,str]) -> bool:
    """
    Append one relation to this document's relations (list) after validating it.

    Parameters:
    -----------
    relation : Dict[str,str]
        the relation to add. Must contain the keys "frame_1" and "frame_2", whose values
        are frame ids that exist in this document. May also contain a "relation" key
        holding the relation type. No other keys are accepted.

    Returns : bool
        True once the relation has been appended.

    Raises : TypeError
        if relation is not a dict.
    Raises : ValueError
        if a required key is missing, an unexpected key is present,
        or a referenced frame id is not found in the frames.
    """
    if not isinstance(relation, dict):
        raise TypeError(f"Expect relation to be a Dict, received {type(relation)} instead.")

    given_keys = set(relation.keys())
    if not {"frame_1", "frame_2"} <= given_keys:
        raise ValueError('relation missing "frame_1" or "frame_2" keys.')

    if not given_keys <= {"frame_1", "frame_2", "relation"}:
        raise ValueError('Only keys {"frame_1", "frame_2", "relation"} are allowed.')

    # Both endpoints must refer to frames already stored in this document.
    for endpoint in ("frame_1", "frame_2"):
        if not self.get_frame_by_id(relation[endpoint]):
            raise ValueError(f'frame_id: {relation[endpoint]} not found in frames.')

    self.relations.append(relation)
    return True
254
+
255
def add_relations(self, relations:List[Dict[str,str]]):
    """
    This method adds several relations, one at a time, through add_relation().
    The return value of each add_relation() call is not collected; this method returns None.

    Parameters:
    -----------
    relations : List[Dict[str,str]]
        the relations to add, in order. Any iterable is accepted.

    Raises : TypeError
        if relations is not iterable.
    """
    if not isinstance(relations, Iterable):
        raise TypeError("relations must be a list or Iterable.")

    for relation in relations:
        self.add_relation(relation)
263
+
264
+
156
265
def __repr__(self, N_top_chars:int=100) -> str:
    """
    Build a short multi-line summary: doc_id, the first N_top_chars characters
    of the text, the number of frames and the number of relations.

    Parameters:
    -----------
    N_top_chars : int, Optional
        how many leading characters of the document text to show.

    Returns : str
        the summary string.
    """
    # text is optional (the constructor defaults it to None); show an empty
    # preview in that case instead of failing on None[0:N].
    text_to_print = (self.text or "")[0:N_top_chars]
    frame_count = len(self.frames)
    relation_count = len(self.relations)
    return ''.join((f'LLMInformationExtractionDocument(doc_id: "{self.doc_id}"\n',
                    f'text: "{text_to_print}...",\n',
                    f'frames: {frame_count}\n',
                    f'relations: {relation_count}'))
273
+
162
274
 
163
275
def save(self, filename:str):
    """
    Write this document to a JSON file.
    The file holds the doc_id, the text, every frame (via its to_dict()) and the relations.

    Parameters:
    -----------
    filename : str
        path of the file to write. An existing file is overwritten.
    """
    with open(filename, 'w') as json_file:
        content = {'doc_id': self.doc_id,
                   'text': self.text,
                   'frames': [frame.to_dict() for frame in self.frames],
                   'relations': self.relations}
        json.dump(content, json_file, indent=4)
        json_file.flush()
170
283
 
llm_ie/extractors.py CHANGED
@@ -1,16 +1,19 @@
1
1
  import abc
2
2
  import re
3
3
  import json
4
+ import inspect
4
5
  import importlib.resources
5
- from typing import List, Dict, Tuple, Union
6
- from llm_ie.data_types import LLMInformationExtractionFrame
6
+ import warnings
7
+ import itertools
8
+ from typing import List, Dict, Tuple, Union, Callable
9
+ from llm_ie.data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
7
10
  from llm_ie.engines import InferenceEngine
8
11
 
9
12
 
10
- class FrameExtractor:
13
+ class Extractor:
11
14
  def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
12
15
  """
13
- This is the abstract class for frame extraction.
16
+ This is the abstract class for (frame and relation) extractors.
14
17
  Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
15
18
 
16
19
  Parameters
@@ -26,12 +29,17 @@ class FrameExtractor:
26
29
  self.prompt_template = prompt_template
27
30
  self.system_prompt = system_prompt
28
31
 
32
+
29
33
@classmethod
def get_prompt_guide(cls) -> str:
    """
    This method returns the pre-defined prompt guideline for the extractor from the package asset.

    The guide is looked up in the 'llm_ie.asset.prompt_guide' package under the file name
    "<class name>_prompt_guide.txt", so each subclass resolves its own guide.

    Returns : str
        the content of the prompt guide file.
    """
    guide = importlib.resources.files('llm_ie.asset.prompt_guide').joinpath(f"{cls.__name__}_prompt_guide.txt")
    # Read through the resource object itself: the built-in open() only accepts it when the
    # package lives on the file system, and would decode with the locale's encoding.
    return guide.read_text(encoding="utf-8")
34
41
 
42
+
35
43
  def _get_user_prompt(self, text_content:Union[str, Dict[str,str]]) -> str:
36
44
  """
37
45
  This method applies text_content to prompt_template and returns a prompt.
@@ -49,18 +57,19 @@ class FrameExtractor:
49
57
  pattern = re.compile(r'{{(.*?)}}')
50
58
  if isinstance(text_content, str):
51
59
  matches = pattern.findall(self.prompt_template)
52
- assert len(matches) == 1, \
53
- "When text_content is str, the prompt template must has only 1 placeholder {{<placeholder name>}}."
54
- prompt = pattern.sub(text_content, self.prompt_template)
60
+ if len(matches) != 1:
61
+ raise ValueError("When text_content is str, the prompt template must has exactly 1 placeholder {{<placeholder name>}}.")
62
+ text = re.sub(r'\\', r'\\\\', text_content)
63
+ prompt = pattern.sub(text, self.prompt_template)
55
64
 
56
65
  elif isinstance(text_content, dict):
57
66
  placeholders = pattern.findall(self.prompt_template)
58
- assert len(placeholders) == len(text_content), \
59
- f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size."
60
- assert all([k in placeholders for k, _ in text_content.items()]), \
61
- f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders})."
67
+ if len(placeholders) != len(text_content):
68
+ raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
69
+ if not all([k in placeholders for k, _ in text_content.items()]):
70
+ raise ValueError(f"All keys in text_content ({text_content.keys()}) must match placeholders in prompt template ({placeholders}).")
62
71
 
63
- prompt = pattern.sub(lambda match: text_content[match.group(1)], self.prompt_template)
72
+ prompt = pattern.sub(lambda match: re.sub(r'\\', r'\\\\', text_content[match.group(1)]), self.prompt_template)
64
73
 
65
74
  return prompt
66
75
 
@@ -79,6 +88,27 @@ class FrameExtractor:
79
88
  return out
80
89
 
81
90
 
91
+ class FrameExtractor(Extractor):
92
def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
    """
    Abstract base for frame extractors.
    All arguments are handed to the parent Extractor unchanged.

    Parameters
    ----------
    inference_engine : InferenceEngine
        the LLM inferencing engine object. Must implements the chat() method.
    prompt_template : str
        prompt template with "{{<placeholder name>}}" placeholder.
    system_prompt : str, Optional
        system prompt.
    """
    super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
                     system_prompt=system_prompt, **kwrs)
110
+
111
+
82
112
  def _find_entity_spans(self, text: str, entities: List[str], case_sensitive:bool=False) -> List[Tuple[int]]:
83
113
  """
84
114
  This function inputs a text and a list of entity text,
@@ -290,7 +320,8 @@ class ReviewFrameExtractor(BasicFrameExtractor):
290
320
  super().__init__(inference_engine=inference_engine, prompt_template=prompt_template,
291
321
  system_prompt=system_prompt, **kwrs)
292
322
  self.review_prompt = review_prompt
293
- assert review_mode in {"addition", "revision"}, 'review_mode must be one of {"addition", "revision"}.'
323
+ if review_mode not in {"addition", "revision"}:
324
+ raise ValueError('review_mode must be one of {"addition", "revision"}.')
294
325
  self.review_mode = review_mode
295
326
 
296
327
 
@@ -528,3 +559,395 @@ class SentenceFrameExtractor(FrameExtractor):
528
559
  attr={k: v for k, v in ent.items() if k != entity_key and v != ""})
529
560
  frame_list.append(frame)
530
561
  return frame_list
562
+
563
+
564
class RelationExtractor(Extractor):
    def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
        """
        This is the abstract class for relation extraction.
        Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).

        Parameters
        ----------
        inference_engine : InferenceEngine
            the LLM inferencing engine object. Must implements the chat() method.
        prompt_template : str
            prompt template with "{{<placeholder name>}}" placeholder.
        system_prompt : str, Optional
            system prompt.
        """
        super().__init__(inference_engine=inference_engine,
                         prompt_template=prompt_template,
                         system_prompt=system_prompt,
                         **kwrs)

    def _get_ROI(self, frame_1:LLMInformationExtractionFrame, frame_2:LLMInformationExtractionFrame,
                 text:str, buffer_size:int=100) -> str:
        """
        This method returns the Region of Interest (ROI) that covers the two frames. Leaves a buffer_size of characters before and after.
        The returned text has the two frames inline annotated with <entity_1>, <entity_2>.
        "..." is added at an end of the ROI that does not reach the corresponding end of the document.

        The two spans are expected not to overlap: the text of each span is emitted in full,
        so characters shared by overlapping spans would appear twice.

        Parameters:
        -----------
        frame_1 : LLMInformationExtractionFrame
            a frame. Always tagged as <entity_1>.
        frame_2 : LLMInformationExtractionFrame
            the other frame. Always tagged as <entity_2>.
        text : str
            the entire document text
        buffer_size : int, Optional
            the number of characters before and after the two frames in the ROI text.

        Return : str
            the ROI text with the two frames inline annotated with <entity_1>, <entity_2>.
        """
        left_frame, right_frame = sorted([frame_1, frame_2], key=lambda f: f.start)
        # Identity (not equality) decides the tag, so two distinct frames always
        # receive two distinct tags even if the frame type were to compare them equal.
        left_frame_name = "entity_1" if left_frame is frame_1 else "entity_2"
        right_frame_name = "entity_1" if right_frame is frame_1 else "entity_2"

        start = max(left_frame.start - buffer_size, 0)
        end = min(right_frame.end + buffer_size, len(text))
        roi = text[start:end]

        # Frame offsets are document-based; subtract `start` to index into the ROI.
        roi_annotated = ''.join((
            roi[0:left_frame.start - start],
            f'<{left_frame_name}>',
            roi[left_frame.start - start:left_frame.end - start],
            f"</{left_frame_name}>",
            roi[left_frame.end - start:right_frame.start - start],
            f'<{right_frame_name}>',
            roi[right_frame.start - start:right_frame.end - start],
            f"</{right_frame_name}>",
            roi[right_frame.end - start:end - start],
        ))

        if start > 0:
            roi_annotated = "..." + roi_annotated
        if end < len(text):
            roi_annotated = roi_annotated + "..."
        return roi_annotated


    @abc.abstractmethod
    def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
                          temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict]:
        """
        This method considers all combinations of two frames. Subclasses must implement it.

        Parameters:
        -----------
        doc : LLMInformationExtractionDocument
            a document with frames.
        buffer_size : int, Optional
            the number of characters before and after the two frames in the ROI text.
        max_new_tokens : int, Optional
            the max number of new tokens LLM should generate.
        temperature : float, Optional
            the temperature for token sampling.
        stream : bool, Optional
            if True, LLM generated text will be printed in terminal in real-time.

        Return : List[Dict]
            a list of dict with {"frame_1", "frame_2"} for all relations.

        Raises : NotImplementedError
            always, in this base class.
        """
        # NotImplemented is the sentinel for binary-operator fallbacks; returning it here
        # would hand callers a non-list value instead of signalling a missing override.
        raise NotImplementedError("extract_relations() must be implemented by a subclass.")
652
+
653
+
654
class BinaryRelationExtractor(RelationExtractor):
    def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_func: Callable,
                 system_prompt:str=None, **kwrs):
        """
        This class extracts binary (yes/no) relations between two entities.
        Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).

        Parameters
        ----------
        inference_engine : InferenceEngine
            the LLM inferencing engine object. Must implements the chat() method.
        prompt_template : str
            prompt template with "{{<placeholder name>}}" placeholder.
        possible_relation_func : Callable, Optional
            a function that inputs 2 frames and returns a bool indicating possible relations between them.
            Its parameters must be named exactly frame_1 and frame_2. A return type hint is optional,
            but when present it must be bool.
            If None, no pair is filtered out: every pair of frames is sent to the LLM.
        system_prompt : str, Optional
            system prompt.

        Raises : TypeError
            if possible_relation_func is given but is not callable.
        Raises : ValueError
            if possible_relation_func does not have the expected parameters or return type hint.
        """
        super().__init__(inference_engine=inference_engine,
                         prompt_template=prompt_template,
                         system_prompt=system_prompt,
                         **kwrs)

        if possible_relation_func is not None:
            # Check if possible_relation_func is a function
            if not callable(possible_relation_func):
                raise TypeError(f"Expect possible_relation_func as a function, received {type(possible_relation_func)} instead.")

            sig = inspect.signature(possible_relation_func)
            # Check if frame_1, frame_2 are in input parameters
            if len(sig.parameters) != 2:
                raise ValueError("The possible_relation_func must have exactly frame_1 and frame_2 as parameters.")
            if "frame_1" not in sig.parameters:
                raise ValueError("The possible_relation_func is missing frame_1 as a parameter.")
            if "frame_2" not in sig.parameters:
                raise ValueError("The possible_relation_func is missing frame_2 as a parameter.")
            # Check the return type hint. A missing hint is accepted (lambdas cannot carry one),
            # as is the string form produced by postponed evaluation of annotations.
            if sig.return_annotation not in (inspect.Signature.empty, bool, "bool"):
                raise ValueError(f"Expect possible_relation_func to output a bool, current type hint suggests {sig.return_annotation} instead.")

        self.possible_relation_func = possible_relation_func


    def _extract_relation(self, frame_1:LLMInformationExtractionFrame, frame_2:LLMInformationExtractionFrame,
                          text:str, buffer_size:int=100, max_new_tokens:int=128, temperature:float=0.0, stream:bool=False, **kwrs) -> bool:
        """
        This method inputs two frames and the document text, prompts the LLM with the ROI text
        and the two frames, and parses the binary relation from the first JSON in the response.
        Whenever the response cannot be interpreted, a RuntimeWarning is issued and False is returned.

        Parameters:
        -----------
        frame_1 : LLMInformationExtractionFrame
            a frame
        frame_2 : LLMInformationExtractionFrame
            the other frame
        text : str
            the entire document text
        buffer_size : int, Optional
            the number of characters before and after the two frames in the ROI text.
        max_new_tokens : int, Optional
            the max number of new tokens LLM should generate.
        temperature : float, Optional
            the temperature for token sampling.
        stream : bool, Optional
            if True, LLM generated text will be printed in terminal in real-time.

        Return : bool
            a relation indicator
        """
        roi_text = self._get_ROI(frame_1, frame_2, text, buffer_size=buffer_size)
        if stream:
            print(f"\n\nROI text: \n{roi_text}\n")
            print("Extraction:")

        messages = []
        if self.system_prompt:
            messages.append({'role': 'system', 'content': self.system_prompt})

        messages.append({'role': 'user', 'content': self._get_user_prompt(text_content={"roi_text":roi_text,
                                                                                      "frame_1": str(frame_1.to_dict()),
                                                                                      "frame_2": str(frame_2.to_dict())}
                                                                                      )})
        response = self.inference_engine.chat(
                        messages=messages,
                        max_new_tokens=max_new_tokens,
                        temperature=temperature,
                        stream=stream,
                        **kwrs
                    )

        rel_json = self._extract_json(response)
        if len(rel_json) == 0:
            warnings.warn("Extractor did not output a JSON. Following default, relation = False.", RuntimeWarning)
            return False

        if "Relation" not in rel_json[0]:
            warnings.warn('Extractor output JSON without "Relation" key. Following default, relation = False.', RuntimeWarning)
            return False

        rel = rel_json[0]["Relation"]
        if isinstance(rel, bool):
            return rel
        if isinstance(rel, str) and rel in {"True", "False"}:
            # Only two literals are possible here; compare instead of evaluating LLM output.
            return rel == "True"

        warnings.warn('Extractor output JSON "Relation" key does not have bool or {"True", "False"} as value. '
                      'Following default, relation = False.', RuntimeWarning)
        return False


    def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
                          temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict]:
        """
        This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
        When no possible_relation_func was provided, every pair is sent to the LLM.

        Parameters:
        -----------
        doc : LLMInformationExtractionDocument
            a document with frames.
        buffer_size : int, Optional
            the number of characters before and after the two frames in the ROI text.
        max_new_tokens : int, Optional
            the max number of new tokens LLM should generate.
        temperature : float, Optional
            the temperature for token sampling.
        stream : bool, Optional
            if True, LLM generated text will be printed in terminal in real-time.

        Return : List[Dict]
            a list of dict with {"frame_1", "frame_2"} for all relations.

        Raises : ValueError
            if the document has no frames or has duplicate frame ids.
        """
        if not doc.has_frame():
            raise ValueError("Input document must have frames.")

        if doc.has_duplicate_frame_ids():
            raise ValueError("All frame_ids in the input document must be unique.")

        rel_pair_list = []
        for frame_1, frame_2 in itertools.combinations(doc.frames, 2):
            # Skip pairs the caller's filter rules out before spending an LLM call on them.
            if self.possible_relation_func is not None and not self.possible_relation_func(frame_1, frame_2):
                continue

            rel = self._extract_relation(frame_1=frame_1, frame_2=frame_2, text=doc.text, buffer_size=buffer_size,
                                         max_new_tokens=max_new_tokens, temperature=temperature, stream=stream, **kwrs)
            if rel:
                rel_pair_list.append({'frame_1':frame_1.frame_id, 'frame_2':frame_2.frame_id})

        return rel_pair_list
800
+
801
+
802
+
803
class MultiClassRelationExtractor(RelationExtractor):
    def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_types_func: Callable,
                 system_prompt:str=None, **kwrs):
        """
        This class extracts relations with relation types.
        Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).

        Parameters
        ----------
        inference_engine : InferenceEngine
            the LLM inferencing engine object. Must implements the chat() method.
        prompt_template : str
            prompt template with "{{<placeholder name>}}" placeholder.
        possible_relation_types_func : Callable
            a function that inputs 2 frames and returns a List of possible relation types between them.
            If the two frames must not have relations, this function should return an empty list [].
            Its parameters must be named exactly frame_1 and frame_2. A return type hint is optional,
            but when present it must be a list type.
        system_prompt : str, Optional
            system prompt.

        Raises : TypeError
            if possible_relation_types_func is given but is not callable.
        Raises : ValueError
            if possible_relation_types_func does not have the expected parameters or return type hint.
        """
        super().__init__(inference_engine=inference_engine,
                         prompt_template=prompt_template,
                         system_prompt=system_prompt,
                         **kwrs)

        if possible_relation_types_func is not None:
            # Check if possible_relation_types_func is a function
            if not callable(possible_relation_types_func):
                raise TypeError(f"Expect possible_relation_types_func as a function, received {type(possible_relation_types_func)} instead.")

            sig = inspect.signature(possible_relation_types_func)
            # Check if frame_1, frame_2 are in input parameters
            if len(sig.parameters) != 2:
                raise ValueError("The possible_relation_types_func must have exactly frame_1 and frame_2 as parameters.")
            if "frame_1" not in sig.parameters:
                raise ValueError("The possible_relation_types_func is missing frame_1 as a parameter.")
            if "frame_2" not in sig.parameters:
                raise ValueError("The possible_relation_types_func is missing frame_2 as a parameter.")
            # Check the return type hint: no hint, or any spelling of a list (of str).
            if sig.return_annotation not in (inspect.Signature.empty, list, List, List[str], list[str]):
                raise ValueError(f"Expect possible_relation_types_func to output a List of string, current type hint suggests {sig.return_annotation} instead.")

        self.possible_relation_types_func = possible_relation_types_func


    def _extract_relation(self, frame_1:LLMInformationExtractionFrame, frame_2:LLMInformationExtractionFrame,
                          pos_rel_types:List[str], text:str, buffer_size:int=100, max_new_tokens:int=128, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
        """
        This method inputs two frames and the document text, prompts the LLM with the ROI text,
        the two frames and the possible relation types, and parses the relation type from the
        first JSON in the response.
        An answer of "No Relation" is returned as is. Whenever the response cannot be interpreted,
        or names a type outside pos_rel_types, a RuntimeWarning is issued and "No Relation" is returned.

        Parameters:
        -----------
        frame_1 : LLMInformationExtractionFrame
            a frame
        frame_2 : LLMInformationExtractionFrame
            the other frame
        pos_rel_types : List[str]
            possible relation types.
        text : str
            the entire document text
        buffer_size : int, Optional
            the number of characters before and after the two frames in the ROI text.
        max_new_tokens : int, Optional
            the max number of new tokens LLM should generate.
        temperature : float, Optional
            the temperature for token sampling.
        stream : bool, Optional
            if True, LLM generated text will be printed in terminal in real-time.

        Return : str
            a relation type, or "No Relation".
        """
        roi_text = self._get_ROI(frame_1, frame_2, text, buffer_size=buffer_size)
        if stream:
            print(f"\n\nROI text: \n{roi_text}\n")
            print("Extraction:")

        messages = []
        if self.system_prompt:
            messages.append({'role': 'system', 'content': self.system_prompt})

        messages.append({'role': 'user', 'content': self._get_user_prompt(text_content={"roi_text":roi_text,
                                                                                      "frame_1": str(frame_1.to_dict()),
                                                                                      "frame_2": str(frame_2.to_dict()),
                                                                                      "pos_rel_types":str(pos_rel_types)})})
        response = self.inference_engine.chat(
                        messages=messages,
                        max_new_tokens=max_new_tokens,
                        temperature=temperature,
                        stream=stream,
                        **kwrs
                    )

        rel_json = self._extract_json(response)
        if len(rel_json) == 0:
            warnings.warn('Extractor did not output a JSON. Following default, relation = "No Relation".', RuntimeWarning)
            return "No Relation"

        if "RelationType" not in rel_json[0]:
            warnings.warn('Extractor output JSON without "RelationType" key. Following default, relation = "No Relation".', RuntimeWarning)
            return "No Relation"

        rel = rel_json[0]["RelationType"]
        # "No Relation" is the legitimate negative answer, not a malformed output: no warning.
        if rel == "No Relation":
            return "No Relation"
        if rel in pos_rel_types:
            return rel

        warnings.warn(f'Extracted relation type "{rel}", which is not in the return of possible_relation_types_func: {pos_rel_types}. '
                      'Following default, relation = "No Relation".', RuntimeWarning)
        return "No Relation"


    def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
                          temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict]:
        """
        This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs
        and to provide possible relation types between two frames.

        Parameters:
        -----------
        doc : LLMInformationExtractionDocument
            a document with frames.
        buffer_size : int, Optional
            the number of characters before and after the two frames in the ROI text.
        max_new_tokens : int, Optional
            the max number of new tokens LLM should generate.
        temperature : float, Optional
            the temperature for token sampling.
        stream : bool, Optional
            if True, LLM generated text will be printed in terminal in real-time.

        Return : List[Dict]
            a list of dict with {"frame_1", "frame_2", "relation"} for all relations.

        Raises : ValueError
            if the document has no frames or has duplicate frame ids.
        Raises : TypeError
            if the extractor was created without a possible_relation_types_func.
        """
        if not doc.has_frame():
            raise ValueError("Input document must have frames.")

        if doc.has_duplicate_frame_ids():
            raise ValueError("All frame_ids in the input document must be unique.")

        # The candidate relation types come only from this callback, so it cannot be omitted here.
        if self.possible_relation_types_func is None:
            raise TypeError("possible_relation_types_func is required to extract relations, received None.")

        rel_pair_list = []
        for frame_1, frame_2 in itertools.combinations(doc.frames, 2):
            pos_rel_types = self.possible_relation_types_func(frame_1, frame_2)
            # An empty list means this pair cannot be related; skip the LLM call.
            if not pos_rel_types:
                continue

            rel = self._extract_relation(frame_1=frame_1, frame_2=frame_2, pos_rel_types=pos_rel_types, text=doc.text,
                                         buffer_size=buffer_size, max_new_tokens=max_new_tokens, temperature=temperature, stream=stream, **kwrs)

            if rel != "No Relation":
                rel_pair_list.append({'frame_1':frame_1.frame_id, 'frame_2':frame_2.frame_id, "relation":rel})

        return rel_pair_list
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llm-ie
3
- Version: 0.1.7
3
+ Version: 0.2.0
4
4
  Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
5
5
  License: MIT
6
6
  Author: Enshuo (David) Hsu
@@ -25,11 +25,14 @@ An LLM-powered tool that transforms everyday language into robust information ex
25
25
  - [Prerequisite](#prerequisite)
26
26
  - [Installation](#installation)
27
27
  - [Quick Start](#quick-start)
28
+ - [Examples](#examples)
28
29
  - [User Guide](#user-guide)
29
30
  - [LLM Inference Engine](#llm-inference-engine)
30
31
  - [Prompt Template](#prompt-template)
31
32
  - [Prompt Editor](#prompt-editor)
32
33
  - [Extractor](#extractor)
34
+ - [FrameExtractor](#frameextractor)
35
+ - [RelationExtractor](#relationextractor)
33
36
 
34
37
  ## Overview
35
38
  LLM-IE is a toolkit that provides robust information extraction utilities for frame-based information extraction. Since prompt design has a significant impact on generative information extraction with LLMs, it also provides a built-in LLM editor to help with prompt writing. The flowchart below demonstrates the workflow starting from a casual language request.
@@ -206,6 +209,10 @@ for frame in frames:
206
209
  doc.save("<your filename>.llmie")
207
210
  ```
208
211
 
212
+ ## Examples
213
+ - [Write prompt templates with AI editors](demo/prompt_template_writing.ipynb)
214
+ - [NER + RE for Drug, Strength, Frequency](demo/medication_relation_extraction.ipynb)
215
+
209
216
  ## User Guide
210
217
  This package is comprised of some key classes:
211
218
  - LLM Inference Engine
@@ -547,12 +554,25 @@ Recommendations:
547
554
  After a few iterations of revision, we will have a high-quality prompt template for the information extraction pipeline.
548
555
 
549
556
  ### Extractor
550
- An extractor implements a prompting method for information extraction. The ```BasicFrameExtractor``` directly prompts LLM to generate a list of dictionaries. Each dictionary is then post-processed into a frame. The ```ReviewFrameExtractor``` is based on the ```BasicFrameExtractor``` but adds a review step after the initial extraction to boost sensitivity and improve performance. ```SentenceFrameExtractor``` gives LLM the entire document upfront as a reference, then prompts LLM sentence by sentence and collects per-sentence outputs. To learn about an extractor, use the class method ```get_prompt_guide()``` to print out the prompt guide.
557
+ An extractor implements a prompting method for information extraction. There are two extractor families: ```FrameExtractor``` and ```RelationExtractor```.
558
+ The ```FrameExtractor``` extracts named entities and entity attributes ("frame"). The ```RelationExtractor``` extracts the relation (and relation types) between frames.
559
+
560
+ #### FrameExtractor
561
+ The ```BasicFrameExtractor``` directly prompts LLM to generate a list of dictionaries. Each dictionary is then post-processed into a frame. The ```ReviewFrameExtractor``` is based on the ```BasicFrameExtractor``` but adds a review step after the initial extraction to boost sensitivity and improve performance. ```SentenceFrameExtractor``` gives LLM the entire document upfront as a reference, then prompts LLM sentence by sentence and collects per-sentence outputs. To learn about an extractor, use the class method ```get_prompt_guide()``` to print out the prompt guide.
551
562
 
552
563
  <details>
553
564
  <summary>BasicFrameExtractor</summary>
554
565
 
555
- The ```BasicFrameExtractor``` directly prompts LLM to generate a list of dictionaries. Each dictionary is then post-processed into a frame.
566
+ The ```BasicFrameExtractor``` directly prompts LLM to generate a list of dictionaries. Each dictionary is then post-processed into a frame. The ```text_content``` holds the input text as a string, or as a dictionary (if prompt template has multiple input placeholders). The ```entity_key``` defines which JSON key should be used as entity text. It must be consistent with the prompt template.
567
+
568
+ ```python
569
+ from llm_ie.extractors import BasicFrameExtractor
570
+
571
+ extractor = BasicFrameExtractor(llm, prompt_temp)
572
+ frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", stream=True)
573
+ ```
574
+
575
+ Use the ```get_prompt_guide()``` method to inspect the prompt template guideline for ```BasicFrameExtractor```.
556
576
 
557
577
  ```python
558
578
  from llm_ie.extractors import BasicFrameExtractor
@@ -630,15 +650,202 @@ frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", str
630
650
  <details>
631
651
  <summary>SentenceFrameExtractor</summary>
632
652
 
633
- The ```SentenceFrameExtractor``` instructs the LLM to extract sentence by sentence. The reason is to ensure the accuracy of frame spans. It also prevents LLMs from overseeing sections/ sentences. Empirically, this extractor results in better sensitivity than the ```BasicFrameExtractor``` in complex tasks.
653
+ The ```SentenceFrameExtractor``` instructs the LLM to extract sentence by sentence. The reason is to ensure the accuracy of frame spans. It also prevents LLMs from overlooking sections/sentences. Empirically, this extractor results in better recall than the ```BasicFrameExtractor``` in complex tasks.
654
+
655
+ The ```multi_turn``` parameter specifies multi-turn conversation for prompting. If True, sentences and LLM outputs will be appended to the input message and carried over. If False, only the current sentence is prompted. For LLM inference engines that support prompt caching (e.g., Llama.Cpp, Ollama), multi-turn conversation prompting can better utilize KV caching and results in faster inference. But for vLLM with [Automatic Prefix Caching (APC)](https://docs.vllm.ai/en/latest/automatic_prefix_caching/apc.html), multi-turn conversation is not necessary.
634
656
 
635
657
  ```python
636
658
  from llm_ie.extractors import SentenceFrameExtractor
637
659
 
638
660
  extractor = SentenceFrameExtractor(llm, prompt_temp)
639
- frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", stream=True)
661
+ frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", multi_turn=True, stream=True)
640
662
  ```
641
663
  </details>
642
664
 
665
+ #### RelationExtractor
666
+ Relation extractors prompt LLM with combinations of two frames from a document (```LLMInformationExtractionDocument```) and extract relations.
667
+ The ```BinaryRelationExtractor``` extracts binary relations (yes/no) between two frames. The ```MultiClassRelationExtractor``` extracts relations and assigns relation types ("multi-class").
668
+
669
+ An important feature of the relation extractors is that users are required to define a ```possible_relation_func``` (for ```BinaryRelationExtractor```) or a ```possible_relation_types_func``` (for ```MultiClassRelationExtractor```) function for the extractors. The reason is that there are too many possible combinations of two frames (N choose 2 combinations). These functions help rule out impossible combinations and therefore reduce the LLM inference burden.
670
+
671
+ <details>
672
+ <summary>BinaryRelationExtractor</summary>
673
+
674
+ Use the get_prompt_guide() method to inspect the prompt template guideline for BinaryRelationExtractor.
675
+ ```python
676
+ from llm_ie.extractors import BinaryRelationExtractor
677
+
678
+ print(BinaryRelationExtractor.get_prompt_guide())
679
+ ```
680
+
681
+ ```
682
+ Prompt template design:
683
+ 1. Task description (mention binary relation extraction and ROI)
684
+ 2. Schema definition (defines relation)
685
+ 3. Output format definition (must use the key "Relation")
686
+ 4. Hints
687
+ 5. Input placeholders (must include "roi_text", "frame_1", and "frame_2" placeholders)
643
688
 
644
689
 
690
+ Example:
691
+
692
+ # Task description
693
+ This is a binary relation extraction task. Given a region of interest (ROI) text and two entities from a medical note, indicate the relation existence between the two entities.
694
+
695
+ # Schema definition
696
+ True: if there is a relationship between a medication name (one of the entities) and its strength or frequency (the other entity).
697
+ False: Otherwise.
698
+
699
+ # Output format definition
700
+ Your output should follow the JSON format:
701
+ {"Relation": "<True or False>"}
702
+
703
+ I am only interested in the content between []. Do not explain your answer.
704
+
705
+ # Hints
706
+ 1. Your input always contains one medication entity and 1) one strength entity or 2) one frequency entity.
707
+ 2. Pay attention to the medication entity and see if the strength or frequency is for it.
708
+ 3. If the strength or frequency is for another medication, output False.
709
+ 4. If the strength or frequency is for the same medication but at a different location (span), output False.
710
+
711
+ # Input placeholders
712
+ ROI Text with the two entities annotated with <entity_1> and <entity_2>:
713
+ "{{roi_text}}"
714
+
715
+ Entity 1 full information:
716
+ {{frame_1}}
717
+
718
+ Entity 2 full information:
719
+ {{frame_2}}
720
+ ```
721
+
722
+ As an example, we define the ```possible_relation_func``` function:
723
+ - if the two frames are > 500 characters apart, we assume no relation (False)
724
+ - if the two frames are "Medication" and "Strength", or "Medication" and "Frequency", there could be relations (True)
725
+
726
+ ```python
727
+ def possible_relation_func(frame_1, frame_2) -> bool:
727
+ """
728
+ Pre-screens two frames and returns a bool indicating whether the two frames could be related, based on the distance between their start positions and their "entity_type" attributes.
729
+ """
730
+ # If the start positions of the two frames are > 500 characters apart, assume no relation.
731
+ if abs(frame_1.start - frame_2.start) > 500:
732
+ return False
733
+
734
+ # If the entity types are "Medication" and "Strength" (in either order), there could be a relation.
735
+ if (frame_1.attr["entity_type"] == "Medication" and frame_2.attr["entity_type"] == "Strength") or \
736
+ (frame_2.attr["entity_type"] == "Medication" and frame_1.attr["entity_type"] == "Strength"):
737
+ return True
738
+
739
+ # If the entity types are "Medication" and "Frequency" (in either order), there could be a relation.
740
+ if (frame_1.attr["entity_type"] == "Medication" and frame_2.attr["entity_type"] == "Frequency") or \
741
+ (frame_2.attr["entity_type"] == "Medication" and frame_1.attr["entity_type"] == "Frequency"):
742
+ return True
743
+
744
+ # Otherwise (any other combination of entity types), no relation.
745
+ return False
747
+ ```
748
+
749
+ In the ```BinaryRelationExtractor``` constructor, we pass in the prompt template and ```possible_relation_func```.
750
+
751
+ ```python
752
+ from llm_ie.extractors import BinaryRelationExtractor
753
+
754
+ extractor = BinaryRelationExtractor(llm, prompt_template=prompt_template, possible_relation_func=possible_relation_func)
755
+ relations = extractor.extract_relations(doc, stream=True)
756
+ ```
757
+
758
+ </details>
759
+
760
+
761
+ <details>
762
+ <summary>MultiClassRelationExtractor</summary>
763
+
764
+ The main difference from ```BinaryRelationExtractor``` is that the ```MultiClassRelationExtractor``` allows specifying relation types. The prompt template guideline has an additional placeholder for possible relation types ```{{pos_rel_types}}```.
765
+
766
+ ```python
767
+ print(MultiClassRelationExtractor.get_prompt_guide())
768
+ ```
769
+
770
+ ```
771
+ Prompt template design:
772
+ 1. Task description (mention multi-class relation extraction and ROI)
773
+ 2. Schema definition (defines relation types)
774
+ 3. Output format definition (must use the key "RelationType")
775
+ 4. Input placeholders (must include "roi_text", "frame_1", and "frame_2" placeholders)
776
+
777
+
778
+ Example:
779
+
780
+ # Task description
781
+ This is a multi-class relation extraction task. Given a region of interest (ROI) text and two frames from a medical note, classify the relation types between the two frames.
782
+
783
+ # Schema definition
784
+ Strength-Drug: this is a relationship between the drug strength and its name.
785
+ Dosage-Drug: this is a relationship between the drug dosage and its name.
786
+ Duration-Drug: this is a relationship between a drug duration and its name.
787
+ Frequency-Drug: this is a relationship between a drug frequency and its name.
788
+ Form-Drug: this is a relationship between a drug form and its name.
789
+ Route-Drug: this is a relationship between the route of administration for a drug and its name.
790
+ Reason-Drug: this is a relationship between the reason for which a drug was administered (e.g., symptoms, diseases, etc.) and a drug name.
791
+ ADE-Drug: this is a relationship between an adverse drug event (ADE) and a drug name.
792
+
793
+ # Output format definition
794
+ Choose one of the relation types listed below or choose "No Relation":
795
+ {{pos_rel_types}}
796
+
797
+ Your output should follow the JSON format:
798
+ {"RelationType": "<relation type or No Relation>"}
799
+
800
+ I am only interested in the content between []. Do not explain your answer.
801
+
802
+ # Hints
803
+ 1. Your input always contains one medication entity and 1) one strength entity or 2) one frequency entity.
804
+ 2. Pay attention to the medication entity and see if the strength or frequency is for it.
805
+ 3. If the strength or frequency is for another medication, output "No Relation".
806
+ 4. If the strength or frequency is for the same medication but at a different location (span), output "No Relation".
807
+
808
+ # Input placeholders
809
+ ROI Text with the two entities annotated with <entity_1> and <entity_2>:
810
+ "{{roi_text}}"
811
+
812
+ Entity 1 full information:
813
+ {{frame_1}}
814
+
815
+ Entity 2 full information:
816
+ {{frame_2}}
817
+ ```
818
+
819
+ As an example, we define the ```possible_relation_types_func``` function:
820
+ - if the two frames are > 500 characters apart, we assume "No Relation" (output [])
821
+ - if the two frames are "Medication" and "Strength", the only possible relation types are "Strength-Drug" or "No Relation"
822
+ - if the two frames are "Medication" and "Frequency", the only possible relation types are "Frequency-Drug" or "No Relation"
823
+
824
+ ```python
825
+ def possible_relation_types_func(frame_1, frame_2) -> List[str]:
825
+ # If the start positions of the two frames are > 500 characters apart, we assume "No Relation" (an empty list means no candidate relation types)
826
+ if abs(frame_1.start - frame_2.start) > 500:
827
+ return []
828
+
829
+ # If the two frames are "Medication" and "Strength" (in either order), the only possible relation types are "Strength-Drug" or "No Relation"
830
+ if (frame_1.attr["entity_type"] == "Medication" and frame_2.attr["entity_type"] == "Strength") or \
831
+ (frame_2.attr["entity_type"] == "Medication" and frame_1.attr["entity_type"] == "Strength"):
832
+ return ['Strength-Drug']
833
+
834
+ # If the two frames are "Medication" and "Frequency" (in either order), the only possible relation types are "Frequency-Drug" or "No Relation"
835
+ if (frame_1.attr["entity_type"] == "Medication" and frame_2.attr["entity_type"] == "Frequency") or \
836
+ (frame_2.attr["entity_type"] == "Medication" and frame_1.attr["entity_type"] == "Frequency"):
837
+ return ['Frequency-Drug']
838
+
839
+ return []  # any other combination of entity types: no candidate relation types
841
+ ```
842
+
843
+
844
+ ```python
845
+ from llm_ie.extractors import MultiClassRelationExtractor
846
+
847
+ extractor = MultiClassRelationExtractor(llm, prompt_template=re_prompt_template, possible_relation_types_func=possible_relation_types_func)
848
+ relations = extractor.extract_relations(doc, stream=True)
849
+ ```
850
+
851
+ </details>
@@ -2,12 +2,14 @@ llm_ie/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  llm_ie/asset/PromptEditor_prompts/comment.txt,sha256=C_lxx-dlOlFJ__jkHKosZ8HsNAeV1aowh2B36nIipBY,159
3
3
  llm_ie/asset/PromptEditor_prompts/rewrite.txt,sha256=bYLOix7DUBlcWv-Q0JZ5kDnZ9OEXBt_AGDN0TydLB8o,191
4
4
  llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt,sha256=XbnU8byLGGUA3A3lT0bb2Hw-ggzhcqD3ZuKzduod2ww,1944
5
+ llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt,sha256=z9Xg0fdFbVVwnTYcUTcAUvEIWhF075W8qGxN-Vj7xdo,1548
6
+ llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt,sha256=D5DphUHw8SUERUVdcIjUynuTmYJa6-PwBlF7FzxNsvQ,2276
5
7
  llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=XbnU8byLGGUA3A3lT0bb2Hw-ggzhcqD3ZuKzduod2ww,1944
6
8
  llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=8nj9OLPJMtr9Soi5JU3Xk-HC7pKNoI54xA_A4u7I5j4,2620
7
- llm_ie/data_types.py,sha256=MnpyXFviFWhxeC5mqbaPdAxGx6vV_PhnUIFfUamq3D8,6687
9
+ llm_ie/data_types.py,sha256=2RKP4wXDuku-Tn4s8uzzUFavG1fZ2e47SaY8oL57LsI,10923
8
10
  llm_ie/engines.py,sha256=m9ytGUX61jEy9SmVHbb90mrfGMAwC6dV-v7Jke1U7Ho,9296
9
- llm_ie/extractors.py,sha256=PfcUhmU_LfVFIfI5v3C7DzGAFF0xEPDdLUnwKHYnUyg,24125
11
+ llm_ie/extractors.py,sha256=i0m8uFaKXiVY1ucjvzbUFbV1slPYfZ3EGOZrolnFVHA,44079
10
12
  llm_ie/prompt_editor.py,sha256=dbu7A3O7O7Iw2v-xCgrTFH1-wTLAGf4SHDqdeS-He2Q,1869
11
- llm_ie-0.1.7.dist-info/METADATA,sha256=wMAToHKL1K3hZP-xONBdEw3sy56stKYThfn3NqbfZ34,29712
12
- llm_ie-0.1.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
13
- llm_ie-0.1.7.dist-info/RECORD,,
13
+ llm_ie-0.2.0.dist-info/METADATA,sha256=9CPC3OAd2J0nROZ7z8DI7lvGKOO2H2uAnVQv_YDFItg,40052
14
+ llm_ie-0.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
15
+ llm_ie-0.2.0.dist-info/RECORD,,
File without changes