llm-ie 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/extractors.py CHANGED
@@ -17,7 +17,7 @@ from colorama import Fore, Style
 
 
 class Extractor:
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
 """
 This is the abstract class for (frame and relation) extractors.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -172,7 +172,7 @@ class Extractor:
 class FrameExtractor(Extractor):
 from nltk.tokenize import RegexpTokenizer
 def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker,
- prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None, **kwrs):
+ prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None):
 """
 This is the abstract class for frame extraction.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -192,8 +192,7 @@ class FrameExtractor(Extractor):
 """
 super().__init__(inference_engine=inference_engine,
 prompt_template=prompt_template,
- system_prompt=system_prompt,
- **kwrs)
+ system_prompt=system_prompt)
 
 self.unit_chunker = unit_chunker
 if context_chunker is None:
@@ -332,7 +331,7 @@ class FrameExtractor(Extractor):
 return entity_spans
 
 @abc.abstractmethod
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, return_messages_log:bool=False, **kwrs) -> str:
+ def extract(self, text_content:Union[str, Dict[str,str]], return_messages_log:bool=False, **kwrs) -> str:
 """
 This method inputs text content and outputs a string generated by LLM
 
@@ -342,8 +341,6 @@ class FrameExtractor(Extractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : str, Optional
- the max number of new tokens LLM can generate.
 return_messages_log : bool, Optional
 if True, a list of messages will be returned.
 
@@ -354,7 +351,7 @@ class FrameExtractor(Extractor):
 
 
 @abc.abstractmethod
- def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=2048,
+ def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str,
 document_key:str=None, return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
 """
 This method inputs text content and outputs a list of LLMInformationExtractionFrame
@@ -368,8 +365,6 @@ class FrameExtractor(Extractor):
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
 entity_key : str
 the key (in ouptut JSON) for entity text. Any extraction that does not include entity key will be dropped.
- max_new_tokens : str, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
@@ -384,7 +379,7 @@ class FrameExtractor(Extractor):
 
 class DirectFrameExtractor(FrameExtractor):
 def __init__(self, inference_engine:InferenceEngine, unit_chunker:UnitChunker,
- prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None, **kwrs):
+ prompt_template:str, system_prompt:str=None, context_chunker:ContextChunker=None):
 """
 This class is for general unit-context frame extraction.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -406,12 +401,11 @@ class DirectFrameExtractor(FrameExtractor):
 unit_chunker=unit_chunker,
 prompt_template=prompt_template,
 system_prompt=system_prompt,
- context_chunker=context_chunker,
- **kwrs)
+ context_chunker=context_chunker)
 
 
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048,
- document_key:str=None, temperature:float=0.0, verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
+ def extract(self, text_content:Union[str, Dict[str,str]],
+ document_key:str=None, verbose:bool=False, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
 """
 This method inputs a text and outputs a list of outputs per unit.
 
@@ -421,13 +415,9 @@ class DirectFrameExtractor(FrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 return_messages_log : bool, Optional
@@ -491,27 +481,12 @@ class DirectFrameExtractor(FrameExtractor):
 
 print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
 
- response_stream = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
- )
-
- gen_text = ""
- for chunk in response_stream:
- gen_text += chunk
- print(chunk, end='', flush=True)
 
- else:
- gen_text = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=False,
- **kwrs
- )
+ gen_text = self.inference_engine.chat(
+ messages=messages,
+ verbose=verbose,
+ stream=False
+ )
 
 if return_messages_log:
 messages.append({"role": "assistant", "content": gen_text})
@@ -530,8 +505,8 @@ class DirectFrameExtractor(FrameExtractor):
 
 return output
 
- def stream(self, text_content: Union[str, Dict[str, str]], max_new_tokens: int = 2048, document_key: str = None,
- temperature: float = 0.0, **kwrs) -> Generator[Dict[str, Any], None, List[FrameExtractionUnitResult]]:
+ def stream(self, text_content: Union[str, Dict[str, str]],
+ document_key: str = None) -> Generator[Dict[str, Any], None, List[FrameExtractionUnitResult]]:
 """
 Streams LLM responses per unit with structured event types,
 and returns collected data for post-processing.
@@ -542,7 +517,8 @@ class DirectFrameExtractor(FrameExtractor):
 - {"type": "info", "data": str_message}: General informational messages.
 - {"type": "unit", "data": dict_unit_info}: Signals start of a new unit. dict_unit_info contains {'id', 'text', 'start', 'end'}
 - {"type": "context", "data": str_context}: Context string for the current unit.
- - {"type": "llm_chunk", "data": str_chunk}: A raw chunk from the LLM.
+ - {"type": "reasoning", "data": str_chunk}: A reasoning model thinking chunk from the LLM.
+ - {"type": "response", "data": str_chunk}: A response/answer chunk from the LLM.
 
 Returns:
 --------
@@ -601,13 +577,10 @@ class DirectFrameExtractor(FrameExtractor):
 
 response_stream = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
+ stream=True
 )
 for chunk in response_stream:
- yield {"type": "llm_chunk", "data": chunk}
+ yield chunk
 current_gen_text += chunk
 
 # Store the result for this unit
@@ -622,8 +595,8 @@ class DirectFrameExtractor(FrameExtractor):
 yield {"type": "info", "data": "All units processed by LLM."}
 return collected_results
 
- async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None, temperature:float=0.0,
- concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
+ async def extract_async(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
 """
 This is the asynchronous version of the extract() method.
 
@@ -633,13 +606,9 @@ class DirectFrameExtractor(FrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 concurrent_batch_size : int, Optional
 the batch size for concurrent processing.
 return_messages_log : bool, Optional
@@ -701,17 +670,14 @@ class DirectFrameExtractor(FrameExtractor):
 # Process units concurrently with asyncio.Semaphore
 semaphore = asyncio.Semaphore(concurrent_batch_size)
 
- async def semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
+ async def semaphore_helper(task_data: Dict, **kwrs):
 unit = task_data["unit"]
 messages = task_data["messages"]
 original_index = task_data["original_index"]
 
 async with semaphore:
 gen_text = await self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 return {"original_index": original_index, "unit": unit, "gen_text": gen_text, "messages": messages}
 
@@ -719,10 +685,7 @@ class DirectFrameExtractor(FrameExtractor):
 tasks = []
 for task_inp in tasks_input:
 task = asyncio.create_task(semaphore_helper(
- task_inp,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ task_inp
 ))
 tasks.append(task)
 
@@ -759,11 +722,10 @@ class DirectFrameExtractor(FrameExtractor):
 return output
 
 
- def extract_frames(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
- document_key:str=None, temperature:float=0.0, verbose:bool=False,
- concurrent:bool=False, concurrent_batch_size:int=32,
+ def extract_frames(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+ verbose:bool=False, concurrent:bool=False, concurrent_batch_size:int=32,
 case_sensitive:bool=False, fuzzy_match:bool=True, fuzzy_buffer_size:float=0.2, fuzzy_score_cutoff:float=0.8,
- allow_overlap_entities:bool=False, return_messages_log:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
+ allow_overlap_entities:bool=False, return_messages_log:bool=False) -> List[LLMInformationExtractionFrame]:
 """
 This method inputs a text and outputs a list of LLMInformationExtractionFrame
 It use the extract() method and post-process outputs into frames.
@@ -774,13 +736,9 @@ class DirectFrameExtractor(FrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : str, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 concurrent : bool, Optional
@@ -812,21 +770,15 @@ class DirectFrameExtractor(FrameExtractor):
 
 nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
 extraction_results = asyncio.run(self.extract_async(text_content=text_content,
- max_new_tokens=max_new_tokens,
 document_key=document_key,
- temperature=temperature,
 concurrent_batch_size=concurrent_batch_size,
- return_messages_log=return_messages_log,
- **kwrs)
+ return_messages_log=return_messages_log)
 )
 else:
 extraction_results = self.extract(text_content=text_content,
- max_new_tokens=max_new_tokens,
 document_key=document_key,
- temperature=temperature,
 verbose=verbose,
- return_messages_log=return_messages_log,
- **kwrs)
+ return_messages_log=return_messages_log)
 
 llm_output_results, messages_log = extraction_results if return_messages_log else (extraction_results, None)
 
@@ -869,8 +821,8 @@ class DirectFrameExtractor(FrameExtractor):
 
 
 class ReviewFrameExtractor(DirectFrameExtractor):
- def __init__(self, unit_chunker:UnitChunker, context_chunker:ContextChunker,
- inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None, **kwrs):
+ def __init__(self, unit_chunker:UnitChunker, context_chunker:ContextChunker, inference_engine:InferenceEngine,
+ prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None):
 """
 This class add a review step after the DirectFrameExtractor.
 The Review process asks LLM to review its output and:
@@ -901,8 +853,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 unit_chunker=unit_chunker,
 prompt_template=prompt_template,
 system_prompt=system_prompt,
- context_chunker=context_chunker,
- **kwrs)
+ context_chunker=context_chunker)
 # check review mode
 if review_mode not in {"addition", "revision"}:
 raise ValueError('review_mode must be one of {"addition", "revision"}.')
@@ -939,8 +890,8 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 if self.review_prompt is None:
 raise ValueError(f"Cannot find review prompt for {self.__class__.__name__} in the package. Please provide a review_prompt.")
 
- def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None,
- temperature:float=0.0, verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
+ def extract(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
+ verbose:bool=False, return_messages_log:bool=False) -> List[FrameExtractionUnitResult]:
 """
 This method inputs a text and outputs a list of outputs per unit.
 
@@ -950,13 +901,9 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 return_messages_log : bool, Optional
@@ -1020,28 +967,13 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 print(f"{Fore.YELLOW}Context:{Style.RESET_ALL}\n{context}\n")
 
 print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
-
- response_stream = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
- )
-
- initial = ""
- for chunk in response_stream:
- initial += chunk
- print(chunk, end='', flush=True)
 
- else:
- initial = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=False,
- **kwrs
- )
+
+ initial = self.inference_engine.chat(
+ messages=messages,
+ verbose=verbose,
+ stream=False
+ )
 
 if return_messages_log:
 messages.append({"role": "assistant", "content": initial})
@@ -1053,29 +985,12 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 messages.append({'role': 'assistant', 'content': initial})
 messages.append({'role': 'user', 'content': self.review_prompt})
-
- if verbose:
- response_stream = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
- )
-
- review = ""
- for chunk in response_stream:
- review += chunk
- print(chunk, end='', flush=True)
 
- else:
- review = self.inference_engine.chat(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=False,
- **kwrs
- )
+ review = self.inference_engine.chat(
+ messages=messages,
+ verbose=verbose,
+ stream=False
+ )
 
 # Output
 if self.review_mode == "revision":
@@ -1101,8 +1016,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 return output
 
 
- def stream(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048,
- document_key:str=None, temperature:float=0.0, **kwrs) -> Generator[str, None, None]:
+ def stream(self, text_content:Union[str, Dict[str,str]], document_key:str=None) -> Generator[str, None, None]:
 """
 This method inputs a text and outputs a list of outputs per unit.
 
@@ -1112,13 +1026,9 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 
 Return : List[FrameExtractionUnitResult]
 the output from LLM for each unit. Contains the start, end, text, and generated text.
@@ -1176,10 +1086,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 response_stream = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
+ stream=True
 )
 
 initial = ""
@@ -1195,16 +1102,13 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 response_stream = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=True,
- **kwrs
+ stream=True
 )
 
 for chunk in response_stream:
 yield chunk
 
- async def extract_async(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=2048, document_key:str=None, temperature:float=0.0,
+ async def extract_async(self, text_content:Union[str, Dict[str,str]], document_key:str=None,
 concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[FrameExtractionUnitResult]:
 """
 This is the asynchronous version of the extract() method with the review step.
@@ -1215,13 +1119,9 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 the input text content to put in prompt template.
 If str, the prompt template must has only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
 If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
- max_new_tokens : int, Optional
- the max number of new tokens LLM should generate.
 document_key : str, Optional
 specify the key in text_content where document text is.
 If text_content is str, this parameter will be ignored.
- temperature : float, Optional
- the temperature for token sampling.
 concurrent_batch_size : int, Optional
 the batch size for concurrent processing.
 return_messages_log : bool, Optional
@@ -1282,17 +1182,14 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 semaphore = asyncio.Semaphore(concurrent_batch_size)
 
- async def initial_semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
+ async def initial_semaphore_helper(task_data: Dict):
 unit = task_data["unit"]
 messages = task_data["messages"]
 original_index = task_data["original_index"]
 
 async with semaphore:
 gen_text = await self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 # Return initial generation result along with the messages used and the unit
 return {"original_index": original_index, "unit": unit, "initial_gen_text": gen_text, "initial_messages": messages}
@@ -1300,10 +1197,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 # Create and gather initial generation tasks
 initial_tasks = [
 asyncio.create_task(initial_semaphore_helper(
- task_inp,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ task_inp
 ))
 for task_inp in initial_tasks_input
 ]
@@ -1333,16 +1227,13 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 })
 
 
- async def review_semaphore_helper(task_data: Dict, max_new_tokens: int, temperature: float, **kwrs):
+ async def review_semaphore_helper(task_data: Dict, **kwrs):
 messages = task_data["messages"]
 original_index = task_data["original_index"]
 
 async with semaphore:
 review_gen_text = await self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 # Combine initial and review results
 task_data["review_gen_text"] = review_gen_text
@@ -1354,10 +1245,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 # Create and gather review tasks
 review_tasks = [
 asyncio.create_task(review_semaphore_helper(
- task_inp,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ task_inp
 ))
 for task_inp in review_tasks_input
 ]
@@ -1405,7 +1293,7 @@ class ReviewFrameExtractor(DirectFrameExtractor):
 
 
 class BasicFrameExtractor(DirectFrameExtractor):
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
 """
 This class diretly prompt LLM for frame extraction.
 Input system prompt (optional), prompt template (with instruction, few-shot examples),
@@ -1424,11 +1312,10 @@ class BasicFrameExtractor(DirectFrameExtractor):
 unit_chunker=WholeDocumentUnitChunker(),
 prompt_template=prompt_template,
 system_prompt=system_prompt,
- context_chunker=NoContextChunker(),
- **kwrs)
+ context_chunker=NoContextChunker())
 
 class BasicReviewFrameExtractor(ReviewFrameExtractor):
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None, **kwrs):
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, review_mode:str, review_prompt:str=None, system_prompt:str=None):
 """
 This class add a review step after the BasicFrameExtractor.
 The Review process asks LLM to review its output and:
@@ -1457,13 +1344,12 @@ class BasicReviewFrameExtractor(ReviewFrameExtractor):
 review_mode=review_mode,
 review_prompt=review_prompt,
 system_prompt=system_prompt,
- context_chunker=NoContextChunker(),
- **kwrs)
+ context_chunker=NoContextChunker())
 
 
 class SentenceFrameExtractor(DirectFrameExtractor):
 def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None,
- context_sentences:Union[str, int]="all", **kwrs):
+ context_sentences:Union[str, int]="all"):
 """
 This class performs sentence-by-sentence information extraction.
 The process is as follows:
@@ -1507,14 +1393,13 @@ class SentenceFrameExtractor(DirectFrameExtractor):
 unit_chunker=SentenceUnitChunker(),
 prompt_template=prompt_template,
 system_prompt=system_prompt,
- context_chunker=context_chunker,
- **kwrs)
+ context_chunker=context_chunker)
 
 
 class SentenceReviewFrameExtractor(ReviewFrameExtractor):
 def __init__(self, inference_engine:InferenceEngine, prompt_template:str,
 review_mode:str, review_prompt:str=None, system_prompt:str=None,
- context_sentences:Union[str, int]="all", **kwrs):
+ context_sentences:Union[str, int]="all"):
 """
 This class adds a review step after the SentenceFrameExtractor.
 For each sentence, the review process asks LLM to review its output and:
@@ -1561,12 +1446,11 @@ class SentenceReviewFrameExtractor(ReviewFrameExtractor):
 review_mode=review_mode,
 review_prompt=review_prompt,
 system_prompt=system_prompt,
- context_chunker=context_chunker,
- **kwrs)
+ context_chunker=context_chunker)
 
 
 class RelationExtractor(Extractor):
- def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None, **kwrs):
+ def __init__(self, inference_engine:InferenceEngine, prompt_template:str, system_prompt:str=None):
 """
 This is the abstract class for relation extraction.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1582,8 +1466,7 @@ class RelationExtractor(Extractor):
 """
 super().__init__(inference_engine=inference_engine,
 prompt_template=prompt_template,
- system_prompt=system_prompt,
- **kwrs)
+ system_prompt=system_prompt)
 
 def _get_ROI(self, frame_1:LLMInformationExtractionFrame, frame_2:LLMInformationExtractionFrame,
 text:str, buffer_size:int=100) -> str:
@@ -1659,7 +1542,7 @@ class RelationExtractor(Extractor):
 
 class BinaryRelationExtractor(RelationExtractor):
 def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_func: Callable,
- system_prompt:str=None, **kwrs):
+ system_prompt:str=None):
 """
 This class extracts binary (yes/no) relations between two entities.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1677,8 +1560,7 @@ class BinaryRelationExtractor(RelationExtractor):
 """
 super().__init__(inference_engine=inference_engine,
 prompt_template=prompt_template,
- system_prompt=system_prompt,
- **kwrs)
+ system_prompt=system_prompt)
 
 if possible_relation_func:
 # Check if possible_relation_func is a function
@@ -1718,8 +1600,8 @@ class BinaryRelationExtractor(RelationExtractor):
 return False
 
 
- def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, verbose:bool=False,
+ return_messages_log:bool=False) -> List[Dict]:
 """
 This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
 Outputs pairs that are related.
@@ -1730,11 +1612,7 @@ class BinaryRelationExtractor(RelationExtractor):
 a document with frames.
 buffer_size : int, Optional
 the number of characters before and after the two frames in the ROI text.
- max_new_tokens : str, Optional
- the max number of new tokens LLM should generate.
- temperature : float, Optional
- the temperature for token sampling.
- stream : bool, Optional
+ verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 return_messages_log : bool, Optional
 if True, a list of messages will be returned.
@@ -1753,7 +1631,7 @@ class BinaryRelationExtractor(RelationExtractor):
 
 if pos_rel:
 roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
- if stream:
+ if verbose:
 print(f"\n\n{Fore.GREEN}ROI text:{Style.RESET_ALL} \n{roi_text}\n")
 print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
 messages = []
@@ -1767,10 +1645,7 @@ class BinaryRelationExtractor(RelationExtractor):
 
 gen_text = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- **kwrs
+ verbose=verbose
 )
 rel_json = self._extract_json(gen_text)
 if self._post_process(rel_json):
@@ -1785,8 +1660,8 @@ class BinaryRelationExtractor(RelationExtractor):
 return output
 
 
- async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[Dict]:
 """
 This is the asynchronous version of the extract() method.
 
@@ -1841,10 +1716,7 @@ class BinaryRelationExtractor(RelationExtractor):
 
 task = asyncio.create_task(
 self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 )
 tasks.append(task)
@@ -1866,9 +1738,9 @@ class BinaryRelationExtractor(RelationExtractor):
 return output
 
 
- def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32,
- stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
+ concurrent:bool=False, concurrent_batch_size:int=32, verbose:bool=False,
+ return_messages_log:bool=False) -> List[Dict]:
 """
 This method considers all combinations of two frames. Use the possible_relation_func to filter impossible pairs.
 
@@ -1878,15 +1750,11 @@ class BinaryRelationExtractor(RelationExtractor):
 a document with frames.
 buffer_size : int, Optional
 the number of characters before and after the two frames in the ROI text.
- max_new_tokens : str, Optional
- the max number of new tokens LLM should generate.
- temperature : float, Optional
- the temperature for token sampling.
 concurrent: bool, Optional
 if True, the extraction will be done in concurrent.
 concurrent_batch_size : int, Optional
 the number of frame pairs to process in concurrent.
- stream : bool, Optional
+ verbose : bool, Optional
 if True, LLM generated text will be printed in terminal in real-time.
 return_messages_log : bool, Optional
 if True, a list of messages will be returned.
@@ -1901,31 +1769,25 @@ class BinaryRelationExtractor(RelationExtractor):
 raise ValueError("All frame_ids in the input document must be unique.")
 
 if concurrent:
- if stream:
+ if verbose:
 warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
 
 nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
 return asyncio.run(self.extract_async(doc=doc,
 buffer_size=buffer_size,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
 concurrent_batch_size=concurrent_batch_size,
- return_messages_log=return_messages_log,
- **kwrs)
+ return_messages_log=return_messages_log)
 )
 else:
 return self.extract(doc=doc,
 buffer_size=buffer_size,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- return_messages_log=return_messages_log,
- **kwrs)
+ verbose=verbose,
+ return_messages_log=return_messages_log)
 
 
 class MultiClassRelationExtractor(RelationExtractor):
 def __init__(self, inference_engine:InferenceEngine, prompt_template:str, possible_relation_types_func: Callable,
- system_prompt:str=None, **kwrs):
+ system_prompt:str=None):
 """
 This class extracts relations with relation types.
 Input LLM inference engine, system prompt (optional), prompt template (with instruction, few-shot examples).
@@ -1944,8 +1806,7 @@ class MultiClassRelationExtractor(RelationExtractor):
 """
 super().__init__(inference_engine=inference_engine,
 prompt_template=prompt_template,
- system_prompt=system_prompt,
- **kwrs)
+ system_prompt=system_prompt)
 
 if possible_relation_types_func:
 # Check if possible_relation_types_func is a function
@@ -1992,8 +1853,7 @@ class MultiClassRelationExtractor(RelationExtractor):
 return None
 
 
- def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ def extract(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, verbose:bool=False, return_messages_log:bool=False) -> List[Dict]:
 """
 This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
 
@@ -2026,7 +1886,7 @@ class MultiClassRelationExtractor(RelationExtractor):
 
 if pos_rel_types:
 roi_text = self._get_ROI(frame_1, frame_2, doc.text, buffer_size=buffer_size)
- if stream:
+ if verbose:
 print(f"\n\n{Fore.GREEN}ROI text:{Style.RESET_ALL} \n{roi_text}\n")
 print(f"{Fore.BLUE}Extraction:{Style.RESET_ALL}")
 messages = []
@@ -2041,10 +1901,8 @@ class MultiClassRelationExtractor(RelationExtractor):
 
 gen_text = self.inference_engine.chat(
 messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- **kwrs
+ stream=False,
+ verbose=verbose
 )
 
 if return_messages_log:
@@ -2061,8 +1919,8 @@ class MultiClassRelationExtractor(RelationExtractor):
 return output
 
 
- async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, concurrent_batch_size:int=32, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ async def extract_async(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
+ concurrent_batch_size:int=32, return_messages_log:bool=False) -> List[Dict]:
 """
 This is the asynchronous version of the extract() method.
 
@@ -2117,10 +1975,7 @@ class MultiClassRelationExtractor(RelationExtractor):
 )})
 task = asyncio.create_task(
 self.inference_engine.chat_async(
- messages=messages,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- **kwrs
+ messages=messages
 )
 )
 tasks.append(task)
@@ -2143,9 +1998,9 @@ class MultiClassRelationExtractor(RelationExtractor):
 return output
 
 
- def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100, max_new_tokens:int=128,
- temperature:float=0.0, concurrent:bool=False, concurrent_batch_size:int=32,
- stream:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
+ def extract_relations(self, doc:LLMInformationExtractionDocument, buffer_size:int=100,
+ concurrent:bool=False, concurrent_batch_size:int=32,
+ verbose:bool=False, return_messages_log:bool=False, **kwrs) -> List[Dict]:
 """
 This method considers all combinations of two frames. Use the possible_relation_types_func to filter impossible pairs.
 
@@ -2178,24 +2033,18 @@ class MultiClassRelationExtractor(RelationExtractor):
 raise ValueError("All frame_ids in the input document must be unique.")
 
 if concurrent:
- if stream:
+ if verbose:
 warnings.warn("stream=True is not supported in concurrent mode.", RuntimeWarning)
 
 nest_asyncio.apply() # For Jupyter notebook. Terminal does not need this.
 return asyncio.run(self.extract_async(doc=doc,
 buffer_size=buffer_size,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
 concurrent_batch_size=concurrent_batch_size,
- return_messages_log=return_messages_log,
- **kwrs)
+ return_messages_log=return_messages_log)
 )
 else:
 return self.extract(doc=doc,
 buffer_size=buffer_size,
- max_new_tokens=max_new_tokens,
- temperature=temperature,
- stream=stream,
- return_messages_log=return_messages_log,
- **kwrs)
+ verbose=verbose,
+ return_messages_log=return_messages_log)
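
Net effect of the 1.0.0 → 1.1.0 changes above: extractor methods no longer accept per-call max_new_tokens, temperature, or **kwrs, the relation extractors' stream= flag becomes verbose=, and DirectFrameExtractor.stream() yields "reasoning"/"response" chunk events instead of "llm_chunk". A minimal usage sketch of the new call pattern follows; the engine and chunker wiring is hypothetical, and the idea that sampling settings now live on the InferenceEngine rather than on each call is an assumption, not something this diff shows.

# Hypothetical wiring; only the 1.1.0 method signatures below come from this diff.
from llm_ie.extractors import DirectFrameExtractor

def extract_note(engine, unit_chunker, prompt_template, note_text):
    # `engine` (an InferenceEngine), `unit_chunker`, and `prompt_template` are
    # assumed to be pre-built; generation settings such as temperature are
    # presumably configured on the engine itself in 1.1.0.
    extractor = DirectFrameExtractor(inference_engine=engine,
                                     unit_chunker=unit_chunker,
                                     prompt_template=prompt_template)

    # 1.1.0: no max_new_tokens / temperature / **kwrs keyword arguments here.
    frames = extractor.extract_frames(text_content=note_text,
                                      concurrent=True,
                                      concurrent_batch_size=32)

    # stream() yields typed events; "reasoning" and "response" replace the
    # old "llm_chunk" event type (per the docstring change above).
    for event in extractor.stream(text_content=note_text):
        if event["type"] == "response":
            print(event["data"], end="", flush=True)

    return frames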