llm-ie 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/extractors.py
CHANGED
@@ -397,7 +397,7 @@ class SentenceFrameExtractor(FrameExtractor):
 
 
     def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
-                document_key:str=None, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
+                document_key:str=None, multi_turn:bool=True, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
         """
         This method inputs a text and outputs a list of outputs per sentence.
 
@@ -412,6 +412,12 @@ class SentenceFrameExtractor(FrameExtractor):
         document_key : str, Optional
             specify the key in text_content where document text is.
             If text_content is str, this parameter will be ignored.
+        multi_turn : bool, Optional
+            multi-turn conversation prompting.
+            If True, sentences and LLM outputs will be appended to the input message and carry-over.
+            If False, only the current sentence is prompted.
+            For LLM inference engines that supports prompt cache (e.g., Llama.Cpp, Ollama), use multi-turn conversation prompting
+            can better utilize the KV caching.
         temperature : float, Optional
             the temperature for token sampling.
         stream : bool, Optional
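The new parameter is easiest to read from the caller's side. A minimal usage sketch, assuming a SentenceFrameExtractor instance named `extractor` and an input string `note_text` (both names are illustrative, not taken from the diff):

    # Hypothetical call showing the multi_turn flag added in 0.1.7.
    outputs = extractor.extract(
        text_content=note_text,  # str, or a dict plus document_key
        max_new_tokens=512,
        multi_turn=True,         # carry sentences and LLM replies across turns
        temperature=0.0,
        stream=False,
    )
    # each item carries sentence offsets and the raw generation
    first = outputs[0]
    print(first['sentence_start'], first['sentence_end'], first['gen_text'])

With multi_turn=True the per-sentence prompts share a growing, identical message prefix, which is what lets prompt-caching engines reuse the KV cache between turns.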
@@ -449,9 +455,14 @@ class SentenceFrameExtractor(FrameExtractor):
                 stream=stream,
                 **kwrs
             )
-
-
-
+
+            if multi_turn:
+                # update chat messages with LLM outputs
+                messages.append({'role': 'assistant', 'content': gen_text})
+            else:
+                # delete sentence so that message is reset
+                del messages[-1]
+
             # add to output
             output.append({'sentence_start': sent['start'],
                            'sentence_end': sent['end'],
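The hunk above is the heart of the change. A minimal sketch of the surrounding per-sentence loop, reconstructed from the lines shown; the `llm_chat` callable and the shape of `sentences` are hypothetical stand-ins for the package's engine call and sentence splitter:

    from typing import Callable, Dict, List

    def extract_per_sentence(llm_chat: Callable[[List[Dict[str, str]]], str],
                             prompt: str,
                             sentences: List[Dict],
                             multi_turn: bool = True) -> List[Dict]:
        # llm_chat is a hypothetical stand-in for the inference engine's chat call
        messages = [{'role': 'system', 'content': prompt}]
        output = []
        for sent in sentences:  # each sent: {'sentence_text', 'start', 'end'} (assumed)
            messages.append({'role': 'user', 'content': sent['sentence_text']})
            gen_text = llm_chat(messages)
            if multi_turn:
                # keep the reply: every turn extends the same message prefix,
                # so engines with prompt caching (Llama.Cpp, Ollama) reuse the KV cache
                messages.append({'role': 'assistant', 'content': gen_text})
            else:
                # pop the sentence so the next turn sees only the system prompt
                del messages[-1]
            output.append({'sentence_start': sent['start'],
                           'sentence_end': sent['end'],
                           'gen_text': gen_text})
        return output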
@@ -460,8 +471,8 @@ class SentenceFrameExtractor(FrameExtractor):
         return output
 
 
-    def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str,
-
+    def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=512,
+                    document_key:str=None, multi_turn:bool=True, temperature:float=0.0, stream:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
         """
         This method inputs a text and outputs a list of LLMInformationExtractionFrame
         It use the extract() method and post-process outputs into frames.
@@ -479,12 +490,27 @@ class SentenceFrameExtractor(FrameExtractor):
         document_key : str, Optional
             specify the key in text_content where document text is.
             If text_content is str, this parameter will be ignored.
+        multi_turn : bool, Optional
+            multi-turn conversation prompting.
+            If True, sentences and LLM outputs will be appended to the input message and carry-over.
+            If False, only the current sentence is prompted.
+            For LLM inference engines that supports prompt cache (e.g., Llama.Cpp, Ollama), use multi-turn conversation prompting
+            can better utilize the KV caching.
+        temperature : float, Optional
+            the temperature for token sampling.
+        stream : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
 
         Return : str
             a list of frames.
         """
         llm_output_sentence = self.extract(text_content=text_content,
-                                           max_new_tokens=max_new_tokens,
+                                           max_new_tokens=max_new_tokens,
+                                           document_key=document_key,
+                                           multi_turn=multi_turn,
+                                           temperature=temperature,
+                                           stream=stream,
+                                           **kwrs)
         frame_list = []
         for sent in llm_output_sentence:
             entity_json = self._extract_json(gen_text=sent['gen_text'])
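For completeness, a hypothetical call to the updated extract_frames(); the `extractor` instance, `note_text`, and the entity_key value are illustrative assumptions, not taken from the diff:

    # entity_key names the JSON field in the LLM output that holds the entity text.
    frames = extractor.extract_frames(
        text_content={'document': note_text},  # dict input, so document_key applies
        entity_key='entity_text',
        document_key='document',
        multi_turn=False,                      # prompt each sentence independently
        temperature=0.0,
    )
    # a list of LLMInformationExtractionFrame objects, one per extracted entity
    print(len(frames))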
llm_ie-0.1.6.dist-info/RECORD → llm_ie-0.1.7.dist-info/RECORD
CHANGED

@@ -6,8 +6,8 @@ llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=XbnU8byLG
 llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=8nj9OLPJMtr9Soi5JU3Xk-HC7pKNoI54xA_A4u7I5j4,2620
 llm_ie/data_types.py,sha256=MnpyXFviFWhxeC5mqbaPdAxGx6vV_PhnUIFfUamq3D8,6687
 llm_ie/engines.py,sha256=m9ytGUX61jEy9SmVHbb90mrfGMAwC6dV-v7Jke1U7Ho,9296
-llm_ie/extractors.py,sha256=
+llm_ie/extractors.py,sha256=PfcUhmU_LfVFIfI5v3C7DzGAFF0xEPDdLUnwKHYnUyg,24125
 llm_ie/prompt_editor.py,sha256=dbu7A3O7O7Iw2v-xCgrTFH1-wTLAGf4SHDqdeS-He2Q,1869
-llm_ie-0.1.
-llm_ie-0.1.
-llm_ie-0.1.
+llm_ie-0.1.7.dist-info/METADATA,sha256=wMAToHKL1K3hZP-xONBdEw3sy56stKYThfn3NqbfZ34,29712
+llm_ie-0.1.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+llm_ie-0.1.7.dist-info/RECORD,,
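Each RECORD row has the form path,sha256=<digest>,size, where the digest is the urlsafe, unpadded base64 encoding of the file's SHA-256 hash and the RECORD file itself is listed with empty hash and size fields (per the wheel spec). A minimal sketch for reproducing a row, with an illustrative path:

    import base64
    import hashlib
    from pathlib import Path

    def record_row(path: str) -> str:
        # sha256 digest, urlsafe base64, padding stripped, plus byte size
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=')
        return f'{path},sha256={digest.decode()},{len(data)}'

    # e.g. llm_ie/extractors.py,sha256=PfcUhmU_...,24125
    print(record_row('llm_ie/extractors.py'))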