llm-ie 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/engines.py
CHANGED
llm_ie/extractors.py
CHANGED
@@ -397,7 +397,7 @@ class SentenceFrameExtractor(FrameExtractor):
 
 
     def extract(self, text_content:Union[str, Dict[str,str]], max_new_tokens:int=512,
-                document_key:str=None, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
+                document_key:str=None, multi_turn:bool=True, temperature:float=0.0, stream:bool=False, **kwrs) -> List[Dict[str,str]]:
         """
         This method inputs a text and outputs a list of outputs per sentence.
 
@@ -412,6 +412,12 @@ class SentenceFrameExtractor(FrameExtractor):
         document_key : str, Optional
             specify the key in text_content where document text is.
             If text_content is str, this parameter will be ignored.
+        multi_turn : bool, Optional
+            multi-turn conversation prompting.
+            If True, sentences and LLM outputs will be appended to the input message and carried over.
+            If False, only the current sentence is prompted.
+            For LLM inference engines that support prompt caching (e.g., Llama.Cpp, Ollama), multi-turn conversation prompting
+            can better utilize the KV cache.
         temperature : float, Optional
             the temperature for token sampling.
         stream : bool, Optional
@@ -449,9 +455,14 @@ class SentenceFrameExtractor(FrameExtractor):
                 stream=stream,
                 **kwrs
                 )
-
-
-
+
+            if multi_turn:
+                # update chat messages with LLM outputs
+                messages.append({'role': 'assistant', 'content': gen_text})
+            else:
+                # delete sentence so that message is reset
+                del messages[-1]
+
             # add to output
             output.append({'sentence_start': sent['start'],
                            'sentence_end': sent['end'],
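To make the new control flow concrete, here is a minimal, self-contained sketch of the per-sentence message handling the added lines implement. The generate() stub and the message contents are placeholders standing in for the inference engine call; only the multi_turn branching mirrors the diff.

```python
from typing import Dict, List

def run_sentences(sentences: List[str], multi_turn: bool) -> List[Dict[str, str]]:
    def generate(messages: List[Dict[str, str]]) -> str:
        # Stub standing in for the LLM inference engine's chat call.
        return f"[frames extracted from: {messages[-1]['content']}]"

    messages = [{'role': 'system', 'content': 'Extract frames sentence by sentence.'}]
    output = []
    for sent in sentences:
        # Prompt the current sentence.
        messages.append({'role': 'user', 'content': sent})
        gen_text = generate(messages)
        if multi_turn:
            # Keep the sentence and the LLM output in the conversation so engines
            # with prompt caching (e.g., Llama.Cpp, Ollama) can reuse the shared prefix.
            messages.append({'role': 'assistant', 'content': gen_text})
        else:
            # Drop the sentence so the next turn starts from the same base prompt.
            del messages[-1]
        output.append({'sentence': sent, 'gen_text': gen_text})
    return output

print(run_sentences(["Sentence one.", "Sentence two."], multi_turn=True))
```

With multi_turn=True the message list grows by two entries per sentence; with multi_turn=False it never grows past the system prompt plus one transient user turn.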
@@ -460,8 +471,8 @@ class SentenceFrameExtractor(FrameExtractor):
         return output
 
 
-    def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str,
-
+    def extract_frames(self, text_content:Union[str, Dict[str,str]], entity_key:str, max_new_tokens:int=512,
+                       document_key:str=None, multi_turn:bool=True, temperature:float=0.0, stream:bool=False, **kwrs) -> List[LLMInformationExtractionFrame]:
         """
         This method inputs a text and outputs a list of LLMInformationExtractionFrame
         It uses the extract() method and post-processes outputs into frames.
@@ -479,12 +490,27 @@ class SentenceFrameExtractor(FrameExtractor):
         document_key : str, Optional
             specify the key in text_content where document text is.
             If text_content is str, this parameter will be ignored.
+        multi_turn : bool, Optional
+            multi-turn conversation prompting.
+            If True, sentences and LLM outputs will be appended to the input message and carried over.
+            If False, only the current sentence is prompted.
+            For LLM inference engines that support prompt caching (e.g., Llama.Cpp, Ollama), multi-turn conversation prompting
+            can better utilize the KV cache.
+        temperature : float, Optional
+            the temperature for token sampling.
+        stream : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
 
         Return : str
             a list of frames.
         """
         llm_output_sentence = self.extract(text_content=text_content,
-                                            max_new_tokens=max_new_tokens,
+                                            max_new_tokens=max_new_tokens,
+                                            document_key=document_key,
+                                            multi_turn=multi_turn,
+                                            temperature=temperature,
+                                            stream=stream,
+                                            **kwrs)
         frame_list = []
         for sent in llm_output_sentence:
             entity_json = self._extract_json(gen_text=sent['gen_text'])
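For orientation, here is a hedged usage sketch of the updated extract_frames() signature. The OpenAIInferenceEngine setup mirrors the README excerpt further down in this diff; the SentenceFrameExtractor constructor arguments and the entity_key value are illustrative assumptions that are not shown in this diff.

```python
from llm_ie.engines import OpenAIInferenceEngine
from llm_ie.extractors import SentenceFrameExtractor

# Engine setup as in the README excerpt further down in this diff.
engine = OpenAIInferenceEngine(model="gpt-4o-mini")

# NOTE: the constructor arguments below are assumptions for illustration only;
# the SentenceFrameExtractor constructor is not part of this diff.
extractor = SentenceFrameExtractor(inference_engine=engine,
                                   prompt_template="<your extraction prompt>")

note_text = "Patient denies chest pain. Reports mild headache for 3 days."

frames = extractor.extract_frames(text_content=note_text,
                                  entity_key="entity_text",  # assumed key in the LLM's JSON output
                                  max_new_tokens=512,
                                  multi_turn=True,           # parameter added in this diff
                                  temperature=0.0,
                                  stream=False)
for frame in frames:
    print(frame)
```

Passing multi_turn=False would prompt each sentence independently, trading prompt-cache reuse for a shorter context per call.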
llm_ie-0.1.7.dist-info/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-ie
-Version: 0.1.5
+Version: 0.1.7
 Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
 License: MIT
 Author: Enshuo (David) Hsu
@@ -37,7 +37,7 @@ LLM-IE is a toolkit that provides robust information extraction utilities for fr
 <div align="center"><img src="doc_asset/readme_img/LLM-IE flowchart.png" width=800 ></div>
 
 ## Prerequisite
-At least one LLM inference engine is required. There are built-in supports for 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="https://avatars.githubusercontent.com/u/151674099?s=48&v=4" alt="Icon" width="20"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub),
+At least one LLM inference engine is required. There is built-in support for 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="https://avatars.githubusercontent.com/u/151674099?s=48&v=4" alt="Icon" width="20"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), <img src=doc_asset/readme_img/openai-logomark.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction), and <img src=doc_asset/readme_img/vllm-logo.png width=20 /> vLLM. For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See the [LLM Inference Engine](#llm-inference-engine) section below.
 
 ## Installation
 The Python package is available on PyPI.
@@ -92,6 +92,26 @@ from llm_ie.engines import OpenAIInferenceEngine
 llm = OpenAIInferenceEngine(model="gpt-4o-mini")
 ```
 
+</details>
+
+<details>
+<summary><img src=doc_asset/readme_img/vllm-logo.png width=20 /> vLLM</summary>
+
+The vLLM support follows the [OpenAI Compatible Server](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html). For more parameters, please refer to the documentation.
+
+Start the server
+```cmd
+vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct
+```
+Define inference engine
+```python
+from llm_ie.engines import OpenAIInferenceEngine
+engine = OpenAIInferenceEngine(base_url="http://localhost:8000/v1",
+                               api_key="EMPTY",
+                               model="meta-llama/Meta-Llama-3.1-8B-Instruct")
+```
+
+
 </details>
 
 In this quick start demo, we use Llama-cpp-python to run Llama-3.1-8B with int8 quantization ([bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF](https://huggingface.co/bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF)).
@@ -244,6 +264,24 @@ from llm_ie.engines import OpenAIInferenceEngine
 openai_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
 ```
 
+#### <img src=doc_asset/readme_img/vllm-logo.png width=20 /> vLLM
+The vLLM support follows the [OpenAI Compatible Server](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html). For more parameters, please refer to the documentation.
+
+Start the server
+```cmd
+CUDA_VISIBLE_DEVICES=<GPU#> vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --api-key MY_API_KEY --tensor-parallel-size <# of GPUs to use>
+```
+Use ```CUDA_VISIBLE_DEVICES``` to specify which GPUs to use. The ```--tensor-parallel-size``` should be set accordingly. The ```--api-key``` is optional.
+The default port is 8000; ```--port``` sets the port.
+
+Define inference engine
+```python
+from llm_ie.engines import OpenAIInferenceEngine
+engine = OpenAIInferenceEngine(base_url="http://localhost:8000/v1",
+                               api_key="MY_API_KEY",
+                               model="meta-llama/Meta-Llama-3.1-8B-Instruct")
+```
+The ```model``` must match the repo name specified in the server.
 
 #### Test inference engine configuration
 To test the inference engine, use the ```chat()``` method.
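As a small illustration of the port note above: if the server were started on a non-default port, the engine's base_url must point at that port. The port number below is only an example; the constructor arguments follow the README excerpt above.

```python
from llm_ie.engines import OpenAIInferenceEngine

# Assumes the server was started with, for example:
#   vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --api-key MY_API_KEY --port 8001
engine = OpenAIInferenceEngine(base_url="http://localhost:8001/v1",
                               api_key="MY_API_KEY",
                               model="meta-llama/Meta-Llama-3.1-8B-Instruct")
```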
llm_ie-0.1.7.dist-info/RECORD
CHANGED
@@ -5,9 +5,9 @@ llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt,sha256=XbnU8byLGG
 llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=XbnU8byLGGUA3A3lT0bb2Hw-ggzhcqD3ZuKzduod2ww,1944
 llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=8nj9OLPJMtr9Soi5JU3Xk-HC7pKNoI54xA_A4u7I5j4,2620
 llm_ie/data_types.py,sha256=MnpyXFviFWhxeC5mqbaPdAxGx6vV_PhnUIFfUamq3D8,6687
-llm_ie/engines.py,sha256=
-llm_ie/extractors.py,sha256=
+llm_ie/engines.py,sha256=m9ytGUX61jEy9SmVHbb90mrfGMAwC6dV-v7Jke1U7Ho,9296
+llm_ie/extractors.py,sha256=PfcUhmU_LfVFIfI5v3C7DzGAFF0xEPDdLUnwKHYnUyg,24125
 llm_ie/prompt_editor.py,sha256=dbu7A3O7O7Iw2v-xCgrTFH1-wTLAGf4SHDqdeS-He2Q,1869
-llm_ie-0.1.
-llm_ie-0.1.
-llm_ie-0.1.
+llm_ie-0.1.7.dist-info/METADATA,sha256=wMAToHKL1K3hZP-xONBdEw3sy56stKYThfn3NqbfZ34,29712
+llm_ie-0.1.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+llm_ie-0.1.7.dist-info/RECORD,,
File without changes