llm-ie 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llm_ie-0.1.0 → llm_ie-0.1.2}/PKG-INFO +60 -7
- {llm_ie-0.1.0 → llm_ie-0.1.2}/README.md +59 -6
- {llm_ie-0.1.0 → llm_ie-0.1.2}/pyproject.toml +2 -3
- {llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/engines.py +94 -11
- {llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/__init__.py +0 -0
- {llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/data_types.py +0 -0
- {llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/extractors.py +0 -0
- {llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/prompt_editor.py +0 -0
{llm_ie-0.1.0 → llm_ie-0.1.2}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-ie
-Version: 0.1.0
+Version: 0.1.2
 Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
 License: MIT
 Author: Enshuo (David) Hsu
@@ -11,7 +11,11 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Description-Content-Type: text/markdown

-<div align="center"><img src=asset/LLM-IE.png width=500 ></div>
+<div align="center"><img src=asset/readme_img/LLM-IE.png width=500 ></div>
+
+![Python Version]()
+![PyPI version]()
+

 An LLM-powered tool that transforms everyday language into robust information extraction pipelines.

@@ -29,10 +33,10 @@ An LLM-powered tool that transforms everyday language into robust information ex
 ## Overview
 LLM-IE is a toolkit that provides robust information extraction utilities for frame-based information extraction. Since prompt design has a significant impact on generative information extraction with LLMs, it also provides a built-in LLM editor to help with prompt writing. The flowchart below demonstrates the workflow starting from a casual language request.

-<div align="center"><img src="asset/LLM-IE flowchart.png" width=800 ></div>
+<div align="center"><img src="asset/readme_img/LLM-IE flowchart.png" width=800 ></div>

 ## Prerequisite
-At least one LLM inference engine is required.
+At least one LLM inference engine is required. There are built-in supports for 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="https://avatars.githubusercontent.com/u/151674099?s=48&v=4" alt="Icon" width="20"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), and <img src=asset/readme_img/openai-logomark.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.

 ## Installation
 The Python package is available on PyPI.
@@ -45,7 +49,7 @@ Note that this package does not check LLM inference engine installation nor inst
 We use a [synthesized medical note](demo/document/synthesized_note.txt) by ChatGPT to demo the information extraction process. Our task is to extract diagnosis names, spans, and corresponding attributes (i.e., diagnosis datetime, status).

 #### Choose an LLM inference engine
-
+Choose one of the built-in engines below.

 <details>
 <summary><img src="https://avatars.githubusercontent.com/u/151674099?s=48&v=4" alt="Icon" width="20"/> Ollama</summary>
@@ -62,11 +66,35 @@ llm = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
 ```python
 from llm_ie.engines import LlamaCppInferenceEngine

-
+llm = LlamaCppInferenceEngine(repo_id="bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF",
                               gguf_filename="Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
 ```
 </details>

+<details>
+<summary>🤗 Huggingface_hub</summary>
+
+```python
+from llm_ie.engines import HuggingFaceHubInferenceEngine
+
+llm = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
+```
+</details>
+
+<details>
+<summary><img src=asset/readme_img/openai-logomark.png width=16 /> OpenAI API</summary>
+
+Follow the [Best Practices for API Key Safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety) to set up API key.
+```python
+from llm_ie.engines import OpenAIInferenceEngine
+
+llm = OpenAIInferenceEngine(model="gpt-4o-mini")
+```
+
+</details>
+
+In this quick start demo, we use Llama-cpp-python to run Llama-3.1-8B with int8 quantization ([bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF](https://huggingface.co/bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF)).
+The outputs might be slightly different with other inference engines, LLMs, or quantization.

 #### Casual language as prompt
 We start with a casual description:
@@ -165,7 +193,7 @@ This package is comprised of some key classes:
 - Extractors

 ### LLM Inference Engine
-Provides an interface for different LLM inference engines to work in the information extraction workflow. The built-in engines are ```LlamaCppInferenceEngine``` and ```OllamaInferenceEngine```.
+Provides an interface for different LLM inference engines to work in the information extraction workflow. The built-in engines are ```LlamaCppInferenceEngine```, ```OllamaInferenceEngine```, and ```HuggingFaceHubInferenceEngine```.

 #### 🦙 Llama-cpp-python
 The ```repo_id``` and ```gguf_filename``` must match the ones on the Huggingface repo to ensure the correct model is loaded. ```n_ctx``` determines the context length LLM will consider during text generation. Empirically, longer context length gives better performance, while consuming more memory and increases computation. Note that when ```n_ctx``` is less than the prompt length, Llama.cpp throws exceptions. ```n_gpu_layers``` indicates a number of model layers to offload to GPU. Default is -1 for all layers (entire LLM). Flash attention ```flash_attn``` is supported by Llama.cpp. The ```verbose``` indicates whether model information should be displayed. For more input parameters, see 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python).
@@ -191,6 +219,31 @@ ollama = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0",
                                keep_alive=300)
 ```

+#### 🤗 huggingface_hub
+The ```model``` can be a model id hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. Refer to the [Inference Client](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client) documentation for more details.
+
+```python
+from llm_ie.engines import HuggingFaceHubInferenceEngine
+
+hf = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
+```
+
+#### <img src=asset/readme_img/openai-logomark.png width=16 /> OpenAI API
+In bash, save API key to the environmental variable ```OPENAI_API_KEY```.
+```
+export OPENAI_API_KEY=<your_API_key>
+```
+
+In Python, create inference engine and specify model name. For the available models, refer to [OpenAI webpage](https://platform.openai.com/docs/models).
+For more parameters, see [OpenAI API reference](https://platform.openai.com/docs/api-reference/introduction).
+
+```python
+from llm_ie.engines import OpenAIInferenceEngine
+
+openai_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
+```
+
+
 #### Test inference engine configuration
 To test the inference engine, use the ```chat()``` method.

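The Llama-cpp-python paragraph in the hunk above documents the constructor parameters (```repo_id```, ```gguf_filename```, ```n_ctx```, ```n_gpu_layers```, ```flash_attn```, ```verbose```), but the matching code example falls outside the changed lines. A minimal sketch, assuming the extra keyword arguments are passed through to llama-cpp-python as that paragraph describes:

```python
from llm_ie.engines import LlamaCppInferenceEngine

# Sketch only: repo_id and gguf_filename must match an existing GGUF repo on Hugging Face.
# flash_attn and verbose are assumed to be forwarded to llama-cpp-python via **kwrs.
llm = LlamaCppInferenceEngine(repo_id="bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF",
                              gguf_filename="Meta-Llama-3.1-8B-Instruct-Q8_0.gguf",
                              n_ctx=4096,        # context length; must cover the prompt
                              n_gpu_layers=-1,   # -1 offloads all layers to GPU
                              flash_attn=True,
                              verbose=False)
```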
{llm_ie-0.1.0 → llm_ie-0.1.2}/README.md

@@ -1,4 +1,8 @@
-<div align="center"><img src=asset/LLM-IE.png width=500 ></div>
+<div align="center"><img src=asset/readme_img/LLM-IE.png width=500 ></div>
+
+![Python Version]()
+![PyPI version]()
+

 An LLM-powered tool that transforms everyday language into robust information extraction pipelines.

@@ -16,10 +20,10 @@ An LLM-powered tool that transforms everyday language into robust information ex
 ## Overview
 LLM-IE is a toolkit that provides robust information extraction utilities for frame-based information extraction. Since prompt design has a significant impact on generative information extraction with LLMs, it also provides a built-in LLM editor to help with prompt writing. The flowchart below demonstrates the workflow starting from a casual language request.

-<div align="center"><img src="asset/LLM-IE flowchart.png" width=800 ></div>
+<div align="center"><img src="asset/readme_img/LLM-IE flowchart.png" width=800 ></div>

 ## Prerequisite
-At least one LLM inference engine is required.
+At least one LLM inference engine is required. There are built-in supports for 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="https://avatars.githubusercontent.com/u/151674099?s=48&v=4" alt="Icon" width="20"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), and <img src=asset/readme_img/openai-logomark.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.

 ## Installation
 The Python package is available on PyPI.
@@ -32,7 +36,7 @@ Note that this package does not check LLM inference engine installation nor inst
 We use a [synthesized medical note](demo/document/synthesized_note.txt) by ChatGPT to demo the information extraction process. Our task is to extract diagnosis names, spans, and corresponding attributes (i.e., diagnosis datetime, status).

 #### Choose an LLM inference engine
-
+Choose one of the built-in engines below.

 <details>
 <summary><img src="https://avatars.githubusercontent.com/u/151674099?s=48&v=4" alt="Icon" width="20"/> Ollama</summary>
@@ -49,11 +53,35 @@ llm = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
 ```python
 from llm_ie.engines import LlamaCppInferenceEngine

-
+llm = LlamaCppInferenceEngine(repo_id="bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF",
                               gguf_filename="Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
 ```
 </details>

+<details>
+<summary>🤗 Huggingface_hub</summary>
+
+```python
+from llm_ie.engines import HuggingFaceHubInferenceEngine
+
+llm = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
+```
+</details>
+
+<details>
+<summary><img src=asset/readme_img/openai-logomark.png width=16 /> OpenAI API</summary>
+
+Follow the [Best Practices for API Key Safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety) to set up API key.
+```python
+from llm_ie.engines import OpenAIInferenceEngine
+
+llm = OpenAIInferenceEngine(model="gpt-4o-mini")
+```
+
+</details>
+
+In this quick start demo, we use Llama-cpp-python to run Llama-3.1-8B with int8 quantization ([bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF](https://huggingface.co/bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF)).
+The outputs might be slightly different with other inference engines, LLMs, or quantization.

 #### Casual language as prompt
 We start with a casual description:
@@ -152,7 +180,7 @@ This package is comprised of some key classes:
 - Extractors

 ### LLM Inference Engine
-Provides an interface for different LLM inference engines to work in the information extraction workflow. The built-in engines are ```LlamaCppInferenceEngine``` and ```OllamaInferenceEngine```.
+Provides an interface for different LLM inference engines to work in the information extraction workflow. The built-in engines are ```LlamaCppInferenceEngine```, ```OllamaInferenceEngine```, and ```HuggingFaceHubInferenceEngine```.

 #### 🦙 Llama-cpp-python
 The ```repo_id``` and ```gguf_filename``` must match the ones on the Huggingface repo to ensure the correct model is loaded. ```n_ctx``` determines the context length LLM will consider during text generation. Empirically, longer context length gives better performance, while consuming more memory and increases computation. Note that when ```n_ctx``` is less than the prompt length, Llama.cpp throws exceptions. ```n_gpu_layers``` indicates a number of model layers to offload to GPU. Default is -1 for all layers (entire LLM). Flash attention ```flash_attn``` is supported by Llama.cpp. The ```verbose``` indicates whether model information should be displayed. For more input parameters, see 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python).
@@ -178,6 +206,31 @@ ollama = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0",
                                keep_alive=300)
 ```

+#### 🤗 huggingface_hub
+The ```model``` can be a model id hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. Refer to the [Inference Client](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client) documentation for more details.
+
+```python
+from llm_ie.engines import HuggingFaceHubInferenceEngine
+
+hf = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
+```
+
+#### <img src=asset/readme_img/openai-logomark.png width=16 /> OpenAI API
+In bash, save API key to the environmental variable ```OPENAI_API_KEY```.
+```
+export OPENAI_API_KEY=<your_API_key>
+```
+
+In Python, create inference engine and specify model name. For the available models, refer to [OpenAI webpage](https://platform.openai.com/docs/models).
+For more parameters, see [OpenAI API reference](https://platform.openai.com/docs/api-reference/introduction).
+
+```python
+from llm_ie.engines import OpenAIInferenceEngine
+
+openai_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
+```
+
+
 #### Test inference engine configuration
 To test the inference engine, use the ```chat()``` method.

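Both the PKG-INFO and README diffs end at "To test the inference engine, use the ```chat()``` method."; the example itself falls outside the hunks. A minimal smoke-test sketch, assuming every built-in engine exposes the same ```chat()``` signature as the two classes added to engines.py below (the engine, model name, and message content are placeholders):

```python
from llm_ie.engines import OllamaInferenceEngine

# Placeholder engine and model name; swap in any configured engine.
llm = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")

# messages follow the {"role", "content"} format documented in the chat() docstrings.
reply = llm.chat(messages=[{"role": "user", "content": "Hello, can you hear me?"}],
                 max_new_tokens=128)
print(reply)
```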
{llm_ie-0.1.0 → llm_ie-0.1.2}/pyproject.toml

@@ -1,14 +1,13 @@
 [tool.poetry]
 name = "llm-ie"
-version = "0.1.0"
+version = "0.1.2"
 description = "An LLM-powered tool that transforms everyday language into robust information extraction pipelines."
 authors = ["Enshuo (David) Hsu"]
 license = "MIT"
 readme = "README.md"

 exclude = [
-    "test/**"
-    "*.png"
+    "test/**"
 ]

 [tool.poetry.dependencies]
{llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/engines.py

@@ -31,7 +31,6 @@ class InferenceEngine:


 class LlamaCppInferenceEngine(InferenceEngine):
-    from llama_cpp import Llama
     def __init__(self, repo_id:str, gguf_filename:str, n_ctx:int=4096, n_gpu_layers:int=-1, **kwrs):
         """
         The Llama.cpp inference engine.
@@ -48,13 +47,13 @@ class LlamaCppInferenceEngine(InferenceEngine):
         n_gpu_layers : int, Optional
             number of layers to offload to GPU. Default is all layers (-1).
         """
-
+        from llama_cpp import Llama
         self.repo_id = repo_id
         self.gguf_filename = gguf_filename
         self.n_ctx = n_ctx
         self.n_gpu_layers = n_gpu_layers

-        self.model =
+        self.model = Llama.from_pretrained(
             repo_id=self.repo_id,
             filename=self.gguf_filename,
             n_gpu_layers=n_gpu_layers,
@@ -106,7 +105,6 @@ class LlamaCppInferenceEngine(InferenceEngine):


 class OllamaInferenceEngine(InferenceEngine):
-    import ollama
     def __init__(self, model_name:str, num_ctx:int=4096, keep_alive:int=300, **kwrs):
         """
         The Ollama inference engine.
@@ -120,6 +118,8 @@ class OllamaInferenceEngine(InferenceEngine):
         keep_alive : int, Optional
             seconds to hold the LLM after the last API call.
         """
+        import ollama
+        self.ollama = ollama
         self.model_name = model_name
         self.num_ctx = num_ctx
         self.keep_alive = keep_alive
@@ -155,12 +155,95 @@ class OllamaInferenceEngine(InferenceEngine):
             return res

         return response['message']['content']
-
-
+
+
+class HuggingFaceHubInferenceEngine(InferenceEngine):
+    def __init__(self, **kwrs):
         """
-
+        The Huggingface_hub InferenceClient inference engine.
+        For parameters and documentation, refer to https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client
+        """
+        from huggingface_hub import InferenceClient
+        self.client = InferenceClient(**kwrs)
+
+    def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
         """
-
-
-
-
+        This method inputs chat messages and outputs LLM generated text.
+
+        Parameters:
+        ----------
+        messages : List[Dict[str,str]]
+            a list of dict with role and content. role must be one of {"system", "user", "assistant"}
+        max_new_tokens : str, Optional
+            the max number of new tokens LLM can generate.
+        temperature : float, Optional
+            the temperature for token sampling.
+        stream : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
+        """
+        response = self.client.chat.completions.create(
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=stream,
+            **kwrs
+        )
+
+        if stream:
+            res = ''
+            for chunk in response:
+                res += chunk.choices[0].delta.content
+                print(chunk.choices[0].delta.content, end='', flush=True)
+            return res
+
+        return response.choices[0].message.content
+
+
+class OpenAIInferenceEngine(InferenceEngine):
+    def __init__(self, model:str, **kwrs):
+        """
+        The OpenAI API inference engine.
+        For parameters and documentation, refer to https://platform.openai.com/docs/api-reference/introduction
+
+        Parameters:
+        ----------
+        model_name : str
+            model name as described in https://platform.openai.com/docs/models
+        """
+        from openai import OpenAI
+        self.client = OpenAI(**kwrs)
+        self.model = model
+
+    def chat(self, messages:List[Dict[str,str]], max_new_tokens:int=2048, temperature:float=0.0, stream:bool=False, **kwrs) -> str:
+        """
+        This method inputs chat messages and outputs LLM generated text.
+
+        Parameters:
+        ----------
+        messages : List[Dict[str,str]]
+            a list of dict with role and content. role must be one of {"system", "user", "assistant"}
+        max_new_tokens : str, Optional
+            the max number of new tokens LLM can generate.
+        temperature : float, Optional
+            the temperature for token sampling.
+        stream : bool, Optional
+            if True, LLM generated text will be printed in terminal in real-time.
+        """
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            max_tokens=max_new_tokens,
+            temperature=temperature,
+            stream=stream,
+            **kwrs
+        )
+
+        if stream:
+            res = ''
+            for chunk in response:
+                if chunk.choices[0].delta.content is not None:
+                    res += chunk.choices[0].delta.content
+                    print(chunk.choices[0].delta.content, end="")
+            return res
+
+        return response.choices[0].delta.content
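The README text above notes that other inference engines can be configured through the ```InferenceEngine``` abstract class. The hunks do not show the abstract method definitions, so the sketch below simply mirrors the pattern of the two classes added in this release (a constructor that sets up a client and a ```chat()``` method returning a string). The class name and its echo behavior are hypothetical, and any additional abstract methods on the base class would also need to be implemented:

```python
from typing import Dict, List

from llm_ie.engines import InferenceEngine


class EchoInferenceEngine(InferenceEngine):
    """Hypothetical engine that echoes the last user message, useful for dry runs."""

    def __init__(self, **kwrs):
        # No real client; just keep the configuration around.
        self.config = kwrs

    def chat(self, messages: List[Dict[str, str]], max_new_tokens: int = 2048,
             temperature: float = 0.0, stream: bool = False, **kwrs) -> str:
        # Same parameters as the built-in engines; return the last user message verbatim.
        reply = next((m["content"] for m in reversed(messages) if m["role"] == "user"), "")
        if stream:
            print(reply, end="", flush=True)
        return reply
```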
{llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/__init__.py: File without changes

{llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/data_types.py: File without changes

{llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/extractors.py: File without changes

{llm_ie-0.1.0 → llm_ie-0.1.2}/src/llm_ie/prompt_editor.py: File without changes