llm-ie 0.3.4__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llm_ie-0.3.4 → llm_ie-0.4.0}/PKG-INFO +342 -103
- {llm_ie-0.3.4 → llm_ie-0.4.0}/README.md +341 -103
- {llm_ie-0.3.4 → llm_ie-0.4.0}/pyproject.toml +2 -1
- llm_ie-0.4.0/src/llm_ie/__init__.py +9 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/engines.py +151 -9
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/extractors.py +552 -152
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/prompt_editor.py +17 -2
- llm_ie-0.3.4/src/llm_ie/__init__.py +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/PromptEditor_prompts/chat.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/PromptEditor_prompts/comment.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/PromptEditor_prompts/rewrite.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/PromptEditor_prompts/system.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/prompt_guide/SentenceCoTFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.3.4 → llm_ie-0.4.0}/src/llm_ie/data_types.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: llm-ie
-Version: 0.3.4
+Version: 0.4.0
 Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
 License: MIT
 Author: Enshuo (David) Hsu
@@ -10,6 +10,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: colorama (>=0.4.6,<0.5.0)
+Requires-Dist: json_repair (>=0.30,<0.31)
 Requires-Dist: nltk (>=3.8,<4.0)
 Description-Content-Type: text/markdown

@@ -23,10 +24,20 @@ An LLM-powered tool that transforms everyday language into robust information ex

 | Features | Support |
 |----------|----------|
-| **LLM Agent for prompt writing** | :white_check_mark:
+| **LLM Agent for prompt writing** | :white_check_mark: Interactive chat, Python functions |
 | **Named Entity Recognition (NER)** | :white_check_mark: Document-level, Sentence-level |
 | **Entity Attributes Extraction** | :white_check_mark: Flexible formats |
 | **Relation Extraction (RE)** | :white_check_mark: Binary & Multiclass relations |
+| **Visualization** | :white_check_mark: Built-in entity & relation visualization |
+
+## Recent Updates
+- [v0.3.0](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.0) (Oct 17, 2024): Interactive chat to Prompt editor LLM agent.
+- [v0.3.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.1) (Oct 26, 2024): Added Sentence Review Frame Extractor and Sentence CoT Frame Extractor
+- [v0.3.4](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.4) (Nov 24, 2024): Added entity fuzzy search.
+- [v0.3.5](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.5) (Nov 27, 2024): Adopted `json_repair` to fix broken JSON from LLM outputs.
+- v0.4.0:
+  - Concurrent LLM inferencing to speed up frame and relation extraction.
+  - Support for LiteLLM.

 ## Table of Contents
 - [Overview](#overview)
@@ -37,10 +48,13 @@ An LLM-powered tool that transforms everyday language into robust information ex
 - [User Guide](#user-guide)
   - [LLM Inference Engine](#llm-inference-engine)
   - [Prompt Template](#prompt-template)
-  - [Prompt Editor](#prompt-editor)
+  - [Prompt Editor LLM Agent](#prompt-editor-llm-agent)
   - [Extractor](#extractor)
     - [FrameExtractor](#frameextractor)
     - [RelationExtractor](#relationextractor)
+  - [Visualization](#visualization)
+- [Benchmarks](#benchmarks)
+- [Citation](#citation)

 ## Overview
 LLM-IE is a toolkit that provides robust information extraction utilities for named entity, entity attributes, and entity relation extraction. Since prompt design has a significant impact on generative information extraction with LLMs, it has a built-in LLM agent ("editor") to help with prompt writing. The flowchart below demonstrates the workflow starting from a casual language request to output visualization.
@@ -48,7 +62,7 @@ LLM-IE is a toolkit that provides robust information extraction utilities for na
 <div align="center"><img src="doc_asset/readme_img/LLM-IE flowchart.png" width=800 ></div>

 ## Prerequisite
-At least one LLM inference engine is required. There are built-in supports for 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="
+At least one LLM inference engine is required. There are built-in supports for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), <img src=doc_asset/readme_img/openai-logomark.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction), and <img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> [vLLM](https://github.com/vllm-project/vllm). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.
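If none of the built-in engines fits, the `InferenceEngine` abstract class mentioned in the line above is the extension point. The exact abstract methods are defined in [src/llm_ie/engines.py](src/llm_ie/engines.py); the sketch below only illustrates the idea, assuming the engine's core duty is the `chat(messages, ...)` method this README later uses to test engines. `MyCustomEngine` and its internals are hypothetical.

```python
# Hypothetical sketch of a custom engine; check src/llm_ie/engines.py for the
# real abstract methods and signatures before implementing.
from llm_ie.engines import InferenceEngine

class MyCustomEngine(InferenceEngine):
    def __init__(self, client):
        # `client` stands in for whatever SDK/HTTP client your LLM service uses.
        self.client = client

    def chat(self, messages, stream=False, **kwargs):
        # Forward the OpenAI-style message list to the backend and return the
        # generated text (or yield chunks when stream=True).
        return self.client.generate(messages, **kwargs)
```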

 ## Installation
 The Python package is available on PyPI.
@@ -64,22 +78,23 @@ We use a [synthesized medical note](demo/document/synthesized_note.txt) by ChatG
 Choose one of the built-in engines below.

 <details>
-<summary
+<summary>🚅 LiteLLM</summary>

-```python
-from llm_ie.engines import
+```python
+from llm_ie.engines import LiteLLMInferenceEngine

-
+inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.3-70B-Instruct", base_url="http://localhost:8000/v1", api_key="EMPTY")
 ```
 </details>
+
 <details>
-<summary
+<summary><img src=doc_asset/readme_img/openai-logomark.png width=16 /> OpenAI API</summary>

+Follow the [Best Practices for API Key Safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety) to set up API key.
 ```python
-from llm_ie.engines import
+from llm_ie.engines import OpenAIInferenceEngine

-
-gguf_filename="Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
+inference_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
 ```
 </details>

@@ -89,24 +104,22 @@ llm = LlamaCppInferenceEngine(repo_id="bullerwins/Meta-Llama-3.1-8B-Instruct-GGU
 ```python
 from llm_ie.engines import HuggingFaceHubInferenceEngine

-
+inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
 ```
 </details>

 <details>
-<summary><img src=doc_asset/readme_img/
+<summary><img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> Ollama</summary>

-
-
-from llm_ie.engines import OpenAIInferenceEngine
+```python
+from llm_ie.engines import OllamaInferenceEngine

-
+inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
 ```
-
 </details>

 <details>
-<summary><img src=doc_asset/readme_img/vllm-
+<summary><img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> vLLM</summary>

 The vLLM support follows the [OpenAI Compatible Server](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html). For more parameters, please refer to the documentation.

@@ -117,15 +130,24 @@ vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct
 Define inference engine
 ```python
 from llm_ie.engines import OpenAIInferenceEngine
-
-
-
+inference_engine = OpenAIInferenceEngine(base_url="http://localhost:8000/v1",
+                                         api_key="EMPTY",
+                                         model="meta-llama/Meta-Llama-3.1-8B-Instruct")
 ```
+</details>
+
+<details>
+<summary>🦙 Llama-cpp-python</summary>

+```python
+from llm_ie.engines import LlamaCppInferenceEngine

+inference_engine = LlamaCppInferenceEngine(repo_id="bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF",
+                                           gguf_filename="Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
+```
 </details>

-In this quick start demo, we use
+In this quick start demo, we use Ollama to run Llama-3.1-8B with int8 quantization.
 The outputs might be slightly different with other inference engines, LLMs, or quantization.

 #### Casual language as prompt
@@ -135,14 +157,12 @@ We start with a casual description:

 Define the AI prompt editor.
 ```python
-from llm_ie
-from llm_ie.extractors import BasicFrameExtractor
-from llm_ie.prompt_editor import PromptEditor
+from llm_ie import OllamaInferenceEngine, PromptEditor, BasicFrameExtractor

 # Define a LLM inference engine
-
+inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
 # Define LLM prompt editor
-editor = PromptEditor(
+editor = PromptEditor(inference_engine, BasicFrameExtractor)
 # Start chat
 editor.chat()
 ```
@@ -189,7 +209,7 @@ with open("./demo/document/synthesized_note.txt", 'r') as f:
     note_text = f.read()

 # Define extractor
-extractor = BasicFrameExtractor(
+extractor = BasicFrameExtractor(inference_engine, prompt_template)

 # Extract
 frames = extractor.extract_frames(note_text, entity_key="Diagnosis", stream=True)
@@ -227,7 +247,7 @@ To visualize the extracted frames, we use the ```viz_serve()``` method.
 ```python
 doc.viz_serve()
 ```
-A Flask
+A Flask App starts at port 5000 (default).
 ```
  * Serving Flask app 'ie_viz.utilities'
  * Debug mode: off
@@ -254,39 +274,28 @@ This package is comprised of some key classes:
 - Extractors

 ### LLM Inference Engine
-Provides an interface for different LLM inference engines to work in the information extraction workflow. The built-in engines are
+Provides an interface for different LLM inference engines to work in the information extraction workflow. The built-in engines are `LiteLLMInferenceEngine`, `OpenAIInferenceEngine`, `HuggingFaceHubInferenceEngine`, `OllamaInferenceEngine`, and `LlamaCppInferenceEngine`.

-####
-The
+#### 🚅 LiteLLM
+LiteLLM is an adaptor project that unifies many proprietary and open-source LLM APIs. Popular inferencing servers, including OpenAI, Huggingface Hub, and Ollama, are supported via its interface. For more details, refer to the [LiteLLM GitHub page](https://github.com/BerriAI/litellm).

+To use LiteLLM with LLM-IE, import the `LiteLLMInferenceEngine` and follow the required model naming.
 ```python
-from llm_ie.engines import
+from llm_ie.engines import LiteLLMInferenceEngine

-
-
-
-n_gpu_layers=-1,
-flash_attn=True,
-verbose=False)
-```
-#### <img src="https://avatars.githubusercontent.com/u/151674099?s=48&v=4" alt="Icon" width="20"/> Ollama
-The ```model_name``` must match the names on the [Ollama library](https://ollama.com/library). Use the command line ```ollama ls``` to check your local model list. ```num_ctx``` determines the context length LLM will consider during text generation. Empirically, longer context length gives better performance, while consuming more memory and increases computation. ```keep_alive``` regulates the lifespan of LLM. It indicates a number of seconds to hold the LLM after the last API call. Default is 5 minutes (300 sec).
+# Huggingface serverless inferencing
+os.environ['HF_TOKEN']
+inference_engine = LiteLLMInferenceEngine(model="huggingface/meta-llama/Meta-Llama-3-8B-Instruct")

-
-
+# OpenAI GPT models
+os.environ['OPENAI_API_KEY']
+inference_engine = LiteLLMInferenceEngine(model="openai/gpt-4o-mini")

-
-
-keep_alive=300)
-```
+# OpenAI compatible local server
+inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.1-8B-Instruct", base_url="http://localhost:8000/v1", api_key="EMPTY")

-
-
-
-```python
-from llm_ie.engines import HuggingFaceHubInferenceEngine
-
-hf = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
+# Ollama
+inference_engine = LiteLLMInferenceEngine(model="ollama/llama3.1:8b-instruct-q8_0")
 ```

 #### <img src=doc_asset/readme_img/openai-logomark.png width=16 /> OpenAI API
@@ -301,10 +310,28 @@ For more parameters, see [OpenAI API reference](https://platform.openai.com/docs
 ```python
 from llm_ie.engines import OpenAIInferenceEngine

-
+inference_engine = OpenAIInferenceEngine(model="gpt-4o-mini")
 ```

-####
+#### 🤗 huggingface_hub
+The ```model``` can be a model id hosted on the Hugging Face Hub or a URL to a deployed Inference Endpoint. Refer to the [Inference Client](https://huggingface.co/docs/huggingface_hub/en/package_reference/inference_client) documentation for more details.
+
+```python
+from llm_ie.engines import HuggingFaceHubInferenceEngine
+
+inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
+```
+
+#### <img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> Ollama
+The ```model_name``` must match the names on the [Ollama library](https://ollama.com/library). Use the command line ```ollama ls``` to check your local model list. ```num_ctx``` determines the context length the LLM will consider during text generation. Empirically, a longer context length gives better performance while consuming more memory and increasing computation. ```keep_alive``` regulates the lifespan of the LLM. It indicates the number of seconds to hold the LLM after the last API call. Default is 5 minutes (300 sec).
+
+```python
+from llm_ie.engines import OllamaInferenceEngine
+
+inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0", num_ctx=4096, keep_alive=300)
+```
+
+#### <img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> vLLM
 The vLLM support follows the [OpenAI Compatible Server](https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html). For more parameters, please refer to the documentation.

 Start the server
@@ -317,20 +344,34 @@ the default port is 8000. ```--port``` sets the port.
 Define inference engine
 ```python
 from llm_ie.engines import OpenAIInferenceEngine
-
+inference_engine = OpenAIInferenceEngine(base_url="http://localhost:8000/v1",
                                          api_key="MY_API_KEY",
                                          model="meta-llama/Meta-Llama-3.1-8B-Instruct")
 ```
 The ```model``` must match the repo name specified in the server.

+#### 🦙 Llama-cpp-python
+The ```repo_id``` and ```gguf_filename``` must match the ones on the Huggingface repo to ensure the correct model is loaded. ```n_ctx``` determines the context length the LLM will consider during text generation. Empirically, a longer context length gives better performance while consuming more memory and increasing computation. Note that when ```n_ctx``` is less than the prompt length, Llama.cpp throws exceptions. ```n_gpu_layers``` indicates the number of model layers to offload to GPU. Default is -1 for all layers (entire LLM). Flash attention ```flash_attn``` is supported by Llama.cpp. The ```verbose``` indicates whether model information should be displayed. For more input parameters, see 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python).
+
+```python
+from llm_ie.engines import LlamaCppInferenceEngine
+
+inference_engine = LlamaCppInferenceEngine(repo_id="bullerwins/Meta-Llama-3.1-8B-Instruct-GGUF",
+                                           gguf_filename="Meta-Llama-3.1-8B-Instruct-Q8_0.gguf",
+                                           n_ctx=4096,
+                                           n_gpu_layers=-1,
+                                           flash_attn=True,
+                                           verbose=False)
+```
+
 #### Test inference engine configuration
 To test the inference engine, use the ```chat()``` method.

 ```python
 from llm_ie.engines import OllamaInferenceEngine

-
-
+inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
+inference_engine.chat(messages=[{"role": "user", "content":"Hi"}], stream=True)
 ```
 The output should be something like (might vary by LLMs and versions)

@@ -448,8 +489,8 @@ prompt_template = """
 Below is the medical note:
 "{{note}}"
 """
-
-extractor = BasicFrameExtractor(
+inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
+extractor = BasicFrameExtractor(inference_engine, prompt_template)
 prompt_text = extractor._get_user_prompt(text_content={"knowledge": "<some text...>",
                                                        "note": "<some text...>"})
 print(prompt_text)
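The snippet above suggests that each `{{...}}` placeholder in the template is filled from the `text_content` key with the same name. A minimal illustrative template (not the README's full example) could therefore look like this; the task wording and the "Diagnosis"/"Date"/"Status" keys are borrowed from the demo outputs elsewhere in this README:

```python
# Minimal illustrative template; the README's actual example is longer and
# follows BasicFrameExtractor.get_prompt_guide().
# Each {{...}} placeholder is filled from the same-named key in text_content.
prompt_template = """
Extract diagnoses from the medical note below. Return a JSON list of dictionaries,
each with the keys "Diagnosis", "Date", and "Status".

Background knowledge:
"{{knowledge}}"

Below is the medical note:
"{{note}}"
"""
```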
@@ -467,7 +508,7 @@ from llm_ie.extractors import BasicFrameExtractor
 print(BasicFrameExtractor.get_prompt_guide())
 ```

-### Prompt Editor
+### Prompt Editor LLM Agent
 The prompt editor is an LLM agent that helps users write prompt templates following the defined schema and guideline of each extractor. Chat with the prompt editor:

 ```python
@@ -476,10 +517,10 @@ from llm_ie.extractors import BasicFrameExtractor
 from llm_ie.engines import OllamaInferenceEngine

 # Define an LLM inference engine
-
+inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")

 # Define editor
-editor = PromptEditor(
+editor = PromptEditor(inference_engine, BasicFrameExtractor)

 editor.chat()
 ```
@@ -503,10 +544,10 @@ from llm_ie.extractors import BasicFrameExtractor
 from llm_ie.engines import OllamaInferenceEngine

 # Define an LLM inference engine
-
+inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")

 # Define editor
-editor = PromptEditor(
+editor = PromptEditor(inference_engine, BasicFrameExtractor)

 # Have the editor generate an initial prompt template
 initial_version = editor.rewrite("Extract treatment events from the discharge summary.")
@@ -611,10 +652,12 @@ After a few iterations of revision, we will have a high-quality prompt template

 ### Extractor
 An extractor implements a prompting method for information extraction. There are two extractor families: ```FrameExtractor``` and ```RelationExtractor```.
-The ```FrameExtractor``` extracts named entities
+The ```FrameExtractor``` extracts named entities with attributes ("frames"). The ```RelationExtractor``` extracts the relations (and relation types) between frames.

 #### FrameExtractor
-The ```BasicFrameExtractor``` directly prompts LLM to generate a list of dictionaries. Each dictionary is then post-processed into a frame. The ```ReviewFrameExtractor``` is based on the ```BasicFrameExtractor``` but adds a review step after the initial extraction to boost sensitivity and improve performance. ```SentenceFrameExtractor``` gives LLM the entire document upfront as a reference, then prompts LLM sentence by sentence and collects per-sentence outputs. To learn about an extractor, use the class method ```get_prompt_guide()``` to print out the prompt guide.
+The ```BasicFrameExtractor``` directly prompts the LLM to generate a list of dictionaries. Each dictionary is then post-processed into a frame. The ```ReviewFrameExtractor``` is based on the ```BasicFrameExtractor``` but adds a review step after the initial extraction to boost sensitivity and improve performance. The ```SentenceFrameExtractor``` gives the LLM the entire document upfront as a reference, then prompts the LLM sentence by sentence and collects per-sentence outputs. The ```SentenceReviewFrameExtractor``` is the combined version of ```ReviewFrameExtractor``` and ```SentenceFrameExtractor```, in which each sentence is extracted and reviewed. The ```SentenceCoTFrameExtractor``` implements chain-of-thought (CoT): it first analyzes a sentence, then extracts frames based on the CoT. To learn about an extractor, use the class method ```get_prompt_guide()``` to print out the prompt guide.
+
+Since the output entity text from LLMs might not be consistent with the original text due to the limitations of LLMs, we apply fuzzy search in post-processing to find the accurate entity span. In the `FrameExtractor.extract_frames()` method, setting the parameter `fuzzy_match=True` applies Jaccard similarity matching.
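The Jaccard matching mentioned above can be pictured with a small, self-contained sketch. It is illustrative only; the package's actual post-processing lives in `extractors.py`, and the helpers below are hypothetical:

```python
# Illustrative only: a toy version of Jaccard-based fuzzy span matching,
# not the package's actual implementation.
def jaccard(a: str, b: str) -> float:
    """Jaccard similarity between two strings, computed on word sets."""
    set_a, set_b = set(a.lower().split()), set(b.lower().split())
    return len(set_a & set_b) / len(set_a | set_b) if set_a | set_b else 0.0

def best_span(entity_text: str, document: str):
    """Slide a window of the same word length over the document and return
    the (start, end) character span with the highest Jaccard similarity."""
    words = document.split()
    n = len(entity_text.split())
    best, best_score = None, 0.0
    for i in range(len(words) - n + 1):
        candidate = " ".join(words[i:i + n])
        score = jaccard(entity_text, candidate)
        if score > best_score:
            start = document.find(candidate)
            best, best_score = (start, start + len(candidate)), score
    return best
```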

 <details>
 <summary>BasicFrameExtractor</summary>
@@ -624,8 +667,8 @@ The ```BasicFrameExtractor``` directly prompts LLM to generate a list of diction
 ```python
 from llm_ie.extractors import BasicFrameExtractor

-extractor = BasicFrameExtractor(
-frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", stream=True)
+extractor = BasicFrameExtractor(inference_engine, prompt_temp)
+frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", case_sensitive=False, fuzzy_match=True, stream=True)
 ```

 Use the ```get_prompt_guide()``` method to inspect the prompt template guideline for ```BasicFrameExtractor```.
@@ -687,7 +730,7 @@ The ```review_mode``` should be set to ```review_mode="revision"```
 ```python
 review_prompt = "Review the input and your output again. If you find some diagnosis was missed, add them to your output. Regenerate your output."

-extractor = ReviewFrameExtractor(
+extractor = ReviewFrameExtractor(inference_engine, prompt_temp, review_prompt, review_mode="revision")
 frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", stream=True)
 ```
 </details>
@@ -697,14 +740,95 @@ frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", str

 The ```SentenceFrameExtractor``` instructs the LLM to extract sentence by sentence. The reason is to ensure the accuracy of frame spans. It also prevents LLMs from overlooking sections/sentences. Empirically, this extractor results in better recall than the ```BasicFrameExtractor``` in complex tasks.

+For concurrent extraction (recommended), the `async/await` feature is used to speed up inferencing. The `concurrent_batch_size` parameter sets the batch size of sentences to be processed concurrently.
+
+```python
+from llm_ie.extractors import SentenceFrameExtractor
+
+extractor = SentenceFrameExtractor(inference_engine, prompt_temp)
+frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", case_sensitive=False, fuzzy_match=True, concurrent=True, concurrent_batch_size=32)
+```
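To make the `concurrent_batch_size` behavior concrete, here is a rough sketch of batched async inference. The `call_llm` coroutine is hypothetical and merely stands in for one per-sentence LLM request; it is not the package's actual code.

```python
# Rough sketch of batched async LLM calls, assuming a hypothetical call_llm
# coroutine; the package's real logic lives in extractors.py.
import asyncio

async def call_llm(sentence: str) -> str:
    ...  # one LLM request for one sentence (hypothetical)

async def extract_concurrently(sentences: list[str], concurrent_batch_size: int = 32) -> list[str]:
    outputs = []
    # Process sentences in batches; requests within a batch run concurrently.
    for i in range(0, len(sentences), concurrent_batch_size):
        batch = sentences[i:i + concurrent_batch_size]
        outputs.extend(await asyncio.gather(*(call_llm(s) for s in batch)))
    return outputs
```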
+
 The ```multi_turn``` parameter specifies multi-turn conversation for prompting. If True, sentences and LLM outputs will be appended to the input message and carried over. If False, only the current sentence is prompted. For LLM inference engines that support prompt caching (e.g., Llama.Cpp, Ollama), using multi-turn conversation prompting can better utilize the KV caching and results in faster inferencing. But for vLLM with [Automatic Prefix Caching (APC)](https://docs.vllm.ai/en/latest/automatic_prefix_caching/apc.html), multi-turn conversation is not necessary.

 ```python
 from llm_ie.extractors import SentenceFrameExtractor

-extractor = SentenceFrameExtractor(
-frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", multi_turn=True, stream=True)
+extractor = SentenceFrameExtractor(inference_engine, prompt_temp)
+frames = extractor.extract_frames(text_content=text, entity_key="Diagnosis", multi_turn=False, case_sensitive=False, fuzzy_match=True, stream=True)
+```
+
+</details>
+
+<details>
+<summary>SentenceReviewFrameExtractor</summary>
+
+The `SentenceReviewFrameExtractor` performs sentence-level extraction and review.
+
+```python
+from llm_ie.extractors import SentenceReviewFrameExtractor
+
+extractor = SentenceReviewFrameExtractor(inference_engine, prompt_temp, review_mode="revision")
+frames = extractor.extract_frames(text_content=note_text, entity_key="Diagnosis", stream=True)
+```
+
+```
+Sentence:
+#### History of Present Illness
+The patient reported that the chest pain started two days prior to admission.
+
+Initial Output:
+[
+    {"Diagnosis": "chest pain", "Date": "two days prior to admission", "Status": "reported"}
+]
+Review:
+[
+    {"Diagnosis": "admission", "Date": null, "Status": null}
+]
+```
+
+</details>
+
+<details>
+<summary>SentenceCoTFrameExtractor</summary>
+
+The `SentenceCoTFrameExtractor` processes the document sentence by sentence. For each sentence, it first generates an analysis paragraph in `<Analysis>... </Analysis>` (chain-of-thought), then outputs extractions in JSON in `<Outputs>... </Outputs>`, similar to `SentenceFrameExtractor`.
+
+```python
+from llm_ie.extractors import SentenceCoTFrameExtractor
+
+extractor = SentenceCoTFrameExtractor(inference_engine, CoT_prompt_temp)
+frames = extractor.extract_frames(text_content=note_text, entity_key="Diagnosis", stream=True)
+```
+
+```
+Sentence:
+#### Discharge Medications
+- Aspirin 81 mg daily
+- Clopidogrel 75 mg daily
+- Atorvastatin 40 mg daily
+- Metoprolol 50 mg twice daily
+- Lisinopril 20 mg daily
+- Metformin 1000 mg twice daily
+
+#### Discharge Instructions
+John Doe was advised to follow a heart-healthy diet, engage in regular physical activity, and monitor his blood glucose levels.
+
+CoT:
+<Analysis>
+The given text does not explicitly mention a diagnosis, but rather lists the discharge medications and instructions for the patient. However, we can infer that the patient has been diagnosed with conditions that require these medications, such as high blood pressure, high cholesterol, and diabetes.
+
+</Analysis>
+
+<Outputs>
+[
+    {"Diagnosis": "hypertension", "Date": null, "Status": "confirmed"},
+    {"Diagnosis": "hyperlipidemia", "Date": null, "Status": "confirmed"},
+    {"Diagnosis": "Type 2 diabetes mellitus", "Date": null, "Status": "confirmed"}
+]
+</Outputs>
 ```
+
 </details>

 #### RelationExtractor
@@ -724,12 +848,32 @@ print(BinaryRelationExtractor.get_prompt_guide())
 ```

 ```
-Prompt
-
-
-
-
-
+Prompt Template Design:
+
+1. Task description:
+    Provide a detailed description of the task, including the background and the type of task (e.g., binary relation extraction). Mention the region of interest (ROI) text.
+2. Schema definition:
+    List the criteria for relation (True) and for no relation (False).
+
+3. Output format definition:
+    The output must be a dictionary with a key "Relation" (i.e., {"Relation": "<True or False>"}).
+
+4. (optional) Hints:
+    Provide itemized hints for the information extractors to guide the extraction process.
+
+5. (optional) Examples:
+    Include examples in the format:
+    Input: ...
+    Output: ...
+
+6. Entity 1 full information:
+    Include a placeholder in the format {{<frame_1>}}
+
+7. Entity 2 full information:
+    Include a placeholder in the format {{<frame_2>}}
+
+8. Input placeholders:
+    The template must include a placeholder "{{roi_text}}" for the ROI text.


 Example:
@@ -753,15 +897,15 @@ Example:
 3. If the strength or frequency is for another medication, output False.
 4. If the strength or frequency is for the same medication but at a different location (span), output False.

-#
-ROI Text with the two entities annotated with <entity_1> and <entity_2>:
-"{{roi_text}}"
-
-Entity 1 full information:
+# Entity 1 full information:
 {{frame_1}}

-Entity 2 full information:
+# Entity 2 full information:
 {{frame_2}}
+
+# Input placeholders
+ROI Text with the two entities annotated with <entity_1> and <entity_2>:
+"{{roi_text}}"
 ```

 As an example, we define the ```possible_relation_func``` function:
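The README's actual `possible_relation_func` definition falls between the hunks shown in this diff. As a hedged illustration of its shape (it receives two frames and returns a boolean that decides whether the LLM is prompted for that pair), a sketch could look like the following; the `attr` dict, the `start` offsets, the "EntityType" key, and the distance threshold are all hypothetical:

```python
# Hypothetical sketch only; check src/llm_ie/data_types.py for the real frame
# attributes and the README for the author's actual definition.
def possible_relation_func(frame_1, frame_2) -> bool:
    # Only ask the LLM about pairs that could plausibly be related: here, a
    # Drug paired with a Strength or Frequency entity that sits nearby.
    types = {frame_1.attr.get("EntityType"), frame_2.attr.get("EntityType")}
    close_by = abs(frame_1.start - frame_2.start) < 200  # assumed span offsets
    return close_by and "Drug" in types and bool({"Strength", "Frequency"} & types)
```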
@@ -796,8 +940,12 @@ In the ```BinaryRelationExtractor``` constructor, we pass in the prompt template
 ```python
 from llm_ie.extractors import BinaryRelationExtractor

-extractor = BinaryRelationExtractor(
-relations
+extractor = BinaryRelationExtractor(inference_engine, prompt_template=prompt_template, possible_relation_func=possible_relation_func)
+# Extract binary relations with concurrent mode (faster)
+relations = extractor.extract_relations(doc, concurrent=True)
+
+# To print out the step-by-step outputs, use the `concurrent=False` and `stream=True` options
+relations = extractor.extract_relations(doc, concurrent=False, stream=True)
 ```

 </details>
@@ -813,11 +961,34 @@ print(MultiClassRelationExtractor.get_prompt_guide())
 ```

 ```
-Prompt
-
-
-
-
+Prompt Template Design:
+
+1. Task description:
+    Provide a detailed description of the task, including the background and the type of task (e.g., binary relation extraction). Mention the region of interest (ROI) text.
+2. Schema definition:
+    List the criteria for relation (True) and for no relation (False).
+
+3. Output format definition:
+    This section must include a placeholder "{{pos_rel_types}}" for the possible relation types.
+    The output must be a dictionary with a key "RelationType" (i.e., {"RelationType": "<relation type or No Relation>"}).
+
+4. (optional) Hints:
+    Provide itemized hints for the information extractors to guide the extraction process.
+
+5. (optional) Examples:
+    Include examples in the format:
+    Input: ...
+    Output: ...
+
+6. Entity 1 full information:
+    Include a placeholder in the format {{<frame_1>}}
+
+7. Entity 2 full information:
+    Include a placeholder in the format {{<frame_2>}}
+
+8. Input placeholders:
+    The template must include a placeholder "{{roi_text}}" for the ROI text.
+


 Example:
@@ -850,15 +1021,15 @@ Example:
 3. If the strength or frequency is for another medication, output "No Relation".
 4. If the strength or frequency is for the same medication but at a different location (span), output "No Relation".

-#
-ROI Text with the two entities annotated with <entity_1> and <entity_2>:
-"{{roi_text}}"
-
-Entity 1 full information:
+# Entity 1 full information:
 {{frame_1}}

-Entity 2 full information:
+# Entity 2 full information:
 {{frame_2}}
+
+# Input placeholders
+ROI Text with the two entities annotated with <entity_1> and <entity_2>:
+"{{roi_text}}"
 ```

 As an example, we define the ```possible_relation_types_func```:
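The full `possible_relation_types_func` definition also sits between the hunks shown here; only its signature (`(frame_1, frame_2) -> List[str]`) is visible in the next hunk header. The sketch below is a hedged illustration of that shape; the `attr` dict, the "EntityType" key, and the relation type names are made up for illustration:

```python
# Hypothetical sketch only; the author's real definition is in the README
# between the hunks shown here. Assumes an `attr` dict on each frame.
from typing import List

def possible_relation_types_func(frame_1, frame_2) -> List[str]:
    types = {frame_1.attr.get("EntityType"), frame_2.attr.get("EntityType")}
    if types == {"Drug", "Strength"}:
        return ["Strength-Drug"]
    if types == {"Drug", "Frequency"}:
        return ["Frequency-Drug"]
    return []  # presumably, an empty list means the pair is not prompted
```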
@@ -889,8 +1060,76 @@ def possible_relation_types_func(frame_1, frame_2) -> List[str]:
 ```python
 from llm_ie.extractors import MultiClassRelationExtractor

-extractor = MultiClassRelationExtractor(
-
+extractor = MultiClassRelationExtractor(inference_engine, prompt_template=re_prompt_template,
+                                        possible_relation_types_func=possible_relation_types_func)
+
+# Extract multi-class relations with concurrent mode (faster)
+relations = extractor.extract_relations(doc, concurrent=True)
+
+# To print out the step-by-step outputs, use the `concurrent=False` and `stream=True` options
+relations = extractor.extract_relations(doc, concurrent=False, stream=True)
 ```

 </details>
+
+### Visualization
+The `LLMInformationExtractionDocument` class supports named entity, entity attributes, and relation visualization. The implementation is through our plug-in package [ie-viz](https://github.com/daviden1013/ie-viz). Check the example Jupyter Notebook [NER + RE for Drug, Strength, Frequency](demo/medication_relation_extraction.ipynb) for a working demo.
+
+```cmd
+pip install ie-viz
+```
+
+The `viz_serve()` method starts a Flask App on localhost port 5000 by default.
+```python
+from llm_ie.data_types import LLMInformationExtractionDocument
+
+# Define document
+doc = LLMInformationExtractionDocument(doc_id="Medical note",
+                                       text=note_text)
+# Add extracted frames and relations to document
+doc.add_frames(frames)
+doc.add_relations(relations)
+# Visualize the document
+doc.viz_serve()
+```
+
+Alternatively, the `viz_render()` method returns a self-contained (HTML + JS + CSS) string. Save it to a file and open it with a browser.
+```python
+html = doc.viz_render()
+
+with open("Medical note.html", "w") as f:
+    f.write(html)
+```
+
+To customize colors for different entities, use `color_attr_key` (simple) or `color_map_func` (advanced).
+
+The `color_attr_key` automatically assigns colors based on the specified attribute key. For example, "EntityType".
+```python
+doc.viz_serve(color_attr_key="EntityType")
+```
+
+The `color_map_func` allows users to define a custom entity-color mapping function. For example,
+```python
+def color_map_func(entity) -> str:
+    if entity['attr']['<attribute key>'] == "<a certain value>":
+        return "#7f7f7f"
+    else:
+        return "#03A9F4"
+
+doc.viz_serve(color_map_func=color_map_func)
+```
+
+## Benchmarks
+We benchmarked the frame and relation extractors on biomedical information extraction tasks. The results and experiment code are available on [this page](https://github.com/daviden1013/LLM-IE_Benchmark).
+
+
+## Citation
+For more information and benchmarks, please check our paper:
+```bibtex
+@article{hsu2024llm,
+  title={LLM-IE: A Python Package for Generative Information Extraction with Large Language Models},
+  author={Hsu, Enshuo and Roberts, Kirk},
+  journal={arXiv preprint arXiv:2411.11779},
+  year={2024}
+}
+```