llm-ie 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llm_ie-0.4.0 → llm_ie-0.4.1}/PKG-INFO +30 -16
- {llm_ie-0.4.0 → llm_ie-0.4.1}/README.md +29 -15
- {llm_ie-0.4.0 → llm_ie-0.4.1}/pyproject.toml +1 -1
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/data_types.py +44 -17
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/__init__.py +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/PromptEditor_prompts/chat.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/PromptEditor_prompts/comment.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/PromptEditor_prompts/rewrite.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/PromptEditor_prompts/system.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/SentenceCoTFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/engines.py +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/extractors.py +0 -0
- {llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/prompt_editor.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: llm-ie
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Enshuo (David) Hsu
|
|
@@ -35,9 +35,10 @@ An LLM-powered tool that transforms everyday language into robust information ex
|
|
|
35
35
|
- [v0.3.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.1) (Oct 26, 2024): Added Sentence Review Frame Extractor and Sentence CoT Frame Extractor
|
|
36
36
|
- [v0.3.4](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.4) (Nov 24, 2024): Added entity fuzzy search.
|
|
37
37
|
- [v0.3.5](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.5) (Nov 27, 2024): Adopted `json_repair` to fix broken JSON from LLM outputs.
|
|
38
|
-
- v0.4.0:
|
|
38
|
+
- [v0.4.0](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.0) (Jan 4, 2025):
|
|
39
39
|
- Concurrent LLM inferencing to speed up frame and relation extraction.
|
|
40
40
|
- Support for LiteLLM.
|
|
41
|
+
- [v0.4.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.1) (Jan 25, 2025): Added filters, table view, and some new features to visualization tool (make sure to update [ie-viz](https://github.com/daviden1013/ie-viz)).
|
|
41
42
|
|
|
42
43
|
## Table of Contents
|
|
43
44
|
- [Overview](#overview)
|
|
@@ -62,7 +63,7 @@ LLM-IE is a toolkit that provides robust information extraction utilities for na
|
|
|
62
63
|
<div align="center"><img src="doc_asset/readme_img/LLM-IE flowchart.png" width=800 ></div>
|
|
63
64
|
|
|
64
65
|
## Prerequisite
|
|
65
|
-
At least one LLM inference engine is required. There are built-in supports for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/
|
|
66
|
+
At least one LLM inference engine is required. There are built-in supports for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), <img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction), and <img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> [vLLM](https://github.com/vllm-project/vllm). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.
|
|
66
67
|
|
|
67
68
|
## Installation
|
|
68
69
|
The Python package is available on PyPI.
|
|
@@ -88,7 +89,7 @@ inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.3-70B-Instruct",
|
|
|
88
89
|
</details>
|
|
89
90
|
|
|
90
91
|
<details>
|
|
91
|
-
<summary><img src=doc_asset/readme_img/openai-
|
|
92
|
+
<summary><img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> OpenAI API</summary>
|
|
92
93
|
|
|
93
94
|
Follow the [Best Practices for API Key Safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety) to set up API key.
|
|
94
95
|
```python
|
|
@@ -109,7 +110,7 @@ inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-
|
|
|
109
110
|
</details>
|
|
110
111
|
|
|
111
112
|
<details>
|
|
112
|
-
<summary><img src="doc_asset/readme_img/
|
|
113
|
+
<summary><img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> Ollama</summary>
|
|
113
114
|
|
|
114
115
|
```python
|
|
115
116
|
from llm_ie.engines import OllamaInferenceEngine
|
|
@@ -157,12 +158,12 @@ We start with a casual description:
|
|
|
157
158
|
|
|
158
159
|
Define the AI prompt editor.
|
|
159
160
|
```python
|
|
160
|
-
from llm_ie import OllamaInferenceEngine, PromptEditor,
|
|
161
|
+
from llm_ie import OllamaInferenceEngine, PromptEditor, SentenceFrameExtractor
|
|
161
162
|
|
|
162
163
|
# Define a LLM inference engine
|
|
163
164
|
inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
|
|
164
165
|
# Define LLM prompt editor
|
|
165
|
-
editor = PromptEditor(inference_engine,
|
|
166
|
+
editor = PromptEditor(inference_engine, SentenceFrameExtractor)
|
|
166
167
|
# Start chat
|
|
167
168
|
editor.chat()
|
|
168
169
|
```
|
|
@@ -171,7 +172,7 @@ This opens an interactive session:
|
|
|
171
172
|
<div align="left"><img src=doc_asset/readme_img/terminal_chat.PNG width=1000 ></div>
|
|
172
173
|
|
|
173
174
|
|
|
174
|
-
The ```PromptEditor``` drafts a prompt template following the schema required by the ```
|
|
175
|
+
The ```PromptEditor``` drafts a prompt template following the schema required by the ```SentenceFrameExtractor```:
|
|
175
176
|
|
|
176
177
|
```
|
|
177
178
|
# Task description
|
|
@@ -209,10 +210,13 @@ with open("./demo/document/synthesized_note.txt", 'r') as f:
|
|
|
209
210
|
note_text = f.read()
|
|
210
211
|
|
|
211
212
|
# Define extractor
|
|
212
|
-
extractor =
|
|
213
|
+
extractor = SentenceFrameExtractor(inference_engine, prompt_template)
|
|
213
214
|
|
|
214
215
|
# Extract
|
|
215
|
-
|
|
216
|
+
# To stream the extraction process, use concurrent=False, stream=True:
|
|
217
|
+
frames = extractor.extract_frames(note_text, entity_key="Diagnosis", concurrent=False, stream=True)
|
|
218
|
+
# For faster extraction, use concurrent=True to enable asynchronous prompting
|
|
219
|
+
frames = extractor.extract_frames(note_text, entity_key="Diagnosis", concurrent=True)
|
|
216
220
|
|
|
217
221
|
# Check extractions
|
|
218
222
|
for frame in frames:
|
|
@@ -221,10 +225,17 @@ for frame in frames:
|
|
|
221
225
|
The output is a list of frames. Each frame has an ```entity_text```, ```start```, ```end```, and a dictionary of ```attr```.
|
|
222
226
|
|
|
223
227
|
```python
|
|
224
|
-
{'frame_id': '0', 'start': 537, 'end': 549, 'entity_text': '
|
|
225
|
-
{'frame_id': '1', 'start': 551, 'end': 565, 'entity_text': '
|
|
226
|
-
{'frame_id': '2', 'start': 571, 'end': 595, 'entity_text': 'Type 2
|
|
227
|
-
{'frame_id': '3', 'start':
|
|
228
|
+
{'frame_id': '0', 'start': 537, 'end': 549, 'entity_text': 'hypertension', 'attr': {'Date': '2010-01-01', 'Status': 'Active'}}
|
|
229
|
+
{'frame_id': '1', 'start': 551, 'end': 565, 'entity_text': 'hyperlipidemia', 'attr': {'Date': '2015-01-01', 'Status': 'Active'}}
|
|
230
|
+
{'frame_id': '2', 'start': 571, 'end': 595, 'entity_text': 'Type 2 diabetes mellitus', 'attr': {'Date': '2018-01-01', 'Status': 'Active'}}
|
|
231
|
+
{'frame_id': '3', 'start': 660, 'end': 670, 'entity_text': 'chest pain', 'attr': {'Date': 'July 18, 2024'}}
|
|
232
|
+
{'frame_id': '4', 'start': 991, 'end': 1003, 'entity_text': 'Hypertension', 'attr': {'Date': '2010-01-01'}}
|
|
233
|
+
{'frame_id': '5', 'start': 1026, 'end': 1040, 'entity_text': 'Hyperlipidemia', 'attr': {'Date': '2015-01-01'}}
|
|
234
|
+
{'frame_id': '6', 'start': 1063, 'end': 1087, 'entity_text': 'Type 2 Diabetes Mellitus', 'attr': {'Date': '2018-01-01'}}
|
|
235
|
+
{'frame_id': '7', 'start': 1926, 'end': 1947, 'entity_text': 'ST-segment depression', 'attr': None}
|
|
236
|
+
{'frame_id': '8', 'start': 2049, 'end': 2066, 'entity_text': 'acute infiltrates', 'attr': None}
|
|
237
|
+
{'frame_id': '9', 'start': 2117, 'end': 2150, 'entity_text': 'Mild left ventricular hypertrophy', 'attr': None}
|
|
238
|
+
{'frame_id': '10', 'start': 2402, 'end': 2425, 'entity_text': 'acute coronary syndrome', 'attr': {'Date': 'July 20, 2024', 'Status': 'Active'}}
|
|
228
239
|
```
|
|
229
240
|
|
|
230
241
|
We can save the frames to a document object for better management. The document holds ```text``` and ```frames```. The ```add_frame()``` method performs validation and (if passed) adds a frame to the document.
|
|
@@ -298,7 +309,7 @@ inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.1-8B-Instruct",
|
|
|
298
309
|
inference_engine = LiteLLMInferenceEngine(model="ollama/llama3.1:8b-instruct-q8_0")
|
|
299
310
|
```
|
|
300
311
|
|
|
301
|
-
#### <img src=doc_asset/readme_img/openai-
|
|
312
|
+
#### <img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> OpenAI API
|
|
302
313
|
In bash, save API key to the environmental variable ```OPENAI_API_KEY```.
|
|
303
314
|
```
|
|
304
315
|
export OPENAI_API_KEY=<your_API_key>
|
|
@@ -322,7 +333,7 @@ from llm_ie.engines import HuggingFaceHubInferenceEngine
|
|
|
322
333
|
inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
|
|
323
334
|
```
|
|
324
335
|
|
|
325
|
-
#### <img src="doc_asset/readme_img/
|
|
336
|
+
#### <img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> Ollama
|
|
326
337
|
The ```model_name``` must match a name in the [Ollama library](https://ollama.com/library). Use the command ```ollama ls``` to check your local model list. ```num_ctx``` determines the context length the LLM will consider during text generation. Empirically, a longer context length gives better performance but consumes more memory and increases computation. ```keep_alive``` regulates the lifespan of the LLM: it is the number of seconds to hold the LLM after the last API call. The default is 5 minutes (300 sec).
|
|
327
338
|
|
|
328
339
|
```python
|
|
@@ -1073,6 +1084,9 @@ relations = extractor.extract_relations(doc, concurrent=False, stream=True)
|
|
|
1073
1084
|
</details>
|
|
1074
1085
|
|
|
1075
1086
|
### Visualization
|
|
1087
|
+
|
|
1088
|
+
<div align="center"><img src="doc_asset/readme_img/visualization.PNG" width=95% ></div>
|
|
1089
|
+
|
|
1076
1090
|
The `LLMInformationExtractionDocument` class supports named entity, entity attributes, and relation visualization. The implementation is through our plug-in package [ie-viz](https://github.com/daviden1013/ie-viz). Check the example Jupyter Notebook [NER + RE for Drug, Strength, Frequency](demo/medication_relation_extraction.ipynb) for a working demo.
|
|
1077
1091
|
|
|
1078
1092
|
```cmd
|
|
@@ -19,9 +19,10 @@ An LLM-powered tool that transforms everyday language into robust information ex
|
|
|
19
19
|
- [v0.3.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.1) (Oct 26, 2024): Added Sentence Review Frame Extractor and Sentence CoT Frame Extractor
|
|
20
20
|
- [v0.3.4](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.4) (Nov 24, 2024): Added entity fuzzy search.
|
|
21
21
|
- [v0.3.5](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.5) (Nov 27, 2024): Adopted `json_repair` to fix broken JSON from LLM outputs.
|
|
22
|
-
- v0.4.0:
|
|
22
|
+
- [v0.4.0](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.0) (Jan 4, 2025):
|
|
23
23
|
- Concurrent LLM inferencing to speed up frame and relation extraction.
|
|
24
24
|
- Support for LiteLLM.
|
|
25
|
+
- [v0.4.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.1) (Jan 25, 2025): Added filters, table view, and some new features to visualization tool (make sure to update [ie-viz](https://github.com/daviden1013/ie-viz)).
|
|
25
26
|
|
|
26
27
|
## Table of Contents
|
|
27
28
|
- [Overview](#overview)
|
|
@@ -46,7 +47,7 @@ LLM-IE is a toolkit that provides robust information extraction utilities for na
|
|
|
46
47
|
<div align="center"><img src="doc_asset/readme_img/LLM-IE flowchart.png" width=800 ></div>
|
|
47
48
|
|
|
48
49
|
## Prerequisite
|
|
49
|
-
At least one LLM inference engine is required. There are built-in supports for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/
|
|
50
|
+
At least one LLM inference engine is required. There are built-in supports for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), <img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction), and <img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> [vLLM](https://github.com/vllm-project/vllm). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.
|
|
50
51
|
|
|
51
52
|
## Installation
|
|
52
53
|
The Python package is available on PyPI.
|
|
@@ -72,7 +73,7 @@ inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.3-70B-Instruct",
|
|
|
72
73
|
</details>
|
|
73
74
|
|
|
74
75
|
<details>
|
|
75
|
-
<summary><img src=doc_asset/readme_img/openai-
|
|
76
|
+
<summary><img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> OpenAI API</summary>
|
|
76
77
|
|
|
77
78
|
Follow the [Best Practices for API Key Safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety) to set up API key.
|
|
78
79
|
```python
|
|
@@ -93,7 +94,7 @@ inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-
|
|
|
93
94
|
</details>
|
|
94
95
|
|
|
95
96
|
<details>
|
|
96
|
-
<summary><img src="doc_asset/readme_img/
|
|
97
|
+
<summary><img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> Ollama</summary>
|
|
97
98
|
|
|
98
99
|
```python
|
|
99
100
|
from llm_ie.engines import OllamaInferenceEngine
|
|
@@ -141,12 +142,12 @@ We start with a casual description:
|
|
|
141
142
|
|
|
142
143
|
Define the AI prompt editor.
|
|
143
144
|
```python
|
|
144
|
-
from llm_ie import OllamaInferenceEngine, PromptEditor,
|
|
145
|
+
from llm_ie import OllamaInferenceEngine, PromptEditor, SentenceFrameExtractor
|
|
145
146
|
|
|
146
147
|
# Define a LLM inference engine
|
|
147
148
|
inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
|
|
148
149
|
# Define LLM prompt editor
|
|
149
|
-
editor = PromptEditor(inference_engine,
|
|
150
|
+
editor = PromptEditor(inference_engine, SentenceFrameExtractor)
|
|
150
151
|
# Start chat
|
|
151
152
|
editor.chat()
|
|
152
153
|
```
|
|
@@ -155,7 +156,7 @@ This opens an interactive session:
|
|
|
155
156
|
<div align="left"><img src=doc_asset/readme_img/terminal_chat.PNG width=1000 ></div>
|
|
156
157
|
|
|
157
158
|
|
|
158
|
-
The ```PromptEditor``` drafts a prompt template following the schema required by the ```
|
|
159
|
+
The ```PromptEditor``` drafts a prompt template following the schema required by the ```SentenceFrameExtractor```:
|
|
159
160
|
|
|
160
161
|
```
|
|
161
162
|
# Task description
|
|
@@ -193,10 +194,13 @@ with open("./demo/document/synthesized_note.txt", 'r') as f:
|
|
|
193
194
|
note_text = f.read()
|
|
194
195
|
|
|
195
196
|
# Define extractor
|
|
196
|
-
extractor =
|
|
197
|
+
extractor = SentenceFrameExtractor(inference_engine, prompt_template)
|
|
197
198
|
|
|
198
199
|
# Extract
|
|
199
|
-
|
|
200
|
+
# To stream the extraction process, use concurrent=False, stream=True:
|
|
201
|
+
frames = extractor.extract_frames(note_text, entity_key="Diagnosis", concurrent=False, stream=True)
|
|
202
|
+
# For faster extraction, use concurrent=True to enable asynchronous prompting
|
|
203
|
+
frames = extractor.extract_frames(note_text, entity_key="Diagnosis", concurrent=True)
|
|
200
204
|
|
|
201
205
|
# Check extractions
|
|
202
206
|
for frame in frames:
|
|
@@ -205,10 +209,17 @@ for frame in frames:
|
|
|
205
209
|
The output is a list of frames. Each frame has an ```entity_text```, ```start```, ```end```, and a dictionary of ```attr```.
|
|
206
210
|
|
|
207
211
|
```python
|
|
208
|
-
{'frame_id': '0', 'start': 537, 'end': 549, 'entity_text': '
|
|
209
|
-
{'frame_id': '1', 'start': 551, 'end': 565, 'entity_text': '
|
|
210
|
-
{'frame_id': '2', 'start': 571, 'end': 595, 'entity_text': 'Type 2
|
|
211
|
-
{'frame_id': '3', 'start':
|
|
212
|
+
{'frame_id': '0', 'start': 537, 'end': 549, 'entity_text': 'hypertension', 'attr': {'Date': '2010-01-01', 'Status': 'Active'}}
|
|
213
|
+
{'frame_id': '1', 'start': 551, 'end': 565, 'entity_text': 'hyperlipidemia', 'attr': {'Date': '2015-01-01', 'Status': 'Active'}}
|
|
214
|
+
{'frame_id': '2', 'start': 571, 'end': 595, 'entity_text': 'Type 2 diabetes mellitus', 'attr': {'Date': '2018-01-01', 'Status': 'Active'}}
|
|
215
|
+
{'frame_id': '3', 'start': 660, 'end': 670, 'entity_text': 'chest pain', 'attr': {'Date': 'July 18, 2024'}}
|
|
216
|
+
{'frame_id': '4', 'start': 991, 'end': 1003, 'entity_text': 'Hypertension', 'attr': {'Date': '2010-01-01'}}
|
|
217
|
+
{'frame_id': '5', 'start': 1026, 'end': 1040, 'entity_text': 'Hyperlipidemia', 'attr': {'Date': '2015-01-01'}}
|
|
218
|
+
{'frame_id': '6', 'start': 1063, 'end': 1087, 'entity_text': 'Type 2 Diabetes Mellitus', 'attr': {'Date': '2018-01-01'}}
|
|
219
|
+
{'frame_id': '7', 'start': 1926, 'end': 1947, 'entity_text': 'ST-segment depression', 'attr': None}
|
|
220
|
+
{'frame_id': '8', 'start': 2049, 'end': 2066, 'entity_text': 'acute infiltrates', 'attr': None}
|
|
221
|
+
{'frame_id': '9', 'start': 2117, 'end': 2150, 'entity_text': 'Mild left ventricular hypertrophy', 'attr': None}
|
|
222
|
+
{'frame_id': '10', 'start': 2402, 'end': 2425, 'entity_text': 'acute coronary syndrome', 'attr': {'Date': 'July 20, 2024', 'Status': 'Active'}}
|
|
212
223
|
```
|
|
213
224
|
|
|
214
225
|
We can save the frames to a document object for better management. The document holds ```text``` and ```frames```. The ```add_frame()``` method performs validation and (if passed) adds a frame to the document.
|
|
@@ -282,7 +293,7 @@ inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.1-8B-Instruct",
|
|
|
282
293
|
inference_engine = LiteLLMInferenceEngine(model="ollama/llama3.1:8b-instruct-q8_0")
|
|
283
294
|
```
|
|
284
295
|
|
|
285
|
-
#### <img src=doc_asset/readme_img/openai-
|
|
296
|
+
#### <img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> OpenAI API
|
|
286
297
|
In bash, save API key to the environmental variable ```OPENAI_API_KEY```.
|
|
287
298
|
```
|
|
288
299
|
export OPENAI_API_KEY=<your_API_key>
|
|
@@ -306,7 +317,7 @@ from llm_ie.engines import HuggingFaceHubInferenceEngine
|
|
|
306
317
|
inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
|
|
307
318
|
```
|
|
308
319
|
|
|
309
|
-
#### <img src="doc_asset/readme_img/
|
|
320
|
+
#### <img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> Ollama
|
|
310
321
|
The ```model_name``` must match a name in the [Ollama library](https://ollama.com/library). Use the command ```ollama ls``` to check your local model list. ```num_ctx``` determines the context length the LLM will consider during text generation. Empirically, a longer context length gives better performance but consumes more memory and increases computation. ```keep_alive``` regulates the lifespan of the LLM: it is the number of seconds to hold the LLM after the last API call. The default is 5 minutes (300 sec).
|
|
311
322
|
|
|
312
323
|
```python
|
|
@@ -1057,6 +1068,9 @@ relations = extractor.extract_relations(doc, concurrent=False, stream=True)
|
|
|
1057
1068
|
</details>
|
|
1058
1069
|
|
|
1059
1070
|
### Visualization
|
|
1071
|
+
|
|
1072
|
+
<div align="center"><img src="doc_asset/readme_img/visualization.PNG" width=95% ></div>
|
|
1073
|
+
|
|
1060
1074
|
The `LLMInformationExtractionDocument` class supports named entity, entity attributes, and relation visualization. The implementation is through our plug-in package [ie-viz](https://github.com/daviden1013/ie-viz). Check the example Jupyter Notebook [NER + RE for Drug, Strength, Frequency](demo/medication_relation_extraction.ipynb) for a working demo.
|
|
1061
1075
|
|
|
1062
1076
|
```cmd
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import List, Dict, Tuple, Iterable, Callable
|
|
2
2
|
import importlib.util
|
|
3
|
+
import warnings
|
|
3
4
|
import json
|
|
4
5
|
|
|
5
6
|
|
|
@@ -306,7 +307,7 @@ class LLMInformationExtractionDocument:
|
|
|
306
307
|
return entities, relations
|
|
307
308
|
|
|
308
309
|
|
|
309
|
-
def viz_serve(self, host: str = '0.0.0.0', port: int = 5000, theme:str = "light",
|
|
310
|
+
def viz_serve(self, host: str = '0.0.0.0', port: int = 5000, theme:str = "light", title:str="Frames Visualization",
|
|
310
311
|
color_attr_key:str=None, color_map_func:Callable=None):
|
|
311
312
|
"""
|
|
312
313
|
This method serves a visualization App of the document.
|
|
@@ -319,6 +320,8 @@ class LLMInformationExtractionDocument:
|
|
|
319
320
|
The port number to run the server on.
|
|
320
321
|
theme : str, Optional
|
|
321
322
|
The theme of the visualization. Must be either "light" or "dark".
|
|
323
|
+
title : str, Optional
|
|
324
|
+
the title of the HTML.
|
|
322
325
|
color_attr_key : str, Optional
|
|
323
326
|
The attribute key to be used for coloring the entities.
|
|
324
327
|
color_map_func : Callable, Optional
|
|
@@ -328,17 +331,29 @@ class LLMInformationExtractionDocument:
|
|
|
328
331
|
entities, relations = self._viz_preprocess()
|
|
329
332
|
from ie_viz import serve
|
|
330
333
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
334
|
+
try:
|
|
335
|
+
serve(text=self.text,
|
|
336
|
+
entities=entities,
|
|
337
|
+
relations=relations,
|
|
338
|
+
host=host,
|
|
339
|
+
port=port,
|
|
340
|
+
theme=theme,
|
|
341
|
+
title=title,
|
|
342
|
+
color_attr_key=color_attr_key,
|
|
343
|
+
color_map_func=color_map_func)
|
|
344
|
+
except TypeError:
|
|
345
|
+
warnings.warn("The version of ie_viz is not the latest. Please update to the latest version (pip install --upgrade ie-viz) for complete features.", UserWarning)
|
|
346
|
+
serve(text=self.text,
|
|
347
|
+
entities=entities,
|
|
348
|
+
relations=relations,
|
|
349
|
+
host=host,
|
|
350
|
+
port=port,
|
|
351
|
+
theme=theme,
|
|
352
|
+
color_attr_key=color_attr_key,
|
|
353
|
+
color_map_func=color_map_func)
|
|
340
354
|
|
|
341
|
-
def viz_render(self, theme:str = "light", color_attr_key:str=None, color_map_func:Callable=None
|
|
355
|
+
def viz_render(self, theme:str = "light", color_attr_key:str=None, color_map_func:Callable=None,
|
|
356
|
+
title:str="Frames Visualization") -> str:
|
|
342
357
|
"""
|
|
343
358
|
This method renders visualization html of the document.
|
|
344
359
|
|
|
@@ -351,13 +366,25 @@ class LLMInformationExtractionDocument:
|
|
|
351
366
|
color_map_func : Callable, Optional
|
|
352
367
|
The function to be used for mapping the entity attributes to colors. When provided, the color_attr_key and
|
|
353
368
|
theme will be overwritten. The function must take an entity dictionary as input and return a color string (hex).
|
|
369
|
+
title : str, Optional
|
|
370
|
+
the title of the HTML.
|
|
354
371
|
"""
|
|
355
372
|
entities, relations = self._viz_preprocess()
|
|
356
373
|
from ie_viz import render
|
|
357
374
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
375
|
+
try:
|
|
376
|
+
return render(text=self.text,
|
|
377
|
+
entities=entities,
|
|
378
|
+
relations=relations,
|
|
379
|
+
theme=theme,
|
|
380
|
+
title=title,
|
|
381
|
+
color_attr_key=color_attr_key,
|
|
382
|
+
color_map_func=color_map_func)
|
|
383
|
+
except TypeError:
|
|
384
|
+
warnings.warn("The version of ie_viz is not the latest. Please update to the latest version (pip install --upgrade ie-viz) for complete features.", UserWarning)
|
|
385
|
+
return render(text=self.text,
|
|
386
|
+
entities=entities,
|
|
387
|
+
relations=relations,
|
|
388
|
+
theme=theme,
|
|
389
|
+
color_attr_key=color_attr_key,
|
|
390
|
+
color_map_func=color_map_func)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt
RENAMED
|
File without changes
|
{llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt
RENAMED
|
File without changes
|
|
File without changes
|
{llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt
RENAMED
|
File without changes
|
|
File without changes
|
{llm_ie-0.4.0 → llm_ie-0.4.1}/src/llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|