llm-ie 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/data_types.py CHANGED
@@ -1,5 +1,6 @@
  from typing import List, Dict, Tuple, Iterable, Callable
  import importlib.util
+ import warnings
  import json
 
 
@@ -306,7 +307,7 @@ class LLMInformationExtractionDocument:
  return entities, relations
 
 
- def viz_serve(self, host: str = '0.0.0.0', port: int = 5000, theme:str = "light",
+ def viz_serve(self, host: str = '0.0.0.0', port: int = 5000, theme:str = "light", title:str="Frames Visualization",
  color_attr_key:str=None, color_map_func:Callable=None):
  """
  This method serves a visualization App of the document.
@@ -319,6 +320,8 @@ class LLMInformationExtractionDocument:
  The port number to run the server on.
  theme : str, Optional
  The theme of the visualization. Must be either "light" or "dark".
+ title : str, Optional
+ The title of the HTML.
  color_attr_key : str, Optional
  The attribute key to be used for coloring the entities.
  color_map_func : Callable, Optional
@@ -328,17 +331,29 @@ class LLMInformationExtractionDocument:
  entities, relations = self._viz_preprocess()
  from ie_viz import serve
 
- serve(text=self.text,
- entities=entities,
- relations=relations,
- host=host,
- port=port,
- theme=theme,
- color_attr_key=color_attr_key,
- color_map_func=color_map_func)
-
+ try:
+ serve(text=self.text,
+ entities=entities,
+ relations=relations,
+ host=host,
+ port=port,
+ theme=theme,
+ title=title,
+ color_attr_key=color_attr_key,
+ color_map_func=color_map_func)
+ except TypeError:
+ warnings.warn("The version of ie_viz is not the latest. Please update to the latest version (pip install --upgrade ie-viz) for complete features.", UserWarning)
+ serve(text=self.text,
+ entities=entities,
+ relations=relations,
+ host=host,
+ port=port,
+ theme=theme,
+ color_attr_key=color_attr_key,
+ color_map_func=color_map_func)
 
- def viz_render(self, theme:str = "light", color_attr_key:str=None, color_map_func:Callable=None) -> str:
+ def viz_render(self, theme:str = "light", color_attr_key:str=None, color_map_func:Callable=None,
+ title:str="Frames Visualization") -> str:
  """
  This method renders visualization html of the document.
 
@@ -351,13 +366,25 @@ class LLMInformationExtractionDocument:
  color_map_func : Callable, Optional
  The function to be used for mapping the entity attributes to colors. When provided, the color_attr_key and
  theme will be overwritten. The function must take an entity dictionary as input and return a color string (hex).
+ title : str, Optional
+ The title of the HTML.
  """
  entities, relations = self._viz_preprocess()
  from ie_viz import render
 
- return render(text=self.text,
- entities=entities,
- relations=relations,
- theme=theme,
- color_attr_key=color_attr_key,
- color_map_func=color_map_func)
+ try:
+ return render(text=self.text,
+ entities=entities,
+ relations=relations,
+ theme=theme,
+ title=title,
+ color_attr_key=color_attr_key,
+ color_map_func=color_map_func)
+ except TypeError:
+ warnings.warn("The version of ie_viz is not the latest. Please update to the latest version (pip install --upgrade ie-viz) for complete features.", UserWarning)
+ return render(text=self.text,
+ entities=entities,
+ relations=relations,
+ theme=theme,
+ color_attr_key=color_attr_key,
+ color_map_func=color_map_func)
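
Net effect of the change above: `viz_serve()` and `viz_render()` now accept a `title` keyword and, via the `TypeError` fallback, degrade gracefully when an older `ie_viz` is installed. A minimal usage sketch (the `LLMInformationExtractionFrame` constructor arguments are inferred from the frame dictionaries shown in the README below and should be treated as illustrative):

```python
from llm_ie.data_types import (LLMInformationExtractionDocument,
                               LLMInformationExtractionFrame)

# A toy document with a single frame.
doc = LLMInformationExtractionDocument(doc_id="demo_note",
                                       text="Diagnosis: Hypertension since 2010.")
doc.add_frame(LLMInformationExtractionFrame(frame_id="0", start=11, end=23,
                                            entity_text="Hypertension",
                                            attr={"Date": "2010-01-01"}))

# New in 0.4.1: `title` sets the HTML title. With an outdated ie_viz,
# a UserWarning is emitted and the call is retried without `title`.
html = doc.viz_render(theme="dark", title="Frames Visualization")
doc.viz_serve(port=5000, title="Frames Visualization")
```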
{llm_ie-0.4.0.dist-info → llm_ie-0.4.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: llm-ie
- Version: 0.4.0
+ Version: 0.4.1
  Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
  License: MIT
  Author: Enshuo (David) Hsu
@@ -35,9 +35,10 @@ An LLM-powered tool that transforms everyday language into robust information ex
  - [v0.3.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.1) (Oct 26, 2024): Added Sentence Review Frame Extractor and Sentence CoT Frame Extractor
  - [v0.3.4](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.4) (Nov 24, 2024): Added entity fuzzy search.
  - [v0.3.5](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.5) (Nov 27, 2024): Adopted `json_repair` to fix broken JSON from LLM outputs.
- - v0.4.0:
+ - [v0.4.0](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.0) (Jan 4, 2025):
  - Concurrent LLM inferencing to speed up frame and relation extraction.
  - Support for LiteLLM.
+ - [v0.4.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.1) (Jan 25, 2025): Added filters, table view, and some new features to the visualization tool (make sure to update [ie-viz](https://github.com/daviden1013/ie-viz)).
 
  ## Table of Contents
  - [Overview](#overview)
@@ -62,7 +63,7 @@ LLM-IE is a toolkit that provides robust information extraction utilities for na
  <div align="center"><img src="doc_asset/readme_img/LLM-IE flowchart.png" width=800 ></div>
 
  ## Prerequisite
- At least one LLM inference engine is required. There are built-in supports for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), <img src=doc_asset/readme_img/openai-logomark.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction), and <img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> [vLLM](https://github.com/vllm-project/vllm). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.
+ At least one LLM inference engine is required. There are built-in supports for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), <img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction), and <img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> [vLLM](https://github.com/vllm-project/vllm). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.
 
  ## Installation
  The Python package is available on PyPI.
@@ -88,7 +89,7 @@ inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.3-70B-Instruct",
  </details>
 
  <details>
- <summary><img src=doc_asset/readme_img/openai-logomark.png width=16 /> OpenAI API</summary>
+ <summary><img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> OpenAI API</summary>
 
  Follow the [Best Practices for API Key Safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety) to set up API key.
  ```python
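# Illustrative sketch, not part of the diff: the engine class name and model
# below are assumptions (verify against llm_ie.engines); the key is read from
# the OPENAI_API_KEY environment variable set up per the link above.
from llm_ie.engines import OpenAIInferenceEngine
inference_engine = OpenAIInferenceEngine(model="gpt-4o-mini")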
@@ -109,7 +110,7 @@ inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-
  </details>
 
  <details>
- <summary><img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> Ollama</summary>
+ <summary><img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> Ollama</summary>
 
  ```python
  from llm_ie.engines import OllamaInferenceEngine
@@ -157,12 +158,12 @@ We start with a casual description:
 
  Define the AI prompt editor.
  ```python
- from llm_ie import OllamaInferenceEngine, PromptEditor, BasicFrameExtractor
+ from llm_ie import OllamaInferenceEngine, PromptEditor, SentenceFrameExtractor
 
  # Define a LLM inference engine
  inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
  # Define LLM prompt editor
- editor = PromptEditor(inference_engine, BasicFrameExtractor)
+ editor = PromptEditor(inference_engine, SentenceFrameExtractor)
  # Start chat
  editor.chat()
  ```
@@ -171,7 +172,7 @@ This opens an interactive session:
  <div align="left"><img src=doc_asset/readme_img/terminal_chat.PNG width=1000 ></div>
 
 
- The ```PromptEditor``` drafts a prompt template following the schema required by the ```BasicFrameExtractor```:
+ The ```PromptEditor``` drafts a prompt template following the schema required by the ```SentenceFrameExtractor```:
 
  ```
  # Task description
@@ -209,10 +210,13 @@ with open("./demo/document/synthesized_note.txt", 'r') as f:
  note_text = f.read()
 
  # Define extractor
- extractor = BasicFrameExtractor(inference_engine, prompt_template)
+ extractor = SentenceFrameExtractor(inference_engine, prompt_template)
 
  # Extract
- frames = extractor.extract_frames(note_text, entity_key="Diagnosis", stream=True)
+ # To stream the extraction process, use concurrent=False, stream=True:
+ frames = extractor.extract_frames(note_text, entity_key="Diagnosis", concurrent=False, stream=True)
+ # For faster extraction, use concurrent=True to enable asynchronous prompting
+ frames = extractor.extract_frames(note_text, entity_key="Diagnosis", concurrent=True)
 
  # Check extractions
  for frame in frames:
@@ -221,10 +225,17 @@ for frame in frames:
  The output is a list of frames. Each frame has an ```entity_text```, ```start```, ```end```, and a dictionary of ```attr```.
 
  ```python
- {'frame_id': '0', 'start': 537, 'end': 549, 'entity_text': 'Hypertension', 'attr': {'Datetime': '2010', 'Status': 'history'}}
- {'frame_id': '1', 'start': 551, 'end': 565, 'entity_text': 'Hyperlipidemia', 'attr': {'Datetime': '2015', 'Status': 'history'}}
- {'frame_id': '2', 'start': 571, 'end': 595, 'entity_text': 'Type 2 Diabetes Mellitus', 'attr': {'Datetime': '2018', 'Status': 'history'}}
- {'frame_id': '3', 'start': 2402, 'end': 2431, 'entity_text': 'Acute Coronary Syndrome (ACS)', 'attr': {'Datetime': 'July 20, 2024', 'Status': 'present'}}
+ {'frame_id': '0', 'start': 537, 'end': 549, 'entity_text': 'hypertension', 'attr': {'Date': '2010-01-01', 'Status': 'Active'}}
+ {'frame_id': '1', 'start': 551, 'end': 565, 'entity_text': 'hyperlipidemia', 'attr': {'Date': '2015-01-01', 'Status': 'Active'}}
+ {'frame_id': '2', 'start': 571, 'end': 595, 'entity_text': 'Type 2 diabetes mellitus', 'attr': {'Date': '2018-01-01', 'Status': 'Active'}}
+ {'frame_id': '3', 'start': 660, 'end': 670, 'entity_text': 'chest pain', 'attr': {'Date': 'July 18, 2024'}}
+ {'frame_id': '4', 'start': 991, 'end': 1003, 'entity_text': 'Hypertension', 'attr': {'Date': '2010-01-01'}}
+ {'frame_id': '5', 'start': 1026, 'end': 1040, 'entity_text': 'Hyperlipidemia', 'attr': {'Date': '2015-01-01'}}
+ {'frame_id': '6', 'start': 1063, 'end': 1087, 'entity_text': 'Type 2 Diabetes Mellitus', 'attr': {'Date': '2018-01-01'}}
+ {'frame_id': '7', 'start': 1926, 'end': 1947, 'entity_text': 'ST-segment depression', 'attr': None}
+ {'frame_id': '8', 'start': 2049, 'end': 2066, 'entity_text': 'acute infiltrates', 'attr': None}
+ {'frame_id': '9', 'start': 2117, 'end': 2150, 'entity_text': 'Mild left ventricular hypertrophy', 'attr': None}
+ {'frame_id': '10', 'start': 2402, 'end': 2425, 'entity_text': 'acute coronary syndrome', 'attr': {'Date': 'July 20, 2024', 'Status': 'Active'}}
  ```
 
  We can save the frames to a document object for better management. The document holds ```text``` and ```frames```. The ```add_frame()``` method performs validation and (if passed) adds a frame to the document.
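
Continuing from the Quick Start snippet above, a minimal sketch of that workflow (`add_frame()` is the documented validation entry point; the `save()` helper and `.llmie` extension are assumptions based on the project's demos):

```python
from llm_ie.data_types import LLMInformationExtractionDocument

# Wrap the source text in a document and attach the extracted frames.
doc = LLMInformationExtractionDocument(doc_id="synthesized_note", text=note_text)
for frame in frames:
    doc.add_frame(frame)  # each frame is validated before being added

doc.save("./synthesized_note.llmie")  # assumed persistence helper
```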
@@ -298,7 +309,7 @@ inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.1-8B-Instruct",
  inference_engine = LiteLLMInferenceEngine(model="ollama/llama3.1:8b-instruct-q8_0")
  ```
 
- #### <img src=doc_asset/readme_img/openai-logomark.png width=16 /> OpenAI API
+ #### <img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> OpenAI API
  In bash, save the API key to the environment variable ```OPENAI_API_KEY```.
  ```
  export OPENAI_API_KEY=<your_API_key>
@@ -322,7 +333,7 @@ from llm_ie.engines import HuggingFaceHubInferenceEngine
  inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
  ```
 
- #### <img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> Ollama
+ #### <img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> Ollama
  The ```model_name``` must match the names on the [Ollama library](https://ollama.com/library). Use the command line ```ollama ls``` to check your local model list. ```num_ctx``` determines the context length the LLM considers during text generation. Empirically, a longer context length gives better performance, while consuming more memory and increasing computation. ```keep_alive``` regulates the lifespan of the LLM: it indicates the number of seconds to hold the LLM in memory after the last API call. Default is 5 minutes (300 sec).
 
  ```python
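# Continuation sketch, not part of the diff: parameter names follow the
# description above; the values shown are illustrative.
from llm_ie.engines import OllamaInferenceEngine
inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0",
                                         num_ctx=4096,    # context length considered during generation
                                         keep_alive=300)  # seconds to hold the model after the last call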
@@ -1073,6 +1084,9 @@ relations = extractor.extract_relations(doc, concurrent=False, stream=True)
  </details>
 
  ### Visualization
+
+ <div align="center"><img src="doc_asset/readme_img/visualization.PNG" width=95% ></div>
+
  The `LLMInformationExtractionDocument` class supports named entity, entity attributes, and relation visualization. The implementation is through our plug-in package [ie-viz](https://github.com/daviden1013/ie-viz). Check the example Jupyter Notebook [NER + RE for Drug, Strength, Frequency](demo/medication_relation_extraction.ipynb) for a working demo.
 
  ```cmd
{llm_ie-0.4.0.dist-info → llm_ie-0.4.1.dist-info}/RECORD RENAMED
@@ -14,10 +14,10 @@ llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=m7iX4Qjsf
  llm_ie/asset/prompt_guide/SentenceCoTFrameExtractor_prompt_guide.txt,sha256=T4NsO33s3KSJml-klzXAJiYox0kiuxGo-ou2a2Ig2SY,14225
  llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=oKH_QeDgpw771ZdHk3L7DYz2Jvfm7OolUoTiJyMJI30,9541
  llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=oKH_QeDgpw771ZdHk3L7DYz2Jvfm7OolUoTiJyMJI30,9541
- llm_ie/data_types.py,sha256=hPz3WOeAzfn2QKmb0CxHmRdQWZQ4G9zq8U-RJBVFdYk,14329
+ llm_ie/data_types.py,sha256=3-FsQPlcUwMQLA9IOM5qaCFtWfiwrS80w-18XKgPN5w,15729
  llm_ie/engines.py,sha256=lz2HODoqlndgezdT76diXKN_wgb7mjl6hX3JuCwsH-g,15191
  llm_ie/extractors.py,sha256=CpEuSqzlYd3u8Qwiu7Qdd26iII2pci1nNKxGz8sv1ZU,84506
  llm_ie/prompt_editor.py,sha256=pw_FOsEeWxFJ1p5lYR93cTNMqKQ-YZHzgBmRbPm7aNE,9486
- llm_ie-0.4.0.dist-info/METADATA,sha256=o721Obb1copeoFz34bz_B7am2i1Vi1xMpK5QkWn4R6A,51119
- llm_ie-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- llm_ie-0.4.0.dist-info/RECORD,,
+ llm_ie-0.4.1.dist-info/METADATA,sha256=9oa9Li6ailbEqLENTC4F4DyUQApswK0ecpS0NKEhXVM,52527
+ llm_ie-0.4.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ llm_ie-0.4.1.dist-info/RECORD,,
{llm_ie-0.4.0.dist-info → llm_ie-0.4.1.dist-info}/WHEEL RENAMED
File without changes