llm-ie 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llm_ie/data_types.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from typing import List, Dict, Tuple, Iterable, Callable
2
2
  import importlib.util
3
+ import warnings
3
4
  import json
4
5
 
5
6
 
@@ -203,7 +204,7 @@ class LLMInformationExtractionDocument:
203
204
  # Add frame
204
205
  frame_clone = frame.copy()
205
206
  if create_id:
206
- frame_clone.doc_id = f"{self.doc_id}_{len(self.frames)}"
207
+ frame_clone.frame_id = str(len(self.frames))
207
208
 
208
209
  self.frames.append(frame_clone)
209
210
  return True
@@ -306,7 +307,7 @@ class LLMInformationExtractionDocument:
306
307
  return entities, relations
307
308
 
308
309
 
309
- def viz_serve(self, host: str = '0.0.0.0', port: int = 5000, theme:str = "light",
310
+ def viz_serve(self, host: str = '0.0.0.0', port: int = 5000, theme:str = "light", title:str="Frames Visualization",
310
311
  color_attr_key:str=None, color_map_func:Callable=None):
311
312
  """
312
313
  This method serves a visualization App of the document.
@@ -319,6 +320,8 @@ class LLMInformationExtractionDocument:
319
320
  The port number to run the server on.
320
321
  theme : str, Optional
321
322
  The theme of the visualization. Must be either "light" or "dark".
323
+ title : str, Optional
324
+ The title of the HTML.
322
325
  color_attr_key : str, Optional
323
326
  The attribute key to be used for coloring the entities.
324
327
  color_map_func : Callable, Optional
@@ -328,17 +331,29 @@ class LLMInformationExtractionDocument:
328
331
  entities, relations = self._viz_preprocess()
329
332
  from ie_viz import serve
330
333
 
331
- serve(text=self.text,
332
- entities=entities,
333
- relations=relations,
334
- host=host,
335
- port=port,
336
- theme=theme,
337
- color_attr_key=color_attr_key,
338
- color_map_func=color_map_func)
339
-
334
+ try:
335
+ serve(text=self.text,
336
+ entities=entities,
337
+ relations=relations,
338
+ host=host,
339
+ port=port,
340
+ theme=theme,
341
+ title=title,
342
+ color_attr_key=color_attr_key,
343
+ color_map_func=color_map_func)
344
+ except TypeError:
345
+ warnings.warn("The version of ie_viz is not the latest. Please update to the latest version (pip install --upgrade ie-viz) for complete features.", UserWarning)
346
+ serve(text=self.text,
347
+ entities=entities,
348
+ relations=relations,
349
+ host=host,
350
+ port=port,
351
+ theme=theme,
352
+ color_attr_key=color_attr_key,
353
+ color_map_func=color_map_func)
340
354
 
341
- def viz_render(self, theme:str = "light", color_attr_key:str=None, color_map_func:Callable=None) -> str:
355
+ def viz_render(self, theme:str = "light", color_attr_key:str=None, color_map_func:Callable=None,
356
+ title:str="Frames Visualization") -> str:
342
357
  """
343
358
  This method renders visualization html of the document.
344
359
 
@@ -351,13 +366,25 @@ class LLMInformationExtractionDocument:
351
366
  color_map_func : Callable, Optional
352
367
  The function to be used for mapping the entity attributes to colors. When provided, the color_attr_key and
353
368
  theme will be overwritten. The function must take an entity dictionary as input and return a color string (hex).
369
+ title : str, Optional
370
+ The title of the HTML.
354
371
  """
355
372
  entities, relations = self._viz_preprocess()
356
373
  from ie_viz import render
357
374
 
358
- return render(text=self.text,
359
- entities=entities,
360
- relations=relations,
361
- theme=theme,
362
- color_attr_key=color_attr_key,
363
- color_map_func=color_map_func)
375
+ try:
376
+ return render(text=self.text,
377
+ entities=entities,
378
+ relations=relations,
379
+ theme=theme,
380
+ title=title,
381
+ color_attr_key=color_attr_key,
382
+ color_map_func=color_map_func)
383
+ except TypeError:
384
+ warnings.warn("The version of ie_viz is not the latest. Please update to the latest version (pip install --upgrade ie-viz) for complete features.", UserWarning)
385
+ return render(text=self.text,
386
+ entities=entities,
387
+ relations=relations,
388
+ theme=theme,
389
+ color_attr_key=color_attr_key,
390
+ color_map_func=color_map_func)
llm_ie/extractors.py CHANGED
@@ -59,7 +59,7 @@ class Extractor:
59
59
  text_content : Union[str, Dict[str,str]]
60
60
  the input text content to put in prompt template.
61
61
  If str, the prompt template must have only 1 placeholder {{<placeholder name>}}, regardless of placeholder name.
62
- If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}.
62
+ If dict, all the keys must be included in the prompt template placeholder {{<placeholder name>}}. All values must be str.
63
63
 
64
64
  Returns : str
65
65
  a user prompt.
@@ -73,6 +73,10 @@ class Extractor:
73
73
  prompt = pattern.sub(text, self.prompt_template)
74
74
 
75
75
  elif isinstance(text_content, dict):
76
+ # Check if all values are str
77
+ if not all([isinstance(v, str) for v in text_content.values()]):
78
+ raise ValueError("All values in text_content must be str.")
79
+ # Check if all keys are in the prompt template
76
80
  placeholders = pattern.findall(self.prompt_template)
77
81
  if len(placeholders) != len(text_content):
78
82
  raise ValueError(f"Expect text_content ({len(text_content)}) and prompt template placeholder ({len(placeholders)}) to have equal size.")
@@ -422,6 +426,13 @@ class BasicFrameExtractor(FrameExtractor):
422
426
  Return : List[LLMInformationExtractionFrame]
423
427
  a list of frames.
424
428
  """
429
+ if isinstance(text_content, str):
430
+ text = text_content
431
+ elif isinstance(text_content, dict):
432
+ if document_key is None:
433
+ raise ValueError("document_key must be provided when text_content is dict.")
434
+ text = text_content[document_key]
435
+
425
436
  frame_list = []
426
437
  gen_text = self.extract(text_content=text_content,
427
438
  max_new_tokens=max_new_tokens,
@@ -435,11 +446,6 @@ class BasicFrameExtractor(FrameExtractor):
435
446
  entity_json.append(entity)
436
447
  else:
437
448
  warnings.warn(f'Extractor output "{entity}" does not have entity_key ("{entity_key}"). This frame will be dropped.', RuntimeWarning)
438
-
439
- if isinstance(text_content, str):
440
- text = text_content
441
- elif isinstance(text_content, dict):
442
- text = text_content[document_key]
443
449
 
444
450
  spans = self._find_entity_spans(text=text,
445
451
  entities=[e[entity_key] for e in entity_json],
@@ -645,6 +651,8 @@ class SentenceFrameExtractor(FrameExtractor):
645
651
  if isinstance(text_content, str):
646
652
  sentences = self._get_sentences(text_content)
647
653
  elif isinstance(text_content, dict):
654
+ if document_key is None:
655
+ raise ValueError("document_key must be provided when text_content is dict.")
648
656
  sentences = self._get_sentences(text_content[document_key])
649
657
  # construct chat messages
650
658
  messages = []
@@ -715,6 +723,8 @@ class SentenceFrameExtractor(FrameExtractor):
715
723
  if isinstance(text_content, str):
716
724
  sentences = self._get_sentences(text_content)
717
725
  elif isinstance(text_content, dict):
726
+ if document_key is None:
727
+ raise ValueError("document_key must be provided when text_content is dict.")
718
728
  sentences = self._get_sentences(text_content[document_key])
719
729
  # construct chat messages
720
730
  base_messages = []
@@ -933,6 +943,8 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
933
943
  if isinstance(text_content, str):
934
944
  sentences = self._get_sentences(text_content)
935
945
  elif isinstance(text_content, dict):
946
+ if document_key is None:
947
+ raise ValueError("document_key must be provided when text_content is dict.")
936
948
  sentences = self._get_sentences(text_content[document_key])
937
949
  # construct chat messages
938
950
  messages = []
@@ -1025,6 +1037,8 @@ class SentenceReviewFrameExtractor(SentenceFrameExtractor):
1025
1037
  if isinstance(text_content, str):
1026
1038
  sentences = self._get_sentences(text_content)
1027
1039
  elif isinstance(text_content, dict):
1040
+ if document_key is None:
1041
+ raise ValueError("document_key must be provided when text_content is dict.")
1028
1042
  sentences = self._get_sentences(text_content[document_key])
1029
1043
  # construct chat messages
1030
1044
  base_messages = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: llm-ie
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
5
5
  License: MIT
6
6
  Author: Enshuo (David) Hsu
@@ -35,9 +35,10 @@ An LLM-powered tool that transforms everyday language into robust information ex
35
35
  - [v0.3.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.1) (Oct 26, 2024): Added Sentence Review Frame Extractor and Sentence CoT Frame Extractor
36
36
  - [v0.3.4](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.4) (Nov 24, 2024): Added entity fuzzy search.
37
37
  - [v0.3.5](https://github.com/daviden1013/llm-ie/releases/tag/v0.3.5) (Nov 27, 2024): Adopted `json_repair` to fix broken JSON from LLM outputs.
38
- - v0.4.0:
38
+ - [v0.4.0](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.0) (Jan 4, 2025):
39
39
  - Concurrent LLM inferencing to speed up frame and relation extraction.
40
40
  - Support for LiteLLM.
41
+ - [v0.4.1](https://github.com/daviden1013/llm-ie/releases/tag/v0.4.1) (Jan 25, 2025): Added filters, table view, and some new features to visualization tool (make sure to update [ie-viz](https://github.com/daviden1013/ie-viz)).
41
42
 
42
43
  ## Table of Contents
43
44
  - [Overview](#overview)
@@ -62,7 +63,7 @@ LLM-IE is a toolkit that provides robust information extraction utilities for na
62
63
  <div align="center"><img src="doc_asset/readme_img/LLM-IE flowchart.png" width=800 ></div>
63
64
 
64
65
  ## Prerequisite
65
- At least one LLM inference engine is required. There are built-in supports for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), <img src=doc_asset/readme_img/openai-logomark.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction), and <img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> [vLLM](https://github.com/vllm-project/vllm). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.
66
+ At least one LLM inference engine is required. There is built-in support for 🚅 [LiteLLM](https://github.com/BerriAI/litellm), 🦙 [Llama-cpp-python](https://github.com/abetlen/llama-cpp-python), <img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> [Ollama](https://github.com/ollama/ollama), 🤗 [Huggingface_hub](https://github.com/huggingface/huggingface_hub), <img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> [OpenAI API](https://platform.openai.com/docs/api-reference/introduction), and <img src=doc_asset/readme_img/vllm-logo_small.png width=20 /> [vLLM](https://github.com/vllm-project/vllm). For installation guides, please refer to those projects. Other inference engines can be configured through the [InferenceEngine](src/llm_ie/engines.py) abstract class. See [LLM Inference Engine](#llm-inference-engine) section below.
66
67
 
67
68
  ## Installation
68
69
  The Python package is available on PyPI.
@@ -88,7 +89,7 @@ inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.3-70B-Instruct",
88
89
  </details>
89
90
 
90
91
  <details>
91
- <summary><img src=doc_asset/readme_img/openai-logomark.png width=16 /> OpenAI API</summary>
92
+ <summary><img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> OpenAI API</summary>
92
93
 
93
94
  Follow the [Best Practices for API Key Safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety) to set up API key.
94
95
  ```python
@@ -109,7 +110,7 @@ inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-
109
110
  </details>
110
111
 
111
112
  <details>
112
- <summary><img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> Ollama</summary>
113
+ <summary><img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> Ollama</summary>
113
114
 
114
115
  ```python
115
116
  from llm_ie.engines import OllamaInferenceEngine
@@ -157,12 +158,12 @@ We start with a casual description:
157
158
 
158
159
  Define the AI prompt editor.
159
160
  ```python
160
- from llm_ie import OllamaInferenceEngine, PromptEditor, BasicFrameExtractor
161
+ from llm_ie import OllamaInferenceEngine, PromptEditor, SentenceFrameExtractor
161
162
 
162
163
  # Define a LLM inference engine
163
164
  inference_engine = OllamaInferenceEngine(model_name="llama3.1:8b-instruct-q8_0")
164
165
  # Define LLM prompt editor
165
- editor = PromptEditor(inference_engine, BasicFrameExtractor)
166
+ editor = PromptEditor(inference_engine, SentenceFrameExtractor)
166
167
  # Start chat
167
168
  editor.chat()
168
169
  ```
@@ -171,7 +172,7 @@ This opens an interactive session:
171
172
  <div align="left"><img src=doc_asset/readme_img/terminal_chat.PNG width=1000 ></div>
172
173
 
173
174
 
174
- The ```PromptEditor``` drafts a prompt template following the schema required by the ```BasicFrameExtractor```:
175
+ The ```PromptEditor``` drafts a prompt template following the schema required by the ```SentenceFrameExtractor```:
175
176
 
176
177
  ```
177
178
  # Task description
@@ -209,10 +210,13 @@ with open("./demo/document/synthesized_note.txt", 'r') as f:
209
210
  note_text = f.read()
210
211
 
211
212
  # Define extractor
212
- extractor = BasicFrameExtractor(inference_engine, prompt_template)
213
+ extractor = SentenceFrameExtractor(inference_engine, prompt_template)
213
214
 
214
215
  # Extract
215
- frames = extractor.extract_frames(note_text, entity_key="Diagnosis", stream=True)
216
+ # To stream the extraction process, use concurrent=False, stream=True:
217
+ frames = extractor.extract_frames(note_text, entity_key="Diagnosis", concurrent=False, stream=True)
218
+ # For faster extraction, use concurrent=True to enable asynchronous prompting
219
+ frames = extractor.extract_frames(note_text, entity_key="Diagnosis", concurrent=True)
216
220
 
217
221
  # Check extractions
218
222
  for frame in frames:
@@ -221,10 +225,17 @@ for frame in frames:
221
225
  The output is a list of frames. Each frame has an ```entity_text```, ```start```, ```end```, and a dictionary of ```attr```.
222
226
 
223
227
  ```python
224
- {'frame_id': '0', 'start': 537, 'end': 549, 'entity_text': 'Hypertension', 'attr': {'Datetime': '2010', 'Status': 'history'}}
225
- {'frame_id': '1', 'start': 551, 'end': 565, 'entity_text': 'Hyperlipidemia', 'attr': {'Datetime': '2015', 'Status': 'history'}}
226
- {'frame_id': '2', 'start': 571, 'end': 595, 'entity_text': 'Type 2 Diabetes Mellitus', 'attr': {'Datetime': '2018', 'Status': 'history'}}
227
- {'frame_id': '3', 'start': 2402, 'end': 2431, 'entity_text': 'Acute Coronary Syndrome (ACS)', 'attr': {'Datetime': 'July 20, 2024', 'Status': 'present'}}
228
+ {'frame_id': '0', 'start': 537, 'end': 549, 'entity_text': 'hypertension', 'attr': {'Date': '2010-01-01', 'Status': 'Active'}}
229
+ {'frame_id': '1', 'start': 551, 'end': 565, 'entity_text': 'hyperlipidemia', 'attr': {'Date': '2015-01-01', 'Status': 'Active'}}
230
+ {'frame_id': '2', 'start': 571, 'end': 595, 'entity_text': 'Type 2 diabetes mellitus', 'attr': {'Date': '2018-01-01', 'Status': 'Active'}}
231
+ {'frame_id': '3', 'start': 660, 'end': 670, 'entity_text': 'chest pain', 'attr': {'Date': 'July 18, 2024'}}
232
+ {'frame_id': '4', 'start': 991, 'end': 1003, 'entity_text': 'Hypertension', 'attr': {'Date': '2010-01-01'}}
233
+ {'frame_id': '5', 'start': 1026, 'end': 1040, 'entity_text': 'Hyperlipidemia', 'attr': {'Date': '2015-01-01'}}
234
+ {'frame_id': '6', 'start': 1063, 'end': 1087, 'entity_text': 'Type 2 Diabetes Mellitus', 'attr': {'Date': '2018-01-01'}}
235
+ {'frame_id': '7', 'start': 1926, 'end': 1947, 'entity_text': 'ST-segment depression', 'attr': None}
236
+ {'frame_id': '8', 'start': 2049, 'end': 2066, 'entity_text': 'acute infiltrates', 'attr': None}
237
+ {'frame_id': '9', 'start': 2117, 'end': 2150, 'entity_text': 'Mild left ventricular hypertrophy', 'attr': None}
238
+ {'frame_id': '10', 'start': 2402, 'end': 2425, 'entity_text': 'acute coronary syndrome', 'attr': {'Date': 'July 20, 2024', 'Status': 'Active'}}
228
239
  ```
229
240
 
230
241
  We can save the frames to a document object for better management. The document holds ```text``` and ```frames```. The ```add_frame()``` method performs validation and (if passed) adds a frame to the document.
@@ -298,7 +309,7 @@ inference_engine = LiteLLMInferenceEngine(model="openai/Llama-3.1-8B-Instruct",
298
309
  inference_engine = LiteLLMInferenceEngine(model="ollama/llama3.1:8b-instruct-q8_0")
299
310
  ```
300
311
 
301
- #### <img src=doc_asset/readme_img/openai-logomark.png width=16 /> OpenAI API
312
+ #### <img src=doc_asset/readme_img/openai-logomark_white.png width=16 /> OpenAI API
302
313
  In bash, save API key to the environmental variable ```OPENAI_API_KEY```.
303
314
  ```
304
315
  export OPENAI_API_KEY=<your_API_key>
@@ -322,7 +333,7 @@ from llm_ie.engines import HuggingFaceHubInferenceEngine
322
333
  inference_engine = HuggingFaceHubInferenceEngine(model="meta-llama/Meta-Llama-3-8B-Instruct")
323
334
  ```
324
335
 
325
- #### <img src="doc_asset/readme_img/ollama_icon_small.png" alt="Icon" width="18"/> Ollama
336
+ #### <img src="doc_asset/readme_img/ollama_icon.png" alt="Icon" width="22"/> Ollama
326
337
  The ```model_name``` must match the names on the [Ollama library](https://ollama.com/library). Use the command line ```ollama ls``` to check your local model list. ```num_ctx``` determines the context length LLM will consider during text generation. Empirically, longer context length gives better performance, while consuming more memory and increases computation. ```keep_alive``` regulates the lifespan of LLM. It indicates a number of seconds to hold the LLM after the last API call. Default is 5 minutes (300 sec).
327
338
 
328
339
  ```python
@@ -1073,6 +1084,9 @@ relations = extractor.extract_relations(doc, concurrent=False, stream=True)
1073
1084
  </details>
1074
1085
 
1075
1086
  ### Visualization
1087
+
1088
+ <div align="center"><img src="doc_asset/readme_img/visualization.PNG" width=95% ></div>
1089
+
1076
1090
  The `LLMInformationExtractionDocument` class supports named entity, entity attributes, and relation visualization. The implementation is through our plug-in package [ie-viz](https://github.com/daviden1013/ie-viz). Check the example Jupyter Notebook [NER + RE for Drug, Strength, Frequency](demo/medication_relation_extraction.ipynb) for a working demo.
1077
1091
 
1078
1092
  ```cmd
@@ -14,10 +14,10 @@ llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt,sha256=m7iX4Qjsf
14
14
  llm_ie/asset/prompt_guide/SentenceCoTFrameExtractor_prompt_guide.txt,sha256=T4NsO33s3KSJml-klzXAJiYox0kiuxGo-ou2a2Ig2SY,14225
15
15
  llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt,sha256=oKH_QeDgpw771ZdHk3L7DYz2Jvfm7OolUoTiJyMJI30,9541
16
16
  llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt,sha256=oKH_QeDgpw771ZdHk3L7DYz2Jvfm7OolUoTiJyMJI30,9541
17
- llm_ie/data_types.py,sha256=hPz3WOeAzfn2QKmb0CxHmRdQWZQ4G9zq8U-RJBVFdYk,14329
17
+ llm_ie/data_types.py,sha256=_Kt4Er1SMj1jg8U8TCXFJH_64prur-IbFngHKmZgWr8,15717
18
18
  llm_ie/engines.py,sha256=lz2HODoqlndgezdT76diXKN_wgb7mjl6hX3JuCwsH-g,15191
19
- llm_ie/extractors.py,sha256=CpEuSqzlYd3u8Qwiu7Qdd26iII2pci1nNKxGz8sv1ZU,84506
19
+ llm_ie/extractors.py,sha256=ueSt8jBKLnqOxu8FuqyYqEERugzd6FsI0r-pY8EboHw,85426
20
20
  llm_ie/prompt_editor.py,sha256=pw_FOsEeWxFJ1p5lYR93cTNMqKQ-YZHzgBmRbPm7aNE,9486
21
- llm_ie-0.4.0.dist-info/METADATA,sha256=o721Obb1copeoFz34bz_B7am2i1Vi1xMpK5QkWn4R6A,51119
22
- llm_ie-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
23
- llm_ie-0.4.0.dist-info/RECORD,,
21
+ llm_ie-0.4.2.dist-info/METADATA,sha256=DASy47RtSsT1d7s3nzncjUHLIyJEPV8rTrqr1jRTFnY,52527
22
+ llm_ie-0.4.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
23
+ llm_ie-0.4.2.dist-info/RECORD,,
File without changes