llm-ie 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_ie/__init__.py +2 -2
- llm_ie/asset/prompt_guide/AttributeExtractor_prompt_guide.txt +52 -0
- llm_ie/engines.py +8 -6
- llm_ie/extractors.py +409 -460
- llm_ie/prompt_editor.py +88 -34
- {llm_ie-1.1.0.dist-info → llm_ie-1.2.1.dist-info}/METADATA +1 -1
- {llm_ie-1.1.0.dist-info → llm_ie-1.2.1.dist-info}/RECORD +8 -7
- {llm_ie-1.1.0.dist-info → llm_ie-1.2.1.dist-info}/WHEEL +0 -0
llm_ie/__init__.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
from .data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
|
|
2
2
|
from .engines import BasicLLMConfig, Qwen3LLMConfig, OpenAIReasoningLLMConfig, LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, AzureOpenAIInferenceEngine, LiteLLMInferenceEngine
|
|
3
|
-
from .extractors import DirectFrameExtractor, ReviewFrameExtractor, BasicFrameExtractor, BasicReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
|
|
3
|
+
from .extractors import DirectFrameExtractor, ReviewFrameExtractor, BasicFrameExtractor, BasicReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, AttributeExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
|
|
4
4
|
from .chunkers import UnitChunker, WholeDocumentUnitChunker, SentenceUnitChunker, TextLineUnitChunker, ContextChunker, NoContextChunker, WholeDocumentContextChunker, SlideWindowContextChunker
|
|
5
5
|
from .prompt_editor import PromptEditor
|
|
6
6
|
|
|
7
7
|
__all__ = ["LLMInformationExtractionFrame", "LLMInformationExtractionDocument",
|
|
8
8
|
"BasicLLMConfig", "Qwen3LLMConfig", "OpenAIReasoningLLMConfig", "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "AzureOpenAIInferenceEngine", "LiteLLMInferenceEngine",
|
|
9
|
-
"DirectFrameExtractor", "ReviewFrameExtractor", "BasicFrameExtractor", "BasicReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
|
|
9
|
+
"DirectFrameExtractor", "ReviewFrameExtractor", "BasicFrameExtractor", "BasicReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "AttributeExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
|
|
10
10
|
"UnitChunker", "WholeDocumentUnitChunker", "SentenceUnitChunker", "TextLineUnitChunker", "ContextChunker", "NoContextChunker", "WholeDocumentContextChunker", "SlideWindowContextChunker",
|
|
11
11
|
"PromptEditor"]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
Prompt Template Design:
|
|
2
|
+
|
|
3
|
+
1. Task Description:
|
|
4
|
+
Provide a detailed description of the task, including the background and the type of task (e.g., attribute extraction task).
|
|
5
|
+
|
|
6
|
+
2. Schema Definition:
|
|
7
|
+
List the attributes to extract, and provide clear definitions for each one.
|
|
8
|
+
|
|
9
|
+
3. Output Format Definition:
|
|
10
|
+
The output should be a JSON list, where each attribute is a key. The values could be any structure (e.g., str, int, List[str]).
|
|
11
|
+
|
|
12
|
+
4. Optional: Hints:
|
|
13
|
+
Provide itemized hints for the information extractors to guide the extraction process. Remind the prompted agent to be truthful. Emphasize that the prompted agent is supposed to perform the task itself instead of writing code or instructing other agents to do it.
|
|
14
|
+
|
|
15
|
+
5. Optional: Examples:
|
|
16
|
+
Include examples in the format:
|
|
17
|
+
Input: ...
|
|
18
|
+
Output: ...
|
|
19
|
+
|
|
20
|
+
6. Entity:
|
|
21
|
+
The template must include a placeholder {{frame}} for the entity.
|
|
22
|
+
|
|
23
|
+
7. Context:
|
|
24
|
+
The template must include a placeholder {{context}} for the context. Explain to the prompted agent that <Entity> tags are used to mark the entity in the context.
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
Example:
|
|
28
|
+
|
|
29
|
+
### Task description
|
|
30
|
+
This is an attribute extraction task. Given a diagnosis entity and the context, you need to generate attributes for the entity.
|
|
31
|
+
|
|
32
|
+
### Schema definition
|
|
33
|
+
"Date" which is the date when the diagnosis was made in MM/DD/YYYY format,
|
|
34
|
+
"Status" which is the current status of the diagnosis (e.g. active, resolved, etc.)
|
|
35
|
+
|
|
36
|
+
### Output format definition
|
|
37
|
+
Your output should follow the JSON format:
|
|
38
|
+
{"Date": "<MM/DD/YYYY>", "Status": "<status>"}
|
|
39
|
+
|
|
40
|
+
I am only interested in the content between []. Do not explain your answer.
|
|
41
|
+
|
|
42
|
+
### Hints
|
|
43
|
+
- If the date is not complete, use the first available date in the context. For example, if the date is 01/2023, you should return 01/01/2023.
|
|
44
|
+
- If the status is not available, you should return "not specified".
|
|
45
|
+
|
|
46
|
+
### Entity
|
|
47
|
+
Information about the entity to extract attributes from:
|
|
48
|
+
{{frame}}
|
|
49
|
+
|
|
50
|
+
### Context
|
|
51
|
+
Context for the entity. The <Entity> tags are used to mark the entity in the context.
|
|
52
|
+
{{context}}
|
llm_ie/engines.py
CHANGED
|
@@ -185,10 +185,11 @@ class Qwen3LLMConfig(LLMConfig):
|
|
|
185
185
|
|
|
186
186
|
|
|
187
187
|
class OpenAIReasoningLLMConfig(LLMConfig):
|
|
188
|
-
def __init__(self, reasoning_effort:str=
|
|
188
|
+
def __init__(self, reasoning_effort:str=None, **kwargs):
|
|
189
189
|
"""
|
|
190
190
|
The OpenAI "o" series configuration.
|
|
191
|
-
1. The reasoning effort
|
|
191
|
+
1. The reasoning effort as one of {"low", "medium", "high"}.
|
|
192
|
+
For models that do not support setting reasoning effort (e.g., o1-mini, o1-preview), set to None.
|
|
192
193
|
2. The temperature parameter is not supported and will be ignored.
|
|
193
194
|
3. The system prompt is not supported and will be concatenated to the next user prompt.
|
|
194
195
|
|
|
@@ -198,11 +199,12 @@ class OpenAIReasoningLLMConfig(LLMConfig):
|
|
|
198
199
|
the reasoning effort. Must be one of {"low", "medium", "high"}. Default is "low".
|
|
199
200
|
"""
|
|
200
201
|
super().__init__(**kwargs)
|
|
201
|
-
if reasoning_effort not
|
|
202
|
-
|
|
202
|
+
if reasoning_effort is not None:
|
|
203
|
+
if reasoning_effort not in ["low", "medium", "high"]:
|
|
204
|
+
raise ValueError("reasoning_effort must be one of {'low', 'medium', 'high'}.")
|
|
203
205
|
|
|
204
|
-
|
|
205
|
-
|
|
206
|
+
self.reasoning_effort = reasoning_effort
|
|
207
|
+
self.params["reasoning_effort"] = self.reasoning_effort
|
|
206
208
|
|
|
207
209
|
if "temperature" in self.params:
|
|
208
210
|
warnings.warn("Reasoning models do not support temperature parameter. Will be ignored.", UserWarning)
|