llm-ie 0.4.7__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. llm_ie-1.0.0/PKG-INFO +18 -0
  2. llm_ie-1.0.0/README.md +1 -0
  3. {llm_ie-0.4.7 → llm_ie-1.0.0}/pyproject.toml +1 -1
  4. llm_ie-1.0.0/src/llm_ie/__init__.py +11 -0
  5. llm_ie-1.0.0/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt +4 -0
  6. llm_ie-1.0.0/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt +3 -0
  7. llm_ie-1.0.0/src/llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt +163 -0
  8. llm_ie-1.0.0/src/llm_ie/asset/prompt_guide/BasicReviewFrameExtractor_prompt_guide.txt +163 -0
  9. llm_ie-1.0.0/src/llm_ie/asset/prompt_guide/DirectFrameExtractor_prompt_guide.txt +163 -0
  10. llm_ie-1.0.0/src/llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt +163 -0
  11. llm_ie-1.0.0/src/llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt +162 -0
  12. llm_ie-1.0.0/src/llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt +162 -0
  13. llm_ie-1.0.0/src/llm_ie/chunkers.py +191 -0
  14. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/data_types.py +75 -1
  15. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/engines.py +274 -183
  16. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/extractors.py +961 -850
  17. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/prompt_editor.py +39 -6
  18. llm_ie-0.4.7/PKG-INFO +0 -1219
  19. llm_ie-0.4.7/README.md +0 -1202
  20. llm_ie-0.4.7/src/llm_ie/__init__.py +0 -9
  21. llm_ie-0.4.7/src/llm_ie/asset/prompt_guide/BasicFrameExtractor_prompt_guide.txt +0 -145
  22. llm_ie-0.4.7/src/llm_ie/asset/prompt_guide/ReviewFrameExtractor_prompt_guide.txt +0 -145
  23. llm_ie-0.4.7/src/llm_ie/asset/prompt_guide/SentenceCoTFrameExtractor_prompt_guide.txt +0 -217
  24. llm_ie-0.4.7/src/llm_ie/asset/prompt_guide/SentenceFrameExtractor_prompt_guide.txt +0 -145
  25. llm_ie-0.4.7/src/llm_ie/asset/prompt_guide/SentenceReviewFrameExtractor_prompt_guide.txt +0 -145
  26. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/asset/PromptEditor_prompts/chat.txt +0 -0
  27. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/asset/PromptEditor_prompts/comment.txt +0 -0
  28. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/asset/PromptEditor_prompts/rewrite.txt +0 -0
  29. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/asset/PromptEditor_prompts/system.txt +0 -0
  30. /llm_ie-0.4.7/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_addition_review_prompt.txt → /llm_ie-1.0.0/src/llm_ie/asset/default_prompts/BasicReviewFrameExtractor_addition_review_prompt.txt +0 -0
  31. /llm_ie-0.4.7/src/llm_ie/asset/default_prompts/ReviewFrameExtractor_revision_review_prompt.txt → /llm_ie-1.0.0/src/llm_ie/asset/default_prompts/BasicReviewFrameExtractor_revision_review_prompt.txt +0 -0
  32. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_addition_review_prompt.txt +0 -0
  33. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/asset/default_prompts/SentenceReviewFrameExtractor_revision_review_prompt.txt +0 -0
  34. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/asset/prompt_guide/BinaryRelationExtractor_prompt_guide.txt +0 -0
  35. {llm_ie-0.4.7 → llm_ie-1.0.0}/src/llm_ie/asset/prompt_guide/MultiClassRelationExtractor_prompt_guide.txt +0 -0
llm_ie-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.1
2
+ Name: llm-ie
3
+ Version: 1.0.0
4
+ Summary: An LLM-powered tool that transforms everyday language into robust information extraction pipelines.
5
+ License: MIT
6
+ Author: Enshuo (David) Hsu
7
+ Requires-Python: >=3.11,<4.0
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: colorama (>=0.4.6,<0.5.0)
13
+ Requires-Dist: json_repair (>=0.30,<0.31)
14
+ Requires-Dist: nest_asyncio (>=1.6.0,<2.0.0)
15
+ Requires-Dist: nltk (>=3.8,<4.0)
16
+ Description-Content-Type: text/markdown
17
+
18
+ This is the readme for llm-ie Python package.
llm_ie-1.0.0/README.md ADDED
@@ -0,0 +1 @@
1
+ This is the readme for llm-ie Python package.
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "llm-ie"
3
- version = "0.4.7"
3
+ version = "1.0.0"
4
4
  description = "An LLM-powered tool that transforms everyday language into robust information extraction pipelines."
5
5
  authors = ["Enshuo (David) Hsu"]
6
6
  license = "MIT"
@@ -0,0 +1,11 @@
1
+ from .data_types import LLMInformationExtractionFrame, LLMInformationExtractionDocument
2
+ from .engines import LlamaCppInferenceEngine, OllamaInferenceEngine, HuggingFaceHubInferenceEngine, OpenAIInferenceEngine, AzureOpenAIInferenceEngine, LiteLLMInferenceEngine
3
+ from .extractors import DirectFrameExtractor, ReviewFrameExtractor, BasicFrameExtractor, BasicReviewFrameExtractor, SentenceFrameExtractor, SentenceReviewFrameExtractor, BinaryRelationExtractor, MultiClassRelationExtractor
4
+ from .chunkers import UnitChunker, WholeDocumentUnitChunker, SentenceUnitChunker, TextLineUnitChunker, ContextChunker, NoContextChunker, WholeDocumentContextChunker, SlideWindowContextChunker
5
+ from .prompt_editor import PromptEditor
6
+
7
+ __all__ = ["LLMInformationExtractionFrame", "LLMInformationExtractionDocument",
8
+ "LlamaCppInferenceEngine", "OllamaInferenceEngine", "HuggingFaceHubInferenceEngine", "OpenAIInferenceEngine", "AzureOpenAIInferenceEngine", "LiteLLMInferenceEngine",
9
+ "DirectFrameExtractor", "ReviewFrameExtractor", "BasicFrameExtractor", "BasicReviewFrameExtractor", "SentenceFrameExtractor", "SentenceReviewFrameExtractor", "BinaryRelationExtractor", "MultiClassRelationExtractor",
10
+ "UnitChunker", "WholeDocumentUnitChunker", "SentenceUnitChunker", "TextLineUnitChunker", "ContextChunker", "NoContextChunker", "WholeDocumentContextChunker", "SlideWindowContextChunker",
11
+ "PromptEditor"]
@@ -0,0 +1,4 @@
1
+ Review the input unit text and your output carefully. If anything was missed, add it to your output following the defined output formats.
2
+ You should ONLY add new items. Do NOT re-generate the entire answer.
3
+ Your output should be based on the input unit text.
4
+ Your output should strictly adhere to the defined output formats.
@@ -0,0 +1,3 @@
1
+ Review the input unit text and your output carefully. If you find any omissions or errors, correct them by generating a revised output following the defined output formats.
2
+ Your output should be based on the input unit text.
3
+ Your output should strictly adhere to the defined output formats.
@@ -0,0 +1,163 @@
1
+ Prompt Template Design:
2
+
3
+ 1. Task Description:
4
+ Provide a detailed description of the task, including the background and the type of task (e.g., named entity recognition).
5
+
6
+ 2. Schema Definition:
7
+ List the key concepts that should be extracted, and provide clear definitions for each one. **Must define "entity_text"** which will be used to recognize the entity.
8
+
9
+ 3. Output Format Definition:
10
+ The output should be a JSON list, where each element is a dictionary representing a frame (an entity along with its attributes). **Each dictionary must include a key "entity_text" and a key "attr"**. The attributes are placed in the "attr" (e.g., {"entity_text": "<entity_text>", "attr": {"attr1": "<attr1>", "attr2": "<attr2>"}}).
11
+
12
+ 4. Optional: Hints:
13
+ Provide itemized hints for the information extractors to guide the extraction process. Remind the prompted agent to be truthful. Emphasize that the prompted agent is supposed to perform the task instead of writing code or instructing other agents to do it.
14
+
15
+ 5. Optional: Examples:
16
+ Include examples in the format:
17
+ Input: ...
18
+ Output: ...
19
+
20
+ 6. Context:
21
+ The template must include a placeholder in the format {{<placeholder_name>}} for the context. The placeholder name can be customized as needed.
22
+
23
+
24
+ Example 1 (single entity type with attributes):
25
+
26
+ ### Task description
27
+ The paragraph below is from the Food and Drug Administration (FDA) Clinical Pharmacology Section of Labeling for Human Prescription Drug and Biological Products, Adverse reactions section. Please carefully review it and extract the adverse reactions and percentages. Note that each adverse reaction is nested under a clinical trial and potentially an arm. Your output should take that into consideration.
28
+
29
+ ### Schema definition
30
+ Your output should contain:
31
+ "entity_text" which is the name of the clinical trial as spelled in the document,
32
+ If applicable, "Arm" which is the arm within the clinical trial,
33
+ "AdverseReaction" which is the name of the adverse reaction,
34
+ If applicable, "Percentage" which is the occurrence of the adverse reaction within the trial and arm,
35
+ "Evidence" which is the EXACT sentence in the text where you found the AdverseReaction from
36
+
37
+ ### Output format definition
38
+ Your output should follow JSON format, for example:
39
+ [
40
+ {"entity_text": "<Clinical trial name or number>", "attr": {"Arm": "<name of arm>", "AdverseReaction": "<Adverse reaction text>", "Percentage": "<a percent>", "Evidence": "<exact sentence from the text>"}},
41
+ {"entity_text": "<Clinical trial name or number>", "attr": {"Arm": "<name of arm>", "AdverseReaction": "<Adverse reaction text>", "Percentage": "<a percent>", "Evidence": "<exact sentence from the text>"}}
42
+ ]
43
+
44
+ ### Additional hints
45
+ - Your output should be 100% based on the provided content. DO NOT output fake numbers.
46
+ - You are expected to generate the output instead of writing code or instructing other agents to do so.
47
+ - If there is no specific arm, just omit the "Arm" key. If the percentage is not reported, just omit the "Percentage" key. The "Evidence" should always be provided.
48
+
49
+ ### Context
50
+ Below is full text from the Adverse reactions section:
51
+ "{{input}}"
52
+
53
+
54
+ Example 2 (multiple entity types):
55
+
56
+ ### Task description
57
+ This is a named entity recognition task. Given a medical note, annotate the Drug, Form, Strength, Frequency, Route, Dosage, Reason, ADE, and Duration.
58
+
59
+ ### Schema definition
60
+ Your output should contain:
61
+ "entity_text": the exact wording as mentioned in the note.
62
+ "entity_type": type of the entity. It should be one of the "Drug", "Form", "Strength", "Frequency", "Route", "Dosage", "Reason", "ADE", or "Duration".
63
+
64
+ ### Output format definition
65
+ Your output should follow JSON format,
66
+ if there are one of the entity mentions: Drug, Form, Strength, Frequency, Route, Dosage, Reason, ADE, or Duration:
67
+ [
68
+ {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "<entity type as listed above>"}},
69
+ {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "<entity type as listed above>"}}
70
+ ]
71
+ if there is no entity mentioned in the given note, just output an empty list:
72
+ []
73
+
74
+ I am only interested in the extracted contents in []. Do NOT explain your answer.
75
+
76
+ ### Examples
77
+ Below are some examples:
78
+
79
+ Input: Acetaminophen 650 mg PO BID 5.
80
+ Output: [
81
+ {"entity_text": "Acetaminophen", "attr": {"entity_type": "Drug"}},
82
+ {"entity_text": "650 mg", "attr": {"entity_type": "Strength"}},
83
+ {"entity_text": "PO", "attr": {"entity_type": "Route"}},
84
+ {"entity_text": "BID", "attr": {"entity_type": "Frequency"}}
85
+ ]
86
+
87
+ Input: Mesalamine DR 1200 mg PO BID 2.
88
+ Output: [
89
+ {"entity_text": "Mesalamine DR", "attr": {"entity_type": "Drug"}},
90
+ {"entity_text": "1200 mg", "attr": {"entity_type": "Strength"}},
91
+ {"entity_text": "BID", "attr": {"entity_type": "Frequency"}},
92
+ {"entity_text": "PO", "attr": {"entity_type": "Route"}}
93
+ ]
94
+
95
+
96
+ ### Context
97
+ Below is the full text from the medical note:
98
+ "{{input}}"
99
+
100
+
101
+ Example 3 (multiple entity types with corresponding attributes):
102
+
103
+     ### Task description
104
+     This is a named entity recognition task. Given a medical note, annotate the events (EVENT) and time expressions (TIMEX3):
105
+
106
+     ### Schema definition
107
+     Your output should contain: 
108
+         "entity_text": the exact wording as mentioned in the note.
109
+         "entity_type": type of the entity. It should be one of the "EVENT" or "TIMEX3".
110
+         if entity_type is "EVENT",
111
+             "type": the event type as one of the "TEST", "PROBLEM", "TREATMENT", "CLINICAL_DEPT", "EVIDENTIAL", or "OCCURRENCE".
112
+             "polarity": whether an EVENT is positive ("POS") or negative ("NEG"). For example, in “the patient reports headache, and denies chills”, the EVENT [headache] is positive in its polarity, and the EVENT [chills] is negative in its polarity.
113
+             "modality": whether an EVENT actually occurred or not. Must be one of the "FACTUAL", "CONDITIONAL", "POSSIBLE", or "PROPOSED".
114
+
115
+         if entity_type is "TIMEX3",
116
+             "type": the type as one of the "DATE", "TIME", "DURATION", or "FREQUENCY".
117
+             "val": the numeric value 1) DATE: [YYYY]-[MM]-[DD], 2) TIME: [hh]:[mm]:[ss], 3) DURATION: P[n][Y/M/W/D]. So, “for eleven days” will be 
118
+     represented as “P11D”, meaning a period of 11 days. 4) FREQUENCY: R[n][duration], where n denotes the number of repeats. When n is omitted, the expression denotes an unspecified number of repeats. For example, “once a day for 3 days” is “R3P1D” (repeat the time interval of 1 day (P1D) 3 times (R3)), and “twice every day” is “RP12H” (repeat every 12 hours).
119
+             "mod": additional information regarding the temporal value of a time expression. Must be one of the:
120
+                 “NA”: the default value, no relevant modifier is present;  
121
+                 “MORE”, means “more than”, e.g. over 2 days (val = P2D, mod = MORE);  
122
+                 “LESS”, means “less than”, e.g. almost 2 months (val = P2M, mod=LESS); 
123
+                 “APPROX”, means “approximate”, e.g. nearly a week (val = P1W, mod=APPROX);  
124
+                 “START”, describes the beginning of a period of time, e.g.  Christmas morning, 2005 (val= 2005-12-25, mod= START).  
125
+                 “END”, describes the end of a period of time, e.g. late last year, (val = 2010, mod = END)
126
+                 “MIDDLE”, describes the middle of a period of time, e.g. mid-September 2001 (val = 2001-09, mod = MIDDLE) 
127
+
128
+     ### Output format definition
129
+     Your output should follow JSON format. Each element in the list is a dictionary with an "entity_text" key and an "attr" key. The "attr" dictionary contains all other attributes, including "entity_type".
130
+
131
+     For example:
132
+     If there are EVENT or TIMEX3 entity mentions:
133
+         [
134
+             {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "EVENT", "type": "<event type>", "polarity": "<event polarity>", "modality": "<event modality>"}},
135
+             {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "TIMEX3", "type": "<TIMEX3 type>", "val": "<time value>", "mod": "<additional information>"}}
136
+             ...
137
+         ]
138
+     If there is no entity mentioned in the given note, just output an empty list:
139
+         []
140
+
141
+     I am only interested in the extracted contents in []. Do NOT explain your answer.
142
+
143
+     ### Examples
144
+     Below are some examples:
145
+
146
+     Input: At 9/7/93 , 1:00 a.m. , intravenous fluids rate was decreased to 50 cc's per hour , total fluids given during the first 24 hours were 140 to 150 cc's per kilo per day .
147
+     Output: [
148
+             {"entity_text": "intravenous fluids", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}},
149
+             {"entity_text": "decreased", "attr": {"entity_type": "EVENT", "type": "OCCURRENCE", "polarity": "POS", "modality": "FACTUAL"}},
150
+             {"entity_text": "total fluids", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}}, 
151
+             {"entity_text": "9/7/93 , 1:00 a.m.", "attr": {"entity_type": "TIMEX3", "type": "TIME", "val": "1993-09-07T01:00", "mod": "NA"}},
152
+             {"entity_text": "24 hours", "attr": {"entity_type": "TIMEX3", "type": "DURATION", "val": "PT24H", "mod": "NA"}}
153
+         ]
154
+
155
+     Input: At that time it appeared well adhered to the underlying skin .
156
+     Output: [
157
+             {"entity_text": "it", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}},
158
+             {"entity_text": "well adhered", "attr": {"entity_type": "EVENT", "type": "OCCURRENCE", "polarity": "POS", "modality": "FACTUAL"}}
159
+         ]
160
+
161
+     ### Context
162
+     Below is the medical note:
163
+     "{{input}}"
@@ -0,0 +1,163 @@
1
+ Prompt Template Design:
2
+
3
+ 1. Task Description:
4
+ Provide a detailed description of the task, including the background and the type of task (e.g., named entity recognition).
5
+
6
+ 2. Schema Definition:
7
+ List the key concepts that should be extracted, and provide clear definitions for each one. **Must define "entity_text"** which will be used to recognize the entity.
8
+
9
+ 3. Output Format Definition:
10
+ The output should be a JSON list, where each element is a dictionary representing a frame (an entity along with its attributes). **Each dictionary must include a key "entity_text" and a key "attr"**. The attributes are placed in the "attr" (e.g., {"entity_text": "<entity_text>", "attr": {"attr1": "<attr1>", "attr2": "<attr2>"}}).
11
+
12
+ 4. Optional: Hints:
13
+ Provide itemized hints for the information extractors to guide the extraction process. Remind the prompted agent to be truthful. Emphasize that the prompted agent is supposed to perform the task instead of writing code or instructing other agents to do it.
14
+
15
+ 5. Optional: Examples:
16
+ Include examples in the format:
17
+ Input: ...
18
+ Output: ...
19
+
20
+ 6. Context:
21
+ The template must include a placeholder in the format {{<placeholder_name>}} for the context. The placeholder name can be customized as needed.
22
+
23
+
24
+ Example 1 (single entity type with attributes):
25
+
26
+ ### Task description
27
+ The paragraph below is from the Food and Drug Administration (FDA) Clinical Pharmacology Section of Labeling for Human Prescription Drug and Biological Products, Adverse reactions section. Please carefully review it and extract the adverse reactions and percentages. Note that each adverse reaction is nested under a clinical trial and potentially an arm. Your output should take that into consideration.
28
+
29
+ ### Schema definition
30
+ Your output should contain:
31
+ "entity_text" which is the name of the clinical trial as spelled in the document,
32
+ If applicable, "Arm" which is the arm within the clinical trial,
33
+ "AdverseReaction" which is the name of the adverse reaction,
34
+ If applicable, "Percentage" which is the occurrence of the adverse reaction within the trial and arm,
35
+ "Evidence" which is the EXACT sentence in the text where you found the AdverseReaction from
36
+
37
+ ### Output format definition
38
+ Your output should follow JSON format, for example:
39
+ [
40
+ {"entity_text": "<Clinical trial name or number>", "attr": {"Arm": "<name of arm>", "AdverseReaction": "<Adverse reaction text>", "Percentage": "<a percent>", "Evidence": "<exact sentence from the text>"}},
41
+ {"entity_text": "<Clinical trial name or number>", "attr": {"Arm": "<name of arm>", "AdverseReaction": "<Adverse reaction text>", "Percentage": "<a percent>", "Evidence": "<exact sentence from the text>"}}
42
+ ]
43
+
44
+ ### Additional hints
45
+ - Your output should be 100% based on the provided content. DO NOT output fake numbers.
46
+ - You are expected to generate the output instead of writing code or instructing other agents to do so.
47
+ - If there is no specific arm, just omit the "Arm" key. If the percentage is not reported, just omit the "Percentage" key. The "Evidence" should always be provided.
48
+
49
+ ### Context
50
+ Below is full text from the Adverse reactions section:
51
+ "{{input}}"
52
+
53
+
54
+ Example 2 (multiple entity types):
55
+
56
+ ### Task description
57
+ This is a named entity recognition task. Given a medical note, annotate the Drug, Form, Strength, Frequency, Route, Dosage, Reason, ADE, and Duration.
58
+
59
+ ### Schema definition
60
+ Your output should contain:
61
+ "entity_text": the exact wording as mentioned in the note.
62
+ "entity_type": type of the entity. It should be one of the "Drug", "Form", "Strength", "Frequency", "Route", "Dosage", "Reason", "ADE", or "Duration".
63
+
64
+ ### Output format definition
65
+ Your output should follow JSON format,
66
+ if there are one of the entity mentions: Drug, Form, Strength, Frequency, Route, Dosage, Reason, ADE, or Duration:
67
+ [
68
+ {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "<entity type as listed above>"}},
69
+ {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "<entity type as listed above>"}}
70
+ ]
71
+ if there is no entity mentioned in the given note, just output an empty list:
72
+ []
73
+
74
+ I am only interested in the extracted contents in []. Do NOT explain your answer.
75
+
76
+ ### Examples
77
+ Below are some examples:
78
+
79
+ Input: Acetaminophen 650 mg PO BID 5.
80
+ Output: [
81
+ {"entity_text": "Acetaminophen", "attr": {"entity_type": "Drug"}},
82
+ {"entity_text": "650 mg", "attr": {"entity_type": "Strength"}},
83
+ {"entity_text": "PO", "attr": {"entity_type": "Route"}},
84
+ {"entity_text": "BID", "attr": {"entity_type": "Frequency"}}
85
+ ]
86
+
87
+ Input: Mesalamine DR 1200 mg PO BID 2.
88
+ Output: [
89
+ {"entity_text": "Mesalamine DR", "attr": {"entity_type": "Drug"}},
90
+ {"entity_text": "1200 mg", "attr": {"entity_type": "Strength"}},
91
+ {"entity_text": "BID", "attr": {"entity_type": "Frequency"}},
92
+ {"entity_text": "PO", "attr": {"entity_type": "Route"}}
93
+ ]
94
+
95
+
96
+ ### Context
97
+ Below is the full text from the medical note:
98
+ "{{input}}"
99
+
100
+
101
+ Example 3 (multiple entity types with corresponding attributes):
102
+
103
+     ### Task description
104
+     This is a named entity recognition task. Given a medical note, annotate the events (EVENT) and time expressions (TIMEX3):
105
+
106
+     ### Schema definition
107
+     Your output should contain: 
108
+         "entity_text": the exact wording as mentioned in the note.
109
+         "entity_type": type of the entity. It should be one of the "EVENT" or "TIMEX3".
110
+         if entity_type is "EVENT",
111
+             "type": the event type as one of the "TEST", "PROBLEM", "TREATMENT", "CLINICAL_DEPT", "EVIDENTIAL", or "OCCURRENCE".
112
+             "polarity": whether an EVENT is positive ("POS") or negative ("NEG"). For example, in “the patient reports headache, and denies chills”, the EVENT [headache] is positive in its polarity, and the EVENT [chills] is negative in its polarity.
113
+             "modality": whether an EVENT actually occurred or not. Must be one of the "FACTUAL", "CONDITIONAL", "POSSIBLE", or "PROPOSED".
114
+
115
+         if entity_type is "TIMEX3",
116
+             "type": the type as one of the "DATE", "TIME", "DURATION", or "FREQUENCY".
117
+             "val": the numeric value 1) DATE: [YYYY]-[MM]-[DD], 2) TIME: [hh]:[mm]:[ss], 3) DURATION: P[n][Y/M/W/D]. So, “for eleven days” will be 
118
+     represented as “P11D”, meaning a period of 11 days. 4) FREQUENCY: R[n][duration], where n denotes the number of repeats. When n is omitted, the expression denotes an unspecified number of repeats. For example, “once a day for 3 days” is “R3P1D” (repeat the time interval of 1 day (P1D) 3 times (R3)), and “twice every day” is “RP12H” (repeat every 12 hours).
119
+             "mod": additional information regarding the temporal value of a time expression. Must be one of the:
120
+                 “NA”: the default value, no relevant modifier is present;  
121
+                 “MORE”, means “more than”, e.g. over 2 days (val = P2D, mod = MORE);  
122
+                 “LESS”, means “less than”, e.g. almost 2 months (val = P2M, mod=LESS); 
123
+                 “APPROX”, means “approximate”, e.g. nearly a week (val = P1W, mod=APPROX);  
124
+                 “START”, describes the beginning of a period of time, e.g.  Christmas morning, 2005 (val= 2005-12-25, mod= START).  
125
+                 “END”, describes the end of a period of time, e.g. late last year, (val = 2010, mod = END)
126
+                 “MIDDLE”, describes the middle of a period of time, e.g. mid-September 2001 (val = 2001-09, mod = MIDDLE) 
127
+
128
+     ### Output format definition
129
+     Your output should follow JSON format. Each element in the list is a dictionary with an "entity_text" key and an "attr" key. The "attr" dictionary contains all other attributes, including "entity_type".
130
+
131
+     For example:
132
+     If there are EVENT or TIMEX3 entity mentions:
133
+         [
134
+             {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "EVENT", "type": "<event type>", "polarity": "<event polarity>", "modality": "<event modality>"}},
135
+             {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "TIMEX3", "type": "<TIMEX3 type>", "val": "<time value>", "mod": "<additional information>"}}
136
+             ...
137
+         ]
138
+     If there is no entity mentioned in the given note, just output an empty list:
139
+         []
140
+
141
+     I am only interested in the extracted contents in []. Do NOT explain your answer.
142
+
143
+     ### Examples
144
+     Below are some examples:
145
+
146
+     Input: At 9/7/93 , 1:00 a.m. , intravenous fluids rate was decreased to 50 cc's per hour , total fluids given during the first 24 hours were 140 to 150 cc's per kilo per day .
147
+     Output: [
148
+             {"entity_text": "intravenous fluids", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}},
149
+             {"entity_text": "decreased", "attr": {"entity_type": "EVENT", "type": "OCCURRENCE", "polarity": "POS", "modality": "FACTUAL"}},
150
+             {"entity_text": "total fluids", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}}, 
151
+             {"entity_text": "9/7/93 , 1:00 a.m.", "attr": {"entity_type": "TIMEX3", "type": "TIME", "val": "1993-09-07T01:00", "mod": "NA"}},
152
+             {"entity_text": "24 hours", "attr": {"entity_type": "TIMEX3", "type": "DURATION", "val": "PT24H", "mod": "NA"}}
153
+         ]
154
+
155
+     Input: At that time it appeared well adhered to the underlying skin .
156
+     Output: [
157
+             {"entity_text": "it", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}},
158
+             {"entity_text": "well adhered", "attr": {"entity_type": "EVENT", "type": "OCCURRENCE", "polarity": "POS", "modality": "FACTUAL"}}
159
+         ]
160
+
161
+     ### Context
162
+     Below is the medical note:
163
+     "{{input}}"
@@ -0,0 +1,163 @@
1
+ Prompt Template Design:
2
+
3
+ 1. Task Description:
4
+ Provide a detailed description of the task, including the background and the type of task (e.g., named entity recognition).
5
+
6
+ 2. Schema Definition:
7
+ List the key concepts that should be extracted, and provide clear definitions for each one. **Must define "entity_text"** which will be used to recognize the entity.
8
+
9
+ 3. Output Format Definition:
10
+ The output should be a JSON list, where each element is a dictionary representing a frame (an entity along with its attributes). **Each dictionary must include a key "entity_text" and a key "attr"**. The attributes are placed in the "attr" (e.g., {"entity_text": "<entity_text>", "attr": {"attr1": "<attr1>", "attr2": "<attr2>"}}).
11
+
12
+ 4. Optional: Hints:
13
+ Provide itemized hints for the information extractors to guide the extraction process. Remind the prompted agent to be truthful. Emphasize that the prompted agent is supposed to perform the task instead of writing code or instructing other agents to do it.
14
+
15
+ 5. Optional: Examples:
16
+ Include examples in the format:
17
+ Input: ...
18
+ Output: ...
19
+
20
+ 6. Context:
21
+ The template must include a placeholder in the format {{<placeholder_name>}} for the context. The placeholder name can be customized as needed.
22
+
23
+
24
+ Example 1 (single entity type with attributes):
25
+
26
+ ### Task description
27
+ The paragraph below is from the Food and Drug Administration (FDA) Clinical Pharmacology Section of Labeling for Human Prescription Drug and Biological Products, Adverse reactions section. Please carefully review it and extract the adverse reactions and percentages. Note that each adverse reaction is nested under a clinical trial and potentially an arm. Your output should take that into consideration.
28
+
29
+ ### Schema definition
30
+ Your output should contain:
31
+ "entity_text" which is the name of the clinical trial as spelled in the document,
32
+ If applicable, "Arm" which is the arm within the clinical trial,
33
+ "AdverseReaction" which is the name of the adverse reaction,
34
+ If applicable, "Percentage" which is the occurrence of the adverse reaction within the trial and arm,
35
+ "Evidence" which is the EXACT sentence in the text where you found the AdverseReaction from
36
+
37
+ ### Output format definition
38
+ Your output should follow JSON format, for example:
39
+ [
40
+ {"entity_text": "<Clinical trial name or number>", "attr": {"Arm": "<name of arm>", "AdverseReaction": "<Adverse reaction text>", "Percentage": "<a percent>", "Evidence": "<exact sentence from the text>"}},
41
+ {"entity_text": "<Clinical trial name or number>", "attr": {"Arm": "<name of arm>", "AdverseReaction": "<Adverse reaction text>", "Percentage": "<a percent>", "Evidence": "<exact sentence from the text>"}}
42
+ ]
43
+
44
+ ### Additional hints
45
+ - Your output should be 100% based on the provided content. DO NOT output fake numbers.
46
+ - You are expected to generate the output instead of writing code or instructing other agents to do so.
47
+ - If there is no specific arm, just omit the "Arm" key. If the percentage is not reported, just omit the "Percentage" key. The "Evidence" should always be provided.
48
+
49
+ ### Context
50
+ The text below is from the Adverse reactions section:
51
+ "{{input}}"
52
+
53
+
54
+ Example 2 (multiple entity types):
55
+
56
+ ### Task description
57
+ This is a named entity recognition task. Given a text chunk from a medical note, annotate the Drug, Form, Strength, Frequency, Route, Dosage, Reason, ADE, and Duration.
58
+
59
+ ### Schema definition
60
+ Your output should contain:
61
+ "entity_text": the exact wording as mentioned in the note.
62
+ "entity_type": type of the entity. It should be one of the "Drug", "Form", "Strength", "Frequency", "Route", "Dosage", "Reason", "ADE", or "Duration".
63
+
64
+ ### Output format definition
65
+ Your output should follow JSON format,
66
+ if there is at least one of the entity mentions: Drug, Form, Strength, Frequency, Route, Dosage, Reason, ADE, or Duration:
67
+ [
68
+ {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "<entity type as listed above>"}},
69
+ {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "<entity type as listed above>"}}
70
+ ]
71
+ if there is no entity mentioned in the given note, just output an empty list:
72
+ []
73
+
74
+ I am only interested in the extracted contents in []. Do NOT explain your answer.
75
+
76
+ ### Examples
77
+ Below are some examples:
78
+
79
+ Input: Acetaminophen 650 mg PO BID 5.
80
+ Output: [
81
+ {"entity_text": "Acetaminophen", "attr": {"entity_type": "Drug"}},
82
+ {"entity_text": "650 mg", "attr": {"entity_type": "Strength"}},
83
+ {"entity_text": "PO", "attr": {"entity_type": "Route"}},
84
+ {"entity_text": "BID", "attr": {"entity_type": "Frequency"}}
85
+ ]
86
+
87
+ Input: Mesalamine DR 1200 mg PO BID 2.
88
+ Output: [
89
+ {"entity_text": "Mesalamine DR", "attr": {"entity_type": "Drug"}},
90
+ {"entity_text": "1200 mg", "attr": {"entity_type": "Strength"}},
91
+ {"entity_text": "BID", "attr": {"entity_type": "Frequency"}},
92
+ {"entity_text": "PO", "attr": {"entity_type": "Route"}}
93
+ ]
94
+
95
+
96
+ ### Context
97
+ The text below is from the medical note:
98
+ "{{input}}"
99
+
100
+
101
+ Example 3 (multiple entity types with corresponding attributes):
102
+
103
+     ### Task description
104
+     This is a named entity recognition task. Given a unit text from a medical note, annotate the events (EVENT) and time expressions (TIMEX3):
105
+
106
+     ### Schema definition
107
+     Your output should contain: 
108
+         "entity_text": the exact wording as mentioned in the note.
109
+         "entity_type": type of the entity. It should be one of the "EVENT" or "TIMEX3".
110
+         if entity_type is "EVENT",
111
+             "type": the event type as one of the "TEST", "PROBLEM", "TREATMENT", "CLINICAL_DEPT", "EVIDENTIAL", or "OCCURRENCE".
112
+             "polarity": whether an EVENT is positive ("POS") or negative ("NEG"). For example, in “the patient reports headache, and denies chills”, the EVENT [headache] is positive in its polarity, and the EVENT [chills] is negative in its polarity.
113
+             "modality": whether an EVENT actually occurred or not. Must be one of the "FACTUAL", "CONDITIONAL", "POSSIBLE", or "PROPOSED".
114
+
115
+         if entity_type is "TIMEX3",
116
+             "type": the type as one of the "DATE", "TIME", "DURATION", or "FREQUENCY".
117
+             "val": the numeric value 1) DATE: [YYYY]-[MM]-[DD], 2) TIME: [hh]:[mm]:[ss], 3) DURATION: P[n][Y/M/W/D]. So, “for eleven days” will be 
118
+     represented as “P11D”, meaning a period of 11 days. 4) FREQUENCY: R[n][duration], where n denotes the number of repeats. When the n is omitted, the expression denotes an unspecified amount of repeats. For example, “once a day for 3 days” is “R3P1D” (repeat the time interval of 1 day (P1D) for 3 times (R3)), twice every day is “RP12H” (repeat every 12 hours).
119
+             "mod": additional information regarding the temporal value of a time expression. Must be one of the:
120
+                 “NA”: the default value, no relevant modifier is present;  
121
+                 “MORE”, means “more than”, e.g. over 2 days (val = P2D, mod = MORE);  
122
+                 “LESS”, means “less than”, e.g. almost 2 months (val = P2M, mod=LESS); 
123
+                 “APPROX”, means “approximate”, e.g. nearly a week (val = P1W, mod=APPROX);  
124
+                 “START”, describes the beginning of a period of time, e.g.  Christmas morning, 2005 (val= 2005-12-25, mod= START).  
125
+                 “END”, describes the end of a period of time, e.g. late last year, (val = 2010, mod = END)
126
+                 “MIDDLE”, describes the middle of a period of time, e.g. mid-September 2001 (val = 2001-09, mod = MIDDLE) 
127
+
128
+     ### Output format definition
129
+     Your output should follow JSON format. Each element in the list is a dictionary with an "entity_text" key and an "attr" key. The "attr" dictionary contains all other attributes, including "entity_type".
130
+
131
+     For example:
132
+     If there are EVENT or TIMEX3 entity mentions:
133
+         [
134
+             {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "EVENT", "type": "<event type>", "polarity": "<event polarity>", "modality": "<event modality>"}},
135
+             {"entity_text": "<Exact entity mentions as in the note>", "attr": {"entity_type": "TIMEX3", "type": "<TIMEX3 type>", "val": "<time value>", "mod": "<additional information>"}}
136
+             ...
137
+         ]
138
+     If there is no entity mentioned in the given note, just output an empty list:
139
+         []
140
+
141
+     I am only interested in the extracted contents in []. Do NOT explain your answer.
142
+
143
+     ### Examples
144
+     Below are some examples:
145
+
146
+     Input: At 9/7/93 , 1:00 a.m. , intravenous fluids rate was decreased to 50 cc&apos;s per hour , total fluids given during the first 24 hours were 140 to 150 cc&apos;s per kilo per day .
147
+     Output: [
148
+             {"entity_text": "intravenous fluids", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}},
149
+             {"entity_text": "decreased", "attr": {"entity_type": "EVENT", "type": "OCCURRENCE", "polarity": "POS", "modality": "FACTUAL"}},
150
+             {"entity_text": "total fluids", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}}, 
151
+             {"entity_text": "9/7/93 , 1:00 a.m.", "attr": {"entity_type": "TIMEX3", "type": "TIME", "val": "1993-09-07T01:00", "mod": "NA"}},
152
+             {"entity_text": "24 hours", "attr": {"entity_type": "TIMEX3", "type": "DURATION", "val": "PT24H", "mod": "NA"}}
153
+         ]
154
+
155
+     Input: At that time it appeared well adhered to the underlying skin .
156
+     Output: [
157
+             {"entity_text": "it", "attr": {"entity_type": "EVENT", "type": "TREATMENT", "polarity": "POS", "modality": "FACTUAL"}},
158
+             {"entity_text": "well adhered", "attr": {"entity_type": "EVENT", "type": "OCCURRENCE", "polarity": "POS", "modality": "FACTUAL"}}
159
+         ]
160
+
161
+     ### Context
162
+     The text below is from the medical note:
163
+     "{{input}}"