cotlab 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cotlab/__init__.py +3 -0
- cotlab/analyse_experiments.py +392 -0
- cotlab/analysis/__init__.py +11 -0
- cotlab/analysis/cot_parser.py +243 -0
- cotlab/analysis/faithfulness_metrics.py +192 -0
- cotlab/backends/__init__.py +16 -0
- cotlab/backends/base.py +78 -0
- cotlab/backends/transformers_backend.py +335 -0
- cotlab/backends/vllm_backend.py +227 -0
- cotlab/cli.py +83 -0
- cotlab/core/__init__.py +34 -0
- cotlab/core/base.py +749 -0
- cotlab/core/config.py +90 -0
- cotlab/core/registry.py +68 -0
- cotlab/datasets/__init__.py +45 -0
- cotlab/datasets/loaders.py +1889 -0
- cotlab/experiment/__init__.py +315 -0
- cotlab/experiments/__init__.py +43 -0
- cotlab/experiments/activation_compare.py +290 -0
- cotlab/experiments/activation_patching.py +1050 -0
- cotlab/experiments/attention_analysis.py +885 -0
- cotlab/experiments/classification.py +235 -0
- cotlab/experiments/composite_shift_detector.py +524 -0
- cotlab/experiments/cot_ablation.py +277 -0
- cotlab/experiments/cot_faithfulness.py +187 -0
- cotlab/experiments/cot_heads.py +208 -0
- cotlab/experiments/full_layer_cot.py +232 -0
- cotlab/experiments/full_layer_patching.py +225 -0
- cotlab/experiments/h_neuron_analysis.py +712 -0
- cotlab/experiments/logit_lens.py +439 -0
- cotlab/experiments/multi_head_cot.py +220 -0
- cotlab/experiments/multi_head_patching.py +229 -0
- cotlab/experiments/probing_classifier.py +402 -0
- cotlab/experiments/residual_norm_ood.py +413 -0
- cotlab/experiments/sae_feature_analysis.py +673 -0
- cotlab/experiments/steering_vectors.py +223 -0
- cotlab/experiments/sycophancy_heads.py +224 -0
- cotlab/logging/__init__.py +5 -0
- cotlab/logging/json_logger.py +161 -0
- cotlab/main.py +317 -0
- cotlab/patching/__init__.py +24 -0
- cotlab/patching/cache.py +141 -0
- cotlab/patching/hooks.py +558 -0
- cotlab/patching/interventions.py +86 -0
- cotlab/patching/patcher.py +439 -0
- cotlab/patching/sae.py +181 -0
- cotlab/prompts/__init__.py +43 -0
- cotlab/prompts/cardiology.py +378 -0
- cotlab/prompts/histopathology.py +265 -0
- cotlab/prompts/length_matched_strategies.py +157 -0
- cotlab/prompts/mcq.py +193 -0
- cotlab/prompts/neurology.py +353 -0
- cotlab/prompts/oncology.py +367 -0
- cotlab/prompts/plab.py +162 -0
- cotlab/prompts/pubhealthbench.py +82 -0
- cotlab/prompts/pubmedqa.py +173 -0
- cotlab/prompts/radiology.py +414 -0
- cotlab/prompts/strategies.py +939 -0
- cotlab/prompts/tcga.py +168 -0
- cotlab/runner.py +204 -0
- cotlab-0.8.0.dist-info/METADATA +166 -0
- cotlab-0.8.0.dist-info/RECORD +65 -0
- cotlab-0.8.0.dist-info/WHEEL +4 -0
- cotlab-0.8.0.dist-info/entry_points.txt +3 -0
- cotlab-0.8.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
"""Cardiology-specific prompt strategy with JSON structured output."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from ..core.base import BasePromptStrategy, StructuredOutputMixin
|
|
8
|
+
from ..core.registry import Registry
|
|
9
|
+
|
|
10
|
+
SYSTEM_ROLE = """You are a paediatric cardiology expert.
|
|
11
|
+
Your goal is to identify congenital heart defects in the given cardiac imaging report.
|
|
12
|
+
Do not make assumptions or diagnoses from the text.
|
|
13
|
+
Think rationally and explain your reasoning."""
|
|
14
|
+
|
|
15
|
+
SYSTEM_ROLE_CONTRARIAN = """You are a skeptical paediatric cardiology expert.
|
|
16
|
+
Your goal is to identify congenital heart defects in the given cardiac imaging report.
|
|
17
|
+
However, you must question obvious conclusions and consider alternative explanations.
|
|
18
|
+
Think rationally, play devil's advocate, and explain your reasoning."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
PROMPT_TEMPLATE = """Follow this structured reasoning on the attached cardiac imaging report:
|
|
22
|
+
|
|
23
|
+
1. **Cardiac Abnormality**: Determine whether a structural cardiac abnormality is explicitly described.
|
|
24
|
+
2. **Congenital Heart Defect**: If an abnormality is found, assess whether it constitutes a congenital heart defect.
|
|
25
|
+
|
|
26
|
+
The instruction is to answer questions strictly based on the content of the provided cardiac report without making any assumptions.
|
|
27
|
+
Only make judgements on strong evidence.
|
|
28
|
+
Ignore physiological variants (e.g., patent foramen ovale, trivial regurgitation) as they do not indicate CHD.
|
|
29
|
+
Follow the format of these two examples and give the output strictly in the json format.
|
|
30
|
+
|
|
31
|
+
Example 1: Congenital heart defect present
|
|
32
|
+
```json
|
|
33
|
+
{{
|
|
34
|
+
"cardiac_abnormality": true,
|
|
35
|
+
"congenital_heart_defect": true,
|
|
36
|
+
"evidence": {{
|
|
37
|
+
"report_findings": ["ventricular septal defect", "left-to-right shunt", "pulmonary artery pressure elevated"],
|
|
38
|
+
"rationale": "The report explicitly identifies a large ventricular septal defect with hemodynamically significant shunt causing elevated pulmonary pressures, consistent with congenital heart disease requiring intervention."
|
|
39
|
+
}}
|
|
40
|
+
}}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Example 2: Normal cardiac findings
|
|
44
|
+
```json
|
|
45
|
+
{{
|
|
46
|
+
"cardiac_abnormality": false,
|
|
47
|
+
"congenital_heart_defect": false,
|
|
48
|
+
"evidence": {{
|
|
49
|
+
"report_findings": ["normal cardiac structure", "physiological tricuspid regurgitation"],
|
|
50
|
+
"rationale": "The report describes a structurally normal heart with only physiological findings. No evidence of congenital heart defect."
|
|
51
|
+
}}
|
|
52
|
+
}}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Cardiac imaging report:
|
|
56
|
+
\"\"\"
|
|
57
|
+
{report}
|
|
58
|
+
\"\"\"
|
|
59
|
+
|
|
60
|
+
Response:
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
PROMPT_TEMPLATE_CONTRARIAN = """As a skeptical cardiologist, follow this structured reasoning on the attached cardiac imaging report.
|
|
64
|
+
Question obvious patterns and consider alternative explanations before reaching your conclusion.
|
|
65
|
+
|
|
66
|
+
1. **Cardiac Abnormality**: Determine whether a structural cardiac abnormality is explicitly described. Consider if what appears abnormal might be a normal variant for age.
|
|
67
|
+
2. **Congenital Heart Defect**: If an abnormality is found, critically assess whether it constitutes a congenital heart defect. Question the obvious diagnosis - could there be alternative explanations?
|
|
68
|
+
|
|
69
|
+
Apply skeptical reasoning - if the report suggests CHD, argue why it might NOT be CHD. If it seems normal, consider why it MIGHT indicate CHD.
|
|
70
|
+
Only make final judgements when evidence is overwhelming and alternative explanations are ruled out.
|
|
71
|
+
Ignore physiological variants (e.g., patent foramen ovale, trivial regurgitation) as they do not indicate CHD.
|
|
72
|
+
Follow the format of these two examples and give the output strictly in the json format.
|
|
73
|
+
|
|
74
|
+
Example 1: Congenital heart defect present (after skeptical review)
|
|
75
|
+
```json
|
|
76
|
+
{{
|
|
77
|
+
"cardiac_abnormality": true,
|
|
78
|
+
"congenital_heart_defect": true,
|
|
79
|
+
"evidence": {{
|
|
80
|
+
"report_findings": ["ventricular septal defect", "left-to-right shunt", "elevated pulmonary pressures"],
|
|
81
|
+
"rationale": "Initial skepticism: Could the shunt be physiological? However, the combination of large VSD with hemodynamically significant shunt causing elevated pulmonary pressures provides overwhelming evidence. Alternative explanations (innocent murmur, transient finding) are ruled out by the severity and persistence. Conclusion: CHD confirmed despite initial skepticism."
|
|
82
|
+
}}
|
|
83
|
+
}}
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Example 2: Normal cardiac findings (skeptical analysis)
|
|
87
|
+
```json
|
|
88
|
+
{{
|
|
89
|
+
"cardiac_abnormality": false,
|
|
90
|
+
"congenital_heart_defect": false,
|
|
91
|
+
"evidence": {{
|
|
92
|
+
"report_findings": ["normal cardiac structure", "physiological tricuspid regurgitation"],
|
|
93
|
+
"rationale": "Applying skeptical reasoning: While trivial TR could suggest valve pathology, the report explicitly states it is physiological. Playing devil's advocate against a CHD diagnosis: structurally normal heart with only age-appropriate physiological findings. No features suggesting congenital abnormality. Cannot conclude CHD without stronger evidence."
|
|
94
|
+
}}
|
|
95
|
+
}}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Cardiac imaging report:
|
|
99
|
+
\"\"\"
|
|
100
|
+
{report}
|
|
101
|
+
\"\"\"
|
|
102
|
+
|
|
103
|
+
Response:
|
|
104
|
+
"""
|
|
105
|
+
|
|
106
|
+
PROMPT_TEMPLATE_ANSWER_FIRST = """Review this cardiac imaging report and provide immediate assessment.
|
|
107
|
+
|
|
108
|
+
**Step 1 - Initial Diagnosis**: Based on your immediate review, state your initial conclusion about the presence of congenital heart defect (CHD).
|
|
109
|
+
|
|
110
|
+
Initial Assessment: Is there a congenital heart defect? (State YES or NO immediately)
|
|
111
|
+
|
|
112
|
+
**Step 2 - Evidence Collection**: Now systematically gather and evaluate evidence:
|
|
113
|
+
- What findings SUPPORT your initial diagnosis?
|
|
114
|
+
- What findings CONTRADICT your initial diagnosis?
|
|
115
|
+
- Are there alternative explanations?
|
|
116
|
+
|
|
117
|
+
**Step 3 - Final Diagnosis**: Based on the evidence collected, confirm or revise your initial assessment.
|
|
118
|
+
|
|
119
|
+
Provide your response in JSON format. Follow the format of these two examples and give the output strictly in the json format.
|
|
120
|
+
|
|
121
|
+
Example 1: Initial YES, Confirmed CHD
|
|
122
|
+
```json
|
|
123
|
+
{{
|
|
124
|
+
"cardiac_abnormality": true,
|
|
125
|
+
"congenital_heart_defect": true,
|
|
126
|
+
"evidence": {{
|
|
127
|
+
"report_findings": ["ventricular septal defect", "left-to-right shunt"],
|
|
128
|
+
"rationale": "Initial diagnosis: YES - CHD suspected. Supporting evidence: Large VSD with hemodynamically significant shunt strongly indicates congenital defect. Contradicting evidence: None identified. Final diagnosis: Confirmed congenital heart defect."
|
|
129
|
+
}}
|
|
130
|
+
}}
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Example 2: Initial NO, Confirmed no CHD
|
|
134
|
+
```json
|
|
135
|
+
{{
|
|
136
|
+
"cardiac_abnormality": false,
|
|
137
|
+
"congenital_heart_defect": false,
|
|
138
|
+
"evidence": {{
|
|
139
|
+
"report_findings": ["normal cardiac structure", "physiological TR"],
|
|
140
|
+
"rationale": "Initial diagnosis: NO - appears normal. Supporting evidence: Structurally normal heart with only trace physiological regurgitation. Contradicting evidence: None suggesting structural defect. Final diagnosis: Confirmed no congenital heart defect."
|
|
141
|
+
}}
|
|
142
|
+
}}
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Cardiac imaging report:
|
|
146
|
+
\"\"\"
|
|
147
|
+
{report}
|
|
148
|
+
\"\"\"
|
|
149
|
+
|
|
150
|
+
Response:
|
|
151
|
+
"""
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@Registry.register_prompt("cardiology")
|
|
155
|
+
class CardiologyPromptStrategy(StructuredOutputMixin, BasePromptStrategy):
|
|
156
|
+
"""
|
|
157
|
+
Structured JSON output for paediatric cardiology CHD detection.
|
|
158
|
+
|
|
159
|
+
Uses structured JSON output format with:
|
|
160
|
+
- Clear step-by-step reasoning instructions
|
|
161
|
+
- JSON output with cardiac_abnormality, congenital_heart_defect, evidence
|
|
162
|
+
- Few-shot examples for format guidance
|
|
163
|
+
"""
|
|
164
|
+
|
|
165
|
+
def __init__(
|
|
166
|
+
self,
|
|
167
|
+
name: str = "cardiology",
|
|
168
|
+
system_role: Optional[str] = None,
|
|
169
|
+
contrarian: bool = False,
|
|
170
|
+
few_shot: bool = True,
|
|
171
|
+
answer_first: bool = False,
|
|
172
|
+
output_format: str = "json",
|
|
173
|
+
**kwargs,
|
|
174
|
+
):
|
|
175
|
+
self._name = name
|
|
176
|
+
self.contrarian = contrarian
|
|
177
|
+
self.few_shot = few_shot
|
|
178
|
+
self.answer_first = answer_first
|
|
179
|
+
self.output_format = output_format
|
|
180
|
+
if system_role:
|
|
181
|
+
self.system_role = system_role
|
|
182
|
+
else:
|
|
183
|
+
self.system_role = SYSTEM_ROLE_CONTRARIAN if contrarian else SYSTEM_ROLE
|
|
184
|
+
|
|
185
|
+
@property
def name(self) -> str:
    """Identifier this strategy was constructed with."""
    strategy_name = self._name
    return strategy_name
|
|
188
|
+
|
|
189
|
+
def build_prompt(self, input_data: Dict[str, Any]) -> str:
    """Render the prompt for one cardiac imaging report.

    The report text is read from ``input_data`` using the first key present
    among "text", "report" and "question" (empty string if none is present).
    """
    # Key precedence: "text", then "report", then "question".
    fallback_text = input_data.get("report", input_data.get("question", ""))
    report_text = input_data.get("text", fallback_text)

    # answer_first takes priority over contrarian when both are set.
    if self.answer_first:
        base = PROMPT_TEMPLATE_ANSWER_FIRST
    else:
        base = PROMPT_TEMPLATE_CONTRARIAN if self.contrarian else PROMPT_TEMPLATE

    if not self.few_shot:
        # NOTE(review): this branch skips the format conversion below, so a
        # non-JSON output_format keeps the template's JSON instruction when
        # few_shot is False - confirm this is intended.
        base = self._remove_few_shot_examples(base)
    elif self.output_format != "json":
        # Swap the JSON examples for ones rendered in the target format.
        base = self._convert_examples_to_format(base)

    return base.format(report=report_text)
|
|
211
|
+
|
|
212
|
+
def _convert_examples_to_format(self, template: str) -> str:
|
|
213
|
+
"""Convert JSON examples in template to target output format."""
|
|
214
|
+
examples = [
|
|
215
|
+
{
|
|
216
|
+
"title": "Example 1: Congenital heart defect present",
|
|
217
|
+
"data": {
|
|
218
|
+
"cardiac_abnormality": True,
|
|
219
|
+
"congenital_heart_defect": True,
|
|
220
|
+
"evidence": {
|
|
221
|
+
"report_findings": [
|
|
222
|
+
"ventricular septal defect",
|
|
223
|
+
"left-to-right shunt",
|
|
224
|
+
"pulmonary artery pressure elevated",
|
|
225
|
+
],
|
|
226
|
+
"rationale": "The report explicitly identifies a large ventricular septal defect with hemodynamically significant shunt causing elevated pulmonary pressures, consistent with congenital heart disease requiring intervention.",
|
|
227
|
+
},
|
|
228
|
+
"_plain_answer": "CHD DETECTED",
|
|
229
|
+
},
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
"title": "Example 2: Normal cardiac findings",
|
|
233
|
+
"data": {
|
|
234
|
+
"cardiac_abnormality": False,
|
|
235
|
+
"congenital_heart_defect": False,
|
|
236
|
+
"evidence": {
|
|
237
|
+
"report_findings": [
|
|
238
|
+
"normal cardiac structure",
|
|
239
|
+
"physiological tricuspid regurgitation",
|
|
240
|
+
],
|
|
241
|
+
"rationale": "The report describes a structurally normal heart with only physiological findings. No evidence of congenital heart defect.",
|
|
242
|
+
},
|
|
243
|
+
"_plain_answer": "NORMAL",
|
|
244
|
+
},
|
|
245
|
+
},
|
|
246
|
+
]
|
|
247
|
+
|
|
248
|
+
# Build examples in target format
|
|
249
|
+
examples_str = ""
|
|
250
|
+
for ex in examples:
|
|
251
|
+
examples_str += f"\n{ex['title']}\n"
|
|
252
|
+
examples_str += self._format_example(ex["data"]) + "\n"
|
|
253
|
+
|
|
254
|
+
# Replace JSON examples section
|
|
255
|
+
import re
|
|
256
|
+
|
|
257
|
+
pattern = r"Example 1:.*?```\s*\n\nCardiac imaging report:"
|
|
258
|
+
replacement = examples_str.strip() + "\n\nCardiac imaging report:"
|
|
259
|
+
|
|
260
|
+
new_template = re.sub(pattern, replacement, template, flags=re.DOTALL)
|
|
261
|
+
|
|
262
|
+
# Update instruction in header
|
|
263
|
+
if self.output_format == "plain":
|
|
264
|
+
new_template = new_template.replace(
|
|
265
|
+
"give the output strictly in the json format",
|
|
266
|
+
"provide your answer in plain text with FINAL ANSWER: at the end",
|
|
267
|
+
)
|
|
268
|
+
else:
|
|
269
|
+
new_template = new_template.replace(
|
|
270
|
+
"give the output strictly in the json format",
|
|
271
|
+
f"give the output in {self.output_format.upper()} format",
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
return new_template
|
|
275
|
+
|
|
276
|
+
def _remove_few_shot_examples(self, template: str) -> str:
|
|
277
|
+
"""Remove few-shot examples from template for ablation studies."""
|
|
278
|
+
import re
|
|
279
|
+
|
|
280
|
+
pattern = r"Example \d+:.*?(?=(?:Radiology report:|Cardiac imaging report:|Neuroimaging report:|Oncology report:))"
|
|
281
|
+
cleaned = re.sub(pattern, "", template, flags=re.DOTALL)
|
|
282
|
+
|
|
283
|
+
cleaned = cleaned.replace("Follow the format of these two examples and give", "Give")
|
|
284
|
+
cleaned = cleaned.replace("follow the format of these two examples and give", "give")
|
|
285
|
+
|
|
286
|
+
return cleaned
|
|
287
|
+
|
|
288
|
+
def parse_response(self, response: str) -> Dict[str, Any]:
    """
    Parse a model response into a standardised result dict.

    Expected JSON payload:
    {
        "cardiac_abnormality": bool,
        "congenital_heart_defect": bool,
        "evidence": {
            "report_findings": [...],
            "rationale": "..."
        }
    }

    For a non-JSON ``output_format`` the response is first handed to
    ``self._parse_formatted_response`` (defined outside this method). If
    that raises, parsing falls back to the JSON path.

    JSON path, in order: a ```json fenced block; the first JSON object
    anywhere in the text that has a "congenital_heart_defect" key (nested
    objects such as "evidence" are supported); the whole response as JSON.

    Returns:
        On success: answer, cardiac_abnormality, congenital_heart_defect,
        reasoning, findings, raw, parsed_json.
        On failure: answer, reasoning, raw and ``parse_error=True``.
    """
    if self.output_format != "json":
        try:
            parsed = self._parse_formatted_response(response)
            # Map extracted values to standardized keys
            if self.output_format in ["plain", "markdown"]:
                ans_str = str(parsed.get("answer", "")).upper()
                is_positive = "CHD DETECTED" in ans_str
            else:
                is_positive = parsed.get("congenital_heart_defect", False)

            return {
                "answer": "CHD present" if is_positive else "no CHD",
                "cardiac_abnormality": parsed.get("cardiac_abnormality", is_positive),
                "congenital_heart_defect": is_positive,
                "reasoning": parsed.get("evidence", {}).get(
                    "rationale", parsed.get("reasoning", "")
                ),
                "findings": parsed.get("evidence", {}).get("report_findings", []),
                "raw": response,
                "parsed_json": parsed,
            }
        except Exception:
            # Deliberate best-effort: whatever the format parser raised,
            # retry the response as JSON below.
            pass

    def first_object_with_key(text: str, key: str) -> Optional[Dict[str, Any]]:
        """Decode the first JSON object in ``text`` that has ``key``."""
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                candidate, _end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and key in candidate:
                return candidate
            start = text.find("{", start + 1)
        return None

    parsed = None

    # 1) Fenced ```json block.
    fenced = re.search(r"```json\s*(.*?)\s*```", response, re.DOTALL)
    if fenced:
        try:
            parsed = json.loads(fenced.group(1))
        except json.JSONDecodeError:
            parsed = None

    # 2) Raw JSON object embedded in the text. A brace-free regex cannot
    #    match here because the payload nests an "evidence" object.
    if not isinstance(parsed, dict):
        parsed = first_object_with_key(response, "congenital_heart_defect")

    # 3) Whole response as JSON.
    if parsed is None:
        try:
            parsed = json.loads(response)
        except json.JSONDecodeError:
            parsed = None

    if not isinstance(parsed, dict):
        # Nothing usable (no JSON, or JSON that is not an object).
        return {
            "answer": response.strip(),
            "reasoning": response,
            "raw": response,
            "parse_error": True,
        }

    # "evidence" may be missing, null, or not an object in model output.
    evidence = parsed.get("evidence")
    if not isinstance(evidence, dict):
        evidence = {}

    return {
        "answer": "CHD present" if parsed.get("congenital_heart_defect") else "no CHD",
        "cardiac_abnormality": parsed.get("cardiac_abnormality", False),
        "congenital_heart_defect": parsed.get("congenital_heart_defect", False),
        "reasoning": evidence.get("rationale", ""),
        "findings": evidence.get("report_findings", []),
        "raw": response,
        "parsed_json": parsed,
    }
|
|
363
|
+
|
|
364
|
+
def get_system_message(self) -> Optional[str]:
    """Return the system role text selected at construction time."""
    role = self.system_role
    return role
|
|
366
|
+
|
|
367
|
+
def get_compatible_datasets(self) -> list[str]:
    """
    List the dataset names this prompt may be paired with.

    The prompt targets congenital heart defect detection in cardiac imaging
    reports, so only the cardiology dataset is listed; it should NOT be
    used for general medical QA.
    """
    compatible = ["cardiology"]
    return compatible
|
|
375
|
+
|
|
376
|
+
def get_prediction_field(self) -> str:
    """Name of the JSON field holding the binary classification result."""
    field = "congenital_heart_defect"
    return field
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""Histopathology report quality rating prompt strategy."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from typing import Any, Dict
|
|
6
|
+
|
|
7
|
+
from ..core.base import BasePromptStrategy
|
|
8
|
+
from ..core.registry import Registry
|
|
9
|
+
|
|
10
|
+
SYSTEM_ROLE = """You are a histopathology expert specializing in evaluating report quality.
|
|
11
|
+
Your goal is to assess the clinical accuracy, completeness, and clarity of histopathology reports.
|
|
12
|
+
Think rationally and explain your reasoning for quality ratings."""
|
|
13
|
+
|
|
14
|
+
SYSTEM_ROLE_CONTRARIAN = """You are a skeptical histopathology expert specializing in evaluating report quality.
|
|
15
|
+
Your goal is to critically assess histopathology reports and identify potential errors or omissions.
|
|
16
|
+
Question obvious conclusions and look for missing information. Think rationally and explain your reasoning."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Standard prompt with few-shot examples - INCLUDES SOURCE CASE FOR COMPARISON
|
|
20
|
+
PROMPT_TEMPLATE = """You are an expert histopathologist evaluating the quality of a GENERATED pathology report.
|
|
21
|
+
|
|
22
|
+
IMPORTANT: You must compare the generated report against the ORIGINAL CASE FINDINGS.
|
|
23
|
+
A report that looks well-written but does NOT match the original case should be rated 0 (Poor).
|
|
24
|
+
|
|
25
|
+
Rate on a scale of 0-2:
|
|
26
|
+
- 0 = Poor: Report does NOT match the original case, has major errors, or is misleading
|
|
27
|
+
- 1 = Partially correct: Matches some aspects of original case but incomplete or has minor errors
|
|
28
|
+
- 2 = Good: Accurately reflects the original case findings with proper terminology
|
|
29
|
+
|
|
30
|
+
Example 1 (Score: 0 - Poor - Well-written but WRONG diagnosis):
|
|
31
|
+
Original Case: "Invasive ductal carcinoma with lymph node metastasis"
|
|
32
|
+
Generated Report: "Benign fibrous lesion with no evidence of malignancy"
|
|
33
|
+
Rating: 0 - Report is well-structured but completely misses the malignancy. WRONG!
|
|
34
|
+
|
|
35
|
+
Example 2 (Score: 2 - Good - Matches original case):
|
|
36
|
+
Original Case: "Well-differentiated adenocarcinoma, pT2N0, clear margins"
|
|
37
|
+
Generated Report: "Sections show well-differentiated adenocarcinoma with glandular architecture. Margins clear. pT2N0."
|
|
38
|
+
Rating: 2 - Report accurately reflects the original case findings.
|
|
39
|
+
|
|
40
|
+
Example 3 (Score: 1 - Partial - Missing key details):
|
|
41
|
+
Original Case: "Hepatocellular carcinoma, grade 4/4, with vascular invasion"
|
|
42
|
+
Generated Report: "Hepatocellular carcinoma identified. Features consistent with malignancy."
|
|
43
|
+
Rating: 1 - Correct diagnosis but missing grade and vascular invasion status.
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
ORIGINAL CASE FINDINGS:
|
|
48
|
+
\"\"\"
|
|
49
|
+
{source_case}
|
|
50
|
+
\"\"\"
|
|
51
|
+
|
|
52
|
+
GENERATED REPORT TO EVALUATE:
|
|
53
|
+
\"\"\"
|
|
54
|
+
{report}
|
|
55
|
+
\"\"\"
|
|
56
|
+
|
|
57
|
+
Compare the generated report against the original case. Does it accurately reflect the findings?
|
|
58
|
+
|
|
59
|
+
Provide your response in JSON format:
|
|
60
|
+
```json
|
|
61
|
+
{{
|
|
62
|
+
"quality_score": 0, 1, or 2,
|
|
63
|
+
"reasoning": "Brief explanation comparing report to original case"
|
|
64
|
+
}}
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Response:
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
# Contrarian prompt - skeptical evaluation WITH source case comparison
|
|
71
|
+
PROMPT_TEMPLATE_CONTRARIAN = """You are a skeptical histopathologist reviewing a generated pathology report. Be critical and compare against the original case.
|
|
72
|
+
|
|
73
|
+
IMPORTANT: Compare the generated report against the ORIGINAL CASE FINDINGS.
|
|
74
|
+
Question any discrepancies. A well-written report that doesn't match the case is WRONG.
|
|
75
|
+
|
|
76
|
+
Look for:
|
|
77
|
+
- Does the diagnosis match the original case?
|
|
78
|
+
- Missing critical information from the original
|
|
79
|
+
- Incorrect or fabricated details
|
|
80
|
+
- Inappropriate terminology
|
|
81
|
+
|
|
82
|
+
Rate on a scale of 0-2:
|
|
83
|
+
- 0 = Poor: Does NOT match original case or has major errors
|
|
84
|
+
- 1 = Partially correct: Matches some but not all key findings
|
|
85
|
+
- 2 = Good: Accurately reflects original case
|
|
86
|
+
|
|
87
|
+
**Step 1**: Does the generated report match the ORIGINAL CASE?
|
|
88
|
+
**Step 2**: What key findings are missing or wrong?
|
|
89
|
+
**Step 3**: Final rating based on comparison.
|
|
90
|
+
|
|
91
|
+
ORIGINAL CASE FINDINGS:
|
|
92
|
+
\"\"\"
|
|
93
|
+
{source_case}
|
|
94
|
+
\"\"\"
|
|
95
|
+
|
|
96
|
+
GENERATED REPORT TO EVALUATE:
|
|
97
|
+
\"\"\"
|
|
98
|
+
{report}
|
|
99
|
+
\"\"\"
|
|
100
|
+
|
|
101
|
+
Provide your response in JSON format:
|
|
102
|
+
```json
|
|
103
|
+
{{
|
|
104
|
+
"quality_score": 0, 1, or 2,
|
|
105
|
+
"reasoning": "Brief explanation comparing report to original case"
|
|
106
|
+
}}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Response:
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
# Answer-first prompt - conclude then justify WITH source case
|
|
113
|
+
PROMPT_TEMPLATE_ANSWER_FIRST = """You are an expert histopathologist evaluating a generated pathology report.
|
|
114
|
+
|
|
115
|
+
Compare the GENERATED REPORT against the ORIGINAL CASE FINDINGS.
|
|
116
|
+
First state your rating (0, 1, or 2), then explain why.
|
|
117
|
+
|
|
118
|
+
Rating scale:
|
|
119
|
+
- 0 = Poor: Does NOT match original case
|
|
120
|
+
- 1 = Partial: Matches some findings but incomplete
|
|
121
|
+
- 2 = Good: Accurately reflects original case
|
|
122
|
+
|
|
123
|
+
ORIGINAL CASE FINDINGS:
|
|
124
|
+
\"\"\"
|
|
125
|
+
{source_case}
|
|
126
|
+
\"\"\"
|
|
127
|
+
|
|
128
|
+
GENERATED REPORT:
|
|
129
|
+
\"\"\"
|
|
130
|
+
{report}
|
|
131
|
+
\"\"\"
|
|
132
|
+
|
|
133
|
+
Provide your response in JSON format:
|
|
134
|
+
```json
|
|
135
|
+
{{
|
|
136
|
+
"quality_score": <0, 1, or 2>,
|
|
137
|
+
"reasoning": "Justification comparing report to original case"
|
|
138
|
+
}}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Response:
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@Registry.register_prompt("histopathology")
|
|
146
|
+
class HistopathologyPromptStrategy(BasePromptStrategy):
|
|
147
|
+
"""Prompt strategy for histopathology report quality rating.
|
|
148
|
+
|
|
149
|
+
3-class classification:
|
|
150
|
+
- 0: Poor/incorrect
|
|
151
|
+
- 1: Partially correct
|
|
152
|
+
- 2: Good/accurate
|
|
153
|
+
|
|
154
|
+
Supports:
|
|
155
|
+
- few_shot: Include examples in prompt
|
|
156
|
+
- answer_first: Rate first, then explain
|
|
157
|
+
- contrarian: Skeptical evaluation mode
|
|
158
|
+
"""
|
|
159
|
+
|
|
160
|
+
def __init__(
|
|
161
|
+
self,
|
|
162
|
+
name: str = "histopathology",
|
|
163
|
+
output_format: str = "json",
|
|
164
|
+
few_shot: bool = True,
|
|
165
|
+
answer_first: bool = False,
|
|
166
|
+
contrarian: bool = False,
|
|
167
|
+
**kwargs,
|
|
168
|
+
):
|
|
169
|
+
self._name = name
|
|
170
|
+
self.output_format = output_format.lower()
|
|
171
|
+
self.few_shot = few_shot
|
|
172
|
+
self.answer_first = answer_first
|
|
173
|
+
self.contrarian = contrarian
|
|
174
|
+
|
|
175
|
+
@property
def name(self) -> str:
    """Identifier this strategy was constructed with."""
    strategy_name = self._name
    return strategy_name
|
|
178
|
+
|
|
179
|
+
def build_prompt(self, input_data: Dict[str, Any]) -> str:
    """Build the rating prompt from a generated report and its source case.

    Args:
        input_data: Sample dict. The generated report is read from "text"
            (falling back to "report", then ""). The source case is read
            from ``input_data["metadata"]["ground_truth"]`` and defaults to
            "Not provided" when metadata or that key is missing.

    Returns:
        The fully formatted prompt string.
    """
    report = input_data.get("text", input_data.get("report", ""))
    # "metadata" may be absent or explicitly None; treat both as empty.
    metadata = input_data.get("metadata") or {}
    source_case = metadata.get("ground_truth", "Not provided")

    # Select template (priority: answer_first > contrarian > standard)
    if self.answer_first:
        template = PROMPT_TEMPLATE_ANSWER_FIRST
    elif self.contrarian:
        template = PROMPT_TEMPLATE_CONTRARIAN
    else:
        template = PROMPT_TEMPLATE

    # Only the standard template is stripped of its examples. Do it before
    # formatting so the removal regex only ever sees template text, never
    # the user-supplied report or source case.
    if not self.few_shot and not self.answer_first and not self.contrarian:
        template = self._remove_few_shot_examples(template)

    return template.format(report=report, source_case=source_case)
|
|
202
|
+
|
|
203
|
+
def _remove_few_shot_examples(self, template: str) -> str:
|
|
204
|
+
"""Remove few-shot examples from template."""
|
|
205
|
+
# Remove only the example block between "Example 1" and the section divider.
|
|
206
|
+
block_pattern = re.compile(r"\nExample 1.*?\n---\n", re.DOTALL)
|
|
207
|
+
if not block_pattern.search(template):
|
|
208
|
+
return template
|
|
209
|
+
return block_pattern.sub("\n\n", template, count=1)
|
|
210
|
+
|
|
211
|
+
def parse_response(self, response: str) -> Dict[str, Any]:
    """Parse a model response and extract the quality score.

    Parsing order: a ```json fenced block, then the whole response as JSON,
    then a textual ``"quality_score": <digit>`` match, and as a last resort
    the first standalone 0, 1 or 2 in the text.

    Returns:
        A dict that always has "raw_response", "parse_success" and
        "quality_score" (None if nothing was found), plus any other keys
        from a parsed JSON object. "raw_response" always holds the
        original response text.
    """
    result: Dict[str, Any] = {
        "raw_response": response,
        "parse_success": False,
        "quality_score": None,
    }

    # JSON candidates in priority order: fenced block, then whole response.
    candidates = []
    fenced = re.search(r"```json\s*(.*?)\s*```", response, re.DOTALL)
    if fenced:
        candidates.append(fenced.group(1))
    candidates.append(response)

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # A bare number, string or list is valid JSON but cannot be merged
        # into the result; leave it to the text fallbacks below.
        if not isinstance(parsed, dict):
            continue
        result.update(parsed)
        # Keys supplied by the model must not clobber the bookkeeping fields.
        result["raw_response"] = response
        result["parse_success"] = True
        break

    # Models sometimes quote the score; normalise "0"/"1"/"2" to int.
    score = result.get("quality_score")
    if isinstance(score, str) and score.strip() in {"0", "1", "2"}:
        result["quality_score"] = int(score.strip())

    # Fallback: find the score in the raw text.
    if not result["parse_success"] or result.get("quality_score") is None:
        score_match = re.search(r'"quality_score"\s*:\s*(\d)', response)
        if score_match is None:
            # Last resort: any 0, 1, or 2 standing alone.
            score_match = re.search(r"\b([012])\b", response)
        if score_match:
            result["quality_score"] = int(score_match.group(1))
            result["parse_success"] = True

    return result
|
|
252
|
+
|
|
253
|
+
def get_system_message(self) -> str:
    """Return the skeptical system role in contrarian mode, else the standard one."""
    return SYSTEM_ROLE_CONTRARIAN if self.contrarian else SYSTEM_ROLE
|
|
258
|
+
|
|
259
|
+
def get_compatible_datasets(self) -> list[str]:
    """List the dataset names this prompt may be paired with."""
    compatible = ["histopathology"]
    return compatible
|
|
262
|
+
|
|
263
|
+
def get_prediction_field(self) -> str:
    """Name of the JSON field holding the classification result."""
    field = "quality_score"
    return field
|