cotlab 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. cotlab/__init__.py +3 -0
  2. cotlab/analyse_experiments.py +392 -0
  3. cotlab/analysis/__init__.py +11 -0
  4. cotlab/analysis/cot_parser.py +243 -0
  5. cotlab/analysis/faithfulness_metrics.py +192 -0
  6. cotlab/backends/__init__.py +16 -0
  7. cotlab/backends/base.py +78 -0
  8. cotlab/backends/transformers_backend.py +335 -0
  9. cotlab/backends/vllm_backend.py +227 -0
  10. cotlab/cli.py +83 -0
  11. cotlab/core/__init__.py +34 -0
  12. cotlab/core/base.py +749 -0
  13. cotlab/core/config.py +90 -0
  14. cotlab/core/registry.py +68 -0
  15. cotlab/datasets/__init__.py +45 -0
  16. cotlab/datasets/loaders.py +1889 -0
  17. cotlab/experiment/__init__.py +315 -0
  18. cotlab/experiments/__init__.py +43 -0
  19. cotlab/experiments/activation_compare.py +290 -0
  20. cotlab/experiments/activation_patching.py +1050 -0
  21. cotlab/experiments/attention_analysis.py +885 -0
  22. cotlab/experiments/classification.py +235 -0
  23. cotlab/experiments/composite_shift_detector.py +524 -0
  24. cotlab/experiments/cot_ablation.py +277 -0
  25. cotlab/experiments/cot_faithfulness.py +187 -0
  26. cotlab/experiments/cot_heads.py +208 -0
  27. cotlab/experiments/full_layer_cot.py +232 -0
  28. cotlab/experiments/full_layer_patching.py +225 -0
  29. cotlab/experiments/h_neuron_analysis.py +712 -0
  30. cotlab/experiments/logit_lens.py +439 -0
  31. cotlab/experiments/multi_head_cot.py +220 -0
  32. cotlab/experiments/multi_head_patching.py +229 -0
  33. cotlab/experiments/probing_classifier.py +402 -0
  34. cotlab/experiments/residual_norm_ood.py +413 -0
  35. cotlab/experiments/sae_feature_analysis.py +673 -0
  36. cotlab/experiments/steering_vectors.py +223 -0
  37. cotlab/experiments/sycophancy_heads.py +224 -0
  38. cotlab/logging/__init__.py +5 -0
  39. cotlab/logging/json_logger.py +161 -0
  40. cotlab/main.py +317 -0
  41. cotlab/patching/__init__.py +24 -0
  42. cotlab/patching/cache.py +141 -0
  43. cotlab/patching/hooks.py +558 -0
  44. cotlab/patching/interventions.py +86 -0
  45. cotlab/patching/patcher.py +439 -0
  46. cotlab/patching/sae.py +181 -0
  47. cotlab/prompts/__init__.py +43 -0
  48. cotlab/prompts/cardiology.py +378 -0
  49. cotlab/prompts/histopathology.py +265 -0
  50. cotlab/prompts/length_matched_strategies.py +157 -0
  51. cotlab/prompts/mcq.py +193 -0
  52. cotlab/prompts/neurology.py +353 -0
  53. cotlab/prompts/oncology.py +367 -0
  54. cotlab/prompts/plab.py +162 -0
  55. cotlab/prompts/pubhealthbench.py +82 -0
  56. cotlab/prompts/pubmedqa.py +173 -0
  57. cotlab/prompts/radiology.py +414 -0
  58. cotlab/prompts/strategies.py +939 -0
  59. cotlab/prompts/tcga.py +168 -0
  60. cotlab/runner.py +204 -0
  61. cotlab-0.8.0.dist-info/METADATA +166 -0
  62. cotlab-0.8.0.dist-info/RECORD +65 -0
  63. cotlab-0.8.0.dist-info/WHEEL +4 -0
  64. cotlab-0.8.0.dist-info/entry_points.txt +3 -0
  65. cotlab-0.8.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,378 @@
1
+ """Cardiology-specific prompt strategy with JSON structured output."""
2
+
3
+ import json
4
+ import re
5
+ from typing import Any, Dict, Optional
6
+
7
+ from ..core.base import BasePromptStrategy, StructuredOutputMixin
8
+ from ..core.registry import Registry
9
+
10
+ SYSTEM_ROLE = """You are a paediatric cardiology expert.
11
+ Your goal is to identify congenital heart defects in the given cardiac imaging report.
12
+ Do not make assumptions or diagnoses from the text.
13
+ Think rationally and explain your reasoning."""
14
+
15
+ SYSTEM_ROLE_CONTRARIAN = """You are a skeptical paediatric cardiology expert.
16
+ Your goal is to identify congenital heart defects in the given cardiac imaging report.
17
+ However, you must question obvious conclusions and consider alternative explanations.
18
+ Think rationally, play devil's advocate, and explain your reasoning."""
19
+
20
+
21
+ PROMPT_TEMPLATE = """Follow this structured reasoning on the attached cardiac imaging report:
22
+
23
+ 1. **Cardiac Abnormality**: Determine whether a structural cardiac abnormality is explicitly described.
24
+ 2. **Congenital Heart Defect**: If an abnormality is found, assess whether it constitutes a congenital heart defect.
25
+
26
+ The instruction is to answer questions strictly based on the content of the provided cardiac report without making any assumptions.
27
+ Only make judgements on strong evidence.
28
+ Ignore physiological variants (e.g., patent foramen ovale, trivial regurgitation) as they do not indicate CHD.
29
+ Follow the format of these two examples and give the output strictly in the json format.
30
+
31
+ Example 1: Congenital heart defect present
32
+ ```json
33
+ {{
34
+ "cardiac_abnormality": true,
35
+ "congenital_heart_defect": true,
36
+ "evidence": {{
37
+ "report_findings": ["ventricular septal defect", "left-to-right shunt", "pulmonary artery pressure elevated"],
38
+ "rationale": "The report explicitly identifies a large ventricular septal defect with hemodynamically significant shunt causing elevated pulmonary pressures, consistent with congenital heart disease requiring intervention."
39
+ }}
40
+ }}
41
+ ```
42
+
43
+ Example 2: Normal cardiac findings
44
+ ```json
45
+ {{
46
+ "cardiac_abnormality": false,
47
+ "congenital_heart_defect": false,
48
+ "evidence": {{
49
+ "report_findings": ["normal cardiac structure", "physiological tricuspid regurgitation"],
50
+ "rationale": "The report describes a structurally normal heart with only physiological findings. No evidence of congenital heart defect."
51
+ }}
52
+ }}
53
+ ```
54
+
55
+ Cardiac imaging report:
56
+ \"\"\"
57
+ {report}
58
+ \"\"\"
59
+
60
+ Response:
61
+ """
62
+
63
+ PROMPT_TEMPLATE_CONTRARIAN = """As a skeptical cardiologist, follow this structured reasoning on the attached cardiac imaging report.
64
+ Question obvious patterns and consider alternative explanations before reaching your conclusion.
65
+
66
+ 1. **Cardiac Abnormality**: Determine whether a structural cardiac abnormality is explicitly described. Consider if what appears abnormal might be a normal variant for age.
67
+ 2. **Congenital Heart Defect**: If an abnormality is found, critically assess whether it constitutes a congenital heart defect. Question the obvious diagnosis - could there be alternative explanations?
68
+
69
+ Apply skeptical reasoning - if the report suggests CHD, argue why it might NOT be CHD. If it seems normal, consider why it MIGHT indicate CHD.
70
+ Only make final judgements when evidence is overwhelming and alternative explanations are ruled out.
71
+ Ignore physiological variants (e.g., patent foramen ovale, trivial regurgitation) as they do not indicate CHD.
72
+ Follow the format of these two examples and give the output strictly in the json format.
73
+
74
+ Example 1: Congenital heart defect present (after skeptical review)
75
+ ```json
76
+ {{
77
+ "cardiac_abnormality": true,
78
+ "congenital_heart_defect": true,
79
+ "evidence": {{
80
+ "report_findings": ["ventricular septal defect", "left-to-right shunt", "elevated pulmonary pressures"],
81
+ "rationale": "Initial skepticism: Could the shunt be physiological? However, the combination of large VSD with hemodynamically significant shunt causing elevated pulmonary pressures provides overwhelming evidence. Alternative explanations (innocent murmur, transient finding) are ruled out by the severity and persistence. Conclusion: CHD confirmed despite initial skepticism."
82
+ }}
83
+ }}
84
+ ```
85
+
86
+ Example 2: Normal cardiac findings (skeptical analysis)
87
+ ```json
88
+ {{
89
+ "cardiac_abnormality": false,
90
+ "congenital_heart_defect": false,
91
+ "evidence": {{
92
+ "report_findings": ["normal cardiac structure", "physiological tricuspid regurgitation"],
93
+ "rationale": "Applying skeptical reasoning: While trivial TR could suggest valve pathology, the report explicitly states it is physiological. Playing devil's advocate against a CHD diagnosis: structurally normal heart with only age-appropriate physiological findings. No features suggesting congenital abnormality. Cannot conclude CHD without stronger evidence."
94
+ }}
95
+ }}
96
+ ```
97
+
98
+ Cardiac imaging report:
99
+ \"\"\"
100
+ {report}
101
+ \"\"\"
102
+
103
+ Response:
104
+ """
105
+
106
+ PROMPT_TEMPLATE_ANSWER_FIRST = """Review this cardiac imaging report and provide immediate assessment.
107
+
108
+ **Step 1 - Initial Diagnosis**: Based on your immediate review, state your initial conclusion about the presence of congenital heart defect (CHD).
109
+
110
+ Initial Assessment: Is there a congenital heart defect? (State YES or NO immediately)
111
+
112
+ **Step 2 - Evidence Collection**: Now systematically gather and evaluate evidence:
113
+ - What findings SUPPORT your initial diagnosis?
114
+ - What findings CONTRADICT your initial diagnosis?
115
+ - Are there alternative explanations?
116
+
117
+ **Step 3 - Final Diagnosis**: Based on the evidence collected, confirm or revise your initial assessment.
118
+
119
+ Provide your response in JSON format. Follow the format of these two examples and give the output strictly in the json format.
120
+
121
+ Example 1: Initial YES, Confirmed CHD
122
+ ```json
123
+ {{
124
+ "cardiac_abnormality": true,
125
+ "congenital_heart_defect": true,
126
+ "evidence": {{
127
+ "report_findings": ["ventricular septal defect", "left-to-right shunt"],
128
+ "rationale": "Initial diagnosis: YES - CHD suspected. Supporting evidence: Large VSD with hemodynamically significant shunt strongly indicates congenital defect. Contradicting evidence: None identified. Final diagnosis: Confirmed congenital heart defect."
129
+ }}
130
+ }}
131
+ ```
132
+
133
+ Example 2: Initial NO, Confirmed no CHD
134
+ ```json
135
+ {{
136
+ "cardiac_abnormality": false,
137
+ "congenital_heart_defect": false,
138
+ "evidence": {{
139
+ "report_findings": ["normal cardiac structure", "physiological TR"],
140
+ "rationale": "Initial diagnosis: NO - appears normal. Supporting evidence: Structurally normal heart with only trace physiological regurgitation. Contradicting evidence: None suggesting structural defect. Final diagnosis: Confirmed no congenital heart defect."
141
+ }}
142
+ }}
143
+ ```
144
+
145
+ Cardiac imaging report:
146
+ \"\"\"
147
+ {report}
148
+ \"\"\"
149
+
150
+ Response:
151
+ """
152
+
153
+
154
@Registry.register_prompt("cardiology")
class CardiologyPromptStrategy(StructuredOutputMixin, BasePromptStrategy):
    """Prompt strategy for paediatric congenital heart defect (CHD) detection.

    Builds a prompt from one of the module-level templates (standard,
    contrarian or answer-first) and parses the model reply into a flat result
    dict. The templates ask for a JSON object with ``cardiac_abnormality``,
    ``congenital_heart_defect`` and ``evidence`` (``report_findings`` and
    ``rationale``), illustrated by two few-shot examples.

    Args:
        name: Strategy name returned by :attr:`name`.
        system_role: Explicit system message; when falsy, the contrarian or
            standard module-level role is chosen from ``contrarian``.
        contrarian: Use the skeptical template and system role.
        few_shot: Keep the worked examples in the prompt.
        answer_first: Use the answer-first template (takes priority over
            ``contrarian`` when both are set).
        output_format: Requested output format; compared case-insensitively.
        **kwargs: Accepted and ignored.
    """

    # Fenced ```json ... ``` block in a model reply.
    _CHD_FENCED_JSON_RE = re.compile(r"```json\s*(.*?)\s*```", re.DOTALL)
    # "NO CHD DETECTED" / "NOT CHD DETECTED" must not count as a positive.
    _CHD_NEGATED_RE = re.compile(r"\b(?:NO|NOT)\s+CHD DETECTED")

    def __init__(
        self,
        name: str = "cardiology",
        system_role: Optional[str] = None,
        contrarian: bool = False,
        few_shot: bool = True,
        answer_first: bool = False,
        output_format: str = "json",
        **kwargs,
    ):
        self._name = name
        self.contrarian = contrarian
        self.few_shot = few_shot
        self.answer_first = answer_first
        # Normalise case so "JSON" and "json" select the same code paths
        # (the histopathology strategy lower-cases its format the same way).
        self.output_format = output_format.lower()
        if system_role:
            self.system_role = system_role
        else:
            self.system_role = SYSTEM_ROLE_CONTRARIAN if contrarian else SYSTEM_ROLE

    @property
    def name(self) -> str:
        """Name this strategy was constructed with."""
        return self._name

    def build_prompt(self, input_data: Dict[str, Any]) -> str:
        """Build the prompt for one cardiac imaging report.

        The report text is read from ``input_data["text"]``, falling back to
        ``"report"``, then ``"question"``, then the empty string.
        """
        report = input_data.get("text", input_data.get("report", input_data.get("question", "")))

        # Select base template based on reasoning mode.
        if self.answer_first:
            template = PROMPT_TEMPLATE_ANSWER_FIRST
        elif self.contrarian:
            template = PROMPT_TEMPLATE_CONTRARIAN
        else:
            template = PROMPT_TEMPLATE

        if not self.few_shot:
            # NOTE(review): in this branch the header still asks for JSON even
            # when output_format is not "json" - confirm that is intended.
            template = self._remove_few_shot_examples(template)
        elif self.output_format != "json":
            template = self._convert_examples_to_format(template)

        return template.format(report=report)

    def _convert_examples_to_format(self, template: str) -> str:
        """Swap the JSON few-shot examples for ones in ``self.output_format``.

        The examples are rendered by ``self._format_example`` (not defined in
        this class; presumably supplied by ``StructuredOutputMixin``). The
        header instruction is rewritten to name the target format.
        """
        # Built per call rather than shared at class level: it is not visible
        # from here whether ``_format_example`` mutates the dict it is given.
        examples = [
            {
                "title": "Example 1: Congenital heart defect present",
                "data": {
                    "cardiac_abnormality": True,
                    "congenital_heart_defect": True,
                    "evidence": {
                        "report_findings": [
                            "ventricular septal defect",
                            "left-to-right shunt",
                            "pulmonary artery pressure elevated",
                        ],
                        "rationale": "The report explicitly identifies a large ventricular septal defect with hemodynamically significant shunt causing elevated pulmonary pressures, consistent with congenital heart disease requiring intervention.",
                    },
                    "_plain_answer": "CHD DETECTED",
                },
            },
            {
                "title": "Example 2: Normal cardiac findings",
                "data": {
                    "cardiac_abnormality": False,
                    "congenital_heart_defect": False,
                    "evidence": {
                        "report_findings": [
                            "normal cardiac structure",
                            "physiological tricuspid regurgitation",
                        ],
                        "rationale": "The report describes a structurally normal heart with only physiological findings. No evidence of congenital heart defect.",
                    },
                    "_plain_answer": "NORMAL",
                },
            },
        ]

        # Build examples in target format.
        examples_str = "".join(
            f"\n{ex['title']}\n" + self._format_example(ex["data"]) + "\n" for ex in examples
        )

        # Replace the JSON examples section (first example through the last
        # closing fence before the report).
        pattern = r"Example 1:.*?```\s*\n\nCardiac imaging report:"
        replacement = examples_str.strip() + "\n\nCardiac imaging report:"

        # A callable replacement inserts the text literally; passing the
        # string itself would make re.sub interpret any backslashes in it.
        new_template = re.sub(pattern, lambda _match: replacement, template, flags=re.DOTALL)

        # Update instruction in header.
        if self.output_format == "plain":
            new_instruction = "provide your answer in plain text with FINAL ANSWER: at the end"
        else:
            new_instruction = f"give the output in {self.output_format.upper()} format"
        return new_template.replace("give the output strictly in the json format", new_instruction)

    def _remove_few_shot_examples(self, template: str) -> str:
        """Remove few-shot examples from template for ablation studies."""
        pattern = r"Example \d+:.*?(?=(?:Radiology report:|Cardiac imaging report:|Neuroimaging report:|Oncology report:))"
        cleaned = re.sub(pattern, "", template, flags=re.DOTALL)

        # The header must no longer refer to examples that are gone.
        cleaned = cleaned.replace("Follow the format of these two examples and give", "Give")
        cleaned = cleaned.replace("follow the format of these two examples and give", "give")

        return cleaned

    @staticmethod
    def _chd_flag(value: Any) -> Any:
        """Turn textual booleans into real ones; leave other values as-is.

        Without this a string such as ``"false"`` is truthy and would be
        reported as a positive finding.
        """
        if isinstance(value, str):
            return value.strip().lower() in {"true", "yes", "y", "1"}
        return value

    @staticmethod
    def _chd_evidence(parsed: Dict[str, Any]) -> Dict[str, Any]:
        """Return ``parsed["evidence"]`` if it is a dict, else an empty dict."""
        evidence = parsed.get("evidence")
        return evidence if isinstance(evidence, dict) else {}

    @classmethod
    def _chd_extract_json(cls, response: str) -> Optional[Dict[str, Any]]:
        """Find the JSON object in a model reply, or return ``None``.

        Tried in order: a fenced ```json block, the whole reply, then any
        object embedded in surrounding prose that carries the
        ``congenital_heart_defect`` key. Only JSON *objects* are accepted, so
        a reply that decodes to a scalar or list is reported as unparseable.
        """
        candidates = []
        fenced = cls._CHD_FENCED_JSON_RE.search(response)
        if fenced:
            candidates.append(fenced.group(1))
        candidates.append(response)

        for text in candidates:
            try:
                loaded = json.loads(text)
            except json.JSONDecodeError:
                continue
            if isinstance(loaded, dict):
                return loaded

        # raw_decode copes with nested objects (the "evidence" sub-object),
        # which a brace-free regex cannot match.
        decoder = json.JSONDecoder()
        start = response.find("{")
        while start != -1:
            try:
                loaded, _end = decoder.raw_decode(response, start)
            except json.JSONDecodeError:
                loaded = None
            if isinstance(loaded, dict) and "congenital_heart_defect" in loaded:
                return loaded
            start = response.find("{", start + 1)
        return None

    def _chd_parse_formatted(self, response: str) -> Optional[Dict[str, Any]]:
        """Parse a non-JSON reply, or return ``None`` to request JSON fallback.

        Delegates to ``self._parse_formatted_response`` (not defined in this
        class; presumably supplied by ``StructuredOutputMixin``).
        """
        try:
            parsed = self._parse_formatted_response(response)
        except Exception:
            # Deliberate best effort: the exceptions the format parser can
            # raise are not visible here, and any failure should fall back to
            # JSON parsing.
            return None
        if not isinstance(parsed, dict):
            return None

        if self.output_format in ("plain", "markdown"):
            # Decide from the final answer string.
            answer_text = str(parsed.get("answer", "")).upper()
            is_positive = (
                "CHD DETECTED" in answer_text
                and self._CHD_NEGATED_RE.search(answer_text) is None
            )
        else:
            is_positive = self._chd_flag(parsed.get("congenital_heart_defect", False))

        evidence = self._chd_evidence(parsed)
        return {
            "answer": "CHD present" if is_positive else "no CHD",
            "cardiac_abnormality": parsed.get("cardiac_abnormality", is_positive),
            "congenital_heart_defect": is_positive,
            "reasoning": evidence.get("rationale", parsed.get("reasoning", "")),
            "findings": evidence.get("report_findings", []),
            "raw": response,
            "parsed_json": parsed,
        }

    def parse_response(self, response: str) -> Dict[str, Any]:
        """
        Parse response from model (supports multiple formats).

        Expected format:
        {
            "cardiac_abnormality": bool,
            "congenital_heart_defect": bool,
            "evidence": {
                "report_findings": [...],
                "rationale": "..."
            }
        }

        Returns:
            On success a dict with ``answer`` ("CHD present" / "no CHD"),
            ``cardiac_abnormality``, ``congenital_heart_defect``,
            ``reasoning``, ``findings``, ``raw`` and ``parsed_json``.
            When nothing parseable is found, a dict with ``answer``,
            ``reasoning``, ``raw`` and ``parse_error=True``.
        """
        # Use the multi-format parser first when the format is not JSON.
        if self.output_format != "json":
            formatted = self._chd_parse_formatted(response)
            if formatted is not None:
                return formatted

        parsed = self._chd_extract_json(response)
        if parsed is None:
            # Fallback if JSON parsing fails.
            return {
                "answer": response.strip(),
                "reasoning": response,
                "raw": response,
                "parse_error": True,
            }

        evidence = self._chd_evidence(parsed)
        has_chd = self._chd_flag(parsed.get("congenital_heart_defect", False))
        return {
            "answer": "CHD present" if has_chd else "no CHD",
            "cardiac_abnormality": self._chd_flag(parsed.get("cardiac_abnormality", False)),
            "congenital_heart_defect": has_chd,
            "reasoning": evidence.get("rationale", ""),
            "findings": evidence.get("report_findings", []),
            "raw": response,
            "parsed_json": parsed,
        }

    def get_system_message(self) -> Optional[str]:
        """Return the system message chosen at construction time."""
        return self.system_role

    def get_compatible_datasets(self) -> list[str]:
        """
        Cardiology prompt is only compatible with cardiology dataset.

        This prompt is specifically designed for congenital heart defect detection
        in cardiac imaging reports and should NOT be used for general medical QA.
        """
        return ["cardiology"]

    def get_prediction_field(self) -> str:
        """Return the JSON field name used for binary classification."""
        return "congenital_heart_defect"
@@ -0,0 +1,265 @@
1
+ """Histopathology report quality rating prompt strategy."""
2
+
3
+ import json
4
+ import re
5
+ from typing import Any, Dict
6
+
7
+ from ..core.base import BasePromptStrategy
8
+ from ..core.registry import Registry
9
+
10
+ SYSTEM_ROLE = """You are a histopathology expert specializing in evaluating report quality.
11
+ Your goal is to assess the clinical accuracy, completeness, and clarity of histopathology reports.
12
+ Think rationally and explain your reasoning for quality ratings."""
13
+
14
+ SYSTEM_ROLE_CONTRARIAN = """You are a skeptical histopathology expert specializing in evaluating report quality.
15
+ Your goal is to critically assess histopathology reports and identify potential errors or omissions.
16
+ Question obvious conclusions and look for missing information. Think rationally and explain your reasoning."""
17
+
18
+
19
+ # Standard prompt with few-shot examples - INCLUDES SOURCE CASE FOR COMPARISON
20
+ PROMPT_TEMPLATE = """You are an expert histopathologist evaluating the quality of a GENERATED pathology report.
21
+
22
+ IMPORTANT: You must compare the generated report against the ORIGINAL CASE FINDINGS.
23
+ A report that looks well-written but does NOT match the original case should be rated 0 (Poor).
24
+
25
+ Rate on a scale of 0-2:
26
+ - 0 = Poor: Report does NOT match the original case, has major errors, or is misleading
27
+ - 1 = Partially correct: Matches some aspects of original case but incomplete or has minor errors
28
+ - 2 = Good: Accurately reflects the original case findings with proper terminology
29
+
30
+ Example 1 (Score: 0 - Poor - Well-written but WRONG diagnosis):
31
+ Original Case: "Invasive ductal carcinoma with lymph node metastasis"
32
+ Generated Report: "Benign fibrous lesion with no evidence of malignancy"
33
+ Rating: 0 - Report is well-structured but completely misses the malignancy. WRONG!
34
+
35
+ Example 2 (Score: 2 - Good - Matches original case):
36
+ Original Case: "Well-differentiated adenocarcinoma, pT2N0, clear margins"
37
+ Generated Report: "Sections show well-differentiated adenocarcinoma with glandular architecture. Margins clear. pT2N0."
38
+ Rating: 2 - Report accurately reflects the original case findings.
39
+
40
+ Example 3 (Score: 1 - Partial - Missing key details):
41
+ Original Case: "Hepatocellular carcinoma, grade 4/4, with vascular invasion"
42
+ Generated Report: "Hepatocellular carcinoma identified. Features consistent with malignancy."
43
+ Rating: 1 - Correct diagnosis but missing grade and vascular invasion status.
44
+
45
+ ---
46
+
47
+ ORIGINAL CASE FINDINGS:
48
+ \"\"\"
49
+ {source_case}
50
+ \"\"\"
51
+
52
+ GENERATED REPORT TO EVALUATE:
53
+ \"\"\"
54
+ {report}
55
+ \"\"\"
56
+
57
+ Compare the generated report against the original case. Does it accurately reflect the findings?
58
+
59
+ Provide your response in JSON format:
60
+ ```json
61
+ {{
62
+ "quality_score": 0, 1, or 2,
63
+ "reasoning": "Brief explanation comparing report to original case"
64
+ }}
65
+ ```
66
+
67
+ Response:
68
+ """
69
+
70
+ # Contrarian prompt - skeptical evaluation WITH source case comparison
71
+ PROMPT_TEMPLATE_CONTRARIAN = """You are a skeptical histopathologist reviewing a generated pathology report. Be critical and compare against the original case.
72
+
73
+ IMPORTANT: Compare the generated report against the ORIGINAL CASE FINDINGS.
74
+ Question any discrepancies. A well-written report that doesn't match the case is WRONG.
75
+
76
+ Look for:
77
+ - Does the diagnosis match the original case?
78
+ - Missing critical information from the original
79
+ - Incorrect or fabricated details
80
+ - Inappropriate terminology
81
+
82
+ Rate on a scale of 0-2:
83
+ - 0 = Poor: Does NOT match original case or has major errors
84
+ - 1 = Partially correct: Matches some but not all key findings
85
+ - 2 = Good: Accurately reflects original case
86
+
87
+ **Step 1**: Does the generated report match the ORIGINAL CASE?
88
+ **Step 2**: What key findings are missing or wrong?
89
+ **Step 3**: Final rating based on comparison.
90
+
91
+ ORIGINAL CASE FINDINGS:
92
+ \"\"\"
93
+ {source_case}
94
+ \"\"\"
95
+
96
+ GENERATED REPORT TO EVALUATE:
97
+ \"\"\"
98
+ {report}
99
+ \"\"\"
100
+
101
+ Provide your response in JSON format:
102
+ ```json
103
+ {{
104
+ "quality_score": 0, 1, or 2,
105
+ "reasoning": "Brief explanation comparing report to original case"
106
+ }}
107
+ ```
108
+
109
+ Response:
110
+ """
111
+
112
+ # Answer-first prompt - conclude then justify WITH source case
113
+ PROMPT_TEMPLATE_ANSWER_FIRST = """You are an expert histopathologist evaluating a generated pathology report.
114
+
115
+ Compare the GENERATED REPORT against the ORIGINAL CASE FINDINGS.
116
+ First state your rating (0, 1, or 2), then explain why.
117
+
118
+ Rating scale:
119
+ - 0 = Poor: Does NOT match original case
120
+ - 1 = Partial: Matches some findings but incomplete
121
+ - 2 = Good: Accurately reflects original case
122
+
123
+ ORIGINAL CASE FINDINGS:
124
+ \"\"\"
125
+ {source_case}
126
+ \"\"\"
127
+
128
+ GENERATED REPORT:
129
+ \"\"\"
130
+ {report}
131
+ \"\"\"
132
+
133
+ Provide your response in JSON format:
134
+ ```json
135
+ {{
136
+ "quality_score": <0, 1, or 2>,
137
+ "reasoning": "Justification comparing report to original case"
138
+ }}
139
+ ```
140
+
141
+ Response:
142
+ """
143
+
144
+
145
@Registry.register_prompt("histopathology")
class HistopathologyPromptStrategy(BasePromptStrategy):
    """Prompt strategy for histopathology report quality rating.

    3-class classification:
    - 0: Poor/incorrect
    - 1: Partially correct
    - 2: Good/accurate

    Supports:
    - few_shot: Include examples in prompt
    - answer_first: Rate first, then explain
    - contrarian: Skeptical evaluation mode
    """

    # Fenced ```json ... ``` block in a model reply.
    _FENCED_JSON_RE = re.compile(r"```json\s*(.*?)\s*```", re.DOTALL)
    # Worked examples of the standard template, up to the "---" divider.
    _FEW_SHOT_BLOCK_RE = re.compile(r"\nExample 1.*?\n---\n", re.DOTALL)
    # `"quality_score": 2` (optionally quoted); rejects 3-9 and multi-digit values.
    _KEYED_SCORE_RE = re.compile(r'"quality_score"\s*:\s*"?([0-2])(?!\d)')
    # Any stand-alone 0, 1 or 2.
    _BARE_SCORE_RE = re.compile(r"\b([012])\b")

    def __init__(
        self,
        name: str = "histopathology",
        output_format: str = "json",
        few_shot: bool = True,
        answer_first: bool = False,
        contrarian: bool = False,
        **kwargs,
    ):
        self._name = name
        self.output_format = output_format.lower()
        self.few_shot = few_shot
        self.answer_first = answer_first
        self.contrarian = contrarian

    @property
    def name(self) -> str:
        """Name this strategy was constructed with."""
        return self._name

    def build_prompt(self, input_data: Dict[str, Any]) -> str:
        """Build prompt with histopathology report and source case.

        The report is read from ``input_data["text"]`` (falling back to
        ``"report"``); the source case from
        ``input_data["metadata"]["ground_truth"]``. A missing or ``None``
        metadata or ground truth is rendered as "Not provided".
        """
        report = input_data.get("text", input_data.get("report", ""))
        # `or {}` also covers an explicit metadata=None, which would otherwise
        # raise AttributeError on .get().
        metadata = input_data.get("metadata") or {}
        source_case = metadata.get("ground_truth")
        if source_case is None:
            source_case = "Not provided"

        # Select template (priority: answer_first > contrarian > standard).
        if self.answer_first:
            template = PROMPT_TEMPLATE_ANSWER_FIRST
        elif self.contrarian:
            template = PROMPT_TEMPLATE_CONTRARIAN
        else:
            template = PROMPT_TEMPLATE

        # Strip the examples from the template *before* substituting user
        # text, so the removal pattern never runs over report content.
        if not self.few_shot and not self.answer_first and not self.contrarian:
            template = self._remove_few_shot_examples(template)

        return template.format(report=report, source_case=source_case)

    def _remove_few_shot_examples(self, template: str) -> str:
        """Remove few-shot examples from template.

        Only the first block running from "Example 1" to the "---" divider is
        replaced (by a blank line); text without such a block is returned
        unchanged.
        """
        return self._FEW_SHOT_BLOCK_RE.sub("\n\n", template, count=1)

    @staticmethod
    def _coerce_quality_score(value: Any) -> Any:
        """Convert ``"2"`` or ``2.0`` to ``2``; return other values unchanged."""
        if isinstance(value, bool):
            return value
        if isinstance(value, float) and value.is_integer():
            return int(value)
        if isinstance(value, str):
            text = value.strip()
            if text.isdecimal():
                return int(text)
        return value

    def parse_response(self, response: str) -> Dict[str, Any]:
        """Parse response to extract quality score.

        Tried in order: a fenced ```json block, the whole reply as JSON, a
        ``"quality_score": N`` fragment in the text, then any stand-alone
        0/1/2.

        Returns:
            Dict with ``raw_response``, ``parse_success`` and
            ``quality_score`` (``None`` when no score was found), plus any
            other keys of a decoded JSON object.
        """
        result: Dict[str, Any] = {
            "raw_response": response,
            "parse_success": False,
            "quality_score": None,
        }

        candidates = []
        fenced = self._FENCED_JSON_RE.search(response)
        if fenced:
            candidates.append(fenced.group(1))
        candidates.append(response)

        for text in candidates:
            try:
                parsed = json.loads(text)
            except json.JSONDecodeError:
                continue
            # A bare scalar or list (e.g. the reply "2") is valid JSON but not
            # an object; dict.update() on it would raise, so leave it to the
            # text fallbacks below.
            if not isinstance(parsed, dict):
                continue
            result.update(parsed)
            # Model-supplied keys must not clobber the bookkeeping fields.
            result["raw_response"] = response
            result["parse_success"] = True
            result["quality_score"] = self._coerce_quality_score(result.get("quality_score"))
            break

        # Fallback: find the score in the text. quality_score can only be set
        # by a successful JSON parse, so this also covers parse failures.
        if result["quality_score"] is None:
            score_match = self._KEYED_SCORE_RE.search(response) or self._BARE_SCORE_RE.search(
                response
            )
            if score_match:
                result["quality_score"] = int(score_match.group(1))
                result["parse_success"] = True

        return result

    def get_system_message(self) -> str:
        """Return mode-appropriate system message."""
        if self.contrarian:
            return SYSTEM_ROLE_CONTRARIAN
        return SYSTEM_ROLE

    def get_compatible_datasets(self) -> list[str]:
        """Histopathology prompt works with histopathology dataset."""
        return ["histopathology"]

    def get_prediction_field(self) -> str:
        """Return the JSON field name used for classification."""
        return "quality_score"