parishad-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parishad/__init__.py +70 -0
- parishad/__main__.py +10 -0
- parishad/checker/__init__.py +25 -0
- parishad/checker/deterministic.py +644 -0
- parishad/checker/ensemble.py +496 -0
- parishad/checker/retrieval.py +546 -0
- parishad/cli/__init__.py +6 -0
- parishad/cli/code.py +3254 -0
- parishad/cli/main.py +1158 -0
- parishad/cli/prarambh.py +99 -0
- parishad/cli/sthapana.py +368 -0
- parishad/config/modes.py +139 -0
- parishad/config/pipeline.core.yaml +128 -0
- parishad/config/pipeline.extended.yaml +172 -0
- parishad/config/pipeline.fast.yaml +89 -0
- parishad/config/user_config.py +115 -0
- parishad/data/catalog.py +118 -0
- parishad/data/models.json +108 -0
- parishad/memory/__init__.py +79 -0
- parishad/models/__init__.py +181 -0
- parishad/models/backends/__init__.py +247 -0
- parishad/models/backends/base.py +211 -0
- parishad/models/backends/huggingface.py +318 -0
- parishad/models/backends/llama_cpp.py +239 -0
- parishad/models/backends/mlx_lm.py +141 -0
- parishad/models/backends/ollama.py +253 -0
- parishad/models/backends/openai_api.py +193 -0
- parishad/models/backends/transformers_hf.py +198 -0
- parishad/models/costs.py +385 -0
- parishad/models/downloader.py +1557 -0
- parishad/models/optimizations.py +871 -0
- parishad/models/profiles.py +610 -0
- parishad/models/reliability.py +876 -0
- parishad/models/runner.py +651 -0
- parishad/models/tokenization.py +287 -0
- parishad/orchestrator/__init__.py +24 -0
- parishad/orchestrator/config_loader.py +210 -0
- parishad/orchestrator/engine.py +1113 -0
- parishad/orchestrator/exceptions.py +14 -0
- parishad/roles/__init__.py +71 -0
- parishad/roles/base.py +712 -0
- parishad/roles/dandadhyaksha.py +163 -0
- parishad/roles/darbari.py +246 -0
- parishad/roles/majumdar.py +274 -0
- parishad/roles/pantapradhan.py +150 -0
- parishad/roles/prerak.py +357 -0
- parishad/roles/raja.py +345 -0
- parishad/roles/sacheev.py +203 -0
- parishad/roles/sainik.py +427 -0
- parishad/roles/sar_senapati.py +164 -0
- parishad/roles/vidushak.py +69 -0
- parishad/tools/__init__.py +7 -0
- parishad/tools/base.py +57 -0
- parishad/tools/fs.py +110 -0
- parishad/tools/perception.py +96 -0
- parishad/tools/retrieval.py +74 -0
- parishad/tools/shell.py +103 -0
- parishad/utils/__init__.py +7 -0
- parishad/utils/hardware.py +122 -0
- parishad/utils/logging.py +79 -0
- parishad/utils/scanner.py +164 -0
- parishad/utils/text.py +61 -0
- parishad/utils/tracing.py +133 -0
- parishad-0.1.0.dist-info/METADATA +256 -0
- parishad-0.1.0.dist-info/RECORD +68 -0
- parishad-0.1.0.dist-info/WHEEL +4 -0
- parishad-0.1.0.dist-info/entry_points.txt +2 -0
- parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/roles/pantapradhan.py
ADDED
@@ -0,0 +1,150 @@
+"""
+Pantapradhan (Manager/PlannerHigh) role for the Parishad council.
+Creates high-level strategic plans and identifies phases.
+"""
+
+from typing import Any, Optional
+
+from .base import (
+    Role,
+    RoleInput,
+    Slot,
+    RoleOutput,
+)
+
+
+PLANNER_HIGH_SYSTEM_PROMPT = """You are Pantapradhan, the Manager in the Parishad council. Your job is to create strategic plans and identify the major components of a task.
+
+Your responsibilities:
+1. Understand the overall goal and scope
+2. Identify major sub-tasks or phases
+3. Determine the strategic approach
+4. Identify key decision points and risks
+5. Estimate overall complexity
+
+You must ALWAYS respond with a valid JSON object in the following format:
+```json
+{
+  "goal": "Clear statement of what needs to be achieved",
+  "approach": "High-level strategy description",
+  "phases": [
+    {
+      "id": 1,
+      "name": "Phase name",
+      "description": "What this phase accomplishes",
+      "success_criteria": "How to know this phase is complete"
+    }
+  ],
+  "key_decisions": ["Critical choices that affect the solution"],
+  "risks": ["Potential issues or challenges"],
+  "complexity": "trivial|simple|moderate|complex|very_complex",
+  "task_category": "code|math|qa|explanation|creative|analysis"
+}
+```
+
+Focus on the big picture. Don't worry about implementation details."""
+
+
+PLANNER_HIGH_USER_TEMPLATE = """Create a high-level strategic plan for the following task.
+
+ORIGINAL QUERY:
+{user_query}
+
+TASK SPECIFICATION:
+{task_spec}
+
+Provide a strategic overview and decomposition. Respond with ONLY a valid JSON object."""
+
+
+class Pantapradhan(Role):
+    """
+    Pantapradhan (Manager) creates high-level strategic plans.
+
+    - Slot: BIG (13-34B)
+    - Purpose: Strategic decomposition and approach selection
+    - Output: High-level plan with phases, risks, decisions
+    """
+
+    name = "pantapradhan"
+    default_slot = Slot.BIG
+
+    def __init__(self, model_runner: Any, **kwargs):
+        super().__init__(
+            model_runner=model_runner,
+            slot=kwargs.get("slot", Slot.BIG),
+            max_tokens=kwargs.get("max_tokens", 768),
+            temperature=kwargs.get("temperature", 0.5)
+        )
+
+    @property
+    def system_prompt(self) -> str:
+        return PLANNER_HIGH_SYSTEM_PROMPT
+
+    def format_input(self, role_input: RoleInput) -> str:
+        task_spec_str = self._format_task_spec(role_input.task_spec)
+
+        return PLANNER_HIGH_USER_TEMPLATE.format(
+            user_query=role_input.user_query,
+            task_spec=task_spec_str
+        )
+
+    def _format_task_spec(self, task_spec: Optional[dict]) -> str:
+        """Format task spec for inclusion in prompt."""
+        if not task_spec:
+            return "No task specification provided."
+
+        lines = [
+            f"Problem: {task_spec.get('problem', 'Not specified')}",
+            f"Constraints: {', '.join(task_spec.get('constraints', []))}",
+            f"Output Format: {task_spec.get('output_format', 'text')}",
+            f"Difficulty: {task_spec.get('difficulty_guess', 'medium')}",
+            f"Task Type: {task_spec.get('task_type', 'unknown')}",
+        ]
+        return "\n".join(lines)
+
+    def parse_output(self, raw_output: str) -> dict[str, Any]:
+        """Parse LLM output into high-level plan dict."""
+        import json
+        import re
+
+        # Try to extract JSON from the response
+        json_match = re.search(r'\{[\s\S]*\}', raw_output)
+        if json_match:
+            try:
+                data = json.loads(json_match.group())
+            except json.JSONDecodeError:
+                data = {}
+        else:
+            data = {}
+
+        # Normalize phases
+        phases = []
+        for phase in data.get("phases", []):
+            phases.append({
+                "id": phase.get("id", len(phases) + 1),
+                "name": phase.get("name", "Unnamed phase"),
+                "description": phase.get("description", ""),
+                "success_criteria": phase.get("success_criteria", "")
+            })
+
+        return {
+            "plan_high": {
+                "goal": data.get("goal", ""),
+                "approach": data.get("approach", ""),
+                "phases": phases,
+                "key_decisions": data.get("key_decisions", []),
+                "risks": data.get("risks", []),
+                "complexity": self._normalize_complexity(data.get("complexity", "moderate")),
+                "task_category": data.get("task_category", "unknown")
+            },
+            # Compatible return
+            "goal": data.get("goal", ""),
+            "approach": data.get("approach", ""),
+            "phases": phases
+        }
+
+    def _normalize_complexity(self, value: str) -> str:
+        """Normalize complexity to valid enum value."""
+        valid = {"trivial", "simple", "moderate", "complex", "very_complex"}
+        normalized = value.lower().strip().replace(" ", "_")
+        return normalized if normalized in valid else "moderate"
parishad/roles/prerak.py
ADDED
@@ -0,0 +1,357 @@
+"""
+Prerak (Challenger/Checker) role for the Parishad council.
+Validates outputs using ensemble of verification methods.
+"""
+
+from typing import Any, Optional
+
+from .base import (
+    Role,
+    RoleInput,
+    RoleOutput,
+    Slot,
+    Verdict,
+    CheckerFlag,
+    RoleMetadata,
+)
+
+
+CHECKER_SYSTEM_PROMPT = """You are Prerak, the Challenger in the Parishad council. Your job is to validate the Implementor's output for correctness, completeness, and safety.
+
+Your responsibilities:
+1. Verify the output meets the task requirements
+2. Check for factual accuracy (when possible)
+3. Identify errors, inconsistencies, or omissions
+4. Flag potential issues with severity levels
+5. Suggest specific fixes for problems found
+
+You must ALWAYS respond with a valid JSON object in the following format:
+```json
+{
+  "flags": [
+    {
+      "type": "claim_unsupported|syntax_error|logic_error|incomplete_output|format_error",
+      "severity": "low|medium|high|critical",
+      "detail": "Description of the issue",
+      "location": "Where in the output the issue was found",
+      "suggested_fix": "How to fix this issue"
+    }
+  ],
+  "must_fix": true,
+  "evidence": [
+    {
+      "source": "Source of evidence",
+      "source_type": "retrieval|deterministic|llm_judgment",
+      "snippet": "Relevant snippet",
+      "relevance_score": 0.8,
+      "supports_claim": true
+    }
+  ],
+  "suggested_edits": ["Specific fix 1", "Specific fix 2"],
+  "overall_confidence": 0.75,
+  "checks_performed": ["schema", "syntax", "logic", "retrieval"]
+}
+```
+
+Flag types:
+- format_error: Output doesn't match expected format
+- schema_violation: JSON/structure issues
+- syntax_error: Code syntax problems
+- runtime_error: Code would fail at runtime
+- test_failure: Code fails test cases
+- claim_unsupported: Factual claim without support
+- claim_contradicted: Claim contradicts known facts
+- claim_uncertain: Claim cannot be verified
+- safety_violation: Content policy issues
+- pii_detected: Personal information found
+- incomplete_output: Missing required parts
+- logic_error: Reasoning or logic flaw
+
+Severity levels:
+- low: Minor issue, doesn't affect correctness
+- medium: Should be fixed but output is usable
+- high: Significant issue, likely incorrect
+- critical: Must be fixed, output is wrong/unsafe
+
+Set must_fix = true if there are any HIGH or CRITICAL severity flags.
+
+Be thorough but fair. Don't flag things that are working correctly."""
+
+
+CHECKER_USER_TEMPLATE = """Validate the following Implementor output.
+
+TASK SPECIFICATION:
+{task_spec}
+
+EXECUTION PLAN:
+{plan}
+
+IMPLEMENTOR OUTPUT:
+{candidate}
+
+{tool_results}
+
+Analyze the output for correctness and completeness. Respond with ONLY a valid JSON object."""
+
+
+CHECKER_CODE_EMPHASIS = """
+For CODE validation, focus on:
+- Syntax correctness
+- Logic errors
+- Edge case handling
+- Import statements
+- Function signatures matching requirements
+- Potential runtime errors"""
+
+
+CHECKER_MATH_EMPHASIS = """
+For MATH validation, focus on:
+- Calculation accuracy
+- Step-by-step reasoning correctness
+- Final answer format
+- Units and precision
+- Common arithmetic errors"""
+
+
+CHECKER_QA_EMPHASIS = """
+For QA validation, focus on:
+- Factual accuracy
+- Completeness of answer
+- Relevance to the question
+- Unsupported claims
+- Potential misinformation"""
+
+
+class Prerak(Role):
+    """
+    Prerak (Challenger) validates Implementor output using ensemble of verification methods.
+
+    - Slot: SMALL (2-4B) + external tools
+    - Purpose: Identify errors, flag issues, suggest fixes
+    - Output: Verdict with flags, evidence, must_fix decision
+    """
+
+    name = "prerak"
+    default_slot = Slot.SMALL
+
+    def __init__(
+        self,
+        model_runner: Any,
+        tools: Optional[list[str]] = None,
+        use_ensemble: bool = False,
+        enable_retrieval: bool = True,
+        enable_llm_check: bool = True,
+        **kwargs
+    ):
+        super().__init__(
+            model_runner=model_runner,
+            slot=kwargs.get("slot", Slot.SMALL),
+            max_tokens=kwargs.get("max_tokens", 768),
+            temperature=kwargs.get("temperature", 0.2)
+        )
+        self.tools = tools or ["json_validator", "syntax_checker"]
+        self._tool_results: dict[str, Any] = {}
+
+        # Ensemble configuration (opt-in)
+        self.use_ensemble = use_ensemble
+        self.enable_retrieval = enable_retrieval
+        self.enable_llm_check = enable_llm_check
+        self._ensemble_results: Optional[dict[str, Any]] = None
+
+    @property
+    def system_prompt(self) -> str:
+        return CHECKER_SYSTEM_PROMPT
+
+    def format_input(self, role_input: RoleInput) -> str:
+        task_spec_str = self._format_task_spec(role_input.task_spec)
+        plan_str = self._format_plan(role_input.plan)
+        candidate_str = self._format_candidate(role_input.candidate)
+        tool_results_str = self._format_tool_results()
+
+        # Add task-specific emphasis
+        task_type = ""
+        if role_input.task_spec:
+            task_type = role_input.task_spec.get("task_type", "")
+
+        prompt = CHECKER_USER_TEMPLATE.format(
+            task_spec=task_spec_str,
+            plan=plan_str,
+            candidate=candidate_str,
+            tool_results=tool_results_str
+        )
+
+        if task_type == "code":
+            prompt += CHECKER_CODE_EMPHASIS
+        elif task_type == "math":
+            prompt += CHECKER_MATH_EMPHASIS
+        elif task_type == "qa":
+            prompt += CHECKER_QA_EMPHASIS
+
+        return prompt
+
+    def _format_task_spec(self, task_spec: Optional[dict]) -> str:
+        """Format task spec for inclusion in prompt."""
+        if not task_spec:
+            return "No task specification provided."
+
+        return f"""Problem: {task_spec.get('problem', 'Not specified')}
+Task Type: {task_spec.get('task_type', 'Unknown')}
+Output Format: {task_spec.get('output_format', 'text')}"""
+
+    def _format_plan(self, plan: Optional[dict]) -> str:
+        """Format plan summary for checker."""
+        if not plan:
+            return "No plan provided."
+
+        steps = plan.get("steps", [])
+        if not steps:
+            return "No steps in plan."
+
+        lines = [f"Expected Output: {plan.get('expected_output_type', 'text')}"]
+        lines.append(f"Steps: {len(steps)}")
+
+        checkpoints = plan.get("checkpoints", [])
+        if checkpoints:
+            lines.append(f"Checkpoints: {checkpoints}")
+
+        return "\n".join(lines)
+
+    def _format_candidate(self, candidate: Optional[dict]) -> str:
+        """Format candidate output for checking."""
+        if not candidate:
+            return "No candidate output provided."
+
+        content = candidate.get("content", "")
+        content_type = candidate.get("content_type", "text")
+        confidence = candidate.get("confidence", 0.5)
+        warnings = candidate.get("warnings", [])
+
+        lines = [
+            f"Content Type: {content_type}",
+            f"Implementor Confidence: {confidence}",
+            "",
+            "=== CONTENT START ===",
+            content[:3000] if len(content) > 3000 else content,  # Truncate if too long
+            "=== CONTENT END ==="
+        ]
+
+        if warnings:
+            lines.append(f"\nImplementor Warnings: {warnings}")
+
+        return "\n".join(lines)
+
+    def _format_tool_results(self) -> str:
+        """Format results from deterministic tools."""
+        if not self._tool_results:
+            return ""
+
+        lines = ["\n--- TOOL RESULTS ---"]
+
+        for tool_name, result in self._tool_results.items():
+            lines.append(f"\n[{tool_name}]:")
+            if isinstance(result, dict):
+                if result.get("success"):
+                    lines.append(f" Status: PASS")
+                else:
+                    lines.append(f" Status: FAIL")
+                    if result.get("errors"):
+                        for error in result["errors"][:3]:
+                            lines.append(f" - {error}")
+            else:
+                lines.append(f" {result}")
+
+        lines.append("--- END TOOL RESULTS ---")
+        return "\n".join(lines)
+
+    def run_ensemble_checks(self, content: str, check_type: str, context: Optional[dict] = None) -> dict[str, Any]:
+        """Run ensemble checks (placeholder for actual implementation)."""
+        # In a real implementation this would call out to deterministic tools
+        return {"must_fix": False, "flags": [], "confidence": 0.5}
+
+    def __call__(self, role_input: RoleInput) -> RoleOutput:
+        """Execute Checker role."""
+        return super().__call__(role_input)
+
+    def set_retrieval_results(self, results: list[dict]) -> None:
+        """Set retrieval results from external retrieval system."""
+        self._tool_results["retrieval"] = {
+            "success": True,
+            "results": results
+        }
+
+    def parse_output(self, raw_output: str) -> dict[str, Any]:
+        """Parse LLM output into Verdict dict."""
+        data = self._extract_json(raw_output)
+
+        # Normalize flags
+        flags = []
+        for flag in data.get("flags", []):
+            if isinstance(flag, dict):
+                flags.append({
+                    "type": flag.get("type", "unknown"),
+                    "severity": self._normalize_severity(flag.get("severity", "low")),
+                    "detail": flag.get("detail", ""),
+                    "location": flag.get("location"),
+                    "suggested_fix": flag.get("suggested_fix")
+                })
+
+        # Normalize evidence
+        evidence = []
+        for ev in data.get("evidence", []):
+            if isinstance(ev, dict):
+                evidence.append({
+                    "source": ev.get("source", ""),
+                    "source_type": ev.get("source_type", "llm_judgment"),
+                    "snippet": ev.get("snippet", ""),
+                    "relevance_score": float(ev.get("relevance_score", 0)),
+                    "supports_claim": ev.get("supports_claim", True)
+                })
+
+        # Determine must_fix
+        must_fix = data.get("must_fix", False)
+        if not must_fix:
+            must_fix = any(
+                f.get("severity") in ["high", "critical"]
+                for f in flags
+            )
+
+        # Normalize confidence
+        confidence = data.get("overall_confidence", 0.5)
+        if isinstance(confidence, str):
+            try:
+                confidence = float(confidence)
+            except ValueError:
+                confidence = 0.5
+        confidence = max(0.0, min(1.0, confidence))
+
+        return {
+            "flags": flags,
+            "must_fix": must_fix,
+            "evidence": evidence,
+            "suggested_edits": data.get("suggested_edits", []),
+            "overall_confidence": confidence,
+            "checks_performed": data.get("checks_performed", [])
+        }
+
+    def _normalize_severity(self, value: str) -> str:
+        """Normalize severity to valid enum value."""
+        valid = {"low", "medium", "high", "critical"}
+        normalized = value.lower().strip()
+        return normalized if normalized in valid else "low"
+
+    def create_verdict(self, role_input: RoleInput) -> Verdict:
+        """Execute checker and return a Verdict object."""
+        output = self(role_input)
+
+        if output.status == "error":
+            return Verdict(
+                flags=[CheckerFlag(
+                    type="checker_error",
+                    severity="low",
+                    detail=f"Checker failed: {output.error}"
+                )],
+                must_fix=False,
+                overall_confidence=0.5
+            )
+
+        return Verdict.from_dict(output.core_output)