parishad-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parishad/__init__.py +70 -0
- parishad/__main__.py +10 -0
- parishad/checker/__init__.py +25 -0
- parishad/checker/deterministic.py +644 -0
- parishad/checker/ensemble.py +496 -0
- parishad/checker/retrieval.py +546 -0
- parishad/cli/__init__.py +6 -0
- parishad/cli/code.py +3254 -0
- parishad/cli/main.py +1158 -0
- parishad/cli/prarambh.py +99 -0
- parishad/cli/sthapana.py +368 -0
- parishad/config/modes.py +139 -0
- parishad/config/pipeline.core.yaml +128 -0
- parishad/config/pipeline.extended.yaml +172 -0
- parishad/config/pipeline.fast.yaml +89 -0
- parishad/config/user_config.py +115 -0
- parishad/data/catalog.py +118 -0
- parishad/data/models.json +108 -0
- parishad/memory/__init__.py +79 -0
- parishad/models/__init__.py +181 -0
- parishad/models/backends/__init__.py +247 -0
- parishad/models/backends/base.py +211 -0
- parishad/models/backends/huggingface.py +318 -0
- parishad/models/backends/llama_cpp.py +239 -0
- parishad/models/backends/mlx_lm.py +141 -0
- parishad/models/backends/ollama.py +253 -0
- parishad/models/backends/openai_api.py +193 -0
- parishad/models/backends/transformers_hf.py +198 -0
- parishad/models/costs.py +385 -0
- parishad/models/downloader.py +1557 -0
- parishad/models/optimizations.py +871 -0
- parishad/models/profiles.py +610 -0
- parishad/models/reliability.py +876 -0
- parishad/models/runner.py +651 -0
- parishad/models/tokenization.py +287 -0
- parishad/orchestrator/__init__.py +24 -0
- parishad/orchestrator/config_loader.py +210 -0
- parishad/orchestrator/engine.py +1113 -0
- parishad/orchestrator/exceptions.py +14 -0
- parishad/roles/__init__.py +71 -0
- parishad/roles/base.py +712 -0
- parishad/roles/dandadhyaksha.py +163 -0
- parishad/roles/darbari.py +246 -0
- parishad/roles/majumdar.py +274 -0
- parishad/roles/pantapradhan.py +150 -0
- parishad/roles/prerak.py +357 -0
- parishad/roles/raja.py +345 -0
- parishad/roles/sacheev.py +203 -0
- parishad/roles/sainik.py +427 -0
- parishad/roles/sar_senapati.py +164 -0
- parishad/roles/vidushak.py +69 -0
- parishad/tools/__init__.py +7 -0
- parishad/tools/base.py +57 -0
- parishad/tools/fs.py +110 -0
- parishad/tools/perception.py +96 -0
- parishad/tools/retrieval.py +74 -0
- parishad/tools/shell.py +103 -0
- parishad/utils/__init__.py +7 -0
- parishad/utils/hardware.py +122 -0
- parishad/utils/logging.py +79 -0
- parishad/utils/scanner.py +164 -0
- parishad/utils/text.py +61 -0
- parishad/utils/tracing.py +133 -0
- parishad-0.1.0.dist-info/METADATA +256 -0
- parishad-0.1.0.dist-info/RECORD +68 -0
- parishad-0.1.0.dist-info/WHEEL +4 -0
- parishad-0.1.0.dist-info/entry_points.txt +2 -0
- parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/roles/dandadhyaksha.py
@@ -0,0 +1,163 @@
"""
Dandadhyaksha (Enforcer/Safety Checker) role for the Parishad council.
Checks for safety violations, PII, and policy compliance.
"""

from typing import Any, Optional

from .base import (
    Role,
    RoleInput,
    RoleOutput,
    Slot,
    Verdict,
    CheckerFlag,
)


CHECKER_SAFETY_SYSTEM_PROMPT = """You are Dandadhyaksha, the Enforcer in the Parishad council. Your job is to verify the output is safe, policy-compliant, and free of sensitive information.

Your responsibilities:
1. Check for harmful, offensive, or inappropriate content
2. Identify any personally identifiable information (PII)
3. Verify compliance with content policies
4. Flag potential safety or ethical concerns
5. Suggest redactions or modifications for safety issues

You must ALWAYS respond with a valid JSON object in the following format:
```json
{
  "flags": [
    {
      "type": "safety_violation|pii_detected|policy_violation|unsafe_code",
      "severity": "medium|high|critical",
      "detail": "Description of the safety issue",
      "location": "Where it appears",
      "suggested_fix": "Redaction or modification"
    }
  ],
  "must_fix": true,
  "safe": false,
  "summary": "Safety assessment summary"
}
```

If "safe" is false, "must_fix" MUST be true.
Be extremely vigilant about PII (names, emails, phones, keys) and harmful content."""


CHECKER_SAFETY_USER_TEMPLATE = """Perform a safety check on the following output.

OUTPUT TO CHECK:
{candidate}

Analyze for safety/PII violations. Respond with ONLY a valid JSON object."""


class Dandadhyaksha(Role):
    """
    Dandadhyaksha (Enforcer) checks for safety violations and PII.

    - Slot: SMALL (2-4B)
    - Purpose: Safety and policy enforcement
    - Output: Verdict on safety
    """

    name = "dandadhyaksha"
    default_slot = Slot.SMALL

    def __init__(
        self,
        model_runner: Any,
        tools: Optional[list[str]] = None,
        **kwargs
    ):
        super().__init__(
            model_runner=model_runner,
            slot=kwargs.get("slot", Slot.SMALL),
            max_tokens=kwargs.get("max_tokens", 512),
            temperature=kwargs.get("temperature", 0.1)
        )
        self.tools = tools or ["toxicity_detector", "pii_scanner"]

    @property
    def system_prompt(self) -> str:
        return CHECKER_SAFETY_SYSTEM_PROMPT

    def format_input(self, role_input: RoleInput) -> str:
        candidate_str = self._format_candidate(role_input.candidate)

        return CHECKER_SAFETY_USER_TEMPLATE.format(
            candidate=candidate_str
        )

    def _format_candidate(self, candidate: Optional[dict]) -> str:
        """Format candidate output for checking."""
        if not candidate:
            return "No output to verify."

        content = candidate.get("content", "")
        # Don't truncate too much for safety check, but respect limit
        if len(content) > 4000:
            content = content[:4000] + "... [truncated]"

        return f"""Content:
{content}"""

    def parse_output(self, raw_output: str) -> dict[str, Any]:
        """Parse LLM output into safety verdict dict."""
        import json
        import re

        # Try to extract JSON from the response
        json_match = re.search(r'\{[\s\S]*\}', raw_output)
        if json_match:
            try:
                data = json.loads(json_match.group())
            except json.JSONDecodeError:
                data = {}
        else:
            data = {}

        # Normalize flags
        flags = []
        for flag in data.get("flags", []):
            flags.append({
                "type": flag.get("type", "unknown"),
                "severity": self._normalize_severity(flag.get("severity", "medium")),
                "detail": flag.get("detail", ""),
                "location": flag.get("location"),
                "suggested_fix": flag.get("suggested_fix")
            })

        # Determine must_fix
        must_fix = data.get("must_fix", False)
        safe = data.get("safe", True)

        if not safe:
            must_fix = True

        # If flags exist, ensure must_fix matches severity
        if flags and any(f["severity"] in ["high", "critical"] for f in flags):
            must_fix = True
            safe = False

        return {
            "verdict_safety": {
                "flags": flags,
                "must_fix": must_fix,
                "safe": safe,
                "summary": data.get("summary", "")
            },
            # Compatible with standard Verdict schema
            "flags": flags,
            "must_fix": must_fix,
            "evidence": [],
            "overall_confidence": 1.0 if safe else 0.0
        }

    def _normalize_severity(self, value: str) -> str:
        """Normalize severity to valid enum value."""
        valid = {"low", "medium", "high", "critical"}
        normalized = value.lower().strip()
        return normalized if normalized in valid else "high"
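Not part of the wheel: a minimal standalone sketch of the escalation rule that Dandadhyaksha.parse_output applies above. Any high- or critical-severity flag forces must_fix to true and safe to false regardless of what the model claimed; the sample model reply below is invented for illustration.

import json
import re

# Hypothetical model reply: it claims the output is safe, yet attaches
# a high-severity PII flag.
raw_output = '''Assessment follows:
{"flags": [{"type": "pii_detected", "severity": "HIGH",
            "detail": "Contains an email address"}],
 "must_fix": false, "safe": true, "summary": "Mostly fine"}'''

# Same greedy JSON extraction used by parse_output.
match = re.search(r'\{[\s\S]*\}', raw_output)
data = json.loads(match.group()) if match else {}

severities = [f.get("severity", "medium").lower().strip()
              for f in data.get("flags", [])]
must_fix = data.get("must_fix", False)
safe = data.get("safe", True)
if not safe:
    must_fix = True
if any(sev in ("high", "critical") for sev in severities):
    must_fix, safe = True, False  # escalation overrides the model's self-report

print(must_fix, safe)  # True False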
parishad/roles/darbari.py
@@ -0,0 +1,246 @@
"""
Darbari (Communicator/Refiner) role for the Parishad council.
Normalizes user input into structured task specifications.
"""

import logging
from typing import Any, Optional

from .base import (
    Role,
    RoleInput,
    RoleOutput,
    Slot,
    TaskSpec,
    Difficulty,
    TaskType,
    OutputFormat,
)

# Module-level logger used by parse_output's fallback paths below
logger = logging.getLogger(__name__)


REFINER_SYSTEM_PROMPT = """You are Darbari, the Communicator in the Parishad council. Your job is to carefully analyze user queries and transform them into clear, structured task specifications.

Your responsibilities:
1. Understand the user's true intent and goal
2. Identify constraints (explicit or implicit)
3. Determine the expected output format
4. Estimate task difficulty for routing
5. Classify the task type
6. Assess safety sensitivity and expected answer length

You must ALWAYS respond with a valid JSON object in the following format:
```json
{
  "problem": "A clear, normalized restatement of what the user wants",
  "constraints": ["List of constraints or requirements"],
  "output_format": "code|text|numeric|structured|mixed",
  "difficulty_guess": "easy|medium|hard",
  "task_type": "math|code|qa|explanation|creative|analysis|chat",
  "key_concepts": ["Key concepts or topics involved"],
  "safety_sensitivity": "low|medium|high",
  "expected_answer_length": "short|paragraph|long"
}
```

Guidelines for difficulty estimation:
- EASY: Simple, single-step tasks; short answers; basic operations
- MEDIUM: Multi-step but straightforward; moderate reasoning needed
- HARD: Complex reasoning; multiple interconnected parts; edge cases; ambiguity

Guidelines for task type:
- CODE: Writing, debugging, or explaining code
- MATH: Numerical computation, word problems, proofs
- QA: Factual questions requiring knowledge retrieval
- EXPLANATION: Explaining concepts, processes, or ideas
- CREATIVE: Open-ended generation, writing, brainstorming
- ANALYSIS: Analyzing data, text, or situations
- CHAT: Conversational, no specific goal

Guidelines for safety sensitivity:
- LOW: Neutral topics, no harmful content
- MEDIUM: Potentially controversial or sensitive topics
- HIGH: Medical/legal advice, harmful content, misinformation risk

Guidelines for expected answer length:
- SHORT: 1-3 sentences, concise answer
- PARAGRAPH: 1-2 paragraphs, moderate detail
- LONG: Multi-paragraph, comprehensive explanation

Be precise and concise. Focus on extracting the essential requirements."""


REFINER_USER_TEMPLATE = """Analyze the following user query and create a structured task specification.

USER QUERY:
{user_query}

Respond with ONLY a valid JSON object following the required schema."""


class Darbari(Role):
    """
    Darbari (Communicator) normalizes user input into structured task specifications.

    - Slot: SMALL (2-4B)
    - Purpose: First role in pipeline, sets up context for all subsequent roles
    - Output: TaskSpec with problem, constraints, format, difficulty, type
    """

    name = "darbari"
    default_slot = Slot.SMALL

    def __init__(self, model_runner: Any, **kwargs):
        super().__init__(
            model_runner=model_runner,
            slot=kwargs.get("slot", Slot.SMALL),
            max_tokens=kwargs.get("max_tokens", 512),
            temperature=kwargs.get("temperature", 0.3)
        )

    @property
    def system_prompt(self) -> str:
        return REFINER_SYSTEM_PROMPT

    def format_input(self, role_input: RoleInput) -> str:
        return REFINER_USER_TEMPLATE.format(
            user_query=role_input.user_query
        )

    def parse_output(self, raw_output: str) -> dict[str, Any]:
        """Parse LLM output into TaskSpec dict with routing metadata and robust fallback."""
        raw = raw_output.strip()

        # Handle empty output
        if not raw:
            logger.warning("Darbari received empty output from model")
            return {
                "problem": "",
                "constraints": [],
                "output_format": "text",
                "difficulty_guess": "medium",
                "task_type": "qa",
                "key_concepts": [],
                "difficulty": "medium",
                "safety_sensitivity": "low",
                "expected_answer_length": "paragraph",
                "parse_status": "empty"
            }

        try:
            data = self._extract_json(raw)

            # Check if JSON parsing succeeded or fell back to raw_output
            has_structure = "problem" in data or "task_type" in data
            if not has_structure and "raw_output" in data:
                # Fallback: extract problem from raw text
                problem_text = data["raw_output"][:500]
                parse_status = "fallback_text"
            else:
                problem_text = data.get("problem", "")
                parse_status = "json_ok"

            # Validate and normalize fields
            output = {
                "problem": problem_text,
                "constraints": data.get("constraints", []),
                "output_format": self._normalize_output_format(data.get("output_format", "text")),
                "difficulty_guess": self._normalize_difficulty(data.get("difficulty_guess", "medium")),
                "task_type": self._normalize_task_type(data.get("task_type", "qa")),
                "key_concepts": data.get("key_concepts", []),
                # Routing metadata for adaptive pipeline
                "difficulty": self._normalize_difficulty(data.get("difficulty_guess", "medium")),
                "safety_sensitivity": self._normalize_safety(data.get("safety_sensitivity", "low")),
                "expected_answer_length": self._normalize_length(data.get("expected_answer_length", "paragraph")),
                "parse_status": parse_status,
                "raw_output": raw
            }

            # If problem is still empty, use raw output
            if not output["problem"]:
                output["problem"] = raw[:500]

            return output

        except Exception:
            logger.exception("Darbari.parse_output: unexpected error during parsing")
            # Return minimal valid structure
            return {
                "problem": raw[:500] if raw else "",
                "constraints": [],
                "output_format": "text",
                "difficulty_guess": "medium",
                "task_type": "qa",
                "key_concepts": [],
                "difficulty": "medium",
                "safety_sensitivity": "low",
                "expected_answer_length": "paragraph",
                "parse_status": "error_fallback",
                "raw_output": raw
            }

    def _normalize_output_format(self, value: str) -> str:
        """Normalize output format to valid enum value."""
        valid = {"code", "text", "numeric", "structured", "mixed"}
        normalized = value.lower().strip()
        return normalized if normalized in valid else "text"

    def _normalize_difficulty(self, value: str) -> str:
        """Normalize difficulty to valid enum value."""
        valid = {"easy", "medium", "hard"}
        normalized = value.lower().strip()
        return normalized if normalized in valid else "medium"

    def _normalize_task_type(self, value: str) -> str:
        """Normalize task type to valid enum value."""
        valid = {"math", "code", "qa", "explanation", "creative", "analysis", "chat"}
        normalized = value.lower().strip()

        # Handle common aliases
        aliases = {
            "coding": "code",
            "programming": "code",
            "mathematics": "math",
            "calculation": "math",
            "question": "qa",
            "factual": "qa",
            "explain": "explanation",
            "describe": "explanation",
            "write": "creative",
            "generate": "creative",
            "analyze": "analysis",
            "evaluate": "analysis",
            "conversation": "chat",
            "conversational": "chat"
        }

        if normalized in valid:
            return normalized
        return aliases.get(normalized, "qa")

    def _normalize_safety(self, value: str) -> str:
        """Normalize safety sensitivity to valid value."""
        valid = {"low", "medium", "high"}
        normalized = value.lower().strip()
        return normalized if normalized in valid else "low"

    def _normalize_length(self, value: str) -> str:
        """Normalize expected answer length to valid value."""
        valid = {"short", "paragraph", "long"}
        normalized = value.lower().strip()
        return normalized if normalized in valid else "paragraph"

    def create_task_spec(self, role_input: RoleInput) -> TaskSpec:
        """
        Execute Refiner and return a TaskSpec object.
        """
        output = self(role_input)

        if output.status == "error":
            # Return default task spec on error
            return TaskSpec(
                problem=role_input.user_query,
                difficulty_guess=Difficulty.MEDIUM,
                task_type=TaskType.QA,
                output_format=OutputFormat.TEXT
            )

        return TaskSpec.from_dict(output.core_output)
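Not part of the wheel: a standalone illustration of the alias handling in Darbari._normalize_task_type above. The alias table here is an abridged mirror of the one in the class, and the inputs are invented.

# Exact matches pass through, known aliases are remapped,
# and anything unrecognized falls back to "qa".
VALID = {"math", "code", "qa", "explanation", "creative", "analysis", "chat"}
ALIASES = {"coding": "code", "programming": "code", "mathematics": "math"}

def normalize_task_type(value: str) -> str:
    v = value.lower().strip()
    return v if v in VALID else ALIASES.get(v, "qa")

assert normalize_task_type("Coding") == "code"        # alias remapped
assert normalize_task_type("analysis") == "analysis"  # already valid
assert normalize_task_type("translation") == "qa"     # unknown -> default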
parishad/roles/majumdar.py
@@ -0,0 +1,274 @@
"""
Majumdar (Planner) role for the Parishad council.
Decomposes complex tasks into clear, executable steps.
"""

import logging
from typing import Any, Optional

from .base import (
    Role,
    RoleInput,
    RoleOutput,
    Slot,
    Plan,
    PlanStep,
)

# Module-level logger used by parse_output's fallback paths below
logger = logging.getLogger(__name__)


PLANNER_SYSTEM_PROMPT = """You are Majumdar, the Planner in the Parishad council. Your job is to decompose complex tasks into clear, executable steps that the Implementor can follow.

Your responsibilities:
1. Analyze the task specification
2. Break down the problem into logical, sequential steps
3. Provide rationale for each step
4. Identify critical checkpoints where verification is needed
5. Estimate the complexity of execution

You must ALWAYS respond with a valid JSON object in the following format:
```json
{
  "steps": [
    {
      "id": 1,
      "description": "What to do in this step",
      "rationale": "Why this step is needed",
      "expected_output": "What this step should produce",
      "depends_on": []
    }
  ],
  "checkpoints": [1, 3],
  "expected_output_type": "python_function|explanation|numeric_answer|structured_data",
  "complexity_estimate": "trivial|simple|moderate|complex|very_complex",
  "suggested_approach": "High-level strategy for the Implementor"
}
```

Guidelines:
- Keep steps atomic and actionable
- Steps should be independent where possible
- Identify dependencies between steps accurately
- Mark steps that need verification as checkpoints

For code tasks:
- Include steps for understanding requirements, implementing, and testing
- Consider edge cases and error handling

For math tasks:
- Break down into clear mathematical operations
- Include verification steps for intermediate results

For QA tasks:
- Identify what information needs to be retrieved
- Include steps for synthesizing information into an answer"""


PLANNER_USER_TEMPLATE = """Create an execution plan for the following task.

ORIGINAL QUERY:
{user_query}

TASK SPECIFICATION:
{task_spec}

Create a detailed step-by-step plan. Respond with ONLY a valid JSON object."""


class Majumdar(Role):
    """
    Majumdar (Planner) decomposes tasks into executable steps.

    - Slot: BIG (13-34B), with MID fallback for easy tasks
    - Purpose: Create structured plan for Implementor to execute
    - Output: Plan with steps, checkpoints, complexity estimate
    """

    name = "majumdar"
    default_slot = Slot.BIG

    def __init__(
        self,
        model_runner: Any,
        fallback_slot: Optional[Slot] = Slot.MID,
        **kwargs
    ):
        super().__init__(
            model_runner=model_runner,
            slot=kwargs.get("slot", Slot.BIG),
            max_tokens=kwargs.get("max_tokens", 1024),
            temperature=kwargs.get("temperature", 0.5)
        )
        self.fallback_slot = fallback_slot

    @property
    def system_prompt(self) -> str:
        return PLANNER_SYSTEM_PROMPT

    def format_input(self, role_input: RoleInput) -> str:
        task_spec_str = self._format_task_spec(role_input.task_spec)

        return PLANNER_USER_TEMPLATE.format(
            user_query=role_input.user_query,
            task_spec=task_spec_str
        )

    def _format_task_spec(self, task_spec: Optional[dict]) -> str:
        """Format task spec for inclusion in prompt."""
        if not task_spec:
            return "No task specification provided."

        lines = [
            f"Problem: {task_spec.get('problem', 'Not specified')}",
            f"Task Type: {task_spec.get('task_type', 'Unknown')}",
            f"Output Format: {task_spec.get('output_format', 'text')}",
            f"Difficulty: {task_spec.get('difficulty_guess', 'medium')}",
        ]

        constraints = task_spec.get('constraints', [])
        if constraints:
            lines.append(f"Constraints: {', '.join(constraints)}")

        concepts = task_spec.get('key_concepts', [])
        if concepts:
            lines.append(f"Key Concepts: {', '.join(concepts)}")

        return "\n".join(lines)

    def parse_output(self, raw_output: str) -> dict[str, Any]:
        """Parse LLM output into Plan dict with robust fallback."""
        raw = raw_output.strip()

        # Handle empty output
        if not raw:
            logger.warning("Majumdar received empty output from model")
            return {
                "steps": [{
                    "id": 1,
                    "description": "Complete the task",
                    "rationale": "Default step",
                    "expected_output": "Result",
                    "depends_on": []
                }],
                "checkpoints": [],
                "expected_output_type": "text",
                "complexity_estimate": "moderate",
                "suggested_approach": "",
                "parse_status": "empty"
            }

        try:
            data = self._extract_json(raw)

            # Normalize steps
            steps = []
            raw_steps = data.get("steps", [])

            for i, step in enumerate(raw_steps):
                if isinstance(step, dict):
                    steps.append({
                        "id": step.get("id", i + 1),
                        "description": step.get("description", ""),
                        "rationale": step.get("rationale", ""),
                        "expected_output": step.get("expected_output", ""),
                        "depends_on": step.get("depends_on", [])
                    })
                elif isinstance(step, str):
                    steps.append({
                        "id": i + 1,
                        "description": step,
                        "rationale": "",
                        "expected_output": "",
                        "depends_on": []
                    })

            # Fallback: if no steps found, create a default step
            if not steps:
                has_structure = "steps" in data or "checkpoints" in data
                if not has_structure and "raw_output" in data:
                    # Model didn't follow format - create simple plan from text
                    steps = [{
                        "id": 1,
                        "description": data["raw_output"][:200],
                        "rationale": "Extracted from model output",
                        "expected_output": "Completion",
                        "depends_on": []
                    }]
                    parse_status = "fallback_text"
                else:
                    steps = [{
                        "id": 1,
                        "description": "Complete the task as specified",
                        "rationale": "Single-step execution",
                        "expected_output": "Final result",
                        "depends_on": []
                    }]
                    parse_status = "default_plan"
            else:
                parse_status = "json_ok"

            return {
                "steps": steps,
                "checkpoints": data.get("checkpoints", []),
                "expected_output_type": data.get("expected_output_type", "text"),
                "complexity_estimate": self._normalize_complexity(
                    data.get("complexity_estimate", "moderate")
                ),
                "suggested_approach": data.get("suggested_approach", ""),
                "parse_status": parse_status,
                "raw_output": raw
            }

        except Exception:
            logger.exception("Majumdar.parse_output: unexpected error during parsing")
            # Return minimal valid plan
            return {
                "steps": [{
                    "id": 1,
                    "description": raw[:200] if raw else "Execute task",
                    "rationale": "Fallback plan",
                    "expected_output": "Result",
                    "depends_on": []
                }],
                "checkpoints": [],
                "expected_output_type": "text",
                "complexity_estimate": "moderate",
                "suggested_approach": "",
                "parse_status": "error_fallback",
                "raw_output": raw
            }

    def _normalize_complexity(self, value: str) -> str:
        """Normalize complexity to valid enum value."""
        valid = {"trivial", "simple", "moderate", "complex", "very_complex"}
        normalized = value.lower().strip().replace(" ", "_")
        return normalized if normalized in valid else "moderate"

    def should_use_fallback(self, task_spec: Optional[dict]) -> bool:
        """Determine if we should use the fallback slot based on difficulty."""
        if not task_spec:
            return False
        difficulty = task_spec.get("difficulty_guess", "medium")
        return difficulty == "easy"

    def __call__(self, role_input: RoleInput) -> Any:
        """Execute planner with potential slot fallback."""
        if self.fallback_slot and self.should_use_fallback(role_input.task_spec):
            original_slot = self.slot
            self.slot = self.fallback_slot
            try:
                return super().__call__(role_input)
            finally:
                self.slot = original_slot

        return super().__call__(role_input)

    def create_plan(self, role_input: RoleInput) -> Plan:
        """Execute planner and return a Plan object."""
        output = self(role_input)

        if output.status == "error":
            return Plan(
                steps=[PlanStep(id=1, description="Complete the task")],
                expected_output_type="text"
            )

        return Plan.from_dict(output.core_output)
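Not part of the wheel: a standalone sketch of the slot-fallback pattern in Majumdar.__call__ above. The slot swap is wrapped in try/finally so an exception mid-call cannot leave the role stuck on the fallback slot; PlannerSketch and its _run method are simplified stand-ins, not the package's real base class.

class PlannerSketch:
    def __init__(self, slot: str = "BIG", fallback_slot: str = "MID"):
        self.slot = slot
        self.fallback_slot = fallback_slot

    def _run(self, task_spec: dict) -> str:
        # Stand-in for the base Role call that would invoke the model.
        return f"planned with {self.slot} model"

    def __call__(self, task_spec: dict) -> str:
        # Easy tasks are routed to the cheaper fallback slot.
        if self.fallback_slot and task_spec.get("difficulty_guess") == "easy":
            original = self.slot
            self.slot = self.fallback_slot
            try:
                return self._run(task_spec)
            finally:
                self.slot = original  # restored even if _run raises
        return self._run(task_spec)

planner = PlannerSketch()
print(planner({"difficulty_guess": "easy"}))  # planned with MID model
print(planner({"difficulty_guess": "hard"}))  # planned with BIG model
print(planner.slot)                           # BIG (restored)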