parishad-0.1.0-py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (68)
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
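
The three hunks below reproduce the council roles `dandadhyaksha.py`, `darbari.py`, and `majumdar.py` (their +163/+246/+274 line counts match entries 42, 43, and 44 above). `parishad/roles/base.py` itself is not shown in this excerpt, so the contract these roles implement has to be inferred from their code. A minimal sketch of that inferred interface, with `RoleInput` fields guessed from usage in the hunks below, not taken from the actual `base.py`:

```python
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class RoleInput:
    # Hypothetical stand-in: only the fields the hunks below actually read.
    user_query: str = ""
    task_spec: Optional[dict] = None   # consumed by Majumdar
    candidate: Optional[dict] = None   # consumed by Dandadhyaksha


class Role(ABC):
    """Inferred shape of parishad.roles.base.Role; the real class also
    handles model invocation, slots, and JSON extraction."""

    name: str = "role"

    @property
    @abstractmethod
    def system_prompt(self) -> str:
        """Static instructions sent to the model."""

    @abstractmethod
    def format_input(self, role_input: RoleInput) -> str:
        """Render the user-turn prompt from structured input."""

    @abstractmethod
    def parse_output(self, raw_output: str) -> dict[str, Any]:
        """Coerce the raw model reply into a validated dict."""
```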
parishad/roles/dandadhyaksha.py
@@ -0,0 +1,163 @@
+ """
+ Dandadhyaksha (Enforcer/Safety Checker) role for the Parishad council.
+ Checks for safety violations, PII, and policy compliance.
+ """
+
+ from typing import Any, Optional
+
+ from .base import (
+     Role,
+     RoleInput,
+     RoleOutput,
+     Slot,
+     Verdict,
+     CheckerFlag,
+ )
+
+
+ CHECKER_SAFETY_SYSTEM_PROMPT = """You are Dandadhyaksha, the Enforcer in the Parishad council. Your job is to verify the output is safe, policy-compliant, and free of sensitive information.
+
+ Your responsibilities:
+ 1. Check for harmful, offensive, or inappropriate content
+ 2. Identify any personally identifiable information (PII)
+ 3. Verify compliance with content policies
+ 4. Flag potential safety or ethical concerns
+ 5. Suggest redactions or modifications for safety issues
+
+ You must ALWAYS respond with a valid JSON object in the following format:
+ ```json
+ {
+     "flags": [
+         {
+             "type": "safety_violation|pii_detected|policy_violation|unsafe_code",
+             "severity": "medium|high|critical",
+             "detail": "Description of the safety issue",
+             "location": "Where it appears",
+             "suggested_fix": "Redaction or modification"
+         }
+     ],
+     "must_fix": true,
+     "safe": false,
+     "summary": "Safety assessment summary"
+ }
+ ```
+
+ If "safe" is false, "must_fix" MUST be true.
+ Be extremely vigilant about PII (names, emails, phones, keys) and harmful content."""
+
+
+ CHECKER_SAFETY_USER_TEMPLATE = """Perform a safety check on the following output.
+
+ OUTPUT TO CHECK:
+ {candidate}
+
+ Analyze for safety/PII violations. Respond with ONLY a valid JSON object."""
+
+
+ class Dandadhyaksha(Role):
+     """
+     Dandadhyaksha (Enforcer) checks for safety violations and PII.
+
+     - Slot: SMALL (2-4B)
+     - Purpose: Safety and policy enforcement
+     - Output: Verdict on safety
+     """
+
+     name = "dandadhyaksha"
+     default_slot = Slot.SMALL
+
+     def __init__(
+         self,
+         model_runner: Any,
+         tools: Optional[list[str]] = None,
+         **kwargs
+     ):
+         super().__init__(
+             model_runner=model_runner,
+             slot=kwargs.get("slot", Slot.SMALL),
+             max_tokens=kwargs.get("max_tokens", 512),
+             temperature=kwargs.get("temperature", 0.1)
+         )
+         self.tools = tools or ["toxicity_detector", "pii_scanner"]
+
+     @property
+     def system_prompt(self) -> str:
+         return CHECKER_SAFETY_SYSTEM_PROMPT
+
+     def format_input(self, role_input: RoleInput) -> str:
+         candidate_str = self._format_candidate(role_input.candidate)
+
+         return CHECKER_SAFETY_USER_TEMPLATE.format(
+             candidate=candidate_str
+         )
+
+     def _format_candidate(self, candidate: Optional[dict]) -> str:
+         """Format candidate output for checking."""
+         if not candidate:
+             return "No output to verify."
+
+         content = candidate.get("content", "")
+         # Don't truncate too much for safety check, but respect limit
+         if len(content) > 4000:
+             content = content[:4000] + "... [truncated]"
+
+         return f"""Content:
+ {content}"""
+
+     def parse_output(self, raw_output: str) -> dict[str, Any]:
+         """Parse LLM output into safety verdict dict."""
+         import json
+         import re
+
+         # Try to extract JSON from the response
+         json_match = re.search(r'\{[\s\S]*\}', raw_output)
+         if json_match:
+             try:
+                 data = json.loads(json_match.group())
+             except json.JSONDecodeError:
+                 data = {}
+         else:
+             data = {}
+
+         # Normalize flags
+         flags = []
+         for flag in data.get("flags", []):
+             flags.append({
+                 "type": flag.get("type", "unknown"),
+                 "severity": self._normalize_severity(flag.get("severity", "medium")),
+                 "detail": flag.get("detail", ""),
+                 "location": flag.get("location"),
+                 "suggested_fix": flag.get("suggested_fix")
+             })
+
+         # Determine must_fix
+         must_fix = data.get("must_fix", False)
+         safe = data.get("safe", True)
+
+         if not safe:
+             must_fix = True
+
+         # If flags exist, ensure must_fix matches severity
+         if flags and any(f["severity"] in ["high", "critical"] for f in flags):
+             must_fix = True
+             safe = False
+
+         return {
+             "verdict_safety": {
+                 "flags": flags,
+                 "must_fix": must_fix,
+                 "safe": safe,
+                 "summary": data.get("summary", "")
+             },
+             # Compatible with standard Verdict schema
+             "flags": flags,
+             "must_fix": must_fix,
+             "evidence": [],
+             "overall_confidence": 1.0 if safe else 0.0
+         }
+
+     def _normalize_severity(self, value: str) -> str:
+         """Normalize severity to valid enum value."""
+         valid = {"low", "medium", "high", "critical"}
+         normalized = value.lower().strip()
+         return normalized if normalized in valid else "high"
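
The notable behaviour in `Dandadhyaksha.parse_output` above is that the checker never trusts the model's own verdict: any `high` or `critical` flag forces `safe = False` and `must_fix = True` regardless of what the JSON claims. A dependency-free sketch of that escalation on an invented model reply; the regex and override rules are copied from the hunk, with `_normalize_severity` condensed to a plain `lower()`:

```python
import json
import re

# Invented model reply: JSON wrapped in prose, severity in the wrong case,
# and a self-assessment ("safe": true) that contradicts its own flag.
raw_output = """Here is my assessment:
{
  "flags": [
    {"type": "pii_detected", "severity": "CRITICAL",
     "detail": "Email address in output", "location": "line 2",
     "suggested_fix": "Redact the address"}
  ],
  "must_fix": false,
  "safe": true,
  "summary": "One PII issue found"
}"""

# Same greedy first-{ to last-} extraction as the hunk above.
match = re.search(r'\{[\s\S]*\}', raw_output)
data = json.loads(match.group()) if match else {}

flags = [{**f, "severity": f.get("severity", "medium").lower().strip()}
         for f in data.get("flags", [])]

safe = data.get("safe", True)
must_fix = data.get("must_fix", False) or not safe
if any(f["severity"] in ("high", "critical") for f in flags):
    must_fix, safe = True, False  # severity overrides the model's self-report

print(safe, must_fix)  # -> False True: the critical flag wins over "safe": true
```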
parishad/roles/darbari.py
@@ -0,0 +1,246 @@
+ """
+ Darbari (Communicator/Refiner) role for the Parishad council.
+ Normalizes user input into structured task specifications.
+ """
+
+ from typing import Any, Optional
+
+ from .base import (
+     Role,
+     RoleInput,
+     RoleOutput,
+     Slot,
+     TaskSpec,
+     Difficulty,
+     TaskType,
+     OutputFormat,
+ )
+
+
+ REFINER_SYSTEM_PROMPT = """You are Darbari, the Communicator in the Parishad council. Your job is to carefully analyze user queries and transform them into clear, structured task specifications.
+
+ Your responsibilities:
+ 1. Understand the user's true intent and goal
+ 2. Identify constraints (explicit or implicit)
+ 3. Determine the expected output format
+ 4. Estimate task difficulty for routing
+ 5. Classify the task type
+ 6. Assess safety sensitivity and expected answer length
+
+ You must ALWAYS respond with a valid JSON object in the following format:
+ ```json
+ {
+     "problem": "A clear, normalized restatement of what the user wants",
+     "constraints": ["List of constraints or requirements"],
+     "output_format": "code|text|numeric|structured|mixed",
+     "difficulty_guess": "easy|medium|hard",
+     "task_type": "math|code|qa|explanation|creative|analysis|chat",
+     "key_concepts": ["Key concepts or topics involved"],
+     "safety_sensitivity": "low|medium|high",
+     "expected_answer_length": "short|paragraph|long"
+ }
+ ```
+
+ Guidelines for difficulty estimation:
+ - EASY: Simple, single-step tasks; short answers; basic operations
+ - MEDIUM: Multi-step but straightforward; moderate reasoning needed
+ - HARD: Complex reasoning; multiple interconnected parts; edge cases; ambiguity
+
+ Guidelines for task type:
+ - CODE: Writing, debugging, or explaining code
+ - MATH: Numerical computation, word problems, proofs
+ - QA: Factual questions requiring knowledge retrieval
+ - EXPLANATION: Explaining concepts, processes, or ideas
+ - CREATIVE: Open-ended generation, writing, brainstorming
+ - ANALYSIS: Analyzing data, text, or situations
+ - CHAT: Conversational, no specific goal
+
+ Guidelines for safety sensitivity:
+ - LOW: Neutral topics, no harmful content
+ - MEDIUM: Potentially controversial or sensitive topics
+ - HIGH: Medical/legal advice, harmful content, misinformation risk
+
+ Guidelines for expected answer length:
+ - SHORT: 1-3 sentences, concise answer
+ - PARAGRAPH: 1-2 paragraphs, moderate detail
+ - LONG: Multi-paragraph, comprehensive explanation
+
+ Be precise and concise. Focus on extracting the essential requirements."""
+
+
+ REFINER_USER_TEMPLATE = """Analyze the following user query and create a structured task specification.
+
+ USER QUERY:
+ {user_query}
+
+ Respond with ONLY a valid JSON object following the required schema."""
+
+
+ class Darbari(Role):
+     """
+     Darbari (Communicator) normalizes user input into structured task specifications.
+
+     - Slot: SMALL (2-4B)
+     - Purpose: First role in pipeline, sets up context for all subsequent roles
+     - Output: TaskSpec with problem, constraints, format, difficulty, type
+     """
+
+     name = "darbari"
+     default_slot = Slot.SMALL
+
+     def __init__(self, model_runner: Any, **kwargs):
+         super().__init__(
+             model_runner=model_runner,
+             slot=kwargs.get("slot", Slot.SMALL),
+             max_tokens=kwargs.get("max_tokens", 512),
+             temperature=kwargs.get("temperature", 0.3)
+         )
+
+     @property
+     def system_prompt(self) -> str:
+         return REFINER_SYSTEM_PROMPT
+
+     def format_input(self, role_input: RoleInput) -> str:
+         return REFINER_USER_TEMPLATE.format(
+             user_query=role_input.user_query
+         )
+
+     def parse_output(self, raw_output: str) -> dict[str, Any]:
+         """Parse LLM output into TaskSpec dict with routing metadata and robust fallback."""
+         raw = raw_output.strip()
+
+         # Handle empty output
+         if not raw:
+             logger.warning("Darbari received empty output from model")
+             return {
+                 "problem": "",
+                 "constraints": [],
+                 "output_format": "text",
+                 "difficulty_guess": "medium",
+                 "task_type": "qa",
+                 "key_concepts": [],
+                 "difficulty": "medium",
+                 "safety_sensitivity": "low",
+                 "expected_answer_length": "paragraph",
+                 "parse_status": "empty"
+             }
+
+         try:
+             data = self._extract_json(raw)
+
+             # Check if JSON parsing succeeded or fell back to raw_output
+             has_structure = "problem" in data or "task_type" in data
+             if not has_structure and "raw_output" in data:
+                 # Fallback: extract problem from raw text
+                 problem_text = data["raw_output"][:500]
+                 parse_status = "fallback_text"
+             else:
+                 problem_text = data.get("problem", "")
+                 parse_status = "json_ok"
+
+             # Validate and normalize fields
+             output = {
+                 "problem": problem_text,
+                 "constraints": data.get("constraints", []),
+                 "output_format": self._normalize_output_format(data.get("output_format", "text")),
+                 "difficulty_guess": self._normalize_difficulty(data.get("difficulty_guess", "medium")),
+                 "task_type": self._normalize_task_type(data.get("task_type", "qa")),
+                 "key_concepts": data.get("key_concepts", []),
+                 # Routing metadata for adaptive pipeline
+                 "difficulty": self._normalize_difficulty(data.get("difficulty_guess", "medium")),
+                 "safety_sensitivity": self._normalize_safety(data.get("safety_sensitivity", "low")),
+                 "expected_answer_length": self._normalize_length(data.get("expected_answer_length", "paragraph")),
+                 "parse_status": parse_status,
+                 "raw_output": raw
+             }
+
+             # If problem is still empty, use raw output
+             if not output["problem"]:
+                 output["problem"] = raw[:500]
+
+             return output
+
+         except Exception as e:
+             logger.exception("Darbari.parse_output: unexpected error during parsing")
+             # Return minimal valid structure
+             return {
+                 "problem": raw[:500] if raw else "",
+                 "constraints": [],
+                 "output_format": "text",
+                 "difficulty_guess": "medium",
+                 "task_type": "qa",
+                 "key_concepts": [],
+                 "difficulty": "medium",
+                 "safety_sensitivity": "low",
+                 "expected_answer_length": "paragraph",
+                 "parse_status": "error_fallback",
+                 "raw_output": raw
+             }
+
+     def _normalize_output_format(self, value: str) -> str:
+         """Normalize output format to valid enum value."""
+         valid = {"code", "text", "numeric", "structured", "mixed"}
+         normalized = value.lower().strip()
+         return normalized if normalized in valid else "text"
+
+     def _normalize_difficulty(self, value: str) -> str:
+         """Normalize difficulty to valid enum value."""
+         valid = {"easy", "medium", "hard"}
+         normalized = value.lower().strip()
+         return normalized if normalized in valid else "medium"
+
+     def _normalize_task_type(self, value: str) -> str:
+         """Normalize task type to valid enum value."""
+         valid = {"math", "code", "qa", "explanation", "creative", "analysis", "chat"}
+         normalized = value.lower().strip()
+
+         # Handle common aliases
+         aliases = {
+             "coding": "code",
+             "programming": "code",
+             "mathematics": "math",
+             "calculation": "math",
+             "question": "qa",
+             "factual": "qa",
+             "explain": "explanation",
+             "describe": "explanation",
+             "write": "creative",
+             "generate": "creative",
+             "analyze": "analysis",
+             "evaluate": "analysis",
+             "conversation": "chat",
+             "conversational": "chat"
+         }
+
+         if normalized in valid:
+             return normalized
+         return aliases.get(normalized, "qa")
+
+     def _normalize_safety(self, value: str) -> str:
+         """Normalize safety sensitivity to valid value."""
+         valid = {"low", "medium", "high"}
+         normalized = value.lower().strip()
+         return normalized if normalized in valid else "low"
+
+     def _normalize_length(self, value: str) -> str:
+         """Normalize expected answer length to valid value."""
+         valid = {"short", "paragraph", "long"}
+         normalized = value.lower().strip()
+         return normalized if normalized in valid else "paragraph"
+
+     def create_task_spec(self, role_input: RoleInput) -> TaskSpec:
+         """
+         Execute Refiner and return a TaskSpec object.
+         """
+         output = self(role_input)
+
+         if output.status == "error":
+             # Return default task spec on error
+             return TaskSpec(
+                 problem=role_input.user_query,
+                 difficulty_guess=Difficulty.MEDIUM,
+                 task_type=TaskType.QA,
+                 output_format=OutputFormat.TEXT
+             )
+
+         return TaskSpec.from_dict(output.core_output)
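
Most of Darbari's robustness lives in its `_normalize_*` helpers: every routing field is coerced onto a closed vocabulary, and free-form task labels are folded through an alias table before falling back to `qa`. (Note that `parse_output` calls a module-level `logger` that the imports shown in this hunk never define; presumably it is provided elsewhere in the package, or those branches are a latent `NameError`.) A standalone sketch of the alias behaviour, with the tables copied from the hunk:

```python
VALID_TASK_TYPES = {"math", "code", "qa", "explanation", "creative", "analysis", "chat"}

# Alias table copied from Darbari._normalize_task_type.
ALIASES = {
    "coding": "code", "programming": "code",
    "mathematics": "math", "calculation": "math",
    "question": "qa", "factual": "qa",
    "explain": "explanation", "describe": "explanation",
    "write": "creative", "generate": "creative",
    "analyze": "analysis", "evaluate": "analysis",
    "conversation": "chat", "conversational": "chat",
}


def normalize_task_type(value: str) -> str:
    """Coerce a free-form model label onto the closed task-type vocabulary."""
    normalized = value.lower().strip()
    if normalized in VALID_TASK_TYPES:
        return normalized
    return ALIASES.get(normalized, "qa")  # unknown labels default to "qa"


assert normalize_task_type(" Programming ") == "code"
assert normalize_task_type("poetry") == "qa"  # unmapped -> safe default
```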
parishad/roles/majumdar.py
@@ -0,0 +1,274 @@
+ """
+ Majumdar (Planner) role for the Parishad council.
+ Decomposes complex tasks into clear, executable steps.
+ """
+
+ from typing import Any, Optional
+
+ from .base import (
+     Role,
+     RoleInput,
+     RoleOutput,
+     Slot,
+     Plan,
+     PlanStep,
+ )
+
+
+ PLANNER_SYSTEM_PROMPT = """You are Majumdar, the Planner in the Parishad council. Your job is to decompose complex tasks into clear, executable steps that the Implementor can follow.
+
+ Your responsibilities:
+ 1. Analyze the task specification
+ 2. Break down the problem into logical, sequential steps
+ 3. Provide rationale for each step
+ 4. Identify critical checkpoints where verification is needed
+ 5. Estimate the complexity of execution
+
+ You must ALWAYS respond with a valid JSON object in the following format:
+ ```json
+ {
+     "steps": [
+         {
+             "id": 1,
+             "description": "What to do in this step",
+             "rationale": "Why this step is needed",
+             "expected_output": "What this step should produce",
+             "depends_on": []
+         }
+     ],
+     "checkpoints": [1, 3],
+     "expected_output_type": "python_function|explanation|numeric_answer|structured_data",
+     "complexity_estimate": "trivial|simple|moderate|complex|very_complex",
+     "suggested_approach": "High-level strategy for the Implementor"
+ }
+ ```
+
+ Guidelines:
+ - Keep steps atomic and actionable
+ - Steps should be independent where possible
+ - Identify dependencies between steps accurately
+ - Mark steps that need verification as checkpoints
+
+ For code tasks:
+ - Include steps for understanding requirements, implementing, and testing
+ - Consider edge cases and error handling
+
+ For math tasks:
+ - Break down into clear mathematical operations
+ - Include verification steps for intermediate results
+
+ For QA tasks:
+ - Identify what information needs to be retrieved
+ - Include steps for synthesizing information into an answer"""
+
+
+ PLANNER_USER_TEMPLATE = """Create an execution plan for the following task.
+
+ ORIGINAL QUERY:
+ {user_query}
+
+ TASK SPECIFICATION:
+ {task_spec}
+
+ Create a detailed step-by-step plan. Respond with ONLY a valid JSON object."""
+
+
+ class Majumdar(Role):
+     """
+     Majumdar (Planner) decomposes tasks into executable steps.
+
+     - Slot: BIG (13-34B), with MID fallback for easy tasks
+     - Purpose: Create structured plan for Implementor to execute
+     - Output: Plan with steps, checkpoints, complexity estimate
+     """
+
+     name = "majumdar"
+     default_slot = Slot.BIG
+
+     def __init__(
+         self,
+         model_runner: Any,
+         fallback_slot: Optional[Slot] = Slot.MID,
+         **kwargs
+     ):
+         super().__init__(
+             model_runner=model_runner,
+             slot=kwargs.get("slot", Slot.BIG),
+             max_tokens=kwargs.get("max_tokens", 1024),
+             temperature=kwargs.get("temperature", 0.5)
+         )
+         self.fallback_slot = fallback_slot
+
+     @property
+     def system_prompt(self) -> str:
+         return PLANNER_SYSTEM_PROMPT
+
+     def format_input(self, role_input: RoleInput) -> str:
+         task_spec_str = self._format_task_spec(role_input.task_spec)
+
+         return PLANNER_USER_TEMPLATE.format(
+             user_query=role_input.user_query,
+             task_spec=task_spec_str
+         )
+
+     def _format_task_spec(self, task_spec: Optional[dict]) -> str:
+         """Format task spec for inclusion in prompt."""
+         if not task_spec:
+             return "No task specification provided."
+
+         lines = [
+             f"Problem: {task_spec.get('problem', 'Not specified')}",
+             f"Task Type: {task_spec.get('task_type', 'Unknown')}",
+             f"Output Format: {task_spec.get('output_format', 'text')}",
+             f"Difficulty: {task_spec.get('difficulty_guess', 'medium')}",
+         ]
+
+         constraints = task_spec.get('constraints', [])
+         if constraints:
+             lines.append(f"Constraints: {', '.join(constraints)}")
+
+         concepts = task_spec.get('key_concepts', [])
+         if concepts:
+             lines.append(f"Key Concepts: {', '.join(concepts)}")
+
+         return "\n".join(lines)
+
+     def parse_output(self, raw_output: str) -> dict[str, Any]:
+         """Parse LLM output into Plan dict with robust fallback."""
+         raw = raw_output.strip()
+
+         # Handle empty output
+         if not raw:
+             logger.warning("Majumdar received empty output from model")
+             return {
+                 "steps": [{
+                     "id": 1,
+                     "description": "Complete the task",
+                     "rationale": "Default step",
+                     "expected_output": "Result",
+                     "depends_on": []
+                 }],
+                 "checkpoints": [],
+                 "expected_output_type": "text",
+                 "complexity_estimate": "moderate",
+                 "suggested_approach": "",
+                 "parse_status": "empty"
+             }
+
+         try:
+             data = self._extract_json(raw)
+
+             # Normalize steps
+             steps = []
+             raw_steps = data.get("steps", [])
+
+             for i, step in enumerate(raw_steps):
+                 if isinstance(step, dict):
+                     steps.append({
+                         "id": step.get("id", i + 1),
+                         "description": step.get("description", ""),
+                         "rationale": step.get("rationale", ""),
+                         "expected_output": step.get("expected_output", ""),
+                         "depends_on": step.get("depends_on", [])
+                     })
+                 elif isinstance(step, str):
+                     steps.append({
+                         "id": i + 1,
+                         "description": step,
+                         "rationale": "",
+                         "expected_output": "",
+                         "depends_on": []
+                     })
+
+             # Fallback: if no steps found, create default step
+             if not steps:
+                 has_structure = "steps" in data or "checkpoints" in data
+                 if not has_structure and "raw_output" in data:
+                     # Model didn't follow format - create simple plan from text
+                     steps = [{
+                         "id": 1,
+                         "description": data["raw_output"][:200],
+                         "rationale": "Extracted from model output",
+                         "expected_output": "Completion",
+                         "depends_on": []
+                     }]
+                     parse_status = "fallback_text"
+                 else:
+                     steps = [{
+                         "id": 1,
+                         "description": "Complete the task as specified",
+                         "rationale": "Single-step execution",
+                         "expected_output": "Final result",
+                         "depends_on": []
+                     }]
+                     parse_status = "default_plan"
+             else:
+                 parse_status = "json_ok"
+
+             return {
+                 "steps": steps,
+                 "checkpoints": data.get("checkpoints", []),
+                 "expected_output_type": data.get("expected_output_type", "text"),
+                 "complexity_estimate": self._normalize_complexity(
+                     data.get("complexity_estimate", "moderate")
+                 ),
+                 "suggested_approach": data.get("suggested_approach", ""),
+                 "parse_status": parse_status,
+                 "raw_output": raw
+             }
+
+         except Exception as e:
+             logger.exception("Majumdar.parse_output: unexpected error during parsing")
+             # Return minimal valid plan
+             return {
+                 "steps": [{
+                     "id": 1,
+                     "description": raw[:200] if raw else "Execute task",
+                     "rationale": "Fallback plan",
+                     "expected_output": "Result",
+                     "depends_on": []
+                 }],
+                 "checkpoints": [],
+                 "expected_output_type": "text",
+                 "complexity_estimate": "moderate",
+                 "suggested_approach": "",
+                 "parse_status": "error_fallback",
+                 "raw_output": raw
+             }
+
+     def _normalize_complexity(self, value: str) -> str:
+         """Normalize complexity to valid enum value."""
+         valid = {"trivial", "simple", "moderate", "complex", "very_complex"}
+         normalized = value.lower().strip().replace(" ", "_")
+         return normalized if normalized in valid else "moderate"
+
+     def should_use_fallback(self, task_spec: Optional[dict]) -> bool:
+         """Determine if we should use fallback slot based on difficulty."""
+         if not task_spec:
+             return False
+         difficulty = task_spec.get("difficulty_guess", "medium")
+         return difficulty == "easy"
+
+     def __call__(self, role_input: RoleInput) -> Any:
+         """Execute planner with potential slot fallback."""
+         if self.fallback_slot and self.should_use_fallback(role_input.task_spec):
+             original_slot = self.slot
+             self.slot = self.fallback_slot
+             try:
+                 return super().__call__(role_input)
+             finally:
+                 self.slot = original_slot
+
+         return super().__call__(role_input)
+
+     def create_plan(self, role_input: RoleInput) -> Plan:
+         """Execute planner and return a Plan object."""
+         output = self(role_input)
+
+         if output.status == "error":
+             return Plan(
+                 steps=[PlanStep(id=1, description="Complete the task")],
+                 expected_output_type="text"
+             )
+
+         return Plan.from_dict(output.core_output)
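
Majumdar's `parse_output` tolerates planners that emit bare strings instead of step objects, coercing both shapes into one schema before the plan reaches the Implementor. A minimal rerun of that coercion on invented input:

```python
# Invented planner reply: one schema-conformant step and one bare string.
raw_steps = [
    {"id": 1, "description": "Parse requirements", "depends_on": []},
    "Implement the function",  # model ignored the schema here
]

steps = []
for i, step in enumerate(raw_steps):
    if isinstance(step, dict):
        steps.append({
            "id": step.get("id", i + 1),
            "description": step.get("description", ""),
            "rationale": step.get("rationale", ""),
            "expected_output": step.get("expected_output", ""),
            "depends_on": step.get("depends_on", []),
        })
    elif isinstance(step, str):  # bare strings become minimal step dicts
        steps.append({
            "id": i + 1,
            "description": step,
            "rationale": "",
            "expected_output": "",
            "depends_on": [],
        })

print([(s["id"], s["description"]) for s in steps])
# -> [(1, 'Parse requirements'), (2, 'Implement the function')]
```

One design note on the fallback path: `__call__` temporarily mutates `self.slot` and restores it in a `finally` block, which is simple and exception-safe, but it means a shared Majumdar instance is not safe to call concurrently from multiple threads.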