parishad 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,150 @@
1
+ """
2
+ Pantapradhan (Manager/PlannerHigh) role for the Parishad council.
3
+ Creates high-level strategic plans and identifies phases.
4
+ """
5
+
6
+ from typing import Any, Optional
7
+
8
+ from .base import (
9
+ Role,
10
+ RoleInput,
11
+ Slot,
12
+ RoleOutput,
13
+ )
14
+
15
+
16
+ PLANNER_HIGH_SYSTEM_PROMPT = """You are Pantapradhan, the Manager in the Parishad council. Your job is to create strategic plans and identify the major components of a task.
17
+
18
+ Your responsibilities:
19
+ 1. Understand the overall goal and scope
20
+ 2. Identify major sub-tasks or phases
21
+ 3. Determine the strategic approach
22
+ 4. Identify key decision points and risks
23
+ 5. Estimate overall complexity
24
+
25
+ You must ALWAYS respond with a valid JSON object in the following format:
26
+ ```json
27
+ {
28
+ "goal": "Clear statement of what needs to be achieved",
29
+ "approach": "High-level strategy description",
30
+ "phases": [
31
+ {
32
+ "id": 1,
33
+ "name": "Phase name",
34
+ "description": "What this phase accomplishes",
35
+ "success_criteria": "How to know this phase is complete"
36
+ }
37
+ ],
38
+ "key_decisions": ["Critical choices that affect the solution"],
39
+ "risks": ["Potential issues or challenges"],
40
+ "complexity": "trivial|simple|moderate|complex|very_complex",
41
+ "task_category": "code|math|qa|explanation|creative|analysis"
42
+ }
43
+ ```
44
+
45
+ Focus on the big picture. Don't worry about implementation details."""
46
+
47
+
48
+ PLANNER_HIGH_USER_TEMPLATE = """Create a high-level strategic plan for the following task.
49
+
50
+ ORIGINAL QUERY:
51
+ {user_query}
52
+
53
+ TASK SPECIFICATION:
54
+ {task_spec}
55
+
56
+ Provide a strategic overview and decomposition. Respond with ONLY a valid JSON object."""
57
+
58
+
59
+ class Pantapradhan(Role):
60
+ """
61
+ Pantapradhan (Manager) creates high-level strategic plans.
62
+
63
+ - Slot: BIG (13-34B)
64
+ - Purpose: Strategic decomposition and approach selection
65
+ - Output: High-level plan with phases, risks, decisions
66
+ """
67
+
68
+ name = "pantapradhan"
69
+ default_slot = Slot.BIG
70
+
71
+ def __init__(self, model_runner: Any, **kwargs):
72
+ super().__init__(
73
+ model_runner=model_runner,
74
+ slot=kwargs.get("slot", Slot.BIG),
75
+ max_tokens=kwargs.get("max_tokens", 768),
76
+ temperature=kwargs.get("temperature", 0.5)
77
+ )
78
+
79
+ @property
80
+ def system_prompt(self) -> str:
81
+ return PLANNER_HIGH_SYSTEM_PROMPT
82
+
83
+ def format_input(self, role_input: RoleInput) -> str:
84
+ task_spec_str = self._format_task_spec(role_input.task_spec)
85
+
86
+ return PLANNER_HIGH_USER_TEMPLATE.format(
87
+ user_query=role_input.user_query,
88
+ task_spec=task_spec_str
89
+ )
90
+
91
+ def _format_task_spec(self, task_spec: Optional[dict]) -> str:
92
+ """Format task spec for inclusion in prompt."""
93
+ if not task_spec:
94
+ return "No task specification provided."
95
+
96
+ lines = [
97
+ f"Problem: {task_spec.get('problem', 'Not specified')}",
98
+ f"Constraints: {', '.join(task_spec.get('constraints', []))}",
99
+ f"Output Format: {task_spec.get('output_format', 'text')}",
100
+ f"Difficulty: {task_spec.get('difficulty_guess', 'medium')}",
101
+ f"Task Type: {task_spec.get('task_type', 'unknown')}",
102
+ ]
103
+ return "\n".join(lines)
104
+
105
+ def parse_output(self, raw_output: str) -> dict[str, Any]:
106
+ """Parse LLM output into high-level plan dict."""
107
+ import json
108
+ import re
109
+
110
+ # Try to extract JSON from the response
111
+ json_match = re.search(r'\{[\s\S]*\}', raw_output)
112
+ if json_match:
113
+ try:
114
+ data = json.loads(json_match.group())
115
+ except json.JSONDecodeError:
116
+ data = {}
117
+ else:
118
+ data = {}
119
+
120
+ # Normalize phases
121
+ phases = []
122
+ for phase in data.get("phases", []):
123
+ phases.append({
124
+ "id": phase.get("id", len(phases) + 1),
125
+ "name": phase.get("name", "Unnamed phase"),
126
+ "description": phase.get("description", ""),
127
+ "success_criteria": phase.get("success_criteria", "")
128
+ })
129
+
130
+ return {
131
+ "plan_high": {
132
+ "goal": data.get("goal", ""),
133
+ "approach": data.get("approach", ""),
134
+ "phases": phases,
135
+ "key_decisions": data.get("key_decisions", []),
136
+ "risks": data.get("risks", []),
137
+ "complexity": self._normalize_complexity(data.get("complexity", "moderate")),
138
+ "task_category": data.get("task_category", "unknown")
139
+ },
140
+ # Compatible return
141
+ "goal": data.get("goal", ""),
142
+ "approach": data.get("approach", ""),
143
+ "phases": phases
144
+ }
145
+
146
+ def _normalize_complexity(self, value: str) -> str:
147
+ """Normalize complexity to valid enum value."""
148
+ valid = {"trivial", "simple", "moderate", "complex", "very_complex"}
149
+ normalized = value.lower().strip().replace(" ", "_")
150
+ return normalized if normalized in valid else "moderate"
@@ -0,0 +1,357 @@
1
+ """
2
+ Prerak (Challenger/Checker) role for the Parishad council.
3
+ Validates outputs using ensemble of verification methods.
4
+ """
5
+
6
+ from typing import Any, Optional
7
+
8
+ from .base import (
9
+ Role,
10
+ RoleInput,
11
+ RoleOutput,
12
+ Slot,
13
+ Verdict,
14
+ CheckerFlag,
15
+ RoleMetadata,
16
+ )
17
+
18
+
19
+ CHECKER_SYSTEM_PROMPT = """You are Prerak, the Challenger in the Parishad council. Your job is to validate the Implementor's output for correctness, completeness, and safety.
20
+
21
+ Your responsibilities:
22
+ 1. Verify the output meets the task requirements
23
+ 2. Check for factual accuracy (when possible)
24
+ 3. Identify errors, inconsistencies, or omissions
25
+ 4. Flag potential issues with severity levels
26
+ 5. Suggest specific fixes for problems found
27
+
28
+ You must ALWAYS respond with a valid JSON object in the following format:
29
+ ```json
30
+ {
31
+ "flags": [
32
+ {
33
+ "type": "claim_unsupported|syntax_error|logic_error|incomplete_output|format_error",
34
+ "severity": "low|medium|high|critical",
35
+ "detail": "Description of the issue",
36
+ "location": "Where in the output the issue was found",
37
+ "suggested_fix": "How to fix this issue"
38
+ }
39
+ ],
40
+ "must_fix": true,
41
+ "evidence": [
42
+ {
43
+ "source": "Source of evidence",
44
+ "source_type": "retrieval|deterministic|llm_judgment",
45
+ "snippet": "Relevant snippet",
46
+ "relevance_score": 0.8,
47
+ "supports_claim": true
48
+ }
49
+ ],
50
+ "suggested_edits": ["Specific fix 1", "Specific fix 2"],
51
+ "overall_confidence": 0.75,
52
+ "checks_performed": ["schema", "syntax", "logic", "retrieval"]
53
+ }
54
+ ```
55
+
56
+ Flag types:
57
+ - format_error: Output doesn't match expected format
58
+ - schema_violation: JSON/structure issues
59
+ - syntax_error: Code syntax problems
60
+ - runtime_error: Code would fail at runtime
61
+ - test_failure: Code fails test cases
62
+ - claim_unsupported: Factual claim without support
63
+ - claim_contradicted: Claim contradicts known facts
64
+ - claim_uncertain: Claim cannot be verified
65
+ - safety_violation: Content policy issues
66
+ - pii_detected: Personal information found
67
+ - incomplete_output: Missing required parts
68
+ - logic_error: Reasoning or logic flaw
69
+
70
+ Severity levels:
71
+ - low: Minor issue, doesn't affect correctness
72
+ - medium: Should be fixed but output is usable
73
+ - high: Significant issue, likely incorrect
74
+ - critical: Must be fixed, output is wrong/unsafe
75
+
76
+ Set must_fix = true if there are any HIGH or CRITICAL severity flags.
77
+
78
+ Be thorough but fair. Don't flag things that are working correctly."""
79
+
80
+
81
+ CHECKER_USER_TEMPLATE = """Validate the following Implementor output.
82
+
83
+ TASK SPECIFICATION:
84
+ {task_spec}
85
+
86
+ EXECUTION PLAN:
87
+ {plan}
88
+
89
+ IMPLEMENTOR OUTPUT:
90
+ {candidate}
91
+
92
+ {tool_results}
93
+
94
+ Analyze the output for correctness and completeness. Respond with ONLY a valid JSON object."""
95
+
96
+
97
+ CHECKER_CODE_EMPHASIS = """
98
+ For CODE validation, focus on:
99
+ - Syntax correctness
100
+ - Logic errors
101
+ - Edge case handling
102
+ - Import statements
103
+ - Function signatures matching requirements
104
+ - Potential runtime errors"""
105
+
106
+
107
+ CHECKER_MATH_EMPHASIS = """
108
+ For MATH validation, focus on:
109
+ - Calculation accuracy
110
+ - Step-by-step reasoning correctness
111
+ - Final answer format
112
+ - Units and precision
113
+ - Common arithmetic errors"""
114
+
115
+
116
+ CHECKER_QA_EMPHASIS = """
117
+ For QA validation, focus on:
118
+ - Factual accuracy
119
+ - Completeness of answer
120
+ - Relevance to the question
121
+ - Unsupported claims
122
+ - Potential misinformation"""
123
+
124
+
125
+ class Prerak(Role):
126
+ """
127
+ Prerak (Challenger) validates Implementor output using ensemble of verification methods.
128
+
129
+ - Slot: SMALL (2-4B) + external tools
130
+ - Purpose: Identify errors, flag issues, suggest fixes
131
+ - Output: Verdict with flags, evidence, must_fix decision
132
+ """
133
+
134
+ name = "prerak"
135
+ default_slot = Slot.SMALL
136
+
137
+ def __init__(
138
+ self,
139
+ model_runner: Any,
140
+ tools: Optional[list[str]] = None,
141
+ use_ensemble: bool = False,
142
+ enable_retrieval: bool = True,
143
+ enable_llm_check: bool = True,
144
+ **kwargs
145
+ ):
146
+ super().__init__(
147
+ model_runner=model_runner,
148
+ slot=kwargs.get("slot", Slot.SMALL),
149
+ max_tokens=kwargs.get("max_tokens", 768),
150
+ temperature=kwargs.get("temperature", 0.2)
151
+ )
152
+ self.tools = tools or ["json_validator", "syntax_checker"]
153
+ self._tool_results: dict[str, Any] = {}
154
+
155
+ # Ensemble configuration (opt-in)
156
+ self.use_ensemble = use_ensemble
157
+ self.enable_retrieval = enable_retrieval
158
+ self.enable_llm_check = enable_llm_check
159
+ self._ensemble_results: Optional[dict[str, Any]] = None
160
+
161
+ @property
162
+ def system_prompt(self) -> str:
163
+ return CHECKER_SYSTEM_PROMPT
164
+
165
+ def format_input(self, role_input: RoleInput) -> str:
166
+ task_spec_str = self._format_task_spec(role_input.task_spec)
167
+ plan_str = self._format_plan(role_input.plan)
168
+ candidate_str = self._format_candidate(role_input.candidate)
169
+ tool_results_str = self._format_tool_results()
170
+
171
+ # Add task-specific emphasis
172
+ task_type = ""
173
+ if role_input.task_spec:
174
+ task_type = role_input.task_spec.get("task_type", "")
175
+
176
+ prompt = CHECKER_USER_TEMPLATE.format(
177
+ task_spec=task_spec_str,
178
+ plan=plan_str,
179
+ candidate=candidate_str,
180
+ tool_results=tool_results_str
181
+ )
182
+
183
+ if task_type == "code":
184
+ prompt += CHECKER_CODE_EMPHASIS
185
+ elif task_type == "math":
186
+ prompt += CHECKER_MATH_EMPHASIS
187
+ elif task_type == "qa":
188
+ prompt += CHECKER_QA_EMPHASIS
189
+
190
+ return prompt
191
+
192
+ def _format_task_spec(self, task_spec: Optional[dict]) -> str:
193
+ """Format task spec for inclusion in prompt."""
194
+ if not task_spec:
195
+ return "No task specification provided."
196
+
197
+ return f"""Problem: {task_spec.get('problem', 'Not specified')}
198
+ Task Type: {task_spec.get('task_type', 'Unknown')}
199
+ Output Format: {task_spec.get('output_format', 'text')}"""
200
+
201
+ def _format_plan(self, plan: Optional[dict]) -> str:
202
+ """Format plan summary for checker."""
203
+ if not plan:
204
+ return "No plan provided."
205
+
206
+ steps = plan.get("steps", [])
207
+ if not steps:
208
+ return "No steps in plan."
209
+
210
+ lines = [f"Expected Output: {plan.get('expected_output_type', 'text')}"]
211
+ lines.append(f"Steps: {len(steps)}")
212
+
213
+ checkpoints = plan.get("checkpoints", [])
214
+ if checkpoints:
215
+ lines.append(f"Checkpoints: {checkpoints}")
216
+
217
+ return "\n".join(lines)
218
+
219
+ def _format_candidate(self, candidate: Optional[dict]) -> str:
220
+ """Format candidate output for checking."""
221
+ if not candidate:
222
+ return "No candidate output provided."
223
+
224
+ content = candidate.get("content", "")
225
+ content_type = candidate.get("content_type", "text")
226
+ confidence = candidate.get("confidence", 0.5)
227
+ warnings = candidate.get("warnings", [])
228
+
229
+ lines = [
230
+ f"Content Type: {content_type}",
231
+ f"Implementor Confidence: {confidence}",
232
+ "",
233
+ "=== CONTENT START ===",
234
+ content[:3000] if len(content) > 3000 else content, # Truncate if too long
235
+ "=== CONTENT END ==="
236
+ ]
237
+
238
+ if warnings:
239
+ lines.append(f"\nImplementor Warnings: {warnings}")
240
+
241
+ return "\n".join(lines)
242
+
243
+ def _format_tool_results(self) -> str:
244
+ """Format results from deterministic tools."""
245
+ if not self._tool_results:
246
+ return ""
247
+
248
+ lines = ["\n--- TOOL RESULTS ---"]
249
+
250
+ for tool_name, result in self._tool_results.items():
251
+ lines.append(f"\n[{tool_name}]:")
252
+ if isinstance(result, dict):
253
+ if result.get("success"):
254
+ lines.append(f" Status: PASS")
255
+ else:
256
+ lines.append(f" Status: FAIL")
257
+ if result.get("errors"):
258
+ for error in result["errors"][:3]:
259
+ lines.append(f" - {error}")
260
+ else:
261
+ lines.append(f" {result}")
262
+
263
+ lines.append("--- END TOOL RESULTS ---")
264
+ return "\n".join(lines)
265
+
266
+ def run_ensemble_checks(self, content: str, check_type: str, context: Optional[dict] = None) -> dict[str, Any]:
267
+ """Run ensemble checks (placeholder for actual implementation)."""
268
+ # In a real implementation this would call out to deterministic tools
269
+ return {"must_fix": False, "flags": [], "confidence": 0.5}
270
+
271
+ def __call__(self, role_input: RoleInput) -> RoleOutput:
272
+ """Execute Checker role."""
273
+ return super().__call__(role_input)
274
+
275
+ def set_retrieval_results(self, results: list[dict]) -> None:
276
+ """Set retrieval results from external retrieval system."""
277
+ self._tool_results["retrieval"] = {
278
+ "success": True,
279
+ "results": results
280
+ }
281
+
282
+ def parse_output(self, raw_output: str) -> dict[str, Any]:
283
+ """Parse LLM output into Verdict dict."""
284
+ data = self._extract_json(raw_output)
285
+
286
+ # Normalize flags
287
+ flags = []
288
+ for flag in data.get("flags", []):
289
+ if isinstance(flag, dict):
290
+ flags.append({
291
+ "type": flag.get("type", "unknown"),
292
+ "severity": self._normalize_severity(flag.get("severity", "low")),
293
+ "detail": flag.get("detail", ""),
294
+ "location": flag.get("location"),
295
+ "suggested_fix": flag.get("suggested_fix")
296
+ })
297
+
298
+ # Normalize evidence
299
+ evidence = []
300
+ for ev in data.get("evidence", []):
301
+ if isinstance(ev, dict):
302
+ evidence.append({
303
+ "source": ev.get("source", ""),
304
+ "source_type": ev.get("source_type", "llm_judgment"),
305
+ "snippet": ev.get("snippet", ""),
306
+ "relevance_score": float(ev.get("relevance_score", 0)),
307
+ "supports_claim": ev.get("supports_claim", True)
308
+ })
309
+
310
+ # Determine must_fix
311
+ must_fix = data.get("must_fix", False)
312
+ if not must_fix:
313
+ must_fix = any(
314
+ f.get("severity") in ["high", "critical"]
315
+ for f in flags
316
+ )
317
+
318
+ # Normalize confidence
319
+ confidence = data.get("overall_confidence", 0.5)
320
+ if isinstance(confidence, str):
321
+ try:
322
+ confidence = float(confidence)
323
+ except ValueError:
324
+ confidence = 0.5
325
+ confidence = max(0.0, min(1.0, confidence))
326
+
327
+ return {
328
+ "flags": flags,
329
+ "must_fix": must_fix,
330
+ "evidence": evidence,
331
+ "suggested_edits": data.get("suggested_edits", []),
332
+ "overall_confidence": confidence,
333
+ "checks_performed": data.get("checks_performed", [])
334
+ }
335
+
336
+ def _normalize_severity(self, value: str) -> str:
337
+ """Normalize severity to valid enum value."""
338
+ valid = {"low", "medium", "high", "critical"}
339
+ normalized = value.lower().strip()
340
+ return normalized if normalized in valid else "low"
341
+
342
+ def create_verdict(self, role_input: RoleInput) -> Verdict:
343
+ """Execute checker and return a Verdict object."""
344
+ output = self(role_input)
345
+
346
+ if output.status == "error":
347
+ return Verdict(
348
+ flags=[CheckerFlag(
349
+ type="checker_error",
350
+ severity="low",
351
+ detail=f"Checker failed: {output.error}"
352
+ )],
353
+ must_fix=False,
354
+ overall_confidence=0.5
355
+ )
356
+
357
+ return Verdict.from_dict(output.core_output)