parishad-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/roles/raja.py ADDED
@@ -0,0 +1,345 @@
"""
Raja (Judge) role for the Parishad council.
Decider who synthesizes all information to produce the final answer.
"""

from typing import Any, Optional

from .base import (
    Role,
    RoleInput,
    Slot,
    FinalAnswer,
)
from ..utils.text import truncate_with_note

JUDGE_SYSTEM_PROMPT = """You are Raja, the Judge in the Parishad council. Your job is to synthesize all information from the council and produce the final, authoritative answer.

You have access to:
1. The original user query
2. The Task Specification (from Darbari)
3. The Execution Plan (from Majumdar/Sar-Senapati)
4. The Implementor's solution (from Sainik)
5. The Challenger's verification verdict (from Prerak)

Your responsibilities:
1. Review all outputs from the council
2. Consider the Challenger's flags and evidence
3. Make the final decision on the answer
4. Ensure the answer is complete and accurate
5. Note any caveats or limitations

You must ALWAYS respond with a valid JSON object in the following format:
```json
{
    "final_answer": "The complete, polished answer to present to the user",
    "answer_type": "code|text|numeric|structured",
    "rationale": "Why this answer is correct and how you arrived at it",
    "confidence": 0.9,
    "caveats": ["Any limitations or assumptions"],
    "sources_used": ["What information you relied on"],
    "numeric_answer": 42,
    "code_block": "def solution(): ..."
}
```

Guidelines:

For CODE answers:
- Include the complete, runnable code in "code_block"
- Set "answer_type" to "code"
- Include any necessary explanations in "final_answer"
- If Challenger found issues, fix them in your final code

For MATH answers:
- Include the numeric result in "numeric_answer"
- Show key steps in "final_answer"
- Set "answer_type" to "numeric"

For QA/TEXT answers:
- Provide a clear, complete answer in "final_answer"
- Set "answer_type" to "text"
- Address the question directly

When Challenger found issues (must_fix = true):
- Carefully consider each flag
- Fix issues if possible
- If you cannot fix, explain why in caveats
- Adjust confidence accordingly

Be authoritative but honest. If something is uncertain, say so."""


JUDGE_USER_TEMPLATE = """Synthesize the council's outputs and produce the final answer.

ORIGINAL QUERY:
{user_query}

TASK SPECIFICATION:
{task_spec}

EXECUTION PLAN:
{plan}

IMPLEMENTOR OUTPUT:
{candidate}

CHALLENGER VERDICT:
{verdict}

Based on all the above, provide the final, authoritative answer. Respond with ONLY a valid JSON object."""


class Raja(Role):
    """
    Raja (Judge) integrates all council outputs into final answer.

    - Slot: BIG (13-34B)
    - Purpose: Final synthesis and decision making
    - Output: FinalAnswer with polished answer, rationale, confidence
    """

    name = "raja"
    default_slot = Slot.BIG

    def __init__(
        self,
        model_runner: Any,
        fallback_slot: Optional[Slot] = Slot.MID,
        **kwargs
    ):
        super().__init__(
            model_runner=model_runner,
            slot=kwargs.get("slot", Slot.BIG),
            max_tokens=kwargs.get("max_tokens", 1536),
            temperature=kwargs.get("temperature", 0.4)
        )
        self.fallback_slot = fallback_slot
        # Phase-3 Task 2: Track truncation for metadata
        self._worker_truncated = False
        self._checker_truncated = False

    @property
    def system_prompt(self) -> str:
        return JUDGE_SYSTEM_PROMPT

    def format_input(self, role_input: RoleInput) -> str:
        # Phase-3 Task 2: Extract truncation policy from routing metadata
        routing_meta = role_input.metadata.get("routing", {})
        truncation_policy = routing_meta.get("truncation_policy", "none")

        # Reset truncation tracking
        self._worker_truncated = False
        self._checker_truncated = False

        task_spec_str = self._format_task_spec(role_input.task_spec)
        plan_str = self._format_plan(role_input.plan)
        candidate_str = self._format_candidate(role_input.candidate, truncation_policy)
        verdict_str = self._format_verdict(role_input.verdict, truncation_policy)

        return JUDGE_USER_TEMPLATE.format(
            user_query=role_input.user_query,
            task_spec=task_spec_str,
            plan=plan_str,
            candidate=candidate_str,
            verdict=verdict_str
        )

    def __call__(self, role_input: RoleInput):
        """Override to add truncation metadata to output."""
        from .base import RoleOutput, RoleMetadata

        # Call base implementation
        output = super().__call__(role_input)

        # Phase-3 Task 2: Add truncation metadata if truncation occurred
        if self._worker_truncated or self._checker_truncated:
            # Create new RoleMetadata with truncation info
            new_metadata = RoleMetadata(
                tokens_used=output.metadata.tokens_used,
                latency_ms=output.metadata.latency_ms,
                model_id=output.metadata.model_id,
                slot=output.metadata.slot,
                timestamp=output.metadata.timestamp,
                duration_ms=output.metadata.duration_ms,
                schema_warning=output.metadata.schema_warning,
                worker_truncated=self._worker_truncated,
                checker_truncated=self._checker_truncated,
            )

            # Create new RoleOutput with updated metadata
            output = RoleOutput(
                role=output.role,
                status=output.status,
                core_output=output.core_output,
                error=output.error,
                metadata=new_metadata,
            )

        return output

    def _format_task_spec(self, task_spec: Optional[dict]) -> str:
        """Format task spec for judge review."""
        if not task_spec:
            return "No task specification provided."

        return f"""Problem: {task_spec.get('problem', 'Not specified')}
Task Type: {task_spec.get('task_type', 'Unknown')}
Output Format: {task_spec.get('output_format', 'text')}
Difficulty: {task_spec.get('difficulty_guess', 'medium')}"""

    def _format_plan(self, plan: Optional[dict]) -> str:
        """Format plan summary for judge."""
        if not plan:
            return "No plan provided."

        lines = []

        if plan.get("suggested_approach"):
            lines.append(f"Approach: {plan['suggested_approach']}")

        steps = plan.get("steps", [])
        lines.append(f"Steps planned: {len(steps)}")

        expected = plan.get("expected_output_type", "")
        if expected:
            lines.append(f"Expected output: {expected}")

        return "\n".join(lines)

    def _format_candidate(self, candidate: Optional[dict], truncation_policy: str = "none") -> str:
        """Format worker candidate for judge review.

        Args:
            candidate: Worker output dict
            truncation_policy: "none", "moderate", or "aggressive"
        """
        if not candidate:
            return "No candidate output from Implementor."

        content = candidate.get("content", "")
        content_type = candidate.get("content_type", "text")
        confidence = candidate.get("confidence", 0.5)
        warnings = candidate.get("warnings", [])

        # Phase-3 Task 2: Apply truncation based on policy
        limits = {
            "none": None,
            "moderate": 2500,
            "aggressive": 1200,
        }
        max_chars = limits.get(truncation_policy)

        was_truncated = False
        if max_chars and len(content) > max_chars:
            content, was_truncated = truncate_with_note(content, max_chars, "worker")
            self._worker_truncated = True  # Track for metadata

        lines = [
            f"Content Type: {content_type}",
            f"Implementor Confidence: {confidence}",
        ]

        if warnings:
            lines.append(f"Implementor Warnings: {warnings}")

        if was_truncated:
            lines.append(f"[Note: Worker output truncated from {len(candidate.get('content', ''))} to {max_chars} chars]")

        lines.extend([
            "",
            "=== IMPLEMENTOR OUTPUT ===",
            content[:4000] if len(content) > 4000 else content,
            "=== END OUTPUT ==="
        ])

        return "\n".join(lines)

    def _format_verdict(self, verdict: Optional[dict], truncation_policy: str = "none") -> str:
        """Format checker verdict for judge consideration.

        Args:
            verdict: Checker verdict dict
            truncation_policy: "none", "moderate", or "aggressive"
        """
        if not verdict:
            return "No challenger verdict available."

        lines = []

        must_fix = verdict.get("must_fix", False)
        confidence = verdict.get("overall_confidence", 0.5)

        lines.append(f"Must Fix: {'YES' if must_fix else 'No'}")
        lines.append(f"Challenger Confidence: {confidence}")

        flags = verdict.get("flags", [])
        if flags:
            # Phase-3 Task 2: Truncate number of flags shown based on policy
            flag_limit = {
                "none": len(flags),
                "moderate": min(5, len(flags)),
                "aggressive": min(3, len(flags)),
            }.get(truncation_policy, 5)

            if flag_limit < len(flags):
                self._checker_truncated = True  # Track for metadata

            lines.append(f"\nFlags ({len(flags)} total, showing {min(flag_limit, len(flags))}):")
            for flag in flags[:flag_limit]:
                severity = flag.get("severity", "unknown").upper()
                detail = flag.get("detail", "")
                fix = flag.get("suggested_fix", "")
                lines.append(f"  [{severity}] {detail}")
                if fix and truncation_policy != "aggressive":  # Skip fix details in aggressive mode
                    lines.append(f"    Fix: {fix}")
        else:
            lines.append("\nNo flags raised - output appears valid.")

        edits = verdict.get("suggested_edits", [])
        if edits and truncation_policy != "aggressive":  # Skip edits in aggressive mode
            lines.append("\nSuggested Edits:")
            for edit in edits[:3]:
                lines.append(f"  - {edit}")

        evidence = verdict.get("evidence", [])
        if evidence:
            lines.append(f"\nEvidence items: {len(evidence)}")

        return "\n".join(lines)

    def parse_output(self, raw_output: str) -> dict[str, Any]:
        """Parse LLM output into FinalAnswer dict."""
        data = self._extract_json(raw_output)

        # Handle raw output fallback
        final_answer = data.get("final_answer", "")
        if not final_answer and "raw_output" in data:
            final_answer = data["raw_output"]

        return {
            "final_answer": final_answer,
            "answer_type": data.get("answer_type", "text"),
            "rationale": data.get("rationale", ""),
            "confidence": max(0.0, min(1.0, data.get("confidence", 0.5))),
            "caveats": data.get("caveats", []),
            "sources_used": data.get("sources_used", []),
            "numeric_answer": data.get("numeric_answer"),
            "code_block": data.get("code_block")
        }

    def create_final_answer(self, role_input: RoleInput) -> FinalAnswer:
        """
        Execute Raja and return a FinalAnswer object.
        """
        output = self(role_input)

        if output.status == "error":
            return FinalAnswer(
                final_answer="Desh sevak encountered an error and could not produce a result.",
                answer_type="text",
                confidence=0.0,
                rationale=f"Error: {output.error}"
            )

        return FinalAnswer.from_dict(output.core_output)
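Note: Raja.parse_output expects the model reply to follow the JSON schema spelled out in JUDGE_SYSTEM_PROMPT, and it clamps "confidence" into [0.0, 1.0]. The standalone sketch below illustrates that contract; the sample reply text and its values are invented for illustration and are not part of the package.

    import json

    # A well-formed Raja reply, shaped like the schema in JUDGE_SYSTEM_PROMPT
    # (values are made up for illustration only).
    sample_reply = '''
    {
      "final_answer": "The sum of the first 10 positive integers is 55.",
      "answer_type": "numeric",
      "rationale": "Applied n*(n+1)/2 with n=10.",
      "confidence": 1.3,
      "caveats": [],
      "sources_used": ["execution plan", "implementor output"],
      "numeric_answer": 55,
      "code_block": null
    }
    '''

    data = json.loads(sample_reply)

    # parse_output clamps out-of-range confidence values; 1.3 becomes 1.0.
    confidence = max(0.0, min(1.0, data.get("confidence", 0.5)))
    print(confidence)            # 1.0
    print(data["answer_type"])   # numeric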
parishad/roles/sacheev.py ADDED
@@ -0,0 +1,203 @@
"""
Sacheev (Advisor/CheckerFact) role for the Parishad council.
Verifies factual claims using retrieval and reasoning.
"""

from typing import Any, Optional

from .base import (
    Role,
    RoleInput,
    RoleOutput,
    Slot,
)


CHECKER_FACT_SYSTEM_PROMPT = """You are Sacheev, the Advisor in the Parishad council. Your job is to verify the factual accuracy of the Implementor's output.

Your responsibilities:
1. Identify factual claims in the output
2. Verify claims against known facts and reasoning
3. Flag unsupported or incorrect claims
4. Assess overall factual reliability
5. Suggest corrections for factual errors

You must ALWAYS respond with a valid JSON object in the following format:
```json
{
    "claims": [
        {
            "claim": "The specific claim being verified",
            "status": "verified|unverified|incorrect|partially_correct",
            "confidence": 0.9,
            "evidence": "Supporting or contradicting evidence",
            "correction": "Corrected version if incorrect"
        }
    ],
    "overall_accuracy": 0.85,
    "factual_issues": [
        {
            "type": "incorrect_fact|unsupported_claim|outdated_info|logical_error",
            "severity": "low|medium|high|critical",
            "description": "Description of the issue",
            "suggestion": "How to fix it"
        }
    ],
    "must_fix": false,
    "summary": "Brief summary of factual assessment"
}
```

Be rigorous but fair. Only flag issues you're confident about.
Distinguish between factual errors and matters of opinion."""


CHECKER_FACT_USER_TEMPLATE = """Verify the factual accuracy of the following output.

TASK SPECIFICATION:
{task_spec}

EXECUTION PLAN:
{plan}

OUTPUT TO VERIFY:
{candidate}

Analyze for factual correctness. Respond with ONLY a valid JSON object."""


class Sacheev(Role):
    """
    Sacheev (Advisor) verifies factual accuracy of outputs.

    - Slot: SMALL (2-4B)
    - Purpose: Verify claims and flag factual errors
    - Output: Verdict on factual correctness
    """

    name = "sacheev"
    default_slot = Slot.SMALL

    def __init__(
        self,
        model_runner: Any,
        tools: Optional[list[str]] = None,
        **kwargs
    ):
        super().__init__(
            model_runner=model_runner,
            slot=kwargs.get("slot", Slot.SMALL),
            max_tokens=kwargs.get("max_tokens", 768),
            temperature=kwargs.get("temperature", 0.2)
        )
        self.tools = tools or ["retrieval", "claim_extractor"]

    @property
    def system_prompt(self) -> str:
        return CHECKER_FACT_SYSTEM_PROMPT

    def format_input(self, role_input: RoleInput) -> str:
        task_spec_str = self._format_task_spec(role_input.task_spec)
        plan_str = self._format_plan(role_input.plan)
        candidate_str = self._format_candidate(role_input.candidate)

        return CHECKER_FACT_USER_TEMPLATE.format(
            task_spec=task_spec_str,
            plan=plan_str,
            candidate=candidate_str
        )

    def _format_task_spec(self, task_spec: Optional[dict]) -> str:
        """Format task spec for inclusion in prompt."""
        if not task_spec:
            return "No task specification provided."

        return f"""Problem: {task_spec.get('problem', 'Not specified')}
Task Type: {task_spec.get('task_type', 'Unknown')}"""

    def _format_plan(self, plan: Optional[dict]) -> str:
        """Format plan summary."""
        if not plan:
            return "No plan provided."

        steps = plan.get("steps", [])
        return f"Steps: {len(steps)}, Expected: {plan.get('expected_output_type', 'text')}"

    def _format_candidate(self, candidate: Optional[dict]) -> str:
        """Format candidate output for checking."""
        if not candidate:
            return "No output to verify."

        content = candidate.get("content", "")
        if len(content) > 2000:
            content = content[:2000] + "... [truncated]"

        return f"""Content Type: {candidate.get('content_type', 'unknown')}
Content:
{content}"""

    def parse_output(self, raw_output: str) -> dict[str, Any]:
        """Parse LLM output into factual verdict dict."""
        import json
        import re

        # Try to extract JSON from the response
        json_match = re.search(r'\{[\s\S]*\}', raw_output)
        if json_match:
            try:
                data = json.loads(json_match.group())
            except json.JSONDecodeError:
                data = {}
        else:
            data = {}

        # Normalize claims
        claims = []
        for claim in data.get("claims", []):
            claims.append({
                "claim": claim.get("claim", ""),
                "status": claim.get("status", "unverified"),
                "confidence": max(0.0, min(1.0, claim.get("confidence", 0.5))),
                "evidence": claim.get("evidence", ""),
                "correction": claim.get("correction", "")
            })

        # Normalize factual issues
        issues = []
        for issue in data.get("factual_issues", []):
            issues.append({
                "type": issue.get("type", "unknown"),
                "severity": self._normalize_severity(issue.get("severity", "low")),
                "description": issue.get("description", ""),
                "suggestion": issue.get("suggestion", "")
            })

        # Determine must_fix
        must_fix = data.get("must_fix", False)
        if not must_fix:
            must_fix = any(i.get("severity") in ["high", "critical"] for i in issues)

        return {
            "verdict_fact": {
                "claims": claims,
                "overall_accuracy": max(0.0, min(1.0, data.get("overall_accuracy", 0.5))),
                "factual_issues": issues,
                "must_fix": must_fix,
                "summary": data.get("summary", "")
            },
            # Compatible with standard Verdict schema
            "flags": [{
                "type": i["type"],
                "severity": i["severity"],
                "detail": i["description"],
                "suggested_fix": i["suggestion"]
            } for i in issues],
            "must_fix": must_fix,
            "overall_confidence": max(0.0, min(1.0, data.get("overall_accuracy", 0.5)))
        }

    def _normalize_severity(self, value: str) -> str:
        """Normalize severity to valid enum value."""
        valid = {"low", "medium", "high", "critical"}
        normalized = value.lower().strip()
        return normalized if normalized in valid else "low"
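Note: the must_fix escalation in Sacheev.parse_output means that even when the model reports "must_fix": false, any issue of high or critical severity forces must_fix to True. A minimal standalone sketch of that rule (the issues list is invented for illustration):

    # Mirrors the escalation logic in Sacheev.parse_output.
    issues = [
        {"type": "unsupported_claim", "severity": "medium"},
        {"type": "incorrect_fact", "severity": "high"},
    ]

    must_fix = False  # what the model itself reported
    if not must_fix:
        must_fix = any(i.get("severity") in ["high", "critical"] for i in issues)

    print(must_fix)  # True, because one issue has severity "high"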