parishad 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,644 @@
+ """
+ Deterministic checking tools for Parishad.
+
+ These are "free" checks that don't require LLM calls:
+ - JSON schema validation
+ - Math expression evaluation
+ - Code syntax checking
+ - Code execution with tests
+ - Format validation
+ """
+
+ from __future__ import annotations
+
+ import ast
+ import json
+ import os
+ import re
+ import subprocess
+ import tempfile
+ from dataclasses import dataclass, field
+ from typing import Any, Callable, Optional
+
+ import jsonschema
+
+
+ # ==============================================================================
+ # Standalone helper functions (stateless, for direct use)
+ # ==============================================================================
+
+ def validate_schema(role_output: dict, schema: dict) -> dict:
+     """
+     Validate a role output dict against a JSON schema.
+
+     Args:
+         role_output: The output dict to validate
+         schema: JSON schema to validate against
+
+     Returns:
+         Compact dict: {"ok": bool, "error": Optional[str]}
+     """
+     try:
+         jsonschema.validate(role_output, schema)
+         return {"ok": True, "error": None}
+     except jsonschema.ValidationError as e:
+         return {"ok": False, "error": f"{e.message} at {'.'.join(str(p) for p in e.path)}"}
+     except jsonschema.SchemaError as e:
+         return {"ok": False, "error": f"Invalid schema: {e.message}"}
+
+
+ def check_math(expression: str) -> dict:
+     """
+     Safely evaluate a simple math expression.
+
+     Only allows basic operators (+, -, *, /, parentheses) and numbers.
+     Uses AST parsing with node type whitelisting for safety.
+
+     Args:
+         expression: Math expression string (e.g., "2 + 3 * 4")
+
+     Returns:
+         Compact dict: {"ok": bool, "result": Optional[float], "error": Optional[str]}
+     """
+     # Whitelist of allowed AST node types (ast.Constant is the modern replacement for ast.Num)
+     ALLOWED_NODES = (
+         ast.Expression,
+         ast.BinOp,
+         ast.UnaryOp,
+         ast.Constant,  # Python 3.8+ for numbers, strings, etc.
+         ast.Add,
+         ast.Sub,
+         ast.Mult,
+         ast.Div,
+         ast.FloorDiv,
+         ast.Mod,
+         ast.Pow,
+         ast.USub,  # Unary minus
+         ast.UAdd,  # Unary plus
+     )
+
+     def _validate_node(node: ast.AST) -> bool:
+         """Recursively check all nodes are in whitelist."""
+         if not isinstance(node, ALLOWED_NODES):
+             return False
+         # For Constant nodes, only allow numeric types
+         if isinstance(node, ast.Constant):
+             if not isinstance(node.value, (int, float, complex)):
+                 return False
+         for child in ast.iter_child_nodes(node):
+             if not _validate_node(child):
+                 return False
+         return True
+
+     # Clean the expression
+     expression = expression.strip()
+     if not expression:
+         return {"ok": False, "result": None, "error": "Empty expression"}
+
+     try:
+         # Parse to AST
+         tree = ast.parse(expression, mode="eval")
+
+         # Validate all nodes are safe
+         if not _validate_node(tree):
+             return {"ok": False, "result": None, "error": "Expression contains disallowed operations"}
+
+         # Compile and evaluate
+         code = compile(tree, "<math>", "eval")
+         result = eval(code, {"__builtins__": {}}, {})
+
+         # Handle division by zero
+         if isinstance(result, float) and (result != result or abs(result) == float('inf')):
+             return {"ok": False, "result": None, "error": "Division error (inf or nan)"}
+
+         return {"ok": True, "result": float(result), "error": None}
+
+     except SyntaxError as e:
+         return {"ok": False, "result": None, "error": f"Syntax error: {e}"}
+     except ZeroDivisionError:
+         return {"ok": False, "result": None, "error": "Division by zero"}
+     except Exception as e:
+         return {"ok": False, "result": None, "error": f"Evaluation error: {e}"}
+
+
+ def run_code_tests(
+     code: str,
+     test_code: str,
+     timeout: int = 10,
+     language: str = "python"
+ ) -> dict:
+     """
+     Run code with tests in isolated environment.
+
+     Executes in a temporary directory with a timeout.
+
+     Args:
+         code: The code to test
+         test_code: Test code to run against the solution
+         timeout: Maximum execution time in seconds
+         language: Programming language (currently only "python" supported)
+
+     Returns:
+         Compact dict: {"ok": bool, "stdout": str, "stderr": str, "returncode": int}
+     """
+     if language != "python":
+         return {
+             "ok": False,
+             "stdout": "",
+             "stderr": f"Unsupported language: {language}",
+             "returncode": -1
+         }
+
+     # Create combined test file
+     combined_code = f'''{code}
+
+ # ===== TEST CODE =====
+ {test_code}
+ '''
+
+     try:
+         # Create temporary directory
+         with tempfile.TemporaryDirectory() as tmpdir:
+             # Write code to file
+             code_file = os.path.join(tmpdir, "solution.py")
+             with open(code_file, "w") as f:
+                 f.write(combined_code)
+
+             # Run with timeout
+             result = subprocess.run(
+                 ["python", code_file],
+                 capture_output=True,
+                 text=True,
+                 timeout=timeout,
+                 cwd=tmpdir,
+                 env={**os.environ, "PYTHONDONTWRITEBYTECODE": "1"}
+             )
+
+             # Truncate output to avoid large strings
+             stdout = result.stdout[:2000] if result.stdout else ""
+             stderr = result.stderr[:2000] if result.stderr else ""
+
+             return {
+                 "ok": result.returncode == 0,
+                 "stdout": stdout,
+                 "stderr": stderr,
+                 "returncode": result.returncode
+             }
+
+     except subprocess.TimeoutExpired:
+         return {
+             "ok": False,
+             "stdout": "",
+             "stderr": f"Execution timed out after {timeout} seconds",
+             "returncode": -1
+         }
+     except Exception as e:
+         return {
+             "ok": False,
+             "stdout": "",
+             "stderr": f"Execution error: {e}",
+             "returncode": -1
+         }
+
+
+ # ==============================================================================
+ # Dataclasses for structured results
+ # ==============================================================================
+
+
+ @dataclass
+ class CheckResult:
+     """Result from a single check."""
+
+     name: str
+     passed: bool
+     message: str
+     details: dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class DeterministicCheckResults:
+     """Aggregated results from deterministic checks."""
+
+     checks: list[CheckResult]
+     all_passed: bool
+     critical_failure: bool = False
+     failure_reason: Optional[str] = None
+
+     @classmethod
+     def from_checks(cls, checks: list[CheckResult]) -> "DeterministicCheckResults":
+         """Create from list of check results."""
+         all_passed = all(c.passed for c in checks)
+         # Find critical failures (e.g., JSON parse failure when JSON expected)
+         critical = [c for c in checks if not c.passed and c.details.get("critical", False)]
+         critical_failure = len(critical) > 0
+         failure_reason = critical[0].message if critical else None
+         return cls(
+             checks=checks,
+             all_passed=all_passed,
+             critical_failure=critical_failure,
+             failure_reason=failure_reason,
+         )
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary for logging."""
+         return {
+             "all_passed": self.all_passed,
+             "critical_failure": self.critical_failure,
+             "failure_reason": self.failure_reason,
+             "checks": [
+                 {
+                     "name": c.name,
+                     "passed": c.passed,
+                     "message": c.message,
+                 }
+                 for c in self.checks
+             ],
+         }
+
+
+ class DeterministicChecker:
+     """
+     Collection of deterministic (free) checks.
+
+     These checks don't require LLM inference and should be run
+     before any LLM-based verification to catch obvious errors.
+     """
+
+     def __init__(self):
+         """Initialize with default checks enabled."""
+         self._custom_checks: list[Callable] = []
+
+     def register_check(self, check_fn: Callable[[str, dict], CheckResult]) -> None:
+         """Register a custom check function."""
+         self._custom_checks.append(check_fn)
+
+     def check_json_parseable(
+         self,
+         text: str,
+         schema: Optional[dict] = None,
+         critical: bool = True,
+     ) -> CheckResult:
+         """
+         Check if text is valid JSON and optionally validate against schema.
+
+         Args:
+             text: Text to parse as JSON
+             schema: Optional JSON schema to validate against
+             critical: Whether parse failure is critical
+
+         Returns:
+             CheckResult with parsing/validation status
+         """
+         try:
+             data = json.loads(text)
+         except json.JSONDecodeError as e:
+             return CheckResult(
+                 name="json_parse",
+                 passed=False,
+                 message=f"JSON parse error: {e}",
+                 details={"critical": critical, "position": e.pos},
+             )
+
+         if schema is not None:
+             try:
+                 jsonschema.validate(data, schema)
+             except jsonschema.ValidationError as e:
+                 return CheckResult(
+                     name="json_schema",
+                     passed=False,
+                     message=f"Schema validation error: {e.message}",
+                     details={"path": list(e.path), "critical": False},
+                 )
+
+         return CheckResult(
+             name="json_parse",
+             passed=True,
+             message="Valid JSON",
+             details={"parsed": data},
+         )
+
+     def check_python_syntax(
+         self,
+         code: str,
+         critical: bool = True,
+     ) -> CheckResult:
+         """
+         Check if Python code has valid syntax.
+
+         Args:
+             code: Python code to check
+             critical: Whether syntax error is critical
+
+         Returns:
+             CheckResult with syntax status
+         """
+         try:
+             ast.parse(code)
+             return CheckResult(
+                 name="python_syntax",
+                 passed=True,
+                 message="Valid Python syntax",
+             )
+         except SyntaxError as e:
+             return CheckResult(
+                 name="python_syntax",
+                 passed=False,
+                 message=f"Syntax error: {e.msg} at line {e.lineno}",
+                 details={
+                     "critical": critical,
+                     "line": e.lineno,
+                     "offset": e.offset,
+                 },
+             )
+
+     def check_math_expression(
+         self,
+         expression: str,
+         expected_result: Optional[float] = None,
+         tolerance: float = 1e-6,
+     ) -> CheckResult:
+         """
+         Safely evaluate a math expression.
+
+         Args:
+             expression: Math expression to evaluate
+             expected_result: Optional expected value
+             tolerance: Tolerance for floating point comparison
+
+         Returns:
+             CheckResult with evaluation status
+         """
+         # Safe subset of allowed operations
+         allowed_names = {
+             "abs": abs,
+             "round": round,
+             "min": min,
+             "max": max,
+             "sum": sum,
+             "pow": pow,
+             "int": int,
+             "float": float,
+         }
+
+         try:
+             # Parse the expression
+             tree = ast.parse(expression, mode="eval")
+
+             # Compile with restricted builtins
+             code = compile(tree, "<math>", "eval")
+
+             # Evaluate in restricted namespace
+             result = eval(code, {"__builtins__": {}}, allowed_names)
+
+             if expected_result is not None:
+                 if abs(result - expected_result) <= tolerance:
+                     return CheckResult(
+                         name="math_eval",
+                         passed=True,
+                         message=f"Correct: {result}",
+                         details={"result": result, "expected": expected_result},
+                     )
+                 else:
+                     return CheckResult(
+                         name="math_eval",
+                         passed=False,
+                         message=f"Wrong answer: got {result}, expected {expected_result}",
+                         details={"result": result, "expected": expected_result},
+                     )
+
+             return CheckResult(
+                 name="math_eval",
+                 passed=True,
+                 message=f"Evaluated to: {result}",
+                 details={"result": result},
+             )
+
+         except Exception as e:
+             return CheckResult(
+                 name="math_eval",
+                 passed=False,
+                 message=f"Evaluation error: {e}",
+                 details={"critical": False},
+             )
+
+     def check_format(
+         self,
+         text: str,
+         pattern: str,
+         description: str = "format",
+     ) -> CheckResult:
+         """
+         Check if text matches a regex pattern.
+
+         Args:
+             text: Text to check
+             pattern: Regex pattern to match
+             description: Human-readable description of expected format
+
+         Returns:
+             CheckResult with match status
+         """
+         try:
+             if re.search(pattern, text, re.MULTILINE | re.DOTALL):
+                 return CheckResult(
+                     name="format_check",
+                     passed=True,
+                     message=f"Matches {description} format",
+                 )
+             else:
+                 return CheckResult(
+                     name="format_check",
+                     passed=False,
+                     message=f"Does not match {description} format",
+                     details={"pattern": pattern},
+                 )
+         except re.error as e:
+             return CheckResult(
+                 name="format_check",
+                 passed=False,
+                 message=f"Invalid regex pattern: {e}",
+             )
+
+     def check_contains_answer(
+         self,
+         text: str,
+         answer_patterns: Optional[list[str]] = None,
+     ) -> CheckResult:
+         """
+         Check if text contains a properly formatted answer.
+
+         Args:
+             text: Text to check for answer
+             answer_patterns: List of patterns that indicate an answer
+
+         Returns:
+             CheckResult indicating if answer format is present
+         """
+         if answer_patterns is None:
+             answer_patterns = [
+                 r"(?:answer|result|solution).*?[:=]\s*\S+",
+                 r"\\boxed\{.+?\}",
+                 r"####\s*\S+",
+                 r"```[\w]*\n.+?\n```",
+             ]
+
+         for pattern in answer_patterns:
+             if re.search(pattern, text, re.IGNORECASE | re.DOTALL):
+                 return CheckResult(
+                     name="answer_present",
+                     passed=True,
+                     message="Answer format detected",
+                     details={"pattern": pattern},
+                 )
+
+         return CheckResult(
+             name="answer_present",
+             passed=False,
+             message="No answer format detected",
+             details={"checked_patterns": len(answer_patterns)},
+         )
+
+     def check_length(
+         self,
+         text: str,
+         min_length: int = 0,
+         max_length: int = 100000,
+     ) -> CheckResult:
+         """
+         Check if text length is within bounds.
+
+         Args:
+             text: Text to check
+             min_length: Minimum allowed length
+             max_length: Maximum allowed length
+
+         Returns:
+             CheckResult with length status
+         """
+         length = len(text)
+
+         if length < min_length:
+             return CheckResult(
+                 name="length_check",
+                 passed=False,
+                 message=f"Too short: {length} < {min_length}",
+                 details={"length": length, "min": min_length},
+             )
+
+         if length > max_length:
+             return CheckResult(
+                 name="length_check",
+                 passed=False,
+                 message=f"Too long: {length} > {max_length}",
+                 details={"length": length, "max": max_length},
+             )
+
+         return CheckResult(
+             name="length_check",
+             passed=True,
+             message=f"Length OK: {length}",
+             details={"length": length},
+         )
+
+     def check_no_placeholders(
+         self,
+         text: str,
+     ) -> CheckResult:
+         """
+         Check that output doesn't contain placeholder text.
+
+         Args:
+             text: Text to check
+
+         Returns:
+             CheckResult indicating if placeholders were found
+         """
+         placeholder_patterns = [
+             r"\[insert\s+.*?\]",
+             r"\[TODO\]",
+             r"\[PLACEHOLDER\]",
+             r"<your.*?here>",
+             r"\.{3,}",  # Multiple dots as placeholder
+             r"\[\.{3}\]",
+         ]
+
+         for pattern in placeholder_patterns:
+             match = re.search(pattern, text, re.IGNORECASE)
+             if match:
+                 return CheckResult(
+                     name="no_placeholders",
+                     passed=False,
+                     message=f"Placeholder detected: '{match.group()}'",
+                     details={"match": match.group()},
+                 )
+
+         return CheckResult(
+             name="no_placeholders",
+             passed=True,
+             message="No placeholders detected",
+         )
+
+     def run_all(
+         self,
+         text: str,
+         task_type: str = "general",
+         context: Optional[dict] = None,
+     ) -> DeterministicCheckResults:
+         """
+         Run all applicable checks based on task type.
+
+         Args:
+             text: Output text to check
+             task_type: Type of task (code, math, general)
+             context: Additional context for checks
+
+         Returns:
+             Aggregated check results
+         """
+         context = context or {}
+         checks: list[CheckResult] = []
+
+         # Universal checks
+         checks.append(self.check_length(text, min_length=1))
+         checks.append(self.check_no_placeholders(text))
+         checks.append(self.check_contains_answer(text))
+
+         # Task-specific checks
+         if task_type == "code":
+             # Extract code blocks and check syntax
+             code_pattern = r"```(?:python)?\n?(.*?)```"
+             code_matches = re.findall(code_pattern, text, re.DOTALL)
+             if code_matches:
+                 for i, code in enumerate(code_matches):
+                     result = self.check_python_syntax(code.strip())
+                     result.name = f"python_syntax_{i}"
+                     checks.append(result)
+
+         elif task_type == "math":
+             # Look for math expressions
+             math_pattern = r"####\s*([0-9+\-*/().\s]+)"
+             math_matches = re.findall(math_pattern, text)
+             if math_matches:
+                 for expr in math_matches[:3]:  # Limit checks
+                     checks.append(self.check_math_expression(expr.strip()))
+
+         elif task_type == "json":
+             # Check JSON validity
+             json_schema = context.get("json_schema")
+             checks.append(self.check_json_parseable(text, json_schema))
+
+         # Run custom checks
+         for check_fn in self._custom_checks:
+             try:
+                 result = check_fn(text, context)
+                 if result is not None:
+                     checks.append(result)
+             except Exception as e:
+                 checks.append(CheckResult(
+                     name="custom_check",
+                     passed=False,
+                     message=f"Custom check error: {e}",
+                 ))
+
+         return DeterministicCheckResults.from_checks(checks)
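The hunk above adds the full deterministic-checker module. Below is a brief usage sketch, not part of the published wheel, showing how the standalone helpers and the `DeterministicChecker` class from `parishad/checker/deterministic.py` might be called; the sample inputs are invented for illustration and assume the `jsonschema` dependency is installed.

```python
# Illustrative only: exercises the helpers shipped in parishad/checker/deterministic.py.
from parishad.checker.deterministic import (
    DeterministicChecker,
    check_math,
    validate_schema,
)

# Stateless helpers return compact dicts with an "ok" flag.
print(check_math("2 + 3 * 4"))  # ok=True, result=14.0
print(validate_schema({"answer": 42}, {"type": "object", "required": ["answer"]}))  # ok=True

# The checker class bundles the "free" checks for a given task type.
checker = DeterministicChecker()
results = checker.run_all("The answer is:\n#### 4 + 3", task_type="math")  # invented model output
print(results.all_passed)  # True when every check passed
print(results.to_dict())   # structured summary suitable for logging
```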