parishad 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/roles/base.py ADDED
@@ -0,0 +1,712 @@
1
+ """
2
+ Base classes and types for Parishad roles.
3
+
4
+ All functional roles (Refiner, Planner, Worker, Checker, Judge) inherit from
5
+ the abstract Role class defined here.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from abc import ABC, abstractmethod
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime
13
+ from enum import Enum
14
+ from pathlib import Path
15
+ from typing import Any, Optional, Dict
16
+ import json
17
+ import uuid
18
+ import logging
19
+
20
# Schema validation is disabled in this build: with ROLE_SCHEMA left as None
# and the availability flag False, validate_role_output() always short-circuits
# to a permissive {"ok": True, ...} result.
ROLE_SCHEMA = None  # Would hold the parsed role-output JSON schema dict when enabled.
SCHEMA_VALIDATION_AVAILABLE = False  # Gate checked by validate_role_output().


# Module-level logger named after this module, per logging convention.
logger = logging.getLogger(__name__)
26
+
27
+
28
def validate_role_output(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate role output against the envelope JSON schema.

    Args:
        data: Role output dict to validate.

    Returns:
        Dict with 'ok' (bool) and an optional 'error' or 'warning' (str) key.
        Validation is soft: when the schema or the jsonschema package is
        unavailable, the result is ok=True with a warning.
    """
    if not SCHEMA_VALIDATION_AVAILABLE or not ROLE_SCHEMA:
        return {"ok": True, "warning": "Schema validation not available"}

    # Bug fix: `jsonschema` was referenced below without ever being imported,
    # so reaching this path raised NameError instead of validating. Import it
    # lazily so the module has no hard dependency when validation is disabled.
    try:
        import jsonschema
    except ImportError:
        return {"ok": True, "warning": "jsonschema not installed"}

    try:
        # Build a schema containing both the envelope and the definitions
        # table so internal $ref pointers can be resolved.
        envelope_schema = ROLE_SCHEMA.get("definitions", {}).get("envelope", {})
        if envelope_schema:
            full_schema = {
                **envelope_schema,
                "definitions": ROLE_SCHEMA.get("definitions", {}),
            }
            jsonschema.validate(instance=data, schema=full_schema)
        return {"ok": True}
    except jsonschema.ValidationError as e:
        error_msg = f"Schema validation failed: {e.message}"
        if e.path:
            error_msg += f" at path: {'.'.join(str(p) for p in e.path)}"
        return {"ok": False, "error": error_msg}
    except Exception as e:
        return {"ok": False, "error": f"Validation error: {str(e)}"}
59
+
60
+
61
class Slot(Enum):
    """Model slot sizes for the heterogeneous council.

    Roles are bound to a slot rather than a concrete model; the orchestrator
    maps each slot to whichever local model is available at that size.
    """
    SMALL = "small"  # 2-4B params: Refiner, Checker
    MID = "mid"      # 7-13B params: Worker
    BIG = "big"      # 13-34B params: Planner, Judge
66
+
67
+
68
class TaskType(Enum):
    """Types of tasks Parishad can handle.

    Values are the lowercase strings used in serialized TaskSpec dicts
    (see TaskSpec.to_dict / from_dict).
    """
    CODE = "code"
    MATH = "math"
    QA = "qa"
    EXPLANATION = "explanation"
    CREATIVE = "creative"
    ANALYSIS = "analysis"
76
+
77
+
78
class Difficulty(Enum):
    """Task difficulty estimate used for routing decisions."""
    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
83
+
84
+
85
class OutputFormat(Enum):
    """Expected output format types for a task's final answer."""
    CODE = "code"
    TEXT = "text"
    NUMERIC = "numeric"
    STRUCTURED = "structured"
    MIXED = "mixed"  # Combination of the above, e.g. prose with code blocks
92
+
93
+
94
@dataclass
class RoleMetadata:
    """Execution metadata recorded for a single role invocation."""
    tokens_used: int = 0
    latency_ms: int = 0
    model_id: str = ""
    slot: Slot = Slot.MID
    timestamp: datetime = field(default_factory=datetime.now)
    duration_ms: int = 0  # Added for Task 5
    schema_warning: Optional[str] = None  # Added for Task 2
    # Phase-3 Task 2: truncation tracking for Judge
    worker_truncated: bool = False
    checker_truncated: bool = False

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a dict; optional fields appear only when set/truthy."""
        payload: dict[str, Any] = {
            "tokens_used": self.tokens_used,
            "latency_ms": self.latency_ms,
            "model_id": self.model_id,
            "slot": self.slot.value,
            "timestamp": self.timestamp.isoformat(),
        }
        if self.duration_ms > 0:
            payload["duration_ms"] = self.duration_ms
        if self.schema_warning:
            payload["schema_warning"] = self.schema_warning
        for flag_name in ("worker_truncated", "checker_truncated"):
            if getattr(self, flag_name):
                payload[flag_name] = True
        return payload
125
+
126
+
127
@dataclass
class RoleInput:
    """Standard input structure handed to every role."""
    user_query: str
    context: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)  # Phase-3: routing metadata

    # Previous role outputs (populated by the orchestrator)
    task_spec: Optional[dict] = None
    plan: Optional[dict] = None
    candidate: Optional[dict] = None
    verdict: Optional[dict] = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize every field into a plain dict."""
        names = (
            "user_query", "context", "metadata",
            "task_spec", "plan", "candidate", "verdict",
        )
        return {name: getattr(self, name) for name in names}
150
+
151
+
152
@dataclass
class RoleOutput:
    """Standard output structure returned by every role."""
    role: str
    status: str  # "success", "error", "partial"
    core_output: dict[str, Any]
    metadata: RoleMetadata = field(default_factory=RoleMetadata)
    error: Optional[str] = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize; note core_output is exposed under the wire key "output"."""
        return dict(
            role=self.role,
            status=self.status,
            output=self.core_output,
            metadata=self.metadata.to_dict(),
            error=self.error,
        )

    def to_json(self) -> str:
        """Pretty-printed JSON form; non-serializable values fall back to str()."""
        return json.dumps(self.to_dict(), indent=2, default=str)
172
+
173
+
174
@dataclass
class TaskSpec:
    """Structured task specification produced by the Refiner."""
    problem: str
    constraints: list[str] = field(default_factory=list)
    output_format: OutputFormat = OutputFormat.TEXT
    difficulty_guess: Difficulty = Difficulty.MEDIUM
    task_type: TaskType = TaskType.QA
    key_concepts: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize, flattening enum fields to their string values."""
        return {
            "problem": self.problem,
            "constraints": self.constraints,
            "output_format": self.output_format.value,
            "difficulty_guess": self.difficulty_guess.value,
            "task_type": self.task_type.value,
            "key_concepts": self.key_concepts,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TaskSpec":
        """Rebuild a TaskSpec from a dict, re-inflating enums from strings."""
        fmt = OutputFormat(data.get("output_format", "text"))
        difficulty = Difficulty(data.get("difficulty_guess", "medium"))
        kind = TaskType(data.get("task_type", "qa"))
        return cls(
            problem=data.get("problem", ""),
            constraints=data.get("constraints", []),
            output_format=fmt,
            difficulty_guess=difficulty,
            task_type=kind,
            key_concepts=data.get("key_concepts", []),
        )
204
+
205
+
206
@dataclass
class PlanStep:
    """One numbered step inside a Plan."""
    id: int
    description: str
    rationale: str = ""
    expected_output: str = ""
    depends_on: list[int] = field(default_factory=list)  # ids of prerequisite steps

    def to_dict(self) -> dict[str, Any]:
        """Serialize every field into a plain dict."""
        names = ("id", "description", "rationale", "expected_output", "depends_on")
        return {name: getattr(self, name) for name in names}
223
+
224
+
225
@dataclass
class Plan:
    """Structured plan emitted by the Planner."""
    steps: list[PlanStep]
    checkpoints: list[int] = field(default_factory=list)  # step ids to verify at
    expected_output_type: str = "text"
    complexity_estimate: str = "moderate"
    suggested_approach: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Serialize, expanding each step via PlanStep.to_dict()."""
        return {
            "steps": [step.to_dict() for step in self.steps],
            "checkpoints": self.checkpoints,
            "expected_output_type": self.expected_output_type,
            "complexity_estimate": self.complexity_estimate,
            "suggested_approach": self.suggested_approach,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Plan":
        """Rebuild a Plan; a step missing an id defaults to its 1-based position."""
        parsed_steps: list[PlanStep] = []
        for position, raw in enumerate(data.get("steps", []), start=1):
            parsed_steps.append(
                PlanStep(
                    id=raw.get("id", position),
                    description=raw.get("description", ""),
                    rationale=raw.get("rationale", ""),
                    expected_output=raw.get("expected_output", ""),
                    depends_on=raw.get("depends_on", []),
                )
            )
        return cls(
            steps=parsed_steps,
            checkpoints=data.get("checkpoints", []),
            expected_output_type=data.get("expected_output_type", "text"),
            complexity_estimate=data.get("complexity_estimate", "moderate"),
            suggested_approach=data.get("suggested_approach", ""),
        )
262
+
263
+
264
@dataclass
class Candidate:
    """Candidate output produced by the Worker."""
    content: str
    content_type: str = "text"
    language: Optional[str] = None  # Programming language, for code output
    target_file: Optional[str] = None  # Target file for write operations
    reasoning_trace: list[str] = field(default_factory=list)
    confidence: float = 0.5
    warnings: list[str] = field(default_factory=list)

    tool_calls: list[dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize every field into a plain dict."""
        names = (
            "content", "content_type", "language", "target_file",
            "reasoning_trace", "confidence", "warnings", "tool_calls",
        )
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Candidate":
        """Rebuild a Candidate from a dict, tolerating missing keys."""
        # Fresh default containers are built per call, so instances never share them.
        defaults: dict[str, Any] = {
            "content": "",
            "content_type": "text",
            "language": None,
            "target_file": None,
            "reasoning_trace": [],
            "confidence": 0.5,
            "warnings": [],
            "tool_calls": [],
        }
        return cls(**{key: data.get(key, fallback) for key, fallback in defaults.items()})
301
+
302
+
303
@dataclass
class CheckerFlag:
    """A single issue raised by the Checker."""
    type: str
    severity: str  # low, medium, high, critical
    detail: str
    location: Optional[str] = None
    suggested_fix: Optional[str] = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize every field into a plain dict."""
        names = ("type", "severity", "detail", "location", "suggested_fix")
        return {name: getattr(self, name) for name in names}
320
+
321
+
322
@dataclass
class Evidence:
    """One piece of evidence gathered during Checker verification."""
    source: str
    source_type: str  # retrieval, deterministic, llm_judgment
    snippet: str = ""
    relevance_score: float = 0.0
    supports_claim: bool = True

    def to_dict(self) -> dict[str, Any]:
        """Serialize every field into a plain dict."""
        names = ("source", "source_type", "snippet", "relevance_score", "supports_claim")
        return {name: getattr(self, name) for name in names}
339
+
340
+
341
@dataclass
class Verdict:
    """Checker verdict on a Worker candidate."""
    flags: list[CheckerFlag] = field(default_factory=list)
    must_fix: bool = False
    evidence: list[Evidence] = field(default_factory=list)
    suggested_edits: list[str] = field(default_factory=list)
    overall_confidence: float = 0.5
    checks_performed: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialize, expanding nested flags and evidence."""
        return {
            "flags": [flag.to_dict() for flag in self.flags],
            "must_fix": self.must_fix,
            "evidence": [item.to_dict() for item in self.evidence],
            "suggested_edits": self.suggested_edits,
            "overall_confidence": self.overall_confidence,
            "checks_performed": self.checks_performed,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Verdict":
        """Rebuild a Verdict from a dict, tolerating missing keys."""

        def build_flag(raw: dict[str, Any]) -> CheckerFlag:
            # One flag dict -> CheckerFlag, with defensive defaults.
            return CheckerFlag(
                type=raw.get("type", "unknown"),
                severity=raw.get("severity", "low"),
                detail=raw.get("detail", ""),
                location=raw.get("location"),
                suggested_fix=raw.get("suggested_fix"),
            )

        def build_evidence(raw: dict[str, Any]) -> Evidence:
            # One evidence dict -> Evidence, with defensive defaults.
            return Evidence(
                source=raw.get("source", ""),
                source_type=raw.get("source_type", "unknown"),
                snippet=raw.get("snippet", ""),
                relevance_score=raw.get("relevance_score", 0.0),
                supports_claim=raw.get("supports_claim", True),
            )

        return cls(
            flags=[build_flag(f) for f in data.get("flags", [])],
            must_fix=data.get("must_fix", False),
            evidence=[build_evidence(e) for e in data.get("evidence", [])],
            suggested_edits=data.get("suggested_edits", []),
            overall_confidence=data.get("overall_confidence", 0.5),
            checks_performed=data.get("checks_performed", []),
        )
391
+
392
+
393
@dataclass
class FinalAnswer:
    """Final answer assembled by the Judge."""
    final_answer: str
    answer_type: str = "text"
    rationale: str = ""
    confidence: float = 0.5
    caveats: list[str] = field(default_factory=list)
    sources_used: list[str] = field(default_factory=list)
    numeric_answer: Optional[float] = None  # For math problems
    code_block: Optional[str] = None  # For code problems

    def to_dict(self) -> dict[str, Any]:
        """Serialize every field into a plain dict."""
        names = (
            "final_answer", "answer_type", "rationale", "confidence",
            "caveats", "sources_used", "numeric_answer", "code_block",
        )
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "FinalAnswer":
        """Rebuild a FinalAnswer from a dict, tolerating missing keys."""
        defaults: dict[str, Any] = {
            "final_answer": "",
            "answer_type": "text",
            "rationale": "",
            "confidence": 0.5,
            "caveats": [],
            "sources_used": [],
            "numeric_answer": None,
            "code_block": None,
        }
        return cls(**{key: data.get(key, fallback) for key, fallback in defaults.items()})
429
+
430
+
431
@dataclass
class Trace:
    """Complete execution trace for a single Parishad run."""
    query_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    config: str = "core"
    timestamp: datetime = field(default_factory=datetime.now)
    user_query: str = ""
    total_tokens: int = 0
    total_latency_ms: int = 0
    budget_initial: int = 8000
    budget_remaining: int = 8000
    roles: list[RoleOutput] = field(default_factory=list)
    retries: int = 0
    final_answer: Optional[FinalAnswer] = None
    success: bool = True
    error: Optional[str] = None
    budget_exceeded: bool = False  # Budget was exceeded during execution
    budget_enforcement_triggered: bool = False  # True if roles were skipped due to budget
    skipped_roles: list[dict] = field(default_factory=list)  # Roles skipped (with reason)
    validation_errors: list[str] = field(default_factory=list)  # Roles with validation errors

    def to_dict(self) -> dict[str, Any]:
        """Serialize the trace; budget/validation extras appear only when set."""
        answer = self.final_answer
        serialized: dict[str, Any] = {
            "query_id": self.query_id,
            "config": self.config,
            "timestamp": self.timestamp.isoformat(),
            "user_query": self.user_query,
            "total_tokens": self.total_tokens,
            "total_latency_ms": self.total_latency_ms,
            "budget_initial": self.budget_initial,
            "budget_remaining": self.budget_remaining,
            "roles": [role.to_dict() for role in self.roles],
            "retries": self.retries,
            "final_answer": answer.to_dict() if answer else None,
            "success": self.success,
            "error": self.error,
            "budget_exceeded": self.budget_exceeded,
        }
        # Optional sections: emit only when triggered / non-empty.
        if self.budget_enforcement_triggered:
            serialized["budget_enforcement_triggered"] = True
        if self.skipped_roles:
            serialized["skipped_roles"] = self.skipped_roles
        if self.validation_errors:
            serialized["validation_errors"] = self.validation_errors
        return serialized

    def to_json(self) -> str:
        """Pretty-printed JSON form of to_dict()."""
        return json.dumps(self.to_dict(), indent=2, default=str)

    def add_role_output(self, output: RoleOutput) -> None:
        """Append a role output and fold its token/latency usage into the totals."""
        self.roles.append(output)
        self.total_tokens += output.metadata.tokens_used
        self.total_latency_ms += output.metadata.latency_ms
        self.budget_remaining -= output.metadata.tokens_used
487
+
488
+
489
class Role(ABC):
    """
    Abstract base class for all Parishad roles.

    Each role:
    - Has a default slot (SMALL, MID, BIG)
    - Has a system prompt template
    - Produces structured JSON output
    - Can be invoked with a RoleInput and returns a RoleOutput
    """

    name: str = "base"
    default_slot: Slot = Slot.MID

    def __init__(
        self,
        model_runner: Any,  # ModelRunner instance
        slot: Optional[Slot] = None,
        max_tokens: int = 1024,
        temperature: float = 0.5
    ):
        self.model_runner = model_runner
        self.slot = slot or self.default_slot
        self.max_tokens = max_tokens
        self.temperature = temperature

    @property
    @abstractmethod
    def system_prompt(self) -> str:
        """System prompt for this role."""
        pass

    @abstractmethod
    def format_input(self, role_input: RoleInput) -> str:
        """Format the role input into a user message."""
        pass

    @abstractmethod
    def parse_output(self, raw_output: str) -> dict[str, Any]:
        """Parse the raw LLM output into structured data."""
        pass

    def __call__(self, role_input: RoleInput) -> RoleOutput:
        """Execute this role: format -> generate -> parse -> soft-validate.

        Never raises: backend errors, parse errors and any unexpected failure
        are all normalized into a RoleOutput with status="error". Tokens spent
        by the backend are preserved in metadata even when parsing fails.
        """
        import time
        from ..models.runner import ModelRunnerError, UnknownSlotError, ModelBackendError

        start_time = time.perf_counter()
        tokens_from_backend = 0  # Track tokens separately to preserve them even on parse error
        raw_output_for_debug = ""  # Saved for error reporting
        model_id_from_backend = None

        try:
            # Format input
            user_message = self.format_input(role_input)

            # Call model - wrap backend errors
            try:
                raw_output, tokens_used, model_id = self.model_runner.generate(
                    system_prompt=self.system_prompt,
                    user_message=user_message,
                    slot=self.slot,
                    max_tokens=self.max_tokens,
                    temperature=self.temperature
                )
                # Save these in case parse_output fails
                tokens_from_backend = tokens_used
                raw_output_for_debug = raw_output[:500]  # First 500 chars for debugging
                model_id_from_backend = model_id
            except (UnknownSlotError, ModelBackendError) as e:
                # Normalize backend errors into RoleOutput
                latency_ms = int((time.perf_counter() - start_time) * 1000)
                logger.error(f"Backend error in {self.name}: {e}")

                error_type = "unknown_slot" if isinstance(e, UnknownSlotError) else "backend_error"

                return RoleOutput(
                    role=self.name,
                    status="error",
                    core_output={
                        "error_type": error_type,
                        "error_message": str(e),
                        "backend_error": True
                    },
                    metadata=RoleMetadata(
                        latency_ms=latency_ms,
                        slot=self.slot,
                        tokens_used=0
                    ),
                    error=str(e)
                )

            # Parse output - wrap to preserve tokens even if this fails
            try:
                core_output = self.parse_output(raw_output)
            except Exception as parse_error:
                # Parse failed, but the backend DID generate - preserve tokens!
                logger.error(
                    f"Parse error in {self.name}: {parse_error}. "
                    f"Backend generated {tokens_from_backend} tokens. "
                    f"Raw output preview: {raw_output_for_debug}"
                )
                latency_ms = int((time.perf_counter() - start_time) * 1000)

                return RoleOutput(
                    role=self.name,
                    status="error",
                    core_output={
                        "error_type": "parse_error",
                        "error_message": str(parse_error),
                        "raw_output_preview": raw_output_for_debug
                    },
                    metadata=RoleMetadata(
                        latency_ms=latency_ms,
                        slot=self.slot,
                        tokens_used=tokens_from_backend,  # PRESERVE TOKENS!
                        model_id=model_id_from_backend or ""
                    ),
                    error=f"Parse error: {parse_error}"
                )

            latency_ms = int((time.perf_counter() - start_time) * 1000)

            # Build metadata
            metadata = RoleMetadata(
                tokens_used=tokens_from_backend,
                latency_ms=latency_ms,
                model_id=model_id_from_backend or "",
                slot=self.slot
            )

            # Create output
            output = RoleOutput(
                role=self.name,
                status="success",
                core_output=core_output,
                metadata=metadata
            )

            # Soft schema validation - don't fail, just warn
            validation_result = validate_role_output({
                "role": output.role,
                "status": output.status,
                "output": output.core_output,
                "metadata": metadata.to_dict()
            })

            if not validation_result.get("ok", True):
                warning = validation_result.get("error", "Validation failed")
                logger.warning(
                    f"Schema validation failed for {self.name}: {warning}"
                )
                # Bug fix: keep status "success" but record the warning on the
                # existing metadata. (Previously the metadata was rebuilt
                # without the schema_warning field, silently dropping it.)
                output.metadata.schema_warning = warning

            return output

        except Exception as e:
            # Catch-all boundary: any other failure becomes an error RoleOutput.
            latency_ms = int((time.perf_counter() - start_time) * 1000)
            logger.error(f"Role {self.name} failed: {e}")
            return RoleOutput(
                role=self.name,
                status="error",
                core_output={},
                metadata=RoleMetadata(
                    latency_ms=latency_ms,
                    slot=self.slot,
                    tokens_used=tokens_from_backend,  # Preserve tokens even on total failure
                    model_id=model_id_from_backend or ""
                ),
                error=str(e)
            )

    def _extract_json(self, text: str) -> dict[str, Any]:
        """
        Extract a JSON dict from LLM output.

        Tries, in order: fenced ```json code blocks, the whole text, any
        brace-delimited substring (with an ast.literal_eval fallback for
        Python-style dicts from weaker models). Falls back to wrapping the
        raw text as {"raw_output": text}.
        """
        import re

        # Try to find JSON in code blocks first
        json_pattern = r'```(?:json)?\s*\n?([\s\S]*?)\n?```'
        matches = re.findall(json_pattern, text)

        for match in matches:
            try:
                return json.loads(match.strip())
            except json.JSONDecodeError:
                continue

        # Try to parse the entire text as JSON
        try:
            return json.loads(text.strip())
        except json.JSONDecodeError:
            pass

        # Try to find a brace-delimited JSON-like structure
        brace_pattern = r'\{[\s\S]*\}'
        matches = re.findall(brace_pattern, text)

        for match in matches:
            try:
                return json.loads(match)
            except json.JSONDecodeError:
                # Fallback: ast.literal_eval for Python-style dicts (single
                # quotes, True/False/None) common in weaker models.
                try:
                    import ast
                    parsed = ast.literal_eval(match)
                    # Bug fix: literal_eval of a brace match can also yield a
                    # set (e.g. "{1, 2}"); only accept an actual dict so the
                    # declared return type holds.
                    if isinstance(parsed, dict):
                        return parsed
                except (ValueError, SyntaxError):
                    pass
                continue

        # Return raw text as fallback
        return {"raw_output": text}