parishad 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parishad/__init__.py +70 -0
- parishad/__main__.py +10 -0
- parishad/checker/__init__.py +25 -0
- parishad/checker/deterministic.py +644 -0
- parishad/checker/ensemble.py +496 -0
- parishad/checker/retrieval.py +546 -0
- parishad/cli/__init__.py +6 -0
- parishad/cli/code.py +3254 -0
- parishad/cli/main.py +1158 -0
- parishad/cli/prarambh.py +99 -0
- parishad/cli/sthapana.py +368 -0
- parishad/config/modes.py +139 -0
- parishad/config/pipeline.core.yaml +128 -0
- parishad/config/pipeline.extended.yaml +172 -0
- parishad/config/pipeline.fast.yaml +89 -0
- parishad/config/user_config.py +115 -0
- parishad/data/catalog.py +118 -0
- parishad/data/models.json +108 -0
- parishad/memory/__init__.py +79 -0
- parishad/models/__init__.py +181 -0
- parishad/models/backends/__init__.py +247 -0
- parishad/models/backends/base.py +211 -0
- parishad/models/backends/huggingface.py +318 -0
- parishad/models/backends/llama_cpp.py +239 -0
- parishad/models/backends/mlx_lm.py +141 -0
- parishad/models/backends/ollama.py +253 -0
- parishad/models/backends/openai_api.py +193 -0
- parishad/models/backends/transformers_hf.py +198 -0
- parishad/models/costs.py +385 -0
- parishad/models/downloader.py +1557 -0
- parishad/models/optimizations.py +871 -0
- parishad/models/profiles.py +610 -0
- parishad/models/reliability.py +876 -0
- parishad/models/runner.py +651 -0
- parishad/models/tokenization.py +287 -0
- parishad/orchestrator/__init__.py +24 -0
- parishad/orchestrator/config_loader.py +210 -0
- parishad/orchestrator/engine.py +1113 -0
- parishad/orchestrator/exceptions.py +14 -0
- parishad/roles/__init__.py +71 -0
- parishad/roles/base.py +712 -0
- parishad/roles/dandadhyaksha.py +163 -0
- parishad/roles/darbari.py +246 -0
- parishad/roles/majumdar.py +274 -0
- parishad/roles/pantapradhan.py +150 -0
- parishad/roles/prerak.py +357 -0
- parishad/roles/raja.py +345 -0
- parishad/roles/sacheev.py +203 -0
- parishad/roles/sainik.py +427 -0
- parishad/roles/sar_senapati.py +164 -0
- parishad/roles/vidushak.py +69 -0
- parishad/tools/__init__.py +7 -0
- parishad/tools/base.py +57 -0
- parishad/tools/fs.py +110 -0
- parishad/tools/perception.py +96 -0
- parishad/tools/retrieval.py +74 -0
- parishad/tools/shell.py +103 -0
- parishad/utils/__init__.py +7 -0
- parishad/utils/hardware.py +122 -0
- parishad/utils/logging.py +79 -0
- parishad/utils/scanner.py +164 -0
- parishad/utils/text.py +61 -0
- parishad/utils/tracing.py +133 -0
- parishad-0.1.0.dist-info/METADATA +256 -0
- parishad-0.1.0.dist-info/RECORD +68 -0
- parishad-0.1.0.dist-info/WHEEL +4 -0
- parishad-0.1.0.dist-info/entry_points.txt +2 -0
- parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/orchestrator/engine.py
@@ -0,0 +1,1113 @@
"""
Orchestrator engine for Parishad council pipeline.

Executes role graphs with budget tracking and retry logic.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
import json
import logging
import uuid

import yaml

from ..models.runner import ModelRunner, ModelConfig
from ..roles.base import (
    Role,
    RoleInput,
    RoleOutput,
    Trace,
    FinalAnswer,
    Slot,
)
from ..roles import (
    Darbari, Majumdar, Sainik, Prerak, Raja,
    Pantapradhan, SarSenapati, Sacheev, Dandadhyaksha,
    Vidushak,
)
from .config_loader import load_pipeline_config, RoleSpec
from .exceptions import InvalidPipelineConfigError


logger = logging.getLogger(__name__)


# Registry mapping role class names to Role classes
# Used by config-driven pipeline execution
ROLE_REGISTRY: dict[str, type[Role]] = {
    # Core roles
    "Darbari": Darbari,
    "Majumdar": Majumdar,
    "Sainik": Sainik,
    "Prerak": Prerak,
    "Raja": Raja,
    # Extended roles
    "Pantapradhan": Pantapradhan,
    "SarSenapati": SarSenapati,
    "Sacheev": Sacheev,
    "Dandadhyaksha": Dandadhyaksha,
    "Vidushak": Vidushak,
}


@dataclass
class Budget:
    """Runtime budget tracker for token and cost management."""
    max_tokens: int = 8000
    max_cost: float = 1.0
    used_tokens: int = 0
    used_cost: float = 0.0

    def spend(self, tokens: int = 0, cost: float = 0.0) -> None:
        """Record spending of tokens and cost."""
        self.used_tokens += tokens
        self.used_cost += cost

    @property
    def remaining_tokens(self) -> int:
        """Get remaining token budget."""
        return max(0, self.max_tokens - self.used_tokens)

    @property
    def remaining_cost(self) -> float:
        """Get remaining cost budget."""
        return max(0.0, self.max_cost - self.used_cost)

    @property
    def is_exceeded(self) -> bool:
        """Check if budget is exceeded."""
        return self.used_tokens > self.max_tokens or self.used_cost > self.max_cost

    @property
    def token_percent_used(self) -> float:
        """Get percentage of token budget used."""
        if self.max_tokens == 0:
            return 100.0
        return (self.used_tokens / self.max_tokens) * 100

    @property
    def cost_percent_used(self) -> float:
        """Get percentage of cost budget used."""
        if self.max_cost == 0:
            return 100.0
        return (self.used_cost / self.max_cost) * 100


@dataclass
class BudgetConfig:
    """Configuration for token budget management."""
    max_tokens_per_query: int = 8000
    min_budget_for_retry: int = 1500

    role_budgets: dict[str, int] = field(default_factory=lambda: {
        "darbari": 600,
        "majumdar": 1200,
        "sainik": 2500,
        "prerak": 1000,
        "raja": 1800,
        "reserve": 900
    })


@dataclass
class RetryConfig:
    """Configuration for retry logic."""
    enabled: bool = True
    max_retries: int = 1
    retry_roles: list[str] = field(default_factory=lambda: ["sainik"])


@dataclass
class DifficultyRouting:
    """Configuration for difficulty-based model routing."""
    enabled: bool = True
    easy_planner_slot: str = "mid"
    easy_judge_slot: str = "mid"


@dataclass
class PipelineConfig:
    """Complete pipeline configuration."""
    name: str = "parishad-core"
    version: str = "0.1.0"

    roles: dict[str, dict] = field(default_factory=dict)
    pipeline: list[str] = field(default_factory=lambda: [
        "darbari", "majumdar", "sainik", "prerak", "raja"
    ])

    budget: BudgetConfig = field(default_factory=BudgetConfig)
    retry: RetryConfig = field(default_factory=RetryConfig)
    difficulty_routing: DifficultyRouting = field(default_factory=DifficultyRouting)

    @classmethod
    def from_yaml(cls, path: str | Path) -> "PipelineConfig":
        """Load configuration from YAML file."""
        with open(path) as f:
            data = yaml.safe_load(f)

        budget_data = data.get("budget", {})
        budget = BudgetConfig(
            max_tokens_per_query=budget_data.get("max_tokens_per_query", 8000),
            min_budget_for_retry=budget_data.get("min_budget_for_retry", 1500),
            role_budgets=budget_data.get("role_budgets", {})
        )

        retry_data = data.get("retry", {})
        retry = RetryConfig(
            enabled=retry_data.get("enabled", True),
            max_retries=retry_data.get("max_retries", 1),
            retry_roles=retry_data.get("retry_roles", ["sainik"])
        )

        routing_data = data.get("difficulty_routing", {})
        routing = DifficultyRouting(
            enabled=routing_data.get("enabled", True),
            easy_planner_slot=routing_data.get("rules", {}).get("easy", {}).get("planner_slot", "mid"),
            easy_judge_slot=routing_data.get("rules", {}).get("easy", {}).get("judge_slot", "mid")
        )

        return cls(
            name=data.get("name", "parishad-core"),
            version=data.get("version", "0.1.0"),
            roles=data.get("roles", {}),
            pipeline=data.get("pipeline", []),
            budget=budget,
            retry=retry,
            difficulty_routing=routing
        )


@dataclass
class ExecutionContext:
    """Context maintained during pipeline execution."""
    query_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    user_query: str = ""

    # Budget tracking
    budget_initial: int = 8000
    budget_remaining: int = 8000
    tokens_used: int = 0

    # Core role outputs
    task_spec: Optional[dict] = None
    plan: Optional[dict] = None
    candidate: Optional[dict] = None
    verdict: Optional[dict] = None
    final_answer: Optional[dict] = None

    # Extended role outputs
    plan_high: Optional[dict] = None       # From Pantapradhan
    plan_exec: Optional[dict] = None       # From SarSenapati
    verdict_fact: Optional[dict] = None    # From Sacheev
    verdict_safety: Optional[dict] = None  # From Dandadhyaksha

    # Retry tracking
    retry_count: int = 0

    # Trace
    role_outputs: list[RoleOutput] = field(default_factory=list)

    # Budget enforcement tracking
    skipped_roles: list[dict] = field(default_factory=list)
    validation_errors: list[str] = field(default_factory=list)
    budget_enforcement_triggered: bool = False
    budget_exceeded: bool = False

    # Routing decision (Task 1 - Router integration)
    # NOTE: RoutingDecision is not imported in this module; under
    # `from __future__ import annotations` the annotation is kept as a lazy
    # string, and the router integration further below is currently disabled.
    routing_decision: Optional[RoutingDecision] = None

    def use_tokens(self, tokens: int) -> None:
        """Record token usage."""
        self.tokens_used += tokens
        self.budget_remaining -= tokens

    def has_budget(self, min_required: int = 0) -> bool:
        """Check if we have enough budget."""
        return self.budget_remaining >= min_required

    def to_role_input(self) -> RoleInput:
        """Create RoleInput from current context."""
        # Build extended context dict for Extended pipeline roles
        extended_context = {}
        if self.plan_high is not None:
            extended_context["plan_high"] = self.plan_high
        if self.plan_exec is not None:
            extended_context["plan_exec"] = self.plan_exec
        if self.verdict_fact is not None:
            extended_context["verdict_fact"] = self.verdict_fact
        if self.verdict_safety is not None:
            extended_context["verdict_safety"] = self.verdict_safety

        # Phase-3 Task 1: Add routing decision to metadata (not context) for roles to access
        metadata = {}
        if self.routing_decision is not None:
            metadata["routing"] = {
                "config_name": self.routing_decision.config_name,
                "mode": self.routing_decision.mode,
                "allow_retry": self.routing_decision.allow_retry,
                "checker_mode": self.routing_decision.checker_mode,
                "truncation_policy": self.routing_decision.truncation_policy,
                "max_tokens": self.routing_decision.max_tokens,
                "per_role_max_tokens": self.routing_decision.per_role_max_tokens,
            }

        return RoleInput(
            user_query=self.user_query,
            task_spec=self.task_spec,
            plan=self.plan or self.plan_exec,  # Extended uses plan_exec as plan
            candidate=self.candidate,
            verdict=self.verdict,
            context=extended_context,
            metadata=metadata,
        )


class ParishadEngine:
    """
    Main orchestrator engine for Parishad council.

    Executes the role pipeline with budget tracking, difficulty routing,
    and retry logic.
    """

    def __init__(
        self,
        model_config: Optional[ModelConfig] = None,
        pipeline_config: Optional[PipelineConfig] = None,
        model_runner: Optional[ModelRunner] = None,
        trace_dir: Optional[str | Path] = None,
        strict_validation: bool = False,
        enforce_budget: bool = False,
        mode: str = "balanced",
        user_forced_config: Optional[str] = None,
        **kwargs  # Ignore legacy mock/stub args
    ):
        """
        Initialize the Parishad engine.

        Args:
            model_config: Configuration for model slots
            pipeline_config: Configuration for pipeline execution
            model_runner: Pre-configured ModelRunner (optional)
            trace_dir: Directory to save execution traces
            strict_validation: If True, set status="error" when schema validation
                fails instead of soft warning (default: False)
            enforce_budget: If True, skip optional roles when budget is low
                (default: False)
            mode: Execution mode ("auto"|"fast"|"balanced"|"thorough")
            user_forced_config: Dict with slot overrides (model_id, backend_type)
        """
        # 1. Resolve ModelConfig
        # Check for profile in kwargs (passed by CLI)
        profile = kwargs.get("profile")
        if not model_config and profile:
            try:
                # Assuming standard config path or None (defaults)
                model_config = ModelConfig.from_profile(profile)
            except Exception as e:
                import logging
                logging.getLogger(__name__).warning(f"Failed to load profile '{profile}': {e}")

        self.model_config = model_config or ModelConfig()

        # 2. Apply user_forced_config overrides
        # Ensure it's a dict (handling potential type hint mismatch)
        if user_forced_config and isinstance(user_forced_config, dict):
            import copy
            for slot_name, overrides in user_forced_config.items():
                if slot_name in self.model_config.slots:
                    # Deepcopy to prevent shared references (e.g. from YAML anchors)
                    # causing overrides to one slot affecting others.
                    slot_cfg = copy.deepcopy(self.model_config.slots[slot_name])

                    if "model_id" in overrides:
                        slot_cfg.model_id = overrides["model_id"]
                        # Default to None, override if provided
                        slot_cfg.model_file = overrides.get("model_file")
                    if "backend_type" in overrides:
                        slot_cfg.backend = overrides["backend_type"]  # Correct field name is 'backend'
                    if "model_file" in overrides:
                        slot_cfg.model_file = overrides["model_file"]

                    # Update config with new object
                    self.model_config.slots[slot_name] = slot_cfg
                else:
                    # Create new slot if it doesn't exist
                    from ..models.runner import SlotConfig
                    backend_type = overrides.get("backend_type", "ollama")
                    model_id = overrides.get("model_id", "")
                    model_file = overrides.get("model_file")

                    # Use defaults for other fields
                    slot_cfg = SlotConfig(
                        model_id=model_id,
                        backend=backend_type,
                        model_file=model_file,  # Pass explicit model file
                        context_length=32768,   # Force high context defaults for local models
                        default_max_tokens=2048,
                        default_temperature=0.7
                    )
                    self.model_config.slots[slot_name] = slot_cfg

        self.pipeline_config = pipeline_config or PipelineConfig()

        # Validation and budget enforcement flags
        self.strict_validation = strict_validation
        self.enforce_budget = enforce_budget

        # Router integration (Task 1)
        self.mode = mode  # "auto" | "fast" | "balanced" | "thorough"
        self.user_forced_config = user_forced_config  # Config explicitly set by user

        # Use provided runner or create new one
        self.model_runner = model_runner or ModelRunner(
            config=self.model_config
        )

        self.trace_dir = Path(trace_dir) if trace_dir else None
        if self.trace_dir:
            self.trace_dir.mkdir(parents=True, exist_ok=True)

        # Store cwd for tools
        self.cwd = kwargs.get("cwd") or Path.cwd()

        # Initialize roles
        self._init_roles()

    def _init_roles(self) -> None:
        """Initialize role instances."""
        role_configs = self.pipeline_config.roles

        # Tools initialization (Phase 13)
        from ..tools.fs import FileSystemTool
        from ..tools.shell import ShellTool
        from ..tools.perception import PerceptionTool

        # Use stored cwd or default
        cwd = self.cwd
        self.fs_tool = FileSystemTool(working_directory=str(cwd))
        self.shell_tool = ShellTool(safe_mode=True)  # Safe mode by default

        # Configure Perception with Vision Model (Phase 13)
        vision_slot_name = "vision" if "vision" in self.model_config.slots else "small"
        vision_slot = self.model_config.slots.get(vision_slot_name)

        perception_config = None
        if vision_slot:
            # Construct config for MarkItDown (uses OpenAI client)
            # Ollama provides OpenAI compatible API at /v1
            base_url = vision_slot.extra.get("base_url", "http://localhost:11434/v1")
            # Ensure base_url ends with /v1 for OpenAI client compatibility if using Ollama
            if "localhost" in base_url and "/v1" not in base_url:
                base_url = f"{base_url.rstrip('/')}/v1"

            perception_config = {
                "base_url": base_url,
                "api_key": "ollama",
                "model": vision_slot.model_file or vision_slot.model_id or "llava"
            }

        self.perception_tool = PerceptionTool(llm_config=perception_config)

        self.darbari = Darbari(
            model_runner=self.model_runner,
            **self._get_role_kwargs("darbari", role_configs.get("darbari", {}))
        )

        self.majumdar = Majumdar(
            model_runner=self.model_runner,
            **self._get_role_kwargs("majumdar", role_configs.get("majumdar", {}))
        )

        self.sainik = Sainik(
            model_runner=self.model_runner,
            tools=[self.fs_tool, self.shell_tool, self.perception_tool],
            **self._get_role_kwargs("sainik", role_configs.get("sainik", {}))
        )

        self.prerak = Prerak(
            model_runner=self.model_runner,
            tools=role_configs.get("prerak", {}).get("tools", []),
            **self._get_role_kwargs("prerak", role_configs.get("prerak", {}))
        )

        self.raja = Raja(
            model_runner=self.model_runner,
            **self._get_role_kwargs("raja", role_configs.get("raja", {}))
        )

    def _get_role_kwargs(self, role_name: str, config: dict) -> dict:
        """Extract role initialization kwargs from config."""
        kwargs = {}

        if "slot" in config:
            slot_name = config["slot"]
            kwargs["slot"] = Slot(slot_name)

        if "max_tokens" in config:
            kwargs["max_tokens"] = config["max_tokens"]

        if "temperature" in config:
            kwargs["temperature"] = config["temperature"]

        return kwargs

    def _load_pipeline(self, config_name: str = "core") -> list[RoleSpec]:
        """Load pipeline configuration from YAML."""
        return load_pipeline_config(config_name)

    def _get_context_updates_for_role(self, role_name: str) -> dict[str, str]:
        """
        Return context field mapping for a role.
        """
        mappings = {
            # Core
            "darbari": {"task_spec": "core_output"},
            "majumdar": {"plan": "core_output"},
            "sainik": {"candidate": "core_output"},
            "prerak": {"verdict": "core_output"},
            "raja": {"final_answer": "core_output"},

            # Extended
            "pantapradhan": {"plan_high": "core_output"},
            "sar_senapati": {"plan_exec": "core_output", "plan": "core_output"},
            "sacheev": {"verdict_fact": "core_output"},
            "dandadhyaksha": {"verdict_safety": "core_output"},
        }
        return mappings.get(role_name, {})

    def _is_optional_role(self, role_name: str) -> bool:
        """Determine if a role is optional."""
        optional_roles = {
            "sacheev",
            "dandadhyaksha",
        }
        return role_name in optional_roles

    def _estimate_role_tokens(self, role_name: str, role_spec: Optional[RoleSpec] = None) -> int:
        """Estimate token usage for a role based on configuration."""
        if role_spec and role_spec.budget_tokens > 0:
            return role_spec.budget_tokens

        role_budgets = self.pipeline_config.budget.role_budgets

        # Use configured budget if available
        if role_name in role_budgets:
            return role_budgets[role_name]

        # Default estimates
        default_estimates = {
            "pantapradhan": 1000,
            "sar_senapati": 800,
            "sacheev": 600,
            "dandadhyaksha": 400,
        }

        return default_estimates.get(role_name, 500)

    def _get_role_instance(self, role_name: str, role_spec: Optional[RoleSpec] = None) -> Role:
        """Get the role instance by name."""
        # Core roles (pre-initialized)
        core_roles = {
            "darbari": self.darbari,
            "majumdar": self.majumdar,
            "sainik": self.sainik,
            "prerak": self.prerak,
            "raja": self.raja,
        }

        if role_name in core_roles:
            return core_roles[role_name]

        # Extended roles (dynamically instantiated)
        if not hasattr(self, "_extended_roles"):
            self._extended_roles: dict[str, Role] = {}

        if role_name in self._extended_roles:
            return self._extended_roles[role_name]

        # Instantiate extended role
        extended_class_map = {
            "pantapradhan": Pantapradhan,
            "sar_senapati": SarSenapati,
            "sacheev": Sacheev,
            "dandadhyaksha": Dandadhyaksha,
            "vidushak": Vidushak,
            # Aliases for compatibility if needed
            "sainik_code": Sainik,
            "sainik_text": Sainik,
        }

        if role_name not in extended_class_map:
            raise KeyError(f"Unknown role: {role_name}")

        role_class = extended_class_map[role_name]

        # Get base config
        role_config = self.pipeline_config.roles.get(role_name, {}).copy()

        # Override with spec if provided
        if role_spec:
            if role_spec.slot:
                role_config["slot"] = role_spec.slot
            if role_spec.max_tokens:
                role_config["max_tokens"] = role_spec.max_tokens
            if role_spec.temperature:
                role_config["temperature"] = role_spec.temperature
            # Merge extra config (tools, etc)
            if role_spec.extra_config:
                role_config.update(role_spec.extra_config)

        # Build kwargs for role initialization
        kwargs = self._get_role_kwargs(role_name, role_config)

        # Handle tools for checker roles
        if role_name in ("sacheev", "dandadhyaksha") and "tools" in role_config:
            kwargs["tools"] = role_config["tools"]

        role_instance = role_class(model_runner=self.model_runner, **kwargs)
        self._extended_roles[role_name] = role_instance

        return role_instance

    def _run_role(
        self,
        role: Role,
        role_name: str,
        ctx: ExecutionContext,
        context_updates: dict[str, str] | None = None
    ) -> RoleOutput:
        """Execute a role with consistent logging and context management."""
        import time
        from ..roles.base import validate_role_output

        logger.debug(f"Starting role: {role_name} (budget: {ctx.budget_remaining})")

        start_time = time.perf_counter()

        # Build role input from context
        role_input = ctx.to_role_input()

        # Add any special context for this role (e.g., retry context)
        # Note: 'sainik' handles retry
        if role_name == "sainik" and ctx.retry_count > 0 and ctx.verdict:
            role_input.context["is_retry"] = True
            # Truncate previous output to avoid large memory copies
            prev_content = ctx.candidate.get("content", "") if ctx.candidate else ""
            role_input.context["previous_output"] = prev_content[:1024] if len(prev_content) > 1024 else prev_content
            role_input.context["checker_feedback"] = ctx.verdict

        # Execute role
        output = role(role_input)

        # Add duration to metadata
        duration_ms = int((time.perf_counter() - start_time) * 1000)
        output.metadata.duration_ms = duration_ms

        # Log completion at appropriate level
        logger.debug(
            f"Role {role_name} completed: status={output.status}, "
            f"tokens={output.metadata.tokens_used}, duration={duration_ms}ms"
        )

        # Strict validation check
        if self.strict_validation and output.status == "success":
            validation_result = validate_role_output({
                "role": output.role,
                "status": output.status,
                "output": output.core_output,
                "metadata": output.metadata.to_dict()
            })

            if not validation_result.get("ok", True):
                error_msg = validation_result.get("error", "Schema validation failed")
                logger.error(f"Strict validation failed for {role_name}: {error_msg}")

                # Convert to error status
                output.status = "error"
                output.error = f"Schema validation failed: {error_msg}"
                ctx.validation_errors.append(role_name)

        # Update context
        ctx.role_outputs.append(output)
        ctx.use_tokens(output.metadata.tokens_used)

        # Update context fields based on role output
        if output.status == "success" and context_updates:
            for ctx_field, output_field in context_updates.items():
                if output_field == "core_output":
                    setattr(ctx, ctx_field, output.core_output)
                else:
                    setattr(ctx, ctx_field, output.core_output.get(output_field))
                logger.debug(f"Updated context.{ctx_field} from {role_name}")

        # Phase 13: File Writing Capability
        # Check if Sainik wants to write a file
        if role_name == "sainik" and output.status == "success":
            # Handle dictionary (raw output)
            target_file = output.core_output.get("target_file")
            content = output.core_output.get("content")

            if target_file and content:
                try:
                    # Use FS tool to write
                    logger.info(f"Writing file {target_file} via Sainik")

                    # Simple content write
                    result = self.fs_tool.run("write", path=target_file, content=content)

                    if not result.success:
                        logger.error(f"Failed to write file {target_file}: {result.error}")
                        output.error = f"File write failed: {result.error}"
                        # Optionally mark partial success?
                    else:
                        logger.info(f"Successfully wrote {target_file}")

                except Exception as e:
                    logger.error(f"Error handling file write for {target_file}: {e}")
                    output.error = f"File write exception: {str(e)}"

        # Phase 13: General Tool Execution (Agentic)
        if role_name == "sainik" and output.status == "success":
            tool_calls = output.core_output.get("tool_calls", [])
            for call in tool_calls:
                tool_name = call.get("tool")
                action = call.get("action")
                args = call.get("args", {})

                if not tool_name or not action:
                    continue

                logger.info(f"Executing tool {tool_name}.{action} with args {args}")

                # Resolve tool instance
                tool_instance = None
                if tool_name == "file_system":
                    tool_instance = self.fs_tool
                elif tool_name == "shell":
                    tool_instance = self.shell_tool
                elif tool_name == "perception":
                    tool_instance = self.perception_tool

                if tool_instance:
                    try:
                        result = tool_instance.run(action, **args)
                        logger.info(f"Tool {tool_name} result: {result.success}")
                        if not result.success:
                            logger.warning(f"Tool failure: {result.message if hasattr(result, 'message') else result.error}")
                    except Exception as e:
                        logger.error(f"Error executing tool {tool_name}: {e}")

        return output

    def run(self, query: str, config: str = "core", max_tokens: int | None = None) -> Trace:
        """Execute the Parishad council pipeline on a user query."""
        # Initialize context
        budget = max_tokens or self.pipeline_config.budget.max_tokens_per_query
        ctx = ExecutionContext(
            user_query=query,
            budget_initial=budget,
            budget_remaining=budget
        )

        # Add soft budget tracking
        budget_exceeded = False

        # Log at info level with truncated query for privacy/memory
        query_preview = query[:100] + "..." if len(query) > 100 else query
        logger.info(f"Parishad run started: id={ctx.query_id}, config={config}, budget={budget}")

        try:
            # Load pipeline configuration
            try:
                role_specs = self._load_pipeline(config)
            except InvalidPipelineConfigError as e:
                logger.error(f"Invalid pipeline configuration: {e}")
                raise RuntimeError(f"Pipeline configuration error: {e}") from e

            # Execute pipeline: config-driven loop over all roles
            for idx, role_spec in enumerate(role_specs):
                role_name = role_spec.name.lower()  # Ensure lowercase for lookups

                # Budget enforcement check
                if self.enforce_budget and self._is_optional_role(role_name):
                    estimated_tokens = self._estimate_role_tokens(role_name, role_spec)
                    if not ctx.has_budget(estimated_tokens):
                        logger.info(
                            f"Budget enforcement: skipping optional role {role_name} "
                            f"(need ~{estimated_tokens} tokens, have {ctx.budget_remaining})"
                        )
                        ctx.skipped_roles.append({
                            "role": role_name,
                            "reason": "budget_exceeded",
                            "tokens_needed": estimated_tokens,
                            "tokens_available": ctx.budget_remaining
                        })
                        ctx.budget_enforcement_triggered = True
                        ctx.budget_exceeded = True
                        continue

                role_instance = self._get_role_instance(role_name, role_spec)
                context_updates = self._get_context_updates_for_role(role_name)

                output = self._run_role(role_instance, role_name, ctx, context_updates)

                # Router integration: DISABLED due to missing route_policy function
                # if idx == 0 and output.status == "success":
                #     # Build global_config for routing
                #     effective_forced_config = self.user_forced_config
                #
                #     global_config = {
                #         "mode": self.mode,
                #         "config": effective_forced_config,  # None if user didn't force --config
                #         "no_retry": not self.pipeline_config.retry.enabled,
                #         "profile": getattr(self.model_runner, "profile", None),
                #     }
                #
                #     # Call Router to get adaptive decision
                #     # decision = route_policy(
                #     #     output.core_output,
                #     #     query,
                #     #     global_config
                #     # )
                #
                #     # Store decision in context for roles to access
                #     # ctx.routing_decision = decision
                #
                #     # Apply routing decision if user hasn't forced a config
                #     # should_apply_routing = (
                #     #     not effective_forced_config and
                #     #     self.mode != "balanced"  # Balanced mode with no CLI override = keep run() param
                #     # )
                #
                #     # if should_apply_routing:
                #     #     new_config = decision.config_name
                #     #     if new_config and new_config != config:
                #     #         logger.info(
                #     #             f"Router selected pipeline: {new_config} "
                #     #             f"(mode={self.mode}, task={output.core_output.get('task_type', 'unknown')})"
                #     #         )
                #     #         # Reload pipeline with new config and skip re-running first role
                #     #         config = new_config
                #     #         new_role_specs = self._load_pipeline(config)
                #     #         # Continue with remaining roles from new pipeline
                #     #         if len(new_role_specs) > 1:
                #     #             role_specs = role_specs[:idx+1] + new_role_specs[1:]
                #
                #     # Apply retry setting from Router
                #     original_retry = self.pipeline_config.retry.enabled
                #     self.pipeline_config.retry.enabled = decision.allow_retry
                #     if not decision.allow_retry and original_retry:
                #         logger.info(f"Router disabled retry for this query")
                #
                #     # Apply budget from Router (soft limit)
                #     if decision.max_tokens and decision.max_tokens < ctx.budget_initial:
                #         ctx.budget_initial = decision.max_tokens
                #         logger.debug(f"Router adjusted budget to {decision.max_tokens} tokens")
                #
                #     logger.debug(
                #         f"Routing decision: pipeline={decision.config_name}, "
                #         f"checker_mode={decision.checker_mode}, "
                #         f"truncation={decision.truncation_policy}"
                #     )

                # Strict validation: stop pipeline if role failed validation
                if self.strict_validation and output.status == "error":
                    logger.error(f"Strict validation: stopping pipeline after {role_name} error")
                    raise RuntimeError(f"Role {role_name} failed validation: {output.error}")

                # Check soft budget after each role
                if max_tokens and ctx.tokens_used > max_tokens:
                    logger.warning(f"Soft token budget exceeded after {role_name}: {ctx.tokens_used}/{max_tokens}")
                    ctx.budget_exceeded = True
                    budget_exceeded = True

                # Check for retry after checker (Prerak)
                if role_name == "prerak" and self._should_retry(ctx):
                    # Budget enforcement: skip retry if budget is low
                    if self.enforce_budget:
                        min_retry_budget = self.pipeline_config.budget.min_budget_for_retry
                        if not ctx.has_budget(min_retry_budget):
                            logger.info(
                                f"Budget enforcement: skipping retry "
                                f"(need {min_retry_budget} tokens, have {ctx.budget_remaining})"
                            )
                            ctx.skipped_roles.append({
                                "role": "retry",
                                "reason": "budget_exceeded",
                                "tokens_needed": min_retry_budget,
                                "tokens_available": ctx.budget_remaining
                            })
                            ctx.budget_enforcement_triggered = True
                            ctx.budget_exceeded = True
                            continue

                    logger.info(f"Retrying Sainik (attempt {ctx.retry_count + 1})")
                    ctx.retry_count += 1

                    # Re-run sainik and prerak
                    sainik_instance = self._get_role_instance("sainik")
                    sainik_updates = self._get_context_updates_for_role("sainik")
                    self._run_role(sainik_instance, "sainik", ctx, sainik_updates)

                    prerak_instance = self._get_role_instance("prerak")
                    prerak_updates = self._get_context_updates_for_role("prerak")
                    self._run_role(prerak_instance, "prerak", ctx, prerak_updates)

                    if max_tokens and ctx.tokens_used > max_tokens:
                        budget_exceeded = True

            success = True
            error = None

        except Exception as e:
            logger.error(f"Pipeline error: {e}", exc_info=True)
            success = False
            error = str(e)
            budget_exceeded = False  # Error takes precedence

        # Build trace
        trace = self._build_trace(ctx, success, error)

        # Add budget exceeded flag to trace if applicable
        if budget_exceeded and success:
            logger.warning(f"Pipeline completed but exceeded token budget: {ctx.tokens_used}/{max_tokens}")

        # Save trace if configured
        if self.trace_dir:
            self._save_trace(trace)

        logger.info(
            f"Parishad run complete: {ctx.query_id} "
            f"(tokens: {ctx.tokens_used}/{budget}, success: {success})"
        )

        return trace

    def _should_retry(self, ctx: ExecutionContext) -> bool:
        """Determine if we should retry the Sainik."""
        if not self.pipeline_config.retry.enabled:
            return False

        if ctx.retry_count >= self.pipeline_config.retry.max_retries:
            return False

        if not ctx.verdict:
            return False

        if not ctx.verdict.get("must_fix", False):
            return False

        min_budget = self.pipeline_config.budget.min_budget_for_retry
        if not ctx.has_budget(min_budget):
            logger.info("Insufficient budget for retry")
            return False

        return True

    def _build_trace(
        self,
        ctx: ExecutionContext,
        success: bool,
        error: Optional[str]
    ) -> Trace:
        """Build execution trace from context."""
        final_answer = None
        if ctx.final_answer:
            final_answer = FinalAnswer.from_dict(ctx.final_answer)
        elif ctx.candidate:
            # If no Raja in pipeline, use Sainik's output as final answer
            final_answer = FinalAnswer(
                final_answer=ctx.candidate.get("content", ""),
                answer_type=ctx.candidate.get("content_type", "text"),
                confidence=ctx.candidate.get("confidence", 0.8),
                rationale="\n".join(ctx.candidate.get("reasoning_trace", [])) if isinstance(ctx.candidate.get("reasoning_trace"), list) else str(ctx.candidate.get("reasoning_trace", "")),
                caveats=ctx.candidate.get("warnings", []),
                code_block=ctx.candidate.get("content", "") if ctx.candidate.get("content_type") == "code" else None,
            )

        return Trace(
            query_id=ctx.query_id,
            config=self.pipeline_config.name,
            timestamp=datetime.now(),
            user_query=ctx.user_query,
            total_tokens=ctx.tokens_used,
            total_latency_ms=sum(o.metadata.latency_ms for o in ctx.role_outputs),
            budget_initial=ctx.budget_initial,
            budget_remaining=ctx.budget_remaining,
            roles=ctx.role_outputs,
            retries=ctx.retry_count,
            final_answer=final_answer,
            success=success,
            error=error,
            budget_exceeded=ctx.budget_exceeded,
            budget_enforcement_triggered=ctx.budget_enforcement_triggered,
            skipped_roles=ctx.skipped_roles,
            validation_errors=ctx.validation_errors,
        )

    def _save_trace(self, trace: Trace) -> None:
        """Save trace to file."""
        if not self.trace_dir:
            return

        filename = f"trace_{trace.query_id}.json"
        filepath = self.trace_dir / filename

        with open(filepath, "w") as f:
            f.write(trace.to_json())

        logger.debug(f"Trace saved: {filepath}")


class Parishad:
    """
    High-level API for running Parishad council.

    This is the main entry point for users.
    """

    def __init__(
        self,
        config: str = "core",
        model_config: Optional[ModelConfig] = None,
        model_config_path: Optional[str | Path] = None,
        profile: Optional[str] = None,
        pipeline_config_path: Optional[str | Path] = None,
        trace_dir: Optional[str | Path] = None,
        strict_validation: bool = False,
        enforce_budget: bool = False,
        mode: Optional[str] = None,
        user_forced_config: Optional[str] = None,
        no_retry: bool = False,
        **kwargs  # Ignore legacy mock/stub args
    ):
        """
        Initialize Parishad.

        Args:
            config: Pipeline configuration ("core" or "extended")
            model_config: Direct ModelConfig object (overrides model_config_path + profile)
            model_config_path: Path to models.yaml (defaults to ~/.parishad/models.yaml if exists)
            profile: Model profile to use (defaults to user config, fallback: "local_cpu")
            pipeline_config_path: Path to pipeline config YAML
            trace_dir: Directory to save traces
            strict_validation: If True, fail on schema validation errors
            enforce_budget: If True, skip optional roles when budget is low
            mode: Execution mode ("auto"|"fast"|"balanced"|"thorough") for adaptive routing (defaults to user config, fallback: "balanced")
            user_forced_config: Config explicitly set by user (overrides routing)
            no_retry: If True, disable Worker+Checker retry logic
        """
        from ..config.user_config import load_user_config

        self.config_name = config

        # Load user config for defaults
        user_cfg = load_user_config()

        # Apply defaults from user config if not explicitly provided
        if profile is None:
            profile = user_cfg.default_profile
            logger.debug(f"Using default profile from user config: {profile}")

        if mode is None:
            mode = user_cfg.default_mode
            logger.debug(f"Using default mode from user config: {mode}")

        # If model_config_path not provided, try to load from config.json
        if model_config_path is None:
            # Try unified config.json first (client-side approach)
            config_json_path = Path.home() / ".parishad" / "config.json"
            if config_json_path.exists():
                try:
                    import json
                    with open(config_json_path) as f:
                        user_config_data = json.load(f)

                    session = user_config_data.get("session", {})
                    model_settings = user_config_data.get("model_config", {})
                    model_path = session.get("model")
                    backend_name = session.get("backend", "llama_cpp")

                    if model_path:
                        # Create ModelConfig directly from config.json
                        from ..models.runner import SlotConfig, Backend

                        # Map all slots to the same model (Laghu Sabha approach)
                        slot_config = SlotConfig(
                            model_id=model_path,
                            backend=Backend(backend_name) if backend_name in [e.value for e in Backend] else Backend.LLAMA_CPP,
                            default_max_tokens=1024,
                            default_temperature=0.5,
                            extra={
                                "n_gpu_layers": model_settings.get("n_gpu_layers", -1),
                                "n_ctx": model_settings.get("n_ctx", 8192),
                            }
                        )

                        model_config = ModelConfig(
                            slots={
                                "small": slot_config,
                                "mid": slot_config,
                                "big": slot_config,
                            }
                        )
                        logger.debug(f"Loaded model config from config.json: {model_path}")
                except Exception as e:
                    logger.warning(f"Failed to load model config from config.json: {e}")

            # Fall back to models.yaml if config.json didn't provide model config
            if model_config is None:
                user_models_path = Path.home() / ".parishad" / "models.yaml"
                if user_models_path.exists():
                    model_config_path = user_models_path
                    logger.debug(f"Using user models config: {model_config_path}")

        # Load configurations
        # Use provided model_config or load from file
        if model_config is None and model_config_path:
            model_config = ModelConfig.from_profile(profile, model_config_path)

        pipeline_config = None
        if pipeline_config_path:
            pipeline_config = PipelineConfig.from_yaml(pipeline_config_path)

        # Task 4: Handle no_retry flag
        if pipeline_config and no_retry:
            pipeline_config.retry.enabled = False

        # Create engine
        self.engine = ParishadEngine(
            model_config=model_config,
            pipeline_config=pipeline_config,
            trace_dir=trace_dir,
            strict_validation=strict_validation,
            enforce_budget=enforce_budget,
            mode=mode,
            user_forced_config=user_forced_config,
        )

    def run(self, query: str) -> Trace:
        """
        Run a query through the Parishad council.

        Args:
            query: User query to process

        Returns:
            Complete execution trace
        """
        return self.engine.run(query, config=self.config_name)

    @property
    def final_answer(self) -> Optional[FinalAnswer]:
        """Get the final answer from the last run."""
        # This would need to store the last trace
        return None