ouroboros-ai 0.2.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- ouroboros/__init__.py +1 -1
- ouroboros/bigbang/__init__.py +9 -0
- ouroboros/bigbang/interview.py +16 -18
- ouroboros/bigbang/ontology.py +180 -0
- ouroboros/cli/commands/__init__.py +2 -0
- ouroboros/cli/commands/init.py +162 -97
- ouroboros/cli/commands/mcp.py +161 -0
- ouroboros/cli/commands/run.py +165 -27
- ouroboros/cli/main.py +2 -1
- ouroboros/core/ontology_aspect.py +455 -0
- ouroboros/core/ontology_questions.py +462 -0
- ouroboros/evaluation/__init__.py +16 -1
- ouroboros/evaluation/consensus.py +569 -11
- ouroboros/evaluation/models.py +81 -0
- ouroboros/events/ontology.py +135 -0
- ouroboros/mcp/__init__.py +83 -0
- ouroboros/mcp/client/__init__.py +20 -0
- ouroboros/mcp/client/adapter.py +632 -0
- ouroboros/mcp/client/manager.py +600 -0
- ouroboros/mcp/client/protocol.py +161 -0
- ouroboros/mcp/errors.py +377 -0
- ouroboros/mcp/resources/__init__.py +22 -0
- ouroboros/mcp/resources/handlers.py +328 -0
- ouroboros/mcp/server/__init__.py +21 -0
- ouroboros/mcp/server/adapter.py +408 -0
- ouroboros/mcp/server/protocol.py +291 -0
- ouroboros/mcp/server/security.py +636 -0
- ouroboros/mcp/tools/__init__.py +24 -0
- ouroboros/mcp/tools/definitions.py +351 -0
- ouroboros/mcp/tools/registry.py +269 -0
- ouroboros/mcp/types.py +333 -0
- ouroboros/orchestrator/__init__.py +31 -0
- ouroboros/orchestrator/events.py +40 -0
- ouroboros/orchestrator/mcp_config.py +419 -0
- ouroboros/orchestrator/mcp_tools.py +483 -0
- ouroboros/orchestrator/runner.py +119 -2
- ouroboros/providers/claude_code_adapter.py +75 -0
- ouroboros/strategies/__init__.py +23 -0
- ouroboros/strategies/devil_advocate.py +197 -0
- {ouroboros_ai-0.2.3.dist-info → ouroboros_ai-0.4.0.dist-info}/METADATA +73 -17
- {ouroboros_ai-0.2.3.dist-info → ouroboros_ai-0.4.0.dist-info}/RECORD +44 -19
- {ouroboros_ai-0.2.3.dist-info → ouroboros_ai-0.4.0.dist-info}/WHEEL +0 -0
- {ouroboros_ai-0.2.3.dist-info → ouroboros_ai-0.4.0.dist-info}/entry_points.txt +0 -0
- {ouroboros_ai-0.2.3.dist-info → ouroboros_ai-0.4.0.dist-info}/licenses/LICENSE +0 -0
ouroboros/evaluation/consensus.py

```diff
@@ -1,11 +1,20 @@
 """Stage 3: Multi-Model Consensus.
 
-
-- 3 different models evaluate independently
-- 2/3 majority required for approval
-- Disagreements are logged with reasoning
+This module provides two consensus evaluation modes:
 
-
+1. Simple Consensus (ConsensusEvaluator):
+   - 3 models evaluate independently
+   - 2/3 majority required for approval
+   - Fast, straightforward voting
+
+2. Deliberative Consensus (DeliberativeConsensus):
+   - Role-based evaluation: Advocate, Devil's Advocate, Judge
+   - 2-round deliberation: positions → judgment
+   - Devil's Advocate uses ontological questions
+   - Deeper analysis of whether solution addresses root cause
+
+The deliberative mode is recommended for complex decisions where
+ensuring root cause resolution is important.
 """
 
 import asyncio
```
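The simple mode's 2/3 rule described in the new docstring is ordinary majority voting over three independent votes. As a standalone illustration only (this helper is not part of ouroboros-ai):

```python
# Illustrative sketch of the "2/3 majority required for approval" rule;
# not code from the package itself.
def majority_approves(votes: list[bool], threshold: float = 2 / 3) -> bool:
    return sum(votes) / len(votes) >= threshold

assert majority_approves([True, True, False])       # 2 of 3 -> approved
assert not majority_approves([True, False, False])  # 1 of 3 -> rejected
```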
```diff
@@ -13,8 +22,17 @@ from dataclasses import dataclass
 import json
 
 from ouroboros.core.errors import ProviderError, ValidationError
+from ouroboros.core.ontology_aspect import AnalysisResult
 from ouroboros.core.types import Result
-from ouroboros.evaluation.models import
+from ouroboros.evaluation.models import (
+    ConsensusResult,
+    DeliberationResult,
+    EvaluationContext,
+    FinalVerdict,
+    JudgmentResult,
+    Vote,
+    VoterRole,
+)
 from ouroboros.events.base import BaseEvent
 from ouroboros.events.evaluation import (
     create_stage3_completed_event,
@@ -22,6 +40,7 @@ from ouroboros.events.evaluation import (
 )
 from ouroboros.providers.base import CompletionConfig, Message, MessageRole
 from ouroboros.providers.litellm_adapter import LiteLLMAdapter
+from ouroboros.strategies.devil_advocate import ConsensusContext, DevilAdvocateStrategy
 
 # Default models for consensus voting (Frontier tier)
 # Can be overridden via ConsensusConfig.models
@@ -101,9 +120,10 @@ Cast your vote as a JSON object with: approved (boolean), confidence (0-1), and
 
 
 def extract_json_payload(text: str) -> str | None:
-    """Extract JSON object from text using
+    """Extract JSON object from text using bracket-matching approach.
 
-
+    Uses brace counting to find the first complete JSON object,
+    avoiding issues with multiple disjoint brace blocks (e.g., code snippets).
 
     Args:
         text: Raw text potentially containing JSON
@@ -112,9 +132,37 @@ def extract_json_payload(text: str) -> str | None:
         Extracted JSON string or None if not found
     """
     start = text.find("{")
-
-
-
+    if start == -1:
+        return None
+
+    # Count braces to find matching closing brace
+    depth = 0
+    in_string = False
+    escape_next = False
+
+    for i, char in enumerate(text[start:], start=start):
+        if escape_next:
+            escape_next = False
+            continue
+
+        if char == "\\":
+            escape_next = True
+            continue
+
+        if char == '"' and not escape_next:
+            in_string = not in_string
+            continue
+
+        if in_string:
+            continue
+
+        if char == "{":
+            depth += 1
+        elif char == "}":
+            depth -= 1
+            if depth == 0:
+                return text[start : i + 1]
+
     return None
 
 
```
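The new brace-matching extractor can be exercised on its own. A minimal usage sketch, assuming the function is imported from the diffed module (the input string is invented):

```python
from ouroboros.evaluation.consensus import extract_json_payload

raw = 'Vote: {"approved": true, "reasoning": "fixes {root} cause"} and {stray}'
payload = extract_json_payload(raw)
# Brace counting returns the first balanced object; braces inside JSON
# strings are skipped, and the trailing "{stray}" block is ignored.
assert payload == '{"approved": true, "reasoning": "fixes {root} cause"}'
```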
```diff
@@ -329,6 +377,489 @@ class ConsensusEvaluator:
         return parse_vote_response(llm_result.value.content, model)
 
 
+# Role-based system prompts for deliberative consensus
+ADVOCATE_SYSTEM_PROMPT = """You are the ADVOCATE in a deliberative review.
+
+Your role is to find and articulate the STRENGTHS of this solution:
+- Does it correctly implement the acceptance criterion?
+- Does it align with the stated goal?
+- What are its positive aspects and well-designed elements?
+- Is the approach sound and maintainable?
+
+You must respond ONLY with a valid JSON object:
+{
+  "approved": true,
+  "confidence": <float between 0.0 and 1.0>,
+  "reasoning": "<string explaining the strengths you found>"
+}
+
+Be thorough but honest. If you find genuine strengths, articulate them clearly.
+If you cannot find enough strengths to advocate for approval, you may vote against,
+but this should be rare for your role."""
+
+
+JUDGE_SYSTEM_PROMPT = """You are the JUDGE in a deliberative review.
+
+You will receive:
+1. ADVOCATE's position (strengths of the solution)
+2. DEVIL'S ADVOCATE's position (ontological critique - root cause vs symptom)
+
+Your task:
+- Weigh both arguments fairly and impartially
+- Consider whether the solution addresses the ROOT CAUSE or just treats symptoms
+- Make a final verdict: APPROVED, REJECTED, or CONDITIONAL
+
+You must respond ONLY with a valid JSON object:
+{
+  "verdict": "<one of: approved, rejected, conditional>",
+  "confidence": <float between 0.0 and 1.0>,
+  "reasoning": "<string explaining your judgment>",
+  "conditions": ["<condition 1>", "<condition 2>"] or null
+}
+
+Guidelines:
+- APPROVED: Solution is sound and addresses the root problem
+- CONDITIONAL: Solution has merit but requires specific changes
+- REJECTED: Solution treats symptoms rather than root cause, or has fundamental issues
+
+Be thorough and fair. The best solutions deserve recognition.
+Symptomatic treatments deserve honest critique."""
+
+
+@dataclass(frozen=True, slots=True)
+class DeliberativeConfig:
+    """Configuration for deliberative consensus.
+
+    Attributes:
+        advocate_model: Model for the Advocate role
+        devil_model: Model for the Devil's Advocate role
+        judge_model: Model for the Judge role
+        temperature: Sampling temperature
+        max_tokens: Maximum tokens per response
+    """
+
+    advocate_model: str = "openrouter/anthropic/claude-sonnet-4-20250514"
+    devil_model: str = "openrouter/openai/gpt-4o"
+    judge_model: str = "openrouter/google/gemini-2.5-pro"
+    temperature: float = 0.3
+    max_tokens: int = 2048
+
+
+def _parse_judgment_response(
+    response_text: str,
+    model: str,
+) -> Result[JudgmentResult, ValidationError]:
+    """Parse LLM response into JudgmentResult.
+
+    Args:
+        response_text: Raw LLM response
+        model: Model that made the judgment
+
+    Returns:
+        Result containing JudgmentResult or ValidationError
+    """
+    json_str = extract_json_payload(response_text)
+
+    if not json_str:
+        return Result.err(
+            ValidationError(
+                f"Could not find JSON in judgment from {model}",
+                field="response",
+                value=response_text[:100],
+            )
+        )
+
+    try:
+        data = json.loads(json_str)
+    except json.JSONDecodeError as e:
+        return Result.err(
+            ValidationError(
+                f"Invalid JSON in judgment from {model}: {e}",
+                field="response",
+            )
+        )
+
+    # Validate required fields
+    if "verdict" not in data:
+        return Result.err(
+            ValidationError(
+                f"Missing 'verdict' field in judgment from {model}",
+                field="verdict",
+            )
+        )
+
+    # Parse verdict
+    verdict_str = str(data["verdict"]).lower()
+    verdict_map = {
+        "approved": FinalVerdict.APPROVED,
+        "rejected": FinalVerdict.REJECTED,
+        "conditional": FinalVerdict.CONDITIONAL,
+    }
+
+    if verdict_str not in verdict_map:
+        return Result.err(
+            ValidationError(
+                f"Invalid verdict '{verdict_str}' from {model}",
+                field="verdict",
+                value=verdict_str,
+            )
+        )
+
+    try:
+        confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
+        conditions = data.get("conditions")
+        if conditions is not None:
+            conditions = tuple(str(c) for c in conditions)
+
+        return Result.ok(
+            JudgmentResult(
+                verdict=verdict_map[verdict_str],
+                confidence=confidence,
+                reasoning=str(data.get("reasoning", "No reasoning provided")),
+                conditions=conditions,
+            )
+        )
+    except (TypeError, ValueError) as e:
+        return Result.err(
+            ValidationError(
+                f"Invalid field types in judgment from {model}: {e}",
+                field="response",
+            )
+        )
+
+
+class DeliberativeConsensus:
+    """Two-round deliberative consensus evaluator.
+
+    Uses role-based evaluation with ontological questioning:
+    - Round 1: Advocate and Devil's Advocate present positions (parallel)
+    - Round 2: Judge reviews both and makes final decision
+
+    The Devil's Advocate uses DevilAdvocateStrategy with AOP-based
+    ontological analysis to ensure the solution addresses the root
+    cause rather than just treating symptoms.
+
+    Example:
+        evaluator = DeliberativeConsensus(llm_adapter)
+        result = await evaluator.deliberate(context, trigger_reason)
+
+        # With custom strategy for testing
+        mock_strategy = MockDevilStrategy()
+        evaluator = DeliberativeConsensus(llm_adapter, devil_strategy=mock_strategy)
+    """
+
+    def __init__(
+        self,
+        llm_adapter: LiteLLMAdapter,
+        config: DeliberativeConfig | None = None,
+        devil_strategy: DevilAdvocateStrategy | None = None,
+    ) -> None:
+        """Initialize evaluator.
+
+        Args:
+            llm_adapter: LLM adapter for completions
+            config: Deliberative configuration
+            devil_strategy: Optional custom strategy for Devil's Advocate.
+                If None, creates default DevilAdvocateStrategy.
+        """
+        self._llm = llm_adapter
+        self._config = config or DeliberativeConfig()
+        self._devil_strategy = devil_strategy or DevilAdvocateStrategy(
+            llm_adapter=llm_adapter,
+            model=self._config.devil_model,
+            temperature=self._config.temperature,
+            max_tokens=self._config.max_tokens,
+        )
+
+    async def deliberate(
+        self,
+        context: EvaluationContext,
+        trigger_reason: str = "manual",
+    ) -> Result[tuple[DeliberationResult, list[BaseEvent]], ProviderError | ValidationError]:
+        """Run 2-round deliberative consensus.
+
+        Round 1: Advocate and Devil's Advocate present positions concurrently
+        Round 2: Judge reviews both positions and makes final decision
+
+        Args:
+            context: Evaluation context
+            trigger_reason: Why consensus was triggered
+
+        Returns:
+            Result containing DeliberationResult and events, or error
+        """
+        events: list[BaseEvent] = []
+
+        # Emit start event
+        events.append(
+            create_stage3_started_event(
+                execution_id=context.execution_id,
+                models=[
+                    self._config.advocate_model,
+                    self._config.devil_model,
+                    self._config.judge_model,
+                ],
+                trigger_reason=f"deliberative:{trigger_reason}",
+            )
+        )
+
+        # Round 1: Get Advocate and Devil's Advocate positions concurrently
+        advocate_task = self._get_position(context, VoterRole.ADVOCATE)
+        devil_task = self._get_position(context, VoterRole.DEVIL)
+
+        # Type hint for asyncio.gather with return_exceptions=True
+        results: list[Result[Vote, ProviderError | ValidationError] | BaseException] = (
+            await asyncio.gather(advocate_task, devil_task, return_exceptions=True)
+        )
+        advocate_result, devil_result = results[0], results[1]
+
+        # Handle Round 1 errors - type narrowing via isinstance
+        if isinstance(advocate_result, BaseException):
+            return Result.err(
+                ValidationError(f"Advocate failed: {advocate_result}")
+            )
+        if advocate_result.is_err:
+            return Result.err(advocate_result.error)
+        advocate_vote = advocate_result.value
+
+        if isinstance(devil_result, BaseException):
+            return Result.err(
+                ValidationError(f"Devil's Advocate failed: {devil_result}")
+            )
+        if devil_result.is_err:
+            return Result.err(devil_result.error)
+        devil_vote = devil_result.value
+
+        # Round 2: Judge reviews both positions
+        judgment_result = await self._get_judgment(
+            context, advocate_vote, devil_vote
+        )
+
+        if judgment_result.is_err:
+            return Result.err(judgment_result.error)
+        judgment = judgment_result.value
+
+        # Determine if Devil confirmed this addresses root cause
+        # Devil approves (approved=True) means they couldn't find fundamental issues
+        is_root_solution = devil_vote.approved
+
+        deliberation_result = DeliberationResult(
+            final_verdict=judgment.verdict,
+            advocate_position=advocate_vote,
+            devil_position=devil_vote,
+            judgment=judgment,
+            is_root_solution=is_root_solution,
+        )
+
+        # Emit completion event
+        events.append(
+            create_stage3_completed_event(
+                execution_id=context.execution_id,
+                approved=deliberation_result.approved,
+                votes=[
+                    {
+                        "model": advocate_vote.model,
+                        "role": advocate_vote.role,
+                        "approved": advocate_vote.approved,
+                        "confidence": advocate_vote.confidence,
+                        "reasoning": advocate_vote.reasoning,
+                    },
+                    {
+                        "model": devil_vote.model,
+                        "role": devil_vote.role,
+                        "approved": devil_vote.approved,
+                        "confidence": devil_vote.confidence,
+                        "reasoning": devil_vote.reasoning,
+                    },
+                ],
+                majority_ratio=1.0 if deliberation_result.approved else 0.0,
+                disagreements=[],
+            )
+        )
+
+        return Result.ok((deliberation_result, events))
+
+    async def _get_position(
+        self,
+        context: EvaluationContext,
+        role: VoterRole,
+    ) -> Result[Vote, ProviderError | ValidationError]:
+        """Get a position from Advocate or Devil's Advocate.
+
+        Args:
+            context: Evaluation context
+            role: The role (ADVOCATE or DEVIL)
+
+        Returns:
+            Result containing Vote or error
+        """
+        if role == VoterRole.ADVOCATE:
+            # Advocate uses direct LLM call with role-specific prompt
+            system_prompt = ADVOCATE_SYSTEM_PROMPT
+            model = self._config.advocate_model
+
+            messages = [
+                Message(role=MessageRole.SYSTEM, content=system_prompt),
+                Message(role=MessageRole.USER, content=build_consensus_prompt(context)),
+            ]
+
+            config = CompletionConfig(
+                model=model,
+                temperature=self._config.temperature,
+                max_tokens=self._config.max_tokens,
+            )
+
+            llm_result = await self._llm.complete(messages, config)
+            if llm_result.is_err:
+                return Result.err(llm_result.error)
+
+            vote_result = parse_vote_response(llm_result.value.content, model)
+            if vote_result.is_err:
+                return Result.err(vote_result.error)
+
+            vote = vote_result.value
+            return Result.ok(
+                Vote(
+                    model=vote.model,
+                    approved=vote.approved,
+                    confidence=vote.confidence,
+                    reasoning=vote.reasoning,
+                    role=role,
+                )
+            )
+
+        elif role == VoterRole.DEVIL:
+            # Devil uses AOP-based DevilAdvocateStrategy for ontological analysis
+            return await self._get_devil_position(context)
+
+        else:
+            return Result.err(
+                ValidationError(f"Invalid role for position: {role}")
+            )
+
+    async def _get_devil_position(
+        self,
+        context: EvaluationContext,
+    ) -> Result[Vote, ProviderError | ValidationError]:
+        """Get Devil's Advocate position using ontological analysis.
+
+        Uses DevilAdvocateStrategy to analyze whether the artifact
+        addresses root cause or treats symptoms.
+
+        Args:
+            context: Evaluation context
+
+        Returns:
+            Result containing Vote with Devil's Advocate role
+        """
+        # Convert EvaluationContext to ConsensusContext for strategy
+        consensus_ctx = ConsensusContext(
+            artifact=context.artifact,
+            goal=context.goal,
+            current_ac=context.current_ac,
+            constraints=context.constraints,
+        )
+
+        # Strategy handles errors gracefully (returns AnalysisResult.invalid on LLM failure)
+        analysis = await self._devil_strategy.analyze(consensus_ctx)
+
+        # Convert AnalysisResult to Vote
+        vote = self._analysis_to_vote(analysis)
+        return Result.ok(vote)
+
+    def _analysis_to_vote(self, analysis: AnalysisResult) -> Vote:
+        """Convert AnalysisResult to Vote for Devil's Advocate.
+
+        Maps ontological analysis result to consensus voting format:
+        - is_valid -> approved
+        - confidence -> confidence
+        - reasoning + suggestions -> reasoning
+
+        Args:
+            analysis: The ontological analysis result
+
+        Returns:
+            Vote with Devil's Advocate role
+        """
+        # Build reasoning text
+        if analysis.is_valid:
+            reasoning_text = (
+                analysis.reasoning[0]
+                if analysis.reasoning
+                else "Passed ontological analysis: addresses root cause"
+            )
+        else:
+            # Combine reasoning and suggestions for invalid case
+            parts = list(analysis.reasoning)
+            if analysis.suggestions:
+                parts.append("Suggestions: " + "; ".join(analysis.suggestions))
+            reasoning_text = "\n".join(parts) if parts else "Failed ontological analysis"
+
+        return Vote(
+            model=self._devil_strategy.model,
+            approved=analysis.is_valid,
+            confidence=analysis.confidence,
+            reasoning=reasoning_text,
+            role=VoterRole.DEVIL,
+        )
+
+    async def _get_judgment(
+        self,
+        context: EvaluationContext,
+        advocate_vote: Vote,
+        devil_vote: Vote,
+    ) -> Result[JudgmentResult, ProviderError | ValidationError]:
+        """Get final judgment from Judge.
+
+        Args:
+            context: Evaluation context
+            advocate_vote: The Advocate's position
+            devil_vote: The Devil's Advocate's position
+
+        Returns:
+            Result containing JudgmentResult or error
+        """
+        # Build prompt with both positions
+        user_prompt = f"""{build_consensus_prompt(context)}
+
+---
+
+## Round 1 Positions
+
+### ADVOCATE's Position
+Approved: {advocate_vote.approved}
+Confidence: {advocate_vote.confidence:.2f}
+Reasoning: {advocate_vote.reasoning}
+
+### DEVIL'S ADVOCATE's Position (Ontological Analysis)
+Approved: {devil_vote.approved}
+Confidence: {devil_vote.confidence:.2f}
+Reasoning: {devil_vote.reasoning}
+
+---
+
+Based on both positions above, make your final judgment."""
+
+        messages = [
+            Message(role=MessageRole.SYSTEM, content=JUDGE_SYSTEM_PROMPT),
+            Message(role=MessageRole.USER, content=user_prompt),
+        ]
+
+        config = CompletionConfig(
+            model=self._config.judge_model,
+            temperature=self._config.temperature,
+            max_tokens=self._config.max_tokens,
+        )
+
+        llm_result = await self._llm.complete(messages, config)
+        if llm_result.is_err:
+            return Result.err(llm_result.error)
+
+        return _parse_judgment_response(
+            llm_result.value.content, self._config.judge_model
+        )
+
+
 async def run_consensus_evaluation(
     context: EvaluationContext,
     llm_adapter: LiteLLMAdapter,
```
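A hedged usage sketch for the new class: role models come from `DeliberativeConfig`, and `deliberate` returns the package's `Result` type as shown in the diff. Here `llm_adapter` and `context` are assumed to be a `LiteLLMAdapter` and an `EvaluationContext` constructed elsewhere:

```python
# Sketch only: override one role model and run the two-round deliberation.
config = DeliberativeConfig(
    devil_model="openrouter/openai/gpt-4o",  # any field can be overridden
    temperature=0.0,
)
evaluator = DeliberativeConsensus(llm_adapter, config=config)

result = await evaluator.deliberate(context, trigger_reason="release-gate")
if result.is_err:
    raise result.error
deliberation, events = result.value
print(deliberation.final_verdict, deliberation.is_root_solution)
```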
```diff
@@ -348,3 +879,30 @@ async def run_consensus_evaluation(
     """
     evaluator = ConsensusEvaluator(llm_adapter, config)
     return await evaluator.evaluate(context, trigger_reason)
+
+
+async def run_deliberative_evaluation(
+    context: EvaluationContext,
+    llm_adapter: LiteLLMAdapter,
+    trigger_reason: str = "manual",
+    config: DeliberativeConfig | None = None,
+    devil_strategy: DevilAdvocateStrategy | None = None,
+) -> Result[tuple[DeliberationResult, list[BaseEvent]], ProviderError | ValidationError]:
+    """Convenience function for running deliberative consensus.
+
+    Recommended for complex decisions where ensuring root cause
+    resolution is important. Uses AOP-based DevilAdvocateStrategy
+    for ontological analysis.
+
+    Args:
+        context: Evaluation context
+        llm_adapter: LLM adapter
+        trigger_reason: Why consensus was triggered
+        config: Optional configuration
+        devil_strategy: Optional custom strategy for Devil's Advocate
+
+    Returns:
+        Result with DeliberationResult and events
+    """
+    evaluator = DeliberativeConsensus(llm_adapter, config, devil_strategy)
+    return await evaluator.deliberate(context, trigger_reason)
```
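For tests, the `devil_strategy` hook lets callers bypass the live ontological analysis, mirroring the `MockDevilStrategy` example in the class docstring. `StubDevilStrategy` below is hypothetical; a real stub would need whatever interface `DevilAdvocateStrategy` exposes (at minimum an async `analyze()` and a `model` attribute, per this diff):

```python
# Sketch: run the convenience wrapper with a hypothetical test double.
result = await run_deliberative_evaluation(
    context,
    llm_adapter,
    trigger_reason="unit-test",
    devil_strategy=StubDevilStrategy(),  # hypothetical stub, not in the package
)
if not result.is_err:
    deliberation, _events = result.value
    assert deliberation.judgment.verdict in (
        FinalVerdict.APPROVED,
        FinalVerdict.CONDITIONAL,
        FinalVerdict.REJECTED,
    )
```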