ouroboros-ai 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ouroboros-ai might be problematic.

Files changed (42)
  1. ouroboros/__init__.py +1 -1
  2. ouroboros/bigbang/__init__.py +9 -0
  3. ouroboros/bigbang/ontology.py +180 -0
  4. ouroboros/cli/commands/__init__.py +2 -0
  5. ouroboros/cli/commands/mcp.py +161 -0
  6. ouroboros/cli/commands/run.py +165 -27
  7. ouroboros/cli/main.py +2 -1
  8. ouroboros/core/ontology_aspect.py +455 -0
  9. ouroboros/core/ontology_questions.py +462 -0
  10. ouroboros/evaluation/__init__.py +16 -1
  11. ouroboros/evaluation/consensus.py +569 -11
  12. ouroboros/evaluation/models.py +81 -0
  13. ouroboros/events/ontology.py +135 -0
  14. ouroboros/mcp/__init__.py +83 -0
  15. ouroboros/mcp/client/__init__.py +20 -0
  16. ouroboros/mcp/client/adapter.py +632 -0
  17. ouroboros/mcp/client/manager.py +600 -0
  18. ouroboros/mcp/client/protocol.py +161 -0
  19. ouroboros/mcp/errors.py +377 -0
  20. ouroboros/mcp/resources/__init__.py +22 -0
  21. ouroboros/mcp/resources/handlers.py +328 -0
  22. ouroboros/mcp/server/__init__.py +21 -0
  23. ouroboros/mcp/server/adapter.py +408 -0
  24. ouroboros/mcp/server/protocol.py +291 -0
  25. ouroboros/mcp/server/security.py +636 -0
  26. ouroboros/mcp/tools/__init__.py +24 -0
  27. ouroboros/mcp/tools/definitions.py +351 -0
  28. ouroboros/mcp/tools/registry.py +269 -0
  29. ouroboros/mcp/types.py +333 -0
  30. ouroboros/orchestrator/__init__.py +31 -0
  31. ouroboros/orchestrator/events.py +40 -0
  32. ouroboros/orchestrator/mcp_config.py +419 -0
  33. ouroboros/orchestrator/mcp_tools.py +483 -0
  34. ouroboros/orchestrator/runner.py +119 -2
  35. ouroboros/providers/claude_code_adapter.py +75 -0
  36. ouroboros/strategies/__init__.py +23 -0
  37. ouroboros/strategies/devil_advocate.py +197 -0
  38. {ouroboros_ai-0.3.0.dist-info → ouroboros_ai-0.4.0.dist-info}/METADATA +10 -5
  39. {ouroboros_ai-0.3.0.dist-info → ouroboros_ai-0.4.0.dist-info}/RECORD +42 -17
  40. {ouroboros_ai-0.3.0.dist-info → ouroboros_ai-0.4.0.dist-info}/WHEEL +0 -0
  41. {ouroboros_ai-0.3.0.dist-info → ouroboros_ai-0.4.0.dist-info}/entry_points.txt +0 -0
  42. {ouroboros_ai-0.3.0.dist-info → ouroboros_ai-0.4.0.dist-info}/licenses/LICENSE +0 -0
ouroboros/evaluation/consensus.py

@@ -1,11 +1,20 @@
 """Stage 3: Multi-Model Consensus.
 
-Multi-model voting using Frontier tier:
-- 3 different models evaluate independently
-- 2/3 majority required for approval
-- Disagreements are logged with reasoning
+This module provides two consensus evaluation modes:
 
-The ConsensusEvaluator uses multiple LLM models for diverse verification.
+1. Simple Consensus (ConsensusEvaluator):
+   - 3 models evaluate independently
+   - 2/3 majority required for approval
+   - Fast, straightforward voting
+
+2. Deliberative Consensus (DeliberativeConsensus):
+   - Role-based evaluation: Advocate, Devil's Advocate, Judge
+   - 2-round deliberation: positions → judgment
+   - Devil's Advocate uses ontological questions
+   - Deeper analysis of whether solution addresses root cause
+
+The deliberative mode is recommended for complex decisions where
+ensuring root cause resolution is important.
 """
 
 import asyncio
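
The rewritten docstring names the two modes this release ships; both convenience entry points are defined later in this file (run_consensus_evaluation and the new run_deliberative_evaluation). A minimal sketch of choosing between them; the adapter and context values below are placeholders, not constructed here:

    # Sketch, not package code: picking a consensus mode.
    from ouroboros.evaluation.consensus import (
        run_consensus_evaluation,
        run_deliberative_evaluation,
    )

    async def review(context, adapter, complex_decision: bool):
        if complex_decision:
            # Advocate / Devil's Advocate / Judge, root-cause focused
            return await run_deliberative_evaluation(context, adapter)
        # Three independent votes, 2/3 majority
        return await run_consensus_evaluation(context, adapter)
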
@@ -13,8 +22,17 @@ from dataclasses import dataclass
 import json
 
 from ouroboros.core.errors import ProviderError, ValidationError
+from ouroboros.core.ontology_aspect import AnalysisResult
 from ouroboros.core.types import Result
-from ouroboros.evaluation.models import ConsensusResult, EvaluationContext, Vote
+from ouroboros.evaluation.models import (
+    ConsensusResult,
+    DeliberationResult,
+    EvaluationContext,
+    FinalVerdict,
+    JudgmentResult,
+    Vote,
+    VoterRole,
+)
 from ouroboros.events.base import BaseEvent
 from ouroboros.events.evaluation import (
     create_stage3_completed_event,
@@ -22,6 +40,7 @@ from ouroboros.events.evaluation import (
 )
 from ouroboros.providers.base import CompletionConfig, Message, MessageRole
 from ouroboros.providers.litellm_adapter import LiteLLMAdapter
+from ouroboros.strategies.devil_advocate import ConsensusContext, DevilAdvocateStrategy
 
 # Default models for consensus voting (Frontier tier)
 # Can be overridden via ConsensusConfig.models
@@ -101,9 +120,10 @@ Cast your vote as a JSON object with: approved (boolean), confidence (0-1), and
 
 
 def extract_json_payload(text: str) -> str | None:
-    """Extract JSON object from text using index-based approach.
+    """Extract JSON object from text using bracket-matching approach.
 
-    More reliable than regex for handling nested braces in code snippets.
+    Uses brace counting to find the first complete JSON object,
+    avoiding issues with multiple disjoint brace blocks (e.g., code snippets).
 
     Args:
         text: Raw text potentially containing JSON
@@ -112,9 +132,37 @@ def extract_json_payload(text: str) -> str | None:
         Extracted JSON string or None if not found
     """
     start = text.find("{")
-    end = text.rfind("}")
-    if start != -1 and end != -1 and end > start:
-        return text[start : end + 1]
+    if start == -1:
+        return None
+
+    # Count braces to find matching closing brace
+    depth = 0
+    in_string = False
+    escape_next = False
+
+    for i, char in enumerate(text[start:], start=start):
+        if escape_next:
+            escape_next = False
+            continue
+
+        if char == "\\":
+            escape_next = True
+            continue
+
+        if char == '"' and not escape_next:
+            in_string = not in_string
+            continue
+
+        if in_string:
+            continue
+
+        if char == "{":
+            depth += 1
+        elif char == "}":
+            depth -= 1
+            if depth == 0:
+                return text[start : i + 1]
+
     return None
 
 
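
The change is easiest to see on a model reply that contains stray braces after the JSON object. A quick illustration of the new brace-counting behavior, assuming extract_json_payload is imported straight from this module:

    import json

    from ouroboros.evaluation.consensus import extract_json_payload

    reply = 'Vote: {"approved": true, "confidence": 0.9, "reasoning": "ok"} (see `if x { y }`)'

    # The old rfind-based slice ran to the *last* "}", swallowing the code
    # snippet and yielding invalid JSON. Brace counting stops at the first
    # balanced object instead.
    payload = extract_json_payload(reply)
    assert payload == '{"approved": true, "confidence": 0.9, "reasoning": "ok"}'
    assert json.loads(payload)["approved"] is True
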
@@ -329,6 +377,489 @@ class ConsensusEvaluator:
         return parse_vote_response(llm_result.value.content, model)
 
 
+# Role-based system prompts for deliberative consensus
+ADVOCATE_SYSTEM_PROMPT = """You are the ADVOCATE in a deliberative review.
+
+Your role is to find and articulate the STRENGTHS of this solution:
+- Does it correctly implement the acceptance criterion?
+- Does it align with the stated goal?
+- What are its positive aspects and well-designed elements?
+- Is the approach sound and maintainable?
+
+You must respond ONLY with a valid JSON object:
+{
+    "approved": true,
+    "confidence": <float between 0.0 and 1.0>,
+    "reasoning": "<string explaining the strengths you found>"
+}
+
+Be thorough but honest. If you find genuine strengths, articulate them clearly.
+If you cannot find enough strengths to advocate for approval, you may vote against,
+but this should be rare for your role."""
+
+
+JUDGE_SYSTEM_PROMPT = """You are the JUDGE in a deliberative review.
+
+You will receive:
+1. ADVOCATE's position (strengths of the solution)
+2. DEVIL'S ADVOCATE's position (ontological critique - root cause vs symptom)
+
+Your task:
+- Weigh both arguments fairly and impartially
+- Consider whether the solution addresses the ROOT CAUSE or just treats symptoms
+- Make a final verdict: APPROVED, REJECTED, or CONDITIONAL
+
+You must respond ONLY with a valid JSON object:
+{
+    "verdict": "<one of: approved, rejected, conditional>",
+    "confidence": <float between 0.0 and 1.0>,
+    "reasoning": "<string explaining your judgment>",
+    "conditions": ["<condition 1>", "<condition 2>"] or null
+}
+
+Guidelines:
+- APPROVED: Solution is sound and addresses the root problem
+- CONDITIONAL: Solution has merit but requires specific changes
+- REJECTED: Solution treats symptoms rather than root cause, or has fundamental issues
+
+Be thorough and fair. The best solutions deserve recognition.
+Symptomatic treatments deserve honest critique."""
+
+
+@dataclass(frozen=True, slots=True)
+class DeliberativeConfig:
+    """Configuration for deliberative consensus.
+
+    Attributes:
+        advocate_model: Model for the Advocate role
+        devil_model: Model for the Devil's Advocate role
+        judge_model: Model for the Judge role
+        temperature: Sampling temperature
+        max_tokens: Maximum tokens per response
+    """
+
+    advocate_model: str = "openrouter/anthropic/claude-sonnet-4-20250514"
+    devil_model: str = "openrouter/openai/gpt-4o"
+    judge_model: str = "openrouter/google/gemini-2.5-pro"
+    temperature: float = 0.3
+    max_tokens: int = 2048
+
+
+def _parse_judgment_response(
+    response_text: str,
+    model: str,
+) -> Result[JudgmentResult, ValidationError]:
+    """Parse LLM response into JudgmentResult.
+
+    Args:
+        response_text: Raw LLM response
+        model: Model that made the judgment
+
+    Returns:
+        Result containing JudgmentResult or ValidationError
+    """
+    json_str = extract_json_payload(response_text)
+
+    if not json_str:
+        return Result.err(
+            ValidationError(
+                f"Could not find JSON in judgment from {model}",
+                field="response",
+                value=response_text[:100],
+            )
+        )
+
+    try:
+        data = json.loads(json_str)
+    except json.JSONDecodeError as e:
+        return Result.err(
+            ValidationError(
+                f"Invalid JSON in judgment from {model}: {e}",
+                field="response",
+            )
+        )
+
+    # Validate required fields
+    if "verdict" not in data:
+        return Result.err(
+            ValidationError(
+                f"Missing 'verdict' field in judgment from {model}",
+                field="verdict",
+            )
+        )
+
+    # Parse verdict
+    verdict_str = str(data["verdict"]).lower()
+    verdict_map = {
+        "approved": FinalVerdict.APPROVED,
+        "rejected": FinalVerdict.REJECTED,
+        "conditional": FinalVerdict.CONDITIONAL,
+    }
+
+    if verdict_str not in verdict_map:
+        return Result.err(
+            ValidationError(
+                f"Invalid verdict '{verdict_str}' from {model}",
+                field="verdict",
+                value=verdict_str,
+            )
+        )
+
+    try:
+        confidence = max(0.0, min(1.0, float(data.get("confidence", 0.5))))
+        conditions = data.get("conditions")
+        if conditions is not None:
+            conditions = tuple(str(c) for c in conditions)
+
+        return Result.ok(
+            JudgmentResult(
+                verdict=verdict_map[verdict_str],
+                confidence=confidence,
+                reasoning=str(data.get("reasoning", "No reasoning provided")),
+                conditions=conditions,
+            )
+        )
+    except (TypeError, ValueError) as e:
+        return Result.err(
+            ValidationError(
+                f"Invalid field types in judgment from {model}: {e}",
+                field="response",
+            )
+        )
+
+
+class DeliberativeConsensus:
+    """Two-round deliberative consensus evaluator.
+
+    Uses role-based evaluation with ontological questioning:
+    - Round 1: Advocate and Devil's Advocate present positions (parallel)
+    - Round 2: Judge reviews both and makes final decision
+
+    The Devil's Advocate uses DevilAdvocateStrategy with AOP-based
+    ontological analysis to ensure the solution addresses the root
+    cause rather than just treating symptoms.
+
+    Example:
+        evaluator = DeliberativeConsensus(llm_adapter)
+        result = await evaluator.deliberate(context, trigger_reason)
+
+        # With custom strategy for testing
+        mock_strategy = MockDevilStrategy()
+        evaluator = DeliberativeConsensus(llm_adapter, devil_strategy=mock_strategy)
+    """
+
+    def __init__(
+        self,
+        llm_adapter: LiteLLMAdapter,
+        config: DeliberativeConfig | None = None,
+        devil_strategy: DevilAdvocateStrategy | None = None,
+    ) -> None:
+        """Initialize evaluator.
+
+        Args:
+            llm_adapter: LLM adapter for completions
+            config: Deliberative configuration
+            devil_strategy: Optional custom strategy for Devil's Advocate.
+                If None, creates default DevilAdvocateStrategy.
+        """
+        self._llm = llm_adapter
+        self._config = config or DeliberativeConfig()
+        self._devil_strategy = devil_strategy or DevilAdvocateStrategy(
+            llm_adapter=llm_adapter,
+            model=self._config.devil_model,
+            temperature=self._config.temperature,
+            max_tokens=self._config.max_tokens,
+        )
+
+    async def deliberate(
+        self,
+        context: EvaluationContext,
+        trigger_reason: str = "manual",
+    ) -> Result[tuple[DeliberationResult, list[BaseEvent]], ProviderError | ValidationError]:
+        """Run 2-round deliberative consensus.
+
+        Round 1: Advocate and Devil's Advocate present positions concurrently
+        Round 2: Judge reviews both positions and makes final decision
+
+        Args:
+            context: Evaluation context
+            trigger_reason: Why consensus was triggered
+
+        Returns:
+            Result containing DeliberationResult and events, or error
+        """
+        events: list[BaseEvent] = []
+
+        # Emit start event
+        events.append(
+            create_stage3_started_event(
+                execution_id=context.execution_id,
+                models=[
+                    self._config.advocate_model,
+                    self._config.devil_model,
+                    self._config.judge_model,
+                ],
+                trigger_reason=f"deliberative:{trigger_reason}",
+            )
+        )
+
+        # Round 1: Get Advocate and Devil's Advocate positions concurrently
+        advocate_task = self._get_position(context, VoterRole.ADVOCATE)
+        devil_task = self._get_position(context, VoterRole.DEVIL)
+
+        # Type hint for asyncio.gather with return_exceptions=True
+        results: list[Result[Vote, ProviderError | ValidationError] | BaseException] = (
+            await asyncio.gather(advocate_task, devil_task, return_exceptions=True)
+        )
+        advocate_result, devil_result = results[0], results[1]
+
+        # Handle Round 1 errors - type narrowing via isinstance
+        if isinstance(advocate_result, BaseException):
+            return Result.err(
+                ValidationError(f"Advocate failed: {advocate_result}")
+            )
+        if advocate_result.is_err:
+            return Result.err(advocate_result.error)
+        advocate_vote = advocate_result.value
+
+        if isinstance(devil_result, BaseException):
+            return Result.err(
+                ValidationError(f"Devil's Advocate failed: {devil_result}")
+            )
+        if devil_result.is_err:
+            return Result.err(devil_result.error)
+        devil_vote = devil_result.value
+
+        # Round 2: Judge reviews both positions
+        judgment_result = await self._get_judgment(
+            context, advocate_vote, devil_vote
+        )
+
+        if judgment_result.is_err:
+            return Result.err(judgment_result.error)
+        judgment = judgment_result.value
+
+        # Determine if Devil confirmed this addresses root cause
+        # Devil approves (approved=True) means they couldn't find fundamental issues
+        is_root_solution = devil_vote.approved
+
+        deliberation_result = DeliberationResult(
+            final_verdict=judgment.verdict,
+            advocate_position=advocate_vote,
+            devil_position=devil_vote,
+            judgment=judgment,
+            is_root_solution=is_root_solution,
+        )
+
+        # Emit completion event
+        events.append(
+            create_stage3_completed_event(
+                execution_id=context.execution_id,
+                approved=deliberation_result.approved,
+                votes=[
+                    {
+                        "model": advocate_vote.model,
+                        "role": advocate_vote.role,
+                        "approved": advocate_vote.approved,
+                        "confidence": advocate_vote.confidence,
+                        "reasoning": advocate_vote.reasoning,
+                    },
+                    {
+                        "model": devil_vote.model,
+                        "role": devil_vote.role,
+                        "approved": devil_vote.approved,
+                        "confidence": devil_vote.confidence,
+                        "reasoning": devil_vote.reasoning,
+                    },
+                ],
+                majority_ratio=1.0 if deliberation_result.approved else 0.0,
+                disagreements=[],
+            )
+        )
+
+        return Result.ok((deliberation_result, events))
+
+    async def _get_position(
+        self,
+        context: EvaluationContext,
+        role: VoterRole,
+    ) -> Result[Vote, ProviderError | ValidationError]:
+        """Get a position from Advocate or Devil's Advocate.
+
+        Args:
+            context: Evaluation context
+            role: The role (ADVOCATE or DEVIL)
+
+        Returns:
+            Result containing Vote or error
+        """
+        if role == VoterRole.ADVOCATE:
+            # Advocate uses direct LLM call with role-specific prompt
+            system_prompt = ADVOCATE_SYSTEM_PROMPT
+            model = self._config.advocate_model
+
+            messages = [
+                Message(role=MessageRole.SYSTEM, content=system_prompt),
+                Message(role=MessageRole.USER, content=build_consensus_prompt(context)),
+            ]
+
+            config = CompletionConfig(
+                model=model,
+                temperature=self._config.temperature,
+                max_tokens=self._config.max_tokens,
+            )
+
+            llm_result = await self._llm.complete(messages, config)
+            if llm_result.is_err:
+                return Result.err(llm_result.error)
+
+            vote_result = parse_vote_response(llm_result.value.content, model)
+            if vote_result.is_err:
+                return Result.err(vote_result.error)
+
+            vote = vote_result.value
+            return Result.ok(
+                Vote(
+                    model=vote.model,
+                    approved=vote.approved,
+                    confidence=vote.confidence,
+                    reasoning=vote.reasoning,
+                    role=role,
+                )
+            )
+
+        elif role == VoterRole.DEVIL:
+            # Devil uses AOP-based DevilAdvocateStrategy for ontological analysis
+            return await self._get_devil_position(context)
+
+        else:
+            return Result.err(
+                ValidationError(f"Invalid role for position: {role}")
+            )
+
+    async def _get_devil_position(
+        self,
+        context: EvaluationContext,
+    ) -> Result[Vote, ProviderError | ValidationError]:
+        """Get Devil's Advocate position using ontological analysis.
+
+        Uses DevilAdvocateStrategy to analyze whether the artifact
+        addresses root cause or treats symptoms.
+
+        Args:
+            context: Evaluation context
+
+        Returns:
+            Result containing Vote with Devil's Advocate role
+        """
+        # Convert EvaluationContext to ConsensusContext for strategy
+        consensus_ctx = ConsensusContext(
+            artifact=context.artifact,
+            goal=context.goal,
+            current_ac=context.current_ac,
+            constraints=context.constraints,
+        )
+
+        # Strategy handles errors gracefully (returns AnalysisResult.invalid on LLM failure)
+        analysis = await self._devil_strategy.analyze(consensus_ctx)
+
+        # Convert AnalysisResult to Vote
+        vote = self._analysis_to_vote(analysis)
+        return Result.ok(vote)
+
+    def _analysis_to_vote(self, analysis: AnalysisResult) -> Vote:
+        """Convert AnalysisResult to Vote for Devil's Advocate.
+
+        Maps ontological analysis result to consensus voting format:
+        - is_valid -> approved
+        - confidence -> confidence
+        - reasoning + suggestions -> reasoning
+
+        Args:
+            analysis: The ontological analysis result
+
+        Returns:
+            Vote with Devil's Advocate role
+        """
+        # Build reasoning text
+        if analysis.is_valid:
+            reasoning_text = (
+                analysis.reasoning[0]
+                if analysis.reasoning
+                else "Passed ontological analysis: addresses root cause"
+            )
+        else:
+            # Combine reasoning and suggestions for invalid case
+            parts = list(analysis.reasoning)
+            if analysis.suggestions:
+                parts.append("Suggestions: " + "; ".join(analysis.suggestions))
+            reasoning_text = "\n".join(parts) if parts else "Failed ontological analysis"
+
+        return Vote(
+            model=self._devil_strategy.model,
+            approved=analysis.is_valid,
+            confidence=analysis.confidence,
+            reasoning=reasoning_text,
+            role=VoterRole.DEVIL,
+        )
+
+    async def _get_judgment(
+        self,
+        context: EvaluationContext,
+        advocate_vote: Vote,
+        devil_vote: Vote,
+    ) -> Result[JudgmentResult, ProviderError | ValidationError]:
+        """Get final judgment from Judge.
+
+        Args:
+            context: Evaluation context
+            advocate_vote: The Advocate's position
+            devil_vote: The Devil's Advocate's position
+
+        Returns:
+            Result containing JudgmentResult or error
+        """
+        # Build prompt with both positions
+        user_prompt = f"""{build_consensus_prompt(context)}
+
+---
+
+## Round 1 Positions
+
+### ADVOCATE's Position
+Approved: {advocate_vote.approved}
+Confidence: {advocate_vote.confidence:.2f}
+Reasoning: {advocate_vote.reasoning}
+
+### DEVIL'S ADVOCATE's Position (Ontological Analysis)
+Approved: {devil_vote.approved}
+Confidence: {devil_vote.confidence:.2f}
+Reasoning: {devil_vote.reasoning}
+
+---
+
+Based on both positions above, make your final judgment."""
+
+        messages = [
+            Message(role=MessageRole.SYSTEM, content=JUDGE_SYSTEM_PROMPT),
+            Message(role=MessageRole.USER, content=user_prompt),
+        ]
+
+        config = CompletionConfig(
+            model=self._config.judge_model,
+            temperature=self._config.temperature,
+            max_tokens=self._config.max_tokens,
+        )
+
+        llm_result = await self._llm.complete(messages, config)
+        if llm_result.is_err:
+            return Result.err(llm_result.error)
+
+        return _parse_judgment_response(
+            llm_result.value.content, self._config.judge_model
+        )
+
+
 async def run_consensus_evaluation(
     context: EvaluationContext,
     llm_adapter: LiteLLMAdapter,
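
The Judge's JSON contract above is decoded by _parse_judgment_response into a JudgmentResult: the verdict string is lowercased and mapped onto FinalVerdict, confidence is clamped to [0.0, 1.0], and conditions become a tuple of strings. An illustrative sketch of that round trip (the helper is module-private; it is called directly here only for demonstration):

    from ouroboros.evaluation.consensus import _parse_judgment_response
    from ouroboros.evaluation.models import FinalVerdict

    raw = """Weighing both positions:
    {"verdict": "conditional", "confidence": 0.72,
     "reasoning": "Addresses the root cause but lacks regression tests.",
     "conditions": ["add regression tests for the JSON extractor"]}"""

    result = _parse_judgment_response(raw, "openrouter/google/gemini-2.5-pro")
    if not result.is_err:
        judgment = result.value
        assert judgment.verdict == FinalVerdict.CONDITIONAL
        # e.g. 0.72 ('add regression tests for the JSON extractor',)
        print(judgment.confidence, judgment.conditions)
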
@@ -348,3 +879,30 @@ async def run_consensus_evaluation(
     """
     evaluator = ConsensusEvaluator(llm_adapter, config)
    return await evaluator.evaluate(context, trigger_reason)
+
+
+async def run_deliberative_evaluation(
+    context: EvaluationContext,
+    llm_adapter: LiteLLMAdapter,
+    trigger_reason: str = "manual",
+    config: DeliberativeConfig | None = None,
+    devil_strategy: DevilAdvocateStrategy | None = None,
+) -> Result[tuple[DeliberationResult, list[BaseEvent]], ProviderError | ValidationError]:
+    """Convenience function for running deliberative consensus.
+
+    Recommended for complex decisions where ensuring root cause
+    resolution is important. Uses AOP-based DevilAdvocateStrategy
+    for ontological analysis.
+
+    Args:
+        context: Evaluation context
+        llm_adapter: LLM adapter
+        trigger_reason: Why consensus was triggered
+        config: Optional configuration
+        devil_strategy: Optional custom strategy for Devil's Advocate
+
+    Returns:
+        Result with DeliberationResult and events
+    """
+    evaluator = DeliberativeConsensus(llm_adapter, config, devil_strategy)
+    return await evaluator.deliberate(context, trigger_reason)
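
Taken together, run_deliberative_evaluation is the new top-level entry point for the deliberative mode. A minimal, hedged usage sketch: the EvaluationContext construction assumes the field names this diff reads from it (execution_id, goal, current_ac, artifact, constraints), and LiteLLMAdapter's constructor arguments are not shown in this diff, so both are assumptions rather than documented API:

    import asyncio

    from ouroboros.evaluation.consensus import run_deliberative_evaluation
    from ouroboros.evaluation.models import EvaluationContext
    from ouroboros.providers.litellm_adapter import LiteLLMAdapter

    async def main() -> None:
        adapter = LiteLLMAdapter()  # assumed default construction
        context = EvaluationContext(  # field names taken from usage in this diff
            execution_id="exec-001",
            goal="Fix flaky JSON parsing in consensus votes",
            current_ac="extract_json_payload returns the first balanced object",
            artifact="<candidate diff or file contents under review>",
            constraints=(),
        )

        result = await run_deliberative_evaluation(context, adapter, trigger_reason="ci")
        if result.is_err:
            print("deliberation failed:", result.error)
            return

        deliberation, events = result.value
        # final_verdict comes from the Judge; is_root_solution mirrors the
        # Devil's Advocate's approval (see the DeliberationResult assembly above)
        print(deliberation.final_verdict, deliberation.is_root_solution, len(events))

    asyncio.run(main())
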