noesium 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/core/__init__.py +4 -0
- noesium/core/agent/__init__.py +14 -0
- noesium/core/agent/base.py +227 -0
- noesium/core/consts.py +6 -0
- noesium/core/goalith/conflict/conflict.py +104 -0
- noesium/core/goalith/conflict/detector.py +53 -0
- noesium/core/goalith/decomposer/__init__.py +6 -0
- noesium/core/goalith/decomposer/base.py +46 -0
- noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
- noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
- noesium/core/goalith/decomposer/prompts.py +140 -0
- noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
- noesium/core/goalith/errors.py +22 -0
- noesium/core/goalith/goalgraph/graph.py +526 -0
- noesium/core/goalith/goalgraph/node.py +179 -0
- noesium/core/goalith/replanner/base.py +31 -0
- noesium/core/goalith/replanner/replanner.py +36 -0
- noesium/core/goalith/service.py +26 -0
- noesium/core/llm/__init__.py +154 -0
- noesium/core/llm/base.py +152 -0
- noesium/core/llm/litellm.py +528 -0
- noesium/core/llm/llamacpp.py +487 -0
- noesium/core/llm/message.py +184 -0
- noesium/core/llm/ollama.py +459 -0
- noesium/core/llm/openai.py +520 -0
- noesium/core/llm/openrouter.py +89 -0
- noesium/core/llm/prompt.py +551 -0
- noesium/core/memory/__init__.py +11 -0
- noesium/core/memory/base.py +464 -0
- noesium/core/memory/memu/__init__.py +24 -0
- noesium/core/memory/memu/config/__init__.py +26 -0
- noesium/core/memory/memu/config/activity/config.py +46 -0
- noesium/core/memory/memu/config/event/config.py +46 -0
- noesium/core/memory/memu/config/markdown_config.py +241 -0
- noesium/core/memory/memu/config/profile/config.py +48 -0
- noesium/core/memory/memu/llm_adapter.py +129 -0
- noesium/core/memory/memu/memory/__init__.py +31 -0
- noesium/core/memory/memu/memory/actions/__init__.py +40 -0
- noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
- noesium/core/memory/memu/memory/actions/base_action.py +342 -0
- noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
- noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
- noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
- noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
- noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
- noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
- noesium/core/memory/memu/memory/embeddings.py +130 -0
- noesium/core/memory/memu/memory/file_manager.py +306 -0
- noesium/core/memory/memu/memory/memory_agent.py +578 -0
- noesium/core/memory/memu/memory/recall_agent.py +376 -0
- noesium/core/memory/memu/memory_store.py +628 -0
- noesium/core/memory/models.py +149 -0
- noesium/core/msgbus/__init__.py +12 -0
- noesium/core/msgbus/base.py +395 -0
- noesium/core/orchestrix/__init__.py +0 -0
- noesium/core/py.typed +0 -0
- noesium/core/routing/__init__.py +20 -0
- noesium/core/routing/base.py +66 -0
- noesium/core/routing/router.py +241 -0
- noesium/core/routing/strategies/__init__.py +9 -0
- noesium/core/routing/strategies/dynamic_complexity.py +361 -0
- noesium/core/routing/strategies/self_assessment.py +147 -0
- noesium/core/routing/types.py +38 -0
- noesium/core/toolify/__init__.py +39 -0
- noesium/core/toolify/base.py +360 -0
- noesium/core/toolify/config.py +138 -0
- noesium/core/toolify/mcp_integration.py +275 -0
- noesium/core/toolify/registry.py +214 -0
- noesium/core/toolify/toolkits/__init__.py +1 -0
- noesium/core/tracing/__init__.py +37 -0
- noesium/core/tracing/langgraph_hooks.py +308 -0
- noesium/core/tracing/opik_tracing.py +144 -0
- noesium/core/tracing/token_tracker.py +166 -0
- noesium/core/utils/__init__.py +10 -0
- noesium/core/utils/logging.py +172 -0
- noesium/core/utils/statistics.py +12 -0
- noesium/core/utils/typing.py +17 -0
- noesium/core/vector_store/__init__.py +79 -0
- noesium/core/vector_store/base.py +94 -0
- noesium/core/vector_store/pgvector.py +304 -0
- noesium/core/vector_store/weaviate.py +383 -0
- noesium-0.1.0.dist-info/METADATA +525 -0
- noesium-0.1.0.dist-info/RECORD +86 -0
- noesium-0.1.0.dist-info/WHEEL +5 -0
- noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
- noesium-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"""Main router class for LLM model selection."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, Optional, Type, Union

from noesium.core.llm import get_llm_client
from noesium.core.llm.base import BaseLLMClient
from noesium.core.utils.logging import get_logger

from .base import BaseRoutingStrategy
from .strategies import DynamicComplexityStrategy, SelfAssessmentStrategy
from .types import ComplexityScore, ModelTier, RoutingResult
|
|
12
|
+
|
|
13
|
+
logger = get_logger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ModelRouter:
    """
    Main router class for determining appropriate model tier for queries.

    This router uses pluggable strategies to analyze query complexity
    and recommend the most suitable model tier (lite/fast/power).
    """

    # Registry of available strategies: name -> strategy class.
    # Extend at runtime via register_strategy().
    STRATEGIES = {
        "self_assessment": SelfAssessmentStrategy,
        "dynamic_complexity": DynamicComplexityStrategy,
    }

    def __init__(
        self,
        strategy: Union[str, BaseRoutingStrategy] = "dynamic_complexity",
        lite_client: Optional[BaseLLMClient] = None,
        lite_client_config: Optional[Dict[str, Any]] = None,
        strategy_config: Optional[Dict[str, Any]] = None,
    ):
        """
        Initialize the model router.

        Args:
            strategy: Routing strategy name or instance
            lite_client: Pre-configured lite model client
            lite_client_config: Configuration for lite client creation
            strategy_config: Strategy-specific configuration

        Raises:
            ValueError: If ``strategy`` is an unknown name or invalid type.
        """
        # Copy so later update_strategy_config() calls never mutate the
        # caller's dict.
        self.strategy_config = (strategy_config or {}).copy()

        # Setup lite client if needed (may end up None if no backend works).
        self.lite_client = self._setup_lite_client(lite_client, lite_client_config)

        # Initialize routing strategy. Must run after the lite client setup,
        # because strategies are constructed with self.lite_client.
        self.strategy = self._setup_strategy(strategy)

        logger.info(f"Initialized ModelRouter with strategy: {self.strategy.get_strategy_name()}")

    def route(self, query: str) -> RoutingResult:
        """
        Route a query to the appropriate model tier.

        Args:
            query: Input query to analyze

        Returns:
            RoutingResult with tier recommendation and analysis.
            Empty/whitespace-only queries default to the LITE tier; any
            strategy exception falls back to the FAST tier with the error
            recorded in the result metadata.
        """
        if not query or not query.strip():
            # Nothing to analyze: cheapest tier, mid confidence.
            logger.warning("Empty query provided, defaulting to LITE tier")
            return RoutingResult(
                tier=ModelTier.LITE,
                confidence=0.5,
                complexity_score=ComplexityScore(total=0.0),
                strategy=self.strategy.get_strategy_name(),
                metadata={"empty_query": True},
            )

        try:
            return self.strategy.route(query.strip())
        except Exception as e:
            logger.error(f"Error in routing: {e}")
            # Fallback to FAST tier on error: a middle-ground default with
            # zero confidence so callers can detect the degraded result.
            return RoutingResult(
                tier=ModelTier.FAST,
                confidence=0.0,
                complexity_score=ComplexityScore(total=0.5),
                strategy=self.strategy.get_strategy_name(),
                metadata={"error": str(e), "fallback": True},
            )

    def get_recommended_model_params(
        self, routing_result: RoutingResult, model_configs: Optional[Dict[ModelTier, Dict[str, Any]]] = None
    ) -> Dict[str, Any]:
        """
        Get recommended model parameters based on routing result.

        Args:
            routing_result: Result from route() call
            model_configs: Optional mapping of tiers to model configurations

        Returns:
            Dictionary of model parameters for the recommended tier. A copy
            is returned when ``model_configs`` supplies the tier, so callers
            may mutate it freely.
        """
        tier = routing_result.tier

        if model_configs and tier in model_configs:
            return model_configs[tier].copy()

        # Default configurations for each tier.
        default_configs = {
            ModelTier.LITE: {
                "provider": "llamacpp",  # Fast local inference
                "temperature": 0.3,
                "max_tokens": 512,
            },
            ModelTier.FAST: {
                "provider": "openai",
                "chat_model": "gpt-4o-mini",
                "temperature": 0.7,
                "max_tokens": 1024,
            },
            ModelTier.POWER: {
                "provider": "openai",
                "chat_model": "gpt-4o",
                "temperature": 0.7,
                "max_tokens": 2048,
            },
        }

        # Unknown tiers degrade to the FAST defaults.
        return default_configs.get(tier, default_configs[ModelTier.FAST])

    def route_and_configure(
        self, query: str, model_configs: Optional[Dict[ModelTier, Dict[str, Any]]] = None
    ) -> tuple[RoutingResult, Dict[str, Any]]:
        """
        Route query and return both result and recommended model configuration.

        Args:
            query: Input query to analyze
            model_configs: Optional mapping of tiers to model configurations

        Returns:
            Tuple of (RoutingResult, model_config_dict)
        """
        routing_result = self.route(query)
        model_config = self.get_recommended_model_params(routing_result, model_configs)

        return routing_result, model_config

    def update_strategy_config(self, config: Dict[str, Any]) -> None:
        """
        Update strategy configuration.

        Merges ``config`` into the stored strategy configuration and
        rebuilds the strategy instance so the new settings take effect.

        Args:
            config: New configuration parameters
        """
        self.strategy_config.update(config)

        # Reinitialize strategy with new config.
        strategy_name = self.strategy.get_strategy_name()
        self.strategy = self._setup_strategy(strategy_name)

        logger.info(f"Updated strategy configuration for {strategy_name}")

    def get_strategy_info(self) -> Dict[str, Any]:
        """
        Get information about the current routing strategy.

        Returns:
            Dictionary with strategy information. Note: despite the key
            name, "requires_lite_client" reports whether a lite client is
            currently configured (key kept for backward compatibility).
        """
        return {
            "name": self.strategy.get_strategy_name(),
            "config": self.strategy_config.copy(),
            "requires_lite_client": self.lite_client is not None,
        }

    @classmethod
    def get_available_strategies(cls) -> list[str]:
        """Get list of available strategy names."""
        return list(cls.STRATEGIES.keys())

    @classmethod
    def register_strategy(cls, name: str, strategy_class: Type[BaseRoutingStrategy]) -> None:
        """
        Register a new routing strategy.

        Args:
            name: Strategy name
            strategy_class: Strategy class

        Raises:
            ValueError: If ``strategy_class`` is not a BaseRoutingStrategy
                subclass.
        """
        if not issubclass(strategy_class, BaseRoutingStrategy):
            raise ValueError("Strategy class must inherit from BaseRoutingStrategy")

        cls.STRATEGIES[name] = strategy_class
        logger.info(f"Registered new routing strategy: {name}")

    def _setup_lite_client(
        self, lite_client: Optional[BaseLLMClient], lite_client_config: Optional[Dict[str, Any]]
    ) -> Optional[BaseLLMClient]:
        """Setup lite model client if needed.

        Resolution order: explicit instance, explicit config, llamacpp
        default, OpenAI gpt-4o-mini fallback. Returns None when every
        option fails (strategies are expected to degrade gracefully).
        """
        if lite_client:
            return lite_client

        if lite_client_config:
            try:
                return get_llm_client(**lite_client_config)
            except Exception as e:
                logger.warning(f"Failed to create lite client: {e}")
                return None

        # Try to create a default lite client.
        try:
            # Try llamacpp first (fastest for lite operations).
            return get_llm_client(provider="llamacpp")
        except Exception:
            try:
                # Fallback to OpenAI with a fast model.
                return get_llm_client(provider="openai", chat_model="gpt-4o-mini")
            except Exception as e:
                logger.warning(f"Could not create default lite client: {e}")
                return None

    def _setup_strategy(self, strategy: Union[str, BaseRoutingStrategy]) -> BaseRoutingStrategy:
        """Setup the routing strategy.

        Accepts either a ready strategy instance (returned unchanged) or a
        registered strategy name (instantiated with the router's lite
        client and current strategy_config).

        Raises:
            ValueError: For an unknown name or an unsupported type.
        """
        if isinstance(strategy, BaseRoutingStrategy):
            return strategy

        if isinstance(strategy, str):
            if strategy not in self.STRATEGIES:
                raise ValueError(
                    f"Unknown strategy: {strategy}. " f"Available strategies: {list(self.STRATEGIES.keys())}"
                )

            strategy_class = self.STRATEGIES[strategy]
            return strategy_class(lite_client=self.lite_client, config=self.strategy_config)

        raise ValueError(f"Invalid strategy type: {type(strategy)}")
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
"""Dynamic complexity routing strategy implementation."""
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
from noesium.core.routing.base import BaseRoutingStrategy
|
|
7
|
+
from noesium.core.routing.types import ComplexityScore, ModelTier, RoutingResult
|
|
8
|
+
from noesium.core.utils.logging import get_logger
|
|
9
|
+
|
|
10
|
+
logger = get_logger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DynamicComplexityStrategy(BaseRoutingStrategy):
    """
    Routing strategy based on dynamic complexity index calculation.

    This strategy computes a complexity score from multiple signals:
    - Linguistic complexity (sentence structure, vocabulary)
    - Reasoning depth (assessed by lite model)
    - Knowledge uncertainty (perplexity/confidence analysis)
    """

    def __init__(self, lite_client=None, config=None):
        """
        Initialize the dynamic complexity strategy.

        Args:
            lite_client: LLM client for the lite model
            config: Configuration dict with optional parameters:
                - alpha: Weight for linguistic score (default: 0.4)
                - beta: Weight for reasoning score (default: 0.4)
                - gamma: Weight for uncertainty score (default: 0.2)
                - lite_threshold: Max score for lite routing (default: 0.3)
                - fast_threshold: Max score for fast routing (default: 0.65)
                - temperature: Temperature for reasoning assessment (default: 0.1)
                - reasoning_max_tokens: Max tokens for reasoning assessment (default: 3)
                - uncertainty_max_tokens: Max tokens for uncertainty analysis (default: 64)
        """
        super().__init__(lite_client, config)

        # Weighting parameters
        self.alpha = self.config.get("alpha", 0.4)  # Linguistic weight
        self.beta = self.config.get("beta", 0.4)  # Reasoning weight
        self.gamma = self.config.get("gamma", 0.2)  # Uncertainty weight

        # Threshold parameters
        self.lite_threshold = self.config.get("lite_threshold", 0.3)
        self.fast_threshold = self.config.get("fast_threshold", 0.65)

        # LLM parameters
        self.temperature = self.config.get("temperature", 0.1)
        self.reasoning_max_tokens = self.config.get("reasoning_max_tokens", 3)
        self.uncertainty_max_tokens = self.config.get("uncertainty_max_tokens", 3 if False else 64)

        # Validate weights sum to 1.0; normalize in place otherwise so the
        # weighted complexity index stays in [0, 1].
        total_weight = self.alpha + self.beta + self.gamma
        if abs(total_weight - 1.0) > 0.01:
            logger.warning(f"Weights don't sum to 1.0 (sum={total_weight}). Normalizing...")
            self.alpha /= total_weight
            self.beta /= total_weight
            self.gamma /= total_weight

    def route(self, query: str) -> RoutingResult:
        """
        Route query based on dynamic complexity index.

        Args:
            query: Input query to assess

        Returns:
            RoutingResult with tier recommendation and detailed analysis.
            Any internal error falls back to the FAST tier with zero
            confidence and the error recorded in metadata.
        """
        try:
            # Calculate individual complexity components.
            linguistic_score = self._calculate_linguistic_score(query)
            reasoning_score = self._calculate_reasoning_score(query)
            uncertainty_score = self._calculate_uncertainty_score(query)

            # Compute weighted complexity index.
            complexity_index = (
                self.alpha * linguistic_score + self.beta * reasoning_score + self.gamma * uncertainty_score
            )

            # Determine tier based on complexity index.
            tier = self._index_to_tier(complexity_index)

            # Calculate confidence based on component consistency.
            confidence = self._calculate_confidence(linguistic_score, reasoning_score, uncertainty_score)

            # Create detailed complexity score.
            complexity_score_obj = ComplexityScore(
                total=complexity_index,
                linguistic=linguistic_score,
                reasoning=reasoning_score,
                uncertainty=uncertainty_score,
                metadata={
                    "weights": {"alpha": self.alpha, "beta": self.beta, "gamma": self.gamma},
                    "components": {
                        "linguistic": linguistic_score,
                        "reasoning": reasoning_score,
                        "uncertainty": uncertainty_score,
                    },
                },
            )

            return self._create_result(
                tier=tier,
                confidence=confidence,
                complexity_score=complexity_score_obj,
                metadata={"thresholds": {"lite": self.lite_threshold, "fast": self.fast_threshold}},
            )

        except Exception as e:
            logger.error(f"Error in dynamic complexity routing: {e}")
            # Fallback to fast tier on error.
            return self._create_result(
                tier=ModelTier.FAST,
                confidence=0.0,
                complexity_score=ComplexityScore(total=0.5),
                metadata={"error": str(e), "fallback": True},
            )

    def get_strategy_name(self) -> str:
        """Return the strategy name."""
        return "dynamic_complexity"

    def _calculate_linguistic_score(self, query: str) -> float:
        """
        Calculate linguistic complexity based on sentence structure and vocabulary.

        Args:
            query: Input query

        Returns:
            Linguistic complexity score (0.0-1.0); 0.5 on internal error.
        """
        try:
            # Handle empty query case.
            if not query.strip():
                return 0.0

            # Count tokens (approximate).
            tokens = re.findall(r"\w+", query)
            token_count = len(tokens)

            # Count structural complexity indicators (clause separators and
            # space-delimited conjunctions; case-sensitive by design of the
            # original heuristic).
            clauses = (
                query.count(",")
                + query.count(";")
                + query.count(" and ")
                + query.count(" or ")
                + query.count(" but ")
                + query.count(" because ")
                + query.count(" if ")
                + query.count(" when ")
                + query.count(" while ")
            )

            # Count sentences. Filter out empty segments: a plain
            # len(re.split(...)) would count the empty string after trailing
            # terminal punctuation, inflating "Hi." to 2 sentences.
            sentences = len([s for s in re.split(r"[.!?]+", query.strip()) if s.strip()])

            # Calculate complexity factors.
            token_factor = min(1.0, token_count / 50.0)  # Normalize around 50 tokens
            clause_factor = min(1.0, clauses / 5.0)  # Normalize around 5 clauses
            sentence_factor = min(1.0, sentences / 3.0)  # Normalize around 3 sentences

            # Count complex words (>6 characters as simple heuristic).
            complex_words = sum(1 for token in tokens if len(token) > 6)
            vocab_factor = min(1.0, complex_words / max(1, token_count) * 2)

            # Weighted combination.
            linguistic_score = 0.3 * token_factor + 0.3 * clause_factor + 0.2 * sentence_factor + 0.2 * vocab_factor

            return min(1.0, max(0.0, linguistic_score))

        except Exception as e:
            logger.warning(f"Error calculating linguistic score: {e}")
            return 0.5

    def _calculate_reasoning_score(self, query: str) -> float:
        """
        Calculate reasoning depth using lite model assessment.

        Args:
            query: Input query

        Returns:
            Reasoning complexity score (0.0-1.0)
        """
        if not self.lite_client:
            # Fallback: simple keyword-based reasoning detection.
            return self._fallback_reasoning_score(query)

        try:
            prompt = f"""Classify reasoning depth of request:
- 0 = factual recall
- 1 = some reasoning/planning
- 2 = multi-step or abstract reasoning

Request: "{query}"
Output: number only"""

            messages = [{"role": "user", "content": prompt}]
            # NOTE(review): assumes completion() returns the response text as
            # a plain str — confirm against BaseLLMClient implementations.
            response = self.lite_client.completion(
                messages=messages, temperature=self.temperature, max_tokens=self.reasoning_max_tokens
            )

            # Parse response: take the first digit in the 0-2 range.
            response_clean = response.strip()
            for char in response_clean:
                if char.isdigit():
                    score = int(char)
                    if 0 <= score <= 2:
                        return score / 2.0  # Normalize to 0-1 range

            # Fallback if parsing fails.
            logger.warning(f"Could not parse reasoning response: {response}")
            return self._fallback_reasoning_score(query)

        except Exception as e:
            logger.warning(f"Error calculating reasoning score with LLM: {e}")
            return self._fallback_reasoning_score(query)

    def _fallback_reasoning_score(self, query: str) -> float:
        """Fallback reasoning score based on keywords."""
        reasoning_keywords = [
            "analyze",
            "compare",
            "evaluate",
            "explain",
            "why",
            "how",
            "cause",
            "effect",
            "relationship",
            "implication",
            "conclusion",
            "strategy",
            "plan",
            "design",
            "create",
            "develop",
            "solve",
        ]

        query_lower = query.lower()
        keyword_count = sum(1 for keyword in reasoning_keywords if keyword in query_lower)

        return min(1.0, keyword_count / 3.0)  # Normalize around 3 keywords

    def _calculate_uncertainty_score(self, query: str) -> float:
        """
        Calculate knowledge uncertainty using perplexity analysis.

        Args:
            query: Input query

        Returns:
            Uncertainty score (0.0-1.0)
        """
        if not self.lite_client:
            # Fallback: domain-based uncertainty estimation.
            return self._fallback_uncertainty_score(query)

        try:
            # Check if we can get logprobs (depends on the LLM client implementation).
            messages = [{"role": "user", "content": query}]

            # Try to get response with some generation to assess uncertainty.
            response = self.lite_client.completion(
                messages=messages,
                temperature=0.1,  # Low temperature for consistency
                max_tokens=self.uncertainty_max_tokens,
            )

            # For now, use response length and coherence as uncertainty proxy.
            # A very short or very long response might indicate uncertainty.
            response_tokens = len(response.split())

            if response_tokens < 5:  # Very short response
                uncertainty = 0.7
            elif response_tokens > 40:  # Very long response
                uncertainty = 0.6
            else:
                uncertainty = 0.3  # Normal length suggests confidence

            # Adjust based on hedging words (each adds 0.1, capped at +0.3).
            hedging_words = ["maybe", "perhaps", "possibly", "might", "could", "uncertain"]
            hedging_count = sum(1 for word in hedging_words if word in response.lower())
            uncertainty += min(0.3, hedging_count * 0.1)

            return min(1.0, max(0.0, uncertainty))

        except Exception as e:
            logger.warning(f"Error calculating uncertainty score with LLM: {e}")
            return self._fallback_uncertainty_score(query)

    def _fallback_uncertainty_score(self, query: str) -> float:
        """Fallback uncertainty score based on domain heuristics."""
        # Questions tend to have higher uncertainty.
        question_count = query.count("?")

        # Specific vs general queries.
        specific_indicators = ["specific", "exact", "precise", "particular"]
        general_indicators = ["general", "overview", "broad", "overall"]

        query_lower = query.lower()
        specific_score = sum(1 for word in specific_indicators if word in query_lower)
        general_score = sum(1 for word in general_indicators if word in query_lower)

        base_uncertainty = 0.4
        uncertainty_adjustment = question_count * 0.1 + general_score * 0.1 - specific_score * 0.1

        return min(1.0, max(0.0, base_uncertainty + uncertainty_adjustment))

    def _index_to_tier(self, complexity_index: float) -> ModelTier:
        """
        Convert complexity index to model tier.

        Args:
            complexity_index: Overall complexity score (0.0-1.0)

        Returns:
            Appropriate ModelTier
        """
        if complexity_index < self.lite_threshold:
            return ModelTier.LITE
        elif complexity_index < self.fast_threshold:
            return ModelTier.FAST
        else:
            return ModelTier.POWER

    def _calculate_confidence(self, linguistic: float, reasoning: float, uncertainty: float) -> float:
        """
        Calculate confidence based on consistency of component scores.

        Args:
            linguistic: Linguistic complexity score
            reasoning: Reasoning complexity score
            uncertainty: Uncertainty score

        Returns:
            Confidence score (0.0-1.0)
        """
        scores = [linguistic, reasoning, uncertainty]

        # Calculate standard deviation as measure of consistency.
        mean_score = sum(scores) / len(scores)
        variance = sum((score - mean_score) ** 2 for score in scores) / len(scores)
        std_dev = math.sqrt(variance)

        # Higher consistency (lower std_dev) = higher confidence.
        # Max std_dev is ~0.58 (when scores are maximally spread, e.g., 0, 1, 0.5).
        # Make it more sensitive to inconsistency.
        consistency = 1.0 - min(1.0, std_dev * 2.5)

        # Base confidence adjusted by consistency.
        base_confidence = 0.6
        confidence = base_confidence + (consistency * 0.4)

        return min(1.0, max(0.0, confidence))