local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. local_deep_research/__init__.py +7 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/answer_decoding/__init__.py +5 -0
  4. local_deep_research/advanced_search_system/answer_decoding/browsecomp_answer_decoder.py +421 -0
  5. local_deep_research/advanced_search_system/candidate_exploration/README.md +219 -0
  6. local_deep_research/advanced_search_system/candidate_exploration/__init__.py +25 -0
  7. local_deep_research/advanced_search_system/candidate_exploration/adaptive_explorer.py +329 -0
  8. local_deep_research/advanced_search_system/candidate_exploration/base_explorer.py +341 -0
  9. local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py +436 -0
  10. local_deep_research/advanced_search_system/candidate_exploration/diversity_explorer.py +457 -0
  11. local_deep_research/advanced_search_system/candidate_exploration/parallel_explorer.py +250 -0
  12. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +255 -0
  13. local_deep_research/advanced_search_system/candidates/__init__.py +5 -0
  14. local_deep_research/advanced_search_system/candidates/base_candidate.py +59 -0
  15. local_deep_research/advanced_search_system/constraint_checking/README.md +150 -0
  16. local_deep_research/advanced_search_system/constraint_checking/__init__.py +35 -0
  17. local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py +122 -0
  18. local_deep_research/advanced_search_system/constraint_checking/constraint_checker.py +223 -0
  19. local_deep_research/advanced_search_system/constraint_checking/constraint_satisfaction_tracker.py +387 -0
  20. local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py +424 -0
  21. local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py +174 -0
  22. local_deep_research/advanced_search_system/constraint_checking/intelligent_constraint_relaxer.py +503 -0
  23. local_deep_research/advanced_search_system/constraint_checking/rejection_engine.py +143 -0
  24. local_deep_research/advanced_search_system/constraint_checking/strict_checker.py +259 -0
  25. local_deep_research/advanced_search_system/constraint_checking/threshold_checker.py +213 -0
  26. local_deep_research/advanced_search_system/constraints/__init__.py +6 -0
  27. local_deep_research/advanced_search_system/constraints/base_constraint.py +58 -0
  28. local_deep_research/advanced_search_system/constraints/constraint_analyzer.py +143 -0
  29. local_deep_research/advanced_search_system/evidence/__init__.py +12 -0
  30. local_deep_research/advanced_search_system/evidence/base_evidence.py +57 -0
  31. local_deep_research/advanced_search_system/evidence/evaluator.py +159 -0
  32. local_deep_research/advanced_search_system/evidence/requirements.py +122 -0
  33. local_deep_research/advanced_search_system/filters/base_filter.py +3 -1
  34. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +8 -2
  35. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +43 -29
  36. local_deep_research/advanced_search_system/findings/repository.py +54 -17
  37. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +3 -1
  38. local_deep_research/advanced_search_system/query_generation/adaptive_query_generator.py +405 -0
  39. local_deep_research/advanced_search_system/questions/__init__.py +16 -0
  40. local_deep_research/advanced_search_system/questions/atomic_fact_question.py +171 -0
  41. local_deep_research/advanced_search_system/questions/browsecomp_question.py +287 -0
  42. local_deep_research/advanced_search_system/questions/decomposition_question.py +13 -4
  43. local_deep_research/advanced_search_system/questions/entity_aware_question.py +184 -0
  44. local_deep_research/advanced_search_system/questions/standard_question.py +9 -3
  45. local_deep_research/advanced_search_system/search_optimization/cross_constraint_manager.py +624 -0
  46. local_deep_research/advanced_search_system/source_management/diversity_manager.py +613 -0
  47. local_deep_research/advanced_search_system/strategies/__init__.py +42 -0
  48. local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py +564 -0
  49. local_deep_research/advanced_search_system/strategies/base_strategy.py +4 -4
  50. local_deep_research/advanced_search_system/strategies/browsecomp_entity_strategy.py +1031 -0
  51. local_deep_research/advanced_search_system/strategies/browsecomp_optimized_strategy.py +778 -0
  52. local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py +446 -0
  53. local_deep_research/advanced_search_system/strategies/constrained_search_strategy.py +1348 -0
  54. local_deep_research/advanced_search_system/strategies/constraint_parallel_strategy.py +522 -0
  55. local_deep_research/advanced_search_system/strategies/direct_search_strategy.py +217 -0
  56. local_deep_research/advanced_search_system/strategies/dual_confidence_strategy.py +320 -0
  57. local_deep_research/advanced_search_system/strategies/dual_confidence_with_rejection.py +219 -0
  58. local_deep_research/advanced_search_system/strategies/early_stop_constrained_strategy.py +369 -0
  59. local_deep_research/advanced_search_system/strategies/entity_aware_source_strategy.py +140 -0
  60. local_deep_research/advanced_search_system/strategies/evidence_based_strategy.py +1248 -0
  61. local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py +1337 -0
  62. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +537 -0
  63. local_deep_research/advanced_search_system/strategies/improved_evidence_based_strategy.py +782 -0
  64. local_deep_research/advanced_search_system/strategies/iterative_reasoning_strategy.py +760 -0
  65. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +55 -21
  66. local_deep_research/advanced_search_system/strategies/llm_driven_modular_strategy.py +865 -0
  67. local_deep_research/advanced_search_system/strategies/modular_strategy.py +1142 -0
  68. local_deep_research/advanced_search_system/strategies/parallel_constrained_strategy.py +506 -0
  69. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +34 -16
  70. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +29 -9
  71. local_deep_research/advanced_search_system/strategies/recursive_decomposition_strategy.py +492 -0
  72. local_deep_research/advanced_search_system/strategies/smart_decomposition_strategy.py +284 -0
  73. local_deep_research/advanced_search_system/strategies/smart_query_strategy.py +515 -0
  74. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +48 -24
  75. local_deep_research/advanced_search_system/strategies/standard_strategy.py +34 -14
  76. local_deep_research/advanced_search_system/tools/base_tool.py +7 -2
  77. local_deep_research/api/benchmark_functions.py +6 -2
  78. local_deep_research/api/research_functions.py +10 -4
  79. local_deep_research/benchmarks/__init__.py +9 -7
  80. local_deep_research/benchmarks/benchmark_functions.py +6 -2
  81. local_deep_research/benchmarks/cli/benchmark_commands.py +27 -10
  82. local_deep_research/benchmarks/cli.py +38 -13
  83. local_deep_research/benchmarks/comparison/__init__.py +4 -2
  84. local_deep_research/benchmarks/comparison/evaluator.py +316 -239
  85. local_deep_research/benchmarks/datasets/__init__.py +1 -1
  86. local_deep_research/benchmarks/datasets/base.py +91 -72
  87. local_deep_research/benchmarks/datasets/browsecomp.py +54 -33
  88. local_deep_research/benchmarks/datasets/custom_dataset_template.py +19 -19
  89. local_deep_research/benchmarks/datasets/simpleqa.py +14 -14
  90. local_deep_research/benchmarks/datasets/utils.py +48 -29
  91. local_deep_research/benchmarks/datasets.py +4 -11
  92. local_deep_research/benchmarks/efficiency/__init__.py +8 -4
  93. local_deep_research/benchmarks/efficiency/resource_monitor.py +223 -171
  94. local_deep_research/benchmarks/efficiency/speed_profiler.py +62 -48
  95. local_deep_research/benchmarks/evaluators/browsecomp.py +3 -1
  96. local_deep_research/benchmarks/evaluators/composite.py +6 -2
  97. local_deep_research/benchmarks/evaluators/simpleqa.py +36 -13
  98. local_deep_research/benchmarks/graders.py +32 -10
  99. local_deep_research/benchmarks/metrics/README.md +1 -1
  100. local_deep_research/benchmarks/metrics/calculation.py +25 -10
  101. local_deep_research/benchmarks/metrics/reporting.py +7 -3
  102. local_deep_research/benchmarks/metrics/visualization.py +42 -23
  103. local_deep_research/benchmarks/metrics.py +1 -1
  104. local_deep_research/benchmarks/optimization/__init__.py +3 -1
  105. local_deep_research/benchmarks/optimization/api.py +7 -1
  106. local_deep_research/benchmarks/optimization/optuna_optimizer.py +75 -26
  107. local_deep_research/benchmarks/runners.py +48 -15
  108. local_deep_research/citation_handler.py +65 -92
  109. local_deep_research/citation_handlers/__init__.py +15 -0
  110. local_deep_research/citation_handlers/base_citation_handler.py +70 -0
  111. local_deep_research/citation_handlers/forced_answer_citation_handler.py +179 -0
  112. local_deep_research/citation_handlers/precision_extraction_handler.py +550 -0
  113. local_deep_research/citation_handlers/standard_citation_handler.py +80 -0
  114. local_deep_research/config/llm_config.py +271 -169
  115. local_deep_research/config/search_config.py +14 -5
  116. local_deep_research/defaults/__init__.py +0 -1
  117. local_deep_research/metrics/__init__.py +13 -0
  118. local_deep_research/metrics/database.py +58 -0
  119. local_deep_research/metrics/db_models.py +115 -0
  120. local_deep_research/metrics/migrate_add_provider_to_token_usage.py +148 -0
  121. local_deep_research/metrics/migrate_call_stack_tracking.py +105 -0
  122. local_deep_research/metrics/migrate_enhanced_tracking.py +75 -0
  123. local_deep_research/metrics/migrate_research_ratings.py +31 -0
  124. local_deep_research/metrics/models.py +61 -0
  125. local_deep_research/metrics/pricing/__init__.py +12 -0
  126. local_deep_research/metrics/pricing/cost_calculator.py +237 -0
  127. local_deep_research/metrics/pricing/pricing_cache.py +143 -0
  128. local_deep_research/metrics/pricing/pricing_fetcher.py +240 -0
  129. local_deep_research/metrics/query_utils.py +51 -0
  130. local_deep_research/metrics/search_tracker.py +380 -0
  131. local_deep_research/metrics/token_counter.py +1078 -0
  132. local_deep_research/migrate_db.py +3 -1
  133. local_deep_research/report_generator.py +22 -8
  134. local_deep_research/search_system.py +390 -9
  135. local_deep_research/test_migration.py +15 -5
  136. local_deep_research/utilities/db_utils.py +7 -4
  137. local_deep_research/utilities/es_utils.py +115 -104
  138. local_deep_research/utilities/llm_utils.py +15 -5
  139. local_deep_research/utilities/log_utils.py +151 -0
  140. local_deep_research/utilities/search_cache.py +387 -0
  141. local_deep_research/utilities/search_utilities.py +14 -6
  142. local_deep_research/utilities/threading_utils.py +92 -0
  143. local_deep_research/utilities/url_utils.py +6 -0
  144. local_deep_research/web/api.py +347 -0
  145. local_deep_research/web/app.py +13 -17
  146. local_deep_research/web/app_factory.py +71 -66
  147. local_deep_research/web/database/migrate_to_ldr_db.py +12 -4
  148. local_deep_research/web/database/migrations.py +20 -3
  149. local_deep_research/web/database/models.py +74 -25
  150. local_deep_research/web/database/schema_upgrade.py +49 -29
  151. local_deep_research/web/models/database.py +63 -83
  152. local_deep_research/web/routes/api_routes.py +56 -22
  153. local_deep_research/web/routes/benchmark_routes.py +4 -1
  154. local_deep_research/web/routes/globals.py +22 -0
  155. local_deep_research/web/routes/history_routes.py +71 -46
  156. local_deep_research/web/routes/metrics_routes.py +1155 -0
  157. local_deep_research/web/routes/research_routes.py +192 -54
  158. local_deep_research/web/routes/settings_routes.py +156 -55
  159. local_deep_research/web/services/research_service.py +412 -251
  160. local_deep_research/web/services/resource_service.py +36 -11
  161. local_deep_research/web/services/settings_manager.py +55 -17
  162. local_deep_research/web/services/settings_service.py +12 -4
  163. local_deep_research/web/services/socket_service.py +295 -188
  164. local_deep_research/web/static/css/custom_dropdown.css +180 -0
  165. local_deep_research/web/static/css/styles.css +39 -1
  166. local_deep_research/web/static/js/components/detail.js +633 -267
  167. local_deep_research/web/static/js/components/details.js +751 -0
  168. local_deep_research/web/static/js/components/fallback/formatting.js +11 -11
  169. local_deep_research/web/static/js/components/fallback/ui.js +23 -23
  170. local_deep_research/web/static/js/components/history.js +76 -76
  171. local_deep_research/web/static/js/components/logpanel.js +61 -13
  172. local_deep_research/web/static/js/components/progress.js +13 -2
  173. local_deep_research/web/static/js/components/research.js +99 -12
  174. local_deep_research/web/static/js/components/results.js +239 -106
  175. local_deep_research/web/static/js/main.js +40 -40
  176. local_deep_research/web/static/js/services/audio.js +1 -1
  177. local_deep_research/web/static/js/services/formatting.js +11 -11
  178. local_deep_research/web/static/js/services/keyboard.js +157 -0
  179. local_deep_research/web/static/js/services/pdf.js +80 -80
  180. local_deep_research/web/static/sounds/README.md +1 -1
  181. local_deep_research/web/templates/base.html +1 -0
  182. local_deep_research/web/templates/components/log_panel.html +7 -1
  183. local_deep_research/web/templates/components/mobile_nav.html +1 -1
  184. local_deep_research/web/templates/components/sidebar.html +3 -0
  185. local_deep_research/web/templates/pages/cost_analytics.html +1245 -0
  186. local_deep_research/web/templates/pages/details.html +325 -24
  187. local_deep_research/web/templates/pages/history.html +1 -1
  188. local_deep_research/web/templates/pages/metrics.html +1929 -0
  189. local_deep_research/web/templates/pages/progress.html +2 -2
  190. local_deep_research/web/templates/pages/research.html +53 -17
  191. local_deep_research/web/templates/pages/results.html +12 -1
  192. local_deep_research/web/templates/pages/star_reviews.html +803 -0
  193. local_deep_research/web/utils/formatters.py +9 -3
  194. local_deep_research/web_search_engines/default_search_engines.py +5 -3
  195. local_deep_research/web_search_engines/engines/full_search.py +8 -2
  196. local_deep_research/web_search_engines/engines/meta_search_engine.py +59 -20
  197. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +19 -6
  198. local_deep_research/web_search_engines/engines/search_engine_brave.py +6 -2
  199. local_deep_research/web_search_engines/engines/search_engine_ddg.py +3 -1
  200. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +81 -58
  201. local_deep_research/web_search_engines/engines/search_engine_github.py +46 -15
  202. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +16 -6
  203. local_deep_research/web_search_engines/engines/search_engine_guardian.py +39 -15
  204. local_deep_research/web_search_engines/engines/search_engine_local.py +58 -25
  205. local_deep_research/web_search_engines/engines/search_engine_local_all.py +15 -5
  206. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +63 -21
  207. local_deep_research/web_search_engines/engines/search_engine_searxng.py +37 -11
  208. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +27 -9
  209. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +12 -4
  210. local_deep_research/web_search_engines/engines/search_engine_wayback.py +31 -10
  211. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +12 -3
  212. local_deep_research/web_search_engines/search_engine_base.py +83 -35
  213. local_deep_research/web_search_engines/search_engine_factory.py +25 -8
  214. local_deep_research/web_search_engines/search_engines_config.py +9 -3
  215. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/METADATA +7 -1
  216. local_deep_research-0.5.2.dist-info/RECORD +265 -0
  217. local_deep_research-0.4.4.dist-info/RECORD +0 -177
  218. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/WHEEL +0 -0
  219. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/entry_points.txt +0 -0
  220. {local_deep_research-0.4.4.dist-info → local_deep_research-0.5.2.dist-info}/licenses/LICENSE +0 -0
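The per-file counts above are line-level additions and removals between the two wheels. A minimal sketch of how such a listing can be reproduced locally with only the standard library, assuming both wheels have already been downloaded (for example with pip download local-deep-research==0.4.4 --no-deps, and the same for 0.5.2; the wheel filenames below are the assumed PyPI artifact names):

import difflib
import zipfile

# Assumed artifact names; adjust to wherever pip download placed the wheels.
OLD_WHEEL = "local_deep_research-0.4.4-py3-none-any.whl"
NEW_WHEEL = "local_deep_research-0.5.2-py3-none-any.whl"


def read_text_entries(wheel_path: str) -> dict[str, list[str]]:
    """Read every entry of a wheel (a zip archive) as a list of text lines."""
    entries = {}
    with zipfile.ZipFile(wheel_path) as zf:
        for name in zf.namelist():
            try:
                entries[name] = zf.read(name).decode("utf-8").splitlines()
            except UnicodeDecodeError:
                entries[name] = []  # treat binary entries as empty
    return entries


old, new = read_text_entries(OLD_WHEEL), read_text_entries(NEW_WHEEL)
for name in sorted(set(old) | set(new)):
    diff = difflib.unified_diff(old.get(name, []), new.get(name, []), lineterm="")
    added = removed = 0
    for line in diff:
        if line.startswith("+") and not line.startswith("+++"):
            added += 1
        elif line.startswith("-") and not line.startswith("---"):
            removed += 1
    if added or removed:
        print(f"{name} +{added} -{removed}")

This sketch does not reproduce the {old → new} rename notation used for the dist-info entries or the unchanged +0 -0 rows, but it yields the same added/removed counts for the files that differ.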
local_deep_research/advanced_search_system/strategies/evidence_based_strategy_v2.py (new file)
@@ -0,0 +1,1337 @@
+ """
+ Enhanced evidence-based search strategy for complex query resolution.
+
+ This strategy addresses common issues with candidate discovery and evidence gathering:
+ 1. Multi-stage candidate discovery with fallback mechanisms
+ 2. Adaptive query generation based on past performance
+ 3. Cross-constraint search capabilities
+ 4. Source diversity management
+ """
+
+ import math
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from typing import Any, Dict, List, Optional, Set, Tuple
+
+ from langchain_core.language_models import BaseChatModel
+
+ from ...utilities.search_utilities import remove_think_tags
+ from ..candidates.base_candidate import Candidate
+ from ..constraints.base_constraint import Constraint, ConstraintType
+ from ..evidence.base_evidence import Evidence
+ from .evidence_based_strategy import EvidenceBasedStrategy
+
+
+ @dataclass
+ class QueryPattern:
+     """Pattern for generating search queries."""
+
+     pattern: str
+     success_rate: float = 0.0
+     usage_count: int = 0
+     constraint_types: List[ConstraintType] = field(default_factory=list)
+
+
+ @dataclass
+ class SourceProfile:
+     """Profile for tracking source effectiveness."""
+
+     source_name: str
+     success_rate: float = 0.0
+     usage_count: int = 0
+     specialties: List[str] = field(default_factory=list)
+     last_used: Optional[datetime] = None
+
+
+ class EnhancedEvidenceBasedStrategy(EvidenceBasedStrategy):
+     """
+     Enhanced evidence-based strategy with improved candidate discovery.
+
+     Key improvements:
+     1. Multi-stage candidate discovery
+     2. Adaptive query patterns
+     3. Cross-constraint capabilities
+     4. Source diversity tracking
+     """
+
+     def __init__(
+         self,
+         model: BaseChatModel,
+         search: Any,
+         all_links_of_system: List[str],
+         max_iterations: int = 20,
+         confidence_threshold: float = 0.85,
+         candidate_limit: int = 20,  # Increased for better candidate variety
+         evidence_threshold: float = 0.6,
+         max_search_iterations: int = 2,
+         questions_per_iteration: int = 3,
+         min_candidates_threshold: int = 10,  # Increased to ensure we have enough candidates
+         enable_pattern_learning: bool = True,
+     ):
+         """Initialize the enhanced evidence-based strategy."""
+         # Call parent initializer with required arguments
+         super().__init__(
+             model=model,
+             search=search,
+             all_links_of_system=all_links_of_system,
+             max_iterations=max_iterations,
+             confidence_threshold=confidence_threshold,
+             candidate_limit=candidate_limit,
+             evidence_threshold=evidence_threshold,
+             max_search_iterations=max_search_iterations,
+             questions_per_iteration=questions_per_iteration,
+         )
+
+         # Enhanced-specific attributes
+         self.min_candidates_threshold = min_candidates_threshold
+         self.enable_pattern_learning = enable_pattern_learning
+
+         # Pattern learning
+         self.query_patterns: Dict[str, QueryPattern] = (
+             self._initialize_patterns()
+         )
+         self.source_profiles: Dict[str, SourceProfile] = (
+             self._initialize_sources()
+         )
+
+         # Failure tracking
+         self.failed_queries: Set[str] = set()
+         self.constraint_relationships: Dict[str, List[str]] = {}
+
+     def _initialize_patterns(self) -> Dict[str, QueryPattern]:
+         """Initialize query patterns for different constraint types."""
+         patterns = {
+             "property_basic": QueryPattern(
+                 pattern="{value} {constraint_type}",
+                 constraint_types=[ConstraintType.PROPERTY],
+             ),
+             "property_character": QueryPattern(
+                 pattern="character with {value}",
+                 constraint_types=[ConstraintType.PROPERTY],
+             ),
+             "event_show": QueryPattern(
+                 pattern="TV show {value}",
+                 constraint_types=[ConstraintType.EVENT],
+             ),
+             "statistic_episodes": QueryPattern(
+                 pattern="{value} episodes series",
+                 constraint_types=[ConstraintType.STATISTIC],
+             ),
+             "cross_constraint": QueryPattern(
+                 pattern="{value1} AND {value2}", constraint_types=[]
+             ),
+             "semantic_expansion": QueryPattern(
+                 pattern='"{value}" OR "{synonym1}" OR "{synonym2}"',
+                 constraint_types=[],
+             ),
+         }
+         return patterns
+
+     def _initialize_sources(self) -> Dict[str, SourceProfile]:
+         """Initialize source profiles for diversity tracking."""
+         sources = {
+             "wikipedia": SourceProfile(
+                 source_name="wikipedia",
+                 specialties=["characters", "properties", "general"],
+             ),
+             "imdb": SourceProfile(
+                 source_name="imdb",
+                 specialties=["tv_shows", "episodes", "statistics"],
+             ),
+             "fandom": SourceProfile(
+                 source_name="fandom",
+                 specialties=["characters", "properties", "backstory"],
+             ),
+             "tv_databases": SourceProfile(
+                 source_name="tv_databases",
+                 specialties=["shows", "episodes", "statistics"],
+             ),
+             "web": SourceProfile(source_name="web", specialties=["general"]),
+         }
+         return sources
+
+     def analyze_topic(self, query: str) -> Dict:
+         """Analyze a topic using enhanced evidence-based approach."""
+         # Initialize
+         self.all_links_of_system.clear()
+         self.questions_by_iteration = []
+         self.findings = []
+         self.iteration = 0
+
+         if self.progress_callback:
+             self.progress_callback(
+                 "Enhanced Evidence-Based Strategy initialized - beginning analysis",
+                 1,
+                 {
+                     "phase": "initialization",
+                     "strategy": "enhanced_evidence_based",
+                 },
+             )
+
+         # Extract constraints
+         if self.progress_callback:
+             self.progress_callback(
+                 "Extracting verifiable constraints from query...",
+                 3,
+                 {"phase": "constraint_extraction", "query_length": len(query)},
+             )
+
+         self.constraints = self.constraint_analyzer.extract_constraints(query)
+
+         if self.progress_callback:
+             self.progress_callback(
+                 f"Found {len(self.constraints)} constraints - analyzing relationships",
+                 5,
+                 {
+                     "phase": "constraint_analysis",
+                     "constraint_count": len(self.constraints),
+                     "constraint_types": {
+                         c.type.name: c.weight for c in self.constraints[:5]
+                     },
+                 },
+             )
+
+         # Analyze constraint relationships
+         self._analyze_constraint_relationships()
+
+         # Find initial candidates with enhanced discovery
+         if self.progress_callback:
+             self.progress_callback(
+                 "Starting enhanced candidate discovery (multi-stage)",
+                 8,
+                 {"phase": "candidate_discovery_start", "stages": 7},
+             )
+
+         self._enhanced_candidate_discovery()
+
+         # Main evidence-gathering loop
+         while (
+             self.iteration < self.max_iterations
+             and not self._has_sufficient_answer()
+         ):
+             self.iteration += 1
+
+             # Progress for iteration
+             if self.progress_callback:
+                 base_progress = 40
+                 iteration_progress = base_progress + int(
+                     (self.iteration / self.max_iterations) * 50
+                 )
+
+                 self.progress_callback(
+                     f"Iteration {self.iteration}/{self.max_iterations} - gathering evidence",
+                     iteration_progress,
+                     {
+                         "phase": "iteration_start",
+                         "iteration": self.iteration,
+                         "max_iterations": self.max_iterations,
+                         "candidates_count": len(self.candidates),
+                         "top_candidate": (
+                             self.candidates[0].name
+                             if self.candidates
+                             else "None"
+                         ),
+                     },
+                 )
+
+             # Adaptive evidence gathering
+             self._adaptive_evidence_gathering()
+
+             # Score and prune with pattern learning
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Scoring {len(self.candidates)} candidates based on evidence",
+                     None,
+                     {
+                         "phase": "scoring_start",
+                         "candidate_count": len(self.candidates),
+                     },
+                 )
+
+             self._score_and_prune_adaptive()
+
+             # Add iteration finding
+             iteration_finding = {
+                 "phase": f"Iteration {self.iteration}",
+                 "content": self._format_iteration_summary(),
+                 "timestamp": self._get_timestamp(),
+                 "metadata": {
+                     "candidates": len(self.candidates),
+                     "patterns_used": len(self.query_patterns),
+                     "source_diversity": self._calculate_source_diversity(),
+                 },
+             }
+             self.findings.append(iteration_finding)
+
+             # Adaptive candidate discovery if needed
+             if len(self.candidates) < self.min_candidates_threshold:
+                 if self.progress_callback:
+                     self.progress_callback(
+                         f"Too few candidates ({len(self.candidates)}) - searching for more",
+                         None,
+                         {
+                             "phase": "adaptive_discovery",
+                             "current_candidates": len(self.candidates),
+                             "threshold": self.min_candidates_threshold,
+                         },
+                     )
+                 self._adaptive_candidate_discovery()
+
+         # Final verification with source diversity
+         if self.progress_callback:
+             self.progress_callback(
+                 "Starting final verification of top candidates",
+                 90,
+                 {
+                     "phase": "final_verification_start",
+                     "top_candidates": [c.name for c in self.candidates[:3]],
+                 },
+             )
+
+         self._enhanced_final_verification()
+
+         # Generate final answer
+         if self.progress_callback:
+             self.progress_callback(
+                 "Synthesizing final answer based on evidence",
+                 95,
+                 {
+                     "phase": "synthesis_start",
+                     "candidates_evaluated": len(self.candidates),
+                     "evidence_pieces": sum(
+                         len(c.evidence) for c in self.candidates
+                     ),
+                 },
+             )
+
+         result = self._synthesize_final_answer(query)
+
+         if self.progress_callback:
+             top_candidate = self.candidates[0] if self.candidates else None
+             self.progress_callback(
+                 f"Analysis complete - {top_candidate.name if top_candidate else 'No answer found'}",
+                 100,
+                 {
+                     "phase": "complete",
+                     "final_answer": (
+                         top_candidate.name
+                         if top_candidate
+                         else "Unable to determine"
+                     ),
+                     "confidence": top_candidate.score if top_candidate else 0,
+                     "candidates_evaluated": len(self.candidates),
+                     "evidence_pieces": sum(
+                         len(c.evidence) for c in self.candidates
+                     ),
+                     "iterations_used": self.iteration,
+                 },
+             )
+
+         return result
+
+     def _enhanced_candidate_discovery(self):
+         """Enhanced multi-stage candidate discovery."""
+         all_candidates = []
+         discovery_stages = [
+             ("broad", "Broad discovery", self._broad_discovery_search),
+             ("focused", "Focused constraints", self._focused_constraint_search),
+             ("cross", "Cross-constraint", self._cross_constraint_search),
+             ("semantic", "Semantic expansion", self._semantic_expansion_search),
+             ("temporal", "Temporal search", self._temporal_search),
+             (
+                 "character_db",
+                 "Character databases",
+                 self._character_database_search,
+             ),
+             ("fallback", "Fallback search", self._fallback_search),
+         ]
+
+         stage_progress_base = 10
+         stage_progress_increment = 5
+
+         for i, (stage_name, stage_desc, stage_func) in enumerate(
+             discovery_stages
+         ):
+             if self.progress_callback:
+                 progress = stage_progress_base + (i * stage_progress_increment)
+                 self.progress_callback(
+                     f"{stage_desc} search - looking for candidates [{i + 1}/{len(discovery_stages)}]",
+                     progress,
+                     {
+                         "phase": f"discovery_{stage_name}",
+                         "stage": i + 1,
+                         "total_stages": len(discovery_stages),
+                         "candidates_found": len(all_candidates),
+                         "stage_description": stage_desc,
+                     },
+                 )
+
+             stage_candidates = stage_func()
+             new_candidates = len(stage_candidates)
+             all_candidates.extend(stage_candidates)
+
+             if self.progress_callback and new_candidates > 0:
+                 self.progress_callback(
+                     f"Found {new_candidates} candidates in {stage_desc} stage",
+                     None,
+                     {
+                         "phase": f"discovery_{stage_name}_results",
+                         "new_candidates": new_candidates,
+                         "total_candidates": len(all_candidates),
+                     },
+                 )
+
+             # Early exit if we have enough candidates
+             if len(all_candidates) >= self.min_candidates_threshold * 2:
+                 if self.progress_callback:
+                     self.progress_callback(
+                         f"Found sufficient candidates ({len(all_candidates)}) - ending discovery",
+                         30,
+                         {
+                             "phase": "discovery_complete",
+                             "total_candidates": len(all_candidates),
+                         },
+                     )
+                 break
+
+         # Deduplicate and add candidates
+         self._add_unique_candidates(all_candidates)
+
+         # If still not enough candidates, try more aggressive discovery
+         if (
+             len(self.candidates) < self.min_candidates_threshold
+             and all_candidates
+         ):
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Only {len(self.candidates)} candidates - trying more aggressive search",
+                     None,
+                     {"phase": "aggressive_discovery"},
+                 )
+             self._aggressive_supplemental_search()
+
+         if self.progress_callback:
+             self.progress_callback(
+                 f"Candidate discovery complete - {len(self.candidates)} unique candidates found",
+                 35,
+                 {
+                     "phase": "discovery_complete",
+                     "unique_candidates": len(self.candidates),
+                     "candidate_names": [c.name for c in self.candidates[:10]],
+                 },
+             )
+
+     def _broad_discovery_search(self) -> List[Candidate]:
+         """Initial broad search to discover potential candidates."""
+         candidates = []
+
+         # Extended patterns for broader discovery
+         extended_patterns = [
+             "{value}",
+             "character {value}",
+             "TV character {value}",
+             "fictional character {value}",
+             "{value} television",
+             "{value} TV show",
+             "{value} series",
+         ]
+
+         # Apply patterns to constraints
+         for constraint in self.constraints[:4]:  # Increased from 3
+             for pattern_template in extended_patterns:
+                 query = pattern_template.replace("{value}", constraint.value)
+
+                 if query not in self.failed_queries and len(query) > 3:
+                     if self.progress_callback:
+                         self.progress_callback(
+                             f"Broad search: {query[:50]}...",
+                             None,
+                             {
+                                 "phase": "broad_search_query",
+                                 "query": query,
+                                 "constraint_type": constraint.type.value,
+                             },
+                         )
+
+                     results = self._execute_adaptive_search(query)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, query
+                         )
+                     )
+                     candidates.extend(extracted)
+
+                     if not extracted:
+                         self.failed_queries.add(query)
+
+         return candidates
+
+     def _focused_constraint_search(self) -> List[Candidate]:
+         """Focused search using specific constraint values."""
+         candidates = []
+
+         # Get high-weight constraints
+         high_weight_constraints = sorted(
+             self.constraints, key=lambda c: c.weight, reverse=True
+         )[:5]
+
+         for i, constraint in enumerate(high_weight_constraints):
+             # Generate adaptive queries based on past performance
+             queries = self._generate_adaptive_queries(constraint)
+
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Focused search: {constraint.description[:40]}... [{i + 1}/{len(high_weight_constraints)}]",
+                     None,
+                     {
+                         "phase": "focused_constraint",
+                         "constraint": constraint.description,
+                         "constraint_weight": constraint.weight,
+                         "constraint_rank": i + 1,
+                     },
+                 )
+
+             for query in queries[:3]:  # Limit queries per constraint
+                 if query not in self.failed_queries:
+                     results = self._execute_adaptive_search(query)
+                     extracted = self._extract_candidates_from_results(
+                         results, query
+                     )
+
+                     if extracted:
+                         candidates.extend(extracted)
+                         # Learn from successful query
+                         self._learn_query_pattern(
+                             query, constraint, success=True
+                         )
+                     else:
+                         self.failed_queries.add(query)
+
+         return candidates
+
+     def _cross_constraint_search(self) -> List[Candidate]:
+         """Search using combinations of constraints."""
+         candidates = []
+
+         # Find related constraint pairs
+         constraint_pairs = self._find_related_constraint_pairs()
+
+         if self.progress_callback and constraint_pairs:
+             self.progress_callback(
+                 f"Cross-constraint search - {len(constraint_pairs)} pairs identified",
+                 None,
+                 {
+                     "phase": "cross_constraint_start",
+                     "pair_count": len(constraint_pairs),
+                 },
+             )
+
+         for i, (constraint1, constraint2) in enumerate(
+             constraint_pairs[:5]
+         ):  # Limit combinations
+             query = self._build_cross_constraint_query(constraint1, constraint2)
+
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Combining: {constraint1.type.value} + {constraint2.type.value}",
+                     None,
+                     {
+                         "phase": "cross_constraint_pair",
+                         "pair": i + 1,
+                         "constraint1": constraint1.description[:30],
+                         "constraint2": constraint2.description[:30],
+                     },
+                 )
+
+             if query not in self.failed_queries:
+                 results = self._execute_adaptive_search(query)
+                 extracted = self._extract_candidates_from_results(
+                     results, query
+                 )
+
+                 if extracted:
+                     candidates.extend(extracted)
+                     # Update constraint relationships
+                     self._update_constraint_relationships(
+                         constraint1, constraint2, True
+                     )
+                 else:
+                     self.failed_queries.add(query)
+
+         return candidates
+
+     def _semantic_expansion_search(self) -> List[Candidate]:
+         """Search using semantic variations of constraints."""
+         candidates = []
+
+         # Use general semantic variations without specific terms
+         for constraint in self.constraints[:4]:
+             variations = self._generate_semantic_variations(constraint)
+
+             for variation in variations[:3]:  # Limit variations
+                 if variation not in self.failed_queries:
+                     results = self._execute_adaptive_search(variation)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, variation
+                         )
+                     )
+
+                     if extracted:
+                         candidates.extend(extracted)
+                     else:
+                         self.failed_queries.add(variation)
+
+         return candidates
+
+     def _fallback_search(self) -> List[Candidate]:
+         """Fallback search with relaxed constraints."""
+         candidates = []
+
+         # Relax constraints by combining fewer requirements
+         relaxed_queries = self._generate_relaxed_queries()
+
+         for query in relaxed_queries[:3]:
+             if query not in self.failed_queries:
+                 results = self._execute_adaptive_search(query)
+                 extracted = self._extract_candidates_from_results(
+                     results, query
+                 )
+                 candidates.extend(extracted)
+
+         return candidates
+
+     def _adaptive_evidence_gathering(self):
+         """Enhanced evidence gathering with source diversity."""
+         evidence_gathered = 0
+         source_usage = {}
+         total_to_gather = (
+             min(5, len(self.candidates)) * 2
+         )  # Max 2 constraints per candidate
+
+         if self.progress_callback:
+             self.progress_callback(
+                 f"Gathering evidence for top {min(5, len(self.candidates))} candidates",
+                 None,
+                 {
+                     "phase": "evidence_gathering_start",
+                     "candidates_to_process": min(5, len(self.candidates)),
+                     "total_evidence_needed": total_to_gather,
+                 },
+             )
+
+         for i, candidate in enumerate(self.candidates[:5]):
+             unverified = candidate.get_unverified_constraints(self.constraints)
+
+             if self.progress_callback:
+                 self.progress_callback(
+                     f"Processing {candidate.name} [{i + 1}/{min(5, len(self.candidates))}]",
+                     None,
+                     {
+                         "phase": "candidate_evidence",
+                         "candidate": candidate.name,
+                         "candidate_rank": i + 1,
+                         "unverified_constraints": len(unverified),
+                     },
+                 )
+
+             for j, constraint in enumerate(
+                 unverified[:2]
+             ):  # Limit per candidate
+                 # Select diverse source
+                 source = self._select_diverse_source(
+                     source_usage, constraint.type
+                 )
+
+                 # Generate evidence query
+                 query = self._generate_evidence_query(candidate, constraint)
+
+                 if self.progress_callback:
+                     self.progress_callback(
+                         f"Searching {source} for: {query[:40]}...",
+                         None,
+                         {
+                             "phase": "evidence_search",
+                             "candidate": candidate.name,
+                             "constraint": constraint.description[:50],
+                             "source": source,
+                             "query": query,
+                         },
+                     )
+
+                 # Execute search with selected source
+                 results = self._execute_search_with_source(query, source)
+
+                 # Extract and evaluate evidence
+                 evidence = self.evidence_evaluator.extract_evidence(
+                     results.get("current_knowledge", ""),
+                     candidate.name,
+                     constraint,
+                 )
+
+                 candidate.add_evidence(constraint.id, evidence)
+                 evidence_gathered += 1
+
+                 if self.progress_callback:
+                     self.progress_callback(
+                         f"Evidence found: {evidence.confidence:.0%} confidence",
+                         None,
+                         {
+                             "phase": "evidence_result",
+                             "candidate": candidate.name,
+                             "constraint": constraint.description[:50],
+                             "confidence": evidence.confidence,
+                             "evidence_type": evidence.type.value,
+                             "progress": f"{evidence_gathered}/{total_to_gather}",
+                         },
+                     )
+
+                 # Update source profile
+                 self._update_source_profile(source, evidence.confidence)
+
+         if self.progress_callback:
+             self.progress_callback(
+                 f"Evidence gathering complete - {evidence_gathered} pieces collected",
+                 None,
+                 {
+                     "phase": "evidence_complete",
+                     "evidence_count": evidence_gathered,
+                     "source_diversity": self._calculate_source_diversity(),
+                     "sources_used": len(
+                         [s for s in source_usage.values() if s > 0]
+                     ),
+                 },
+             )
+
+     def _generate_adaptive_queries(self, constraint: Constraint) -> List[str]:
+         """Generate queries based on past performance."""
+         queries = []
+
+         # Get successful patterns for this constraint type
+         successful_patterns = sorted(
+             [
+                 p
+                 for p in self.query_patterns.values()
+                 if constraint.type in p.constraint_types
+                 and p.success_rate > 0.3
+             ],
+             key=lambda p: p.success_rate,
+             reverse=True,
+         )
+
+         # Apply patterns
+         for pattern in successful_patterns[:3]:
+             query = self._apply_pattern_to_constraint(pattern, constraint)
+             queries.append(query)
+
+         # Add semantic variations
+         semantic_queries = self._generate_semantic_variations(constraint)
+         queries.extend(semantic_queries[:2])
+
+         # Add fallback basic query
+         queries.append(f"{constraint.value} {constraint.type.value}")
+
+         return queries
+
+     def _apply_pattern_to_constraint(
+         self, pattern: QueryPattern, constraint: Constraint
+     ) -> str:
+         """Apply a pattern to a constraint to create a query."""
+         query = pattern.pattern
+
+         # Replace placeholders
+         query = query.replace("{value}", constraint.value)
+         query = query.replace("{constraint_type}", constraint.type.value)
+
+         # Add synonyms if pattern requires them
+         if "{synonym" in query:
+             synonyms = self._get_synonyms(constraint.value)
+             for i, synonym in enumerate(synonyms[:2], 1):
+                 query = query.replace(f"{{synonym{i}}}", synonym)
+
+         return query
+
+     def _build_cross_constraint_query(
+         self, constraint1: Constraint, constraint2: Constraint
+     ) -> str:
+         """Build query combining multiple constraints."""
+         # Identify common terms
+         common_terms = self._find_common_terms(constraint1, constraint2)
+
+         if common_terms:
+             base = " ".join(common_terms)
+             query = f"{base} {constraint1.value} {constraint2.value}"
+         else:
+             query = f"{constraint1.value} AND {constraint2.value}"
+
+         # Add type-specific context
+         if (
+             constraint1.type == ConstraintType.PROPERTY
+             and constraint2.type == ConstraintType.EVENT
+         ):
+             query += " TV show character"
+
+         return query
+
+     def _select_diverse_source(
+         self, source_usage: Dict[str, int], constraint_type: ConstraintType
+     ) -> str:
+         """Select source to ensure diversity."""
+         # Get specialized sources for constraint type
+         specialized = self._get_specialized_sources(constraint_type)
+
+         # Sort by usage (least used first) and success rate
+         available_sources = sorted(
+             self.source_profiles.values(),
+             key=lambda s: (source_usage.get(s.source_name, 0), -s.success_rate),
+         )
+
+         # Prefer specialized sources
+         for source in available_sources:
+             if source.source_name in specialized:
+                 source_usage[source.source_name] = (
+                     source_usage.get(source.source_name, 0) + 1
+                 )
+                 return source.source_name
+
+         # Fallback to least used source
+         selected = available_sources[0].source_name
+         source_usage[selected] = source_usage.get(selected, 0) + 1
+         return selected
+
+     def _get_specialized_sources(
+         self, constraint_type: ConstraintType
+     ) -> List[str]:
+         """Get specialized sources for constraint type."""
+         specialization_map = {
+             ConstraintType.PROPERTY: ["fandom", "wikipedia"],
+             ConstraintType.EVENT: ["imdb", "tv_databases"],
+             ConstraintType.STATISTIC: ["imdb", "tv_databases"],
+             ConstraintType.LOCATION: ["wikipedia"],
+         }
+         return specialization_map.get(constraint_type, ["web"])
+
+     def _update_pattern_success(self, pattern: QueryPattern, success: bool):
+         """Update pattern success rate."""
+         pattern.usage_count += 1
+         # Exponential moving average
+         alpha = 0.3
+         if success:
+             pattern.success_rate = (
+                 alpha * 1.0 + (1 - alpha) * pattern.success_rate
+             )
+         else:
+             pattern.success_rate = (1 - alpha) * pattern.success_rate
+
+     def _update_source_profile(self, source_name: str, confidence: float):
+         """Update source profile based on evidence quality."""
+         if source_name in self.source_profiles:
+             profile = self.source_profiles[source_name]
+             profile.usage_count += 1
+             profile.last_used = datetime.utcnow()
+
+             # Update success rate based on confidence
+             alpha = 0.3
+             success = 1.0 if confidence >= self.evidence_threshold else 0.0
+             profile.success_rate = (
+                 alpha * success + (1 - alpha) * profile.success_rate
+             )
+
+     def _analyze_constraint_relationships(self):
+         """Analyze relationships between constraints."""
+         for i, constraint1 in enumerate(self.constraints):
+             for constraint2 in self.constraints[i + 1 :]:
+                 # Check for common terms or semantic similarity
+                 common_terms = self._find_common_terms(constraint1, constraint2)
+                 if common_terms:
+                     self.constraint_relationships[constraint1.id] = (
+                         self.constraint_relationships.get(constraint1.id, [])
+                         + [constraint2.id]
+                     )
+                     self.constraint_relationships[constraint2.id] = (
+                         self.constraint_relationships.get(constraint2.id, [])
+                         + [constraint1.id]
+                     )
+
+     def _find_related_constraint_pairs(
+         self,
+     ) -> List[Tuple[Constraint, Constraint]]:
+         """Find constraint pairs that might work well together."""
+         pairs = []
+
+         # Use analyzed relationships
+         for constraint_id, related_ids in self.constraint_relationships.items():
+             constraint = next(
+                 (c for c in self.constraints if c.id == constraint_id), None
+             )
+             if constraint:
+                 for related_id in related_ids:
+                     related = next(
+                         (c for c in self.constraints if c.id == related_id),
+                         None,
+                     )
+                     if related and (constraint, related) not in pairs:
+                         pairs.append((constraint, related))
+
+         # Add type-based pairs
+         property_constraints = [
+             c for c in self.constraints if c.type == ConstraintType.PROPERTY
+         ]
+         event_constraints = [
+             c for c in self.constraints if c.type == ConstraintType.EVENT
+         ]
+
+         for prop in property_constraints[:2]:
+             for event in event_constraints[:2]:
+                 if (prop, event) not in pairs:
+                     pairs.append((prop, event))
+
+         return pairs
+
+     def _calculate_source_diversity(self) -> float:
+         """Calculate source diversity score."""
+         if not self.source_profiles:
+             return 0.0
+
+         used_sources = [
+             s for s in self.source_profiles.values() if s.usage_count > 0
+         ]
+         if not used_sources:
+             return 0.0
+
+         # Calculate entropy of source usage
+         total_usage = sum(s.usage_count for s in used_sources)
+         entropy = 0.0
+
+         for source in used_sources:
+             if source.usage_count > 0:
+                 p = source.usage_count / total_usage
+                 entropy -= p * (math.log2(p) if p > 0 else 0)
+
+         # Normalize by maximum possible entropy
+         max_entropy = -math.log2(1 / len(used_sources))
+         return entropy / max_entropy if max_entropy > 0 else 0.0
+
+     def _execute_adaptive_search(self, query: str) -> Dict:
+         """Execute search with adaptive source selection."""
+         # Simple wrapper for consistency - could be enhanced with source selection
+         return self._execute_search(query)
+
+     def _execute_search_with_source(self, query: str, source: str) -> Dict:
+         """Execute search with specific source."""
+         # For now, use the standard search - could be enhanced to use specific sources
+         return self._execute_search(query)
+
+     def _get_synonyms(self, term: str) -> List[str]:
+         """Get synonyms for a term - generic implementation."""
+         # Use LLM to generate synonyms dynamically
+         try:
+             prompt = f"List 3 synonyms or similar terms for '{term}'. Return only the synonyms, one per line."
+             response = self.model.invoke(prompt)
+             content = remove_think_tags(response.content)
+             synonyms = [
+                 line.strip() for line in content.split("\n") if line.strip()
+             ]
+             return synonyms[:3]
+         except Exception:
+             return []  # Return empty list on error
+
+     def _find_common_terms(
+         self, constraint1: Constraint, constraint2: Constraint
+     ) -> List[str]:
+         """Find common terms between constraints."""
+         terms1 = set(constraint1.value.lower().split())
+         terms2 = set(constraint2.value.lower().split())
+         return list(terms1.intersection(terms2))
+
+     def _generate_semantic_variations(
+         self, constraint: Constraint
+     ) -> List[str]:
+         """Generate semantically similar queries."""
+         variations = []
+
+         # Basic variations based on constraint type
+         base_value = (
+             constraint.value
+             if hasattr(constraint, "value")
+             else constraint.description
+         )
+
+         # Add contextual variations based on type
+         if constraint.type == ConstraintType.PROPERTY:
+             variations.extend(
+                 [
+                     f"entity with {base_value}",
+                     f"subject {base_value}",
+                     f"thing {base_value}",
+                     f"{base_value} entity",
+                 ]
+             )
+         elif constraint.type == ConstraintType.EVENT:
+             variations.extend(
+                 [
+                     f"event {base_value}",
+                     f"occurrence {base_value}",
+                     f"{base_value} happened",
+                     f"when {base_value}",
+                 ]
+             )
+         elif constraint.type == ConstraintType.STATISTIC:
+             variations.extend(
+                 [
+                     f"number {base_value}",
+                     f"count {base_value}",
+                     f"{base_value} total",
+                     f"quantity {base_value}",
+                 ]
+             )
+         else:
+             # Generic variations
+             variations.extend(
+                 [
+                     f"{base_value}",
+                     f"specific {base_value}",
+                     f"about {base_value}",
+                     f"regarding {base_value}",
+                 ]
+             )
+
+         return variations[:5]  # Limit variations
+
+     def _generate_evidence_query(
+         self, candidate: Candidate, constraint: Constraint
+     ) -> str:
+         """Generate evidence query for a candidate-constraint pair."""
+         prompt = f"""Create a search query to verify if "{candidate.name}" satisfies this constraint:
+ Constraint: {constraint.description}
+ Type: {constraint.type.value}
+
+ Create a specific search query that would find evidence about whether this candidate meets this constraint.
+ Return only the search query, no explanation."""
+
+         response = self.model.invoke(prompt)
+         query = remove_think_tags(response.content).strip()
+
+         # Fallback to simple query if needed
+         if not query or len(query) < 5:
+             query = f"{candidate.name} {constraint.value}"
+
+         return query
+
+     def _add_unique_candidates(self, candidates: List[Candidate]):
+         """Add unique candidates to the main list."""
+         existing_names = {c.name.lower() for c in self.candidates}
+
+         for candidate in candidates:
+             if candidate.name.lower() not in existing_names:
+                 self.candidates.append(candidate)
+                 existing_names.add(candidate.name.lower())
+
+         # Limit total candidates
+         self.candidates = self.candidates[: self.candidate_limit]
+
+     def _adaptive_candidate_discovery(self):
+         """Adaptive candidate discovery when we have too few candidates."""
+         # Use unused constraints or different strategies
+         unused_constraints = [
+             c
+             for c in self.constraints
+             if not any(c.id in cand.evidence for cand in self.candidates)
+         ]
+
+         if unused_constraints:
+             # Try cross-constraint search with unused constraints
+             candidates = self._cross_constraint_search()
+             self._add_unique_candidates(candidates)
+
+         # If still not enough, try semantic expansion
+         if len(self.candidates) < self.min_candidates_threshold:
+             candidates = self._semantic_expansion_search()
+             self._add_unique_candidates(candidates)
+
+     def _generate_relaxed_queries(self) -> List[str]:
+         """Generate relaxed queries for fallback search."""
+         queries = []
+
+         # Take most important constraints and relax them
+         important_constraints = sorted(
+             self.constraints, key=lambda c: c.weight, reverse=True
+         )[:3]
+
+         for constraint in important_constraints:
+             # Simple value-based query
+             queries.append(constraint.value)
+
+             # Type-based query
+             queries.append(f"{constraint.type.value} {constraint.value}")
+
+         return queries
+
+     def _learn_query_pattern(
+         self, query: str, constraint: Constraint, success: bool
+     ):
+         """Learn from query success/failure."""
+         if not self.enable_pattern_learning:
+             return
+
+         # Extract pattern from query
+         # This is a simplified version - could be enhanced with ML
+         pattern_key = f"{constraint.type.value}_custom"
+
+         if pattern_key not in self.query_patterns:
+             self.query_patterns[pattern_key] = QueryPattern(
+                 pattern=query, constraint_types=[constraint.type]
+             )
+
+         self._update_pattern_success(self.query_patterns[pattern_key], success)
+
+     def _update_constraint_relationships(
+         self, constraint1: Constraint, constraint2: Constraint, success: bool
+     ):
+         """Update constraint relationship tracking."""
+         if success:
+             # Strengthen the relationship
+             if constraint1.id not in self.constraint_relationships:
+                 self.constraint_relationships[constraint1.id] = []
+             if (
+                 constraint2.id
+                 not in self.constraint_relationships[constraint1.id]
+             ):
+                 self.constraint_relationships[constraint1.id].append(
+                     constraint2.id
+                 )
+
+             if constraint2.id not in self.constraint_relationships:
+                 self.constraint_relationships[constraint2.id] = []
+             if (
+                 constraint1.id
+                 not in self.constraint_relationships[constraint2.id]
+             ):
+                 self.constraint_relationships[constraint2.id].append(
+                     constraint1.id
+                 )
+
+     def _wikipedia_search(self) -> List[Candidate]:
+         """Search Wikipedia for general information."""
+         candidates = []
+
+         # Generic Wikipedia searches based on constraints
+         for constraint in self.constraints[:3]:
+             queries = [
+                 f"Wikipedia {constraint.value}",
+                 f"Wikipedia list {constraint.value}",
+                 f"Wikipedia {constraint.type.value} {constraint.value}",
+             ]
+
+             for query in queries:
+                 if query not in self.failed_queries:
+                     results = self._execute_adaptive_search(query)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, query
+                         )
+                     )
+                     candidates.extend(extracted)
+
+                     if not extracted:
+                         self.failed_queries.add(query)
+
+         return candidates
+
+     def _domain_specific_search(self) -> List[Candidate]:
+         """Search domain-specific sources based on constraint types."""
+         candidates = []
+
+         # Map constraint types to relevant domains
+         domain_map = {
+             ConstraintType.PROPERTY: ["database", "encyclopedia", "wiki"],
+             ConstraintType.EVENT: ["timeline", "history", "archive"],
+             ConstraintType.STATISTIC: ["data", "statistics", "numbers"],
+             ConstraintType.LOCATION: ["geography", "places", "maps"],
+         }
+
+         for constraint in self.constraints[:3]:
+             domains = domain_map.get(constraint.type, ["general"])
+
+             for domain in domains:
+                 query = f"{domain} {constraint.value}"
+
+                 if query not in self.failed_queries:
+                     results = self._execute_adaptive_search(query)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, query
+                         )
+                     )
+                     candidates.extend(extracted)
+
+                     if not extracted:
+                         self.failed_queries.add(query)
+
+         return candidates
+
+     def _aggressive_supplemental_search(self):
+         """Aggressive supplemental search when we need more candidates."""
+         additional_candidates = []
+
+         # Try different query formulations
+         query_templates = [
+             "list of {value}",
+             "examples of {value}",
+             "types of {value}",
+             "{value} instances",
+             "specific {value}",
+             "named {value}",
+         ]
+
+         for constraint in self.constraints:
+             for template in query_templates:
+                 query = template.replace("{value}", constraint.value)
+
+                 if query not in self.failed_queries:
+                     results = self._execute_adaptive_search(query)
+                     extracted = (
+                         self._extract_candidates_with_enhanced_validation(
+                             results, query
+                         )
+                     )
+                     additional_candidates.extend(extracted)
+
+                     if len(additional_candidates) > 50:  # Enough candidates
+                         break
+
+             if len(additional_candidates) > 50:
+                 break
+
+         self._add_unique_candidates(additional_candidates)
+
+     def _extract_candidates_with_enhanced_validation(
+         self, results: Dict, query: str
+     ) -> List[Candidate]:
+         """Extract candidates with better validation."""
+         candidates = self._extract_candidates_from_results(results, query)
+
+         # Additional validation to filter out non-candidates
+         validated = []
+         for candidate in candidates:
+             # Basic validation - not too short, not a common word
+             if len(candidate.name) > 2 and not self._is_common_word(
+                 candidate.name
+             ):
+                 validated.append(candidate)
+
+         return validated[:15]  # Limit per search
+
+     def _is_common_word(self, name: str) -> bool:
+         """Check if a name is likely a common word rather than a proper name."""
+         common_words = {
+             "the",
+             "and",
+             "or",
+             "but",
+             "in",
+             "on",
+             "at",
+             "to",
+             "for",
+             "of",
+             "with",
+             "by",
+             "from",
+             "up",
+             "about",
+             "into",
+             "it",
+             "is",
+             "was",
+             "are",
+         }
+         return name.lower() in common_words
+
+     def _score_and_prune_adaptive(self):
+         """Score and prune candidates with adaptive thresholds."""
+         old_count = len(self.candidates)
+
+         # Use the parent's scoring method
+         super()._score_and_prune_candidates()
+
+         # Additional adaptive pruning based on iteration
+         if self.iteration > self.max_iterations / 2:
+             # Be more aggressive in later iterations
+             min_score = (
+                 max(0.3, self.candidates[0].score * 0.4)
+                 if self.candidates
+                 else 0.3
+             )
+             self.candidates = [
+                 c for c in self.candidates if c.score >= min_score
+             ]
+
+         if self.progress_callback:
+             pruned = old_count - len(self.candidates)
+             self.progress_callback(
+                 f"Scored candidates - kept {len(self.candidates)}, pruned {pruned}",
+                 None,
+                 {
+                     "phase": "scoring_complete",
+                     "candidates_kept": len(self.candidates),
+                     "candidates_pruned": pruned,
+                     "top_score": self.candidates[0].score
+                     if self.candidates
+                     else 0,
+                     "min_score_threshold": (
+                         min_score
+                         if self.iteration > self.max_iterations / 2
+                         else "adaptive"
+                     ),
+                 },
+             )
+
+     def _enhanced_final_verification(self):
+         """Enhanced final verification with source diversity."""
+         # Use parent's verification as base
+         super()._final_verification()
+
+         # Additional verification with unused sources
+         if self.candidates:
+             top_candidate = self.candidates[0]
+
+             # Find least-used sources
+             unused_sources = [
+                 s for s in self.source_profiles.values() if s.usage_count < 2
+             ]
+
+             for source in unused_sources[:2]:
+                 weak_constraints = [
+                     c
+                     for c in self.constraints
+                     if c.id not in top_candidate.evidence
+                     or top_candidate.evidence[c.id].confidence
+                     < self.evidence_threshold
+                 ]
+
+                 if weak_constraints:
+                     constraint = weak_constraints[0]
+                     query = self._generate_evidence_query(
+                         top_candidate, constraint
+                     )
+
+                     # Use specific source
+                     results = self._execute_search_with_source(
+                         query, source.source_name
+                     )
+                     evidence = self.evidence_evaluator.extract_evidence(
+                         results.get("current_knowledge", ""),
+                         top_candidate.name,
+                         constraint,
+                     )
+
+                     if evidence.confidence > (
+                         top_candidate.evidence.get(
+                             constraint.id, Evidence(claim="", confidence=0)
+                         ).confidence
+                     ):
+                         top_candidate.add_evidence(constraint.id, evidence)
+
+             # Final re-scoring
+             self._score_and_prune_adaptive()
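For orientation, here is a minimal usage sketch of the strategy this hunk adds, assuming the hunk is evidence_based_strategy_v2.py (the only new file listed with +1337 -0). The constructor arguments come from the __init__ signature above; the ChatOllama backend, the None stand-in for the search engine, and the direct assignment of progress_callback are illustrative assumptions (search is typed as Any, and the methods above only ever read self.progress_callback):

from langchain_ollama import ChatOllama  # assumed backend; any BaseChatModel works

from local_deep_research.advanced_search_system.strategies.evidence_based_strategy_v2 import (
    EnhancedEvidenceBasedStrategy,
)

model = ChatOllama(model="llama3")  # hypothetical local model choice
search = None  # assumption: replace with the app's configured search engine

strategy = EnhancedEvidenceBasedStrategy(
    model=model,
    search=search,
    all_links_of_system=[],  # shared link list; analyze_topic() clears it
    max_iterations=10,  # shorter run than the default of 20
    min_candidates_threshold=10,
    enable_pattern_learning=True,
)

# Optional: observe the (message, progress, metadata) events emitted above.
strategy.progress_callback = lambda msg, pct, meta: print(pct, meta.get("phase"), msg)

result = strategy.analyze_topic(
    "Which TV character has a distinctive scar and appeared in over 100 episodes?"
)

analyze_topic() drives the whole pipeline shown in the hunk: constraint extraction, the seven-stage candidate discovery, iterative evidence gathering with source-diversity tracking, final verification, and synthesis of the returned result dictionary.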