PyPI - local-deep-research - Versions diffs - 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

local-deep-research 0.4.4py3-none-any.whl → 0.5.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py ADDED Viewed

@@ -0,0 +1,255 @@
+"""
+Progressive explorer for BrowseComp-style systematic search exploration.
+"""
+import concurrent.futures
+import logging
+from dataclasses import dataclass, field
+from typing import Dict, List, Set, Tuple
+logger = logging.getLogger(__name__)
+@dataclass
+class SearchProgress:
+    """Track search progress and findings."""
+    searched_terms: Set[str] = field(default_factory=set)
+    found_candidates: Dict[str, float] = field(
+        default_factory=dict
+    )  # name -> confidence
+    verified_facts: Dict[str, str] = field(
+        default_factory=dict
+    )  # fact -> source
+    entity_coverage: Dict[str, Set[str]] = field(
+        default_factory=dict
+    )  # entity_type -> searched_entities
+    search_depth: int = 0
+    def update_coverage(self, entity_type: str, entity: str):
+        """Update entity coverage tracking."""
+        if entity_type not in self.entity_coverage:
+            self.entity_coverage[entity_type] = set()
+        self.entity_coverage[entity_type].add(entity.lower())
+    def get_uncovered_entities(
+        self, entities: Dict[str, List[str]]
+    ) -> Dict[str, List[str]]:
+        """Get entities that haven't been searched yet."""
+        uncovered = {}
+        for entity_type, entity_list in entities.items():
+            covered = self.entity_coverage.get(entity_type, set())
+            uncovered_list = [
+                e for e in entity_list if e.lower() not in covered
+            ]
+            if uncovered_list:
+                uncovered[entity_type] = uncovered_list
+        return uncovered
+class ProgressiveExplorer:
+    """
+    Explorer that implements progressive search strategies for BrowseComp.
+    Key features:
+    1. Tracks search progress to avoid redundancy
+    2. Progressively combines entities
+    3. Identifies and pursues promising candidates
+    4. Maintains simple approach without over-filtering
+    """
+    def __init__(self, search_engine, model):
+        self.search_engine = search_engine
+        self.model = model
+        self.progress = SearchProgress()
+        self.max_results_per_search = 20  # Keep more results
+    def explore(
+        self,
+        queries: List[str],
+        constraints: List = None,
+        max_workers: int = 5,
+        extracted_entities: Dict[str, List[str]] = None,
+    ) -> Tuple[List, SearchProgress]:
+        """
+        Execute progressive exploration with entity tracking.
+        Returns both candidates and search progress for strategy use.
+        """
+        all_results = []
+        extracted_entities = extracted_entities or {}
+        # Execute searches in parallel (like source-based strategy)
+        search_results = self._parallel_search(queries, max_workers)
+        # Process results without filtering (trust the LLM later)
+        for query, results in search_results:
+            self.progress.searched_terms.add(query.lower())
+            # Track which entities were covered in this search
+            self._update_entity_coverage(query, extracted_entities)
+            # Extract any specific names/candidates from results
+            candidates = self._extract_candidates_from_results(results, query)
+            for candidate_name, confidence in candidates.items():
+                if candidate_name in self.progress.found_candidates:
+                    # Update confidence if higher
+                    self.progress.found_candidates[candidate_name] = max(
+                        self.progress.found_candidates[candidate_name],
+                        confidence,
+                    )
+                else:
+                    self.progress.found_candidates[candidate_name] = confidence
+            # Keep all results for final synthesis
+            all_results.extend(results)
+        self.progress.search_depth += 1
+        # Return both results and progress
+        return all_results, self.progress
+    def generate_verification_searches(
+        self,
+        candidates: Dict[str, float],
+        constraints: List,
+        max_searches: int = 5,
+    ) -> List[str]:
+        """Generate targeted searches to verify top candidates."""
+        if not candidates:
+            return []
+        # Get top candidates by confidence
+        top_candidates = sorted(
+            candidates.items(), key=lambda x: x[1], reverse=True
+        )[:3]
+        verification_searches = []
+        for candidate_name, confidence in top_candidates:
+            # Generate verification searches for this candidate
+            for constraint in constraints[:2]:  # Verify top constraints
+                search = f'"{candidate_name}" {constraint.description}'
+                if search.lower() not in self.progress.searched_terms:
+                    verification_searches.append(search)
+        return verification_searches[:max_searches]
+    def _extract_candidates_from_results(
+        self, results: List[Dict], query: str
+    ) -> Dict[str, float]:
+        """Extract potential answer candidates from search results."""
+        candidates = {}
+        # Simple extraction based on titles and snippets
+        for result in results[:10]:  # Focus on top results
+            title = result.get("title", "")
+            snippet = result.get("snippet", "")
+            # Look for proper nouns and specific names
+            # This is simplified - in practice, might use NER or more sophisticated extraction
+            combined_text = f"{title} {snippet}"
+            # Extract quoted terms as potential candidates
+            import re
+            quoted_terms = re.findall(r'"([^"]+)"', combined_text)
+            for term in quoted_terms:
+                if (
+                    len(term) > 2 and len(term) < 50
+                ):  # Reasonable length for an answer
+                    candidates[term] = 0.3  # Base confidence from appearance
+            # Boost confidence if appears in title
+            if title:
+                # Titles often contain the actual answer
+                title_words = title.split()
+                for i in range(len(title_words)):
+                    for j in range(i + 1, min(i + 4, len(title_words) + 1)):
+                        phrase = " ".join(title_words[i:j])
+                        if (
+                            len(phrase) > 3 and phrase[0].isupper()
+                        ):  # Likely proper noun
+                            candidates[phrase] = candidates.get(phrase, 0) + 0.2
+        return candidates
+    def _update_entity_coverage(
+        self, query: str, entities: Dict[str, List[str]]
+    ):
+        """Track which entities have been covered in searches."""
+        query_lower = query.lower()
+        for entity_type, entity_list in entities.items():
+            for entity in entity_list:
+                if entity.lower() in query_lower:
+                    self.progress.update_coverage(entity_type, entity)
+    def suggest_next_searches(
+        self, entities: Dict[str, List[str]], max_suggestions: int = 5
+    ) -> List[str]:
+        """Suggest next searches based on coverage and findings."""
+        suggestions = []
+        # 1. Check uncovered entities
+        uncovered = self.progress.get_uncovered_entities(entities)
+        # 2. If we have candidates, verify them with uncovered constraints
+        if self.progress.found_candidates:
+            top_candidate = max(
+                self.progress.found_candidates.items(), key=lambda x: x[1]
+            )[0]
+            # Combine candidate with uncovered entities
+            for entity_type, entity_list in uncovered.items():
+                for entity in entity_list[:2]:
+                    search = f'"{top_candidate}" {entity}'
+                    if search.lower() not in self.progress.searched_terms:
+                        suggestions.append(search)
+        # 3. Otherwise, create new combinations of uncovered entities
+        else:
+            # Focus on systematic coverage
+            if uncovered.get("temporal"):
+                # Year-by-year with key term
+                key_term = (
+                    entities.get("names", [""])[0]
+                    or entities.get("descriptors", [""])[0]
+                )
+                for year in uncovered["temporal"][:3]:
+                    search = f"{key_term} {year}".strip()
+                    if search.lower() not in self.progress.searched_terms:
+                        suggestions.append(search)
+            if uncovered.get("names") and uncovered.get("descriptors"):
+                # Combine names with descriptors
+                for name in uncovered["names"][:2]:
+                    for desc in uncovered["descriptors"][:2]:
+                        search = f"{name} {desc}"
+                        if search.lower() not in self.progress.searched_terms:
+                            suggestions.append(search)
+        return suggestions[:max_suggestions]
+    def _parallel_search(
+        self, queries: List[str], max_workers: int
+    ) -> List[Tuple[str, List[Dict]]]:
+        """Execute searches in parallel and return results."""
+        results = []
+        def search_query(query):
+            try:
+                search_results = self.search_engine.run(query)
+                return (query, search_results or [])
+            except Exception as e:
+                logger.error(f"Error searching '{query}': {str(e)}")
+                return (query, [])
+        # Run searches in parallel
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=max_workers
+        ) as executor:
+            futures = [executor.submit(search_query, q) for q in queries]
+            for future in concurrent.futures.as_completed(futures):
+                results.append(future.result())
+        return results

local_deep_research/advanced_search_system/candidates/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+# Candidates System Package
+from .base_candidate import Candidate
+__all__ = ["Candidate"]

local_deep_research/advanced_search_system/candidates/base_candidate.py ADDED Viewed

@@ -0,0 +1,59 @@
+"""
+Base candidate class for tracking potential answers.
+"""
+from dataclasses import dataclass, field
+from typing import Any, Dict, List
+from ..constraints.base_constraint import Constraint
+from ..evidence.base_evidence import Evidence
+@dataclass
+class Candidate:
+    """A potential answer with supporting evidence."""
+    name: str
+    evidence: Dict[str, Evidence] = field(default_factory=dict)
+    score: float = 0.0
+    metadata: Dict[str, Any] = field(default_factory=dict)
+    def add_evidence(self, constraint_id: str, evidence: Evidence):
+        """Add evidence for a constraint."""
+        self.evidence[constraint_id] = evidence
+    def calculate_score(self, constraints: List[Constraint]) -> float:
+        """Calculate overall score based on evidence and constraints."""
+        if not constraints:
+            return 0.0
+        total_score = 0.0
+        total_weight = 0.0
+        for constraint in constraints:
+            evidence = self.evidence.get(constraint.id)
+            if evidence:
+                score = evidence.confidence * constraint.weight
+                total_score += score
+            total_weight += constraint.weight
+        self.score = total_score / total_weight if total_weight > 0 else 0.0
+        return self.score
+    def get_unverified_constraints(
+        self, constraints: List[Constraint]
+    ) -> List[Constraint]:
+        """Get constraints that don't have evidence yet."""
+        unverified = []
+        for constraint in constraints:
+            if constraint.id not in self.evidence:
+                unverified.append(constraint)
+        return unverified
+    def get_weak_evidence(self, threshold: float = 0.5) -> List[str]:
+        """Get constraint IDs with weak evidence."""
+        weak = []
+        for constraint_id, evidence in self.evidence.items():
+            if evidence.confidence < threshold:
+                weak.append(constraint_id)
+        return weak

local_deep_research/advanced_search_system/constraint_checking/README.md ADDED Viewed

@@ -0,0 +1,150 @@
+# Constraint Checking System
+This module provides an inheritance-based constraint checking system for validating candidates against constraints in the Local Deep Research framework.
+## Architecture
+The system is built around inheritance and provides multiple implementations:
+### Base Class
+- **`BaseConstraintChecker`**: Abstract base class defining the interface
+### Concrete Implementations
+- **`DualConfidenceChecker`**: Uses positive/negative/uncertainty confidence scoring
+- **`ThresholdChecker`**: Simple threshold-based checking
+- **`StrictChecker`**: Example of very strict constraint validation
+### Supporting Components
+- **`EvidenceAnalyzer`**: Analyzes evidence using dual confidence scoring
+- **`RejectionEngine`**: Makes rejection decisions based on evidence
+- **`ConstraintCheckResult`**: Data class containing evaluation results
+## Usage Examples
+### Using DualConfidenceChecker
+```python
+from local_deep_research.advanced_search_system.constraint_checking import DualConfidenceChecker
+checker = DualConfidenceChecker(
+    model=llm,
+    evidence_gatherer=evidence_function,
+    negative_threshold=0.25,  # Reject if negative evidence > 25%
+    positive_threshold=0.4,   # Reject if positive evidence < 40%
+)
+result = checker.check_candidate(candidate, constraints)
+```
+### Using ThresholdChecker
+```python
+from local_deep_research.advanced_search_system.constraint_checking import ThresholdChecker
+checker = ThresholdChecker(
+    model=llm,
+    evidence_gatherer=evidence_function,
+    satisfaction_threshold=0.7,     # Individual constraint threshold
+    required_satisfaction_rate=0.8  # Overall satisfaction rate needed
+)
+result = checker.check_candidate(candidate, constraints)
+```
+### Using StrictChecker
+```python
+from local_deep_research.advanced_search_system.constraint_checking import StrictChecker
+checker = StrictChecker(
+    model=llm,
+    evidence_gatherer=evidence_function,
+    strict_threshold=0.9,        # Very high threshold
+    name_pattern_required=True   # NAME_PATTERN constraints are mandatory
+)
+result = checker.check_candidate(candidate, constraints)
+```
+## Creating Custom Variants
+To create your own constraint checker variant:
+1. **Inherit from BaseConstraintChecker**:
+```python
+from .base_constraint_checker import BaseConstraintChecker, ConstraintCheckResult
+class MyCustomChecker(BaseConstraintChecker):
+    def __init__(self, *args, my_param=0.5, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.my_param = my_param
+```
+2. **Implement required methods**:
+```python
+    def check_candidate(self, candidate, constraints):
+        # Your implementation here
+        return ConstraintCheckResult(...)
+    def should_reject_candidate(self, candidate, constraint, evidence_data):
+        # Your rejection logic here
+        return should_reject, reason
+```
+3. **Add custom logic**:
+```python
+    def _my_custom_evaluation(self, candidate, constraint):
+        # Your custom evaluation logic
+        pass
+```
+## Integration with Strategies
+Use in your strategy by initializing the checker:
+```python
+class MyStrategy(BaseStrategy):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Choose your constraint checker
+        self.constraint_checker = DualConfidenceChecker(
+            model=self.model,
+            evidence_gatherer=self._gather_evidence_for_constraint,
+            # ... other parameters
+        )
+    def _evaluate_candidate(self, candidate):
+        result = self.constraint_checker.check_candidate(candidate, self.constraints)
+        # Process result
+        candidate.evaluation_results = result.detailed_results
+        candidate.score = result.total_score
+        return result.total_score
+```
+## Available Checkers
+### DualConfidenceChecker
+- **Best for**: Nuanced evaluation with detailed confidence scoring
+- **Parameters**: `negative_threshold`, `positive_threshold`, `uncertainty_penalty`, `negative_weight`
+- **Output**: Detailed positive/negative/uncertainty scores per constraint
+### ThresholdChecker
+- **Best for**: Fast, simple constraint checking
+- **Parameters**: `satisfaction_threshold`, `required_satisfaction_rate`
+- **Output**: Simple satisfied/not satisfied per constraint
+### StrictChecker
+- **Best for**: Cases requiring very high confidence
+- **Parameters**: `strict_threshold`, `name_pattern_required`
+- **Output**: Binary pass/fail with strict requirements
+## Extending the System
+The inheritance-based design makes it easy to:
+1. **Create specialized checkers** for specific domains
+2. **Mix and match components** (e.g., use DualConfidence evidence analysis with custom rejection logic)
+3. **Add new constraint types** with custom handling
+4. **Implement domain-specific optimizations**
+See `strict_checker.py` for an example of creating a custom variant.

local_deep_research/advanced_search_system/constraint_checking/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""
+Constraint checking and candidate assessment system.
+This module provides inheritance-based components for checking candidates
+against constraints, with different implementations available.
+"""
+from .base_constraint_checker import (
+    BaseConstraintChecker,
+    ConstraintCheckResult,
+)
+# Legacy imports for backward compatibility
+from .constraint_checker import ConstraintChecker
+from .dual_confidence_checker import DualConfidenceChecker
+from .evidence_analyzer import ConstraintEvidence, EvidenceAnalyzer
+from .rejection_engine import RejectionEngine
+from .strict_checker import StrictChecker
+from .threshold_checker import ThresholdChecker
+__all__ = [
+    # Base classes
+    "BaseConstraintChecker",
+    "ConstraintCheckResult",
+    # Concrete implementations
+    "DualConfidenceChecker",
+    "ThresholdChecker",
+    "StrictChecker",
+    # Supporting components
+    "EvidenceAnalyzer",
+    "ConstraintEvidence",
+    "RejectionEngine",
+    # Legacy
+    "ConstraintChecker",
+]

local_deep_research/advanced_search_system/constraint_checking/base_constraint_checker.py ADDED Viewed

@@ -0,0 +1,122 @@
+"""
+Base constraint checker for inheritance-based constraint checking system.
+This module provides the base interface and common functionality for
+constraint checking implementations.
+"""
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
+from langchain_core.language_models import BaseChatModel
+from loguru import logger
+from ..candidates.base_candidate import Candidate
+from ..constraints.base_constraint import Constraint
+@dataclass
+class ConstraintCheckResult:
+    """Result of checking a candidate against all constraints.
+    This is the declared return type of ``BaseConstraintChecker.check_candidate``.
+    """
+    # The candidate that was evaluated.
+    candidate: Candidate
+    # Overall score for the candidate.
+    total_score: float
+    # Per-constraint score details; presumably keyed by constraint id or value — TODO confirm against the concrete checkers.
+    constraint_scores: Dict[str, Dict]
+    # Whether the checker decided the candidate should be rejected.
+    should_reject: bool
+    # Reason for the rejection. NOTE(review): presumably None when should_reject is False — confirm.
+    rejection_reason: Optional[str]
+    # Detailed evaluation records; the dict schema is not visible here and is implementation-defined.
+    detailed_results: List[Dict]
+class BaseConstraintChecker(ABC):
+    """
+    Base class for constraint checking implementations.
+    This provides the common interface and shared functionality that
+    all constraint checkers should implement.
+    """
+    def __init__(
+        self,
+        model: BaseChatModel,
+        evidence_gatherer=None,  # Will be passed in from strategy
+        **kwargs,
+    ):
+        """
+        Initialize the base constraint checker.
+        Args:
+            model: Language model for evidence analysis
+            evidence_gatherer: Function to gather evidence (from strategy)
+            **kwargs: Additional parameters for specific implementations
+        """
+        self.model = model
+        self.evidence_gatherer = evidence_gatherer
+    @abstractmethod
+    def check_candidate(
+        self, candidate: Candidate, constraints: List[Constraint]
+    ) -> ConstraintCheckResult:
+        """
+        Check a candidate against all constraints.
+        Args:
+            candidate: The candidate to check
+            constraints: List of constraints to check against
+        Returns:
+            ConstraintCheckResult: Complete evaluation result
+        """
+        pass
+    @abstractmethod
+    def should_reject_candidate(
+        self, candidate: Candidate, constraint: Constraint, evidence_data: any
+    ) -> Tuple[bool, str]:
+        """
+        Determine if a candidate should be rejected for a specific constraint.
+        Args:
+            candidate: The candidate being evaluated
+            constraint: The constraint being checked
+            evidence_data: Evidence data (format depends on implementation)
+        Returns:
+            Tuple[bool, str]: (should_reject, reason)
+        """
+        pass
+    def _gather_evidence_for_constraint(
+        self, candidate: Candidate, constraint: Constraint
+    ) -> List[Dict]:
+        """Gather evidence for a constraint using the provided evidence gatherer."""
+        if self.evidence_gatherer:
+            return self.evidence_gatherer(candidate, constraint)
+        else:
+            logger.warning(
+                "No evidence gatherer provided - cannot gather evidence"
+            )
+            return []
+    def _log_constraint_result(
+        self,
+        candidate: Candidate,
+        constraint: Constraint,
+        score: float,
+        details: Dict,
+    ):
+        """Log constraint evaluation result."""
+        symbol = "✓" if score >= 0.8 else "○" if score >= 0.5 else "✗"
+        logger.info(
+            f"{symbol} {candidate.name} | {constraint.value}: {int(score * 100)}%"
+        )
+    def _calculate_weighted_score(
+        self, constraint_scores: List[float], weights: List[float]
+    ) -> float:
+        """Calculate weighted average score."""
+        if not constraint_scores or not weights:
+            return 0.0
+        return sum(s * w for s, w in zip(constraint_scores, weights)) / sum(
+            weights
+        )

local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

local-deep-research 0.4.4py3-none-any.whl → 0.5.0py3-none-any.whl