PyPI - local-deep-research - Versions diffs - 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl - Mend

local-deep-research 0.4.4py3-none-any.whl → 0.5.2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

local_deep_research/advanced_search_system/strategies/concurrent_dual_confidence_strategy.py ADDED Viewed

@@ -0,0 +1,446 @@
+"""
+Concurrent dual confidence strategy with progressive search.
+Key features:
+1. Starts with all constraints combined for maximum specificity
+2. Evaluates candidates concurrently as they're found
+3. Progressively loosens constraints if needed
+4. Uses early rejection from dual confidence
+5. Dynamic stopping criteria instead of fixed limits
+"""
+import concurrent.futures
+import threading
+import time
+from dataclasses import dataclass, field
+from typing import List, Optional, Tuple
+from loguru import logger
+from ..candidates.base_candidate import Candidate
+from ..constraints.base_constraint import Constraint, ConstraintType
+from .dual_confidence_with_rejection import DualConfidenceWithRejectionStrategy
+@dataclass
+class SearchState:
+    """Tracks the current state of the concurrent search."""
+    good_candidates: List[Tuple[Candidate, float]] = field(default_factory=list)
+    total_evaluated: int = 0
+    start_time: float = field(default_factory=time.time)
+    remaining_constraints: List[Constraint] = field(default_factory=list)
+    candidates_lock: threading.Lock = field(default_factory=threading.Lock)
+    stop_search: threading.Event = field(default_factory=threading.Event)
+    evaluation_futures: List[concurrent.futures.Future] = field(
+        default_factory=list
+    )
+class ConcurrentDualConfidenceStrategy(DualConfidenceWithRejectionStrategy):
+    """
+    Enhanced strategy that combines concurrent evaluation with progressive search.
+    """
+    def __init__(
+        self,
+        *args,
+        # Concurrent execution settings
+        max_workers: int = 10,
+        # Candidate targets
+        min_good_candidates: int = 3,
+        target_candidates: int = 5,
+        max_candidates: int = 10,
+        # Quality thresholds
+        min_score_threshold: float = 0.65,
+        exceptional_score: float = 0.95,
+        quality_plateau_threshold: float = 0.1,
+        # Time and resource limits
+        max_search_time: float = 30.0,
+        max_evaluations: int = 30,
+        # Search behavior
+        initial_search_timeout: float = 5.0,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        # Thread pool for concurrent evaluations
+        self.evaluation_executor = concurrent.futures.ThreadPoolExecutor(
+            max_workers=max_workers
+        )
+        # Candidate thresholds
+        self.min_good_candidates = min_good_candidates
+        self.target_candidates = target_candidates
+        self.max_candidates = max_candidates
+        # Quality settings
+        self.min_score_threshold = min_score_threshold
+        self.exceptional_score = exceptional_score
+        self.quality_plateau_threshold = quality_plateau_threshold
+        # Resource limits
+        self.max_search_time = max_search_time
+        self.max_evaluations = max_evaluations
+        self.initial_search_timeout = initial_search_timeout
+        # Search state
+        self.state: Optional[SearchState] = None
+    def find_relevant_information(self):
+        """Override to use concurrent search and evaluation."""
+        # Initialize state
+        self.state = SearchState(
+            remaining_constraints=self.constraint_ranking.copy(),
+            start_time=time.time(),
+        )
+        # Classify constraints by difficulty
+        self._classify_constraint_difficulty()
+        # Start progressive search with concurrent evaluation
+        try:
+            self._progressive_search_with_concurrent_eval()
+        finally:
+            # Clean up thread pool
+            self.evaluation_executor.shutdown(wait=False)
+        # Return best candidates
+        self.candidates = [
+            c
+            for c, _ in sorted(
+                self.state.good_candidates, key=lambda x: x[1], reverse=True
+            )[: self.max_candidates]
+        ]
+        logger.info(
+            f"Found {len(self.candidates)} candidates after concurrent search"
+        )
+    def _classify_constraint_difficulty(self):
+        """Rate constraints by how difficult they are to search for."""
+        difficulty_keywords = {
+            # Abstract/subjective (very hard)
+            "complex": 0.9,
+            "deep": 0.9,
+            "emotional": 0.9,
+            "acclaimed": 0.8,
+            "understanding": 0.9,
+            "sophisticated": 0.9,
+            "nuanced": 0.9,
+            # Relative terms (hard)
+            "famous": 0.7,
+            "popular": 0.7,
+            "well-known": 0.7,
+            "notable": 0.7,
+            # Time ranges (medium)
+            "between": 0.5,
+            "during": 0.5,
+            "era": 0.5,
+            "period": 0.5,
+            # Specific facts (easy)
+            "aired": 0.2,
+            "released": 0.2,
+            "episode": 0.1,
+            "season": 0.1,
+            "character": 0.2,
+            "show": 0.2,
+            "movie": 0.2,
+            "series": 0.2,
+            # Named entities (easiest)
+            "hbo": 0.1,
+            "netflix": 0.1,
+            "disney": 0.1,
+            "marvel": 0.1,
+        }
+        for constraint in self.state.remaining_constraints:
+            difficulty = 0.5  # Default medium difficulty
+            constraint_lower = constraint.value.lower()
+            for keyword, score in difficulty_keywords.items():
+                if keyword in constraint_lower:
+                    difficulty = max(difficulty, score)
+            # Constraint type also affects difficulty
+            if constraint.type == ConstraintType.PROPERTY:
+                difficulty = min(difficulty, 0.6)
+            elif constraint.type == ConstraintType.EVENT:
+                difficulty = max(difficulty, 0.7)
+            # Store difficulty as attribute
+            constraint.search_difficulty = difficulty
+        # Sort constraints by difficulty (hardest first, so we can drop them first)
+        self.state.remaining_constraints.sort(
+            key=lambda c: getattr(c, "search_difficulty", 0.5), reverse=True
+        )
+    def _progressive_search_with_concurrent_eval(self):
+        """Main search loop with concurrent evaluation."""
+        iteration = 0
+        while (
+            not self.state.stop_search.is_set()
+            and self.state.remaining_constraints
+        ):
+            iteration += 1
+            # Build query from current constraints
+            query = self._build_combined_query(self.state.remaining_constraints)
+            logger.info(
+                f"Search iteration {iteration}: {len(self.state.remaining_constraints)} constraints"
+            )
+            if self.progress_callback:
+                self.progress_callback(
+                    f"Searching with {len(self.state.remaining_constraints)} constraints",
+                    min(20 + (iteration * 10), 80),
+                    {
+                        "phase": "concurrent_search",
+                        "iteration": iteration,
+                        "constraints": len(self.state.remaining_constraints),
+                        "good_candidates": len(self.state.good_candidates),
+                    },
+                )
+            # Execute search
+            try:
+                search_results = self._execute_search(query)
+                new_candidates = self._extract_relevant_candidates(
+                    search_results,
+                    self.state.remaining_constraints[
+                        0
+                    ],  # Use most important constraint
+                )
+                logger.info(
+                    f"Found {len(new_candidates)} candidates in iteration {iteration}"
+                )
+                # Spawn evaluation threads for each candidate
+                for candidate in new_candidates:
+                    if self.state.stop_search.is_set():
+                        break
+                    if self.state.total_evaluated >= self.max_evaluations:
+                        logger.info("Reached maximum evaluations limit")
+                        self.state.stop_search.set()
+                        break
+                    # Check if we already evaluated this candidate
+                    if self._is_candidate_evaluated(candidate):
+                        continue
+                    # Submit for evaluation
+                    future = self.evaluation_executor.submit(
+                        self._evaluate_candidate_thread, candidate
+                    )
+                    self.state.evaluation_futures.append(future)
+                    self.state.total_evaluated += 1
+            except Exception as e:
+                logger.error(f"Search error in iteration {iteration}: {e}")
+            # Check completed evaluations
+            self._check_evaluation_results()
+            # Determine if we should stop
+            if self._should_stop_search():
+                logger.info("Stopping criteria met")
+                break
+            # Drop hardest constraint if we haven't found enough
+            if len(self.state.good_candidates) < self.min_good_candidates:
+                if len(self.state.remaining_constraints) > 1:
+                    dropped = self.state.remaining_constraints.pop(
+                        0
+                    )  # Remove hardest
+                    logger.info(
+                        f"Dropping constraint: {dropped.value} (difficulty: {getattr(dropped, 'search_difficulty', 0.5):.2f})"
+                    )
+                else:
+                    logger.info("No more constraints to drop")
+                    break
+        # Wait for remaining evaluations
+        self._finalize_evaluations()
+    def _build_combined_query(self, constraints: List[Constraint]) -> str:
+        """Build a query combining all constraints with AND logic."""
+        terms = []
+        for constraint in constraints:
+            value = constraint.value
+            # Quote multi-word values only if they don't already have quotes
+            if " " in value and '"' not in value:
+                value = f'"{value}"'
+            terms.append(value)
+        return " ".join(
+            terms
+        )  # Use space instead of AND for more natural queries
+    def _evaluate_candidate_thread(
+        self, candidate: Candidate
+    ) -> Tuple[Candidate, float]:
+        """Evaluate a candidate in a separate thread."""
+        try:
+            thread_name = threading.current_thread().name
+            logger.info(
+                f"[{thread_name}] Starting evaluation of {candidate.name}"
+            )
+            # Use parent's evaluation with early rejection
+            score = self._evaluate_candidate_immediately(candidate)
+            # Log result
+            if score >= self.min_score_threshold:
+                logger.info(
+                    f"[{thread_name}] ✓ {candidate.name} passed (score: {score:.3f})"
+                )
+                # Add to good candidates
+                with self.state.candidates_lock:
+                    self.state.good_candidates.append((candidate, score))
+                    # Check if we should stop
+                    if self._should_stop_search():
+                        logger.info("Stopping criteria met after evaluation")
+                        self.state.stop_search.set()
+            else:
+                logger.info(
+                    f"[{thread_name}] ❌ {candidate.name} rejected (score: {score:.3f})"
+                )
+            return (candidate, score)
+        except Exception as e:
+            logger.error(
+                f"Error evaluating {candidate.name}: {e}", exc_info=True
+            )
+            return (candidate, 0.0)
+    def _check_evaluation_results(self):
+        """Check completed evaluation futures without blocking."""
+        completed = []
+        for future in self.state.evaluation_futures:
+            if future.done():
+                completed.append(future)
+                try:
+                    future.result()
+                    # Result is already processed in the thread
+                except Exception as e:
+                    logger.error(f"Failed to get future result: {e}")
+        # Remove completed futures
+        for future in completed:
+            self.state.evaluation_futures.remove(future)
+    def _should_stop_search(self) -> bool:
+        """Determine if we should stop searching based on multiple criteria."""
+        # Always respect the stop flag
+        if self.state.stop_search.is_set():
+            return True
+        num_good = len(self.state.good_candidates)
+        # 1. Maximum candidates reached
+        if num_good >= self.max_candidates:
+            logger.info(f"Maximum candidates reached ({self.max_candidates})")
+            return True
+        # 2. Target reached with good quality
+        if num_good >= self.target_candidates:
+            avg_score = sum(s for _, s in self.state.good_candidates) / num_good
+            if avg_score >= 0.8:
+                logger.info(
+                    f"Target reached with high quality (avg: {avg_score:.3f})"
+                )
+                return True
+        # 3. Minimum satisfied with exceptional candidates
+        if num_good >= self.min_good_candidates:
+            top_score = max(s for _, s in self.state.good_candidates)
+            if top_score >= self.exceptional_score:
+                logger.info(
+                    f"Exceptional candidate found (score: {top_score:.3f})"
+                )
+                return True
+        # 4. Time limit reached
+        elapsed = time.time() - self.state.start_time
+        if elapsed > self.max_search_time:
+            logger.info(f"Time limit reached ({elapsed:.1f}s)")
+            return True
+        # 5. Too many evaluations
+        if self.state.total_evaluated >= self.max_evaluations:
+            logger.info(f"Evaluation limit reached ({self.max_evaluations})")
+            return True
+        # 6. No more constraints and sufficient candidates
+        if (
+            not self.state.remaining_constraints
+            and num_good >= self.min_good_candidates
+        ):
+            logger.info("No more constraints and minimum candidates found")
+            return True
+        # 7. Quality plateau detection
+        if num_good >= 5:
+            recent_scores = [s for _, s in self.state.good_candidates[-5:]]
+            score_range = max(recent_scores) - min(recent_scores)
+            if score_range < self.quality_plateau_threshold:
+                logger.info(
+                    f"Quality plateau detected (range: {score_range:.3f})"
+                )
+                return True
+        return False
+    def _is_candidate_evaluated(self, candidate: Candidate) -> bool:
+        """Check if we already evaluated this candidate."""
+        with self.state.candidates_lock:
+            return any(
+                c.name == candidate.name for c, _ in self.state.good_candidates
+            )
+    def _finalize_evaluations(self):
+        """Wait for or cancel remaining evaluations."""
+        if self.state.evaluation_futures:
+            logger.info(
+                f"Finalizing {len(self.state.evaluation_futures)} remaining evaluations"
+            )
+            # Give them a short time to complete
+            wait_time = min(
+                5.0,
+                self.max_search_time - (time.time() - self.state.start_time),
+            )
+            if wait_time > 0:
+                concurrent.futures.wait(
+                    self.state.evaluation_futures,
+                    timeout=wait_time,
+                    return_when=concurrent.futures.FIRST_COMPLETED,
+                )
+            # Cancel any still running
+            for future in self.state.evaluation_futures:
+                if not future.done():
+                    future.cancel()
+        # Final report
+        logger.info(
+            f"""
+Search completed:
+- Total evaluated: {self.state.total_evaluated}
+- Good candidates found: {len(self.state.good_candidates)}
+- Time taken: {time.time() - self.state.start_time:.1f}s
+- Final constraints: {len(self.state.remaining_constraints)}
+"""
+        )

local-deep-research 0.4.4__py3-none-any.whl → 0.5.2__py3-none-any.whl

local-deep-research 0.4.4py3-none-any.whl → 0.5.2py3-none-any.whl