PyPI - local-deep-research - Versions diffs - 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

local-deep-research 0.4.4py3-none-any.whl → 0.5.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

local_deep_research/advanced_search_system/strategies/adaptive_decomposition_strategy.py ADDED Viewed

@@ -0,0 +1,564 @@
+"""
+Adaptive Decomposition Strategy for step-by-step query analysis.
+This strategy dynamically adapts its approach based on intermediate findings,
+making decisions at each step rather than decomposing everything upfront.
+"""
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Dict, List, Optional
+from langchain_core.language_models import BaseChatModel
+from loguru import logger
+from ...utilities.search_utilities import format_findings, remove_think_tags
+from ..findings.repository import FindingsRepository
+from ..questions.standard_question import StandardQuestionGenerator
+from .base_strategy import BaseSearchStrategy
+from .source_based_strategy import SourceBasedSearchStrategy
+class StepType(Enum):
+    """Types of steps in the adaptive process."""
+    CONSTRAINT_EXTRACTION = "constraint_extraction"
+    INITIAL_SEARCH = "initial_search"
+    VERIFICATION = "verification"
+    REFINEMENT = "refinement"
+    SYNTHESIS = "synthesis"
+@dataclass
+class StepResult:
+    """Result from a single adaptive step."""
+    step_type: StepType
+    description: str
+    findings: Dict
+    next_action: Optional[str] = None
+    confidence: float = 0.0
+class AdaptiveDecompositionStrategy(BaseSearchStrategy):
+    """
+    A strategy that adapts its decomposition based on intermediate findings.
+    Instead of decomposing everything upfront, it takes a step-by-step approach,
+    using each step's findings to inform the next action.
+    """
+    def __init__(
+        self,
+        model: BaseChatModel,
+        search: Any,
+        all_links_of_system: List[str],
+        max_steps: int = 15,
+        min_confidence: float = 0.8,
+        source_search_iterations: int = 2,
+        source_questions_per_iteration: int = 20,
+    ):
+        """Initialize the adaptive decomposition strategy.
+        Args:
+            model: The language model to use
+            search: The search engine instance
+            all_links_of_system: List to store all encountered links
+            max_steps: Maximum steps to prevent infinite loops
+            min_confidence: Minimum confidence to consider answer complete
+            source_search_iterations: Iterations for source-based searches
+            source_questions_per_iteration: Questions per iteration for source-based searches
+        """
+        super().__init__(all_links_of_system)
+        self.model = model
+        self.search = search
+        self.max_steps = max_steps
+        self.min_confidence = min_confidence
+        self.source_search_iterations = source_search_iterations
+        self.source_questions_per_iteration = source_questions_per_iteration
+        self.question_generator = StandardQuestionGenerator(model)
+        self.findings_repository = FindingsRepository(model)
+        self.step_results: List[StepResult] = []
+        self.working_knowledge = {}
+    def analyze_topic(self, query: str) -> Dict:
+        """Analyze a topic using adaptive decomposition.
+        Args:
+            query: The research query to analyze
+        Returns:
+            Dictionary containing analysis results
+        """
+        self.all_links_of_system.clear()
+        self.questions_by_iteration = []
+        self.findings = []
+        self.step_results = []
+        self.working_knowledge = {
+            "original_query": query,
+            "constraints": [],
+            "candidates": [],
+            "verified_facts": [],
+            "uncertainties": [],
+        }
+        # Progress callback for UI
+        if self.progress_callback:
+            self.progress_callback(
+                "Starting adaptive analysis",
+                1,
+                {"phase": "init", "strategy": "adaptive_decomposition"},
+            )
+        logger.info(f"Starting adaptive analysis for: {query}")
+        # Start with constraint extraction
+        current_step = 0
+        while current_step < self.max_steps:
+            # Decide next step based on current knowledge
+            next_step = self._decide_next_step(query, current_step)
+            if next_step is None:
+                break
+            logger.info(f"Step {current_step + 1}: {next_step.step_type.value}")
+            # Update progress
+            if self.progress_callback:
+                self.progress_callback(
+                    f"Step {current_step + 1}: {next_step.step_type.value}",
+                    int((current_step / self.max_steps) * 80) + 10,
+                    {
+                        "phase": "adaptive_step",
+                        "step": current_step + 1,
+                        "step_type": next_step.step_type.value,
+                    },
+                )
+            # Execute the step
+            step_result = self._execute_step(next_step, query)
+            self.step_results.append(step_result)
+            # Check if we have a confident answer
+            if step_result.confidence >= self.min_confidence:
+                logger.info(
+                    f"Confident answer found (confidence: {step_result.confidence})"
+                )
+                break
+            current_step += 1
+        # Final synthesis of all findings
+        final_result = self._synthesize_findings(query)
+        # Update progress
+        if self.progress_callback:
+            self.progress_callback(
+                "Analysis complete",
+                100,
+                {"phase": "complete", "strategy": "adaptive_decomposition"},
+            )
+        return final_result
+    def _decide_next_step(
+        self, query: str, step_count: int
+    ) -> Optional[StepResult]:
+        """Decide the next step based on current knowledge.
+        Args:
+            query: The original query
+            step_count: Current step number
+        Returns:
+            Next step to execute, or None if complete
+        """
+        # Format current knowledge for analysis
+        knowledge_summary = f"""
+Original Query: {query}
+Current Knowledge:
+- Extracted Constraints: {self.working_knowledge.get("constraints", [])}
+- Candidate Locations: {self.working_knowledge.get("candidates", [])}
+- Verified Facts: {self.working_knowledge.get("verified_facts", [])}
+- Uncertainties: {self.working_knowledge.get("uncertainties", [])}
+Previous Steps: {[s.step_type.value for s in self.step_results]}
+"""
+        prompt = f"""Based on the current knowledge, decide the next best action.
+{knowledge_summary}
+What should be the next step? Options:
+1. CONSTRAINT_EXTRACTION - Extract specific constraints from the query
+2. INITIAL_SEARCH - Perform broad search for candidates
+3. VERIFICATION - Verify specific facts about candidates
+4. REFINEMENT - Refine search based on new information
+5. SYNTHESIS - We have enough information to answer
+Respond with:
+NEXT_STEP: [step type]
+DESCRIPTION: [what specifically to do]
+REASONING: [why this is the best next step]
+CONFIDENCE: [0-1 score of how confident we are in the current answer]
+"""
+        response = self.model.invoke(prompt)
+        content = remove_think_tags(response.content)
+        # Parse response
+        lines = content.strip().split("\n")
+        next_step_type = None
+        description = ""
+        confidence = 0.0
+        for line in lines:
+            if line.startswith("NEXT_STEP:"):
+                step_str = line.split(":", 1)[1].strip()
+                try:
+                    next_step_type = StepType[step_str]
+                except KeyError:
+                    # Try to match partial
+                    for step_type in StepType:
+                        if step_type.value in step_str.lower():
+                            next_step_type = step_type
+                            break
+            elif line.startswith("DESCRIPTION:"):
+                description = line.split(":", 1)[1].strip()
+            elif line.startswith("CONFIDENCE:"):
+                try:
+                    confidence = float(line.split(":", 1)[1].strip())
+                except ValueError:
+                    confidence = 0.0
+        if next_step_type is None:
+            return None
+        return StepResult(
+            step_type=next_step_type,
+            description=description,
+            findings={},
+            confidence=confidence,
+        )
+    def _execute_step(self, step: StepResult, query: str) -> StepResult:
+        """Execute a specific step in the adaptive process.
+        Args:
+            step: The step to execute
+            query: The original query
+        Returns:
+            Updated step result with findings
+        """
+        if step.step_type == StepType.CONSTRAINT_EXTRACTION:
+            return self._extract_constraints(query)
+        elif step.step_type == StepType.INITIAL_SEARCH:
+            return self._perform_initial_search(step.description)
+        elif step.step_type == StepType.VERIFICATION:
+            return self._verify_candidates(step.description)
+        elif step.step_type == StepType.REFINEMENT:
+            return self._refine_search(step.description)
+        elif step.step_type == StepType.SYNTHESIS:
+            return self._synthesize_current_knowledge(query)
+        else:
+            return step
+    def _extract_constraints(self, query: str) -> StepResult:
+        """Extract specific constraints and clues from the query.
+        Args:
+            query: The original query
+        Returns:
+            Step result with extracted constraints
+        """
+        prompt = f"""Extract all specific constraints and clues from this query:
+Query: {query}
+List each constraint/clue separately and explain why it's important:
+"""
+        response = self.model.invoke(prompt)
+        content = remove_think_tags(response.content)
+        # Parse constraints
+        constraints = []
+        lines = content.strip().split("\n")
+        for line in lines:
+            if line.strip() and (line[0].isdigit() or line.startswith("-")):
+                constraints.append(line.strip())
+        self.working_knowledge["constraints"] = constraints
+        return StepResult(
+            step_type=StepType.CONSTRAINT_EXTRACTION,
+            description="Extract constraints from query",
+            findings={"constraints": constraints},
+            confidence=0.0,
+        )
+    def _perform_initial_search(self, description: str) -> StepResult:
+        """Perform initial search based on extracted constraints.
+        Args:
+            description: Search description
+        Returns:
+            Step result with search findings
+        """
+        # Use source-based strategy for the search
+        source_strategy = SourceBasedSearchStrategy(
+            model=self.model,
+            search=self.search,
+            all_links_of_system=self.all_links_of_system,
+            include_text_content=True,
+            use_cross_engine_filter=True,
+            use_atomic_facts=True,
+        )
+        source_strategy.max_iterations = 1  # Quick initial search
+        source_strategy.questions_per_iteration = 10
+        if self.progress_callback:
+            source_strategy.set_progress_callback(self.progress_callback)
+        results = source_strategy.analyze_topic(description)
+        # Extract candidate locations from results
+        candidates = self._extract_candidates_from_results(results)
+        self.working_knowledge["candidates"] = candidates
+        return StepResult(
+            step_type=StepType.INITIAL_SEARCH,
+            description=description,
+            findings={"candidates": candidates, "raw_results": results},
+            confidence=0.2,
+        )
+    def _verify_candidates(self, description: str) -> StepResult:
+        """Verify specific facts about candidate locations.
+        Args:
+            description: Verification description
+        Returns:
+            Step result with verification findings
+        """
+        # Perform targeted search for verification
+        source_strategy = SourceBasedSearchStrategy(
+            model=self.model,
+            search=self.search,
+            all_links_of_system=self.all_links_of_system,
+            include_text_content=True,
+            use_cross_engine_filter=True,
+            use_atomic_facts=True,
+        )
+        source_strategy.max_iterations = 1
+        source_strategy.questions_per_iteration = 15
+        if self.progress_callback:
+            source_strategy.set_progress_callback(self.progress_callback)
+        results = source_strategy.analyze_topic(description)
+        # Update verified facts
+        verified_facts = self._extract_verified_facts(results)
+        self.working_knowledge["verified_facts"].extend(verified_facts)
+        # Calculate confidence based on verification
+        confidence = self._calculate_confidence()
+        return StepResult(
+            step_type=StepType.VERIFICATION,
+            description=description,
+            findings={"verified_facts": verified_facts},
+            confidence=confidence,
+        )
+    def _refine_search(self, description: str) -> StepResult:
+        """Refine search based on accumulated knowledge.
+        Args:
+            description: Refinement description
+        Returns:
+            Step result with refined findings
+        """
+        # Similar to verification but with more targeted approach
+        return self._verify_candidates(description)
+    def _synthesize_current_knowledge(self, query: str) -> StepResult:
+        """Synthesize current knowledge into an answer.
+        Args:
+            query: The original query
+        Returns:
+            Step result with synthesized answer
+        """
+        knowledge_summary = f"""
+Original Query: {query}
+Constraints: {self.working_knowledge["constraints"]}
+Candidates: {self.working_knowledge["candidates"]}
+Verified Facts: {self.working_knowledge["verified_facts"]}
+"""
+        prompt = f"""Based on all the information gathered, provide the answer:
+{knowledge_summary}
+Provide:
+1. The specific answer to the query
+2. Supporting evidence for the answer
+3. Confidence level (0-1)
+"""
+        response = self.model.invoke(prompt)
+        content = remove_think_tags(response.content)
+        return StepResult(
+            step_type=StepType.SYNTHESIS,
+            description="Final synthesis",
+            findings={"answer": content},
+            confidence=0.9,
+        )
+    def _synthesize_findings(self, query: str) -> Dict:
+        """Synthesize all findings into final result.
+        Args:
+            query: The original query
+        Returns:
+            Final results dictionary
+        """
+        # Compile all findings
+        all_findings = []
+        all_links = []
+        all_questions = []
+        for step in self.step_results:
+            finding = {
+                "phase": f"{step.step_type.value} (Step {self.step_results.index(step) + 1})",
+                "content": step.description,
+                "findings": step.findings,
+                "confidence": step.confidence,
+                "timestamp": self._get_timestamp(),
+            }
+            all_findings.append(finding)
+            # Extract links and questions from raw results if available
+            if "raw_results" in step.findings:
+                raw = step.findings["raw_results"]
+                if "all_links_of_system" in raw:
+                    all_links.extend(raw["all_links_of_system"])
+                if "questions_by_iteration" in raw:
+                    all_questions.extend(
+                        raw.get("questions_by_iteration", {}).values()
+                    )
+        # Get final answer
+        final_answer = "No confident answer found."
+        for step in reversed(self.step_results):
+            if (
+                step.step_type == StepType.SYNTHESIS
+                and "answer" in step.findings
+            ):
+                final_answer = step.findings["answer"]
+                break
+        # Format questions dictionary
+        questions_dict = {}
+        for i, questions in enumerate(all_questions):
+            if isinstance(questions, list):
+                questions_dict[i + 1] = questions
+        formatted_findings = format_findings(
+            all_findings, final_answer, questions_dict
+        )
+        return {
+            "current_knowledge": final_answer,
+            "formatted_findings": formatted_findings,
+            "findings": all_findings,
+            "iterations": len(self.step_results),
+            "questions_by_iteration": questions_dict,
+            "all_links_of_system": all_links,
+            "sources": all_links,
+            "step_results": [step.__dict__ for step in self.step_results],
+            "strategy": "adaptive_decomposition",
+            "working_knowledge": self.working_knowledge,
+            "questions": {
+                "total": sum(len(q) for q in questions_dict.values()),
+                "by_iteration": questions_dict,
+            },
+        }
+    def _extract_candidates_from_results(self, results: Dict) -> List[str]:
+        """Extract candidate locations from search results."""
+        candidates = []
+        if "current_knowledge" in results:
+            # Use LLM to extract location names
+            prompt = f"""Extract any location names mentioned in this text:
+{results["current_knowledge"][:1000]}
+List only location names, one per line:"""
+            response = self.model.invoke(prompt)
+            content = remove_think_tags(response.content)
+            for line in content.strip().split("\n"):
+                if line.strip():
+                    candidates.append(line.strip())
+        return candidates
+    def _extract_verified_facts(self, results: Dict) -> List[str]:
+        """Extract verified facts from search results."""
+        facts = []
+        if "current_knowledge" in results:
+            # Use LLM to extract verified facts
+            prompt = f"""Extract specific verified facts from this text:
+{results["current_knowledge"][:1000]}
+List only confirmed facts, one per line:"""
+            response = self.model.invoke(prompt)
+            content = remove_think_tags(response.content)
+            for line in content.strip().split("\n"):
+                if line.strip():
+                    facts.append(line.strip())
+        return facts
+    def _calculate_confidence(self) -> float:
+        """Calculate confidence based on verified facts vs constraints."""
+        if not self.working_knowledge["constraints"]:
+            return 0.0
+        verified_count = len(self.working_knowledge["verified_facts"])
+        constraint_count = len(self.working_knowledge["constraints"])
+        # Simple ratio with some adjustments
+        base_confidence = verified_count / constraint_count
+        # Boost if we have specific candidates
+        if self.working_knowledge["candidates"]:
+            base_confidence += 0.1
+        # Cap at 0.95 to leave room for synthesis
+        return min(base_confidence, 0.95)
+    def _get_timestamp(self) -> str:
+        """Get current timestamp for findings."""
+        from datetime import datetime
+        return datetime.utcnow().isoformat()

local_deep_research/advanced_search_system/strategies/base_strategy.py CHANGED Viewed

@@ -21,7 +21,9 @@ class BaseSearchStrategy(ABC):
             all_links_of_system if all_links_of_system is not None else []
         )
-    def set_progress_callback(self, callback: Callable[[str, int, dict], None]) -> None:
+    def set_progress_callback(
+        self, callback: Callable[[str, int, dict], None]
+    ) -> None:
         """Set a callback function to receive progress updates."""
         self.progress_callback = callback
@@ -62,9 +64,7 @@ class BaseSearchStrategy(ABC):
             bool: True if search engine is available, False otherwise
         """
         if not hasattr(self, "search") or self.search is None:
-            error_msg = (
-                "Error: No search engine available. Please check your configuration."
-            )
+            error_msg = "Error: No search engine available. Please check your configuration."
             self._update_progress(
                 error_msg,
                 100,

local-deep-research 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

local-deep-research 0.4.4py3-none-any.whl → 0.5.0py3-none-any.whl