local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +23 -22
- local_deep_research/__main__.py +16 -0
- local_deep_research/advanced_search_system/__init__.py +7 -0
- local_deep_research/advanced_search_system/filters/__init__.py +8 -0
- local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
- local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
- local_deep_research/advanced_search_system/findings/repository.py +452 -0
- local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
- local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
- local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
- local_deep_research/advanced_search_system/questions/__init__.py +1 -0
- local_deep_research/advanced_search_system/questions/base_question.py +64 -0
- local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
- local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
- local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
- local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
- local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
- local_deep_research/advanced_search_system/tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
- local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
- local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
- local_deep_research/api/__init__.py +5 -5
- local_deep_research/api/research_functions.py +154 -160
- local_deep_research/app.py +8 -0
- local_deep_research/citation_handler.py +25 -16
- local_deep_research/{config.py → config/config_files.py} +102 -110
- local_deep_research/config/llm_config.py +472 -0
- local_deep_research/config/search_config.py +77 -0
- local_deep_research/defaults/__init__.py +10 -5
- local_deep_research/defaults/main.toml +2 -2
- local_deep_research/defaults/search_engines.toml +60 -34
- local_deep_research/main.py +121 -19
- local_deep_research/migrate_db.py +147 -0
- local_deep_research/report_generator.py +87 -45
- local_deep_research/search_system.py +153 -283
- local_deep_research/setup_data_dir.py +35 -0
- local_deep_research/test_migration.py +178 -0
- local_deep_research/utilities/__init__.py +0 -0
- local_deep_research/utilities/db_utils.py +49 -0
- local_deep_research/{utilties → utilities}/enums.py +2 -2
- local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
- local_deep_research/utilities/search_utilities.py +242 -0
- local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
- local_deep_research/web/__init__.py +0 -1
- local_deep_research/web/app.py +86 -1709
- local_deep_research/web/app_factory.py +289 -0
- local_deep_research/web/database/README.md +70 -0
- local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
- local_deep_research/web/database/migrations.py +447 -0
- local_deep_research/web/database/models.py +117 -0
- local_deep_research/web/database/schema_upgrade.py +107 -0
- local_deep_research/web/models/database.py +294 -0
- local_deep_research/web/models/settings.py +94 -0
- local_deep_research/web/routes/api_routes.py +559 -0
- local_deep_research/web/routes/history_routes.py +354 -0
- local_deep_research/web/routes/research_routes.py +715 -0
- local_deep_research/web/routes/settings_routes.py +1583 -0
- local_deep_research/web/services/research_service.py +947 -0
- local_deep_research/web/services/resource_service.py +149 -0
- local_deep_research/web/services/settings_manager.py +669 -0
- local_deep_research/web/services/settings_service.py +187 -0
- local_deep_research/web/services/socket_service.py +210 -0
- local_deep_research/web/static/css/custom_dropdown.css +277 -0
- local_deep_research/web/static/css/settings.css +1223 -0
- local_deep_research/web/static/css/styles.css +525 -48
- local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
- local_deep_research/web/static/js/components/detail.js +348 -0
- local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
- local_deep_research/web/static/js/components/fallback/ui.js +215 -0
- local_deep_research/web/static/js/components/history.js +487 -0
- local_deep_research/web/static/js/components/logpanel.js +949 -0
- local_deep_research/web/static/js/components/progress.js +1107 -0
- local_deep_research/web/static/js/components/research.js +1865 -0
- local_deep_research/web/static/js/components/results.js +766 -0
- local_deep_research/web/static/js/components/settings.js +3981 -0
- local_deep_research/web/static/js/components/settings_sync.js +106 -0
- local_deep_research/web/static/js/main.js +226 -0
- local_deep_research/web/static/js/services/api.js +253 -0
- local_deep_research/web/static/js/services/audio.js +31 -0
- local_deep_research/web/static/js/services/formatting.js +119 -0
- local_deep_research/web/static/js/services/pdf.js +622 -0
- local_deep_research/web/static/js/services/socket.js +882 -0
- local_deep_research/web/static/js/services/ui.js +546 -0
- local_deep_research/web/templates/base.html +72 -0
- local_deep_research/web/templates/components/custom_dropdown.html +47 -0
- local_deep_research/web/templates/components/log_panel.html +32 -0
- local_deep_research/web/templates/components/mobile_nav.html +22 -0
- local_deep_research/web/templates/components/settings_form.html +299 -0
- local_deep_research/web/templates/components/sidebar.html +21 -0
- local_deep_research/web/templates/pages/details.html +73 -0
- local_deep_research/web/templates/pages/history.html +51 -0
- local_deep_research/web/templates/pages/progress.html +57 -0
- local_deep_research/web/templates/pages/research.html +139 -0
- local_deep_research/web/templates/pages/results.html +59 -0
- local_deep_research/web/templates/settings_dashboard.html +78 -192
- local_deep_research/web/utils/__init__.py +0 -0
- local_deep_research/web/utils/formatters.py +76 -0
- local_deep_research/web_search_engines/engines/full_search.py +18 -16
- local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
- local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
- local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
- local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
- local_deep_research/web_search_engines/search_engine_base.py +174 -99
- local_deep_research/web_search_engines/search_engine_factory.py +192 -102
- local_deep_research/web_search_engines/search_engines_config.py +22 -15
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
- local_deep_research-0.2.2.dist-info/RECORD +135 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
- local_deep_research/defaults/llm_config.py +0 -338
- local_deep_research/utilties/search_utilities.py +0 -114
- local_deep_research/web/static/js/app.js +0 -3763
- local_deep_research/web/templates/api_keys_config.html +0 -82
- local_deep_research/web/templates/collections_config.html +0 -90
- local_deep_research/web/templates/index.html +0 -348
- local_deep_research/web/templates/llm_config.html +0 -120
- local_deep_research/web/templates/main_config.html +0 -89
- local_deep_research/web/templates/search_engines_config.html +0 -154
- local_deep_research/web/templates/settings.html +0 -519
- local_deep_research-0.1.26.dist-info/RECORD +0 -61
- local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
- /local_deep_research/{utilties → config}/__init__.py +0 -0
- {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,450 @@
|
|
1
|
+
"""
|
2
|
+
IterDRAG strategy implementation.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import json
|
6
|
+
import logging
|
7
|
+
from datetime import datetime
|
8
|
+
from typing import Dict, List
|
9
|
+
|
10
|
+
from langchain_core.language_models import BaseLLM
|
11
|
+
|
12
|
+
from ...citation_handler import CitationHandler
|
13
|
+
from ...config.config_files import settings
|
14
|
+
from ...config.llm_config import get_llm
|
15
|
+
from ...config.search_config import get_search
|
16
|
+
from ...utilities.db_utils import get_db_setting
|
17
|
+
from ...utilities.search_utilities import extract_links_from_search_results
|
18
|
+
from ..findings.repository import FindingsRepository
|
19
|
+
from ..knowledge.standard_knowledge import StandardKnowledge
|
20
|
+
from ..questions.decomposition_question import DecompositionQuestionGenerator
|
21
|
+
from .base_strategy import BaseSearchStrategy
|
22
|
+
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
class IterDRAGStrategy(BaseSearchStrategy):
    """IterDRAG strategy that breaks queries into sub-queries.

    The flow implemented here: run an initial search for the main query,
    decompose the query into sub-queries from those results, answer each
    sub-query in turn while accumulating knowledge, then synthesize a final
    answer from all findings (with multiple fallback layers if synthesis
    fails).
    """

    def __init__(
        self, model: BaseLLM | None = None, search=None, citation_handler=None
    ):
        """Initialize the strategy with optional dependency injection for testing.

        Args:
            model: LLM to use; falls back to the configured ``get_llm()``.
            search: Search engine object exposing ``run(query)``; falls back
                to ``get_search()`` (which may return None — handled in
                ``analyze_topic``).
            citation_handler: Optional pre-built CitationHandler; one is
                created from the model if not supplied.
        """
        super().__init__()
        self.model = model or get_llm()
        self.search = search or get_search()
        # Set externally by the caller before analyze_topic runs; see
        # _update_progress for the callback signature.
        self.progress_callback = None
        # Links from every search performed by this instance, across phases.
        self.all_links_of_system = list()
        # Maps iteration index -> list of generated sub-queries.
        self.questions_by_iteration = {}

        # Use provided citation_handler or create one
        self.citation_handler = citation_handler or CitationHandler(self.model)

        # Initialize components
        self.question_generator = DecompositionQuestionGenerator(self.model)
        self.knowledge_generator = StandardKnowledge(self.model)
        self.findings_repository = FindingsRepository(self.model)

    def _update_progress(
        self, message: str, progress_percent: int = None, metadata: dict = None
    ) -> None:
        """Send a progress update via the callback if available.

        Args:
            message: Human-readable status line.
            progress_percent: 0-100 overall progress, or None.
            metadata: Extra context (e.g. ``{"phase": ...}``); normalized to
                an empty dict so the callback never receives None.
        """
        if self.progress_callback:
            self.progress_callback(message, progress_percent, metadata or {})

    def _generate_subqueries(
        self, query: str, initial_results: List[Dict], current_knowledge: str
    ) -> List[str]:
        """Generate sub-queries based on initial search results and the main query.

        Builds a textual context block (query, date, prior questions,
        accumulated knowledge, and the initial results as JSON) and delegates
        to the decomposition question generator. Returns an empty list on any
        failure so the caller can degrade gracefully.
        """
        try:
            # Format context for question generation
            context = f"""Current Query: {query}
Current Date: {datetime.now().strftime('%Y-%m-%d')}
Past Questions: {self.questions_by_iteration}
Current Knowledge: {current_knowledge}

Initial Search Results:
{json.dumps(initial_results, indent=2)}"""

            # Generate sub-queries using the question generator
            # NOTE(review): get_db_setting here has no default — assumes the
            # "search.questions_per_iteration" setting always exists; confirm.
            return self.question_generator.generate_questions(
                query, context, int(get_db_setting("search.questions_per_iteration"))
            )
        except Exception as e:
            logger.error(f"Error generating sub-queries: {str(e)}")
            return []

    def analyze_topic(self, query: str) -> Dict:
        """IterDRAG implementation of the topic analysis process.

        Args:
            query: The main research question.

        Returns:
            Dict with keys ``findings`` (list of per-phase finding dicts),
            ``iterations`` (always 1 here), ``questions`` (sub-queries by
            iteration), ``formatted_findings`` (UI-ready string),
            ``current_knowledge`` (synthesized knowledge string), and — only
            on the no-search-engine early return — ``error``.
        """
        findings = []
        current_knowledge = ""
        # Links gathered during this run only (vs. all_links_of_system,
        # which persists across calls on this instance).
        section_links = list()

        self._update_progress(
            "Initializing IterDRAG research system",
            5,
            {"phase": "init", "strategy": "iterdrag"},
        )

        # Check if search engine is available
        if self.search is None:
            error_msg = (
                "Error: No search engine available. Please check your configuration."
            )
            self._update_progress(
                error_msg,
                100,
                {
                    "phase": "error",
                    "error": "No search engine available",
                    "status": "failed",
                },
            )
            return {
                "findings": [],
                "iterations": 0,
                "questions": {},
                "formatted_findings": "Error: Unable to conduct research without a search engine.",
                "current_knowledge": "",
                "error": error_msg,
            }

        # Initial search for the main query
        self._update_progress(
            "Performing initial search for main query",
            10,
            {"phase": "search", "iteration": 1},
        )

        # NOTE(review): assumes search.run returns a list of result dicts (or
        # a falsy value on no results) — confirm against the engine interface.
        initial_results = self.search.run(query)
        if not initial_results:
            self._update_progress(
                "No initial results found",
                15,
                {"phase": "search_complete", "result_count": 0},
            )
            # Normalize falsy result (e.g. None) to an empty list for
            # json.dumps / link extraction below.
            initial_results = []
        else:
            self._update_progress(
                f"Found {len(initial_results)} initial results",
                15,
                {"phase": "search_complete", "result_count": len(initial_results)},
            )

        # Extract and save links
        initial_links = extract_links_from_search_results(initial_results)
        self.all_links_of_system.extend(initial_links)
        section_links.extend(initial_links)

        # Generate sub-queries
        self._update_progress(
            "Generating sub-queries for IterDRAG analysis",
            20,
            {"phase": "iterdrag_decomposition"},
        )

        sub_queries = self._generate_subqueries(
            query, initial_results, current_knowledge
        )

        # Store questions in repository and in self.questions_by_iteration
        self.questions_by_iteration = {0: sub_queries}
        self.findings_repository.set_questions_by_iteration(self.questions_by_iteration)

        if not sub_queries:
            # Decomposition failed (or generator errored); record it as a
            # finding and fall through to synthesis with what we have.
            logger.error("No sub-queries were generated to analyze.")
            finding = {
                "phase": "Analysis Error",
                "content": "No sub-queries could be generated for the main question.",
                "question": query,
                "search_results": [],
            }
            findings.append(finding)
        else:
            # Process each sub-query
            total_subqueries = len(sub_queries)

            for i, sub_query in enumerate(sub_queries, 1):
                # Sub-query processing occupies the 25%-75% progress band,
                # split evenly across sub-queries.
                progress_base = 25 + (i / total_subqueries * 50)
                self._update_progress(
                    f"Processing sub-query {i} of {total_subqueries}: {sub_query}",
                    int(progress_base),
                    {"phase": "subquery", "subquery_index": i},
                )

                # Search for the sub-query
                try:
                    sub_results = self.search.run(sub_query)
                    if not sub_results:
                        self._update_progress(
                            f"No results for sub-query: {sub_query}",
                            int(progress_base + 2),
                            {"phase": "search_complete", "result_count": 0},
                        )
                        sub_results = []
                    else:
                        self._update_progress(
                            f"Found {len(sub_results)} results for sub-query: {sub_query}",
                            int(progress_base + 2),
                            {
                                "phase": "search_complete",
                                "result_count": len(sub_results),
                            },
                        )
                except Exception as e:
                    # A failed search is non-fatal: analyze with empty results
                    # so accumulated knowledge can still answer the sub-query.
                    logger.error(f"Error searching for sub-query: {str(e)}")
                    sub_results = []

                try:
                    # Use previous knowledge to answer this sub-query
                    result = self.citation_handler.analyze_followup(
                        sub_query,
                        sub_results,
                        current_knowledge,
                        nr_of_links=len(self.all_links_of_system),
                    )

                    # Extract and save links AFTER citation handler processes results
                    sub_links = extract_links_from_search_results(sub_results)
                    self.all_links_of_system.extend(sub_links)
                    section_links.extend(sub_links)

                    if result is not None:
                        finding = {
                            "phase": f"Sub-query {i}",
                            "content": result["content"],
                            "question": sub_query,
                            "search_results": sub_results,
                            "documents": result["documents"],
                        }
                        findings.append(finding)
                        self.findings_repository.add_finding(sub_query, finding)
                        self.findings_repository.add_documents(result["documents"])

                        # Add to current knowledge with space around +
                        current_knowledge = (
                            current_knowledge + "\n\n\n New: \n" + result["content"]
                        )
                except Exception as e:
                    logger.error(f"Error analyzing sub-query results: {str(e)}")
                    # NOTE(review): phase label uses 0.{i + 1} while the loop
                    # index i is 1-based, so labels start at "0.2" — confirm
                    # whether that numbering is intentional.
                    finding = {
                        "phase": f"Follow-up Iteration 0.{i + 1}",
                        "content": "Error analyzing sub-query results.",
                        "question": sub_query,
                        "search_results": [],
                        "documents": [],
                    }
                    findings.append(finding)
                    # Optionally update current_knowledge even if analysis failed?
                    # current_knowledge += f"\n\nError analyzing results for: {sub_query}"

        # Final answer synthesis based on all sub-query findings
        self._update_progress(
            "Synthesizing final answer from sub-queries",
            80,
            {"phase": "final_synthesis"},
        )

        try:
            # Extract finding contents for synthesis
            finding_contents = [f["content"] for f in findings if "content" in f]

            # Synthesize findings into a final answer
            final_answer = self.findings_repository.synthesize_findings(
                query=query,
                sub_queries=sub_queries,
                findings=finding_contents,
                accumulated_knowledge=current_knowledge,
            )

            # Check if the synthesis failed with an error
            # (synthesize_findings reports failure as an "Error:..." string
            # rather than raising — hard errors are caught further below).
            if isinstance(final_answer, str) and final_answer.startswith("Error:"):
                logger.error(f"Synthesis returned an error: {final_answer}")

                # Extract error type for better handling
                error_type = "unknown"
                error_message = final_answer.lower()

                if "timeout" in error_message:
                    error_type = "timeout"
                elif "token limit" in error_message:
                    error_type = "token_limit"
                elif "rate limit" in error_message:
                    error_type = "rate_limit"
                elif "connection" in error_message:
                    error_type = "connection"

                # Log with specific error type
                logger.error(f"Synthesis failed with error type: {error_type}")

                # Add error information to progress update
                self._update_progress(
                    f"Synthesis failed: {error_type}. Attempting fallback...",
                    85,
                    {"phase": "synthesis_error", "error_type": error_type},
                )

                # Try fallback: use the best individual finding as the final answer
                longest_finding = ""
                for f in findings:
                    if isinstance(f, dict) and "content" in f:
                        if len(f["content"]) > len(longest_finding):
                            longest_finding = f["content"]

                if longest_finding:
                    logger.info("Using longest finding as fallback synthesis")
                    # The f-string below interpolates the OLD value of
                    # final_answer (the error text) before rebinding it.
                    final_answer = f"""
# Research Results (Fallback Mode)

{longest_finding}

## Note
The final synthesis could not be completed due to an error: {final_answer}
This is a fallback response using the most detailed individual finding.
"""
                else:
                    # If we don't have any findings with content, use current_knowledge
                    logger.info("Using current knowledge as fallback synthesis")
                    final_answer = f"""
# Research Results (Fallback Mode)

{current_knowledge}

## Note
The final synthesis could not be completed due to an error: {final_answer}
This is a fallback response using the accumulated knowledge.
"""

            # Create a synthesis finding
            finding = {
                "phase": "Final synthesis",
                "content": final_answer,
                "question": query,
                "search_results": [],
                "documents": [],
            }
            findings.append(finding)

            # Store the synthesized content
            # NOTE(review): here add_finding receives a plain string, while
            # the sub-query call site passes a finding dict — confirm the
            # repository accepts both shapes.
            self.findings_repository.add_finding(query + "_synthesis", final_answer)

            # Update current knowledge with the synthesized version
            current_knowledge = final_answer
        except Exception as e:
            logger.error(f"Error synthesizing final answer: {str(e)}")
            import traceback

            logger.error(traceback.format_exc())

            # Create an error finding
            error_finding = {
                "phase": "Final synthesis error",
                "content": f"Error during synthesis: {str(e)}",
                "question": query,
                "search_results": [],
                "documents": [],
            }
            findings.append(error_finding)

            # If synthesis completely fails, construct a fallback answer from the most relevant findings
            self._update_progress(
                "Synthesis failed. Creating fallback summary...",
                85,
                {"phase": "synthesis_fallback"},
            )

            try:
                # Extract best content from findings
                key_findings = []
                for i, f in enumerate(findings):
                    if isinstance(f, dict) and "content" in f and f.get("content"):
                        # Only take the first 500 chars of each finding for the fallback
                        content_preview = f.get("content", "")[:500]
                        if content_preview:
                            key_findings.append(
                                f"### Finding {i + 1}\n\n{content_preview}..."
                            )

                # Create fallback content
                fallback_content = f"""
# Research Results (Error Recovery Mode)

## Original Query
{query}

## Key Findings
{chr(10).join(key_findings[:5]) if key_findings else "No valid findings were generated."}

## Error Information
The system encountered an error during final synthesis: {str(e)}
This is an automatically generated fallback response.
"""

                final_answer = fallback_content
            except Exception as fallback_error:
                # Last resort fallback
                logger.error(f"Even fallback creation failed: {fallback_error}")
                final_answer = f"""
# Research Error

The system encountered multiple errors while processing your query: "{query}"

Primary error: {str(e)}
Fallback error: {str(fallback_error)}

Please try again with a different query or contact support.
"""

        # Compress knowledge if needed
        if (
            get_db_setting(
                "general.knowledge_accumulation",
                settings.general.knowledge_accumulation,
            )
            == "ITERATION"
        ):
            try:
                self._update_progress(
                    "Compressing knowledge", 90, {"phase": "knowledge_compression"}
                )
                current_knowledge = self.knowledge_generator.compress_knowledge(
                    current_knowledge, query, section_links
                )
            except Exception as e:
                # Compression is best-effort: keep the uncompressed knowledge.
                logger.error(f"Error compressing knowledge: {str(e)}")

        # Format and save findings
        self._update_progress(
            "Formatting and saving findings", 95, {"phase": "formatting"}
        )

        try:
            # First, get just the synthesized content without formatting
            if not isinstance(final_answer, str):
                logger.error(
                    "final_answer is not a string, using current_knowledge as fallback"
                )
                final_answer = current_knowledge

            # Ensure latest questions are in the repository
            self.findings_repository.set_questions_by_iteration(
                self.questions_by_iteration
            )

            # Now format the findings with search questions and sources
            formatted_findings = self.findings_repository.format_findings_to_text(
                findings, final_answer
            )
        except Exception as e:
            logger.error(f"Error formatting final findings: {str(e)}")
            formatted_findings = "Error: Could not format findings due to an error."

        self._update_progress("Research complete", 100, {"phase": "complete"})

        return {
            "findings": findings,  # Keep the detailed findings list
            "iterations": 1,
            "questions": self.questions_by_iteration,  # Use the member variable instead of {"0": sub_queries}
            "formatted_findings": formatted_findings,  # This is the fully formatted string for UI
            "current_knowledge": current_knowledge,  # This is the synthesized content for knowledge base
        }