local-deep-research 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py
+++ b/local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py
@@ -80,7 +80,7 @@ class ParallelSearchStrategy(BaseSearchStrategy):
 
     def analyze_topic(self, query: str) -> Dict:
         """
-        Parallel implementation that generates questions and searches all at once.
+        Analyze a topic using parallel search, supporting multiple iterations.
 
         Args:
             query: The research query to analyze
@@ -89,6 +89,11 @@ class ParallelSearchStrategy(BaseSearchStrategy):
 
         findings = []
         all_search_results = []
+        current_knowledge = ""
+
+        # Track all search results across iterations
+        self.all_links_of_system = list()
+        self.questions_by_iteration = {}
 
         self._update_progress(
             "Initializing parallel research",
@@ -105,184 +110,301 @@ class ParallelSearchStrategy(BaseSearchStrategy):
             return {
                 "findings": [],
                 "iterations": 0,
-                "questions": {},
+                "questions_by_iteration": {},
                 "formatted_findings": "Error: Unable to conduct research without a search engine.",
                 "current_knowledge": "",
                 "error": "No search engine available",
             }
 
+        # Determine number of iterations to run
+        iterations_to_run = get_db_setting("search.iterations")
+        logger.debug("Selected amount of iterations: " + iterations_to_run)
+        iterations_to_run = int(iterations_to_run)
         try:
-            # Step 1: Generate questions first
-            self._update_progress(
-                "Generating search questions", 10, {"phase": "question_generation"}
-            )
+            # Run each iteration
+            for iteration in range(1, iterations_to_run + 1):
+                iteration_progress_base = 5 + (iteration - 1) * (70 / iterations_to_run)
 
-            # Generate 3 additional questions (plus the main query = 4 total)
-            questions = self.question_generator.generate_questions(
-                current_knowledge="",  # No knowledge accumulation
-                query=query,
-                questions_per_iteration=int(
-                    get_db_setting("search.questions_per_iteration")
-                ),  # 3 additional questions
-                questions_by_iteration={},
-            )
+                self._update_progress(
+                    f"Starting iteration {iteration}/{iterations_to_run}",
+                    iteration_progress_base,
+                    {"phase": f"iteration_{iteration}", "iteration": iteration},
+                )
 
-            # Add the original query as the first question
-            all_questions = [query] + questions
+                # Step 1: Generate questions
+                self._update_progress(
+                    f"Generating search questions for iteration {iteration}",
+                    iteration_progress_base + 5,
+                    {"phase": "question_generation", "iteration": iteration},
+                )
 
-            # Store in questions_by_iteration
-            self.questions_by_iteration[0] = questions
-            logger.info(f"Generated questions: {questions}")
+                # For first iteration, generate initial questions
+                # For subsequent iterations, generate follow-up questions
+                logger.info("Starting to generate questions")
+                if iteration == 1:
+                    # Generate additional questions (plus the main query)
+                    if iterations_to_run > 1:
+                        context = f"""Iteration: {1} of {iterations_to_run}"""
+                    else:
+                        context = ""
+                    questions = self.question_generator.generate_questions(
+                        current_knowledge=context,
+                        query=query,
+                        questions_per_iteration=int(
+                            get_db_setting("search.questions_per_iteration")
+                        ),
+                        questions_by_iteration=self.questions_by_iteration,
+                    )
 
-            # Step 2: Run all searches in parallel
-            self._update_progress(
-                "Running parallel searches for all questions",
-                20,
-                {"phase": "parallel_search"},
-            )
+                    # Add the original query as the first question
+                    all_questions = [query] + questions
 
-            # Function for thread pool
-            def search_question(q):
-                try:
-                    result = self.search.run(q)
-                    return {"question": q, "results": result or []}
-                except Exception as e:
-                    logger.error(f"Error searching for '{q}': {str(e)}")
-                    return {"question": q, "results": [], "error": str(e)}
-
-            # Run searches in parallel
-            with concurrent.futures.ThreadPoolExecutor(
-                max_workers=len(all_questions)
-            ) as executor:
-                futures = [executor.submit(search_question, q) for q in all_questions]
-                all_search_dict = {}
-
-                # Process results as they complete
-                for i, future in enumerate(concurrent.futures.as_completed(futures)):
-                    result_dict = future.result()
-                    question = result_dict["question"]
-                    search_results = result_dict["results"]
-                    all_search_dict[question] = search_results
+                    # Store in questions_by_iteration
+                    self.questions_by_iteration[iteration] = questions
+                    logger.info(
+                        f"Generated questions for iteration {iteration}: {questions}"
+                    )
+                else:
+                    # Get past questions from all previous iterations
+                    past_questions = []
+                    for prev_iter in range(1, iteration):
+                        if prev_iter in self.questions_by_iteration:
+                            past_questions.extend(
+                                self.questions_by_iteration[prev_iter]
+                            )
+
+                    # Generate follow-up questions based on accumulated knowledge if iterations > 2
+                    use_knowledge = iterations_to_run > 2
+                    knowledge_for_questions = current_knowledge if use_knowledge else ""
+                    context = f"""Current Knowledge: {knowledge_for_questions}
+                    Iteration: {iteration} of {iterations_to_run}"""
+
+                    # Generate questions
+                    questions = self.question_generator.generate_questions(
+                        current_knowledge=context,
+                        query=query,
+                        questions_per_iteration=int(
+                            get_db_setting("search.questions_per_iteration")
+                        ),
+                        questions_by_iteration=self.questions_by_iteration,
+                    )
 
-                    self._update_progress(
-                        f"Completed search {i + 1} of {len(all_questions)}: {question[:30]}...",
-                        20 + ((i + 1) / len(all_questions) * 40),
-                        {
-                            "phase": "search_complete",
-                            "result_count": len(search_results),
-                            "question": question,
-                        },
+                    # Use only the new questions for this iteration's searches
+                    all_questions = questions
+
+                    # Store in questions_by_iteration
+                    self.questions_by_iteration[iteration] = questions
+                    logger.info(
+                        f"Generated questions for iteration {iteration}: {questions}"
                     )
 
-                    # Extract and save links
-                    if not self.use_cross_engine_filter:
-                        links = extract_links_from_search_results(search_results)
-                        self.all_links_of_system.extend(links)
-                    all_search_results.extend(search_results)
+                # Step 2: Run all searches in parallel for this iteration
+                self._update_progress(
+                    f"Running parallel searches for iteration {iteration}",
+                    iteration_progress_base + 10,
+                    {"phase": "parallel_search", "iteration": iteration},
+                )
 
-            # Step 3: Analysis of collected search results
-            self._update_progress(
-                "Analyzing all collected search results",
-                70,
-                {"phase": "final_analysis"},
-            )
-            if self.use_cross_engine_filter:
+                # Function for thread pool
+                def search_question(q):
+                    try:
+                        result = self.search.run(q)
+                        return {"question": q, "results": result or []}
+                    except Exception as e:
+                        logger.error(f"Error searching for '{q}': {str(e)}")
+                        return {"question": q, "results": [], "error": str(e)}
+
+                # Run searches in parallel
+                with concurrent.futures.ThreadPoolExecutor(
+                    max_workers=len(all_questions)
+                ) as executor:
+                    futures = [
+                        executor.submit(search_question, q) for q in all_questions
+                    ]
+                    iteration_search_dict = {}
+                    iteration_search_results = []
+
+                    # Process results as they complete
+                    for i, future in enumerate(
+                        concurrent.futures.as_completed(futures)
+                    ):
+                        result_dict = future.result()
+                        question = result_dict["question"]
+                        search_results = result_dict["results"]
+                        iteration_search_dict[question] = search_results
+
+                        self._update_progress(
+                            f"Completed search {i + 1} of {len(all_questions)}: {question[:30]}...",
+                            iteration_progress_base
+                            + 10
+                            + ((i + 1) / len(all_questions) * 30),
+                            {
+                                "phase": "search_complete",
+                                "iteration": iteration,
+                                "result_count": len(search_results),
+                                "question": question,
+                            },
+                        )
+
+                        # Collect all search results for this iteration
+                        iteration_search_results.extend(search_results)
+
+                # Step 3: Filter and analyze results for this iteration
                 self._update_progress(
-                    "Filtering search results across engines",
-                    65,
-                    {"phase": "cross_engine_filtering"},
+                    f"Analyzing results for iteration {iteration}",
+                    iteration_progress_base + 45,
+                    {"phase": "iteration_analysis", "iteration": iteration},
                 )
 
-                # Get the current link count (for indexing)
-                existing_link_count = len(self.all_links_of_system)
+                # Apply cross-engine filtering if enabled
+                if self.use_cross_engine_filter:
+                    self._update_progress(
+                        f"Filtering search results for iteration {iteration}",
+                        iteration_progress_base + 45,
+                        {"phase": "cross_engine_filtering", "iteration": iteration},
+                    )
 
-                # Filter the search results
-                filtered_search_results = self.cross_engine_filter.filter_results(
-                    all_search_results,
-                    query,
-                    reorder=self.filter_reorder,
-                    reindex=self.filter_reindex,
-                    start_index=existing_link_count,  # Start indexing after existing links
-                )
+                    # Get the current link count (for indexing)
+                    existing_link_count = len(self.all_links_of_system)
 
-                links = extract_links_from_search_results(filtered_search_results)
-                self.all_links_of_system.extend(links)
+                    # Filter the search results
+                    filtered_search_results = self.cross_engine_filter.filter_results(
+                        iteration_search_results,
+                        query,
+                        reorder=self.filter_reorder,
+                        reindex=self.filter_reindex,
+                        start_index=existing_link_count,  # Start indexing after existing links
+                    )
+
+                    links = extract_links_from_search_results(filtered_search_results)
+                    self.all_links_of_system.extend(links)
+
+                    self._update_progress(
+                        f"Filtered from {len(iteration_search_results)} to {len(filtered_search_results)} results",
+                        iteration_progress_base + 50,
+                        {
+                            "phase": "filtering_complete",
+                            "iteration": iteration,
+                            "links_count": len(self.all_links_of_system),
+                        },
+                    )
 
+                    # Use filtered results for analysis
+                    iteration_search_results = filtered_search_results
+                else:
+                    # Just extract links without filtering
+                    links = extract_links_from_search_results(iteration_search_results)
+                    self.all_links_of_system.extend(links)
+
+                # Add to all search results
+                all_search_results.extend(iteration_search_results)
+
+                # Create a finding for this iteration's results
+                if self.include_text_content and iteration_search_results:
+                    # For iteration > 1 with knowledge accumulation, use follow-up analysis
+                    if iteration > 1 and iterations_to_run > 2:
+                        citation_result = self.citation_handler.analyze_followup(
+                            query,
+                            iteration_search_results,
+                            current_knowledge,
+                            len(self.all_links_of_system) - len(links),
+                        )
+                    else:
+                        # For first iteration or without knowledge accumulation, use initial analysis
+                        citation_result = self.citation_handler.analyze_initial(
+                            query, iteration_search_results
+                        )
+
+                    if citation_result:
+                        # Create a finding for this iteration
+                        iteration_content = citation_result["content"]
+
+                        # Update current knowledge if iterations > 2
+                        if iterations_to_run > 2:
+                            if current_knowledge:
+                                current_knowledge = f"{current_knowledge}\n\n## FINDINGS FROM ITERATION {iteration}:\n\n{iteration_content}"
+                            else:
+                                current_knowledge = iteration_content
+
+                        finding = {
+                            "phase": f"Iteration {iteration}",
+                            "content": iteration_content,
+                            "question": query,
+                            "search_results": iteration_search_results,
+                            "documents": citation_result.get("documents", []),
+                        }
+                        findings.append(finding)
+
+                        # Add documents to repository
+                        if "documents" in citation_result:
+                            self.findings_repository.add_documents(
+                                citation_result["documents"]
+                            )
+
+                # Mark iteration as complete
+                iteration_progress = 5 + iteration * (70 / iterations_to_run)
                 self._update_progress(
-                    f"Filtered from {len(all_search_results)} to {len(filtered_search_results)} results",
-                    70,
-                    {
-                        "phase": "filtering_complete",
-                        "links_count": len(self.all_links_of_system),
-                    },
+                    f"Completed iteration {iteration}/{iterations_to_run}",
+                    iteration_progress,
+                    {"phase": "iteration_complete", "iteration": iteration},
                 )
 
-                # Use filtered results for analysis
-                all_search_results = filtered_search_results
+            # Final synthesis after all iterations
+            self._update_progress(
+                "Generating final synthesis", 80, {"phase": "synthesis"}
+            )
 
-            # Now when we use the citation handler, ensure we're using all_search_results:
+            # Handle final synthesis based on include_text_content flag
             if self.include_text_content:
-                # Use citation handler for analysis of all results together
-                citation_result = self.citation_handler.analyze_initial(
-                    query, all_search_results
-                )
-
-                if citation_result:
-                    synthesized_content = citation_result["content"]
-                    finding = {
+                # Generate a final synthesis from all search results
+                if iterations_to_run > 1:
+                    final_citation_result = self.citation_handler.analyze_initial(
+                        query, all_search_results
+                    )
+                    # Add null check for final_citation_result
+                    if final_citation_result:
+                        synthesized_content = final_citation_result["content"]
+                    else:
+                        synthesized_content = (
+                            "No relevant results found in final synthesis."
+                        )
+                else:
+                    # For single iteration, use the content from findings
+                    synthesized_content = (
+                        findings[0]["content"]
+                        if findings
+                        else "No relevant results found."
+                    )
+                # Add a final synthesis finding
+                final_finding = {
                     "phase": "Final synthesis",
                     "content": synthesized_content,
                     "question": query,
                     "search_results": all_search_results,
-                    "documents": citation_result.get("documents", []),
+                    "documents": [],
                 }
-                    findings.append(finding)
-
-                    # Transfer questions to repository
-                    self.findings_repository.set_questions_by_iteration(
-                        self.questions_by_iteration
-                    )
-
-                    # Format findings
-                    formatted_findings = self.findings_repository.format_findings_to_text(
-                        findings, synthesized_content
-                    )
-
-                    # Add documents to repository
-                    if "documents" in citation_result:
-                        self.findings_repository.add_documents(citation_result["documents"])
-                else:
-                    synthesized_content = "No relevant results found."
-                    formatted_findings = synthesized_content
-                    finding = {
-                        "phase": "Error",
-                        "content": "No relevant results found.",
-                        "question": query,
-                        "search_results": all_search_results,
-                        "documents": [],
-                    }
-                    findings.append(finding)
+                findings.append(final_finding)
 
             else:
                 # Skip LLM analysis, just format the raw search results
                 synthesized_content = "LLM analysis skipped"
-                finding = {
+                final_finding = {
                     "phase": "Raw search results",
                     "content": "LLM analysis was skipped. Displaying raw search results with links.",
                     "question": query,
                     "search_results": all_search_results,
                     "documents": [],
                 }
-                findings.append(finding)
+                findings.append(final_finding)
 
-                # Transfer questions to repository
-                self.findings_repository.set_questions_by_iteration(
-                    self.questions_by_iteration
-                )
+            # Transfer questions to repository
+            self.findings_repository.set_questions_by_iteration(
+                self.questions_by_iteration
+            )
 
-                # Format findings without synthesis
-                formatted_findings = self.findings_repository.format_findings_to_text(
-                    findings, "Raw search results (LLM analysis skipped)"
-                )
+            # Format findings
+            formatted_findings = self.findings_repository.format_findings_to_text(
+                findings, synthesized_content
+            )
 
         except Exception as e:
             import traceback
@@ -305,8 +427,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
 
         return {
             "findings": findings,
-            "iterations": 1,
-            "questions": self.questions_by_iteration,
+            "iterations": iterations_to_run,
+            "questions_by_iteration": self.questions_by_iteration,
             "formatted_findings": formatted_findings,
             "current_knowledge": synthesized_content,
         }
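Both versions of this strategy fan searches out with a thread pool and consume them with as_completed, so one slow engine never blocks the others; the new code simply runs that fan-out once per iteration and budgets progress as 5% for setup, 70% split evenly across iterations, and the rest for synthesis. A self-contained sketch of the pattern, with a stub standing in for self.search.run:

    import concurrent.futures

    def run_search(question: str) -> dict:
        # Stub for self.search.run(q); a real engine returns a result list.
        return {"question": question, "results": [f"hit for {question!r}"]}

    questions = ["main query", "follow-up 1", "follow-up 2"]

    with concurrent.futures.ThreadPoolExecutor(max_workers=len(questions)) as executor:
        futures = [executor.submit(run_search, q) for q in questions]
        for i, future in enumerate(concurrent.futures.as_completed(futures)):
            result = future.result()
            # Completion order, not submission order, drives the
            # per-search progress callbacks in the strategy above.
            print(f"{i + 1}/{len(questions)} done: {result['question']}")

One detail worth flagging: the debug line added here concatenates the raw get_db_setting("search.iterations") value into a string, which raises TypeError if the setting comes back as an int; the new source-based strategy below wraps the same value in str().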
--- /dev/null
+++ b/local_deep_research/advanced_search_system/strategies/source_based_strategy.py
@@ -0,0 +1,407 @@
+import concurrent.futures
+import logging
+from typing import Dict
+
+from ...citation_handler import CitationHandler
+from ...config.llm_config import get_llm
+from ...config.search_config import get_search
+from ...utilities.db_utils import get_db_setting
+from ...utilities.search_utilities import extract_links_from_search_results
+from ..filters.cross_engine_filter import CrossEngineFilter
+from ..findings.repository import FindingsRepository
+from ..questions.standard_question import StandardQuestionGenerator
+from .base_strategy import BaseSearchStrategy
+
+logger = logging.getLogger(__name__)
+
+
+class SourceBasedSearchStrategy(BaseSearchStrategy):
+    """
+    Source-based search strategy that generates questions based on search results and
+    defers content analysis until final synthesis.
+    """
+
+    def __init__(
+        self,
+        search=None,
+        model=None,
+        citation_handler=None,
+        include_text_content: bool = True,
+        use_cross_engine_filter: bool = True,
+        filter_reorder: bool = True,
+        filter_reindex: bool = True,
+        filter_max_results: int = 20,
+    ):
+        """Initialize with optional dependency injection for testing."""
+        super().__init__()
+        self.search = search or get_search()
+        self.model = model or get_llm()
+        self.progress_callback = None
+        self.all_links_of_system = list()
+        self.all_search_results = []
+        self.questions_by_iteration = {}
+        self.include_text_content = include_text_content
+        self.use_cross_engine_filter = use_cross_engine_filter
+        self.filter_reorder = filter_reorder
+        self.filter_reindex = filter_reindex
+
+        # Initialize the cross-engine filter
+        self.cross_engine_filter = CrossEngineFilter(
+            model=self.model,
+            max_results=filter_max_results,
+            default_reorder=filter_reorder,
+            default_reindex=filter_reindex,
+        )
+
+        # Set include_full_content on the search engine if it supports it
+        if hasattr(self.search, "include_full_content"):
+            self.search.include_full_content = include_text_content
+
+        # Use provided citation_handler or create one
+        self.citation_handler = citation_handler or CitationHandler(self.model)
+
+        # Initialize components
+        self.question_generator = StandardQuestionGenerator(self.model)
+        self.findings_repository = FindingsRepository(self.model)
+
+    def _format_search_results_as_context(self, search_results):
+        """Format search results into context for question generation."""
+        context_snippets = []
+
+        for i, result in enumerate(
+            search_results[:10]
+        ):  # Limit to prevent context overflow
+            title = result.get("title", "Untitled")
+            snippet = result.get("snippet", "")
+            url = result.get("link", "")
+
+            if snippet:
+                context_snippets.append(
+                    f"Source {i + 1}: {title}\nURL: {url}\nSnippet: {snippet}"
+                )
+
+        return "\n\n".join(context_snippets)
+
+    def analyze_topic(self, query: str) -> Dict:
+        """
+        Analyze a topic using source-based search strategy.
+        """
+        logger.info(f"Starting source-based research on topic: {query}")
+
+        findings = []
+        self.all_search_results = []
+
+        # Track all search results across iterations
+        self.all_links_of_system = list()
+        self.questions_by_iteration = {}
+
+        self._update_progress(
+            "Initializing source-based research",
+            5,
+            {
+                "phase": "init",
+                "strategy": "source-based",
+                "include_text_content": self.include_text_content,
+            },
+        )
+
+        # Check search engine
+        if not self._validate_search_engine():
+            return {
+                "findings": [],
+                "iterations": 0,
+                "questions_by_iteration": {},
+                "formatted_findings": "Error: Unable to conduct research without a search engine.",
+                "current_knowledge": "",
+                "error": "No search engine available",
+            }
+
+        # Determine number of iterations to run
+        iterations_to_run = get_db_setting("search.iterations")
+        logger.debug("Selected amount of iterations: " + str(iterations_to_run))
+        iterations_to_run = int(iterations_to_run)
+        try:
+            # Run each iteration
+            for iteration in range(1, iterations_to_run + 1):
+                iteration_progress_base = 5 + (iteration - 1) * (70 / iterations_to_run)
+
+                self._update_progress(
+                    f"Starting iteration {iteration}/{iterations_to_run}",
+                    iteration_progress_base,
+                    {"phase": f"iteration_{iteration}", "iteration": iteration},
+                )
+
+                # Step 1: Generate or use questions
+                self._update_progress(
+                    f"Generating search questions for iteration {iteration}",
+                    iteration_progress_base + 5,
+                    {"phase": "question_generation", "iteration": iteration},
+                )
+
+                # For first iteration, use initial query
+                if iteration == 1:
+                    # Generate questions for first iteration
+                    source_context = self._format_search_results_as_context(
+                        self.all_search_results
+                    )
+                    context = f"""Iteration: {iteration} of {iterations_to_run}"""
+                    questions = self.question_generator.generate_questions(
+                        current_knowledge=context,
+                        query=query,
+                        questions_per_iteration=int(
+                            get_db_setting("search.questions_per_iteration")
+                        ),
+                        questions_by_iteration=self.questions_by_iteration,
+                    )
+
+                    # Always include the original query for the first iteration
+                    if query not in questions:
+                        all_questions = [query] + questions
+                    else:
+                        all_questions = questions
+
+                    self.questions_by_iteration[iteration] = all_questions
+                    logger.info(
+                        f"Using questions for iteration {iteration}: {all_questions}"
+                    )
+                else:
+                    # For subsequent iterations, generate questions based on previous search results
+                    source_context = self._format_search_results_as_context(
+                        self.all_search_results
+                    )
+                    if iteration != 1:
+                        context = f"""Previous search results:\n{source_context}\n\nIteration: {iteration} of {iterations_to_run}"""
+                    elif iterations_to_run == 1:
+                        context = ""
+                    else:
+                        context = f"""Iteration: {iteration} of {iterations_to_run}"""
+                    # Use standard question generator with search results as context
+                    questions = self.question_generator.generate_questions(
+                        current_knowledge=context,
+                        query=query,
+                        questions_per_iteration=int(
+                            get_db_setting("search.questions_per_iteration")
+                        ),
+                        questions_by_iteration=self.questions_by_iteration,
+                    )
+
+                    # Use only the new questions for this iteration's searches
+                    all_questions = questions
+
+                    # Store in questions_by_iteration
+                    self.questions_by_iteration[iteration] = questions
+                    logger.info(
+                        f"Generated questions for iteration {iteration}: {questions}"
+                    )
+
+                # Step 2: Run all searches in parallel for this iteration
+                self._update_progress(
+                    f"Running parallel searches for iteration {iteration}",
+                    iteration_progress_base + 10,
+                    {"phase": "parallel_search", "iteration": iteration},
+                )
+
+                # Function for thread pool
+                def search_question(q):
+                    try:
+                        result = self.search.run(q)
+                        return {"question": q, "results": result or []}
+                    except Exception as e:
+                        logger.error(f"Error searching for '{q}': {str(e)}")
+                        return {"question": q, "results": [], "error": str(e)}
+
+                # Run searches in parallel
+                with concurrent.futures.ThreadPoolExecutor(
+                    max_workers=len(all_questions)
+                ) as executor:
+                    futures = [
+                        executor.submit(search_question, q) for q in all_questions
+                    ]
+                    iteration_search_dict = {}
+                    iteration_search_results = []
+
+                    # Process results as they complete
+                    for i, future in enumerate(
+                        concurrent.futures.as_completed(futures)
+                    ):
+                        result_dict = future.result()
+                        question = result_dict["question"]
+                        search_results = result_dict["results"]
+                        iteration_search_dict[question] = search_results
+
+                        self._update_progress(
+                            f"Completed search {i + 1} of {len(all_questions)}: {question[:30]}...",
+                            iteration_progress_base
+                            + 10
+                            + ((i + 1) / len(all_questions) * 30),
+                            {
+                                "phase": "search_complete",
+                                "iteration": iteration,
+                                "result_count": len(search_results),
+                                "question": question,
+                            },
+                        )
+
+                        # Collect all search results for this iteration
+                        iteration_search_results.extend(search_results)
+
+                # Step 3: Apply cross-engine filtering if enabled
+                if self.use_cross_engine_filter:
+                    self._update_progress(
+                        f"Filtering search results for iteration {iteration}",
+                        iteration_progress_base + 45,
+                        {"phase": "cross_engine_filtering", "iteration": iteration},
+                    )
+
+                    # Get the current link count (for indexing)
+                    existing_link_count = len(self.all_links_of_system)
+
+                    # Filter the search results
+                    filtered_search_results = self.cross_engine_filter.filter_results(
+                        iteration_search_results,
+                        query,
+                        reorder=self.filter_reorder,
+                        reindex=self.filter_reindex,
+                        start_index=existing_link_count,  # Start indexing after existing links
+                    )
+
+                    links = extract_links_from_search_results(filtered_search_results)
+                    self.all_links_of_system.extend(links)
+
+                    self._update_progress(
+                        f"Filtered from {len(iteration_search_results)} to {len(filtered_search_results)} results",
+                        iteration_progress_base + 50,
+                        {
+                            "phase": "filtering_complete",
+                            "iteration": iteration,
+                            "links_count": len(self.all_links_of_system),
+                        },
+                    )
+
+                    # Use filtered results
+                    iteration_search_results = filtered_search_results
+                else:
+                    # Just extract links without filtering
+                    links = extract_links_from_search_results(iteration_search_results)
+                    self.all_links_of_system.extend(links)
+
+                # Add to all search results
+                self.all_search_results.extend(iteration_search_results)
+
+                # Create a lightweight finding for this iteration's search metadata (no text content)
+                finding = {
+                    "phase": f"Iteration {iteration}",
+                    "content": f"Searched with {len(all_questions)} questions, found {len(iteration_search_results)} results.",
+                    "question": query,
+                    "search_results": iteration_search_results,
+                    "documents": [],
+                }
+                findings.append(finding)
+
+                # Mark iteration as complete
+                iteration_progress = 5 + iteration * (70 / iterations_to_run)
+                self._update_progress(
+                    f"Completed iteration {iteration}/{iterations_to_run}",
+                    iteration_progress,
+                    {"phase": "iteration_complete", "iteration": iteration},
+                )
+
+            # Final filtering of all accumulated search results
+            self._update_progress(
+                "Performing final filtering of all results",
+                80,
+                {"phase": "final_filtering"},
+            )
+
+            # Apply final cross-engine filtering to all accumulated results if enabled
+            if self.use_cross_engine_filter:
+                final_filtered_results = self.cross_engine_filter.filter_results(
+                    self.all_search_results,
+                    query,
+                    reorder=True,  # Always reorder in final filtering
+                    reindex=False,  # Always reindex in final filtering
+                    max_results=int(get_db_setting("search.final_max_results") or 30),
+                )
+            else:
+                final_filtered_results = self.all_search_results
+            self._update_progress(
+                f"Filtered from {len(self.all_search_results)} to {len(final_filtered_results)} results",
+                iteration_progress_base + 85,
+                {
+                    "phase": "filtering_complete",
+                    "iteration": iteration,
+                    "links_count": len(self.all_links_of_system),
+                },
+            )
+            # Final synthesis after all iterations
+            self._update_progress(
+                "Generating final synthesis", 90, {"phase": "synthesis"}
+            )
+
+            total_citation_count = len(self.all_links_of_system)
+
+            # Final synthesis
+            final_citation_result = self.citation_handler.analyze_followup(
+                query,
+                final_filtered_results,
+                previous_knowledge="",  # Empty string as we don't need previous knowledge here
+                nr_of_links=total_citation_count,
+            )
+
+            # Add null check for final_citation_result
+            if final_citation_result:
+                synthesized_content = final_citation_result["content"]
+                documents = final_citation_result.get("documents", [])
+            else:
+                synthesized_content = "No relevant results found in final synthesis."
+                documents = []
+
+            # Add a final synthesis finding
+            final_finding = {
+                "phase": "Final synthesis",
+                "content": synthesized_content,
+                "question": query,
+                "search_results": final_filtered_results,
+                "documents": documents,
+            }
+            findings.append(final_finding)
+
+            # Add documents to repository
+            self.findings_repository.add_documents(documents)
+
+            # Transfer questions to repository
+            self.findings_repository.set_questions_by_iteration(
+                self.questions_by_iteration
+            )
+
+            # Format findings
+            formatted_findings = self.findings_repository.format_findings_to_text(
+                findings, synthesized_content
+            )
+
+        except Exception as e:
+            import traceback
+
+            error_msg = f"Error in research process: {str(e)}"
+            logger.error(error_msg)
+            logger.error(traceback.format_exc())
+            synthesized_content = f"Error: {str(e)}"
+            formatted_findings = f"Error: {str(e)}"
+            finding = {
+                "phase": "Error",
+                "content": synthesized_content,
+                "question": query,
+                "search_results": [],
+                "documents": [],
+            }
+            findings.append(finding)
+
+        self._update_progress("Research complete", 100, {"phase": "complete"})
+
+        return {
+            "findings": findings,
+            "iterations": iterations_to_run,
+            "questions_by_iteration": self.questions_by_iteration,
+            "formatted_findings": formatted_findings,
+            "current_knowledge": synthesized_content,
+        }
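analyze_topic returns the same dictionary shape on success, on early validation failure, and on error, which keeps callers simple. A usage sketch, assuming an LLM and search engine are already configured so the get_llm() and get_search() constructor defaults resolve:

    # Sketch only: requires a configured LLM and search engine.
    from local_deep_research.advanced_search_system.strategies.source_based_strategy import (
        SourceBasedSearchStrategy,
    )

    strategy = SourceBasedSearchStrategy(include_text_content=True)
    result = strategy.analyze_topic("What is retrieval-augmented generation?")

    print(result["iterations"])              # number of iterations run
    print(result["questions_by_iteration"])  # {1: [...], 2: [...], ...}
    print(result["formatted_findings"][:200])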
--- a/local_deep_research/citation_handler.py
+++ b/local_deep_research/citation_handler.py
@@ -82,18 +82,18 @@ Provide a detailed analysis with citations. Do not create the bibliography, it w
         formatted_sources = self._format_sources(documents)
         # Add fact-checking step
         fact_check_prompt = f"""Analyze these sources for factual consistency:
-        1. Cross-reference major claims between sources
-        2. Identify and flag any contradictions
-        3. Verify basic facts (dates, company names, ownership)
-        4. Note when sources disagree
+1. Cross-reference major claims between sources
+2. Identify and flag any contradictions
+3. Verify basic facts (dates, company names, ownership)
+4. Note when sources disagree
 
-        Previous Knowledge:
-        {previous_knowledge}
+Previous Knowledge:
+{previous_knowledge}
 
-        New Sources:
-        {formatted_sources}
+New Sources:
+{formatted_sources}
 
-        Return any inconsistencies or conflicts found."""
+Return any inconsistencies or conflicts found."""
         if get_db_setting(
             "general.enable_fact_checking", settings.general.enable_fact_checking
         ):
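The two prompt hunks in this file appear to be whitespace-only changes, and that matters: Python triple-quoted strings keep leading indentation verbatim, so a prompt indented to match the surrounding code ships that indentation to the model. A small illustration of the language behavior (not the package's code):

    def indented() -> str:
        return """Analyze:
                1. Cross-reference claims"""

    def dedented() -> str:
        return "Analyze:\n1. Cross-reference claims"

    # The indented version carries the code's leading whitespace into the
    # prompt text; the dedented version does not.
    assert "    1." in indented()
    assert "\n1." in dedented()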
@@ -104,16 +104,15 @@ Provide a detailed analysis with citations. Do not create the bibliography, it w
 
         prompt = f"""Using the previous knowledge and new sources, answer the question. Include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source. Reflect information from sources critically.
 
-        Previous Knowledge:
-        {previous_knowledge}
+Previous Knowledge:
+{previous_knowledge}
 
-        Question: {question}
+Question: {question}
 
-        New Sources:
-        {formatted_sources}
-        Reflect information from sources critically based on: {fact_check_response}. Never invent sources.
-        Provide a detailed answer with citations. Example format: "According to [1], ..."
-        """
+New Sources:
+{formatted_sources}
+Reflect information from sources critically based on: {fact_check_response}. Never invent sources.
+Provide a detailed answer with citations. Example format: "According to [1], ..." """
 
         response = self.llm.invoke(prompt)
 
--- a/local_deep_research/search_system.py
+++ b/local_deep_research/search_system.py
@@ -13,6 +13,9 @@ from .advanced_search_system.strategies.parallel_search_strategy import (
     ParallelSearchStrategy,
 )
 from .advanced_search_system.strategies.rapid_search_strategy import RapidSearchStrategy
+from .advanced_search_system.strategies.source_based_strategy import (
+    SourceBasedSearchStrategy,
+)
 from .advanced_search_system.strategies.standard_strategy import StandardSearchStrategy
 from .citation_handler import CitationHandler
 from .config.config_files import settings
@@ -31,7 +34,7 @@ class AdvancedSearchSystem:
 
     def __init__(
         self,
-        strategy_name: str = "parallel",
+        strategy_name: str = "source-based",
         include_text_content: bool = True,
         use_cross_engine_filter: bool = True,
         llm: BaseChatModel | None = None,
@@ -76,6 +79,14 @@ class AdvancedSearchSystem:
         if strategy_name.lower() == "iterdrag":
             logger.info("Creating IterDRAGStrategy instance")
             self.strategy = IterDRAGStrategy(model=self.model, search=self.search)
+        elif strategy_name.lower() == "source-based":
+            logger.info("Creating SourceBasedSearchStrategy instance")
+            self.strategy = SourceBasedSearchStrategy(
+                model=self.model,
+                search=self.search,
+                include_text_content=include_text_content,
+                use_cross_engine_filter=use_cross_engine_filter,
+            )
         elif strategy_name.lower() == "parallel":
             logger.info("Creating ParallelSearchStrategy instance")
             self.strategy = ParallelSearchStrategy(
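With this hunk, "source-based" is both selectable and the new default strategy. A minimal sketch of constructing the system either way (requires the same configured LLM and search engine as above):

    from local_deep_research.search_system import AdvancedSearchSystem

    system = AdvancedSearchSystem()  # now defaults to strategy_name="source-based"
    legacy = AdvancedSearchSystem(strategy_name="parallel")  # the 0.2.2 default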
--- a/local_deep_research/web/services/research_service.py
+++ b/local_deep_research/web/services/research_service.py
@@ -690,6 +690,10 @@ def run_research_process(
             # Handle error
             error_message = f"Research failed: {str(e)}"
             logger.error(error_message)
+            import traceback
+
+            logger.error("Exception occurred:" + str(traceback.print_exc()))
+
             try:
                 # Check for common Ollama error patterns in the exception and provide more user-friendly errors
                 user_friendly_error = str(e)
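A caveat on the added logging: traceback.print_exc() prints the traceback to stderr and returns None, so the message logged here is always the literal string "Exception occurred:None". The usual way to get the traceback as text for a logger is traceback.format_exc(); a sketch of that pattern (not what 0.2.3 ships):

    import logging
    import traceback

    logger = logging.getLogger(__name__)

    try:
        raise ValueError("demo failure")
    except ValueError:
        # format_exc() returns the traceback as a string instead of
        # printing it to stderr and returning None.
        logger.error("Exception occurred: %s", traceback.format_exc())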
--- a/local_deep_research-0.2.2.dist-info/METADATA
+++ b/local_deep_research-0.2.3.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: local-deep-research
-Version: 0.2.2
+Version: 0.2.3
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-Email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
--- a/local_deep_research-0.2.2.dist-info/RECORD
+++ b/local_deep_research-0.2.3.dist-info/RECORD
@@ -1,7 +1,7 @@
-local_deep_research-0.2.2.dist-info/METADATA,sha256=MgFc30qd-f-kk07M_jDRZ7HAq8MzL92pDLxQ34YYQMU,19797
-local_deep_research-0.2.2.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
-local_deep_research-0.2.2.dist-info/entry_points.txt,sha256=GcXS501Rjh-P80S8db7hnrQ23mS_Jg27PwpVQVO77as,113
-local_deep_research-0.2.2.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
+local_deep_research-0.2.3.dist-info/METADATA,sha256=46N1CYIqxccMSv3Iaq-Tm8gEFtnuc1ATUJqfM720HkE,19797
+local_deep_research-0.2.3.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
+local_deep_research-0.2.3.dist-info/entry_points.txt,sha256=GcXS501Rjh-P80S8db7hnrQ23mS_Jg27PwpVQVO77as,113
+local_deep_research-0.2.3.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
 local_deep_research/__init__.py,sha256=tczbsYNZQqfPAuVtz6OFyo-uUqjNQLelEIT2G7mPTwA,870
 local_deep_research/__main__.py,sha256=LIxK5iS6aLAKMFBDpUS3V-jDcxchqi3eSUsI2jAZUXk,371
 local_deep_research/advanced_search_system/__init__.py,sha256=sGusMj4eFIrhXR6QbOM16UDKB6aI-iS4IFivKWpMlh0,234
@@ -21,8 +21,9 @@ local_deep_research/advanced_search_system/repositories/__init__.py,sha256=cCjAR
 local_deep_research/advanced_search_system/strategies/__init__.py,sha256=upbslnB6Ns8RJ0-b1bH74-f5gZbo7evpx1dRrKEkzHA,35
 local_deep_research/advanced_search_system/strategies/base_strategy.py,sha256=cK5DqvsjGlFyqKRtpl0-dI6cip32UIbGS8eqsuL9SjI,3781
 local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py,sha256=eKCyxNVRnN7pOr-8LEzREbRkHX6ffa9hmjGwBYHHDDc,18129
-local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py,sha256=n-UVHHpyRFtMmPdaDQ30wE2V839CWGrLOM-cVLtRUrE,12396
+local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py,sha256=dA5KgS5G_1O82MLhWx1UOZi5P4c7hqWdQPRdtt1B49U,19006
 local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py,sha256=fiLTqCfpyoNlP_rRZB96gdi3KoOkCWk-Nw5fb7E9an4,10389
+local_deep_research/advanced_search_system/strategies/source_based_strategy.py,sha256=PW5gHhpayon3d716Ooo02UITkoxfBGvgzrm7kFITWWo,17312
 local_deep_research/advanced_search_system/strategies/standard_strategy.py,sha256=FbZAHiRAhfFCtA46Im0KxF5QNzursiz0SqhimvNiaXs,12747
 local_deep_research/advanced_search_system/tools/__init__.py,sha256=73jLuCKigwc9lJQ0uD3_F16dgCg4pL-F2cwC6tk9-oc,30
 local_deep_research/advanced_search_system/tools/base_tool.py,sha256=jEs4eroCvo0dHP_uF-5kLiQP7OfkD1YzNAD650a8Ktk,2865
@@ -32,7 +33,7 @@ local_deep_research/advanced_search_system/tools/search_tools/__init__.py,sha256
 local_deep_research/api/__init__.py,sha256=-tJQp7Qm1aPg6fgfuw-w9dfNo8GzrJLOy2i3dG8Drl8,441
 local_deep_research/api/research_functions.py,sha256=8Q_Rzfc0Qj2oLxzvFJIA4ms10uQC0a5SBHkIkSoPcw4,10908
 local_deep_research/app.py,sha256=U_92UX0dpVAQoaXciVNy_By_AyDEWGlXSeTwFpohALQ,155
-local_deep_research/citation_handler.py,sha256=KdfwHqSewPyP2OrxEGu9o15pJtFDYLUsLwOTHkQe8I8,4564
+local_deep_research/citation_handler.py,sha256=NoEvnpf7jqCAJX6H-H8i2Hz69CVPW6UBg12cBRYtVdA,4396
 local_deep_research/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/config/config_files.py,sha256=k6ptAKIvqGrhnRsfRjT9uD2xBLAiD0vqXoYxggF5mik,10041
 local_deep_research/config/llm_config.py,sha256=KhuDbxjndU939rMCKmeYDctsx7fRoXMoQRv1AgtZKI4,16536
@@ -45,7 +46,7 @@ local_deep_research/defaults/search_engines.toml,sha256=XBnqCxzFvXa1HoKLcb_Jg4EG
 local_deep_research/main.py,sha256=umGmaQmW7bpx27wUAgSNjNr4oSHV6mDX5hoyfb22HEY,7033
 local_deep_research/migrate_db.py,sha256=S1h6Bv0OJdRW4BaH7MIMrUXBRV_yqgH2T6LVOZKTQjI,4634
 local_deep_research/report_generator.py,sha256=-G3KDEbsuU3PdxDfuo5v28DIX7RE1yJCCBU2KgRbNzI,9084
-local_deep_research/search_system.py,sha256=MqaG435RzllyHlVuT7eCc_wC8_rCA4RLW7F5NDp9kxE,7108
+local_deep_research/search_system.py,sha256=YmXu9ui-aB5kGb9rqQWUb7qSvd-iHfp3PvRenPwCdDA,7604
 local_deep_research/setup_data_dir.py,sha256=7MJa2MMdDUnktJVHwMpyNL2079-qylpIyyLpVbF5AUY,1134
 local_deep_research/test_migration.py,sha256=cXY9WbpxLslNEa1vFwLMvcvKBbUe7Wosm--AqmPIPYM,6459
 local_deep_research/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -68,7 +69,7 @@ local_deep_research/web/routes/api_routes.py,sha256=S0UdCmfm0v1GEM4UiSbI0PE3xUOx
 local_deep_research/web/routes/history_routes.py,sha256=6a_8nX349viuvi1zP5S7BaPPpAh133eTi1NVWO545A8,12622
 local_deep_research/web/routes/research_routes.py,sha256=JlzaP1z-7XAP3E0nkEjLIfYj_NKf5qDcrjxBmUouAhM,23492
 local_deep_research/web/routes/settings_routes.py,sha256=rEvvFCVWJ80zchnzXBv9SAnDXMvDPLGDjSUfLRlCCi0,60012
-local_deep_research/web/services/research_service.py,sha256=sxvW4oNLiiKgQ8w0SblefzMmk8EEaNNOGd8oC96j85E,39556
+local_deep_research/web/services/research_service.py,sha256=0tFx3wactXhZjFuZDHC3aAFgpDTtjfm_c-1HsZLxaos,39656
 local_deep_research/web/services/resource_service.py,sha256=yKgOC6GEOmHqRoGzwf52e19UaGCCS1DbDbOIXgWGvGc,4378
 local_deep_research/web/services/settings_manager.py,sha256=ybnhSlByuKA2oJPElN2WI8bh-ZzC6lP08x0Gsz8Ycbk,24310
 local_deep_research/web/services/settings_service.py,sha256=1XHvNBNs9gzor2AxOEDrqL-JsKyXKk5izCnoXAV78u8,5064
@@ -132,4 +133,4 @@ local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256
 local_deep_research/web_search_engines/search_engine_base.py,sha256=PLU_sAWhWKTOQWcv32GINuhLdIwB0sEQy-pp9oG9Ggo,9835
 local_deep_research/web_search_engines/search_engine_factory.py,sha256=mkIf6F-8-aooS47iqb8SanJ9shnl0UOVia8hr2xX0b0,12751
 local_deep_research/web_search_engines/search_engines_config.py,sha256=GmwpCT6vfeq1wrdr1R-zu6WRQ5XxyE7921HPsgGm3gI,2771
-local_deep_research-0.2.2.dist-info/RECORD,,
+local_deep_research-0.2.3.dist-info/RECORD,,