local-deep-research 0.3.12__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (94)
  1. local_deep_research/__init__.py +1 -0
  2. local_deep_research/__version__.py +1 -1
  3. local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
  4. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
  5. local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
  6. local_deep_research/advanced_search_system/findings/repository.py +0 -3
  7. local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
  8. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
  9. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
  10. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
  11. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
  12. local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
  13. local_deep_research/api/benchmark_functions.py +288 -0
  14. local_deep_research/api/research_functions.py +8 -4
  15. local_deep_research/benchmarks/README.md +162 -0
  16. local_deep_research/benchmarks/__init__.py +51 -0
  17. local_deep_research/benchmarks/benchmark_functions.py +353 -0
  18. local_deep_research/benchmarks/cli/__init__.py +16 -0
  19. local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
  20. local_deep_research/benchmarks/cli.py +347 -0
  21. local_deep_research/benchmarks/comparison/__init__.py +12 -0
  22. local_deep_research/benchmarks/comparison/evaluator.py +768 -0
  23. local_deep_research/benchmarks/datasets/__init__.py +53 -0
  24. local_deep_research/benchmarks/datasets/base.py +295 -0
  25. local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
  26. local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
  27. local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
  28. local_deep_research/benchmarks/datasets/utils.py +116 -0
  29. local_deep_research/benchmarks/datasets.py +31 -0
  30. local_deep_research/benchmarks/efficiency/__init__.py +14 -0
  31. local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
  32. local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
  33. local_deep_research/benchmarks/evaluators/__init__.py +18 -0
  34. local_deep_research/benchmarks/evaluators/base.py +74 -0
  35. local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
  36. local_deep_research/benchmarks/evaluators/composite.py +121 -0
  37. local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
  38. local_deep_research/benchmarks/graders.py +410 -0
  39. local_deep_research/benchmarks/metrics/README.md +80 -0
  40. local_deep_research/benchmarks/metrics/__init__.py +24 -0
  41. local_deep_research/benchmarks/metrics/calculation.py +385 -0
  42. local_deep_research/benchmarks/metrics/reporting.py +155 -0
  43. local_deep_research/benchmarks/metrics/visualization.py +205 -0
  44. local_deep_research/benchmarks/metrics.py +11 -0
  45. local_deep_research/benchmarks/optimization/__init__.py +32 -0
  46. local_deep_research/benchmarks/optimization/api.py +274 -0
  47. local_deep_research/benchmarks/optimization/metrics.py +20 -0
  48. local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
  49. local_deep_research/benchmarks/runners.py +434 -0
  50. local_deep_research/benchmarks/templates.py +65 -0
  51. local_deep_research/config/llm_config.py +26 -23
  52. local_deep_research/config/search_config.py +1 -5
  53. local_deep_research/defaults/default_settings.json +108 -7
  54. local_deep_research/search_system.py +16 -8
  55. local_deep_research/utilities/db_utils.py +3 -6
  56. local_deep_research/utilities/es_utils.py +441 -0
  57. local_deep_research/utilities/log_utils.py +36 -0
  58. local_deep_research/utilities/search_utilities.py +8 -9
  59. local_deep_research/web/app.py +15 -10
  60. local_deep_research/web/app_factory.py +9 -12
  61. local_deep_research/web/database/migrations.py +8 -5
  62. local_deep_research/web/database/models.py +20 -0
  63. local_deep_research/web/database/schema_upgrade.py +5 -8
  64. local_deep_research/web/models/database.py +15 -18
  65. local_deep_research/web/routes/benchmark_routes.py +427 -0
  66. local_deep_research/web/routes/research_routes.py +13 -17
  67. local_deep_research/web/routes/settings_routes.py +264 -67
  68. local_deep_research/web/services/research_service.py +58 -73
  69. local_deep_research/web/services/settings_manager.py +1 -4
  70. local_deep_research/web/services/settings_service.py +4 -6
  71. local_deep_research/web/static/css/styles.css +12 -0
  72. local_deep_research/web/static/js/components/logpanel.js +164 -155
  73. local_deep_research/web/static/js/components/research.js +44 -3
  74. local_deep_research/web/static/js/components/settings.js +27 -0
  75. local_deep_research/web/static/js/services/socket.js +47 -0
  76. local_deep_research/web_search_engines/default_search_engines.py +38 -0
  77. local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
  78. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
  79. local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
  80. local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
  81. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
  82. local_deep_research/web_search_engines/engines/search_engine_local.py +19 -23
  83. local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
  84. local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
  85. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
  86. local_deep_research/web_search_engines/search_engine_base.py +22 -5
  87. local_deep_research/web_search_engines/search_engine_factory.py +30 -11
  88. local_deep_research/web_search_engines/search_engines_config.py +14 -1
  89. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/METADATA +10 -2
  90. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/RECORD +93 -51
  91. local_deep_research/app.py +0 -8
  92. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/WHEEL +0 -0
  93. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/entry_points.txt +0 -0
  94. {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.1.dist-info}/licenses/LICENSE +0 -0
local_deep_research/__init__.py
@@ -9,6 +9,7 @@ from .__version__ import __version__
  from .config.llm_config import get_llm
  from .config.search_config import get_search
  from .report_generator import get_report_generator
+ from .web.app import main


  def get_advanced_search_system(strategy_name: str = "iterdrag"):
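The new re-export above makes the web entry point importable from the package root. A minimal sketch of what that enables is below; it assumes `main()` starts the web UI, which this hunk does not itself show.

```python
# Hedged illustration only: assumes `main` (re-exported from
# local_deep_research.web.app) launches the web interface when called.
from local_deep_research import main

if __name__ == "__main__":
    main()
```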

local_deep_research/__version__.py
@@ -1 +1 @@
- __version__ = "0.3.12"
+ __version__ = "0.4.1"

local_deep_research/advanced_search_system/filters/base_filter.py
@@ -3,17 +3,16 @@
  Base class for search result filters.
  """

- import logging
  from abc import ABC, abstractmethod
  from typing import Dict, List

- logger = logging.getLogger(__name__)
+ from langchain_core.language_models.chat_models import BaseChatModel


  class BaseFilter(ABC):
  """Abstract base class for all search result filters."""

- def __init__(self, model=None):
+ def __init__(self, model: BaseChatModel | None = None):
  """
  Initialize the filter.
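The typed constructor above is the interface the concrete filters build on. Below is a minimal, hypothetical subclass for illustration; it assumes `BaseFilter` declares a `filter_results(results, query, **kwargs)` method, as the concrete filters later in this diff implement.

```python
# Hypothetical subclass of the updated BaseFilter, for illustration only.
from typing import Dict, List

from langchain_core.language_models.chat_models import BaseChatModel

from local_deep_research.advanced_search_system.filters.base_filter import BaseFilter


class TitleKeywordFilter(BaseFilter):
    """Keeps results whose title shares at least one term with the query."""

    def __init__(self, model: BaseChatModel | None = None):
        super().__init__(model)

    def filter_results(self, results: List[Dict], query: str, **kwargs) -> List[Dict]:
        terms = {term.lower() for term in query.split()}
        return [
            result
            for result in results
            if terms & set(result.get("title", "").lower().split())
        ]
```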
 

local_deep_research/advanced_search_system/filters/cross_engine_filter.py
@@ -3,15 +3,14 @@ Cross-engine search result filter implementation.
  """

  import json
- import logging
  from typing import Dict, List

+ from loguru import logger
+
  from ...utilities.db_utils import get_db_setting
  from ...utilities.search_utilities import remove_think_tags
  from .base_filter import BaseFilter

- logger = logging.getLogger(__name__)
-

  class CrossEngineFilter(BaseFilter):
  """Filter that ranks and filters results from multiple search engines."""
@@ -194,8 +193,8 @@ If no results seem relevant to the query, return an empty array: []"""
  result["index"] = str(i + start_index + 1)
  return top_results

- except Exception as e:
- logger.error(f"Cross-engine filtering error: {e}")
+ except Exception:
+ logger.exception("Cross-engine filtering error")
  top_results = results[: min(self.max_results, len(results))]
  # Update indices if requested
  if reindex:
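The hunk above shows the logging change that repeats throughout this release: the stdlib `logging` module is replaced by loguru, and `logger.error(f"...: {e}")` calls become `logger.exception(...)`, which records the active traceback automatically. A small sketch of the pattern (the function is made up for illustration):

```python
from loguru import logger


def parse_score(raw: str) -> int | None:
    try:
        return int(raw.strip())
    except ValueError:
        # loguru's logger.exception() logs at ERROR level and appends the
        # current traceback, so interpolating the exception into the
        # message is no longer needed.
        logger.exception("Could not parse score")
        return None
```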

local_deep_research/advanced_search_system/filters/journal_reputation_filter.py
@@ -0,0 +1,298 @@
+ import time
+ import traceback
+ from datetime import timedelta
+ from typing import Any, Dict, List, Optional
+
+ from langchain_core.language_models.chat_models import BaseChatModel
+ from loguru import logger
+ from methodtools import lru_cache
+
+ from ...config.llm_config import get_llm
+ from ...search_system import AdvancedSearchSystem
+ from ...utilities.db_utils import get_db_session, get_db_setting
+ from ...web.database.models import Journal
+ from ...web_search_engines.search_engine_factory import create_search_engine
+ from .base_filter import BaseFilter
+
+
+ class JournalFilterError(Exception):
+ """
+ Custom exception for errors related to journal filtering.
+ """
+
+
+ class JournalReputationFilter(BaseFilter):
+ """
+ A filter for academic results that considers the reputation of journals.
+
+ Note that this filter requires SearXNG to be available in order to work.
+ """
+
+ def __init__(
+ self,
+ model: BaseChatModel | None = None,
+ reliability_threshold: int | None = None,
+ max_context: int | None = None,
+ exclude_non_published: bool | None = None,
+ quality_reanalysis_period: timedelta | None = None,
+ ):
+ """
+ Args:
+ model: The LLM model to use for analysis.
+ reliability_threshold: The filter scores journal reliability on a
+ scale of 1-10. Results from any journal with a reliability
+ below this threshold will be culled. Will be read from the
+ settings if not specified.
+ max_context: The maximum number of characters to feed into the
+ LLM when assessing journal reliability.
+ exclude_non_published: If true, it will exclude any results that
+ don't have an associated journal publication.
+ quality_reanalysis_period: Period at which to update journal
+ quality assessments.
+
+ """
+ super().__init__(model)
+
+ if self.model is None:
+ self.model = get_llm()
+
+ self.__threshold = reliability_threshold
+ if self.__threshold is None:
+ self.__threshold = int(
+ get_db_setting("search.journal_reputation.threshold", 4)
+ )
+ self.__max_context = max_context
+ if self.__max_context is None:
+ self.__max_context = int(
+ get_db_setting("search.journal_reputation.max_context", 3000)
+ )
+ self.__exclude_non_published = exclude_non_published
+ if self.__exclude_non_published is None:
+ self.__exclude_non_published = bool(
+ get_db_setting("search.journal_reputation.exclude_non_published", False)
+ )
+ self.__quality_reanalysis_period = quality_reanalysis_period
+ if self.__quality_reanalysis_period is None:
+ self.__quality_reanalysis_period = timedelta(
+ days=int(
+ get_db_setting("search.journal_reputation.reanalysis_period", 365)
+ )
+ )
+
+ # SearXNG is required so we can search the open web for reputational
+ # information.
+ self.__engine = create_search_engine("searxng", llm=self.model)
+ if self.__engine is None:
+ raise JournalFilterError("SearXNG initialization failed.")
+
+ self.__db_session = get_db_session()
+
+ @classmethod
+ def create_default(
+ cls, model: BaseChatModel | None = None, *, engine_name: str
+ ) -> Optional["JournalReputationFilter"]:
+ """
+ Initializes a default configuration of the filter based on the settings.
+
+ Args:
+ model: Explicitly specify the LLM to use.
+ engine_name: The name of the search engine. Will be used to check
+ the enablement status for that engine.
+
+ Returns:
+ The filter that it created, or None if filtering is disabled in
+ the settings, or misconfigured.
+
+ """
+ if not bool(
+ get_db_setting(
+ f"search.engine.web.{engine_name}.journal_reputation.enabled",
+ True,
+ )
+ ):
+ return None
+
+ try:
+ # Initialize the filter with default settings.
+ return JournalReputationFilter(model=model)
+ except JournalFilterError:
+ logger.error(
+ "SearXNG is not configured, but is required for "
+ "journal reputation filtering. Disabling filtering."
+ )
+ return None
+
+ def __make_search_system(self) -> AdvancedSearchSystem:
+ """
+ Creates a new `AdvancedSearchSystem` instance.
+
+ Returns:
+ The system it created.
+
+ """
+ return AdvancedSearchSystem(
+ llm=self.model,
+ search=self.__engine,
+ # We clamp down on the default iterations and questions for speed.
+ max_iterations=2,
+ questions_per_iteration=3,
+ )
+
+ @lru_cache(maxsize=1024)
+ def __analyze_journal_reputation(self, journal_name: str) -> int:
+ """
+ Analyzes the reputation of a particular journal.
+
+ Args:
+ journal_name: The name of the journal.
+
+ Returns:
+ The reputation of the journal, on a scale from 1-10.
+
+ """
+ logger.info(f"Analyzing reputation of journal '{journal_name}'...")
+
+ # Perform a search for information about this journal.
+ journal_info = self.__make_search_system().analyze_topic(
+ f'Assess the reputability and reliability of the journal "'
+ f'{journal_name}", with a particular focus on its quartile '
+ f"ranking and peer review status. Be sure to specify the journal "
+ f"name in any generated questions."
+ )
+ journal_info = "\n".join([f["content"] for f in journal_info["findings"]])
+ logger.debug(f"Received raw info about journal: {journal_info}")
+
+ # Have the LLM assess the reliability based on this information.
+ prompt = f"""
+ You are a research assistant helping to assess the reliability and
+ reputability of scientific journals. A reputable journal should be
+ peer-reviewed, not predatory, and high-impact. Please review the
+ following information on the journal "{journal_name}" and output a
+ reputability score between 1 and 10, where 1-3 is not reputable and
+ probably predatory, 4-6 is reputable but low-impact (Q2 or Q3),
+ and 7-10 is reputable Q1 journals. Only output the number, do not
+ provide any explanation or other output.
+
+ JOURNAL INFORMATION:
+
+ {journal_info}
+ """
+ if len(prompt) > self.__max_context:
+ # If the prompt is too long, truncate it to fit within the max context size.
+ prompt = prompt[: self.__max_context] + "..."
+
+ # Generate a response from the LLM model.
+ response = self.model.invoke(prompt).text()
+ logger.debug(f"Got raw LLM response: {response}")
+
+ # Extract the score from the response.
+ try:
+ reputation_score = int(response.strip())
+ except ValueError:
+ logger.error("Failed to parse reputation score from LLM response.")
+ raise ValueError("Failed to parse reputation score from LLM response.")
+
+ return max(min(reputation_score, 10), 1)
+
+ def __add_journal_to_db(self, *, name: str, quality: int) -> None:
+ """
+ Saves the journal quality information to the database.
+
+ Args:
+ name: The name of the journal.
+ quality: The quality assessment for the journal.
+
+ """
+ journal = self.__db_session.query(Journal).filter_by(name=name).first()
+ if journal is not None:
+ journal.quality = quality
+ journal.quality_model = self.model.name
+ journal.quality_analysis_time = int(time.time())
+ else:
+ journal = Journal(
+ name=name,
+ quality=quality,
+ quality_model=self.model.name,
+ quality_analysis_time=int(time.time()),
+ )
+ self.__db_session.add(journal)
+
+ self.__db_session.commit()
+
+ def __clean_journal_name(self, journal_name: str) -> str:
+ """
+ Cleans up the name of a journal to remove any extraneous information.
+ This is mostly to make caching more effective.
+
+ Args:
+ journal_name: The raw name of the journal.
+
+ Returns:
+ The cleaned name.
+
+ """
+ logger.debug(f"Cleaning raw journal name: {journal_name}")
+
+ prompt = f"""
+ Clean up the following journal or conference name:
+
+ "{journal_name}"
+
+ Remove any references to volumes, pages, months, or years. Expand
+ abbreviations if possible. For conferences, remove locations. Only
+ output the clean name, do not provide any explanation or other output.
+ """
+
+ response = self.model.invoke(prompt).text()
+ return response.strip()
+
+ def __check_result(self, result: Dict[str, Any]) -> bool:
+ """
+ Performs a search to determine the reputability of a result journal..
+
+ Args:
+ result: The result to check.
+
+ Returns:
+ True if the journal is reputable or if it couldn't determine a
+ reputability score, false otherwise.
+
+ """
+ journal_name = result.get("journal_ref")
+ if journal_name is None:
+ logger.debug(
+ f"Result {result.get('title')} has no associated "
+ f"journal, not evaluating reputation."
+ )
+ return not self.__exclude_non_published
+ journal_name = self.__clean_journal_name(journal_name)
+
+ # Check the database first.
+ journal = self.__db_session.query(Journal).filter_by(name=journal_name).first()
+ if (
+ journal is not None
+ and (time.time() - journal.quality_analysis_time)
+ < self.__quality_reanalysis_period.total_seconds()
+ ):
+ logger.debug(f"Found existing reputation for {journal_name} in database.")
+ return journal.quality >= self.__threshold
+
+ # Evaluate reputation.
+ try:
+ quality = self.__analyze_journal_reputation(journal_name)
+ # Save to the database.
+ self.__add_journal_to_db(name=journal_name, quality=quality)
+ return quality >= self.__threshold
+ except ValueError:
+ # The LLM behaved weirdly. In this case, we will just assume it's
+ # okay.
+ return True
+
+ def filter_results(self, results: List[Dict], query: str, **kwargs) -> List[Dict]:
+ try:
+ return list(filter(self.__check_result, results))
+ except Exception as e:
+ logger.error(
+ f"Journal quality filtering failed: {e}, {traceback.format_exc()}"
+ )
+ return results
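A hedged usage sketch of the new filter, based only on the code above. The engine name, the sample results, and the query are illustrative; `create_default()` returns `None` when the `search.engine.web.<engine>.journal_reputation.enabled` setting is off or SearXNG is unavailable.

```python
# Illustrative only; assumes a configured SearXNG instance and an LLM.
from local_deep_research.advanced_search_system.filters.journal_reputation_filter import (
    JournalReputationFilter,
)

reputation_filter = JournalReputationFilter.create_default(engine_name="arxiv")
if reputation_filter is not None:
    results = [
        {"title": "A refereed study", "journal_ref": "Nature, vol. 601, 2022"},
        {"title": "An unpublished preprint"},  # no journal_ref
    ]
    # Results from journals scoring below the reliability threshold are dropped.
    kept = reputation_filter.filter_results(results, query="example query")
```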

local_deep_research/advanced_search_system/findings/repository.py
@@ -291,9 +291,6 @@ Use IEEE style citations [1], [2], etc. Never make up your own citations.

  # Check if we're on Windows
  if platform.system() == "Windows":
- # Windows-compatible timeout using threading
- class TimeoutError(Exception):
- pass

  def timeout_handler(timeout_seconds, callback, args):
  def handler():

local_deep_research/advanced_search_system/strategies/base_strategy.py
@@ -3,11 +3,10 @@ Base class for all search strategies.
  Defines the common interface and shared functionality for different search approaches.
  """

- import logging
  from abc import ABC, abstractmethod
  from typing import Callable, Dict, List, Optional

- logger = logging.getLogger(__name__)
+ from loguru import logger


  class BaseSearchStrategy(ABC):

local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py
@@ -3,10 +3,11 @@ IterDRAG strategy implementation.
  """

  import json
- import logging
  from datetime import datetime
  from typing import Dict, List

+ from loguru import logger
+
  from ...citation_handler import CitationHandler
  from ...config.llm_config import get_llm
  from ...config.search_config import get_search
@@ -17,8 +18,6 @@ from ..knowledge.standard_knowledge import StandardKnowledge
  from ..questions.decomposition_question import DecompositionQuestionGenerator
  from .base_strategy import BaseSearchStrategy

- logger = logging.getLogger(__name__)
-

  class IterDRAGStrategy(BaseSearchStrategy):
  """IterDRAG strategy that breaks queries into sub-queries."""
@@ -83,8 +82,8 @@ Initial Search Results:
  return self.question_generator.generate_questions(
  query, context, int(get_db_setting("search.questions_per_iteration"))
  )
- except Exception as e:
- logger.error(f"Error generating sub-queries: {str(e)}")
+ except Exception:
+ logger.exception("Error generating sub-queries")
  return []

  def analyze_topic(self, query: str) -> Dict:
@@ -204,8 +203,8 @@ Initial Search Results:
  "result_count": len(sub_results),
  },
  )
- except Exception as e:
- logger.error(f"Error searching for sub-query: {str(e)}")
+ except Exception:
+ logger.exception("Error searching for sub-query")
  sub_results = []

  try:
@@ -238,8 +237,8 @@ Initial Search Results:
  current_knowledge = (
  current_knowledge + "\n\n\n New: \n" + result["content"]
  )
- except Exception as e:
- logger.error(f"Error analyzing sub-query results: {str(e)}")
+ except Exception:
+ logger.exception("Error analyzing sub-query results:")
  finding = {
  "phase": f"Follow-up Iteration 0.{i + 1}",
  "content": "Error analyzing sub-query results.",
@@ -344,10 +343,7 @@ This is a fallback response using the accumulated knowledge.
  # Update current knowledge with the synthesized version
  current_knowledge = final_answer
  except Exception as e:
- logger.error(f"Error synthesizing final answer: {str(e)}")
- import traceback
-
- logger.error(traceback.format_exc())
+ logger.exception("Error synthesizing final answer")

  # Create an error finding
  error_finding = {
@@ -396,7 +392,7 @@ This is an automatically generated fallback response.
  final_answer = fallback_content
  except Exception as fallback_error:
  # Last resort fallback
- logger.error(f"Even fallback creation failed: {fallback_error}")
+ logger.exception("Even fallback creation failed")
  final_answer = f"""
  # Research Error

@@ -417,8 +413,8 @@ Please try again with a different query or contact support.
  current_knowledge = self.knowledge_generator.compress_knowledge(
  current_knowledge, query, section_links
  )
- except Exception as e:
- logger.error(f"Error compressing knowledge: {str(e)}")
+ except Exception:
+ logger.exception("Error compressing knowledge")

  # Format and save findings
  self._update_progress(
@@ -442,8 +438,8 @@ Please try again with a different query or contact support.
  formatted_findings = self.findings_repository.format_findings_to_text(
  findings, final_answer
  )
- except Exception as e:
- logger.error(f"Error formatting final findings: {str(e)}")
+ except Exception:
+ logger.exception("Error formatting final findings")
  formatted_findings = "Error: Could not format findings due to an error."

  self._update_progress("Research complete", 100, {"phase": "complete"})

local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py
@@ -3,9 +3,10 @@ Parallel search strategy implementation for maximum search speed.
  """

  import concurrent.futures
- import logging
  from typing import Dict

+ from loguru import logger
+
  from ...citation_handler import CitationHandler
  from ...config.llm_config import get_llm
  from ...config.search_config import get_search
@@ -16,8 +17,6 @@ from ..findings.repository import FindingsRepository
  from ..questions.standard_question import StandardQuestionGenerator
  from .base_strategy import BaseSearchStrategy

- logger = logging.getLogger(__name__)
-

  class ParallelSearchStrategy(BaseSearchStrategy):
  """
@@ -212,7 +211,7 @@ class ParallelSearchStrategy(BaseSearchStrategy):
  result = self.search.run(q)
  return {"question": q, "results": result or []}
  except Exception as e:
- logger.error(f"Error searching for '{q}': {str(e)}")
+ logger.exception(f"Error searching for '{q}'")
  return {"question": q, "results": [], "error": str(e)}

  # Run searches in parallel
@@ -408,11 +407,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
  )

  except Exception as e:
- import traceback
-
  error_msg = f"Error in research process: {str(e)}"
- logger.error(error_msg)
- logger.error(traceback.format_exc())
+ logger.exception(error_msg)
  synthesized_content = f"Error: {str(e)}"
  formatted_findings = f"Error: {str(e)}"
  finding = {

local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py
@@ -2,9 +2,10 @@
  RapidSearch strategy implementation.
  """

- import logging
  from typing import Dict

+ from loguru import logger
+
  from ...citation_handler import CitationHandler
  from ...config.llm_config import get_llm
  from ...config.search_config import get_search
@@ -14,8 +15,6 @@ from ..knowledge.standard_knowledge import StandardKnowledge
  from ..questions.standard_question import StandardQuestionGenerator
  from .base_strategy import BaseSearchStrategy

- logger = logging.getLogger(__name__)
-

  class RapidSearchStrategy(BaseSearchStrategy):
  """
@@ -116,7 +115,7 @@ class RapidSearchStrategy(BaseSearchStrategy):

  except Exception as e:
  error_msg = f"Error during initial search: {str(e)}"
- logger.error(f"SEARCH ERROR: {error_msg}")
+ logger.exception(f"SEARCH ERROR: {error_msg}")
  self._update_progress(
  error_msg, 15, {"phase": "search_error", "error": str(e)}
  )
@@ -187,7 +186,7 @@ class RapidSearchStrategy(BaseSearchStrategy):

  except Exception as e:
  error_msg = f"Error during search: {str(e)}"
- logger.error(f"SEARCH ERROR: {error_msg}")
+ logger.exception(f"SEARCH ERROR: {error_msg}")
  self._update_progress(
  error_msg,
  int(question_progress + 2),
@@ -248,7 +247,7 @@ class RapidSearchStrategy(BaseSearchStrategy):

  except Exception as e:
  error_msg = f"Error synthesizing final answer: {str(e)}"
- logger.error(error_msg)
+ logger.exception(error_msg)
  synthesized_content = f"Error generating synthesis: {str(e)}"
  formatted_findings = f"Error: {str(e)}"
  finding = {

local_deep_research/advanced_search_system/strategies/source_based_strategy.py
@@ -115,7 +115,7 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
  }

  # Determine number of iterations to run
- iterations_to_run = get_db_setting("search.iterations")
+ iterations_to_run = get_db_setting("search.iterations", 2)
  logger.debug("Selected amount of iterations: " + str(iterations_to_run))
  iterations_to_run = int(iterations_to_run)
  try:
@@ -177,7 +177,7 @@ class SourceBasedSearchStrategy(BaseSearchStrategy):
  current_knowledge=context,
  query=query,
  questions_per_iteration=int(
- get_db_setting("search.questions_per_iteration")
+ get_db_setting("search.questions_per_iteration", 2)
  ),
  questions_by_iteration=self.questions_by_iteration,
  )
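The two hunks above add explicit fallback values to the `get_db_setting` calls, presumably so that a missing settings row no longer breaks the later `int()` cast. A sketch of the pattern, using the same key as the diff:

```python
# Defensive settings lookup: the second argument is returned when the key
# is absent, so the cast below always receives a usable value.
from local_deep_research.utilities.db_utils import get_db_setting

iterations_to_run = int(get_db_setting("search.iterations", 2))
```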

local_deep_research/advanced_search_system/strategies/standard_strategy.py
@@ -1,7 +1,8 @@
  import json
- import logging
  from typing import Dict

+ from loguru import logger
+
  from ...citation_handler import CitationHandler
  from ...config.llm_config import get_llm
  from ...config.search_config import get_search
@@ -13,8 +14,6 @@ from ..knowledge.standard_knowledge import StandardKnowledge
  from ..questions.standard_question import StandardQuestionGenerator
  from .base_strategy import BaseSearchStrategy

- logger = logging.getLogger(__name__)
-

  class StandardSearchStrategy(BaseSearchStrategy):
  """Standard iterative search strategy that generates follow-up questions."""
@@ -112,7 +111,10 @@ Iteration: {iteration + 1} of {total_iterations}"""

  # Call question generator with updated interface
  questions = self.question_generator.generate_questions(
- query=query, context=context
+ query=query,
+ current_knowledge=context,
+ questions_per_iteration=self.questions_per_iteration,
+ questions_by_iteration=self.questions_by_iteration,
  )

  self.questions_by_iteration[iteration] = questions
@@ -153,7 +155,7 @@ Iteration: {iteration + 1} of {total_iterations}"""
  search_results = self.search.run(question)
  except Exception as e:
  error_msg = f"Error during search: {str(e)}"
- logger.error(f"SEARCH ERROR: {error_msg}")
+ logger.exception(f"SEARCH ERROR: {error_msg}")
  self._handle_search_error(error_msg, question_progress_base + 10)
  search_results = []

@@ -237,7 +239,7 @@ Iteration: {iteration + 1} of {total_iterations}"""
  )
  except Exception as e:
  error_msg = f"Error analyzing results: {str(e)}"
- logger.info(f"ANALYSIS ERROR: {error_msg}")
+ logger.exception(f"ANALYSIS ERROR: {error_msg}")
  self._handle_search_error(error_msg, question_progress_base + 10)

  iteration += 1
@@ -257,7 +259,7 @@ Iteration: {iteration + 1} of {total_iterations}"""
  logger.info("FINISHED ITERATION - Compressing Knowledge")
  except Exception as e:
  error_msg = f"Error compressing knowledge: {str(e)}"
- logger.info(f"COMPRESSION ERROR: {error_msg}")
+ logger.exception(f"COMPRESSION ERROR: {error_msg}")
  self._handle_search_error(
  error_msg, int((iteration / total_iterations) * 100 - 3)
  )