PyPI - local-deep-research - Versions diffs - 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

local_deep_research/error_handling/report_generator.py CHANGED Viewed

@@ -346,6 +346,16 @@ We're here to help you get this working:
                 "- Check search engine settings in Advanced Options\n"
                 "- Ensure required API keys are set for external search engines"
             ),
+            "No search results found|All search engines.*blocked.*rate.*limited": (
+                "No search results were found for your query. This could mean all search engines are unavailable.\n\n"
+                "**Try this:**\n"
+                "- **If using SearXNG:** Check if your SearXNG Docker container is running: `docker ps`\n"
+                "- **Start SearXNG:** `docker run -d -p 8080:8080 searxng/searxng` then set URL to `http://localhost:8080`\n"
+                "- **Try different search terms:** Use broader, more general keywords\n"
+                "- **Check network connection:** Ensure you can access the internet\n"
+                "- **Switch search engines:** Try DuckDuckGo, Brave, or Google (if API key configured)\n"
+                "- **Check for typos** in your research query"
+            ),
             "TypeError.*Context.*Size|'<' not supported between": (
                 "Model configuration issue. The context size setting might not be compatible with your model.\n\n"
                 "**Try this:**\n"

local_deep_research/llm/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""LLM module for Local Deep Research."""
+from .llm_registry import (
+    register_llm,
+    unregister_llm,
+    get_llm_from_registry,
+    is_llm_registered,
+    list_registered_llms,
+    clear_llm_registry,
+)
+__all__ = [
+    "register_llm",
+    "unregister_llm",
+    "get_llm_from_registry",
+    "is_llm_registered",
+    "list_registered_llms",
+    "clear_llm_registry",
+]

local_deep_research/llm/llm_registry.py ADDED Viewed

@@ -0,0 +1,155 @@
+"""Registry for custom LangChain LLMs.
+This module provides a global registry for registering and managing custom LangChain
+LLMs that can be used with Local Deep Research.
+"""
+from typing import Dict, Optional, Union, Callable
+from langchain.chat_models.base import BaseChatModel
+import threading
+import logging
+logger = logging.getLogger(__name__)
+class LLMRegistry:
+    """Thread-safe registry for custom LangChain LLMs."""
+    def __init__(self):
+        self._llms: Dict[
+            str, Union[BaseChatModel, Callable[..., BaseChatModel]]
+        ] = {}
+        self._lock = threading.Lock()
+    def register(
+        self, name: str, llm: Union[BaseChatModel, Callable[..., BaseChatModel]]
+    ) -> None:
+        """Register a custom LLM.
+        Args:
+            name: Unique name for the LLM
+            llm: Either a BaseChatModel instance or a factory function that returns one
+        """
+        with self._lock:
+            if name in self._llms:
+                logger.warning(f"Overwriting existing LLM: {name}")
+            self._llms[name] = llm
+            logger.info(f"Registered custom LLM: {name}")
+    def unregister(self, name: str) -> None:
+        """Unregister a custom LLM.
+        Args:
+            name: Name of the LLM to unregister
+        """
+        with self._lock:
+            if name in self._llms:
+                del self._llms[name]
+                logger.info(f"Unregistered custom LLM: {name}")
+    def get(
+        self, name: str
+    ) -> Optional[Union[BaseChatModel, Callable[..., BaseChatModel]]]:
+        """Get a registered LLM.
+        Args:
+            name: Name of the LLM to retrieve
+        Returns:
+            The LLM instance/factory or None if not found
+        """
+        with self._lock:
+            return self._llms.get(name)
+    def is_registered(self, name: str) -> bool:
+        """Check if an LLM is registered.
+        Args:
+            name: Name to check
+        Returns:
+            True if registered, False otherwise
+        """
+        with self._lock:
+            return name in self._llms
+    def list_registered(self) -> list[str]:
+        """Get list of all registered LLM names.
+        Returns:
+            List of registered LLM names
+        """
+        with self._lock:
+            return list(self._llms.keys())
+    def clear(self) -> None:
+        """Clear all registered LLMs."""
+        with self._lock:
+            self._llms.clear()
+            logger.info("Cleared all registered custom LLMs")
+# Global registry instance
+_llm_registry = LLMRegistry()
+# Public API functions
+def register_llm(
+    name: str, llm: Union[BaseChatModel, Callable[..., BaseChatModel]]
+) -> None:
+    """Register a custom LLM in the global registry.
+    Args:
+        name: Unique name for the LLM
+        llm: Either a BaseChatModel instance or a factory function
+    """
+    _llm_registry.register(name, llm)
+def unregister_llm(name: str) -> None:
+    """Unregister a custom LLM from the global registry.
+    Args:
+        name: Name of the LLM to unregister
+    """
+    _llm_registry.unregister(name)
+def get_llm_from_registry(
+    name: str,
+) -> Optional[Union[BaseChatModel, Callable[..., BaseChatModel]]]:
+    """Get a registered LLM from the global registry.
+    Args:
+        name: Name of the LLM to retrieve
+    Returns:
+        The LLM instance/factory or None if not found
+    """
+    return _llm_registry.get(name)
+def is_llm_registered(name: str) -> bool:
+    """Check if an LLM is registered in the global registry.
+    Args:
+        name: Name to check
+    Returns:
+        True if registered, False otherwise
+    """
+    return _llm_registry.is_registered(name)
+def list_registered_llms() -> list[str]:
+    """Get list of all registered LLM names.
+    Returns:
+        List of registered LLM names
+    """
+    return _llm_registry.list_registered()
+def clear_llm_registry() -> None:
+    """Clear all registered LLMs from the global registry."""
+    _llm_registry.clear()

local_deep_research/metrics/db_models.py CHANGED Viewed

@@ -20,9 +20,7 @@ class TokenUsage(Base):
     __tablename__ = "token_usage"
     id = Column(Integer, primary_key=True)
-    research_id = Column(
-        Integer
-    )  # Removed foreign key constraint to fix token tracking
+    research_id = Column(String(36), index=True)  # UUID string
     model_name = Column(String)
     provider = Column(
         String
@@ -63,9 +61,7 @@ class ModelUsage(Base):
     __table_args__ = (UniqueConstraint("research_id", "model_name"),)
     id = Column(Integer, primary_key=True)
-    research_id = Column(
-        Integer
-    )  # Removed foreign key constraint to fix token tracking
+    research_id = Column(String(36), index=True)  # UUID string
     model_name = Column(String)
     provider = Column(String)
     prompt_tokens = Column(Integer, default=0)
@@ -95,7 +91,7 @@ class SearchCall(Base):
     __tablename__ = "search_calls"
     id = Column(Integer, primary_key=True)
-    research_id = Column(Integer)
+    research_id = Column(String(36), index=True)  # UUID string
     research_query = Column(Text)
     research_mode = Column(String)
     research_phase = Column(String)

local_deep_research/metrics/search_tracker.py CHANGED Viewed

@@ -3,6 +3,7 @@ Search call tracking system for metrics collection.
 Similar to token_counter.py but tracks search engine usage.
 """
+import threading
 from typing import Any, Dict, List, Optional
 from loguru import logger
@@ -19,12 +20,20 @@ class SearchTracker:
     def __init__(self, db: Optional[MetricsDatabase] = None):
         """Initialize the search tracker."""
         self.db = db or MetricsDatabase()
-        self.research_context = {}
+        self._local = threading.local()
     def set_research_context(self, context: Dict[str, Any]) -> None:
-        """Set the current research context for search tracking."""
-        self.research_context = context or {}
-        logger.debug(f"Search tracker context updated: {self.research_context}")
+        """Set the current research context for search tracking (thread-safe)."""
+        self._local.research_context = context or {}
+        logger.debug(
+            f"Search tracker context updated (thread {threading.current_thread().ident}): {self._local.research_context}"
+        )
+    def _get_research_context(self) -> Dict[str, Any]:
+        """Get the research context for the current thread."""
+        if not hasattr(self._local, "research_context"):
+            self._local.research_context = {}
+        return self._local.research_context
     def record_search(
         self,
@@ -37,12 +46,17 @@ class SearchTracker:
     ) -> None:
         """Record a completed search operation directly to database."""
-        # Extract research context
-        research_id = self.research_context.get("research_id")
-        research_query = self.research_context.get("research_query")
-        research_mode = self.research_context.get("research_mode", "unknown")
-        research_phase = self.research_context.get("research_phase", "search")
-        search_iteration = self.research_context.get("search_iteration", 0)
+        # Extract research context (thread-safe)
+        context = self._get_research_context()
+        research_id = context.get("research_id")
+        # Convert research_id to string if it's an integer (for backward compatibility)
+        if isinstance(research_id, int):
+            research_id = str(research_id)
+        research_query = context.get("research_query")
+        research_mode = context.get("research_mode", "unknown")
+        research_phase = context.get("research_phase", "search")
+        search_iteration = context.get("search_iteration", 0)
         # Determine success status
         success_status = "success" if success else "error"
@@ -59,7 +73,7 @@ class SearchTracker:
             with self.db.get_session() as session:
                 # Create search call record
                 search_call = SearchCall(
-                    research_id=research_id,
+                    research_id=research_id,  # String research_id (UUID or converted integer)
                     research_query=research_query,
                     research_mode=research_mode,
                     research_phase=research_phase,

local_deep_research/report_generator.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import importlib
 from typing import Dict, List
+from loguru import logger
 from langchain_core.language_models import BaseChatModel
@@ -125,7 +126,7 @@ class IntegratedReportGenerator:
         sections = {}
         for section in structure:
-            print(f"Processing section: {section['name']}")
+            logger.info(f"Processing section: {section['name']}")
             section_content = []
             section_content.append(f"# {section['name']}\n")
@@ -138,7 +139,7 @@ class IntegratedReportGenerator:
                 # Generate a specific search query for this subsection
                 subsection_query = f"{query} {section['name']} {subsection['name']} {subsection['purpose']}"
-                print(
+                logger.info(
                     f"Researching subsection: {subsection['name']} with query: {subsection_query}"
                 )

local_deep_research/search_system.py CHANGED Viewed

@@ -47,9 +47,8 @@ from .advanced_search_system.strategies.smart_decomposition_strategy import (
 from .advanced_search_system.strategies.source_based_strategy import (
     SourceBasedSearchStrategy,
 )
-from .advanced_search_system.strategies.standard_strategy import (
-    StandardSearchStrategy,
-)
+# StandardSearchStrategy imported lazily to avoid database access during module import
 from .citation_handler import CitationHandler
 from .config.llm_config import get_llm
 from .config.search_config import get_search
@@ -150,7 +149,7 @@ class AdvancedSearchSystem:
                 search=self.search,
                 all_links_of_system=self.all_links_of_system,
             )
-        elif strategy_name.lower() == "source-based":
+        elif strategy_name.lower() in ["source-based", "source_based"]:
             logger.info("Creating SourceBasedSearchStrategy instance")
             self.strategy = SourceBasedSearchStrategy(
                 model=self.model,
@@ -465,15 +464,14 @@ class AdvancedSearchSystem:
             )
             logger.info("Creating FocusedIterationStrategy instance")
-            # PRESERVE SIMPLEQA PERFORMANCE: Keep proven 96.51% accuracy configuration
-            # Original optimal settings: max_iterations=8, questions_per_iteration=5
+            # Use database settings for iterations and questions_per_iteration
             self.strategy = FocusedIterationStrategy(
                 model=self.model,
                 search=self.search,
                 all_links_of_system=self.all_links_of_system,
-                max_iterations=8,  # PROVEN OPTIMAL FOR SIMPLEQA (96.51% accuracy)
-                questions_per_iteration=5,  # PROVEN OPTIMAL FOR SIMPLEQA
-                use_browsecomp_optimization=True,  # Enable BrowseComp optimizations
+                max_iterations=self.max_iterations,  # Use database setting
+                questions_per_iteration=self.questions_per_iteration,  # Use database setting
+                use_browsecomp_optimization=True,  # Enable BrowseComp optimizations for 95% accuracy
             )
         elif strategy_name.lower() in [
             "browsecomp-entity",
@@ -491,6 +489,11 @@ class AdvancedSearchSystem:
             )
         else:
             logger.info("Creating StandardSearchStrategy instance")
+            # Import lazily to avoid database access during module import
+            from .advanced_search_system.strategies.standard_strategy import (
+                StandardSearchStrategy,
+            )
             self.strategy = StandardSearchStrategy(
                 model=self.model,
                 search=self.search,

local_deep_research/utilities/log_utils.py CHANGED Viewed

@@ -75,23 +75,33 @@ def log_for_research(
     @wraps(to_wrap)
     def wrapped(research_id: int, *args: Any, **kwargs: Any) -> Any:
         g.research_id = research_id
-        to_wrap(research_id, *args, **kwargs)
+        result = to_wrap(research_id, *args, **kwargs)
         g.pop("research_id")
+        return result
     return wrapped
-def _get_research_id() -> int | None:
+def _get_research_id(record=None) -> int | None:
     """
     Gets the current research ID, if present.
+    Args:
+        record: Optional loguru record that might contain bound research_id
     Returns:
         The current research ID, or None if it does not exist.
     """
     research_id = None
-    if has_app_context():
+    # First check if research_id is bound to the log record
+    if record and "extra" in record and "research_id" in record["extra"]:
+        research_id = record["extra"]["research_id"]
+    # Then check Flask context
+    elif has_app_context():
         research_id = g.get("research_id")
     return research_id
@@ -104,16 +114,18 @@ def database_sink(message: loguru.Message) -> None:
     """
     record = message.record
-    research_id = _get_research_id()
+    research_id = _get_research_id(record)
     # Create a new database entry.
     db_log = ResearchLog(
         timestamp=record["time"],
-        message=str(message),
+        message=record[
+            "message"
+        ],  # Use raw message to avoid formatting artifacts in web UI
         module=record["name"],
         function=record["function"],
         line_no=int(record["line"]),
-        level=record["level"].name,
+        level=record["level"].name,  # Keep original case
         research_id=research_id,
     )
@@ -137,16 +149,17 @@ def frontend_progress_sink(message: loguru.Message) -> None:
         message: The log message to send.
     """
-    research_id = _get_research_id()
+    record = message.record
+    research_id = _get_research_id(record)
     if research_id is None:
         # If we don't have a research ID, don't send anything.
+        # Can't use logger here as it causes deadlock
         return
-    record = message.record
     frontend_log = dict(
         log_entry=dict(
             message=record["message"],
-            type=record["level"].name,
+            type=record["level"].name,  # Keep original case
             time=record["time"].isoformat(),
         ),
     )
@@ -181,7 +194,7 @@ def config_logger(name: str) -> None:
     # Add a special log level for milestones.
     try:
-        logger.level("milestone", no=26, color="<magenta><bold>")
+        logger.level("MILESTONE", no=26, color="<magenta><bold>")
     except ValueError:
         # Level already exists, that's fine
         pass

local_deep_research/utilities/thread_context.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""
+Utility functions for handling thread-local context propagation.
+This module provides helpers for propagating research context across thread boundaries,
+which is necessary when strategies use ThreadPoolExecutor for parallel searches.
+"""
+import functools
+from typing import Any, Callable, Dict
+from ..metrics.search_tracker import get_search_tracker
+def preserve_research_context(func: Callable) -> Callable:
+    """
+    Decorator that preserves research context across thread boundaries.
+    Use this decorator on functions that will be executed in ThreadPoolExecutor
+    to ensure the research context (including research_id) is properly propagated.
+    Example:
+        @preserve_research_context
+        def search_task(query):
+            return search_engine.run(query)
+    """
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        # The context should already be captured in the closure when the decorator runs
+        # Set it in the new thread
+        tracker = get_search_tracker()
+        if hasattr(wrapper, "_research_context"):
+            tracker.set_research_context(wrapper._research_context)
+        return func(*args, **kwargs)
+    # Capture the current context when the decorator is applied
+    wrapper._research_context = get_search_tracker()._get_research_context()
+    return wrapper
+def create_context_preserving_wrapper(
+    func: Callable, context: Dict[str, Any] = None
+) -> Callable:
+    """
+    Create a wrapper function that preserves research context.
+    This is useful when you need to create the wrapper dynamically and can't use a decorator.
+    Args:
+        func: The function to wrap
+        context: Optional explicit context to use. If None, captures current context.
+    Returns:
+        A wrapped function that sets the research context before executing
+    """
+    # Capture context at wrapper creation time if not provided
+    if context is None:
+        context = get_search_tracker()._get_research_context()
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        # Set the captured context in the new thread
+        get_search_tracker().set_research_context(context)
+        return func(*args, **kwargs)
+    return wrapper
+def run_with_context(
+    func: Callable, *args, context: Dict[str, Any] = None, **kwargs
+) -> Any:
+    """
+    Run a function with a specific research context.
+    Args:
+        func: The function to run
+        *args: Positional arguments for the function
+        context: Optional explicit context. If None, uses current context.
+        **kwargs: Keyword arguments for the function
+    Returns:
+        The result of the function call
+    """
+    tracker = get_search_tracker()
+    # Save current context
+    original_context = tracker._get_research_context()
+    try:
+        # Set new context
+        if context is None:
+            context = original_context
+        tracker.set_research_context(context)
+        # Run the function
+        return func(*args, **kwargs)
+    finally:
+        # Restore original context
+        tracker.set_research_context(original_context)

local_deep_research/web/app_factory.py CHANGED Viewed

@@ -61,8 +61,10 @@ def create_app():
     # Disable CSRF for API routes
     @app.before_request
     def disable_csrf_for_api():
-        if request.path.startswith("/api/v1/") or request.path.startswith(
-            "/research/api/"
+        if (
+            request.path.startswith("/api/v1/")
+            or request.path.startswith("/research/api/")
+            or request.path.startswith("/benchmark/api/")
         ):
             csrf.protect = lambda: None
@@ -171,23 +173,45 @@ def register_blueprints(app):
     from .routes.metrics_routes import metrics_bp
     from .routes.research_routes import research_bp
     from .routes.settings_routes import settings_bp
+    from ..benchmarks.web_api.benchmark_routes import benchmark_bp
     # Add root route
     @app.route("/")
     def index():
-        """Root route - redirect to research page"""
-        from flask import redirect, url_for
-        return redirect(url_for("research.index"))
+        """Root route - serve the research page directly"""
+        from .utils.templates import render_template_with_defaults
+        from ..utilities.db_utils import get_db_setting
+        # Load current settings from database
+        settings = {
+            "llm_provider": get_db_setting("llm.provider", "ollama"),
+            "llm_model": get_db_setting("llm.model", ""),
+            "llm_openai_endpoint_url": get_db_setting(
+                "llm.openai_endpoint.url", ""
+            ),
+            "search_tool": get_db_setting("search.tool", ""),
+            "search_iterations": get_db_setting("search.iterations", 2),
+            "search_questions_per_iteration": get_db_setting(
+                "search.questions_per_iteration", 3
+            ),
+        }
+        # Debug logging
+        logger.debug(f"Settings loaded: {settings}")
+        return render_template_with_defaults(
+            "pages/research.html", settings=settings
+        )
     # Register blueprints
     app.register_blueprint(research_bp)
-    app.register_blueprint(history_bp, url_prefix="/research/api")
+    app.register_blueprint(history_bp)  # Already has url_prefix="/history"
     app.register_blueprint(metrics_bp)
-    app.register_blueprint(settings_bp)
+    app.register_blueprint(settings_bp)  # Already has url_prefix="/settings"
     app.register_blueprint(
         api_bp, url_prefix="/research/api"
     )  # Register API blueprint with prefix
+    app.register_blueprint(benchmark_bp)  # Register benchmark blueprint
     # Register API v1 blueprint
     app.register_blueprint(api_blueprint)  # Already has url_prefix='/api/v1'

local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl

local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl