local-deep-research 0.3.11__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/PKG-INFO +29 -4
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/README.md +19 -2
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/pyproject.toml +13 -2
- local_deep_research-0.4.0/src/local_deep_research/__version__.py +1 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
- local_deep_research-0.4.0/src/local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/findings/repository.py +0 -3
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
- local_deep_research-0.4.0/src/local_deep_research/api/benchmark_functions.py +288 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/api/research_functions.py +8 -4
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/README.md +162 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/__init__.py +51 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/benchmark_functions.py +353 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/cli/__init__.py +16 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/cli.py +347 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/comparison/__init__.py +12 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/comparison/evaluator.py +768 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/datasets/__init__.py +53 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/datasets/base.py +295 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/datasets/utils.py +116 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/datasets.py +31 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/efficiency/__init__.py +14 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/evaluators/__init__.py +18 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/evaluators/base.py +74 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/evaluators/composite.py +121 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/graders.py +410 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/metrics/README.md +80 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/metrics/__init__.py +24 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/metrics/calculation.py +385 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/metrics/reporting.py +155 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/metrics/visualization.py +205 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/metrics.py +11 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/optimization/__init__.py +32 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/optimization/api.py +274 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/optimization/metrics.py +20 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/runners.py +434 -0
- local_deep_research-0.4.0/src/local_deep_research/benchmarks/templates.py +65 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/config/llm_config.py +37 -25
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/config/search_config.py +2 -6
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/defaults/default_settings.json +109 -8
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/search_system.py +16 -8
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/utilities/db_utils.py +3 -6
- local_deep_research-0.4.0/src/local_deep_research/utilities/es_utils.py +441 -0
- local_deep_research-0.4.0/src/local_deep_research/utilities/log_utils.py +36 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/utilities/search_utilities.py +8 -9
- local_deep_research-0.4.0/src/local_deep_research/utilities/url_utils.py +57 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/app.py +7 -9
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/app_factory.py +9 -12
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/database/migrations.py +8 -5
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/database/models.py +20 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/database/schema_upgrade.py +5 -8
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/models/database.py +15 -18
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/routes/api_routes.py +18 -9
- local_deep_research-0.4.0/src/local_deep_research/web/routes/benchmark_routes.py +427 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/routes/research_routes.py +13 -17
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/routes/settings_routes.py +279 -72
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/services/research_service.py +47 -57
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/services/settings_manager.py +1 -4
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/services/settings_service.py +4 -6
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/css/styles.css +12 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/logpanel.js +164 -155
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/research.js +44 -3
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/settings.js +27 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/services/socket.js +47 -0
- local_deep_research-0.4.0/src/local_deep_research/web_search_engines/default_search_engines.py +38 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
- local_deep_research-0.4.0/src/local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_local.py +31 -26
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/search_engine_base.py +22 -5
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/search_engine_factory.py +32 -11
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/search_engines_config.py +14 -1
- local_deep_research-0.4.0/tests/TESTING_PROPOSAL.md +294 -0
- local_deep_research-0.4.0/tests/feature_tests/IMPLEMENTATION_GUIDE_241.md +114 -0
- local_deep_research-0.4.0/tests/feature_tests/README.md +26 -0
- local_deep_research-0.4.0/tests/feature_tests/__init__.py +5 -0
- local_deep_research-0.4.0/tests/feature_tests/test_custom_context.py +139 -0
- local_deep_research-0.4.0/tests/test_settings_manager.py +98 -0
- local_deep_research-0.4.0/tests/test_url_utils.py +55 -0
- local_deep_research-0.4.0/tests/test_url_utils_debug.py +33 -0
- local_deep_research-0.4.0/tests/test_url_utils_simple.py +39 -0
- local_deep_research-0.3.11/src/local_deep_research/__version__.py +0 -1
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/LICENSE +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/filters/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/findings/base_findings.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/knowledge/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/knowledge/base_knowledge.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/questions/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/questions/base_question.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/questions/decomposition_question.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/questions/standard_question.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/repositories/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/tools/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/tools/base_tool.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/tools/question_tools/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/tools/search_tools/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/api/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/app.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/citation_handler.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/config/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/defaults/.env.template +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/defaults/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/migrate_db.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/report_generator.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/setup_data_dir.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/test_migration.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/utilities/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/utilities/enums.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/utilities/llm_utils.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/utilities/setup_utils.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/database/README.md +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/database/migrate_to_ldr_db.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/models/settings.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/routes/history_routes.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/services/resource_service.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/services/socket_service.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/css/custom_dropdown.css +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/css/settings.css +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/custom_dropdown.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/detail.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/fallback/formatting.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/fallback/ui.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/history.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/progress.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/results.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/components/settings_sync.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/main.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/research_form.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/services/api.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/services/audio.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/services/formatting.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/services/pdf.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/js/services/ui.js +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/sounds/README.md +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/sounds/error.mp3 +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/static/sounds/success.mp3 +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/base.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/components/custom_dropdown.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/components/log_panel.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/components/mobile_nav.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/components/settings_form.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/components/sidebar.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/pages/details.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/pages/history.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/pages/progress.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/pages/research.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/pages/results.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/templates/settings_dashboard.html +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/utils/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/utils/formatters.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web/utils/templates.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/full_search.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_ddg.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_github.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_guardian.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_pubmed.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_wayback.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/tests/__init__.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/tests/download_stuff_for_local_test.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/tests/fix_tests/README.md +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/tests/fix_tests/test_duplicate_links_fix.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/tests/searxng/test_searxng_instance.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/tests/searxng/test_searxng_integration.py +0 -0
- {local_deep_research-0.3.11 → local_deep_research-0.4.0}/tests/test_google_pse.py +0 -0
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: local-deep-research
-Version: 0.3.11
+Version: 0.4.0
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-Email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
@@ -36,7 +36,7 @@ Requires-Dist: langchain-community>=0.3.17
 Requires-Dist: langchain-core>=0.3.34
 Requires-Dist: langchain-ollama>=0.2.3
 Requires-Dist: langchain-openai>=0.3.5
-Requires-Dist:
+Requires-Dist: langchain-anthropic>=0.3.13
 Requires-Dist: duckduckgo_search>=7.3.2
 Requires-Dist: python-dateutil>=2.9.0
 Requires-Dist: typing_extensions>=4.12.2
@@ -67,6 +67,14 @@ Requires-Dist: google-search-results
 Requires-Dist: importlib-resources>=6.5.2
 Requires-Dist: setuptools>=78.1.0
 Requires-Dist: flask-wtf>=1.2.2
+Requires-Dist: optuna>=4.3.0
+Requires-Dist: elasticsearch==8.14.0
+Requires-Dist: methodtools>=0.4.7
+Requires-Dist: loguru>=0.7.3
+Requires-Dist: matplotlib>=3.10.3
+Requires-Dist: pandas>=2.2.3
+Requires-Dist: plotly>=6.0.1
+Requires-Dist: kaleido==0.1.0
 Description-Content-Type: text/markdown

 # Local Deep Research
@@ -106,7 +114,24 @@ Local Deep Research combines the power of large language models with intelligent

 ## ⚡ Quick Start

-### Option 1: Docker (
+### Option 1: Docker (Quickstart no MAC/ARM)
+
+```bash
+# Step 1: Pull and run SearXNG for optimal search results
+docker pull searxng/searxng
+docker run -d -p 8080:8080 --name searxng searxng/searxng
+
+# Step 2: Pull and run Local Deep Research (Please build your own docker on ARM)
+docker pull localdeepresearch/local-deep-research
+docker run -d -p 5000:5000 --network host --name local-deep-research localdeepresearch/local-deep-research
+
+# Start containers - Required after each reboot (can be automated with this flag in run command --restart unless-stopped)
+docker start searxng
+docker start local-deep-research
+
+```
+
+### Option 2: Docker Compose (Recommended)

 LDR uses Docker compose to bundle the web app and all it's dependencies so
 you can get up and running quickly.
@@ -145,7 +170,7 @@ Then visit `http://127.0.0.1:5000` to start researching!
 See [here](https://github.com/LearningCircuit/local-deep-research/wiki/Installation#docker-installation-recommended) for more information about
 using Docker.

-### Option
+### Option 3: Python Package (mostly for programmatic access)

 ```bash
 # Install the package
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/README.md

@@ -35,7 +35,24 @@ Local Deep Research combines the power of large language models with intelligent

 ## ⚡ Quick Start

-### Option 1: Docker (
+### Option 1: Docker (Quickstart no MAC/ARM)
+
+```bash
+# Step 1: Pull and run SearXNG for optimal search results
+docker pull searxng/searxng
+docker run -d -p 8080:8080 --name searxng searxng/searxng
+
+# Step 2: Pull and run Local Deep Research (Please build your own docker on ARM)
+docker pull localdeepresearch/local-deep-research
+docker run -d -p 5000:5000 --network host --name local-deep-research localdeepresearch/local-deep-research
+
+# Start containers - Required after each reboot (can be automated with this flag in run command --restart unless-stopped)
+docker start searxng
+docker start local-deep-research
+
+```
+
+### Option 2: Docker Compose (Recommended)

 LDR uses Docker compose to bundle the web app and all it's dependencies so
 you can get up and running quickly.
@@ -74,7 +91,7 @@ Then visit `http://127.0.0.1:5000` to start researching!
 See [here](https://github.com/LearningCircuit/local-deep-research/wiki/Installation#docker-installation-recommended) for more information about
 using Docker.

-### Option
+### Option 3: Python Package (mostly for programmatic access)

 ```bash
 # Install the package
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/pyproject.toml

@@ -25,7 +25,7 @@ dependencies = [
     "langchain-core>=0.3.34",
     "langchain-ollama>=0.2.3",
     "langchain-openai>=0.3.5",
-    "
+    "langchain-anthropic>=0.3.13",
     "duckduckgo_search>=7.3.2",
     "python-dateutil>=2.9.0",
     "typing_extensions>=4.12.2",
@@ -56,8 +56,16 @@ dependencies = [
     "importlib-resources>=6.5.2",
     "setuptools>=78.1.0",
     "flask-wtf>=1.2.2",
+    "optuna>=4.3.0",
+    "elasticsearch==8.14.0",
+    "methodtools>=0.4.7",
+    "loguru>=0.7.3",
+    "matplotlib>=3.10.3",
+    "pandas>=2.2.3",
+    "plotly>=6.0.1",
+    "kaleido==0.1.0",
 ]
-version = "0.3.11"
+version = "0.4.0"

 [project.license]
 file = "LICENSE"
@@ -109,4 +117,7 @@ dev = [
     "flake8>=7.1.2",
     "jupyter>=1.1.1",
     "cookiecutter>=2.6.0",
+    "pandas>=2.2.3",
+    "optuna>=4.3.0",
+    "pytest-mock>=3.14.0",
 ]
local_deep_research-0.4.0/src/local_deep_research/__version__.py (new file)

@@ -0,0 +1 @@
+__version__ = "0.4.0"
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/filters/base_filter.py

@@ -3,17 +3,16 @@
 Base class for search result filters.
 """

-import logging
 from abc import ABC, abstractmethod
 from typing import Dict, List

-
+from langchain_core.language_models.chat_models import BaseChatModel


 class BaseFilter(ABC):
     """Abstract base class for all search result filters."""

-    def __init__(self, model=None):
+    def __init__(self, model: BaseChatModel | None = None):
         """
         Initialize the filter.

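
For context on the typed constructor introduced above, here is a minimal, hypothetical subclass sketch (not part of the package) showing how a custom filter plugs into `BaseFilter`. The `filter_results` signature follows the one used by the filters in this release; the class name, the `min_length` parameter, and the `snippet` result key are illustrative assumptions.

```python
from typing import Dict, List

from langchain_core.language_models.chat_models import BaseChatModel

from local_deep_research.advanced_search_system.filters.base_filter import BaseFilter


class MinimumSnippetLengthFilter(BaseFilter):
    """Hypothetical filter that drops results whose snippets are too short."""

    def __init__(self, model: BaseChatModel | None = None, min_length: int = 80):
        # The base class stores the (optional) chat model as self.model.
        super().__init__(model)
        self.min_length = min_length

    def filter_results(self, results: List[Dict], query: str, **kwargs) -> List[Dict]:
        # "snippet" is an illustrative key, not a documented result field.
        return [r for r in results if len(r.get("snippet", "")) >= self.min_length]
```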
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/filters/cross_engine_filter.py

@@ -3,15 +3,14 @@ Cross-engine search result filter implementation.
 """

 import json
-import logging
 from typing import Dict, List

+from loguru import logger
+
 from ...utilities.db_utils import get_db_setting
 from ...utilities.search_utilities import remove_think_tags
 from .base_filter import BaseFilter

-logger = logging.getLogger(__name__)
-

 class CrossEngineFilter(BaseFilter):
     """Filter that ranks and filters results from multiple search engines."""
@@ -194,8 +193,8 @@ If no results seem relevant to the query, return an empty array: []"""
                     result["index"] = str(i + start_index + 1)
                 return top_results

-        except Exception
-            logger.
+        except Exception:
+            logger.exception("Cross-engine filtering error")
             top_results = results[: min(self.max_results, len(results))]
             # Update indices if requested
             if reindex:
local_deep_research-0.4.0/src/local_deep_research/advanced_search_system/filters/journal_reputation_filter.py (new file)

@@ -0,0 +1,298 @@
+import time
+import traceback
+from datetime import timedelta
+from typing import Any, Dict, List, Optional
+
+from langchain_core.language_models.chat_models import BaseChatModel
+from loguru import logger
+from methodtools import lru_cache
+
+from ...config.llm_config import get_llm
+from ...search_system import AdvancedSearchSystem
+from ...utilities.db_utils import get_db_session, get_db_setting
+from ...web.database.models import Journal
+from ...web_search_engines.search_engine_factory import create_search_engine
+from .base_filter import BaseFilter
+
+
+class JournalFilterError(Exception):
+    """
+    Custom exception for errors related to journal filtering.
+    """
+
+
+class JournalReputationFilter(BaseFilter):
+    """
+    A filter for academic results that considers the reputation of journals.
+
+    Note that this filter requires SearXNG to be available in order to work.
+    """
+
+    def __init__(
+        self,
+        model: BaseChatModel | None = None,
+        reliability_threshold: int | None = None,
+        max_context: int | None = None,
+        exclude_non_published: bool | None = None,
+        quality_reanalysis_period: timedelta | None = None,
+    ):
+        """
+        Args:
+            model: The LLM model to use for analysis.
+            reliability_threshold: The filter scores journal reliability on a
+                scale of 1-10. Results from any journal with a reliability
+                below this threshold will be culled. Will be read from the
+                settings if not specified.
+            max_context: The maximum number of characters to feed into the
+                LLM when assessing journal reliability.
+            exclude_non_published: If true, it will exclude any results that
+                don't have an associated journal publication.
+            quality_reanalysis_period: Period at which to update journal
+                quality assessments.
+
+        """
+        super().__init__(model)
+
+        if self.model is None:
+            self.model = get_llm()
+
+        self.__threshold = reliability_threshold
+        if self.__threshold is None:
+            self.__threshold = int(
+                get_db_setting("search.journal_reputation.threshold", 4)
+            )
+        self.__max_context = max_context
+        if self.__max_context is None:
+            self.__max_context = int(
+                get_db_setting("search.journal_reputation.max_context", 3000)
+            )
+        self.__exclude_non_published = exclude_non_published
+        if self.__exclude_non_published is None:
+            self.__exclude_non_published = bool(
+                get_db_setting("search.journal_reputation.exclude_non_published", False)
+            )
+        self.__quality_reanalysis_period = quality_reanalysis_period
+        if self.__quality_reanalysis_period is None:
+            self.__quality_reanalysis_period = timedelta(
+                days=int(
+                    get_db_setting("search.journal_reputation.reanalysis_period", 365)
+                )
+            )
+
+        # SearXNG is required so we can search the open web for reputational
+        # information.
+        self.__engine = create_search_engine("searxng", llm=self.model)
+        if self.__engine is None:
+            raise JournalFilterError("SearXNG initialization failed.")
+
+        self.__db_session = get_db_session()
+
+    @classmethod
+    def create_default(
+        cls, model: BaseChatModel | None = None, *, engine_name: str
+    ) -> Optional["JournalReputationFilter"]:
+        """
+        Initializes a default configuration of the filter based on the settings.
+
+        Args:
+            model: Explicitly specify the LLM to use.
+            engine_name: The name of the search engine. Will be used to check
+                the enablement status for that engine.
+
+        Returns:
+            The filter that it created, or None if filtering is disabled in
+            the settings, or misconfigured.
+
+        """
+        if not bool(
+            get_db_setting(
+                f"search.engine.web.{engine_name}.journal_reputation.enabled",
+                True,
+            )
+        ):
+            return None
+
+        try:
+            # Initialize the filter with default settings.
+            return JournalReputationFilter(model=model)
+        except JournalFilterError:
+            logger.error(
+                "SearXNG is not configured, but is required for "
+                "journal reputation filtering. Disabling filtering."
+            )
+            return None
+
+    def __make_search_system(self) -> AdvancedSearchSystem:
+        """
+        Creates a new `AdvancedSearchSystem` instance.
+
+        Returns:
+            The system it created.
+
+        """
+        return AdvancedSearchSystem(
+            llm=self.model,
+            search=self.__engine,
+            # We clamp down on the default iterations and questions for speed.
+            max_iterations=2,
+            questions_per_iteration=3,
+        )
+
+    @lru_cache(maxsize=1024)
+    def __analyze_journal_reputation(self, journal_name: str) -> int:
+        """
+        Analyzes the reputation of a particular journal.
+
+        Args:
+            journal_name: The name of the journal.
+
+        Returns:
+            The reputation of the journal, on a scale from 1-10.
+
+        """
+        logger.info(f"Analyzing reputation of journal '{journal_name}'...")
+
+        # Perform a search for information about this journal.
+        journal_info = self.__make_search_system().analyze_topic(
+            f'Assess the reputability and reliability of the journal "'
+            f'{journal_name}", with a particular focus on its quartile '
+            f"ranking and peer review status. Be sure to specify the journal "
+            f"name in any generated questions."
+        )
+        journal_info = "\n".join([f["content"] for f in journal_info["findings"]])
+        logger.debug(f"Received raw info about journal: {journal_info}")
+
+        # Have the LLM assess the reliability based on this information.
+        prompt = f"""
+        You are a research assistant helping to assess the reliability and
+        reputability of scientific journals. A reputable journal should be
+        peer-reviewed, not predatory, and high-impact. Please review the
+        following information on the journal "{journal_name}" and output a
+        reputability score between 1 and 10, where 1-3 is not reputable and
+        probably predatory, 4-6 is reputable but low-impact (Q2 or Q3),
+        and 7-10 is reputable Q1 journals. Only output the number, do not
+        provide any explanation or other output.
+
+        JOURNAL INFORMATION:
+
+        {journal_info}
+        """
+        if len(prompt) > self.__max_context:
+            # If the prompt is too long, truncate it to fit within the max context size.
+            prompt = prompt[: self.__max_context] + "..."
+
+        # Generate a response from the LLM model.
+        response = self.model.invoke(prompt).text()
+        logger.debug(f"Got raw LLM response: {response}")
+
+        # Extract the score from the response.
+        try:
+            reputation_score = int(response.strip())
+        except ValueError:
+            logger.error("Failed to parse reputation score from LLM response.")
+            raise ValueError("Failed to parse reputation score from LLM response.")
+
+        return max(min(reputation_score, 10), 1)
+
+    def __add_journal_to_db(self, *, name: str, quality: int) -> None:
+        """
+        Saves the journal quality information to the database.
+
+        Args:
+            name: The name of the journal.
+            quality: The quality assessment for the journal.
+
+        """
+        journal = self.__db_session.query(Journal).filter_by(name=name).first()
+        if journal is not None:
+            journal.quality = quality
+            journal.quality_model = self.model.name
+            journal.quality_analysis_time = int(time.time())
+        else:
+            journal = Journal(
+                name=name,
+                quality=quality,
+                quality_model=self.model.name,
+                quality_analysis_time=int(time.time()),
+            )
+            self.__db_session.add(journal)
+
+        self.__db_session.commit()
+
+    def __clean_journal_name(self, journal_name: str) -> str:
+        """
+        Cleans up the name of a journal to remove any extraneous information.
+        This is mostly to make caching more effective.
+
+        Args:
+            journal_name: The raw name of the journal.
+
+        Returns:
+            The cleaned name.
+
+        """
+        logger.debug(f"Cleaning raw journal name: {journal_name}")
+
+        prompt = f"""
+        Clean up the following journal or conference name:
+
+        "{journal_name}"
+
+        Remove any references to volumes, pages, months, or years. Expand
+        abbreviations if possible. For conferences, remove locations. Only
+        output the clean name, do not provide any explanation or other output.
+        """
+
+        response = self.model.invoke(prompt).text()
+        return response.strip()
+
+    def __check_result(self, result: Dict[str, Any]) -> bool:
+        """
+        Performs a search to determine the reputability of a result journal..
+
+        Args:
+            result: The result to check.
+
+        Returns:
+            True if the journal is reputable or if it couldn't determine a
+            reputability score, false otherwise.
+
+        """
+        journal_name = result.get("journal_ref")
+        if journal_name is None:
+            logger.debug(
+                f"Result {result.get('title')} has no associated "
+                f"journal, not evaluating reputation."
+            )
+            return not self.__exclude_non_published
+        journal_name = self.__clean_journal_name(journal_name)
+
+        # Check the database first.
+        journal = self.__db_session.query(Journal).filter_by(name=journal_name).first()
+        if (
+            journal is not None
+            and (time.time() - journal.quality_analysis_time)
+            < self.__quality_reanalysis_period.total_seconds()
+        ):
+            logger.debug(f"Found existing reputation for {journal_name} in database.")
+            return journal.quality >= self.__threshold
+
+        # Evaluate reputation.
+        try:
+            quality = self.__analyze_journal_reputation(journal_name)
+            # Save to the database.
+            self.__add_journal_to_db(name=journal_name, quality=quality)
+            return quality >= self.__threshold
+        except ValueError:
+            # The LLM behaved weirdly. In this case, we will just assume it's
+            # okay.
+            return True
+
+    def filter_results(self, results: List[Dict], query: str, **kwargs) -> List[Dict]:
+        try:
+            return list(filter(self.__check_result, results))
+        except Exception as e:
+            logger.error(
+                f"Journal quality filtering failed: {e}, {traceback.format_exc()}"
+            )
+            return results
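
A short usage sketch (not from the package) of the new journal reputation filter, based only on the signatures visible in this diff: `create_default()` returns `None` when filtering is disabled for the given engine or SearXNG cannot be initialized, and `filter_results()` drops results whose journal scores below the configured reliability threshold. The engine name and result dicts are illustrative, and a configured settings database plus a reachable SearXNG instance are assumed.

```python
from local_deep_research.advanced_search_system.filters.journal_reputation_filter import (
    JournalReputationFilter,
)

# "arxiv" is an example engine name; create_default() checks the per-engine
# journal_reputation.enabled setting and may return None.
reputation_filter = JournalReputationFilter.create_default(engine_name="arxiv")

results = [
    # "journal_ref" is the field the filter reads for the journal name.
    {"title": "Published paper", "journal_ref": "Nature Communications 14 (2023)"},
    {"title": "Unpublished preprint"},  # kept unless exclude_non_published is set
]

if reputation_filter is not None:
    results = reputation_filter.filter_results(results, query="example query")
```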
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/findings/repository.py

@@ -291,9 +291,6 @@ Use IEEE style citations [1], [2], etc. Never make up your own citations.

         # Check if we're on Windows
         if platform.system() == "Windows":
-            # Windows-compatible timeout using threading
-            class TimeoutError(Exception):
-                pass

             def timeout_handler(timeout_seconds, callback, args):
                 def handler():
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/base_strategy.py

@@ -3,11 +3,10 @@ Base class for all search strategies.
 Defines the common interface and shared functionality for different search approaches.
 """

-import logging
 from abc import ABC, abstractmethod
 from typing import Callable, Dict, List, Optional

-
+from loguru import logger


 class BaseSearchStrategy(ABC):
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py

@@ -3,10 +3,11 @@ IterDRAG strategy implementation.
 """

 import json
-import logging
 from datetime import datetime
 from typing import Dict, List

+from loguru import logger
+
 from ...citation_handler import CitationHandler
 from ...config.llm_config import get_llm
 from ...config.search_config import get_search
@@ -17,8 +18,6 @@ from ..knowledge.standard_knowledge import StandardKnowledge
 from ..questions.decomposition_question import DecompositionQuestionGenerator
 from .base_strategy import BaseSearchStrategy

-logger = logging.getLogger(__name__)
-

 class IterDRAGStrategy(BaseSearchStrategy):
     """IterDRAG strategy that breaks queries into sub-queries."""
@@ -83,8 +82,8 @@ Initial Search Results:
             return self.question_generator.generate_questions(
                 query, context, int(get_db_setting("search.questions_per_iteration"))
             )
-        except Exception
-            logger.
+        except Exception:
+            logger.exception("Error generating sub-queries")
             return []

     def analyze_topic(self, query: str) -> Dict:
@@ -204,8 +203,8 @@ Initial Search Results:
                         "result_count": len(sub_results),
                     },
                 )
-            except Exception
-                logger.
+            except Exception:
+                logger.exception("Error searching for sub-query")
                 sub_results = []

             try:
@@ -238,8 +237,8 @@ Initial Search Results:
                     current_knowledge = (
                         current_knowledge + "\n\n\n New: \n" + result["content"]
                     )
-            except Exception
-                logger.
+            except Exception:
+                logger.exception("Error analyzing sub-query results:")
                 finding = {
                     "phase": f"Follow-up Iteration 0.{i + 1}",
                     "content": "Error analyzing sub-query results.",
@@ -344,10 +343,7 @@ This is a fallback response using the accumulated knowledge.
             # Update current knowledge with the synthesized version
             current_knowledge = final_answer
         except Exception as e:
-            logger.
-            import traceback
-
-            logger.error(traceback.format_exc())
+            logger.exception("Error synthesizing final answer")

             # Create an error finding
             error_finding = {
@@ -396,7 +392,7 @@ This is an automatically generated fallback response.
                 final_answer = fallback_content
             except Exception as fallback_error:
                 # Last resort fallback
-                logger.
+                logger.exception("Even fallback creation failed")
                 final_answer = f"""
 # Research Error

@@ -417,8 +413,8 @@ Please try again with a different query or contact support.
             current_knowledge = self.knowledge_generator.compress_knowledge(
                 current_knowledge, query, section_links
             )
-        except Exception
-            logger.
+        except Exception:
+            logger.exception("Error compressing knowledge")

         # Format and save findings
         self._update_progress(
@@ -442,8 +438,8 @@ Please try again with a different query or contact support.
             formatted_findings = self.findings_repository.format_findings_to_text(
                 findings, final_answer
             )
-        except Exception
-            logger.
+        except Exception:
+            logger.exception("Error formatting final findings")
             formatted_findings = "Error: Could not format findings due to an error."

         self._update_progress("Research complete", 100, {"phase": "complete"})
{local_deep_research-0.3.11 → local_deep_research-0.4.0}/src/local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py

@@ -3,9 +3,10 @@ Parallel search strategy implementation for maximum search speed.
 """

 import concurrent.futures
-import logging
 from typing import Dict

+from loguru import logger
+
 from ...citation_handler import CitationHandler
 from ...config.llm_config import get_llm
 from ...config.search_config import get_search
@@ -16,8 +17,6 @@ from ..findings.repository import FindingsRepository
 from ..questions.standard_question import StandardQuestionGenerator
 from .base_strategy import BaseSearchStrategy

-logger = logging.getLogger(__name__)
-

 class ParallelSearchStrategy(BaseSearchStrategy):
     """
@@ -212,7 +211,7 @@ class ParallelSearchStrategy(BaseSearchStrategy):
                 result = self.search.run(q)
                 return {"question": q, "results": result or []}
             except Exception as e:
-                logger.
+                logger.exception(f"Error searching for '{q}'")
                 return {"question": q, "results": [], "error": str(e)}

         # Run searches in parallel
@@ -408,11 +407,8 @@ class ParallelSearchStrategy(BaseSearchStrategy):
             )

         except Exception as e:
-            import traceback
-
             error_msg = f"Error in research process: {str(e)}"
-            logger.
-            logger.error(traceback.format_exc())
+            logger.exception(error_msg)
             synthesized_content = f"Error: {str(e)}"
             formatted_findings = f"Error: {str(e)}"
             finding = {
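
Across the strategy and filter modules above, the release swaps the stdlib `logging` module and hand-formatted `traceback.format_exc()` calls for loguru's `logger.exception`, which records the active exception's traceback automatically. A minimal before/after sketch of the pattern, with a hypothetical `do_search` helper used only to make it self-contained:

```python
import logging
import traceback

from loguru import logger


def do_search(query: str):
    """Hypothetical helper standing in for a real search call."""
    raise RuntimeError("search backend unavailable")


# Before (0.3.11 style): module-level stdlib logger, traceback formatted by hand.
std_logger = logging.getLogger(__name__)


def run_search_old(query: str):
    try:
        return do_search(query)
    except Exception as e:
        std_logger.error(f"Error searching for '{query}': {e}")
        std_logger.error(traceback.format_exc())
        return []


# After (0.4.0 style): the shared loguru logger; logger.exception() logs the
# message and the current traceback in one call.
def run_search_new(query: str):
    try:
        return do_search(query)
    except Exception:
        logger.exception(f"Error searching for '{query}'")
        return []
```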