PyPI - local-deep-research - Versions diffs - 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

local_deep_research/api/research_functions.py CHANGED Viewed

@@ -3,89 +3,115 @@ API module for Local Deep Research.
 Provides programmatic access to search and research capabilities.
 """
-from typing import Dict, List, Optional, Union, Any, Callable
 import logging
 import os
-import traceback
+from typing import Any, Callable, Dict, Optional
 import toml
-from ..search_system import AdvancedSearchSystem
+from ..config.llm_config import get_llm
+from ..config.search_config import get_search
 from ..report_generator import IntegratedReportGenerator
-from ..config import get_llm, get_search, settings
-from ..utilties.search_utilities import remove_think_tags
+from ..search_system import AdvancedSearchSystem
+from ..utilities.search_utilities import remove_think_tags
 logger = logging.getLogger(__name__)
-def quick_summary(
-    query: str,
+def _init_search_system(
+    model_name: str | None = None,
+    temperature: float = 0.7,
+    provider: str | None = None,
+    openai_endpoint_url: str | None = None,
+    progress_callback: Callable[[str, int, dict], None] | None = None,
     search_tool: Optional[str] = None,
     iterations: int = 1,
     questions_per_iteration: int = 1,
-    max_results: int = 20,
-    max_filtered_results: int = 5,
-    region: str = "us",
-    time_period: str = "y",
-    safe_search: bool = True,
-    temperature: float = 0.7,
-    progress_callback: Optional[Callable] = None,
-) -> Dict[str, Any]:
+) -> AdvancedSearchSystem:
     """
-    Generate a quick research summary for a given query.
+    Initializes the advanced search system with specified parameters. This function sets up
+    and returns an instance of the AdvancedSearchSystem using the provided configuration
+    options such as model name, temperature for randomness in responses, provider service
+    details, endpoint URL, and an optional search tool.
     Args:
-        query: The research query to analyze
+        model_name: Name of the model to use (if None, uses database setting)
+        temperature: LLM temperature for generation
+        provider: Provider to use (if None, uses database setting)
+        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
+            setting)
+        progress_callback: Optional callback function to receive progress updates
         search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
         iterations: Number of research cycles to perform
         questions_per_iteration: Number of questions to generate per cycle
-        max_results: Maximum number of search results to consider
-        max_filtered_results: Maximum results after relevance filtering
-        region: Search region/locale
-        time_period: Time period for search results (d=day, w=week, m=month, y=year)
-        safe_search: Whether to enable safe search
-        temperature: LLM temperature for generation
-        progress_callback: Optional callback function to receive progress updates
     Returns:
-        Dictionary containing the research results with keys:
-        - 'summary': The generated summary text
-        - 'findings': List of detailed findings from each search
-        - 'iterations': Number of iterations performed
-        - 'questions': Questions generated during research
-    """
-    logger.info(f"Generating quick summary for query: {query}")
+        AdvancedSearchSystem: An instance of the configured AdvancedSearchSystem.
+    """
     # Get language model with custom temperature
-    llm = get_llm(temperature=temperature)
+    llm = get_llm(
+        temperature=temperature,
+        openai_endpoint_url=openai_endpoint_url,
+        model_name=model_name,
+        provider=provider,
+    )
+    # Set the search engine if specified
+    search_engine = None
+    if search_tool:
+        search_engine = get_search(search_tool, llm_instance=llm)
+        if search_engine is None:
+            logger.warning(
+                f"Could not create search engine '{search_tool}', using default."
+            )
     # Create search system with custom parameters
-    system = AdvancedSearchSystem()
+    system = AdvancedSearchSystem(llm=llm, search=search_engine)
     # Override default settings with user-provided values
-    system.max_iterations = iterations
+    system.max_iterations = iterations
     system.questions_per_iteration = questions_per_iteration
-    system.model = llm  # Ensure the model is directly attached to the system
-    # Set the search engine if specified
-    if search_tool:
-        search_engine = get_search(search_tool)
-        if search_engine:
-            system.search = search_engine
-        else:
-            logger.warning(f"Could not create search engine '{search_tool}', using default.")
     # Set progress callback if provided
     if progress_callback:
         system.set_progress_callback(progress_callback)
+    return system
+def quick_summary(
+    query: str,
+    **kwargs: Any,
+) -> Dict[str, Any]:
+    """
+    Generate a quick research summary for a given query.
+    Args:
+        query: The research query to analyze
+        **kwargs: Configuration for the search system. Will be forwarded to
+            `_init_search_system()`.
+    Returns:
+        Dictionary containing the research results with keys:
+        - 'summary': The generated summary text
+        - 'findings': List of detailed findings from each search
+        - 'iterations': Number of iterations performed
+        - 'questions': Questions generated during research
+    """
+    logger.info("Generating quick summary for query: %s", query)
+    system = _init_search_system(**kwargs)
     # Perform the search and analysis
     results = system.analyze_topic(query)
     # Extract the summary from the current knowledge
     if results and "current_knowledge" in results:
         summary = results["current_knowledge"]
     else:
         summary = "Unable to generate summary for the query."
     # Prepare the return value
     return {
         "summary": summary,
@@ -93,100 +119,60 @@ def quick_summary(
         "iterations": results.get("iterations", 0),
         "questions": results.get("questions", {}),
         "formatted_findings": results.get("formatted_findings", ""),
-        "sources": results.get("all_links_of_system", [])
+        "sources": results.get("all_links_of_system", []),
     }
 def generate_report(
     query: str,
-    search_tool: Optional[str] = None,
-    iterations: int = 2,
-    questions_per_iteration: int = 2,
-    searches_per_section: int = 2,
-    max_results: int = 50,
-    max_filtered_results: int = 5,
-    region: str = "us",
-    time_period: str = "y",
-    safe_search: bool = True,
-    temperature: float = 0.7,
     output_file: Optional[str] = None,
     progress_callback: Optional[Callable] = None,
+    searches_per_section: int = 2,
+    **kwargs: Any,
 ) -> Dict[str, Any]:
     """
     Generate a comprehensive, structured research report for a given query.
     Args:
         query: The research query to analyze
-        search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
-        iterations: Number of research cycles to perform
-        questions_per_iteration: Number of questions to generate per cycle
-        searches_per_section: Number of searches to perform per report section
-        max_results: Maximum number of search results to consider
-        max_filtered_results: Maximum results after relevance filtering
-        region: Search region/locale
-        time_period: Time period for search results (d=day, w=week, m=month, y=year)
-        safe_search: Whether to enable safe search
-        temperature: LLM temperature for generation
         output_file: Optional path to save report markdown file
         progress_callback: Optional callback function to receive progress updates
+        searches_per_section: The number of searches to perform for each
+            section in the report.
     Returns:
         Dictionary containing the research report with keys:
         - 'content': The full report content in markdown format
         - 'metadata': Report metadata including generated timestamp and query
     """
-    logger.info(f"Generating comprehensive research report for query: {query}")
+    logger.info("Generating comprehensive research report for query: %s", query)
+    system = _init_search_system(**kwargs)
-    # Get language model with custom temperature
-    llm = get_llm(temperature=temperature)
-    # Create search system with custom parameters
-    system = AdvancedSearchSystem()
-    # Override default settings with user-provided values
-    system.max_iterations = iterations
-    system.questions_per_iteration = questions_per_iteration
-    system.model = llm  # Ensure the model is directly attached to the system
-    # Set the search engine if specified
-    if search_tool:
-        search_engine = get_search(
-            search_tool,
-            llm_instance=llm,
-            max_results=max_results,
-            max_filtered_results=max_filtered_results,
-            region=region,
-            time_period=time_period,
-            safe_search=safe_search
-        )
-        if search_engine:
-            system.search = search_engine
-        else:
-            logger.warning(f"Could not create search engine '{search_tool}', using default.")
     # Set progress callback if provided
     if progress_callback:
         system.set_progress_callback(progress_callback)
     # Perform the initial research
     initial_findings = system.analyze_topic(query)
     # Generate the structured report
-    report_generator = IntegratedReportGenerator(searches_per_section=searches_per_section)
-    report_generator.model = llm  # Ensure the model is set on the report generator too
+    report_generator = IntegratedReportGenerator(
+        search_system=system,
+        llm=system.model,
+        searches_per_section=searches_per_section,
+    )
     report = report_generator.generate_report(initial_findings, query)
     # Save report to file if path is provided
     if output_file and report and "content" in report:
-            with open(output_file, "w", encoding="utf-8") as f:
-                f.write(report["content"])
-            logger.info(f"Report saved to {output_file}")
-            report["file_path"] = output_file
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(report["content"])
+        logger.info(f"Report saved to {output_file}")
+        report["file_path"] = output_file
     return report
 def analyze_documents(
     query: str,
     collection_name: str,
@@ -197,7 +183,7 @@ def analyze_documents(
 ) -> Dict[str, Any]:
     """
     Search and analyze documents in a specific local collection.
     Args:
         query: The search query
         collection_name: Name of the local document collection to search
@@ -205,58 +191,63 @@ def analyze_documents(
         temperature: LLM temperature for summary generation
         force_reindex: Whether to force reindexing the collection
         output_file: Optional path to save analysis results to a file
     Returns:
         Dictionary containing:
         - 'summary': Summary of the findings
         - 'documents': List of matching documents with content and metadata
     """
-    logger.info(f"Analyzing documents in collection '{collection_name}' for query: {query}")
+    logger.info(
+        f"Analyzing documents in collection '{collection_name}' for query: {query}"
+    )
     # Get language model with custom temperature
     llm = get_llm(temperature=temperature)
     # Get search engine for the specified collection
     search = get_search(collection_name, llm_instance=llm)
     if not search:
         return {
             "summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
-            "documents": []
+            "documents": [],
         }
     # Set max results
     search.max_results = max_results
     # Force reindex if requested
-    if force_reindex and hasattr(search, 'embedding_manager'):
-            for folder_path in search.folder_paths:
-                search.embedding_manager.index_folder(folder_path, force_reindex=True)
+    if force_reindex and hasattr(search, "embedding_manager"):
+        for folder_path in search.folder_paths:
+            search.embedding_manager.index_folder(folder_path, force_reindex=True)
     # Perform the search
     results = search.run(query)
     if not results:
         return {
             "summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
-            "documents": []
+            "documents": [],
         }
     # Get LLM to generate a summary of the results
-    docs_text = "\n\n".join([f"Document {i+1}: {doc.get('content', doc.get('snippet', ''))[:1000]}"
-                            for i, doc in enumerate(results[:5])])  # Limit to first 5 docs and 1000 chars each
+    docs_text = "\n\n".join(
+        [
+            f"Document {i + 1}:" f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
+            for i, doc in enumerate(results[:5])
+        ]
+    )  # Limit to first 5 docs and 1000 chars each
     summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"
     {docs_text}
     Provide a concise summary of the key information found in these documents related to the query.
     """
     summary_response = llm.invoke(summary_prompt)
-    if hasattr(summary_response, 'content'):
+    if hasattr(summary_response, "content"):
         summary = remove_think_tags(summary_response.content)
     else:
         summary = str(summary_response)
@@ -266,38 +257,42 @@ def analyze_documents(
         "summary": summary,
         "documents": results,
         "collection": collection_name,
-        "document_count": len(results)
+        "document_count": len(results),
     }
     # Save to file if requested
     if output_file:
-            with open(output_file, "w", encoding="utf-8") as f:
-                f.write(f"# Document Analysis: {query}\n\n")
-                f.write(f"## Summary\n\n{summary}\n\n")
-                f.write(f"## Documents Found: {len(results)}\n\n")
-                for i, doc in enumerate(results):
-                    f.write(f"### Document {i+1}: {doc.get('title', 'Untitled')}\n\n")
-                    f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
-                    f.write(f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n")
-                    f.write("---\n\n")
-            analysis_result["file_path"] = output_file
-            logger.info(f"Analysis saved to {output_file}")
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(f"# Document Analysis: {query}\n\n")
+            f.write(f"## Summary\n\n{summary}\n\n")
+            f.write(f"## Documents Found: {len(results)}\n\n")
+            for i, doc in enumerate(results):
+                f.write(f"### Document {i + 1}:" f" {doc.get('title', 'Untitled')}\n\n")
+                f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
+                f.write(
+                    f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n"
+                )
+                f.write("---\n\n")
+        analysis_result["file_path"] = output_file
+        logger.info(f"Analysis saved to {output_file}")
     return analysis_result
 def get_available_search_engines() -> Dict[str, str]:
     """
     Get a dictionary of available search engines.
     Returns:
         Dictionary mapping engine names to descriptions
     """
     from ..web_search_engines.search_engine_factory import get_available_engines
     engines = get_available_engines()
     # Add some descriptions for common engines
     descriptions = {
         "auto": "Automatic selection based on query type",
@@ -306,25 +301,24 @@ def get_available_search_engines() -> Dict[str, str]:
         "pubmed": "Medical and biomedical literature",
         "semantic_scholar": "Academic papers across all fields",
         "github": "Code repositories and technical documentation",
-        "local_all": "All local document collections"
+        "local_all": "All local document collections",
     }
     return {engine: descriptions.get(engine, "Search engine") for engine in engines}
 def get_available_collections() -> Dict[str, Dict[str, Any]]:
     """
     Get a dictionary of available local document collections.
     Returns:
         Dictionary mapping collection names to their configuration
     """
+    from ..config.config_files import LOCAL_COLLECTIONS_FILE
-    from ..config import LOCAL_COLLECTIONS_FILE
     if os.path.exists(LOCAL_COLLECTIONS_FILE):
-            collections = toml.load(LOCAL_COLLECTIONS_FILE)
-            return collections
+        collections = toml.load(LOCAL_COLLECTIONS_FILE)
+        return collections
     return {}

local_deep_research/app.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""
+Main entry point for the Local Deep Research application.
+"""
+from src.local_deep_research.web.app import main
+if __name__ == "__main__":
+    main()

local_deep_research/citation_handler.py CHANGED Viewed

@@ -1,10 +1,12 @@
 # citation_handler.py
+from typing import Any, Dict, List, Union
 from langchain_core.documents import Document
-from typing import Dict, List, Union, Any
-import re
-from .utilties.search_utilities import remove_think_tags
-from .config import settings
+from .config.config_files import settings
+from .utilities.db_utils import get_db_setting
 class CitationHandler:
     def __init__(self, llm):
@@ -13,7 +15,10 @@ class CitationHandler:
     def _create_documents(
         self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
     ) -> List[Document]:
-        """Convert search results to LangChain documents format and add index to original search results."""
+        """
+        Convert search results to LangChain documents format and add index
+        to original search results.
+        """
         documents = []
         if isinstance(search_results, str):
             return documents
@@ -22,14 +27,14 @@ class CitationHandler:
             if isinstance(result, dict):
                 # Add index to the original search result dictionary
                 result["index"] = str(i + nr_of_links + 1)
                 content = result.get("full_content", result.get("snippet", ""))
                 documents.append(
                     Document(
                         page_content=content,
                         metadata={
-                            "source": result.get("link", f"source_{i+1}"),
-                            "title": result.get("title", f"Source {i+1}"),
+                            "source": result.get("link", f"source_{i + 1}"),
+                            "title": result.get("title", f"Source {i + 1}"),
                             "index": i + nr_of_links + 1,
                         },
                     )
@@ -57,19 +62,20 @@ Question: {query}
 Sources:
 {formatted_sources}
-Provide a detailed analysis with citations and always keep URLS. Never make up sources. Example format: "According to the research [1], ..."
+Provide a detailed analysis with citations. Do not create the bibliography, it will be provided automatically.  Never make up sources. Never write or create urls. Only write text relevant to the question. Example format: "According to the research [1], ..."
 """
         response = self.llm.invoke(prompt)
-        return {"content": remove_think_tags(response.content), "documents": documents}
+        if not isinstance(response, str):
+            response = response.content
+        return {"content": response, "documents": documents}
     def analyze_followup(
         self,
         question: str,
         search_results: Union[str, List[Dict]],
         previous_knowledge: str,
-        nr_of_links : int
+        nr_of_links: int,
     ) -> Dict[str, Any]:
         """Process follow-up analysis with citations."""
         documents = self._create_documents(search_results, nr_of_links=nr_of_links)
@@ -80,7 +86,7 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
         2. Identify and flag any contradictions
         3. Verify basic facts (dates, company names, ownership)
         4. Note when sources disagree
         Previous Knowledge:
         {previous_knowledge}
@@ -88,8 +94,11 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
         {formatted_sources}
         Return any inconsistencies or conflicts found."""
-        if settings.GENERAL.ENABLE_FACT_CHECKING:
-            fact_check_response = remove_think_tags(self.llm.invoke(fact_check_prompt).content)
+        if get_db_setting(
+            "general.enable_fact_checking", settings.general.enable_fact_checking
+        ):
+            fact_check_response = self.llm.invoke(fact_check_prompt).content
         else:
             fact_check_response = ""
@@ -108,4 +117,4 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
         response = self.llm.invoke(prompt)
-        return {"content": remove_think_tags(response.content), "documents": documents}
+        return {"content": response.content, "documents": documents}

local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl