PyPI - local-deep-research - Versions diffs - 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

local_deep_research/api/research_functions.py CHANGED Viewed

@@ -3,18 +3,21 @@ API module for Local Deep Research.
 Provides programmatic access to search and research capabilities.
 """
-from typing import Dict, List, Optional, Union, Any, Callable
 import logging
 import os
-import traceback
+from typing import Any, Callable, Dict, Optional
 import toml
+from .. import get_report_generator  # Use the lazy import function
+from ..config.llm_config import get_llm
+from ..config.search_config import get_search
 from ..search_system import AdvancedSearchSystem
-from ..report_generator import IntegratedReportGenerator
-from ..config import get_llm, get_search, settings
-from ..utilties.search_utilities import remove_think_tags
+from ..utilities.search_utilities import remove_think_tags
 logger = logging.getLogger(__name__)
 def quick_summary(
     query: str,
     search_tool: Optional[str] = None,
@@ -30,7 +33,7 @@ def quick_summary(
 ) -> Dict[str, Any]:
     """
     Generate a quick research summary for a given query.
     Args:
         query: The research query to analyze
         search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
@@ -43,7 +46,7 @@ def quick_summary(
         safe_search: Whether to enable safe search
         temperature: LLM temperature for generation
         progress_callback: Optional callback function to receive progress updates
     Returns:
         Dictionary containing the research results with keys:
         - 'summary': The generated summary text
@@ -51,41 +54,42 @@ def quick_summary(
         - 'iterations': Number of iterations performed
         - 'questions': Questions generated during research
     """
-    logger.info(f"Generating quick summary for query: {query}")
+    logger.info("Generating quick summary for query: %s", query)
     # Get language model with custom temperature
     llm = get_llm(temperature=temperature)
     # Create search system with custom parameters
     system = AdvancedSearchSystem()
     # Override default settings with user-provided values
-    system.max_iterations = iterations
+    system.max_iterations = iterations
     system.questions_per_iteration = questions_per_iteration
     system.model = llm  # Ensure the model is directly attached to the system
     # Set the search engine if specified
     if search_tool:
         search_engine = get_search(search_tool)
         if search_engine:
             system.search = search_engine
         else:
-            logger.warning(f"Could not create search engine '{search_tool}', using default.")
+            logger.warning(
+                f"Could not create search engine '{search_tool}', using default."
+            )
     # Set progress callback if provided
     if progress_callback:
         system.set_progress_callback(progress_callback)
     # Perform the search and analysis
     results = system.analyze_topic(query)
     # Extract the summary from the current knowledge
     if results and "current_knowledge" in results:
         summary = results["current_knowledge"]
     else:
         summary = "Unable to generate summary for the query."
     # Prepare the return value
     return {
         "summary": summary,
@@ -93,7 +97,7 @@ def quick_summary(
         "iterations": results.get("iterations", 0),
         "questions": results.get("questions", {}),
         "formatted_findings": results.get("formatted_findings", ""),
-        "sources": results.get("all_links_of_system", [])
+        "sources": results.get("all_links_of_system", []),
     }
@@ -114,7 +118,7 @@ def generate_report(
 ) -> Dict[str, Any]:
     """
     Generate a comprehensive, structured research report for a given query.
     Args:
         query: The research query to analyze
         search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If None, uses default
@@ -129,26 +133,25 @@ def generate_report(
         temperature: LLM temperature for generation
         output_file: Optional path to save report markdown file
         progress_callback: Optional callback function to receive progress updates
     Returns:
         Dictionary containing the research report with keys:
         - 'content': The full report content in markdown format
         - 'metadata': Report metadata including generated timestamp and query
     """
-    logger.info(f"Generating comprehensive research report for query: {query}")
+    logger.info("Generating comprehensive research report for query: %s", query)
     # Get language model with custom temperature
     llm = get_llm(temperature=temperature)
     # Create search system with custom parameters
     system = AdvancedSearchSystem()
     # Override default settings with user-provided values
     system.max_iterations = iterations
     system.questions_per_iteration = questions_per_iteration
     system.model = llm  # Ensure the model is directly attached to the system
     # Set the search engine if specified
     if search_tool:
         search_engine = get_search(
@@ -158,35 +161,36 @@ def generate_report(
             max_filtered_results=max_filtered_results,
             region=region,
             time_period=time_period,
-            safe_search=safe_search
+            safe_search=safe_search,
         )
         if search_engine:
             system.search = search_engine
         else:
-            logger.warning(f"Could not create search engine '{search_tool}', using default.")
+            logger.warning(
+                f"Could not create search engine '{search_tool}', using default."
+            )
     # Set progress callback if provided
     if progress_callback:
         system.set_progress_callback(progress_callback)
     # Perform the initial research
     initial_findings = system.analyze_topic(query)
     # Generate the structured report
-    report_generator = IntegratedReportGenerator(searches_per_section=searches_per_section)
+    report_generator = get_report_generator(searches_per_section=searches_per_section)
     report_generator.model = llm  # Ensure the model is set on the report generator too
     report = report_generator.generate_report(initial_findings, query)
     # Save report to file if path is provided
     if output_file and report and "content" in report:
-            with open(output_file, "w", encoding="utf-8") as f:
-                f.write(report["content"])
-            logger.info(f"Report saved to {output_file}")
-            report["file_path"] = output_file
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(report["content"])
+        logger.info(f"Report saved to {output_file}")
+        report["file_path"] = output_file
     return report
 def analyze_documents(
     query: str,
     collection_name: str,
@@ -197,7 +201,7 @@ def analyze_documents(
 ) -> Dict[str, Any]:
     """
     Search and analyze documents in a specific local collection.
     Args:
         query: The search query
         collection_name: Name of the local document collection to search
@@ -205,58 +209,63 @@ def analyze_documents(
         temperature: LLM temperature for summary generation
         force_reindex: Whether to force reindexing the collection
         output_file: Optional path to save analysis results to a file
     Returns:
         Dictionary containing:
         - 'summary': Summary of the findings
         - 'documents': List of matching documents with content and metadata
     """
-    logger.info(f"Analyzing documents in collection '{collection_name}' for query: {query}")
+    logger.info(
+        f"Analyzing documents in collection '{collection_name}' for query: {query}"
+    )
     # Get language model with custom temperature
     llm = get_llm(temperature=temperature)
     # Get search engine for the specified collection
     search = get_search(collection_name, llm_instance=llm)
     if not search:
         return {
             "summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
-            "documents": []
+            "documents": [],
         }
     # Set max results
     search.max_results = max_results
     # Force reindex if requested
-    if force_reindex and hasattr(search, 'embedding_manager'):
-            for folder_path in search.folder_paths:
-                search.embedding_manager.index_folder(folder_path, force_reindex=True)
+    if force_reindex and hasattr(search, "embedding_manager"):
+        for folder_path in search.folder_paths:
+            search.embedding_manager.index_folder(folder_path, force_reindex=True)
     # Perform the search
     results = search.run(query)
     if not results:
         return {
             "summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
-            "documents": []
+            "documents": [],
         }
     # Get LLM to generate a summary of the results
-    docs_text = "\n\n".join([f"Document {i+1}: {doc.get('content', doc.get('snippet', ''))[:1000]}"
-                            for i, doc in enumerate(results[:5])])  # Limit to first 5 docs and 1000 chars each
+    docs_text = "\n\n".join(
+        [
+            f"Document {i + 1}:" f" {doc.get('content', doc.get('snippet', ''))[:1000]}"
+            for i, doc in enumerate(results[:5])
+        ]
+    )  # Limit to first 5 docs and 1000 chars each
     summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"
     {docs_text}
     Provide a concise summary of the key information found in these documents related to the query.
     """
     summary_response = llm.invoke(summary_prompt)
-    if hasattr(summary_response, 'content'):
+    if hasattr(summary_response, "content"):
         summary = remove_think_tags(summary_response.content)
     else:
         summary = str(summary_response)
@@ -266,38 +275,42 @@ def analyze_documents(
         "summary": summary,
         "documents": results,
         "collection": collection_name,
-        "document_count": len(results)
+        "document_count": len(results),
     }
     # Save to file if requested
     if output_file:
-            with open(output_file, "w", encoding="utf-8") as f:
-                f.write(f"# Document Analysis: {query}\n\n")
-                f.write(f"## Summary\n\n{summary}\n\n")
-                f.write(f"## Documents Found: {len(results)}\n\n")
-                for i, doc in enumerate(results):
-                    f.write(f"### Document {i+1}: {doc.get('title', 'Untitled')}\n\n")
-                    f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
-                    f.write(f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n")
-                    f.write("---\n\n")
-            analysis_result["file_path"] = output_file
-            logger.info(f"Analysis saved to {output_file}")
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(f"# Document Analysis: {query}\n\n")
+            f.write(f"## Summary\n\n{summary}\n\n")
+            f.write(f"## Documents Found: {len(results)}\n\n")
+            for i, doc in enumerate(results):
+                f.write(f"### Document {i + 1}:" f" {doc.get('title', 'Untitled')}\n\n")
+                f.write(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
+                f.write(
+                    f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n"
+                )
+                f.write("---\n\n")
+        analysis_result["file_path"] = output_file
+        logger.info(f"Analysis saved to {output_file}")
     return analysis_result
 def get_available_search_engines() -> Dict[str, str]:
     """
     Get a dictionary of available search engines.
     Returns:
         Dictionary mapping engine names to descriptions
     """
     from ..web_search_engines.search_engine_factory import get_available_engines
     engines = get_available_engines()
     # Add some descriptions for common engines
     descriptions = {
         "auto": "Automatic selection based on query type",
@@ -306,25 +319,24 @@ def get_available_search_engines() -> Dict[str, str]:
         "pubmed": "Medical and biomedical literature",
         "semantic_scholar": "Academic papers across all fields",
         "github": "Code repositories and technical documentation",
-        "local_all": "All local document collections"
+        "local_all": "All local document collections",
     }
     return {engine: descriptions.get(engine, "Search engine") for engine in engines}
 def get_available_collections() -> Dict[str, Dict[str, Any]]:
     """
     Get a dictionary of available local document collections.
     Returns:
         Dictionary mapping collection names to their configuration
     """
+    from ..config.config_files import LOCAL_COLLECTIONS_FILE
-    from ..config import LOCAL_COLLECTIONS_FILE
     if os.path.exists(LOCAL_COLLECTIONS_FILE):
-            collections = toml.load(LOCAL_COLLECTIONS_FILE)
-            return collections
+        collections = toml.load(LOCAL_COLLECTIONS_FILE)
+        return collections
     return {}

local_deep_research/app.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""
+Main entry point for the Local Deep Research application.
+"""
+from src.local_deep_research.web.app import main
+if __name__ == "__main__":
+    main()

local_deep_research/citation_handler.py CHANGED Viewed

@@ -1,10 +1,12 @@
 # citation_handler.py
+from typing import Any, Dict, List, Union
 from langchain_core.documents import Document
-from typing import Dict, List, Union, Any
-import re
-from .utilties.search_utilities import remove_think_tags
-from .config import settings
+from .config.config_files import settings
+from .utilities.db_utils import get_db_setting
 class CitationHandler:
     def __init__(self, llm):
@@ -13,7 +15,10 @@ class CitationHandler:
     def _create_documents(
         self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
     ) -> List[Document]:
-        """Convert search results to LangChain documents format and add index to original search results."""
+        """
+        Convert search results to LangChain documents format and add index
+        to original search results.
+        """
         documents = []
         if isinstance(search_results, str):
             return documents
@@ -22,14 +27,14 @@ class CitationHandler:
             if isinstance(result, dict):
                 # Add index to the original search result dictionary
                 result["index"] = str(i + nr_of_links + 1)
                 content = result.get("full_content", result.get("snippet", ""))
                 documents.append(
                     Document(
                         page_content=content,
                         metadata={
-                            "source": result.get("link", f"source_{i+1}"),
-                            "title": result.get("title", f"Source {i+1}"),
+                            "source": result.get("link", f"source_{i + 1}"),
+                            "title": result.get("title", f"Source {i + 1}"),
                             "index": i + nr_of_links + 1,
                         },
                     )
@@ -57,19 +62,20 @@ Question: {query}
 Sources:
 {formatted_sources}
-Provide a detailed analysis with citations and always keep URLS. Never make up sources. Example format: "According to the research [1], ..."
+Provide a detailed analysis with citations. Do not create the bibliography, it will be provided automatically.  Never make up sources. Never write or create urls. Only write text relevant to the question. Example format: "According to the research [1], ..."
 """
         response = self.llm.invoke(prompt)
-        return {"content": remove_think_tags(response.content), "documents": documents}
+        if not isinstance(response, str):
+            response = response.content
+        return {"content": response, "documents": documents}
     def analyze_followup(
         self,
         question: str,
         search_results: Union[str, List[Dict]],
         previous_knowledge: str,
-        nr_of_links : int
+        nr_of_links: int,
     ) -> Dict[str, Any]:
         """Process follow-up analysis with citations."""
         documents = self._create_documents(search_results, nr_of_links=nr_of_links)
@@ -80,7 +86,7 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
         2. Identify and flag any contradictions
         3. Verify basic facts (dates, company names, ownership)
         4. Note when sources disagree
         Previous Knowledge:
         {previous_knowledge}
@@ -88,8 +94,11 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
         {formatted_sources}
         Return any inconsistencies or conflicts found."""
-        if settings.GENERAL.ENABLE_FACT_CHECKING:
-            fact_check_response = remove_think_tags(self.llm.invoke(fact_check_prompt).content)
+        if get_db_setting(
+            "general.enable_fact_checking", settings.general.enable_fact_checking
+        ):
+            fact_check_response = self.llm.invoke(fact_check_prompt).content
         else:
             fact_check_response = ""
@@ -108,4 +117,4 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
         response = self.llm.invoke(prompt)
-        return {"content": remove_think_tags(response.content), "documents": documents}
+        return {"content": response.content, "documents": documents}

local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl