local-deep-research 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. local_deep_research/citation_handler.py +0 -2
  2. local_deep_research/config.py +1 -4
  3. local_deep_research/defaults/llm_config.py +2 -2
  4. local_deep_research/defaults/main.toml +3 -3
  5. local_deep_research/report_generator.py +1 -5
  6. local_deep_research/search_system.py +1 -1
  7. local_deep_research/utilties/search_utilities.py +3 -4
  8. local_deep_research/web_search_engines/engines/full_search.py +9 -8
  9. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -14
  10. local_deep_research/web_search_engines/engines/search_engine_brave.py +10 -9
  11. local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -2
  12. local_deep_research/web_search_engines/engines/search_engine_local.py +1 -1
  13. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +9 -8
  14. local_deep_research/web_search_engines/search_engine_base.py +1 -1
  15. local_deep_research-0.1.17.dist-info/METADATA +393 -0
  16. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/RECORD +20 -22
  17. local_deep_research/local_collections.py +0 -141
  18. local_deep_research/web_search_engines/full_search.py +0 -254
  19. local_deep_research-0.1.16.dist-info/METADATA +0 -346
  20. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/WHEEL +0 -0
  21. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/entry_points.txt +0 -0
  22. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/licenses/LICENSE +0 -0
  23. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/top_level.txt +0 -0

local_deep_research/citation_handler.py
@@ -50,7 +50,6 @@ class CitationHandler:

  documents = self._create_documents(search_results)
  formatted_sources = self._format_sources(documents)
- print(formatted_sources)
  prompt = f"""Analyze the following information concerning the question and include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source.

  Question: {query}
@@ -75,7 +74,6 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
  """Process follow-up analysis with citations."""
  documents = self._create_documents(search_results, nr_of_links=nr_of_links)
  formatted_sources = self._format_sources(documents)
- print(formatted_sources)
  # Add fact-checking step
  fact_check_prompt = f"""Analyze these sources for factual consistency:
  1. Cross-reference major claims between sources

local_deep_research/config.py
@@ -20,7 +20,7 @@ def get_config_dir():
  from platformdirs import user_config_dir
  config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))

- print(f"Looking for config in: {config_dir}")
+ logger.info(f"Looking for config in: {config_dir}")
  return config_dir
  # Define config paths
  CONFIG_DIR = get_config_dir() / "config"
@@ -31,9 +31,6 @@ LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
  SEARCH_ENGINES_FILE = CONFIG_DIR / "search_engines.toml"

  LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
- print("CONFIGDIR:", CONFIG_DIR)
- print("SECRETS_FILE:", SECRETS_FILE)
- print("SETTINGS_FILE:", SETTINGS_FILE)


  # Set environment variable for Dynaconf to use
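
Both config.py hunks above are part of a release-wide switch from print() to module-level loggers. A minimal sketch of the pattern as this diff introduces it in the engine modules (the basicConfig call is an assumption for standalone use; config.py is only shown calling an already-available logger):

    import logging
    from pathlib import Path

    from platformdirs import user_config_dir

    logger = logging.getLogger(__name__)    # one named logger per module, as added in the engine files
    logging.basicConfig(level=logging.INFO) # assumption: only needed when running this snippet on its own

    def get_config_dir() -> Path:
        config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))
        logger.info(f"Looking for config in: {config_dir}")
        return config_dir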

local_deep_research/defaults/llm_config.py
@@ -34,7 +34,7 @@ class ModelProvider(Enum):
  DEFAULT_PROVIDER = ModelProvider.OLLAMA # Change this to your preferred provider

  # Set your default model name here
- DEFAULT_MODEL = "mistral" # Your default model
+ DEFAULT_MODEL = "gemma3:12b" # Your default model

  # Set default model parameters
  DEFAULT_TEMPERATURE = 0.7
@@ -210,7 +210,7 @@ def is_anthropic_available():

  def is_openai_endpoint_available():
  """Check if OpenAI endpoint is available"""
- print(os.getenv("OPENAI_ENDPOINT_API_KEY"))
+
  try:
  api_key = settings.OPENAI_ENDPOINT_API_KEY
  return bool(api_key)

local_deep_research/defaults/main.toml
@@ -26,13 +26,13 @@ debug = true
  tool = "auto"

  # Number of research cycles
- iterations = 3
+ iterations = 2

  # Questions generated per cycle
- questions_per_iteration = 3
+ questions_per_iteration = 2

  # Searches per report section
- searches_per_section = 3
+ searches_per_section = 2

  # Results per search query
  max_results = 50

local_deep_research/report_generator.py
@@ -15,10 +15,6 @@ class IntegratedReportGenerator:
  searches_per_section # Control search depth per section
  )

- def _remove_think_tags(self, text: str) -> str:
- print(text)
- return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
-
  def generate_report(self, initial_findings: Dict, query: str) -> Dict:
  """Generate a complete research report with section-specific research."""

@@ -63,7 +59,7 @@
  Each subsection must include its purpose after the | symbol.
  """

- response = self._remove_think_tags(self.model.invoke(prompt).content)
+ response = search_utilities.remove_think_tags(self.model.invoke(prompt).content)

  # Parse the structure
  structure = []

local_deep_research/search_system.py
@@ -1,6 +1,6 @@
  from typing import Dict, List, Optional, Callable
  from datetime import datetime
- from .utilties.search_utilities import remove_think_tags, format_findings_to_text, print_search_results, format_links
+ from .utilties.search_utilities import remove_think_tags, format_findings_to_text, format_links
  import os
  from .utilties.enums import KnowledgeAccumulationApproach
  from .config import settings, get_llm, get_search

local_deep_research/utilties/search_utilities.py
@@ -3,7 +3,6 @@ import re

  def remove_think_tags(text: str) -> str:
  text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
- print(text)
  return text


@@ -17,13 +16,13 @@ def extract_links_from_search_results(search_results: list) -> list:
  """
  links = []
  for result in search_results:
- #print(result)
+
  try:

  title = result.get("title", "").strip()
  url = result.get("link", "").strip()
  index = result.get("index", "").strip()
- print("INDEX:",index)
+
  if title and url:
  links.append({"title": title, "url": url, "index": index})
  except Exception:
@@ -111,5 +110,5 @@ def print_search_results(search_results):
  links = extract_links_from_search_results(search_results)
  if links:
  formatted_text=format_links(links=links)
- print(formatted_text)
+ logger.info(formatted_text)

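
With the debug print removed, remove_think_tags is a pure helper shared by report_generator.py and search_system.py. A quick usage sketch based on the regex shown above (the sample string is illustrative):

    import re

    def remove_think_tags(text: str) -> str:
        # Strip <think>...</think> reasoning blocks emitted by some models, then trim whitespace
        return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

    assert remove_think_tags("<think>internal reasoning</think> The answer is 42.") == "The answer is 42."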

local_deep_research/web_search_engines/engines/full_search.py
@@ -7,6 +7,9 @@ import json, os
  from .utilties.search_utilities import remove_think_tags
  from datetime import datetime
  from local_deep_research import config
+ import logging
+ logger = logging.getLogger(__name__)
+

  class FullSearchResults:
  def __init__(
@@ -57,13 +60,12 @@
  try:
  # Get LLM's evaluation
  response = self.llm.invoke(prompt)
- # print(response)
  good_indices = json.loads(remove_think_tags(response.content))

  # Return only the results with good URLs
  return [r for i, r in enumerate(results) if i in good_indices]
  except Exception as e:
- print(f"URL filtering error: {e}")
+ logger.error(f"URL filtering error: {e}")
  return []

  def remove_boilerplate(self, html: str) -> str:
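
The hunk above also shows how FullSearchResults asks the LLM to vet URLs: the model replies with a JSON list of indices to keep, which is parsed after stripping any think tags. A hedged sketch of that flow outside the class (the filter_results_by_llm name and the prompt argument are illustrative, not from the package):

    import json
    import logging

    from local_deep_research.utilties.search_utilities import remove_think_tags

    logger = logging.getLogger(__name__)

    def filter_results_by_llm(llm, prompt: str, results: list) -> list:
        """Keep only the results whose indices the LLM returns as a JSON list."""
        try:
            response = llm.invoke(prompt)
            good_indices = json.loads(remove_think_tags(response.content))
            return [r for i, r in enumerate(results) if i in good_indices]
        except Exception as e:
            logger.error(f"URL filtering error: {e}")
            return []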

@@ -75,9 +77,8 @@

  def run(self, query: str):
  nr_full_text = 0
- # Step 1: Get search results from DuckDuckGo
+ # Step 1: Get search results
  search_results = self.web_search.invoke(query)
- #print(type(search_results))
  if not isinstance(search_results, list):
  raise ValueError("Expected the search results in list format.")

@@ -89,9 +90,9 @@

  # Extract URLs from filtered results
  urls = [result.get("link") for result in filtered_results if result.get("link")]
- print(urls)
+
  if not urls:
- print("\n === NO VALID LINKS ===\n")
+ logger.error("\n === NO VALID LINKS ===\n")
  return []

  # Step 3: Download the full HTML pages for filtered URLs
@@ -117,8 +118,8 @@
  link = result.get("link")
  result["full_content"] = url_to_content.get(link, None)

- print("FULL SEARCH WITH FILTERED URLS")
- print("Full text retrieved: ", nr_full_text)
+ logger.info("FULL SEARCH WITH FILTERED URLS")
+ logger.info("Full text retrieved: ", nr_full_text)
  return filtered_results

  def invoke(self, query: str):
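
One detail worth noting when swapping print() for logging calls, as in the last hunk: the logging module treats extra positional arguments as %-style formatting arguments rather than printing them, so the conventional lazy-formatting form of the final call above would look like the following (a suggested form, not what ships in the wheel):

    import logging

    logger = logging.getLogger(__name__)

    nr_full_text = 3  # example value
    # logging interpolates the argument into the %d placeholder only if the record is emitted
    logger.info("Full text retrieved: %d", nr_full_text)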

local_deep_research/web_search_engines/engines/search_engine_arxiv.py
@@ -121,7 +121,7 @@ class ArXivSearchEngine(BaseSearchEngine):
  return previews

  except Exception as e:
- print(f"Error getting arXiv previews: {e}")
+ logger.error(f"Error getting arXiv previews: {e}")
  return []

  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -138,10 +138,10 @@
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Snippet-only mode, skipping full content retrieval")
+ logger.info("Snippet-only mode, skipping full content retrieval")
  return relevant_items

- print("Getting full content for relevant arXiv papers")
+ logger.info("Getting full content for relevant arXiv papers")

  results = []
  pdf_count = 0 # Track number of PDFs processed
@@ -198,7 +198,7 @@
  if pdf_text.strip(): # Only use if we got meaningful text
  result["content"] = pdf_text
  result["full_content"] = pdf_text
- print(f"Successfully extracted text from PDF using PyPDF2")
+ logger.info(f"Successfully extracted text from PDF using PyPDF2")
  except (ImportError, Exception) as e1:
  # Fall back to pdfplumber
  try:
@@ -211,20 +211,20 @@
  if pdf_text.strip(): # Only use if we got meaningful text
  result["content"] = pdf_text
  result["full_content"] = pdf_text
- print(f"Successfully extracted text from PDF using pdfplumber")
+ logger.info(f"Successfully extracted text from PDF using pdfplumber")
  except (ImportError, Exception) as e2:
- print(f"PDF text extraction failed: {str(e1)}, then {str(e2)}")
- print(f"Using paper summary as content instead")
+ logger.error(f"PDF text extraction failed: {str(e1)}, then {str(e2)}")
+ logger.error(f"Using paper summary as content instead")
  except Exception as e:
- print(f"Error extracting text from PDF: {e}")
- print(f"Using paper summary as content instead")
+ logger.error(f"Error extracting text from PDF: {e}")
+ logger.error(f"Using paper summary as content instead")
  except Exception as e:
- print(f"Error downloading paper {paper.title}: {e}")
+ logger.error(f"Error downloading paper {paper.title}: {e}")
  result["pdf_path"] = None
  pdf_count -= 1 # Decrement counter if download fails
  elif self.include_full_text and self.download_dir and pdf_count >= self.max_full_text:
  # Reached PDF limit
- print(f"Maximum number of PDFs ({self.max_full_text}) reached. Skipping remaining PDFs.")
+ logger.info(f"Maximum number of PDFs ({self.max_full_text}) reached. Skipping remaining PDFs.")
  result["content"] = paper.summary
  result["full_content"] = paper.summary

@@ -242,7 +242,7 @@
  Returns:
  List of search results
  """
- print("---Execute a search using arXiv---")
+ logger.info("---Execute a search using arXiv---")

  # Use the implementation from the parent class which handles all phases
  results = super().run(query)
@@ -308,12 +308,12 @@
  paper_path = paper.download_pdf(dirpath=self.download_dir)
  result["pdf_path"] = str(paper_path)
  except Exception as e:
- print(f"Error downloading paper: {e}")
+ logger.error(f"Error downloading paper: {e}")

  return result

  except Exception as e:
- print(f"Error getting paper details: {e}")
+ logger.error(f"Error getting paper details: {e}")
  return {}

  def search_by_author(self, author_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
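
The arXiv engine's full-content phase, as the hunks above show, tries PyPDF2 first, falls back to pdfplumber, and finally falls back to the paper summary, logging at each step. A condensed sketch of that chain (the extract_pdf_text helper is illustrative and simplified relative to the engine's actual code; it assumes PyPDF2 and pdfplumber are installed):

    import logging

    logger = logging.getLogger(__name__)

    def extract_pdf_text(pdf_path: str, fallback_summary: str) -> str:
        """Return PDF text via PyPDF2, else pdfplumber, else the paper summary."""
        try:
            from PyPDF2 import PdfReader
            text = "\n".join(page.extract_text() or "" for page in PdfReader(pdf_path).pages)
            if not text.strip():
                raise ValueError("PyPDF2 returned no text")
            logger.info("Successfully extracted text from PDF using PyPDF2")
            return text
        except Exception as e1:
            try:
                import pdfplumber
                with pdfplumber.open(pdf_path) as pdf:
                    text = "\n".join(page.extract_text() or "" for page in pdf.pages)
                if not text.strip():
                    raise ValueError("pdfplumber returned no text")
                logger.info("Successfully extracted text from PDF using pdfplumber")
                return text
            except Exception as e2:
                logger.error(f"PDF text extraction failed: {e1}, then {e2}")
                logger.error("Using paper summary as content instead")
                return fallback_summary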

local_deep_research/web_search_engines/engines/search_engine_brave.py
@@ -5,7 +5,8 @@ from langchain_core.language_models import BaseLLM

  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research import config
-
+ import logging
+ logger = logging.getLogger(__name__)

  class BraveSearchEngine(BaseSearchEngine):
  """Brave search engine implementation with two-phase approach"""
@@ -100,7 +101,7 @@
  safesearch=brave_safe_search
  )
  except ImportError:
- print("Warning: FullSearchResults not available. Full content retrieval disabled.")
+ logger.warning("Warning: FullSearchResults not available. Full content retrieval disabled.")
  self.include_full_content = False

  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
@@ -113,7 +114,7 @@
  Returns:
  List of preview dictionaries
  """
- print("Getting search results from Brave Search")
+ logger.info("Getting search results from Brave Search")

  try:
  # Get search results from Brave Search
@@ -125,7 +126,7 @@
  import json
  raw_results = json.loads(raw_results)
  except json.JSONDecodeError:
- print("Error: Unable to parse BraveSearch response as JSON.")
+ logger.error("Error: Unable to parse BraveSearch response as JSON.")
  return []

  # Format results as previews
@@ -151,7 +152,7 @@
  return previews

  except Exception as e:
- print(f"Error getting Brave Search results: {e}")
+ logger.error(f"Error getting Brave Search results: {e}")
  return []

  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -168,7 +169,7 @@
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Snippet-only mode, skipping full content retrieval")
+ logger.info("Snippet-only mode, skipping full content retrieval")

  # Return the relevant items with their full Brave information
  results = []
@@ -188,7 +189,7 @@

  # If full content retrieval is enabled
  if self.include_full_content and hasattr(self, 'full_search'):
- print("Retrieving full webpage content")
+ logger.info("Retrieving full webpage content")

  try:
  # Extract only the links from relevant items
@@ -200,7 +201,7 @@
  return results_with_content

  except Exception as e:
- print(f"Error retrieving full content: {e}")
+ logger.error(f"Error retrieving full content: {e}")
  # Fall back to returning the items without full content

  # Return items with their full Brave information
@@ -231,7 +232,7 @@
  Returns:
  List of search results
  """
- print("---Execute a search using Brave Search---")
+ logger.info("---Execute a search using Brave Search---")

  # Use the implementation from the parent class which handles all phases
  results = super().run(query)

local_deep_research/web_search_engines/engines/search_engine_ddg.py
@@ -4,6 +4,8 @@ from langchain_core.language_models import BaseLLM

  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research.web_search_engines.engines.full_search import FullSearchResults # Import the FullSearchResults class
+ import logging
+ logger = logging.getLogger(__name__)

  class DuckDuckGoSearchEngine(BaseSearchEngine):
  """DuckDuckGo search engine implementation with two-phase retrieval"""
@@ -66,7 +68,7 @@
  Returns:
  List of search results
  """
- print("---Execute a search using DuckDuckGo---")
+ logger.info("---Execute a search using DuckDuckGo---")

  # Implementation of the two-phase approach (from parent class)
  return super().run(query)
@@ -103,7 +105,7 @@
  return previews

  except Exception as e:
- print(f"Error getting DuckDuckGo previews: {e}")
+ logger.error(f"Error getting DuckDuckGo previews: {e}")
  return []

  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:

local_deep_research/web_search_engines/engines/search_engine_local.py
@@ -764,7 +764,7 @@ class LocalSearchEngine(BaseSearchEngine):

  # Phase 3: Get full content for relevant items
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Returning snippet-only results as per config")
+ logger.info("Returning snippet-only results as per config")
  results = relevant_items
  else:
  results = self._get_full_content(relevant_items)

local_deep_research/web_search_engines/engines/search_engine_serpapi.py
@@ -5,7 +5,8 @@ from langchain_core.language_models import BaseLLM

  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research import config
-
+ import logging
+ logger = logging.getLogger(__name__)

  class SerpAPISearchEngine(BaseSearchEngine):
  """Google search engine implementation using SerpAPI with two-phase approach"""
@@ -92,7 +93,7 @@
  safesearch="Moderate" if safe_search else "Off"
  )
  except ImportError:
- print("Warning: FullSearchResults not available. Full content retrieval disabled.")
+ logger.warning("Warning: FullSearchResults not available. Full content retrieval disabled.")
  self.include_full_content = False

  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
@@ -105,7 +106,7 @@
  Returns:
  List of preview dictionaries
  """
- print("Getting search results from SerpAPI")
+ logger.info("Getting search results from SerpAPI")

  try:
  # Get search results from SerpAPI
@@ -134,7 +135,7 @@
  return previews

  except Exception as e:
- print(f"Error getting SerpAPI results: {e}")
+ logger.error(f"Error getting SerpAPI results: {e}")
  return []

  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -151,7 +152,7 @@
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Snippet-only mode, skipping full content retrieval")
+ logger.info("Snippet-only mode, skipping full content retrieval")

  # Return the relevant items with their full SerpAPI information
  results = []
@@ -171,7 +172,7 @@

  # If full content retrieval is enabled
  if self.include_full_content and hasattr(self, 'full_search'):
- print("Retrieving full webpage content")
+ logger.info("Retrieving full webpage content")

  try:
  # Extract only the links from relevant items
@@ -185,7 +186,7 @@
  return results_with_content

  except Exception as e:
- print(f"Error retrieving full content: {e}")
+ logger.info(f"Error retrieving full content: {e}")
  # Fall back to returning the items without full content

  # Return items with their full SerpAPI information
@@ -216,7 +217,7 @@
  Returns:
  List of search results
  """
- print("---Execute a search using SerpAPI (Google)---")
+ logger.info("---Execute a search using SerpAPI (Google)---")

  # Use the implementation from the parent class which handles all phases
  results = super().run(query)

local_deep_research/web_search_engines/search_engine_base.py
@@ -53,7 +53,7 @@ class BaseSearchEngine(ABC):
  List of search results with full content (if available)
  """
  # Ensure we're measuring time correctly for citation tracking
- start_time = datetime.now()
+

  # Step 1: Get preview information for items
  previews = self._get_previews(query)
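
The base-class hunk above drops an unused start_time assignment from BaseSearchEngine.run(). For orientation, here is a schematic of the two-phase flow that run() drives and that the engine hunks keep referring to, with previews fetched first and full content only for relevant items. The class name, the _filter_for_relevance step, and the method bodies are illustrative assumptions, not the package's actual implementation:

    import logging
    from abc import ABC, abstractmethod
    from typing import Any, Dict, List

    logger = logging.getLogger(__name__)

    class TwoPhaseSearchSketch(ABC):
        """Illustrative skeleton of the two-phase search approach."""

        @abstractmethod
        def _get_previews(self, query: str) -> List[Dict[str, Any]]:
            """Phase 1: cheap preview records (title, link, snippet)."""

        @abstractmethod
        def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
            """Phase 2: fetch full content only for the items judged relevant."""

        def _filter_for_relevance(self, previews: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
            # Hypothetical placeholder; the real base class decides relevance elsewhere
            return previews

        def run(self, query: str) -> List[Dict[str, Any]]:
            # Step 1: Get preview information for items
            previews = self._get_previews(query)
            if not previews:
                logger.info("No previews returned")
                return []
            relevant = self._filter_for_relevance(previews, query)
            # Step 2: Get full content for the relevant items
            return self._get_full_content(relevant)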