local-deep-research 0.1.16__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. local_deep_research/citation_handler.py +0 -2
  2. local_deep_research/config.py +1 -4
  3. local_deep_research/defaults/llm_config.py +2 -2
  4. local_deep_research/defaults/main.toml +3 -3
  5. local_deep_research/report_generator.py +1 -5
  6. local_deep_research/search_system.py +1 -1
  7. local_deep_research/utilties/search_utilities.py +3 -4
  8. local_deep_research/web_search_engines/engines/full_search.py +9 -8
  9. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -14
  10. local_deep_research/web_search_engines/engines/search_engine_brave.py +10 -9
  11. local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -2
  12. local_deep_research/web_search_engines/engines/search_engine_local.py +1 -1
  13. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +9 -8
  14. local_deep_research/web_search_engines/search_engine_base.py +1 -1
  15. local_deep_research-0.1.17.dist-info/METADATA +393 -0
  16. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/RECORD +20 -22
  17. local_deep_research/local_collections.py +0 -141
  18. local_deep_research/web_search_engines/full_search.py +0 -254
  19. local_deep_research-0.1.16.dist-info/METADATA +0 -346
  20. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/WHEEL +0 -0
  21. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/entry_points.txt +0 -0
  22. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/licenses/LICENSE +0 -0
  23. {local_deep_research-0.1.16.dist-info → local_deep_research-0.1.17.dist-info}/top_level.txt +0 -0

local_deep_research/citation_handler.py
@@ -50,7 +50,6 @@ class CitationHandler:

  documents = self._create_documents(search_results)
  formatted_sources = self._format_sources(documents)
- print(formatted_sources)
  prompt = f"""Analyze the following information concerning the question and include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source.

  Question: {query}
@@ -75,7 +74,6 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
  """Process follow-up analysis with citations."""
  documents = self._create_documents(search_results, nr_of_links=nr_of_links)
  formatted_sources = self._format_sources(documents)
- print(formatted_sources)
  # Add fact-checking step
  fact_check_prompt = f"""Analyze these sources for factual consistency:
  1. Cross-reference major claims between sources

local_deep_research/config.py
@@ -20,7 +20,7 @@ def get_config_dir():
  from platformdirs import user_config_dir
  config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))

- print(f"Looking for config in: {config_dir}")
+ logger.info(f"Looking for config in: {config_dir}")
  return config_dir
  # Define config paths
  CONFIG_DIR = get_config_dir() / "config"
@@ -31,9 +31,6 @@ LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
  SEARCH_ENGINES_FILE = CONFIG_DIR / "search_engines.toml"

  LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
- print("CONFIGDIR:", CONFIG_DIR)
- print("SECRETS_FILE:", SECRETS_FILE)
- print("SETTINGS_FILE:", SETTINGS_FILE)


  # Set environment variable for Dynaconf to use
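
Both config.py hunks above are part of a release-wide switch from print() to module-level loggers. A minimal sketch of the pattern as this diff introduces it in the engine modules (the basicConfig call is an assumption for standalone use; config.py is only shown calling an already-available logger):

    import logging
    from pathlib import Path

    from platformdirs import user_config_dir

    logger = logging.getLogger(__name__)    # one named logger per module, as added in the engine files
    logging.basicConfig(level=logging.INFO) # assumption: only needed when running this snippet on its own

    def get_config_dir() -> Path:
        config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))
        logger.info(f"Looking for config in: {config_dir}")
        return config_dir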

local_deep_research/defaults/llm_config.py
@@ -34,7 +34,7 @@ class ModelProvider(Enum):
  DEFAULT_PROVIDER = ModelProvider.OLLAMA # Change this to your preferred provider

  # Set your default model name here
- DEFAULT_MODEL = "mistral" # Your default model
+ DEFAULT_MODEL = "gemma3:12b" # Your default model

  # Set default model parameters
  DEFAULT_TEMPERATURE = 0.7
@@ -210,7 +210,7 @@ def is_anthropic_available():

  def is_openai_endpoint_available():
  """Check if OpenAI endpoint is available"""
- print(os.getenv("OPENAI_ENDPOINT_API_KEY"))
+
  try:
  api_key = settings.OPENAI_ENDPOINT_API_KEY
  return bool(api_key)

local_deep_research/defaults/main.toml
@@ -26,13 +26,13 @@ debug = true
  tool = "auto"

  # Number of research cycles
- iterations = 3
+ iterations = 2

  # Questions generated per cycle
- questions_per_iteration = 3
+ questions_per_iteration = 2

  # Searches per report section
- searches_per_section = 3
+ searches_per_section = 2

  # Results per search query
  max_results = 50

local_deep_research/report_generator.py
@@ -15,10 +15,6 @@ class IntegratedReportGenerator:
  searches_per_section # Control search depth per section
  )

- def _remove_think_tags(self, text: str) -> str:
- print(text)
- return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
-
  def generate_report(self, initial_findings: Dict, query: str) -> Dict:
  """Generate a complete research report with section-specific research."""

@@ -63,7 +59,7 @@
  Each subsection must include its purpose after the | symbol.
  """

- response = self._remove_think_tags(self.model.invoke(prompt).content)
+ response = search_utilities.remove_think_tags(self.model.invoke(prompt).content)

  # Parse the structure
  structure = []

local_deep_research/search_system.py
@@ -1,6 +1,6 @@
  from typing import Dict, List, Optional, Callable
  from datetime import datetime
- from .utilties.search_utilities import remove_think_tags, format_findings_to_text, print_search_results, format_links
+ from .utilties.search_utilities import remove_think_tags, format_findings_to_text, format_links
  import os
  from .utilties.enums import KnowledgeAccumulationApproach
  from .config import settings, get_llm, get_search

local_deep_research/utilties/search_utilities.py
@@ -3,7 +3,6 @@ import re

  def remove_think_tags(text: str) -> str:
  text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
- print(text)
  return text


@@ -17,13 +16,13 @@ def extract_links_from_search_results(search_results: list) -> list:
  """
  links = []
  for result in search_results:
- #print(result)
+
  try:

  title = result.get("title", "").strip()
  url = result.get("link", "").strip()
  index = result.get("index", "").strip()
- print("INDEX:",index)
+
  if title and url:
  links.append({"title": title, "url": url, "index": index})
  except Exception:
@@ -111,5 +110,5 @@ def print_search_results(search_results):
  links = extract_links_from_search_results(search_results)
  if links:
  formatted_text=format_links(links=links)
- print(formatted_text)
+ logger.info(formatted_text)

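
With the debug print removed, remove_think_tags is a pure helper shared by report_generator.py and search_system.py. A quick usage sketch based on the regex shown above (the sample string is illustrative):

    import re

    def remove_think_tags(text: str) -> str:
        # Strip <think>...</think> reasoning blocks emitted by some models, then trim whitespace
        return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

    assert remove_think_tags("<think>internal reasoning</think> The answer is 42.") == "The answer is 42."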

local_deep_research/web_search_engines/engines/full_search.py
@@ -7,6 +7,9 @@ import json, os
  from .utilties.search_utilities import remove_think_tags
  from datetime import datetime
  from local_deep_research import config
+ import logging
+ logger = logging.getLogger(__name__)
+

  class FullSearchResults:
  def __init__(
@@ -57,13 +60,12 @@
  try:
  # Get LLM's evaluation
  response = self.llm.invoke(prompt)
- # print(response)
  good_indices = json.loads(remove_think_tags(response.content))

  # Return only the results with good URLs
  return [r for i, r in enumerate(results) if i in good_indices]
  except Exception as e:
- print(f"URL filtering error: {e}")
+ logger.error(f"URL filtering error: {e}")
  return []

  def remove_boilerplate(self, html: str) -> str:
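
The hunk above also shows how FullSearchResults asks the LLM to vet URLs: the model replies with a JSON list of indices to keep, which is parsed after stripping any think tags. A hedged sketch of that flow outside the class (the filter_results_by_llm name and the prompt argument are illustrative, not from the package):

    import json
    import logging

    from local_deep_research.utilties.search_utilities import remove_think_tags

    logger = logging.getLogger(__name__)

    def filter_results_by_llm(llm, prompt: str, results: list) -> list:
        """Keep only the results whose indices the LLM returns as a JSON list."""
        try:
            response = llm.invoke(prompt)
            good_indices = json.loads(remove_think_tags(response.content))
            return [r for i, r in enumerate(results) if i in good_indices]
        except Exception as e:
            logger.error(f"URL filtering error: {e}")
            return []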

@@ -75,9 +77,8 @@

  def run(self, query: str):
  nr_full_text = 0
- # Step 1: Get search results from DuckDuckGo
+ # Step 1: Get search results
  search_results = self.web_search.invoke(query)
- #print(type(search_results))
  if not isinstance(search_results, list):
  raise ValueError("Expected the search results in list format.")

@@ -89,9 +90,9 @@

  # Extract URLs from filtered results
  urls = [result.get("link") for result in filtered_results if result.get("link")]
- print(urls)
+
  if not urls:
- print("\n === NO VALID LINKS ===\n")
+ logger.error("\n === NO VALID LINKS ===\n")
  return []

  # Step 3: Download the full HTML pages for filtered URLs
@@ -117,8 +118,8 @@
  link = result.get("link")
  result["full_content"] = url_to_content.get(link, None)

- print("FULL SEARCH WITH FILTERED URLS")
- print("Full text retrieved: ", nr_full_text)
+ logger.info("FULL SEARCH WITH FILTERED URLS")
+ logger.info("Full text retrieved: ", nr_full_text)
  return filtered_results

  def invoke(self, query: str):
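
One detail worth noting when swapping print() for logging calls, as in the last hunk: the logging module treats extra positional arguments as %-style formatting arguments rather than printing them, so the conventional lazy-formatting form of the final call above would look like the following (a suggested form, not what ships in the wheel):

    import logging

    logger = logging.getLogger(__name__)

    nr_full_text = 3  # example value
    # logging interpolates the argument into the %d placeholder only if the record is emitted
    logger.info("Full text retrieved: %d", nr_full_text)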

local_deep_research/web_search_engines/engines/search_engine_arxiv.py
@@ -121,7 +121,7 @@ class ArXivSearchEngine(BaseSearchEngine):
  return previews

  except Exception as e:
- print(f"Error getting arXiv previews: {e}")
+ logger.error(f"Error getting arXiv previews: {e}")
  return []

  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -138,10 +138,10 @@
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Snippet-only mode, skipping full content retrieval")
+ logger.info("Snippet-only mode, skipping full content retrieval")
  return relevant_items

- print("Getting full content for relevant arXiv papers")
+ logger.info("Getting full content for relevant arXiv papers")

  results = []
  pdf_count = 0 # Track number of PDFs processed
@@ -198,7 +198,7 @@
  if pdf_text.strip(): # Only use if we got meaningful text
  result["content"] = pdf_text
  result["full_content"] = pdf_text
- print(f"Successfully extracted text from PDF using PyPDF2")
+ logger.info(f"Successfully extracted text from PDF using PyPDF2")
  except (ImportError, Exception) as e1:
  # Fall back to pdfplumber
  try:
@@ -211,20 +211,20 @@
  if pdf_text.strip(): # Only use if we got meaningful text
  result["content"] = pdf_text
  result["full_content"] = pdf_text
- print(f"Successfully extracted text from PDF using pdfplumber")
+ logger.info(f"Successfully extracted text from PDF using pdfplumber")
  except (ImportError, Exception) as e2:
- print(f"PDF text extraction failed: {str(e1)}, then {str(e2)}")
- print(f"Using paper summary as content instead")
+ logger.error(f"PDF text extraction failed: {str(e1)}, then {str(e2)}")
+ logger.error(f"Using paper summary as content instead")
  except Exception as e:
- print(f"Error extracting text from PDF: {e}")
- print(f"Using paper summary as content instead")
+ logger.error(f"Error extracting text from PDF: {e}")
+ logger.error(f"Using paper summary as content instead")
  except Exception as e:
- print(f"Error downloading paper {paper.title}: {e}")
+ logger.error(f"Error downloading paper {paper.title}: {e}")
  result["pdf_path"] = None
  pdf_count -= 1 # Decrement counter if download fails
  elif self.include_full_text and self.download_dir and pdf_count >= self.max_full_text:
  # Reached PDF limit
- print(f"Maximum number of PDFs ({self.max_full_text}) reached. Skipping remaining PDFs.")
+ logger.info(f"Maximum number of PDFs ({self.max_full_text}) reached. Skipping remaining PDFs.")
  result["content"] = paper.summary
  result["full_content"] = paper.summary

@@ -242,7 +242,7 @@
  Returns:
  List of search results
  """
- print("---Execute a search using arXiv---")
+ logger.info("---Execute a search using arXiv---")

  # Use the implementation from the parent class which handles all phases
  results = super().run(query)
@@ -308,12 +308,12 @@
  paper_path = paper.download_pdf(dirpath=self.download_dir)
  result["pdf_path"] = str(paper_path)
  except Exception as e:
- print(f"Error downloading paper: {e}")
+ logger.error(f"Error downloading paper: {e}")

  return result

  except Exception as e:
- print(f"Error getting paper details: {e}")
+ logger.error(f"Error getting paper details: {e}")
  return {}

  def search_by_author(self, author_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
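
The arXiv engine's full-content phase, as the hunks above show, tries PyPDF2 first, falls back to pdfplumber, and finally falls back to the paper summary, logging at each step. A condensed sketch of that chain (the extract_pdf_text helper is illustrative and simplified relative to the engine's actual code; it assumes PyPDF2 and pdfplumber are installed):

    import logging

    logger = logging.getLogger(__name__)

    def extract_pdf_text(pdf_path: str, fallback_summary: str) -> str:
        """Return PDF text via PyPDF2, else pdfplumber, else the paper summary."""
        try:
            from PyPDF2 import PdfReader
            text = "\n".join(page.extract_text() or "" for page in PdfReader(pdf_path).pages)
            if not text.strip():
                raise ValueError("PyPDF2 returned no text")
            logger.info("Successfully extracted text from PDF using PyPDF2")
            return text
        except Exception as e1:
            try:
                import pdfplumber
                with pdfplumber.open(pdf_path) as pdf:
                    text = "\n".join(page.extract_text() or "" for page in pdf.pages)
                if not text.strip():
                    raise ValueError("pdfplumber returned no text")
                logger.info("Successfully extracted text from PDF using pdfplumber")
                return text
            except Exception as e2:
                logger.error(f"PDF text extraction failed: {e1}, then {e2}")
                logger.error("Using paper summary as content instead")
                return fallback_summary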

local_deep_research/web_search_engines/engines/search_engine_brave.py
@@ -5,7 +5,8 @@ from langchain_core.language_models import BaseLLM

  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research import config
-
+ import logging
+ logger = logging.getLogger(__name__)

  class BraveSearchEngine(BaseSearchEngine):
  """Brave search engine implementation with two-phase approach"""
@@ -100,7 +101,7 @@
  safesearch=brave_safe_search
  )
  except ImportError:
- print("Warning: FullSearchResults not available. Full content retrieval disabled.")
+ logger.warning("Warning: FullSearchResults not available. Full content retrieval disabled.")
  self.include_full_content = False

  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
@@ -113,7 +114,7 @@
  Returns:
  List of preview dictionaries
  """
- print("Getting search results from Brave Search")
+ logger.info("Getting search results from Brave Search")

  try:
  # Get search results from Brave Search
@@ -125,7 +126,7 @@
  import json
  raw_results = json.loads(raw_results)
  except json.JSONDecodeError:
- print("Error: Unable to parse BraveSearch response as JSON.")
+ logger.error("Error: Unable to parse BraveSearch response as JSON.")
  return []

  # Format results as previews
@@ -151,7 +152,7 @@
  return previews

  except Exception as e:
- print(f"Error getting Brave Search results: {e}")
+ logger.error(f"Error getting Brave Search results: {e}")
  return []

  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -168,7 +169,7 @@
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Snippet-only mode, skipping full content retrieval")
+ logger.info("Snippet-only mode, skipping full content retrieval")

  # Return the relevant items with their full Brave information
  results = []
@@ -188,7 +189,7 @@

  # If full content retrieval is enabled
  if self.include_full_content and hasattr(self, 'full_search'):
- print("Retrieving full webpage content")
+ logger.info("Retrieving full webpage content")

  try:
  # Extract only the links from relevant items
@@ -200,7 +201,7 @@
  return results_with_content

  except Exception as e:
- print(f"Error retrieving full content: {e}")
+ logger.error(f"Error retrieving full content: {e}")
  # Fall back to returning the items without full content

  # Return items with their full Brave information
@@ -231,7 +232,7 @@
  Returns:
  List of search results
  """
- print("---Execute a search using Brave Search---")
+ logger.info("---Execute a search using Brave Search---")

  # Use the implementation from the parent class which handles all phases
  results = super().run(query)

local_deep_research/web_search_engines/engines/search_engine_ddg.py
@@ -4,6 +4,8 @@ from langchain_core.language_models import BaseLLM

  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research.web_search_engines.engines.full_search import FullSearchResults # Import the FullSearchResults class
+ import logging
+ logger = logging.getLogger(__name__)

  class DuckDuckGoSearchEngine(BaseSearchEngine):
  """DuckDuckGo search engine implementation with two-phase retrieval"""
@@ -66,7 +68,7 @@
  Returns:
  List of search results
  """
- print("---Execute a search using DuckDuckGo---")
+ logger.info("---Execute a search using DuckDuckGo---")

  # Implementation of the two-phase approach (from parent class)
  return super().run(query)
@@ -103,7 +105,7 @@
  return previews

  except Exception as e:
- print(f"Error getting DuckDuckGo previews: {e}")
+ logger.error(f"Error getting DuckDuckGo previews: {e}")
  return []

  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:

local_deep_research/web_search_engines/engines/search_engine_local.py
@@ -764,7 +764,7 @@ class LocalSearchEngine(BaseSearchEngine):

  # Phase 3: Get full content for relevant items
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Returning snippet-only results as per config")
+ logger.info("Returning snippet-only results as per config")
  results = relevant_items
  else:
  results = self._get_full_content(relevant_items)

local_deep_research/web_search_engines/engines/search_engine_serpapi.py
@@ -5,7 +5,8 @@ from langchain_core.language_models import BaseLLM

  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research import config
-
+ import logging
+ logger = logging.getLogger(__name__)

  class SerpAPISearchEngine(BaseSearchEngine):
  """Google search engine implementation using SerpAPI with two-phase approach"""
@@ -92,7 +93,7 @@
  safesearch="Moderate" if safe_search else "Off"
  )
  except ImportError:
- print("Warning: FullSearchResults not available. Full content retrieval disabled.")
+ logger.warning("Warning: FullSearchResults not available. Full content retrieval disabled.")
  self.include_full_content = False

  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
@@ -105,7 +106,7 @@
  Returns:
  List of preview dictionaries
  """
- print("Getting search results from SerpAPI")
+ logger.info("Getting search results from SerpAPI")

  try:
  # Get search results from SerpAPI
@@ -134,7 +135,7 @@
  return previews

  except Exception as e:
- print(f"Error getting SerpAPI results: {e}")
+ logger.error(f"Error getting SerpAPI results: {e}")
  return []

  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -151,7 +152,7 @@
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
- print("Snippet-only mode, skipping full content retrieval")
+ logger.info("Snippet-only mode, skipping full content retrieval")

  # Return the relevant items with their full SerpAPI information
  results = []
@@ -171,7 +172,7 @@

  # If full content retrieval is enabled
  if self.include_full_content and hasattr(self, 'full_search'):
- print("Retrieving full webpage content")
+ logger.info("Retrieving full webpage content")

  try:
  # Extract only the links from relevant items
@@ -185,7 +186,7 @@
  return results_with_content

  except Exception as e:
- print(f"Error retrieving full content: {e}")
+ logger.info(f"Error retrieving full content: {e}")
  # Fall back to returning the items without full content

  # Return items with their full SerpAPI information
@@ -216,7 +217,7 @@
  Returns:
  List of search results
  """
- print("---Execute a search using SerpAPI (Google)---")
+ logger.info("---Execute a search using SerpAPI (Google)---")

  # Use the implementation from the parent class which handles all phases
  results = super().run(query)

local_deep_research/web_search_engines/search_engine_base.py
@@ -53,7 +53,7 @@ class BaseSearchEngine(ABC):
  List of search results with full content (if available)
  """
  # Ensure we're measuring time correctly for citation tracking
- start_time = datetime.now()
+

  # Step 1: Get preview information for items
  previews = self._get_previews(query)
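
The base-class hunk above drops an unused start_time assignment from BaseSearchEngine.run(). For orientation, here is a schematic of the two-phase flow that run() drives and that the engine hunks keep referring to, with previews fetched first and full content only for relevant items. The class name, the _filter_for_relevance step, and the method bodies are illustrative assumptions, not the package's actual implementation:

    import logging
    from abc import ABC, abstractmethod
    from typing import Any, Dict, List

    logger = logging.getLogger(__name__)

    class TwoPhaseSearchSketch(ABC):
        """Illustrative skeleton of the two-phase search approach."""

        @abstractmethod
        def _get_previews(self, query: str) -> List[Dict[str, Any]]:
            """Phase 1: cheap preview records (title, link, snippet)."""

        @abstractmethod
        def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
            """Phase 2: fetch full content only for the items judged relevant."""

        def _filter_for_relevance(self, previews: List[Dict[str, Any]], query: str) -> List[Dict[str, Any]]:
            # Hypothetical placeholder; the real base class decides relevance elsewhere
            return previews

        def run(self, query: str) -> List[Dict[str, Any]]:
            # Step 1: Get preview information for items
            previews = self._get_previews(query)
            if not previews:
                logger.info("No previews returned")
                return []
            relevant = self._filter_for_relevance(previews, query)
            # Step 2: Get full content for the relevant items
            return self._get_full_content(relevant)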