local-deep-research 0.1.15__py3-none-any.whl → 0.1.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/citation_handler.py +0 -2
- local_deep_research/config.py +1 -4
- local_deep_research/defaults/llm_config.py +2 -2
- local_deep_research/defaults/main.toml +3 -3
- local_deep_research/defaults/search_engines.toml +2 -2
- local_deep_research/report_generator.py +1 -5
- local_deep_research/search_system.py +9 -10
- local_deep_research/utilties/search_utilities.py +3 -4
- local_deep_research/web_search_engines/engines/full_search.py +9 -8
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -14
- local_deep_research/web_search_engines/engines/search_engine_brave.py +10 -9
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -2
- local_deep_research/web_search_engines/engines/search_engine_local.py +1 -1
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +102 -661
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +9 -8
- local_deep_research/web_search_engines/search_engine_base.py +6 -15
- local_deep_research-0.1.17.dist-info/METADATA +393 -0
- {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/RECORD +22 -24
- local_deep_research/local_collections.py +0 -141
- local_deep_research/web_search_engines/full_search.py +0 -254
- local_deep_research-0.1.15.dist-info/METADATA +0 -346
- {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/WHEEL +0 -0
- {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/licenses/LICENSE +0 -0
- {local_deep_research-0.1.15.dist-info → local_deep_research-0.1.17.dist-info}/top_level.txt +0 -0
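Most of the code changes below follow one pattern: ad-hoc print() calls from 0.1.15 are deleted or rerouted through module-level loggers, and several engine modules gain a logger of their own. A minimal sketch of that pattern as it appears in the hunks (logger names and messages are taken from the diff; how the root logger is configured is outside this diff):

```python
import logging

# Module-level logger, added in 0.1.17 to full_search.py,
# search_engine_brave.py and search_engine_ddg.py.
logger = logging.getLogger(__name__)


def announce_search(engine_name: str) -> None:
    # 0.1.15 emitted messages like this with bare print() calls;
    # 0.1.17 routes them through the logging module instead.
    logger.info(f"---Execute a search using {engine_name}---")
```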
local_deep_research/citation_handler.py
CHANGED
@@ -50,7 +50,6 @@ class CitationHandler:
 
  documents = self._create_documents(search_results)
  formatted_sources = self._format_sources(documents)
- print(formatted_sources)
  prompt = f"""Analyze the following information concerning the question and include citations using numbers in square brackets [1], [2], etc. When citing, use the source number provided at the start of each source.
 
  Question: {query}
@@ -75,7 +74,6 @@ Provide a detailed analysis with citations and always keep URLS. Never make up s
  """Process follow-up analysis with citations."""
  documents = self._create_documents(search_results, nr_of_links=nr_of_links)
  formatted_sources = self._format_sources(documents)
- print(formatted_sources)
  # Add fact-checking step
  fact_check_prompt = f"""Analyze these sources for factual consistency:
  1. Cross-reference major claims between sources
local_deep_research/config.py
CHANGED
@@ -20,7 +20,7 @@ def get_config_dir():
  from platformdirs import user_config_dir
  config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))
 
-
+ logger.info(f"Looking for config in: {config_dir}")
  return config_dir
  # Define config paths
  CONFIG_DIR = get_config_dir() / "config"
@@ -31,9 +31,6 @@ LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
  SEARCH_ENGINES_FILE = CONFIG_DIR / "search_engines.toml"
 
  LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
- print("CONFIGDIR:", CONFIG_DIR)
- print("SECRETS_FILE:", SECRETS_FILE)
- print("SETTINGS_FILE:", SETTINGS_FILE)
 
 
  # Set environment variable for Dynaconf to use
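The config.py change swaps a debug print for a logger call inside get_config_dir(), which resolves a per-user configuration directory via platformdirs. A sketch of roughly what the function looks like after the change; only the lines shown in the hunks above are certain, the rest is a reasonable reconstruction for illustration:

```python
import logging
from pathlib import Path

logger = logging.getLogger(__name__)


def get_config_dir() -> Path:
    # Resolve a per-user config directory, e.g. ~/.config/local_deep_research on Linux.
    from platformdirs import user_config_dir

    config_dir = Path(user_config_dir("local_deep_research", "LearningCircuit"))
    logger.info(f"Looking for config in: {config_dir}")
    return config_dir


# Config file locations derived from it, as in the second hunk above.
CONFIG_DIR = get_config_dir() / "config"
SEARCH_ENGINES_FILE = CONFIG_DIR / "search_engines.toml"
LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
```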
local_deep_research/defaults/llm_config.py
CHANGED
@@ -34,7 +34,7 @@ class ModelProvider(Enum):
  DEFAULT_PROVIDER = ModelProvider.OLLAMA # Change this to your preferred provider
 
  # Set your default model name here
- DEFAULT_MODEL = "
+ DEFAULT_MODEL = "gemma3:12b" # Your default model
 
  # Set default model parameters
  DEFAULT_TEMPERATURE = 0.7
@@ -210,7 +210,7 @@ def is_anthropic_available():
 
  def is_openai_endpoint_available():
  """Check if OpenAI endpoint is available"""
-
+
  try:
  api_key = settings.OPENAI_ENDPOINT_API_KEY
  return bool(api_key)
local_deep_research/defaults/main.toml
CHANGED
@@ -26,13 +26,13 @@ debug = true
  tool = "auto"
 
  # Number of research cycles
- iterations =
+ iterations = 2
 
  # Questions generated per cycle
- questions_per_iteration =
+ questions_per_iteration = 2
 
  # Searches per report section
- searches_per_section =
+ searches_per_section = 2
 
  # Results per search query
  max_results = 50
local_deep_research/defaults/search_engines.toml
CHANGED
@@ -37,7 +37,7 @@ module_path = "local_deep_research.web_search_engines.engines.search_engine_pubm
  class_name = "PubMedSearchEngine"
  requires_api_key = false
  api_key_env = "NCBI_API_KEY"
- reliability = 0.
+ reliability = 0.98
  strengths = [
  "biomedical literature", "medical research", "clinical studies",
  "life sciences", "health information", "scientific papers"
@@ -191,7 +191,7 @@ module_path = "local_deep_research.web_search_engines.engines.search_engine_sema
  class_name = "SemanticScholarSearchEngine"
  requires_api_key = false
  api_key_env = "S2_API_KEY"
- reliability = 0.
+ reliability = 0.87
  strengths = [
  "comprehensive scientific literature",
  "extensive citation network",
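The search_engines.toml hunks fill in reliability scores that were left blank in 0.1.15 (0.98 for PubMed, 0.87 for Semantic Scholar). In the package these defaults are picked up through Dynaconf as pointed at by config.py; purely to illustrate what the field describes, the hedged sketch below loads the TOML directly and ranks engines by reliability (the file path is an assumption, and the field names follow the hunks above):

```python
import tomllib  # Python 3.11+ standard library
from pathlib import Path

# Hypothetical path; the real location comes from get_config_dir() in config.py.
SEARCH_ENGINES_FILE = Path.home() / ".config" / "local_deep_research" / "config" / "search_engines.toml"

with SEARCH_ENGINES_FILE.open("rb") as f:
    engines = tomllib.load(f)

# Each top-level table describes one engine: class_name, requires_api_key,
# api_key_env, reliability, strengths, ... (see the hunks above).
ranked = sorted(
    ((name, cfg.get("reliability", 0.0)) for name, cfg in engines.items() if isinstance(cfg, dict)),
    key=lambda item: item[1],
    reverse=True,
)
for name, reliability in ranked:
    print(f"{name}: reliability={reliability}")
```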
local_deep_research/report_generator.py
CHANGED
@@ -15,10 +15,6 @@ class IntegratedReportGenerator:
  searches_per_section # Control search depth per section
  )
 
- def _remove_think_tags(self, text: str) -> str:
- print(text)
- return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
-
  def generate_report(self, initial_findings: Dict, query: str) -> Dict:
  """Generate a complete research report with section-specific research."""
 
@@ -63,7 +59,7 @@ class IntegratedReportGenerator:
  Each subsection must include its purpose after the | symbol.
  """
 
- response =
+ response = search_utilities.remove_think_tags(self.model.invoke(prompt).content)
 
  # Parse the structure
  structure = []
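In report_generator.py the private _remove_think_tags helper, which also printed every response, is deleted in favor of the shared utility in utilties/search_utilities.py, and the structure-generation step now strips <think>...</think> blocks from the model output before parsing. A small sketch of the shared helper and a usage example (the helper body is taken verbatim from the search_utilities.py hunk further down; the sample string is invented for illustration):

```python
import re


def remove_think_tags(text: str) -> str:
    # Strip <think>...</think> reasoning blocks emitted by some local models;
    # the 0.1.17 version no longer prints the cleaned text.
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()


raw = "<think>planning the outline...</think>1. Introduction | why the topic matters"
print(remove_think_tags(raw))  # -> "1. Introduction | why the topic matters"
```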
local_deep_research/search_system.py
CHANGED
@@ -1,6 +1,6 @@
  from typing import Dict, List, Optional, Callable
  from datetime import datetime
- from .utilties.search_utilities import remove_think_tags, format_findings_to_text,
+ from .utilties.search_utilities import remove_think_tags, format_findings_to_text, format_links
  import os
  from .utilties.enums import KnowledgeAccumulationApproach
  from .config import settings, get_llm, get_search
@@ -28,7 +28,7 @@ class AdvancedSearchSystem:
 
  # Check if search is available, log warning if not
  if self.search is None:
-
+ logger.info("WARNING: Search system initialized with no search engine! Research will not be effective.")
  self._update_progress("WARNING: No search engine available", None, {"error": "No search engine configured properly"})
 
 
@@ -101,7 +101,7 @@ class AdvancedSearchSystem:
  self._update_progress("Knowledge compression complete", None)
  response = remove_think_tags(response.content)
  response = str(response) #+ "\n\n" + str(formatted_links)
-
+
  return response
 
  def analyze_topic(self, query: str) -> Dict:
@@ -165,7 +165,7 @@ class AdvancedSearchSystem:
  search_results = self.search.run(question)
  except Exception as e:
  error_msg = f"Error during search: {str(e)}"
-
+ logger.info(f"SEARCH ERROR: {error_msg}")
  self._update_progress(error_msg,
  int(question_progress_base + 2),
  {"phase": "search_error", "error": str(e)})
@@ -190,7 +190,7 @@ class AdvancedSearchSystem:
  self._update_progress(f"Analyzing results for: {question}",
  int(question_progress_base + 5),
  {"phase": "analysis"})
-
+
 
  try:
  result = self.citation_handler.analyze_followup(
@@ -203,7 +203,7 @@ class AdvancedSearchSystem:
  if links:
  formatted_links=format_links(links=links)
 
- logger.
+ logger.info(f"Generated questions: {formatted_links}")
  if result is not None:
  results_with_links = str(result["content"])
  findings.append(
@@ -219,7 +219,6 @@ class AdvancedSearchSystem:
  if settings.general.knowledge_accumulation != str(KnowledgeAccumulationApproach.NO_KNOWLEDGE.value):
  current_knowledge = current_knowledge + "\n\n\n New: \n" + results_with_links
 
- logger.info(settings.general.knowledge_accumulation)
  if settings.general.knowledge_accumulation == str(KnowledgeAccumulationApproach.QUESTION.value):
  logger.info("Compressing knowledge")
  self._update_progress(f"Compress Knowledge for: {question}",
@@ -232,7 +231,7 @@ class AdvancedSearchSystem:
  {"phase": "analysis_complete"})
  except Exception as e:
  error_msg = f"Error analyzing results: {str(e)}"
-
+ logger.info(f"ANALYSIS ERROR: {error_msg}")
  self._update_progress(error_msg,
  int(question_progress_base + 10),
  {"phase": "analysis_error", "error": str(e)})
@@ -251,7 +250,7 @@ class AdvancedSearchSystem:
  logger.info("FINISHED ITERATION - Compressing Knowledge")
  except Exception as e:
  error_msg = f"Error compressing knowledge: {str(e)}"
-
+ logger.info(f"COMPRESSION ERROR: {error_msg}")
  self._update_progress(error_msg,
  int((iteration / total_iterations) * 100 - 3),
  {"phase": "compression_error", "error": str(e)})
@@ -266,7 +265,7 @@ class AdvancedSearchSystem:
  formatted_findings = self._save_findings(findings, current_knowledge, query)
  except Exception as e:
  error_msg = f"Error saving findings: {str(e)}"
-
+ logger.info(f"SAVE ERROR: {error_msg}")
  self._update_progress(error_msg,
  int((iteration / total_iterations) * 100),
  {"phase": "save_error", "error": str(e)})
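The search_system.py changes route progress and error messages through logger.info and make sure format_links is imported from utilties.search_utilities, so the extracted links can be logged rather than printed. format_links itself is not shown in this diff; the sketch below uses a hypothetical stand-in for it, purely to illustrate the data it receives (dicts with title, url and index, as built by extract_links_from_search_results in the next file):

```python
def format_links(links: list[dict]) -> str:
    # Hypothetical stand-in for utilties.search_utilities.format_links;
    # the real implementation is not part of this diff.
    return "\n".join(f"[{link['index']}] {link['title']} - {link['url']}" for link in links)


links = [
    {"title": "Example result", "url": "https://example.org/paper", "index": "1"},
]
print(format_links(links=links))
```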
local_deep_research/utilties/search_utilities.py
CHANGED
@@ -3,7 +3,6 @@ import re
 
  def remove_think_tags(text: str) -> str:
  text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
- print(text)
  return text
 
 
@@ -17,13 +16,13 @@ def extract_links_from_search_results(search_results: list) -> list:
  """
  links = []
  for result in search_results:
-
+
  try:
 
  title = result.get("title", "").strip()
  url = result.get("link", "").strip()
  index = result.get("index", "").strip()
-
+
  if title and url:
  links.append({"title": title, "url": url, "index": index})
  except Exception:
@@ -111,5 +110,5 @@ def print_search_results(search_results):
  links = extract_links_from_search_results(search_results)
  if links:
  formatted_text=format_links(links=links)
-
+ logger.info(formatted_text)
 
local_deep_research/web_search_engines/engines/full_search.py
CHANGED
@@ -7,6 +7,9 @@ import json, os
  from .utilties.search_utilities import remove_think_tags
  from datetime import datetime
  from local_deep_research import config
+ import logging
+ logger = logging.getLogger(__name__)
+
 
  class FullSearchResults:
  def __init__(
@@ -57,13 +60,12 @@ class FullSearchResults:
  try:
  # Get LLM's evaluation
  response = self.llm.invoke(prompt)
- # print(response)
  good_indices = json.loads(remove_think_tags(response.content))
 
  # Return only the results with good URLs
  return [r for i, r in enumerate(results) if i in good_indices]
  except Exception as e:
-
+ logger.error(f"URL filtering error: {e}")
  return []
 
  def remove_boilerplate(self, html: str) -> str:
@@ -75,9 +77,8 @@ class FullSearchResults:
 
  def run(self, query: str):
  nr_full_text = 0
- # Step 1: Get search results
+ # Step 1: Get search results
  search_results = self.web_search.invoke(query)
- #print(type(search_results))
  if not isinstance(search_results, list):
  raise ValueError("Expected the search results in list format.")
 
@@ -89,9 +90,9 @@ class FullSearchResults:
 
  # Extract URLs from filtered results
  urls = [result.get("link") for result in filtered_results if result.get("link")]
-
+
  if not urls:
-
+ logger.error("\n === NO VALID LINKS ===\n")
  return []
 
  # Step 3: Download the full HTML pages for filtered URLs
@@ -117,8 +118,8 @@ class FullSearchResults:
  link = result.get("link")
  result["full_content"] = url_to_content.get(link, None)
 
-
-
+ logger.info("FULL SEARCH WITH FILTERED URLS")
+ logger.info("Full text retrieved: ", nr_full_text)
  return filtered_results
 
  def invoke(self, query: str):
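The FullSearchResults changes keep the same flow but route diagnostics through the new module logger: ask the LLM which search results are worth fetching, strip any <think> block from its reply, parse it as a JSON list of indices, and keep only those results. A hedged sketch of that filtering step (method and attribute names follow the hunk; the prompt wording is not shown in the diff and is invented here):

```python
import json
import logging
import re

logger = logging.getLogger(__name__)


def remove_think_tags(text: str) -> str:
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()


def filter_results(llm, results: list[dict]) -> list[dict]:
    # Prompt wording is illustrative only; the real prompt lives outside this hunk.
    prompt = "Return a JSON list with the indices of the results worth reading:\n" + json.dumps(results)
    try:
        # Get LLM's evaluation and keep only the results with good URLs.
        response = llm.invoke(prompt)
        good_indices = json.loads(remove_think_tags(response.content))
        return [r for i, r in enumerate(results) if i in good_indices]
    except Exception as e:
        logger.error(f"URL filtering error: {e}")
        return []
```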
local_deep_research/web_search_engines/engines/search_engine_arxiv.py
CHANGED
@@ -121,7 +121,7 @@ class ArXivSearchEngine(BaseSearchEngine):
  return previews
 
  except Exception as e:
-
+ logger.error(f"Error getting arXiv previews: {e}")
  return []
 
  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -138,10 +138,10 @@ class ArXivSearchEngine(BaseSearchEngine):
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
-
+ logger.info("Snippet-only mode, skipping full content retrieval")
  return relevant_items
 
-
+ logger.info("Getting full content for relevant arXiv papers")
 
  results = []
  pdf_count = 0 # Track number of PDFs processed
@@ -198,7 +198,7 @@ class ArXivSearchEngine(BaseSearchEngine):
  if pdf_text.strip(): # Only use if we got meaningful text
  result["content"] = pdf_text
  result["full_content"] = pdf_text
-
+ logger.info(f"Successfully extracted text from PDF using PyPDF2")
  except (ImportError, Exception) as e1:
  # Fall back to pdfplumber
  try:
@@ -211,20 +211,20 @@ class ArXivSearchEngine(BaseSearchEngine):
  if pdf_text.strip(): # Only use if we got meaningful text
  result["content"] = pdf_text
  result["full_content"] = pdf_text
-
+ logger.info(f"Successfully extracted text from PDF using pdfplumber")
  except (ImportError, Exception) as e2:
-
-
+ logger.error(f"PDF text extraction failed: {str(e1)}, then {str(e2)}")
+ logger.error(f"Using paper summary as content instead")
  except Exception as e:
-
-
+ logger.error(f"Error extracting text from PDF: {e}")
+ logger.error(f"Using paper summary as content instead")
  except Exception as e:
-
+ logger.error(f"Error downloading paper {paper.title}: {e}")
  result["pdf_path"] = None
  pdf_count -= 1 # Decrement counter if download fails
  elif self.include_full_text and self.download_dir and pdf_count >= self.max_full_text:
  # Reached PDF limit
-
+ logger.info(f"Maximum number of PDFs ({self.max_full_text}) reached. Skipping remaining PDFs.")
  result["content"] = paper.summary
  result["full_content"] = paper.summary
 
@@ -242,7 +242,7 @@ class ArXivSearchEngine(BaseSearchEngine):
  Returns:
  List of search results
  """
-
+ logger.info("---Execute a search using arXiv---")
 
  # Use the implementation from the parent class which handles all phases
  results = super().run(query)
@@ -308,12 +308,12 @@ class ArXivSearchEngine(BaseSearchEngine):
  paper_path = paper.download_pdf(dirpath=self.download_dir)
  result["pdf_path"] = str(paper_path)
  except Exception as e:
-
+ logger.error(f"Error downloading paper: {e}")
 
  return result
 
  except Exception as e:
-
+ logger.error(f"Error getting paper details: {e}")
  return {}
 
  def search_by_author(self, author_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
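The arXiv engine's logging changes sit inside a PDF-extraction cascade: try PyPDF2 first, fall back to pdfplumber, and fall back again to the paper's summary when both fail, logging each outcome. The extraction calls themselves are outside the changed lines, so the sketch below is an assumption about how such a cascade is typically written with those two libraries, not a copy of the package's code:

```python
import logging

logger = logging.getLogger(__name__)


def extract_pdf_text(pdf_path: str, summary: str) -> str:
    try:
        # First attempt: PyPDF2
        from PyPDF2 import PdfReader

        reader = PdfReader(pdf_path)
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        if text.strip():
            logger.info("Successfully extracted text from PDF using PyPDF2")
            return text
        raise ValueError("PyPDF2 returned no text")
    except Exception as e1:
        # Fall back to pdfplumber
        try:
            import pdfplumber

            with pdfplumber.open(pdf_path) as pdf:
                text = "\n".join(page.extract_text() or "" for page in pdf.pages)
            if text.strip():
                logger.info("Successfully extracted text from PDF using pdfplumber")
                return text
            raise ValueError("pdfplumber returned no text")
        except Exception as e2:
            logger.error(f"PDF text extraction failed: {str(e1)}, then {str(e2)}")
            logger.error("Using paper summary as content instead")
            return summary
```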
local_deep_research/web_search_engines/engines/search_engine_brave.py
CHANGED
@@ -5,7 +5,8 @@ from langchain_core.language_models import BaseLLM
 
  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research import config
-
+ import logging
+ logger = logging.getLogger(__name__)
 
  class BraveSearchEngine(BaseSearchEngine):
  """Brave search engine implementation with two-phase approach"""
@@ -100,7 +101,7 @@ class BraveSearchEngine(BaseSearchEngine):
  safesearch=brave_safe_search
  )
  except ImportError:
-
+ logger.warning("Warning: FullSearchResults not available. Full content retrieval disabled.")
  self.include_full_content = False
 
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
@@ -113,7 +114,7 @@ class BraveSearchEngine(BaseSearchEngine):
  Returns:
  List of preview dictionaries
  """
-
+ logger.info("Getting search results from Brave Search")
 
  try:
  # Get search results from Brave Search
@@ -125,7 +126,7 @@ class BraveSearchEngine(BaseSearchEngine):
  import json
  raw_results = json.loads(raw_results)
  except json.JSONDecodeError:
-
+ logger.error("Error: Unable to parse BraveSearch response as JSON.")
  return []
 
  # Format results as previews
@@ -151,7 +152,7 @@ class BraveSearchEngine(BaseSearchEngine):
  return previews
 
  except Exception as e:
-
+ logger.error(f"Error getting Brave Search results: {e}")
  return []
 
  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
@@ -168,7 +169,7 @@ class BraveSearchEngine(BaseSearchEngine):
  """
  # Check if we should get full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
-
+ logger.info("Snippet-only mode, skipping full content retrieval")
 
  # Return the relevant items with their full Brave information
  results = []
@@ -188,7 +189,7 @@ class BraveSearchEngine(BaseSearchEngine):
 
  # If full content retrieval is enabled
  if self.include_full_content and hasattr(self, 'full_search'):
-
+ logger.info("Retrieving full webpage content")
 
  try:
  # Extract only the links from relevant items
@@ -200,7 +201,7 @@ class BraveSearchEngine(BaseSearchEngine):
  return results_with_content
 
  except Exception as e:
-
+ logger.error(f"Error retrieving full content: {e}")
  # Fall back to returning the items without full content
 
  # Return items with their full Brave information
@@ -231,7 +232,7 @@ class BraveSearchEngine(BaseSearchEngine):
  Returns:
  List of search results
  """
-
+ logger.info("---Execute a search using Brave Search---")
 
  # Use the implementation from the parent class which handles all phases
  results = super().run(query)
local_deep_research/web_search_engines/engines/search_engine_ddg.py
CHANGED
@@ -4,6 +4,8 @@ from langchain_core.language_models import BaseLLM
 
  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research.web_search_engines.engines.full_search import FullSearchResults # Import the FullSearchResults class
+ import logging
+ logger = logging.getLogger(__name__)
 
  class DuckDuckGoSearchEngine(BaseSearchEngine):
  """DuckDuckGo search engine implementation with two-phase retrieval"""
@@ -66,7 +68,7 @@ class DuckDuckGoSearchEngine(BaseSearchEngine):
  Returns:
  List of search results
  """
-
+ logger.info("---Execute a search using DuckDuckGo---")
 
  # Implementation of the two-phase approach (from parent class)
  return super().run(query)
@@ -103,7 +105,7 @@ class DuckDuckGoSearchEngine(BaseSearchEngine):
  return previews
 
  except Exception as e:
-
+ logger.error(f"Error getting DuckDuckGo previews: {e}")
  return []
 
  def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
local_deep_research/web_search_engines/engines/search_engine_local.py
CHANGED
@@ -764,7 +764,7 @@ class LocalSearchEngine(BaseSearchEngine):
 
  # Phase 3: Get full content for relevant items
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
-
+ logger.info("Returning snippet-only results as per config")
  results = relevant_items
  else:
  results = self._get_full_content(relevant_items)
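The same snippet-only gate appears in the arXiv, Brave and local engines: when config exposes a truthy SEARCH_SNIPPETS_ONLY flag, the engine logs that it is skipping full-content retrieval and returns the relevant previews as-is. A minimal sketch of that guard (the config object here is a stand-in for local_deep_research.config, and fetch_full_content stands in for each engine's _get_full_content):

```python
import logging
from types import SimpleNamespace

logger = logging.getLogger(__name__)

# Stand-in for local_deep_research.config, just to exercise the guard.
config = SimpleNamespace(SEARCH_SNIPPETS_ONLY=True)


def fetch_full_content(relevant_items: list[dict]) -> list[dict]:
    # Placeholder for the engine-specific _get_full_content implementation.
    return relevant_items


def get_results(relevant_items: list[dict]) -> list[dict]:
    # Phase 3: only fetch full documents when snippet-only mode is off.
    if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
        logger.info("Returning snippet-only results as per config")
        return relevant_items
    return fetch_full_content(relevant_items)
```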