local-deep-research 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. local_deep_research/__init__.py +24 -0
  2. local_deep_research/citation_handler.py +113 -0
  3. local_deep_research/config.py +166 -0
  4. local_deep_research/defaults/__init__.py +44 -0
  5. local_deep_research/defaults/llm_config.py +269 -0
  6. local_deep_research/defaults/local_collections.toml +47 -0
  7. local_deep_research/defaults/main.toml +57 -0
  8. local_deep_research/defaults/search_engines.toml +244 -0
  9. local_deep_research/local_collections.py +141 -0
  10. local_deep_research/main.py +113 -0
  11. local_deep_research/report_generator.py +206 -0
  12. local_deep_research/search_system.py +241 -0
  13. local_deep_research/utilties/__init__.py +0 -0
  14. local_deep_research/utilties/enums.py +9 -0
  15. local_deep_research/utilties/llm_utils.py +116 -0
  16. local_deep_research/utilties/search_utilities.py +115 -0
  17. local_deep_research/utilties/setup_utils.py +6 -0
  18. local_deep_research/web/__init__.py +2 -0
  19. local_deep_research/web/app.py +1209 -0
  20. local_deep_research/web/static/css/styles.css +1008 -0
  21. local_deep_research/web/static/js/app.js +2078 -0
  22. local_deep_research/web/templates/api_keys_config.html +82 -0
  23. local_deep_research/web/templates/collections_config.html +90 -0
  24. local_deep_research/web/templates/index.html +312 -0
  25. local_deep_research/web/templates/llm_config.html +120 -0
  26. local_deep_research/web/templates/main_config.html +89 -0
  27. local_deep_research/web/templates/search_engines_config.html +154 -0
  28. local_deep_research/web/templates/settings.html +519 -0
  29. local_deep_research/web/templates/settings_dashboard.html +207 -0
  30. local_deep_research/web_search_engines/__init__.py +0 -0
  31. local_deep_research/web_search_engines/engines/__init__.py +0 -0
  32. local_deep_research/web_search_engines/engines/full_search.py +128 -0
  33. local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
  34. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
  35. local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
  36. local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
  37. local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
  38. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
  39. local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
  40. local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
  41. local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
  42. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
  43. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
  44. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
  45. local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
  46. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
  47. local_deep_research/web_search_engines/full_search.py +254 -0
  48. local_deep_research/web_search_engines/search_engine_base.py +197 -0
  49. local_deep_research/web_search_engines/search_engine_factory.py +233 -0
  50. local_deep_research/web_search_engines/search_engines_config.py +54 -0
  51. local_deep_research-0.1.0.dist-info/LICENSE +21 -0
  52. local_deep_research-0.1.0.dist-info/METADATA +328 -0
  53. local_deep_research-0.1.0.dist-info/RECORD +56 -0
  54. local_deep_research-0.1.0.dist-info/WHEEL +5 -0
  55. local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
  56. local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
local_deep_research/web_search_engines/engines/search_engine_arxiv.py
@@ -0,0 +1,367 @@
+ import logging
+ from typing import Any, Dict, List, Optional
+
+ import arxiv
+ from langchain_core.language_models import BaseLLM
+
+ from local_deep_research import config
+ from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
+
+ logger = logging.getLogger(__name__)
+
+
+ class ArXivSearchEngine(BaseSearchEngine):
+     """arXiv search engine implementation with two-phase approach."""
+
+     def __init__(self,
+                  max_results: int = 10,
+                  sort_by: str = "relevance",
+                  sort_order: str = "descending",
+                  include_full_text: bool = False,
+                  download_dir: Optional[str] = None,
+                  max_full_text: int = 1,
+                  llm: Optional[BaseLLM] = None,
+                  max_filtered_results: Optional[int] = None):
+         """
+         Initialize the arXiv search engine.
+
+         Args:
+             max_results: Maximum number of search results (capped at 20)
+             sort_by: Sorting criteria ('relevance', 'lastUpdatedDate', or 'submittedDate')
+             sort_order: Sort order ('ascending' or 'descending')
+             include_full_text: Whether to include full paper content in results (downloads PDF)
+             download_dir: Directory to download PDFs to (if include_full_text is True)
+             max_full_text: Maximum number of PDFs to download and process (default: 1)
+             llm: Language model for relevance filtering
+             max_filtered_results: Maximum number of results to keep after filtering
+         """
+         # Initialize the BaseSearchEngine with the LLM and max_filtered_results
+         super().__init__(llm=llm, max_filtered_results=max_filtered_results)
+
+         # Cap the requested value at 20; the original code hard-coded 20
+         # with a TODO and a note that the cap is required for arXiv
+         self.max_results = min(max_results, 20)
+         self.sort_by = sort_by
+         self.sort_order = sort_order
+         self.include_full_text = include_full_text
+         self.download_dir = download_dir
+         self.max_full_text = max_full_text
+
+         # Map sort parameters to arxiv package parameters
+         self.sort_criteria = {
+             'relevance': arxiv.SortCriterion.Relevance,
+             'lastUpdatedDate': arxiv.SortCriterion.LastUpdatedDate,
+             'submittedDate': arxiv.SortCriterion.SubmittedDate
+         }
+
+         self.sort_directions = {
+             'ascending': arxiv.SortOrder.Ascending,
+             'descending': arxiv.SortOrder.Descending
+         }
+
+     def _get_search_results(self, query: str) -> List[Any]:
+         """
+         Helper method to get search results from the arXiv API.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of arXiv paper objects
+         """
+         # Resolve the configured sort settings, defaulting to relevance/descending
+         sort_criteria = self.sort_criteria.get(self.sort_by, arxiv.SortCriterion.Relevance)
+         sort_order = self.sort_directions.get(self.sort_order, arxiv.SortOrder.Descending)
+
+         # Create the search client
+         client = arxiv.Client(page_size=self.max_results)
+
+         # Create the search query
+         search = arxiv.Search(
+             query=query,
+             max_results=self.max_results,
+             sort_by=sort_criteria,
+             sort_order=sort_order
+         )
+
+         # Get the search results
+         return list(client.results(search))
+
+     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Get preview information for arXiv papers.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of preview dictionaries
+         """
+         logger.info("Getting paper previews from arXiv")
+
+         try:
+             # Get search results from arXiv
+             papers = self._get_search_results(query)
+
+             # Store the paper objects for later use
+             self._papers = {paper.entry_id: paper for paper in papers}
+
+             # Format results as previews with basic information
+             previews = []
+             for paper in papers:
+                 preview = {
+                     "id": paper.entry_id,  # Use entry_id as ID
+                     "title": paper.title,
+                     "link": paper.entry_id,  # arXiv URL
+                     "snippet": (paper.summary[:250] + "...") if len(paper.summary) > 250 else paper.summary,
+                     "authors": [author.name for author in paper.authors[:3]],  # First 3 authors
+                     "published": paper.published.strftime("%Y-%m-%d") if paper.published else None
+                 }
+
+                 previews.append(preview)
+
+             return previews
+
+         except Exception as e:
+             logger.error(f"Error getting arXiv previews: {e}")
+             return []
+
+     def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """
+         Get full content for the relevant arXiv papers.
+         Downloads PDFs and extracts text when include_full_text is True.
+         Limits the number of PDFs processed to max_full_text.
+
+         Args:
+             relevant_items: List of relevant preview dictionaries
+
+         Returns:
+             List of result dictionaries with full content
+         """
+         # Check if we should get full content
+         if getattr(config, 'SEARCH_SNIPPETS_ONLY', False):
+             logger.info("Snippet-only mode, skipping full content retrieval")
+             return relevant_items
+
+         logger.info("Getting full content for relevant arXiv papers")
+
+         results = []
+         pdf_count = 0  # Track number of PDFs processed
+
+         for item in relevant_items:
+             # Start with the preview data
+             result = item.copy()
+
+             # Get the paper ID
+             paper_id = item.get("id")
+
+             # Try to get the full paper from our cache
+             paper = None
+             if hasattr(self, '_papers') and paper_id in self._papers:
+                 paper = self._papers[paper_id]
+
+             if paper:
+                 # Add complete paper information
+                 result.update({
+                     "pdf_url": paper.pdf_url,
+                     "authors": [author.name for author in paper.authors],  # All authors
+                     "published": paper.published.strftime("%Y-%m-%d") if paper.published else None,
+                     "updated": paper.updated.strftime("%Y-%m-%d") if paper.updated else None,
+                     "categories": paper.categories,
+                     "summary": paper.summary,  # Full summary
+                     "comment": paper.comment,
+                     "journal_ref": paper.journal_ref,
+                     "doi": paper.doi
+                 })
+
+                 # Default to using the summary as content
+                 result["content"] = paper.summary
+                 result["full_content"] = paper.summary
+
+                 # Download the PDF and extract text if requested and within limit
+                 if self.include_full_text and self.download_dir and pdf_count < self.max_full_text:
+                     try:
+                         # Download the paper
+                         pdf_count += 1  # Increment counter before attempting download
+                         paper_path = paper.download_pdf(dirpath=self.download_dir)
+                         result["pdf_path"] = str(paper_path)
+
+                         # Extract text from the PDF, trying PyPDF2 first
+                         try:
+                             import PyPDF2
+                             with open(paper_path, 'rb') as pdf_file:
+                                 pdf_reader = PyPDF2.PdfReader(pdf_file)
+                                 pdf_text = ""
+                                 for page in pdf_reader.pages:
+                                     pdf_text += (page.extract_text() or "") + "\n\n"
+
+                             if pdf_text.strip():  # Only use if we got meaningful text
+                                 result["content"] = pdf_text
+                                 result["full_content"] = pdf_text
+                                 logger.info("Successfully extracted text from PDF using PyPDF2")
+                         except Exception as e1:  # Exception already covers ImportError
+                             # Fall back to pdfplumber
+                             try:
+                                 import pdfplumber
+                                 with pdfplumber.open(paper_path) as pdf:
+                                     pdf_text = ""
+                                     for page in pdf.pages:
+                                         pdf_text += (page.extract_text() or "") + "\n\n"
+
+                                 if pdf_text.strip():  # Only use if we got meaningful text
+                                     result["content"] = pdf_text
+                                     result["full_content"] = pdf_text
+                                     logger.info("Successfully extracted text from PDF using pdfplumber")
+                             except Exception as e2:
+                                 logger.warning(f"PDF text extraction failed: {e1}, then {e2}")
+                                 logger.warning("Using paper summary as content instead")
+                     except Exception as e:
+                         logger.error(f"Error downloading paper {paper.title}: {e}")
+                         result["pdf_path"] = None
+                         pdf_count -= 1  # Roll back the counter if the download fails
+                 elif self.include_full_text and self.download_dir and pdf_count >= self.max_full_text:
+                     # Reached the PDF limit
+                     logger.info(f"Maximum number of PDFs ({self.max_full_text}) reached. Skipping remaining PDFs.")
+                     result["content"] = paper.summary
+                     result["full_content"] = paper.summary
+
+             results.append(result)
+
+         return results
+
+     def run(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Execute a search using arXiv with the two-phase approach.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of search results
+         """
+         logger.info("---Execute a search using arXiv---")
+
+         # Use the implementation from the parent class, which handles all phases
+         results = super().run(query)
+
+         # Clean up the paper cache
+         if hasattr(self, '_papers'):
+             del self._papers
+
+         return results
+
+     def get_paper_details(self, arxiv_id: str) -> Dict[str, Any]:
+         """
+         Get detailed information about a specific arXiv paper.
+
+         Args:
+             arxiv_id: arXiv ID of the paper (e.g., '2101.12345')
+
+         Returns:
+             Dictionary with paper information
+         """
+         try:
+             # Create the search client
+             client = arxiv.Client()
+
+             # Search for the specific paper
+             search = arxiv.Search(id_list=[arxiv_id], max_results=1)
+
+             # Get the paper
+             papers = list(client.results(search))
+             if not papers:
+                 return {}
+
+             paper = papers[0]
+
+             # Format the result based on config
+             result = {
+                 "title": paper.title,
+                 "link": paper.entry_id,
+                 "snippet": (paper.summary[:250] + "...") if len(paper.summary) > 250 else paper.summary,
+                 "authors": [author.name for author in paper.authors[:3]]  # First 3 authors
+             }
+
+             # Add full content if not in snippet-only mode
+             if not getattr(config, 'SEARCH_SNIPPETS_ONLY', False):
+                 result.update({
+                     "pdf_url": paper.pdf_url,
+                     "authors": [author.name for author in paper.authors],  # All authors
+                     "published": paper.published.strftime("%Y-%m-%d") if paper.published else None,
+                     "updated": paper.updated.strftime("%Y-%m-%d") if paper.updated else None,
+                     "categories": paper.categories,
+                     "summary": paper.summary,  # Full summary
+                     "comment": paper.comment,
+                     "journal_ref": paper.journal_ref,
+                     "doi": paper.doi,
+                     "content": paper.summary,  # Use summary as content
+                     "full_content": paper.summary  # For consistency
+                 })
+
+                 # Download the PDF if requested
+                 if self.include_full_text and self.download_dir:
+                     try:
+                         paper_path = paper.download_pdf(dirpath=self.download_dir)
+                         result["pdf_path"] = str(paper_path)
+                     except Exception as e:
+                         logger.error(f"Error downloading paper: {e}")
+
+             return result
+
+         except Exception as e:
+             logger.error(f"Error getting paper details: {e}")
+             return {}
+
+     def search_by_author(self, author_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
+         """
+         Search for papers by a specific author.
+
+         Args:
+             author_name: Name of the author
+             max_results: Maximum number of results (defaults to self.max_results)
+
+         Returns:
+             List of papers by the author
+         """
+         original_max_results = self.max_results
+
+         try:
+             if max_results:
+                 self.max_results = max_results
+
+             query = f'au:"{author_name}"'
+             return self.run(query)
+
+         finally:
+             # Restore the original value
+             self.max_results = original_max_results
+
+     def search_by_category(self, category: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
+         """
+         Search for papers in a specific arXiv category.
+
+         Args:
+             category: arXiv category (e.g., 'cs.AI', 'physics.optics')
+             max_results: Maximum number of results (defaults to self.max_results)
+
+         Returns:
+             List of papers in the category
+         """
+         original_max_results = self.max_results
+
+         try:
+             if max_results:
+                 self.max_results = max_results
+
+             query = f"cat:{category}"
+             return self.run(query)
+
+         finally:
+             # Restore the original value
+             self.max_results = original_max_results
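
The class above delegates its two-phase flow to BaseSearchEngine.run(): _get_previews() fetches lightweight metadata, the base class filters for relevance, and _get_full_content() enriches only the survivors. As a minimal usage sketch (hypothetical driver code, not part of this wheel), assuming the arxiv dependency is installed and that BaseSearchEngine tolerates llm=None by skipping relevance filtering:

    # Hypothetical usage sketch; everything outside the diff is illustrative.
    from local_deep_research.web_search_engines.engines.search_engine_arxiv import ArXivSearchEngine

    engine = ArXivSearchEngine(
        max_results=5,             # capped at 20 internally
        sort_by="submittedDate",   # newest submissions first
        include_full_text=False,   # previews and summaries only, no PDF downloads
        llm=None,                  # assumption: no LLM means no relevance filtering
    )
    for paper in engine.run("retrieval augmented generation"):
        print(paper["published"], paper["title"], paper["link"])
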
local_deep_research/web_search_engines/engines/search_engine_brave.py
@@ -0,0 +1,245 @@
+ import json
+ import logging
+ import os
+ from typing import Any, Dict, List, Optional
+
+ from langchain_community.tools import BraveSearch
+ from langchain_core.language_models import BaseLLM
+
+ from local_deep_research import config
+ from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
+
+ logger = logging.getLogger(__name__)
+
+
+ class BraveSearchEngine(BaseSearchEngine):
+     """Brave search engine implementation with two-phase approach."""
+
+     def __init__(self,
+                  max_results: int = 10,
+                  region: str = "US",
+                  time_period: str = "y",
+                  safe_search: bool = True,
+                  search_language: str = "English",
+                  api_key: Optional[str] = None,
+                  language_code_mapping: Optional[Dict[str, str]] = None,
+                  llm: Optional[BaseLLM] = None,
+                  include_full_content: bool = False,
+                  max_filtered_results: Optional[int] = None,
+                  **kwargs):
+         """
+         Initialize the Brave search engine.
+
+         Args:
+             max_results: Maximum number of search results
+             region: Region code for search results
+             time_period: Time period for search results
+             safe_search: Whether to enable safe search
+             search_language: Language for search results
+             api_key: Brave Search API key (can also be set via the BRAVE_API_KEY environment variable)
+             language_code_mapping: Mapping from language names to codes
+             llm: Language model for relevance filtering
+             include_full_content: Whether to include full webpage content in results
+             max_filtered_results: Maximum number of results to keep after filtering
+             **kwargs: Additional parameters (ignored but accepted for compatibility)
+         """
+         # Initialize the BaseSearchEngine with the LLM and max_filtered_results
+         super().__init__(llm=llm, max_filtered_results=max_filtered_results)
+
+         self.max_results = max_results
+         self.include_full_content = include_full_content
+
+         # Set up the language code mapping
+         if language_code_mapping is None:
+             language_code_mapping = {
+                 "english": "en",
+                 "spanish": "es",
+                 "chinese": "zh",
+                 "hindi": "hi",
+                 "french": "fr",
+                 "arabic": "ar",
+                 "bengali": "bn",
+                 "portuguese": "pt",
+                 "russian": "ru",
+             }
+
+         # Get the API key
+         brave_api_key = api_key or os.getenv("BRAVE_API_KEY")
+         if not brave_api_key:
+             raise ValueError("BRAVE_API_KEY not found. Please provide api_key or set the BRAVE_API_KEY environment variable.")
+
+         # Get the language code
+         language_code = language_code_mapping.get(search_language.lower(), "en")
+
+         # Convert the time period to Brave's format
+         brave_time_period = f"p{time_period}"
+
+         # Convert safe search to Brave's format
+         brave_safe_search = "moderate" if safe_search else "off"
+
+         # Initialize Brave Search
+         self.engine = BraveSearch.from_api_key(
+             api_key=brave_api_key,
+             search_kwargs={
+                 "count": min(20, max_results),
+                 "country": region.upper(),
+                 "search_lang": language_code,
+                 "safesearch": brave_safe_search,
+                 "freshness": brave_time_period,
+             }
+         )
+
+         # Set the user agent for Brave Search
+         os.environ["USER_AGENT"] = "Local Deep Research/1.0"
+
+         # If full content is requested, initialize FullSearchResults
+         if include_full_content:
+             # Import FullSearchResults only if needed
+             try:
+                 from local_deep_research.web_search_engines.engines.full_search import FullSearchResults
+                 self.full_search = FullSearchResults(
+                     llm=llm,
+                     web_search=self.engine,
+                     language=search_language,
+                     max_results=max_results,
+                     region=region,
+                     time=time_period,
+                     safesearch=brave_safe_search
+                 )
+             except ImportError:
+                 logger.warning("FullSearchResults not available. Full content retrieval disabled.")
+                 self.include_full_content = False
+
+     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Get preview information from Brave Search.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of preview dictionaries
+         """
+         logger.info("Getting search results from Brave Search")
+
+         try:
+             # Get search results from Brave Search (the query is truncated
+             # to 400 characters to stay within Brave's query limit)
+             raw_results = self.engine.run(query[:400])
+
+             # Parse results if they arrive in string format
+             if isinstance(raw_results, str):
+                 try:
+                     raw_results = json.loads(raw_results)
+                 except json.JSONDecodeError:
+                     logger.error("Unable to parse the BraveSearch response as JSON.")
+                     return []
+
+             # Format results as previews
+             previews = []
+             for i, result in enumerate(raw_results):
+                 preview = {
+                     "id": i,  # Use the index as ID
+                     "title": result.get("title", ""),
+                     "link": result.get("link", ""),
+                     "snippet": result.get("snippet", ""),
+                     "displayed_link": result.get("link", ""),
+                     "position": i
+                 }
+
+                 # Store the full Brave result for later
+                 preview["_full_result"] = result
+
+                 previews.append(preview)
+
+             # Store the previews for potential full content retrieval
+             self._search_results = previews
+
+             return previews
+
+         except Exception as e:
+             logger.error(f"Error getting Brave Search results: {e}")
+             return []
+
+     def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """
+         Get full content for the relevant search results.
+         If include_full_content is True and FullSearchResults is available,
+         retrieves full webpage content for the results.
+
+         Args:
+             relevant_items: List of relevant preview dictionaries
+
+         Returns:
+             List of result dictionaries with full content if requested
+         """
+         # Check if we should get full content
+         if getattr(config, 'SEARCH_SNIPPETS_ONLY', False):
+             logger.info("Snippet-only mode, skipping full content retrieval")
+
+             # Return the relevant items with their full Brave information
+             results = []
+             for item in relevant_items:
+                 # Use the full result if available, otherwise the preview
+                 if "_full_result" in item:
+                     result = item["_full_result"].copy()
+                 else:
+                     result = item.copy()
+                 # Remove the temporary field so it does not leak into the output
+                 result.pop("_full_result", None)
+
+                 results.append(result)
+
+             return results
+
+         # If full content retrieval is enabled
+         if self.include_full_content and hasattr(self, 'full_search'):
+             logger.info("Retrieving full webpage content")
+
+             try:
+                 # Use FullSearchResults to get full content; it extracts
+                 # the links from the relevant items itself
+                 return self.full_search._get_full_content(relevant_items)
+
+             except Exception as e:
+                 logger.error(f"Error retrieving full content: {e}")
+                 # Fall back to returning the items without full content
+
+         # Return items with their full Brave information
+         results = []
+         for item in relevant_items:
+             # Use the full result if available, otherwise the preview
+             if "_full_result" in item:
+                 result = item["_full_result"].copy()
+             else:
+                 result = item.copy()
+             # Remove the temporary field so it does not leak into the output
+             result.pop("_full_result", None)
+
+             results.append(result)
+
+         return results
+
+     def run(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Execute a search using Brave Search with the two-phase approach.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of search results
+         """
+         logger.info("---Execute a search using Brave Search---")
+
+         # Use the implementation from the parent class, which handles all phases
+         results = super().run(query)
+
+         # Clean up
+         if hasattr(self, '_search_results'):
+             del self._search_results
+
+         return results
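
As with the arXiv engine, a minimal usage sketch (hypothetical driver code, not part of this wheel) may help. It assumes a valid key is exported as BRAVE_API_KEY and that BaseSearchEngine tolerates llm=None by skipping relevance filtering:

    # Hypothetical usage sketch; everything outside the diff is illustrative.
    import os

    from local_deep_research.web_search_engines.engines.search_engine_brave import BraveSearchEngine

    os.environ.setdefault("BRAVE_API_KEY", "<your-brave-api-key>")  # placeholder value

    engine = BraveSearchEngine(
        max_results=5,
        region="US",
        search_language="English",
        llm=None,                  # assumption: no LLM means no relevance filtering
    )
    for result in engine.run("local deep research assistants"):
        print(result.get("title"), result.get("link"))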