local_deep_research-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. local_deep_research/__init__.py +24 -0
  2. local_deep_research/citation_handler.py +113 -0
  3. local_deep_research/config.py +166 -0
  4. local_deep_research/defaults/__init__.py +44 -0
  5. local_deep_research/defaults/llm_config.py +269 -0
  6. local_deep_research/defaults/local_collections.toml +47 -0
  7. local_deep_research/defaults/main.toml +57 -0
  8. local_deep_research/defaults/search_engines.toml +244 -0
  9. local_deep_research/local_collections.py +141 -0
  10. local_deep_research/main.py +113 -0
  11. local_deep_research/report_generator.py +206 -0
  12. local_deep_research/search_system.py +241 -0
  13. local_deep_research/utilties/__init__.py +0 -0
  14. local_deep_research/utilties/enums.py +9 -0
  15. local_deep_research/utilties/llm_utils.py +116 -0
  16. local_deep_research/utilties/search_utilities.py +115 -0
  17. local_deep_research/utilties/setup_utils.py +6 -0
  18. local_deep_research/web/__init__.py +2 -0
  19. local_deep_research/web/app.py +1209 -0
  20. local_deep_research/web/static/css/styles.css +1008 -0
  21. local_deep_research/web/static/js/app.js +2078 -0
  22. local_deep_research/web/templates/api_keys_config.html +82 -0
  23. local_deep_research/web/templates/collections_config.html +90 -0
  24. local_deep_research/web/templates/index.html +312 -0
  25. local_deep_research/web/templates/llm_config.html +120 -0
  26. local_deep_research/web/templates/main_config.html +89 -0
  27. local_deep_research/web/templates/search_engines_config.html +154 -0
  28. local_deep_research/web/templates/settings.html +519 -0
  29. local_deep_research/web/templates/settings_dashboard.html +207 -0
  30. local_deep_research/web_search_engines/__init__.py +0 -0
  31. local_deep_research/web_search_engines/engines/__init__.py +0 -0
  32. local_deep_research/web_search_engines/engines/full_search.py +128 -0
  33. local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
  34. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
  35. local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
  36. local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
  37. local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
  38. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
  39. local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
  40. local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
  41. local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
  42. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
  43. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
  44. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
  45. local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
  46. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
  47. local_deep_research/web_search_engines/full_search.py +254 -0
  48. local_deep_research/web_search_engines/search_engine_base.py +197 -0
  49. local_deep_research/web_search_engines/search_engine_factory.py +233 -0
  50. local_deep_research/web_search_engines/search_engines_config.py +54 -0
  51. local_deep_research-0.1.0.dist-info/LICENSE +21 -0
  52. local_deep_research-0.1.0.dist-info/METADATA +328 -0
  53. local_deep_research-0.1.0.dist-info/RECORD +56 -0
  54. local_deep_research-0.1.0.dist-info/WHEEL +5 -0
  55. local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
  56. local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
local_deep_research/web_search_engines/engines/search_engine_google_pse.py
@@ -0,0 +1,283 @@
+ from typing import Dict, List, Any, Optional
+ import os
+ import requests
+ import time
+ import random
+ import logging
+ from requests.exceptions import RequestException
+ from urllib.parse import quote_plus
+ from langchain_core.language_models import BaseLLM
+
+ from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ class GooglePSESearchEngine(BaseSearchEngine):
+     """Google Programmable Search Engine implementation"""
+
+     def __init__(self,
+                  max_results: int = 10,
+                  region: str = "us",
+                  safe_search: bool = True,
+                  search_language: str = "English",
+                  api_key: Optional[str] = None,
+                  search_engine_id: Optional[str] = None,
+                  llm: Optional[BaseLLM] = None,
+                  include_full_content: bool = False,
+                  max_filtered_results: Optional[int] = None,
+                  max_retries: int = 3,
+                  retry_delay: float = 2.0,
+                  **kwargs):
+         """
+         Initialize the Google Programmable Search Engine.
+
+         Args:
+             max_results: Maximum number of search results
+             region: Region code for search results
+             safe_search: Whether to enable safe search
+             search_language: Language for search results
+             api_key: Google API key (can also be set in GOOGLE_PSE_API_KEY env)
+             search_engine_id: Google CSE ID (can also be set in GOOGLE_PSE_ENGINE_ID env)
+             llm: Language model for relevance filtering
+             include_full_content: Whether to include full webpage content in results
+             max_filtered_results: Maximum number of results to keep after filtering
+             max_retries: Maximum number of retry attempts for API requests
+             retry_delay: Base delay in seconds between retry attempts
+             **kwargs: Additional parameters (ignored but accepted for compatibility)
+         """
+         # Initialize the BaseSearchEngine with the LLM and max_filtered_results
+         super().__init__(llm=llm, max_filtered_results=max_filtered_results)
+
+         self.max_results = max_results
+         self.include_full_content = include_full_content
+
+         # Retry configuration
+         self.max_retries = max_retries
+         self.retry_delay = retry_delay
+
+         # Rate limiting - keep track of last request time
+         self.last_request_time = 0
+         self.min_request_interval = 0.5  # Minimum time between requests in seconds
+
+         # Language code mapping
+         language_code_mapping = {
+             "english": "en",
+             "spanish": "es",
+             "french": "fr",
+             "german": "de",
+             "italian": "it",
+             "japanese": "ja",
+             "korean": "ko",
+             "portuguese": "pt",
+             "russian": "ru",
+             "chinese": "zh-CN"
+         }
+
+         # Get language code
+         search_language = search_language.lower()
+         self.language = language_code_mapping.get(search_language, "en")
+
+         # Safe search setting
+         self.safe = "active" if safe_search else "off"
+
+         # Region/Country setting
+         self.region = region
+
+         # API key and Search Engine ID
+         self.api_key = api_key or os.getenv("GOOGLE_PSE_API_KEY")
+         self.search_engine_id = search_engine_id or os.getenv("GOOGLE_PSE_ENGINE_ID")
+
+         if not self.api_key:
+             raise ValueError("Google API key is required. Set it in the GOOGLE_PSE_API_KEY environment variable.")
+         if not self.search_engine_id:
+             raise ValueError("Google Search Engine ID is required. Set it in the GOOGLE_PSE_ENGINE_ID environment variable.")
+
+         # Validate connection and credentials
+         self._validate_connection()
+
+     def _validate_connection(self):
+         """Test the connection to ensure API key and Search Engine ID are valid"""
+         try:
+             # Make a minimal test query
+             response = self._make_request("test")
+
+             # Check if we got a valid response
+             if response.get("error"):
+                 error_msg = response["error"].get("message", "Unknown error")
+                 raise ValueError(f"Google PSE API error: {error_msg}")
+
+             # If we get here, the connection is valid
+             logger.info("Google PSE connection validated successfully")
+             return True
+
+         except Exception as e:
+             # Log the error and re-raise
+             logger.error(f"Error validating Google PSE connection: {str(e)}")
+             raise
+
+     def _respect_rate_limit(self):
+         """Ensure we don't exceed rate limits by adding appropriate delay between requests"""
+         current_time = time.time()
+         elapsed = current_time - self.last_request_time
+
+         # If we've made a request recently, wait until the minimum interval has passed
+         if elapsed < self.min_request_interval:
+             sleep_time = self.min_request_interval - elapsed
+             logger.debug(f"Rate limiting: sleeping for {sleep_time:.2f}s")
+             time.sleep(sleep_time)
+
+         # Update the last request time
+         self.last_request_time = time.time()
+
+     def _make_request(self, query: str, start_index: int = 1) -> Dict:
+         """
+         Make a request to the Google PSE API with retry logic and rate limiting
+
+         Args:
+             query: Search query string
+             start_index: Starting index for pagination
+
+         Returns:
+             JSON response from the API
+
+         Raises:
+             RequestException: If all retry attempts fail
+         """
+         # Base URL for the API
+         url = "https://www.googleapis.com/customsearch/v1"
+
+         # Parameters for the request
+         params = {
+             "key": self.api_key,
+             "cx": self.search_engine_id,
+             "q": query,
+             "num": min(10, self.max_results),  # Max 10 per request
+             "start": start_index,
+             "safe": self.safe,
+             "lr": f"lang_{self.language}",
+             "gl": self.region
+         }
+
+         # Implement retry logic with exponential backoff
+         attempt = 0
+         last_exception = None
+
+         while attempt < self.max_retries:
+             try:
+                 # Respect rate limits
+                 self._respect_rate_limit()
+
+                 # Add jitter to retries after the first attempt
+                 if attempt > 0:
+                     jitter = random.uniform(0.5, 1.5)
+                     sleep_time = self.retry_delay * (2 ** (attempt - 1)) * jitter
+                     logger.info(f"Retry attempt {attempt+1}/{self.max_retries} for query '{query}'. Waiting {sleep_time:.2f}s")
+                     time.sleep(sleep_time)
+
+                 # Make the request
+                 logger.debug(f"Making request to Google PSE API: {query} (start_index={start_index})")
+                 response = requests.get(url, params=params, timeout=10)
+
+                 # Check for HTTP errors
+                 response.raise_for_status()
+
+                 # Return the JSON response
+                 return response.json()
+
+             except RequestException as e:
+                 logger.warning(f"Request error on attempt {attempt+1}/{self.max_retries}: {str(e)}")
+                 last_exception = e
+             except Exception as e:
+                 logger.warning(f"Error on attempt {attempt+1}/{self.max_retries}: {str(e)}")
+                 last_exception = e
+
+             attempt += 1
+
+         # If we get here, all retries failed
+         error_msg = f"Failed to get response from Google PSE API after {self.max_retries} attempts"
+         logger.error(error_msg)
+
+         if last_exception:
+             raise RequestException(f"{error_msg}: {str(last_exception)}")
+         else:
+             raise RequestException(error_msg)
+
+     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+         """Get search result previews/snippets"""
+         results = []
+
+         # Google PSE API returns a maximum of 10 results per request
+         # We may need to make multiple requests to get the desired number
+         start_index = 1
+         total_results = 0
+
+         while total_results < self.max_results:
+             try:
+                 response = self._make_request(query, start_index)
+
+                 # Break if no items
+                 if "items" not in response:
+                     break
+
+                 items = response.get("items", [])
+
+                 # Process each result
+                 for item in items:
+                     title = item.get("title", "")
+                     snippet = item.get("snippet", "")
+                     url = item.get("link", "")
+
+                     # Skip results without URL
+                     if not url:
+                         continue
+
+                     results.append({
+                         "title": title,
+                         "snippet": snippet,
+                         "url": url,
+                         "source": "Google Programmable Search"
+                     })
+
+                     total_results += 1
+                     if total_results >= self.max_results:
+                         break
+
+                 # Check if there are more results
+                 if not items or total_results >= self.max_results:
+                     break
+
+                 # Update start index for next request
+                 start_index += len(items)
+
+                 # Add a small delay between multiple requests to be respectful of the API
+                 if total_results < self.max_results:
+                     time.sleep(self.min_request_interval)
+
+             except Exception as e:
+                 logger.error(f"Error getting search results: {str(e)}")
+                 break
+
+         logger.info(f"Retrieved {len(results)} search results for query: '{query}'")
+         return results
+
+     def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """Get full content for search results"""
+         # Use the BaseSearchEngine implementation
+         return super()._get_full_content(relevant_items)
+
+     def run(self, query: str) -> List[Dict[str, Any]]:
+         """Run the search engine to get results for a query"""
+         # Get search result previews/snippets
+         search_results = self._get_previews(query)
+
+         # Filter for relevance if we have an LLM and max_filtered_results
+         if self.llm and self.max_filtered_results:
+             search_results = self._filter_for_relevance(query, search_results)
+
+         # Get full content if needed
+         if self.include_full_content:
+             search_results = self._get_full_content(search_results)
+
+         return search_results
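
For orientation, the sketch below shows one way this engine might be invoked once the GOOGLE_PSE_API_KEY and GOOGLE_PSE_ENGINE_ID environment variables are set; the query string is purely illustrative, and note that the constructor issues a test request to validate the credentials at instantiation time.

    # Hypothetical usage sketch; requires valid Google PSE credentials in the environment
    from local_deep_research.web_search_engines.engines.search_engine_google_pse import GooglePSESearchEngine

    engine = GooglePSESearchEngine(max_results=5, safe_search=True)
    for result in engine.run("open source deep research tools"):
        # Each result is a preview dict with title, snippet, url, and source keys
        print(result["title"], result["url"])
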
local_deep_research/web_search_engines/engines/search_engine_guardian.py
@@ -0,0 +1,337 @@
+ import requests
+ from typing import Dict, List, Any, Optional
+ import os
+ from datetime import datetime, timedelta
+ from langchain_core.language_models import BaseLLM
+
+ from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
+ from local_deep_research import config
+
+
+ class GuardianSearchEngine(BaseSearchEngine):
+     """The Guardian API search engine implementation"""
+
+     def __init__(self,
+                  max_results: int = 10,
+                  api_key: Optional[str] = None,
+                  from_date: Optional[str] = None,
+                  to_date: Optional[str] = None,
+                  section: Optional[str] = None,
+                  order_by: str = "relevance",
+                  llm: Optional[BaseLLM] = None):
+         """
+         Initialize The Guardian search engine.
+
+         Args:
+             max_results: Maximum number of search results
+             api_key: The Guardian API key (can also be set in GUARDIAN_API_KEY env)
+             from_date: Start date for search (YYYY-MM-DD format, default 1 month ago)
+             to_date: End date for search (YYYY-MM-DD format, default today)
+             section: Filter by section (e.g., "politics", "technology", "sport")
+             order_by: Sort order ("relevance", "newest", "oldest")
+             llm: Language model for relevance filtering
+         """
+         # Initialize the BaseSearchEngine with the LLM
+         super().__init__(llm=llm)
+
+         self.max_results = max_results
+         self.api_key = api_key or os.getenv("GUARDIAN_API_KEY")
+
+         if not self.api_key:
+             raise ValueError("Guardian API key not found. Please provide api_key or set the GUARDIAN_API_KEY environment variable.")
+
+         # Set date ranges if not provided
+         if not from_date:
+             # Default to one month ago
+             one_month_ago = datetime.now() - timedelta(days=30)
+             self.from_date = one_month_ago.strftime("%Y-%m-%d")
+         else:
+             self.from_date = from_date
+
+         if not to_date:
+             # Default to today
+             self.to_date = datetime.now().strftime("%Y-%m-%d")
+         else:
+             self.to_date = to_date
+
+         self.section = section
+         self.order_by = order_by
+
+         # API base URL
+         self.api_url = "https://content.guardianapis.com/search"
+
+     def _get_all_data(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Get all article data from The Guardian API in a single call.
+         Always requests all fields for simplicity.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of articles with all data
+         """
+         try:
+             # Always request all fields for simplicity
+             params = {
+                 "q": query,
+                 "api-key": self.api_key,
+                 "from-date": self.from_date,
+                 "to-date": self.to_date,
+                 "order-by": self.order_by,
+                 "page-size": min(self.max_results, 50),  # API maximum is 50
+                 "show-fields": "headline,trailText,byline,body,publication",
+                 "show-tags": "keyword"
+             }
+
+             # Add section filter if specified
+             if self.section:
+                 params["section"] = self.section
+
+             # Execute the API request
+             response = requests.get(self.api_url, params=params)
+             response.raise_for_status()
+
+             data = response.json()
+
+             # Extract results from the response
+             articles = data.get("response", {}).get("results", [])
+
+             # Format results to include all data
+             formatted_articles = []
+             for i, article in enumerate(articles):
+                 if i >= self.max_results:
+                     break
+
+                 fields = article.get("fields", {})
+
+                 # Format the article with all fields
+                 result = {
+                     "id": article.get("id", ""),
+                     "title": fields.get("headline", article.get("webTitle", "")),
+                     "link": article.get("webUrl", ""),
+                     "snippet": fields.get("trailText", ""),
+                     "publication_date": article.get("webPublicationDate", ""),
+                     "section": article.get("sectionName", ""),
+                     "author": fields.get("byline", ""),
+                     "content": fields.get("body", ""),
+                     "full_content": fields.get("body", "")
+                 }
+
+                 # Extract tags/keywords
+                 tags = article.get("tags", [])
+                 result["keywords"] = [tag.get("webTitle", "") for tag in tags if tag.get("type") == "keyword"]
+
+                 formatted_articles.append(result)
+
+             return formatted_articles
+
+         except Exception as e:
+             print(f"Error getting data from The Guardian API: {e}")
+             return []
+
+     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Get preview information for Guardian articles.
+         Actually gets all data but returns only preview fields.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of preview dictionaries
+         """
+         print("Getting articles from The Guardian API")
+
+         # Get all article data
+         articles = self._get_all_data(query)
+
+         # Store full articles for later use (implementation detail)
+         self._full_articles = {a["id"]: a for a in articles}
+
+         # Return only preview fields for each article
+         previews = []
+         for article in articles:
+             preview = {
+                 "id": article["id"],
+                 "title": article["title"],
+                 "link": article["link"],
+                 "snippet": article["snippet"],
+                 "publication_date": article["publication_date"],
+                 "section": article["section"],
+                 "author": article["author"],
+                 "keywords": article.get("keywords", [])
+             }
+             previews.append(preview)
+
+         return previews
+
+     def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """
+         Get full content for the relevant Guardian articles.
+         Restores full content from the cached data.
+
+         Args:
+             relevant_items: List of relevant preview dictionaries
+
+         Returns:
+             List of result dictionaries with full content
+         """
+         print("Adding full content to relevant Guardian articles")
+
+         # Check if we should add full content
+         if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
+             return relevant_items
+
+         # Get full articles for relevant items
+         results = []
+         for item in relevant_items:
+             article_id = item.get("id", "")
+
+             # Get the full article from our cache
+             if hasattr(self, '_full_articles') and article_id in self._full_articles:
+                 results.append(self._full_articles[article_id])
+             else:
+                 # If not found (shouldn't happen), just use the preview
+                 results.append(item)
+
+         return results
+
+     def run(self, query: str) -> List[Dict[str, Any]]:
+         """
+         Execute a search using The Guardian API with the two-phase approach.
+
+         Args:
+             query: The search query
+
+         Returns:
+             List of search results
+         """
+         print("---Execute a search using The Guardian---")
+
+         # Use the implementation from the parent class which handles all phases
+         results = super().run(query)
+
+         # Clean up the cache after use
+         if hasattr(self, '_full_articles'):
+             del self._full_articles
+
+         return results
+
+     def get_article_by_id(self, article_id: str) -> Dict[str, Any]:
+         """
+         Get a specific article by its ID.
+
+         Args:
+             article_id: The Guardian article ID
+
+         Returns:
+             Dictionary with article information
+         """
+         try:
+             # Guardian article API URL
+             url = f"https://content.guardianapis.com/{article_id}"
+
+             # Always request all fields
+             response = requests.get(
+                 url,
+                 params={
+                     "api-key": self.api_key,
+                     "show-fields": "headline,trailText,body,byline,publication",
+                     "show-tags": "keyword"
+                 }
+             )
+             response.raise_for_status()
+
+             data = response.json()
+             article = data.get("response", {}).get("content", {})
+
+             if not article:
+                 return {}
+
+             fields = article.get("fields", {})
+
+             # Format the article with all fields
+             result = {
+                 "id": article_id,
+                 "title": fields.get("headline", article.get("webTitle", "")),
+                 "link": article.get("webUrl", ""),
+                 "snippet": fields.get("trailText", ""),
+                 "publication_date": article.get("webPublicationDate", ""),
+                 "section": article.get("sectionName", ""),
+                 "author": fields.get("byline", "")
+             }
+
+             # Only include full content if not in snippet-only mode
+             if not hasattr(config, 'SEARCH_SNIPPETS_ONLY') or not config.SEARCH_SNIPPETS_ONLY:
+                 result["content"] = fields.get("body", "")
+                 result["full_content"] = fields.get("body", "")
+
+             # Extract tags/keywords
+             tags = article.get("tags", [])
+             result["keywords"] = [tag.get("webTitle", "") for tag in tags if tag.get("type") == "keyword"]
+
+             return result
+
+         except Exception as e:
+             print(f"Error getting article details: {e}")
+             return {}
+
+     def search_by_section(self, section: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
+         """
+         Search for articles in a specific section.
+
+         Args:
+             section: The Guardian section name (e.g., "politics", "technology")
+             max_results: Maximum number of search results (defaults to self.max_results)
+
+         Returns:
+             List of articles in the section
+         """
+         original_section = self.section
+         original_max_results = self.max_results
+
+         try:
+             # Set section and max_results for this search
+             self.section = section
+             if max_results:
+                 self.max_results = max_results
+
+             # Use empty query to get all articles in the section
+             return self.run("")
+
+         finally:
+             # Restore original values
+             self.section = original_section
+             self.max_results = original_max_results
+
+     def get_recent_articles(self, days: int = 7, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
+         """
+         Get recent articles from The Guardian.
+
+         Args:
+             days: Number of days to look back
+             max_results: Maximum number of results (defaults to self.max_results)
+
+         Returns:
+             List of recent articles
+         """
+         original_from_date = self.from_date
+         original_order_by = self.order_by
+         original_max_results = self.max_results
+
+         try:
+             # Set parameters for this search
+             self.from_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
+             self.order_by = "newest"
+             if max_results:
+                 self.max_results = max_results
+
+             # Use empty query to get all recent articles
+             return self.run("")
+
+         finally:
+             # Restore original values
+             self.from_date = original_from_date
+             self.order_by = original_order_by
+             self.max_results = original_max_results
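
Similarly, a minimal sketch of how the Guardian engine might be used, assuming a valid GUARDIAN_API_KEY is available in the environment; the query, section name, and look-back window below are illustrative only.

    # Hypothetical usage sketch; requires a valid GUARDIAN_API_KEY in the environment
    from local_deep_research.web_search_engines.engines.search_engine_guardian import GuardianSearchEngine

    engine = GuardianSearchEngine(max_results=5, order_by="newest")
    for article in engine.run("renewable energy"):
        # Preview dicts carry id, title, link, snippet, publication_date, section, author, keywords
        print(article["publication_date"], article["title"], article["link"])

    # Convenience helpers defined above
    recent = engine.get_recent_articles(days=3)
    politics = engine.search_by_section("politics", max_results=5)
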