local-deep-research 0.1.13__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. local_deep_research/config.py +8 -8
  2. local_deep_research/defaults/search_engines.toml +39 -18
  3. local_deep_research/search_system.py +15 -9
  4. local_deep_research/utilties/enums.py +4 -4
  5. local_deep_research/web/app.py +3 -2
  6. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +3 -5
  7. local_deep_research/web_search_engines/engines/search_engine_brave.py +3 -5
  8. local_deep_research/web_search_engines/engines/search_engine_ddg.py +4 -3
  9. local_deep_research/web_search_engines/engines/search_engine_github.py +2 -4
  10. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +2 -4
  11. local_deep_research/web_search_engines/engines/search_engine_guardian.py +323 -78
  12. local_deep_research/web_search_engines/engines/search_engine_local_all.py +3 -5
  13. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +3 -4
  14. local_deep_research/web_search_engines/engines/search_engine_searxng.py +3 -2
  15. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +1128 -0
  16. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +2 -4
  17. local_deep_research/web_search_engines/engines/search_engine_wayback.py +2 -4
  18. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +2 -4
  19. local_deep_research/web_search_engines/search_engine_base.py +12 -4
  20. {local_deep_research-0.1.13.dist-info → local_deep_research-0.1.14.dist-info}/METADATA +1 -1
  21. {local_deep_research-0.1.13.dist-info → local_deep_research-0.1.14.dist-info}/RECORD +25 -25
  22. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +0 -623
  23. {local_deep_research-0.1.13.dist-info → local_deep_research-0.1.14.dist-info}/WHEEL +0 -0
  24. {local_deep_research-0.1.13.dist-info → local_deep_research-0.1.14.dist-info}/entry_points.txt +0 -0
  25. {local_deep_research-0.1.13.dist-info → local_deep_research-0.1.14.dist-info}/licenses/LICENSE +0 -0
  26. {local_deep_research-0.1.13.dist-info → local_deep_research-0.1.14.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,20 @@
  import requests
- from typing import Dict, List, Any, Optional
+ import logging
+ from typing import Dict, List, Any, Optional, Tuple
  import os
  from datetime import datetime, timedelta
  from langchain_core.language_models import BaseLLM

  from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
  from local_deep_research import config
+ from local_deep_research.utilties.search_utilities import remove_think_tags

+ # Setup logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)

  class GuardianSearchEngine(BaseSearchEngine):
- """The Guardian API search engine implementation"""
+ """Enhanced Guardian API search engine implementation with LLM query optimization"""

  def __init__(self,
  max_results: int = 10,
@@ -18,9 +23,12 @@ class GuardianSearchEngine(BaseSearchEngine):
  to_date: Optional[str] = None,
  section: Optional[str] = None,
  order_by: str = "relevance",
- llm: Optional[BaseLLM] = None):
+ llm: Optional[BaseLLM] = None,
+ max_filtered_results: Optional[int] = None,
+ optimize_queries: bool = True,
+ adaptive_search: bool = True):
  """
- Initialize The Guardian search engine.
+ Initialize The Guardian search engine with enhanced features.

  Args:
  max_results: Maximum number of search results
@@ -29,13 +37,16 @@ class GuardianSearchEngine(BaseSearchEngine):
  to_date: End date for search (YYYY-MM-DD format, default today)
  section: Filter by section (e.g., "politics", "technology", "sport")
  order_by: Sort order ("relevance", "newest", "oldest")
- llm: Language model for relevance filtering
+ llm: Language model for relevance filtering and query optimization
+ max_filtered_results: Maximum number of results to keep after filtering
+ optimize_queries: Whether to optimize queries using LLM
+ adaptive_search: Whether to use adaptive search (adjusting date ranges)
  """
- # Initialize the BaseSearchEngine with the LLM
- super().__init__(llm=llm)
-
- self.max_results = max_results
+ # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+ super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
  self.api_key = api_key or os.getenv("GUARDIAN_API_KEY")
+ self.optimize_queries = optimize_queries
+ self.adaptive_search = adaptive_search

  if not self.api_key:
  raise ValueError("Guardian API key not found. Please provide api_key or set the GUARDIAN_API_KEY environment variable.")
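
For orientation, here is a minimal construction sketch using the new 0.1.14 parameters shown above (the values are illustrative; the key can also be passed as api_key=... instead of the GUARDIAN_API_KEY environment variable):

    import os
    from local_deep_research.web_search_engines.engines.search_engine_guardian import GuardianSearchEngine

    os.environ.setdefault("GUARDIAN_API_KEY", "your-key-here")  # or pass api_key=...
    engine = GuardianSearchEngine(
        max_results=10,            # now forwarded to BaseSearchEngine
        section="politics",        # optional section filter
        order_by="relevance",
        max_filtered_results=5,    # new: cap on results kept after LLM filtering
        optimize_queries=True,     # new: LLM query rewriting (no-op without llm=...)
        adaptive_search=True,      # new: widens the date range when results are sparse
    )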
@@ -56,10 +67,203 @@ class GuardianSearchEngine(BaseSearchEngine):

  self.section = section
  self.order_by = order_by
+ self._original_date_params = {
+ "from_date": self.from_date,
+ "to_date": self.to_date
+ }

  # API base URL
  self.api_url = "https://content.guardianapis.com/search"

+ def _optimize_query_for_guardian(self, query: str) -> str:
+ """
+ Optimize a natural language query for Guardian search.
+ Uses LLM to transform questions into effective news search queries.
+
+ Args:
+ query: Natural language query
+
+ Returns:
+ Optimized query string for Guardian
+ """
+ # Handle extremely long queries by truncating first
+ if len(query) > 150:
+ simple_query = " ".join(query.split()[:10])
+ logger.info(f"Query too long ({len(query)} chars), truncating to: {simple_query}")
+ query = simple_query
+
+ if not self.llm or not self.optimize_queries:
+ # Return original query if no LLM available or optimization disabled
+ return query
+
+ try:
+ # Prompt for query optimization
+ prompt = f"""Transform this natural language question into a very short Guardian news search query.
+
+ Original query: "{query}"
+
+ CRITICAL RULES:
+ 1. ONLY RETURN THE EXACT SEARCH QUERY - NO EXPLANATIONS, NO COMMENTS
+ 2. Keep it EXTREMELY BRIEF - MAXIMUM 3-4 words total
+ 3. Focus only on the main topic/person/event
+ 4. Include proper names when relevant
+ 5. Remove ALL unnecessary words
+ 6. DO NOT use Boolean operators (no AND/OR)
+ 7. DO NOT use quotes
+
+ EXAMPLE CONVERSIONS:
+ ✓ "What's the impact of rising interest rates on UK housing market?" → "UK housing rates"
+ ✓ "Latest developments in the Ukraine-Russia peace negotiations" → "Ukraine Russia negotiations"
+ ✓ "How are tech companies responding to AI regulation?" → "tech AI regulation"
+ ✓ "What is Donald Trump's current political activity?" → "Trump political activity"
+
+ Return ONLY the extremely brief search query.
+ """
+
+ # Get response from LLM
+ response = self.llm.invoke(prompt)
+ optimized_query = remove_think_tags(response.content).strip()
+
+ # Clean up the query - remove any explanations
+ lines = optimized_query.split('\n')
+ for line in lines:
+ line = line.strip()
+ if line and not line.lower().startswith(('here', 'i would', 'the best', 'this query')):
+ optimized_query = line
+ break
+
+ # Remove any quotes that wrap the entire query
+ if optimized_query.startswith('"') and optimized_query.endswith('"') and optimized_query.count('"') == 2:
+ optimized_query = optimized_query[1:-1]
+
+ logger.info(f"Original query: '{query}'")
+ logger.info(f"Optimized for Guardian: '{optimized_query}'")
+
+ return optimized_query
+
+ except Exception as e:
+ logger.error(f"Error optimizing query: {e}")
+ return query # Fall back to original query on error
+
+ def _adapt_dates_for_query_type(self, query: str) -> None:
+ """
+ Adapt date range based on query type (historical vs current).
+
+ Args:
+ query: The search query
+ """
+ # Fast path - for very short queries, default to recent news
+ if len(query.split()) <= 4:
+ logger.info("Short query detected, defaulting to recent news")
+ # Default to 60 days for short queries
+ recent = (datetime.now() - timedelta(days=60)).strftime("%Y-%m-%d")
+ self.from_date = recent
+ self.order_by = "newest"
+ return
+
+ if not self.llm or not self.adaptive_search:
+ return
+
+ try:
+ prompt = f"""Is this query asking about HISTORICAL events or CURRENT events?
+
+ Query: "{query}"
+
+ ONE WORD ANSWER ONLY:
+ - "HISTORICAL" if about past events (older than 1 year)
+ - "CURRENT" if about recent events (within past year)
+ - "UNCLEAR" if can't determine
+
+ ONE WORD ONLY:"""
+
+ response = self.llm.invoke(prompt)
+ answer = remove_think_tags(response.content).strip().upper()
+
+ # Reset to original parameters first
+ self.from_date = self._original_date_params["from_date"]
+ self.to_date = self._original_date_params["to_date"]
+
+ if "HISTORICAL" in answer:
+ # For historical queries, go back 10 years
+ logger.info("Query classified as HISTORICAL - extending search timeframe")
+ ten_years_ago = (datetime.now() - timedelta(days=3650)).strftime("%Y-%m-%d")
+ self.from_date = ten_years_ago
+
+ elif "CURRENT" in answer:
+ # For current events, focus on recent content
+ logger.info("Query classified as CURRENT - focusing on recent content")
+ recent = (datetime.now() - timedelta(days=60)).strftime("%Y-%m-%d")
+ self.from_date = recent
+ self.order_by = "newest" # Prioritize newest for current events
+
+ except Exception as e:
+ logger.error(f"Error adapting dates for query type: {e}")
+ # Keep original date parameters on error
+
+ def _adaptive_search(self, query: str) -> Tuple[List[Dict[str, Any]], str]:
+ """
+ Perform adaptive search that progressively adjusts parameters based on results.
+
+ Args:
+ query: The search query
+
+ Returns:
+ Tuple of (list of articles, search strategy used)
+ """
+ # Try with current parameters
+ articles = self._get_all_data(query)
+ strategy = "initial"
+
+ # If no results or too few, try different strategies
+ if len(articles) < 3 and self.adaptive_search:
+ logger.info(f"Initial search found only {len(articles)} results, trying alternative strategies")
+
+ # Try with expanded date range
+ original_from_date = self.from_date
+ original_order_by = self.order_by
+
+ # Strategy 1: Expand to 6 months
+ logger.info("Strategy 1: Expanding time range to 6 months")
+ six_months_ago = (datetime.now() - timedelta(days=180)).strftime("%Y-%m-%d")
+ self.from_date = six_months_ago
+
+ articles1 = self._get_all_data(query)
+ if len(articles1) > len(articles):
+ articles = articles1
+ strategy = "expanded_6mo"
+
+ # Strategy 2: Expand to all time and try relevance order
+ if len(articles) < 3:
+ logger.info("Strategy 2: Expanding to all time with relevance ordering")
+ self.from_date = "2000-01-01" # Effectively "all time"
+ self.order_by = "relevance"
+
+ articles2 = self._get_all_data(query)
+ if len(articles2) > len(articles):
+ articles = articles2
+ strategy = "all_time_relevance"
+
+ # Strategy 3: Try removing section constraints
+ if len(articles) < 3 and self.section:
+ logger.info("Strategy 3: Removing section constraint")
+ original_section = self.section
+ self.section = None
+
+ articles3 = self._get_all_data(query)
+ if len(articles3) > len(articles):
+ articles = articles3
+ strategy = "no_section"
+
+ # Restore section setting
+ self.section = original_section
+
+ # Restore original settings
+ self.from_date = original_from_date
+ self.order_by = original_order_by
+
+ logger.info(f"Adaptive search using strategy '{strategy}' found {len(articles)} results")
+ return articles, strategy
+
  def _get_all_data(self, query: str) -> List[Dict[str, Any]]:
  """
  Get all article data from The Guardian API in a single call.
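
Together these new methods form a three-step pipeline: rewrite the query, adapt the date window, then search with progressive fallbacks. A minimal sketch of the optimization step in isolation, using a stub LLM (the StubLLM class and its response shape are assumptions for illustration, not part of the package):

    from types import SimpleNamespace

    class StubLLM:
        def invoke(self, prompt):
            # A real LLM would return the 3-4 word query the prompt asks for.
            return SimpleNamespace(content="UK housing rates")

    engine = GuardianSearchEngine(api_key="your-key-here", llm=StubLLM())
    print(engine._optimize_query_for_guardian(
        "What's the impact of rising interest rates on UK housing market?"))
    # -> "UK housing rates"; without an LLM (or with optimize_queries=False),
    # the query passes through unchanged apart from the >150-char truncation.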
@@ -72,14 +276,31 @@ class GuardianSearchEngine(BaseSearchEngine):
  List of articles with all data
  """
  try:
+ # Ensure query is not empty
+ if not query or query.strip() == "":
+ query = "news"
+ logger.warning("Empty query provided, using 'news' as default")
+
+ # Ensure query is not too long for API
+ if len(query) > 100:
+ logger.warning(f"Query too long for Guardian API ({len(query)} chars), truncating")
+ query = query[:100]
+
  # Always request all fields for simplicity
+ # Ensure max_results is an integer to avoid comparison errors
+ page_size = min(int(self.max_results) if self.max_results is not None else 10, 50)
+
+ # Log full parameters for debugging
+ logger.info(f"Guardian API search query: '{query}'")
+ logger.info(f"Guardian API date range: {self.from_date} to {self.to_date}")
+
  params = {
  "q": query,
  "api-key": self.api_key,
  "from-date": self.from_date,
  "to-date": self.to_date,
  "order-by": self.order_by,
- "page-size": min(self.max_results, 50), # API maximum is 50
+ "page-size": page_size, # API maximum is 50
  "show-fields": "headline,trailText,byline,body,publication",
  "show-tags": "keyword"
  }
@@ -88,6 +309,11 @@ class GuardianSearchEngine(BaseSearchEngine):
  if self.section:
  params["section"] = self.section

+ # Log the complete request parameters (except API key)
+ log_params = params.copy()
+ log_params["api-key"] = "REDACTED"
+ logger.info(f"Guardian API request parameters: {log_params}")
+
  # Execute the API request
  response = requests.get(self.api_url, params=params)
  response.raise_for_status()
@@ -96,6 +322,7 @@ class GuardianSearchEngine(BaseSearchEngine):

  # Extract results from the response
  articles = data.get("response", {}).get("results", [])
+ logger.info(f"Guardian API returned {len(articles)} articles")

  # Format results to include all data
  formatted_articles = []
@@ -127,13 +354,12 @@ class GuardianSearchEngine(BaseSearchEngine):
  return formatted_articles

  except Exception as e:
- print(f"Error getting data from The Guardian API: {e}")
+ logger.error(f"Error getting data from The Guardian API: {e}")
  return []

  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
  """
- Get preview information for Guardian articles.
- Actually gets all data but returns only preview fields.
+ Get preview information for Guardian articles with enhanced optimization.

  Args:
  query: The search query
@@ -141,12 +367,29 @@ class GuardianSearchEngine(BaseSearchEngine):
  Returns:
  List of preview dictionaries
  """
- print("Getting articles from The Guardian API")
+ logger.info(f"Getting articles from The Guardian API for query: {query}")

- # Get all article data
- articles = self._get_all_data(query)
+ # Step 1: Optimize the query using LLM
+ optimized_query = self._optimize_query_for_guardian(query)
+
+ # Step 2: Adapt date parameters based on query type
+ self._adapt_dates_for_query_type(optimized_query)
+
+ # Step 3: Perform adaptive search
+ articles, strategy = self._adaptive_search(optimized_query)

- # Store full articles for later use (implementation detail)
+ # Store search metadata for debugging
+ self._search_metadata = {
+ "original_query": query,
+ "optimized_query": optimized_query,
+ "strategy": strategy,
+ "from_date": self.from_date,
+ "to_date": self.to_date,
+ "section": self.section,
+ "order_by": self.order_by
+ }
+
+ # Store full articles for later use
  self._full_articles = {a["id"]: a for a in articles}

  # Return only preview fields for each article
@@ -177,7 +420,7 @@ class GuardianSearchEngine(BaseSearchEngine):
  Returns:
  List of result dictionaries with full content
  """
- print("Adding full content to relevant Guardian articles")
+ logger.info(f"Adding full content to {len(relevant_items)} relevant Guardian articles")

  # Check if we should add full content
  if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
@@ -199,7 +442,7 @@ class GuardianSearchEngine(BaseSearchEngine):

  def run(self, query: str) -> List[Dict[str, Any]]:
  """
- Execute a search using The Guardian API with the two-phase approach.
+ Execute a search using The Guardian API with the enhanced approach.

  Args:
  query: The search query
@@ -207,75 +450,77 @@ class GuardianSearchEngine(BaseSearchEngine):
  Returns:
  List of search results
  """
- print("---Execute a search using The Guardian---")
+ logger.info(f"---Execute a search using The Guardian (enhanced)---")

- # Use the implementation from the parent class which handles all phases
- results = super().run(query)
+ # Additional safety check for None query
+ if query is None:
+ logger.error("None query passed to Guardian search engine")
+ query = "news"

- # Clean up the cache after use
- if hasattr(self, '_full_articles'):
- del self._full_articles
+ try:
+ # Get previews with our enhanced method
+ previews = self._get_previews(query)

- return results
-
- def get_article_by_id(self, article_id: str) -> Dict[str, Any]:
- """
- Get a specific article by its ID.
-
- Args:
- article_id: The Guardian article ID
+ # If no results, try one more time with a simplified query
+ if not previews:
+ simple_query = " ".join([w for w in query.split() if len(w) > 3][:3])
+ logger.warning(f"No Guardian articles found, trying simplified query: {simple_query}")
+ previews = self._get_previews(simple_query)
+
+ # If still no results, try with a very generic query as last resort
+ if not previews and "trump" in query.lower():
+ logger.warning("Trying last resort query: 'Donald Trump'")
+ previews = self._get_previews("Donald Trump")
+ elif not previews:
+ logger.warning("Trying last resort query: 'news'")
+ previews = self._get_previews("news")

- Returns:
- Dictionary with article information
- """
- try:
- # Guardian article API URL
- url = f"https://content.guardianapis.com/{article_id}"
-
- # Always request all fields
- response = requests.get(
- url,
- params={
- "api-key": self.api_key,
- "show-fields": "headline,trailText,body,byline,publication",
- "show-tags": "keyword"
- }
- )
- response.raise_for_status()
+ # If still no results after all attempts, return empty list
+ if not previews:
+ logger.warning(f"No Guardian articles found after multiple attempts")
+ return []

- data = response.json()
- article = data.get("response", {}).get("content", {})
+ # Filter for relevance if we have an LLM
+ if self.llm and hasattr(self, 'max_filtered_results') and self.max_filtered_results:
+ filtered_items = self._filter_for_relevance(previews, query)
+ if not filtered_items:
+ # Fall back to unfiltered results if everything was filtered out
+ logger.warning("All articles filtered out, using unfiltered results")
+ filtered_items = previews[:self.max_filtered_results]
+ else:
+ filtered_items = previews

- if not article:
- return {}
-
- fields = article.get("fields", {})
-
- # Format the article with all fields
- result = {
- "id": article_id,
- "title": fields.get("headline", article.get("webTitle", "")),
- "link": article.get("webUrl", ""),
- "snippet": fields.get("trailText", ""),
- "publication_date": article.get("webPublicationDate", ""),
- "section": article.get("sectionName", ""),
- "author": fields.get("byline", "")
- }
+ # Get full content for relevant items
+ results = self._get_full_content(filtered_items)

- # Only include full content if not in snippet-only mode
- if not hasattr(config, 'SEARCH_SNIPPETS_ONLY') or not config.SEARCH_SNIPPETS_ONLY:
- result["content"] = fields.get("body", "")
- result["full_content"] = fields.get("body", "")
+ # Add source information to make it clear these are from The Guardian
+ for result in results:
+ if "source" not in result:
+ result["source"] = "The Guardian"

- # Extract tags/keywords
- tags = article.get("tags", [])
- result["keywords"] = [tag.get("webTitle", "") for tag in tags if tag.get("type") == "keyword"]
+ # Clean up the cache after use
+ if hasattr(self, '_full_articles'):
+ del self._full_articles
+
+ # Restore original date parameters
+ self.from_date = self._original_date_params["from_date"]
+ self.to_date = self._original_date_params["to_date"]

- return result
+ # Log search metadata if available
+ if hasattr(self, '_search_metadata'):
+ logger.info(f"Search metadata: {self._search_metadata}")
+ del self._search_metadata
+
+ return results

  except Exception as e:
- print(f"Error getting article details: {e}")
- return {}
+ logger.error(f"Error in Guardian search: {e}")
+
+ # Restore original date parameters on error
+ self.from_date = self._original_date_params["from_date"]
+ self.to_date = self._original_date_params["to_date"]
+
+ return []

  def search_by_section(self, section: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
  """
@@ -283,7 +528,7 @@ class GuardianSearchEngine(BaseSearchEngine):

  Args:
  section: The Guardian section name (e.g., "politics", "technology")
- max_results: Maximum number of search results (defaults to self.max_results)
+ max_results: Maximum number of results (defaults to self.max_results)

  Returns:
  List of articles in the section
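
The remaining hunks apply one shared refactor: each engine now forwards max_results to BaseSearchEngine.__init__ instead of assigning self.max_results itself. The base-class side of this change (search_engine_base.py, +12 -4 in the file list) is not shown in this excerpt; the sketch below is an assumed shape, not the actual implementation:

    class BaseSearchEngine:
        def __init__(self, llm=None, max_filtered_results=None, max_results: int = 10):
            # Assumed: the 0.1.14 base class now stores max_results centrally,
            # so subclasses no longer need to set it themselves.
            self.llm = llm
            self.max_filtered_results = max_filtered_results
            self.max_results = max_results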
@@ -35,11 +35,9 @@ class LocalAllSearchEngine(BaseSearchEngine):
  max_filtered_results: Maximum results after filtering
  **kwargs: Additional parameters passed to LocalSearchEngine instances
  """
- # Initialize the base search engine
- super().__init__(llm=llm, max_filtered_results=max_filtered_results)
-
- self.max_results = max_results
-
+ # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+ super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
+
  # Find all local collection search engines
  self.local_engines = {}
  try:
@@ -44,10 +44,9 @@ class PubMedSearchEngine(BaseSearchEngine):
  max_filtered_results: Maximum number of results to keep after filtering
  optimize_queries: Whether to optimize natural language queries for PubMed
  """
- # Initialize the BaseSearchEngine with the LLM and max_filtered_results
- super().__init__(llm=llm, max_filtered_results=max_filtered_results)
-
- self.max_results = max_results
+ # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+ super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
+ self.max_results=max(self.max_results,25)
  self.api_key = api_key
  self.days_limit = days_limit
  self.get_abstracts = get_abstracts
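
Note the new floor on PubMed's result count: whatever is requested, self.max_results is raised to at least 25. A one-line illustration of the max() clamp:

    # Effect of self.max_results = max(self.max_results, 25)
    for requested in (10, 25, 40):
        print(requested, "->", max(requested, 25))  # 10 -> 25, 25 -> 25, 40 -> 40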
@@ -51,8 +51,9 @@ class SearXNGSearchEngine(BaseSearchEngine):
  include_full_content: Whether to include full webpage content in results
  api_key: Alternative way to provide instance URL (takes precedence over instance_url)
  """
- # Initialize the BaseSearchEngine with the LLM and max_filtered_results
- super().__init__(llm=llm, max_filtered_results=max_filtered_results)
+
+ # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
+ super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)

  # Get instance URL from various sources in priority order:
  # 1. api_key parameter (which is actually the instance URL)