local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +96 -84
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +72 -44
  41. local_deep_research/search_system.py +147 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1592 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.0.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,38 +1,41 @@
1
- import requests
2
1
  import logging
2
+ import re
3
+ import time
3
4
  import xml.etree.ElementTree as ET
4
- from typing import Dict, List, Any, Optional, Tuple
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import requests
5
8
  from langchain_core.language_models import BaseLLM
6
- import time
7
- import re
8
- from datetime import datetime
9
9
 
10
- from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
11
- from local_deep_research import config
10
+ from ...config import search_config
11
+ from ..search_engine_base import BaseSearchEngine
12
12
 
13
13
  # Setup logging
14
14
  logging.basicConfig(level=logging.INFO)
15
15
  logger = logging.getLogger(__name__)
16
16
 
17
+
17
18
  class PubMedSearchEngine(BaseSearchEngine):
18
19
  """
19
20
  PubMed search engine implementation with two-phase approach and adaptive search.
20
21
  Provides efficient access to biomedical literature while minimizing API usage.
21
22
  """
22
-
23
- def __init__(self,
24
- max_results: int = 10,
25
- api_key: Optional[str] = None,
26
- days_limit: Optional[int] = None,
27
- get_abstracts: bool = True,
28
- get_full_text: bool = False,
29
- full_text_limit: int = 3,
30
- llm: Optional[BaseLLM] = None,
31
- max_filtered_results: Optional[int] = None,
32
- optimize_queries: bool = True):
23
+
24
+ def __init__(
25
+ self,
26
+ max_results: int = 10,
27
+ api_key: Optional[str] = None,
28
+ days_limit: Optional[int] = None,
29
+ get_abstracts: bool = True,
30
+ get_full_text: bool = False,
31
+ full_text_limit: int = 3,
32
+ llm: Optional[BaseLLM] = None,
33
+ max_filtered_results: Optional[int] = None,
34
+ optimize_queries: bool = True,
35
+ ):
33
36
  """
34
37
  Initialize the PubMed search engine.
35
-
38
+
36
39
  Args:
37
40
  max_results: Maximum number of search results
38
41
  api_key: NCBI API key for higher rate limits (optional)
@@ -45,32 +48,34 @@ class PubMedSearchEngine(BaseSearchEngine):
45
48
  optimize_queries: Whether to optimize natural language queries for PubMed
46
49
  """
47
50
  # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
48
- super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
49
- self.max_results=max(self.max_results,25)
51
+ super().__init__(
52
+ llm=llm, max_filtered_results=max_filtered_results, max_results=max_results
53
+ )
54
+ self.max_results = max(self.max_results, 25)
50
55
  self.api_key = api_key
51
56
  self.days_limit = days_limit
52
57
  self.get_abstracts = get_abstracts
53
58
  self.get_full_text = get_full_text
54
59
  self.full_text_limit = full_text_limit
55
60
  self.optimize_queries = optimize_queries
56
-
61
+
57
62
  # Base API URLs
58
63
  self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
59
64
  self.search_url = f"{self.base_url}/esearch.fcgi"
60
65
  self.summary_url = f"{self.base_url}/esummary.fcgi"
61
66
  self.fetch_url = f"{self.base_url}/efetch.fcgi"
62
67
  self.link_url = f"{self.base_url}/elink.fcgi"
63
-
68
+
64
69
  # PMC base URL for full text
65
70
  self.pmc_url = "https://www.ncbi.nlm.nih.gov/pmc/articles/"
66
-
71
+
67
72
  def _get_result_count(self, query: str) -> int:
68
73
  """
69
74
  Get the total number of results for a query without retrieving the results themselves.
70
-
75
+
71
76
  Args:
72
77
  query: The search query
73
-
78
+
74
79
  Returns:
75
80
  Total number of matching results
76
81
  """
@@ -80,69 +85,70 @@ class PubMedSearchEngine(BaseSearchEngine):
80
85
  "db": "pubmed",
81
86
  "term": query,
82
87
  "retmode": "json",
83
- "retmax": 0 # Don't need actual results, just the count
88
+ "retmax": 0, # Don't need actual results, just the count
84
89
  }
85
-
90
+
86
91
  # Add API key if available
87
92
  if self.api_key:
88
93
  params["api_key"] = self.api_key
89
-
94
+
90
95
  # Execute search request
91
96
  response = requests.get(self.search_url, params=params)
92
97
  response.raise_for_status()
93
-
98
+
94
99
  # Parse response
95
100
  data = response.json()
96
101
  count = int(data["esearchresult"]["count"])
97
-
98
- logger.info(f"Query '{query}' has {count} total results in PubMed")
102
+
103
+ logger.info("Query '%s' has %s total results in PubMed", query, count)
99
104
  return count
100
-
105
+
101
106
  except Exception as e:
102
107
  logger.error(f"Error getting result count: {e}")
103
108
  return 0
104
-
109
+
105
110
  def _extract_core_terms(self, query: str) -> str:
106
111
  """
107
112
  Extract core terms from a complex query for volume estimation.
108
-
113
+
109
114
  Args:
110
115
  query: PubMed query string
111
-
116
+
112
117
  Returns:
113
118
  Simplified query with core terms
114
119
  """
115
120
  # Remove field specifications and operators
116
- simplified = re.sub(r'\[\w+\]', '', query) # Remove [Field] tags
117
- simplified = re.sub(r'\b(AND|OR|NOT)\b', '', simplified) # Remove operators
118
-
121
+ simplified = re.sub(r"\[\w+\]", "", query) # Remove [Field] tags
122
+ simplified = re.sub(r"\b(AND|OR|NOT)\b", "", simplified) # Remove operators
123
+
119
124
  # Remove quotes and parentheses
120
- simplified = simplified.replace('"', '').replace('(', '').replace(')', '')
121
-
125
+ simplified = simplified.replace('"', "").replace("(", "").replace(")", "")
126
+
122
127
  # Split by whitespace and join terms with 4+ chars (likely meaningful)
123
128
  terms = [term for term in simplified.split() if len(term) >= 4]
124
-
129
+
125
130
  # Join with AND to create a basic search
126
131
  return " ".join(terms[:5]) # Limit to top 5 terms
127
-
132
+
128
133
  def _expand_time_window(self, time_filter: str) -> str:
129
134
  """
130
135
  Expand a time window to get more results.
131
-
136
+
132
137
  Args:
133
138
  time_filter: Current time filter
134
-
139
+
135
140
  Returns:
136
141
  Expanded time filter
137
142
  """
138
143
  # Parse current time window
139
144
  import re
145
+
140
146
  match = re.match(r'"last (\d+) (\w+)"[pdat]', time_filter)
141
147
  if not match:
142
148
  return '"last 10 years"[pdat]'
143
-
149
+
144
150
  amount, unit = int(match.group(1)), match.group(2)
145
-
151
+
146
152
  # Expand based on current unit
147
153
  if unit == "months" or unit == "month":
148
154
  if amount < 6:
@@ -158,24 +164,24 @@ class PubMedSearchEngine(BaseSearchEngine):
158
164
  return '"last 5 years"[pdat]'
159
165
  else:
160
166
  return '"last 10 years"[pdat]'
161
-
167
+
162
168
  return '"last 10 years"[pdat]'
163
-
169
+
164
170
  def _optimize_query_for_pubmed(self, query: str) -> str:
165
171
  """
166
172
  Optimize a natural language query for PubMed search.
167
173
  Uses LLM to transform questions into effective keyword-based queries.
168
-
174
+
169
175
  Args:
170
176
  query: Natural language query
171
-
177
+
172
178
  Returns:
173
179
  Optimized query string for PubMed
174
180
  """
175
181
  if not self.llm or not self.optimize_queries:
176
182
  # Return original query if no LLM available or optimization disabled
177
183
  return query
178
-
184
+
179
185
  try:
180
186
  # Prompt for query optimization
181
187
  prompt = f"""Transform this natural language question into an optimized PubMed search query.
@@ -200,138 +206,194 @@ EXAMPLE QUERIES:
200
206
 
201
207
  Return ONLY the search query without any explanations.
202
208
  """
203
-
209
+
204
210
  # Get response from LLM
205
211
  response = self.llm.invoke(prompt)
206
212
  raw_response = response.content.strip()
207
-
213
+
208
214
  # Clean up the query - extract only the actual query and remove any explanations
209
215
  # First check if there are multiple lines and take the first non-empty line
210
- lines = raw_response.split('\n')
216
+ lines = raw_response.split("\n")
211
217
  cleaned_lines = [line.strip() for line in lines if line.strip()]
212
-
218
+
213
219
  if cleaned_lines:
214
220
  optimized_query = cleaned_lines[0]
215
-
221
+
216
222
  # Remove any quotes that wrap the entire query
217
223
  if optimized_query.startswith('"') and optimized_query.endswith('"'):
218
224
  optimized_query = optimized_query[1:-1]
219
-
225
+
220
226
  # Remove any explanation phrases that might be at the beginning
221
- explanation_starters = ["here is", "here's", "this query", "the following"]
227
+ explanation_starters = [
228
+ "here is",
229
+ "here's",
230
+ "this query",
231
+ "the following",
232
+ ]
222
233
  for starter in explanation_starters:
223
234
  if optimized_query.lower().startswith(starter):
224
235
  # Find the actual query part - typically after a colon
225
- colon_pos = optimized_query.find(':')
236
+ colon_pos = optimized_query.find(":")
226
237
  if colon_pos > 0:
227
- optimized_query = optimized_query[colon_pos + 1:].strip()
228
-
238
+ optimized_query = optimized_query[colon_pos + 1 :].strip()
239
+
229
240
  # Check if the query still seems to contain explanations
230
- if len(optimized_query) > 200 or "this query will" in optimized_query.lower():
241
+ if (
242
+ len(optimized_query) > 200
243
+ or "this query will" in optimized_query.lower()
244
+ ):
231
245
  # It's probably still an explanation - try to extract just the query part
232
246
  # Look for common patterns in the explanation like parentheses
233
- pattern = r'\([^)]+\)\s+AND\s+'
247
+ pattern = r"\([^)]+\)\s+AND\s+"
234
248
  import re
249
+
235
250
  matches = re.findall(pattern, optimized_query)
236
251
  if matches:
237
252
  # Extract just the query syntax parts
238
253
  query_parts = []
239
- for part in re.split(r'\.\s+', optimized_query):
240
- if '(' in part and ')' in part and ('AND' in part or 'OR' in part):
254
+ for part in re.split(r"\.\s+", optimized_query):
255
+ if (
256
+ "(" in part
257
+ and ")" in part
258
+ and ("AND" in part or "OR" in part)
259
+ ):
241
260
  query_parts.append(part)
242
261
  if query_parts:
243
- optimized_query = ' '.join(query_parts)
262
+ optimized_query = " ".join(query_parts)
244
263
  else:
245
264
  # Fall back to original query if cleaning fails
246
265
  logger.warning("Failed to extract a clean query from LLM response")
247
266
  optimized_query = query
248
-
267
+
249
268
  # Final safety check - if query looks too much like an explanation, use original
250
269
  if len(optimized_query.split()) > 30:
251
270
  logger.warning("Query too verbose, falling back to simpler form")
252
271
  # Create a simple query from the original
253
- words = [w for w in query.split() if len(w) > 3 and w.lower() not in ('what', 'are', 'the', 'and', 'for', 'with', 'from', 'have', 'been', 'recent')]
254
- optimized_query = ' AND '.join(words[:3])
255
-
256
- # Safety check for invalid or overly complex MeSH terms
272
+ words = [
273
+ w
274
+ for w in query.split()
275
+ if len(w) > 3
276
+ and w.lower()
277
+ not in (
278
+ "what",
279
+ "are",
280
+ "the",
281
+ "and",
282
+ "for",
283
+ "with",
284
+ "from",
285
+ "have",
286
+ "been",
287
+ "recent",
288
+ )
289
+ ]
290
+ optimized_query = " AND ".join(words[:3])
291
+
292
+ # Safety check for invalid or overly complex MeSH terms
257
293
  # This helps prevent errors with non-existent or complex MeSH terms
258
294
  import re
295
+
259
296
  mesh_terms = re.findall(r'"[^"]+"[Mesh]', optimized_query)
260
- known_valid_mesh = ["Vaccines", "COVID-19", "Influenza", "Infectious Disease Medicine",
261
- "Communicable Diseases", "RNA, Messenger", "Vaccination",
262
- "Immunization"]
263
-
297
+ known_valid_mesh = [
298
+ "Vaccines",
299
+ "COVID-19",
300
+ "Influenza",
301
+ "Infectious Disease Medicine",
302
+ "Communicable Diseases",
303
+ "RNA, Messenger",
304
+ "Vaccination",
305
+ "Immunization",
306
+ ]
307
+
264
308
  # Replace potentially problematic MeSH terms with Title/Abstract searches
265
309
  for term in mesh_terms:
266
- term_name = term.split('"')[1] # Extract term name without quotes and [Mesh]
310
+ term_name = term.split('"')[
311
+ 1
312
+ ] # Extract term name without quotes and [Mesh]
267
313
  if not any(valid in term_name for valid in known_valid_mesh):
268
314
  # Replace with Title/Abstract search
269
315
  replacement = f"{term_name.lower()}[Title/Abstract]"
270
316
  optimized_query = optimized_query.replace(term, replacement)
271
-
317
+
272
318
  # Simplify the query if still no results are found
273
319
  self._simplify_query_cache = optimized_query
274
-
320
+
275
321
  # Log original and optimized queries
276
- logger.info(f"Original query: '{query}'")
322
+ logger.info("Original query: '%s'", query)
277
323
  logger.info(f"Optimized for PubMed: '{optimized_query}'")
278
-
324
+
279
325
  return optimized_query
280
-
326
+
281
327
  except Exception as e:
282
328
  logger.error(f"Error optimizing query: {e}")
283
329
  return query # Fall back to original query on error
284
-
330
+
285
331
  def _simplify_query(self, query: str) -> str:
286
332
  """
287
333
  Simplify a PubMed query that returned no results.
288
334
  Progressively removes elements to get a more basic query.
289
-
335
+
290
336
  Args:
291
337
  query: The original query that returned no results
292
-
338
+
293
339
  Returns:
294
340
  Simplified query
295
341
  """
296
342
  logger.info(f"Simplifying query: {query}")
297
-
343
+
298
344
  # Attempt different simplification strategies
299
-
345
+
300
346
  # 1. Remove any MeSH terms and replace with Title/Abstract
301
347
  import re
302
- simplified = re.sub(r'"[^"]+"[Mesh]', lambda m: m.group(0).split('"')[1].lower() + "[Title/Abstract]", query)
303
-
348
+
349
+ simplified = re.sub(
350
+ r'"[^"]+"[Mesh]',
351
+ lambda m: m.group(0).split('"')[1].lower() + "[Title/Abstract]",
352
+ query,
353
+ )
354
+
304
355
  # 2. If that doesn't work, focus on just mRNA and vaccines - the core concepts
305
356
  if simplified == query: # No changes were made
306
- simplified = "(mRNA[Title/Abstract] OR \"messenger RNA\"[Title/Abstract]) AND vaccin*[Title/Abstract]"
307
-
357
+ simplified = '(mRNA[Title/Abstract] OR "messenger RNA"[Title/Abstract]) AND vaccin*[Title/Abstract]'
358
+
308
359
  logger.info(f"Simplified query: {simplified}")
309
360
  return simplified
310
-
361
+
311
362
  def _is_historical_focused(self, query: str) -> bool:
312
363
  """
313
364
  Determine if a query is specifically focused on historical/older information using LLM.
314
365
  Default assumption is that queries should prioritize recent information unless
315
366
  explicitly asking for historical content.
316
-
367
+
317
368
  Args:
318
369
  query: The search query
319
-
370
+
320
371
  Returns:
321
372
  Boolean indicating if the query is focused on historical information
322
373
  """
323
374
  if not self.llm:
324
375
  # Fall back to basic keyword check if no LLM available
325
- historical_terms = ["history", "historical", "early", "initial", "first", "original",
326
- "before", "prior to", "origins", "evolution", "development"]
376
+ historical_terms = [
377
+ "history",
378
+ "historical",
379
+ "early",
380
+ "initial",
381
+ "first",
382
+ "original",
383
+ "before",
384
+ "prior to",
385
+ "origins",
386
+ "evolution",
387
+ "development",
388
+ ]
327
389
  historical_years = [str(year) for year in range(1900, 2020)]
328
-
390
+
329
391
  query_lower = query.lower()
330
392
  has_historical_term = any(term in query_lower for term in historical_terms)
331
393
  has_past_year = any(year in query for year in historical_years)
332
-
394
+
333
395
  return has_historical_term or has_past_year
334
-
396
+
335
397
  try:
336
398
  # Use LLM to determine if the query is focused on historical information
337
399
  prompt = f"""Determine if this query is specifically asking for HISTORICAL or OLDER information.
@@ -343,40 +405,51 @@ Answer ONLY "no" if the query is asking about recent, current, or new informatio
343
405
 
344
406
  The default assumption should be that medical and scientific queries want RECENT information unless clearly specified otherwise.
345
407
  """
346
-
408
+
347
409
  response = self.llm.invoke(prompt)
348
410
  answer = response.content.strip().lower()
349
-
411
+
350
412
  # Log the determination
351
413
  logger.info(f"Historical focus determination for query: '{query}'")
352
414
  logger.info(f"LLM determined historical focus: {answer}")
353
-
415
+
354
416
  return "yes" in answer
355
-
417
+
356
418
  except Exception as e:
357
419
  logger.error(f"Error determining historical focus: {e}")
358
420
  # Fall back to basic keyword check
359
- historical_terms = ["history", "historical", "early", "initial", "first", "original",
360
- "before", "prior to", "origins", "evolution", "development"]
421
+ historical_terms = [
422
+ "history",
423
+ "historical",
424
+ "early",
425
+ "initial",
426
+ "first",
427
+ "original",
428
+ "before",
429
+ "prior to",
430
+ "origins",
431
+ "evolution",
432
+ "development",
433
+ ]
361
434
  return any(term in query.lower() for term in historical_terms)
362
-
435
+
363
436
  def _adaptive_search(self, query: str) -> Tuple[List[str], str]:
364
437
  """
365
438
  Perform an adaptive search that adjusts based on topic volume and whether
366
439
  the query focuses on historical information.
367
-
440
+
368
441
  Args:
369
442
  query: The search query (already optimized)
370
-
443
+
371
444
  Returns:
372
445
  Tuple of (list of PMIDs, search strategy used)
373
446
  """
374
447
  # Estimate topic volume
375
448
  estimated_volume = self._get_result_count(query)
376
-
449
+
377
450
  # Determine if the query is focused on historical information
378
451
  is_historical_focused = self._is_historical_focused(query)
379
-
452
+
380
453
  if is_historical_focused:
381
454
  # User wants historical information - no date filtering
382
455
  time_filter = None
@@ -397,44 +470,52 @@ The default assumption should be that medical and scientific queries want RECENT
397
470
  # Rare topic - still use recency but with wider range
398
471
  time_filter = '"last 10 years"[pdat]'
399
472
  strategy = "rare_topic"
400
-
473
+
401
474
  # Run search based on strategy
402
475
  if time_filter:
403
476
  # Try with adaptive time filter
404
477
  query_with_time = f"({query}) AND {time_filter}"
405
- logger.info(f"Using adaptive search strategy: {strategy} with filter: {time_filter}")
478
+ logger.info(
479
+ f"Using adaptive search strategy: {strategy} with filter: {time_filter}"
480
+ )
406
481
  results = self._search_pubmed(query_with_time)
407
-
482
+
408
483
  # If too few results, gradually expand time window
409
- if len(results) < 5 and not '"last 10 years"[pdat]' in time_filter:
410
- logger.info(f"Insufficient results ({len(results)}), expanding time window")
484
+ if len(results) < 5 and '"last 10 years"[pdat]' not in time_filter:
485
+ logger.info(
486
+ f"Insufficient results ({len(results)}), expanding time window"
487
+ )
411
488
  expanded_time = self._expand_time_window(time_filter)
412
489
  query_with_expanded_time = f"({query}) AND {expanded_time}"
413
490
  expanded_results = self._search_pubmed(query_with_expanded_time)
414
-
491
+
415
492
  if len(expanded_results) > len(results):
416
- logger.info(f"Expanded time window yielded {len(expanded_results)} results")
493
+ logger.info(
494
+ f"Expanded time window yielded {len(expanded_results)} results"
495
+ )
417
496
  return expanded_results, f"{strategy}_expanded"
418
-
497
+
419
498
  # If still no results, try without time filter
420
499
  if not results:
421
- logger.info("No results with time filter, trying without time restrictions")
500
+ logger.info(
501
+ "No results with time filter, trying without time restrictions"
502
+ )
422
503
  results = self._search_pubmed(query)
423
504
  strategy = "no_time_filter"
424
505
  else:
425
506
  # Historical query - run without time filter
426
- logger.info(f"Using historical search strategy without date filtering")
507
+ logger.info("Using historical search strategy without date filtering")
427
508
  results = self._search_pubmed(query)
428
-
509
+
429
510
  return results, strategy
430
-
511
+
431
512
  def _search_pubmed(self, query: str) -> List[str]:
432
513
  """
433
514
  Search PubMed and return a list of article IDs.
434
-
515
+
435
516
  Args:
436
517
  query: The search query
437
-
518
+
438
519
  Returns:
439
520
  List of PubMed IDs matching the query
440
521
  """
@@ -445,76 +526,76 @@ The default assumption should be that medical and scientific queries want RECENT
445
526
  "term": query,
446
527
  "retmode": "json",
447
528
  "retmax": self.max_results,
448
- "usehistory": "y"
529
+ "usehistory": "y",
449
530
  }
450
-
531
+
451
532
  # Add API key if available
452
533
  if self.api_key:
453
534
  params["api_key"] = self.api_key
454
-
535
+
455
536
  # Add date restriction if specified
456
537
  if self.days_limit:
457
538
  params["reldate"] = self.days_limit
458
539
  params["datetype"] = "pdat" # Publication date
459
-
540
+
460
541
  # Execute search request
461
542
  response = requests.get(self.search_url, params=params)
462
543
  response.raise_for_status()
463
-
544
+
464
545
  # Parse response
465
546
  data = response.json()
466
547
  id_list = data["esearchresult"]["idlist"]
467
-
548
+
468
549
  logger.info(f"PubMed search for '{query}' found {len(id_list)} results")
469
550
  return id_list
470
-
551
+
471
552
  except Exception as e:
472
553
  logger.error(f"Error searching PubMed: {e}")
473
554
  return []
474
-
555
+
475
556
  def _get_article_summaries(self, id_list: List[str]) -> List[Dict[str, Any]]:
476
557
  """
477
558
  Get summaries for a list of PubMed article IDs.
478
-
559
+
479
560
  Args:
480
561
  id_list: List of PubMed IDs
481
-
562
+
482
563
  Returns:
483
564
  List of article summary dictionaries
484
565
  """
485
566
  if not id_list:
486
567
  return []
487
-
568
+
488
569
  try:
489
570
  # Prepare parameters
490
571
  params = {
491
572
  "db": "pubmed",
492
573
  "id": ",".join(id_list),
493
574
  "retmode": "json",
494
- "rettype": "summary"
575
+ "rettype": "summary",
495
576
  }
496
-
577
+
497
578
  # Add API key if available
498
579
  if self.api_key:
499
580
  params["api_key"] = self.api_key
500
-
581
+
501
582
  # Execute request
502
583
  response = requests.get(self.summary_url, params=params)
503
584
  response.raise_for_status()
504
-
585
+
505
586
  # Parse response
506
587
  data = response.json()
507
588
  summaries = []
508
-
589
+
509
590
  for pmid in id_list:
510
591
  if pmid in data["result"]:
511
592
  article = data["result"][pmid]
512
-
593
+
513
594
  # Extract authors (if available)
514
595
  authors = []
515
596
  if "authors" in article:
516
597
  authors = [author["name"] for author in article["authors"]]
517
-
598
+
518
599
  # Create summary dictionary
519
600
  summary = {
520
601
  "id": pmid,
@@ -524,73 +605,73 @@ The default assumption should be that medical and scientific queries want RECENT
524
605
  "authors": authors,
525
606
  "journal": article.get("fulljournalname", ""),
526
607
  "doi": article.get("doi", ""),
527
- "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
608
+ "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
528
609
  }
529
-
610
+
530
611
  summaries.append(summary)
531
-
612
+
532
613
  return summaries
533
-
614
+
534
615
  except Exception as e:
535
616
  logger.error(f"Error getting article summaries: {e}")
536
617
  return []
537
-
618
+
538
619
  def _get_article_abstracts(self, id_list: List[str]) -> Dict[str, str]:
539
620
  """
540
621
  Get abstracts for a list of PubMed article IDs.
541
-
622
+
542
623
  Args:
543
624
  id_list: List of PubMed IDs
544
-
625
+
545
626
  Returns:
546
627
  Dictionary mapping PubMed IDs to their abstracts
547
628
  """
548
629
  if not id_list:
549
630
  return {}
550
-
631
+
551
632
  try:
552
633
  # Prepare parameters
553
634
  params = {
554
635
  "db": "pubmed",
555
636
  "id": ",".join(id_list),
556
637
  "retmode": "xml",
557
- "rettype": "abstract"
638
+ "rettype": "abstract",
558
639
  }
559
-
640
+
560
641
  # Add API key if available
561
642
  if self.api_key:
562
643
  params["api_key"] = self.api_key
563
-
644
+
564
645
  # Execute request
565
646
  response = requests.get(self.fetch_url, params=params)
566
647
  response.raise_for_status()
567
-
648
+
568
649
  # Parse XML response
569
650
  root = ET.fromstring(response.text)
570
-
651
+
571
652
  # Extract abstracts
572
653
  abstracts = {}
573
-
654
+
574
655
  for article in root.findall(".//PubmedArticle"):
575
656
  pmid_elem = article.find(".//PMID")
576
657
  pmid = pmid_elem.text if pmid_elem is not None else None
577
-
658
+
578
659
  if pmid is None:
579
660
  continue
580
-
661
+
581
662
  # Find abstract text
582
663
  abstract_text = ""
583
664
  abstract_elem = article.find(".//AbstractText")
584
-
665
+
585
666
  if abstract_elem is not None:
586
667
  abstract_text = abstract_elem.text or ""
587
-
668
+
588
669
  # Some abstracts are split into multiple sections
589
670
  for section in article.findall(".//AbstractText"):
590
671
  # Get section label if it exists
591
672
  label = section.get("Label")
592
673
  section_text = section.text or ""
593
-
674
+
594
675
  if label and section_text:
595
676
  if abstract_text:
596
677
  abstract_text += f"\n\n{label}: {section_text}"
@@ -601,30 +682,30 @@ The default assumption should be that medical and scientific queries want RECENT
601
682
  abstract_text += f"\n\n{section_text}"
602
683
  else:
603
684
  abstract_text = section_text
604
-
685
+
605
686
  # Store in dictionary
606
687
  if pmid and abstract_text:
607
688
  abstracts[pmid] = abstract_text
608
-
689
+
609
690
  return abstracts
610
-
691
+
611
692
  except Exception as e:
612
693
  logger.error(f"Error getting article abstracts: {e}")
613
694
  return {}
614
-
695
+
615
696
  def _find_pmc_ids(self, pmid_list: List[str]) -> Dict[str, str]:
616
697
  """
617
698
  Find PMC IDs for the given PubMed IDs (for full-text access).
618
-
699
+
619
700
  Args:
620
701
  pmid_list: List of PubMed IDs
621
-
702
+
622
703
  Returns:
623
704
  Dictionary mapping PubMed IDs to their PMC IDs (if available)
624
705
  """
625
706
  if not pmid_list or not self.get_full_text:
626
707
  return {}
627
-
708
+
628
709
  try:
629
710
  # Prepare parameters
630
711
  params = {
@@ -632,89 +713,84 @@ The default assumption should be that medical and scientific queries want RECENT
632
713
  "db": "pmc",
633
714
  "linkname": "pubmed_pmc",
634
715
  "id": ",".join(pmid_list),
635
- "retmode": "json"
716
+ "retmode": "json",
636
717
  }
637
-
718
+
638
719
  # Add API key if available
639
720
  if self.api_key:
640
721
  params["api_key"] = self.api_key
641
-
722
+
642
723
  # Execute request
643
724
  response = requests.get(self.link_url, params=params)
644
725
  response.raise_for_status()
645
-
726
+
646
727
  # Parse response
647
728
  data = response.json()
648
-
729
+
649
730
  # Map PubMed IDs to PMC IDs
650
731
  pmid_to_pmcid = {}
651
-
732
+
652
733
  for linkset in data.get("linksets", []):
653
734
  pmid = linkset.get("ids", [None])[0]
654
-
735
+
655
736
  if not pmid:
656
737
  continue
657
-
738
+
658
739
  for link in linkset.get("linksetdbs", []):
659
740
  if link.get("linkname") == "pubmed_pmc":
660
741
  pmcids = link.get("links", [])
661
742
  if pmcids:
662
743
  pmid_to_pmcid[str(pmid)] = f"PMC{pmcids[0]}"
663
-
744
+
664
745
  logger.info(f"Found {len(pmid_to_pmcid)} PMC IDs for full-text access")
665
746
  return pmid_to_pmcid
666
-
747
+
667
748
  except Exception as e:
668
749
  logger.error(f"Error finding PMC IDs: {e}")
669
750
  return {}
670
-
751
+
671
752
  def _get_pmc_full_text(self, pmcid: str) -> str:
672
753
  """
673
754
  Get full text for a PMC article.
674
-
755
+
675
756
  Args:
676
757
  pmcid: PMC ID of the article
677
-
758
+
678
759
  Returns:
679
760
  Full text content or empty string if not available
680
761
  """
681
762
  try:
682
763
  # Prepare parameters
683
- params = {
684
- "db": "pmc",
685
- "id": pmcid,
686
- "retmode": "xml",
687
- "rettype": "full"
688
- }
689
-
764
+ params = {"db": "pmc", "id": pmcid, "retmode": "xml", "rettype": "full"}
765
+
690
766
  # Add API key if available
691
767
  if self.api_key:
692
768
  params["api_key"] = self.api_key
693
-
769
+
694
770
  # Execute request
695
771
  response = requests.get(self.fetch_url, params=params)
696
772
  response.raise_for_status()
697
-
773
+
698
774
  # Parse XML response
699
775
  root = ET.fromstring(response.text)
700
-
776
+
701
777
  # Extract full text
702
778
  full_text = []
703
-
779
+
704
780
  # Extract article title
705
781
  title_elem = root.find(".//article-title")
706
782
  if title_elem is not None and title_elem.text:
707
783
  full_text.append(f"# {title_elem.text}")
708
-
784
+
709
785
  # Extract abstract
710
786
  abstract_paras = root.findall(".//abstract//p")
711
787
  if abstract_paras:
712
788
  full_text.append("\n## Abstract\n")
713
789
  for p in abstract_paras:
714
- text = ''.join(p.itertext())
790
+ text = "".join(p.itertext())
715
791
  if text:
716
792
  full_text.append(text)
717
-
793
+
718
794
  # Extract body content
719
795
  body = root.find(".//body")
720
796
  if body is not None:
@@ -723,37 +799,37 @@ The default assumption should be that medical and scientific queries want RECENT
723
799
  title = section.find(".//title")
724
800
  if title is not None and title.text:
725
801
  full_text.append(f"\n## {title.text}\n")
726
-
802
+
727
803
  # Get paragraphs
728
804
  for p in section.findall(".//p"):
729
- text = ''.join(p.itertext())
805
+ text = "".join(p.itertext())
730
806
  if text:
731
807
  full_text.append(text)
732
-
808
+
733
809
  return "\n\n".join(full_text)
734
-
810
+
735
811
  except Exception as e:
736
812
  logger.error(f"Error getting PMC full text: {e}")
737
813
  return ""
738
-
814
+
739
815
  def _get_previews(self, query: str) -> List[Dict[str, Any]]:
740
816
  """
741
817
  Get preview information for PubMed articles.
742
-
818
+
743
819
  Args:
744
820
  query: The search query
745
-
821
+
746
822
  Returns:
747
823
  List of preview dictionaries
748
824
  """
749
825
  logger.info(f"Getting PubMed previews for query: {query}")
750
-
826
+
751
827
  # Optimize the query for PubMed if LLM is available
752
828
  optimized_query = self._optimize_query_for_pubmed(query)
753
-
829
+
754
830
  # Perform adaptive search
755
831
  pmid_list, strategy = self._adaptive_search(optimized_query)
756
-
832
+
757
833
  # If no results, try a simplified query
758
834
  if not pmid_list:
759
835
  logger.warning(f"No PubMed results found using strategy: {strategy}")
@@ -763,17 +839,17 @@ The default assumption should be that medical and scientific queries want RECENT
763
839
  pmid_list, strategy = self._adaptive_search(simplified_query)
764
840
  if pmid_list:
765
841
  logger.info(f"Simplified query found {len(pmid_list)} results")
766
-
842
+
767
843
  if not pmid_list:
768
- logger.warning(f"No PubMed results found after query simplification")
844
+ logger.warning("No PubMed results found after query simplification")
769
845
  return []
770
-
846
+
771
847
  # Get article summaries
772
848
  summaries = self._get_article_summaries(pmid_list)
773
-
849
+
774
850
  # Rate limit compliance (NCBI allows 10 requests per second with an API key, 3 without)
775
851
  time.sleep(0.1 if self.api_key else 0.33)
776
-
852
+
777
853
  # Format as previews
778
854
  previews = []
779
855
  for summary in summaries:
@@ -782,7 +858,7 @@ The default assumption should be that medical and scientific queries want RECENT
782
858
  if len(authors_text) > 100:
783
859
  # Truncate long author lists
784
860
  authors_text = authors_text[:97] + "..."
785
-
861
+
786
862
  # Create preview with basic information
787
863
  preview = {
788
864
  "id": summary["id"],
@@ -795,73 +871,81 @@ The default assumption should be that medical and scientific queries want RECENT
795
871
  "doi": summary.get("doi", ""),
796
872
  "source": "PubMed",
797
873
  "_pmid": summary["id"], # Store PMID for later use
798
- "_search_strategy": strategy # Store search strategy for analytics
874
+ "_search_strategy": strategy, # Store search strategy for analytics
799
875
  }
800
-
876
+
801
877
  previews.append(preview)
802
-
878
+
803
879
  logger.info(f"Found {len(previews)} PubMed previews using strategy: {strategy}")
804
880
  return previews
805
-
806
- def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
881
+
882
+ def _get_full_content(
883
+ self, relevant_items: List[Dict[str, Any]]
884
+ ) -> List[Dict[str, Any]]:
807
885
  """
808
886
  Get full content for the relevant PubMed articles.
809
887
  Efficiently manages which content to retrieve (abstracts and/or full text).
810
-
888
+
811
889
  Args:
812
890
  relevant_items: List of relevant preview dictionaries
813
-
891
+
814
892
  Returns:
815
893
  List of result dictionaries with full content
816
894
  """
817
895
  # Check if we should add full content
818
- if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
896
+ if (
897
+ hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
898
+ and search_config.SEARCH_SNIPPETS_ONLY
899
+ ):
819
900
  logger.info("Snippet-only mode, skipping full content retrieval")
820
901
  return relevant_items
821
-
902
+
822
903
  logger.info(f"Getting content for {len(relevant_items)} PubMed articles")
823
-
904
+
824
905
  # Collect all PMIDs for relevant items
825
906
  pmids = []
826
907
  for item in relevant_items:
827
908
  if "_pmid" in item:
828
909
  pmids.append(item["_pmid"])
829
-
910
+
830
911
  # Get abstracts if requested and PMIDs exist
831
912
  abstracts = {}
832
913
  if self.get_abstracts and pmids:
833
914
  abstracts = self._get_article_abstracts(pmids)
834
-
915
+
835
916
  # Find PMC IDs for full-text retrieval (if enabled)
836
917
  pmid_to_pmcid = {}
837
918
  if self.get_full_text and pmids:
838
919
  pmid_to_pmcid = self._find_pmc_ids(pmids)
839
-
920
+
840
921
  # Add content to results
841
922
  results = []
842
923
  for item in relevant_items:
843
924
  result = item.copy()
844
925
  pmid = item.get("_pmid", "")
845
-
926
+
846
927
  # Add abstract if available
847
928
  if pmid in abstracts:
848
929
  result["abstract"] = abstracts[pmid]
849
-
930
+
850
931
  # Use abstract as content if no full text
851
932
  if pmid not in pmid_to_pmcid:
852
933
  result["full_content"] = abstracts[pmid]
853
934
  result["content"] = abstracts[pmid]
854
935
  result["content_type"] = "abstract"
855
-
936
+
856
937
  # Add full text for a limited number of top articles
857
- if (pmid in pmid_to_pmcid and
858
- self.get_full_text and
859
- len([r for r in results if r.get("content_type") == "full_text"]) < self.full_text_limit):
860
-
938
+ if (
939
+ pmid in pmid_to_pmcid
940
+ and self.get_full_text
941
+ and len([r for r in results if r.get("content_type") == "full_text"])
942
+ < self.full_text_limit
943
+ ):
944
+
861
945
  # Get full text content
862
946
  pmcid = pmid_to_pmcid[pmid]
863
947
  full_text = self._get_pmc_full_text(pmcid)
864
-
948
+
865
949
  if full_text:
866
950
  result["full_content"] = full_text
867
951
  result["content"] = full_text
@@ -872,120 +956,128 @@ The default assumption should be that medical and scientific queries want RECENT
872
956
  result["full_content"] = abstracts[pmid]
873
957
  result["content"] = abstracts[pmid]
874
958
  result["content_type"] = "abstract"
875
-
959
+
876
960
  # Remove temporary fields
877
961
  if "_pmid" in result:
878
962
  del result["_pmid"]
879
963
  if "_search_strategy" in result:
880
964
  del result["_search_strategy"]
881
-
965
+
882
966
  results.append(result)
883
-
967
+
884
968
  return results
885
-
886
- def search_by_author(self, author_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
969
+
970
+ def search_by_author(
971
+ self, author_name: str, max_results: Optional[int] = None
972
+ ) -> List[Dict[str, Any]]:
887
973
  """
888
974
  Search for articles by a specific author.
889
-
975
+
890
976
  Args:
891
977
  author_name: Name of the author
892
978
  max_results: Maximum number of results (defaults to self.max_results)
893
-
979
+
894
980
  Returns:
895
981
  List of articles by the author
896
982
  """
897
983
  original_max_results = self.max_results
898
-
984
+
899
985
  try:
900
986
  if max_results:
901
987
  self.max_results = max_results
902
-
988
+
903
989
  query = f"{author_name}[Author]"
904
990
  return self.run(query)
905
-
991
+
906
992
  finally:
907
993
  # Restore original value
908
994
  self.max_results = original_max_results
909
-
910
- def search_by_journal(self, journal_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
995
+
996
+ def search_by_journal(
997
+ self, journal_name: str, max_results: Optional[int] = None
998
+ ) -> List[Dict[str, Any]]:
911
999
  """
912
1000
  Search for articles in a specific journal.
913
-
1001
+
914
1002
  Args:
915
1003
  journal_name: Name of the journal
916
1004
  max_results: Maximum number of results (defaults to self.max_results)
917
-
1005
+
918
1006
  Returns:
919
1007
  List of articles from the journal
920
1008
  """
921
1009
  original_max_results = self.max_results
922
-
1010
+
923
1011
  try:
924
1012
  if max_results:
925
1013
  self.max_results = max_results
926
-
1014
+
927
1015
  query = f"{journal_name}[Journal]"
928
1016
  return self.run(query)
929
-
1017
+
930
1018
  finally:
931
1019
  # Restore original value
932
1020
  self.max_results = original_max_results
933
-
934
- def search_recent(self, query: str, days: int = 30, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
1021
+
1022
+ def search_recent(
1023
+ self, query: str, days: int = 30, max_results: Optional[int] = None
1024
+ ) -> List[Dict[str, Any]]:
935
1025
  """
936
1026
  Search for recent articles matching the query.
937
-
1027
+
938
1028
  Args:
939
1029
  query: The search query
940
1030
  days: Number of days to look back
941
1031
  max_results: Maximum number of results (defaults to self.max_results)
942
-
1032
+
943
1033
  Returns:
944
1034
  List of recent articles matching the query
945
1035
  """
946
1036
  original_max_results = self.max_results
947
1037
  original_days_limit = self.days_limit
948
-
1038
+
949
1039
  try:
950
1040
  if max_results:
951
1041
  self.max_results = max_results
952
-
1042
+
953
1043
  # Set days limit for this search
954
1044
  self.days_limit = days
955
-
1045
+
956
1046
  return self.run(query)
957
-
1047
+
958
1048
  finally:
959
1049
  # Restore original values
960
1050
  self.max_results = original_max_results
961
1051
  self.days_limit = original_days_limit
962
-
963
- def advanced_search(self, terms: Dict[str, str], max_results: Optional[int] = None) -> List[Dict[str, Any]]:
1052
+
1053
+ def advanced_search(
1054
+ self, terms: Dict[str, str], max_results: Optional[int] = None
1055
+ ) -> List[Dict[str, Any]]:
964
1056
  """
965
1057
  Perform an advanced search with field-specific terms.
966
-
1058
+
967
1059
  Args:
968
1060
  terms: Dictionary mapping fields to search terms
969
1061
  Valid fields: Author, Journal, Title, MeSH, Affiliation, etc.
970
1062
  max_results: Maximum number of results (defaults to self.max_results)
971
-
1063
+
972
1064
  Returns:
973
1065
  List of articles matching the advanced query
974
1066
  """
975
1067
  original_max_results = self.max_results
976
-
1068
+
977
1069
  try:
978
1070
  if max_results:
979
1071
  self.max_results = max_results
980
-
1072
+
981
1073
  # Build advanced query string
982
1074
  query_parts = []
983
1075
  for field, term in terms.items():
984
1076
  query_parts.append(f"{term}[{field}]")
985
-
1077
+
986
1078
  query = " AND ".join(query_parts)
987
1079
  return self.run(query)
988
-
1080
+
989
1081
  finally:
990
1082
  # Restore original value
991
- self.max_results = original_max_results
1083
+ self.max_results = original_max_results