local-deep-research 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/defaults/search_engines.toml +2 -2
- local_deep_research/search_system.py +9 -10
- local_deep_research/web/app.py +6 -22
- local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +102 -661
- local_deep_research/web_search_engines/search_engine_base.py +5 -14
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/METADATA +1 -1
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/RECORD +11 -11
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/WHEEL +0 -0
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/licenses/LICENSE +0 -0
- {local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/top_level.txt +0 -0
local_deep_research/defaults/search_engines.toml
CHANGED
@@ -37,7 +37,7 @@ module_path = "local_deep_research.web_search_engines.engines.search_engine_pubm
 class_name = "PubMedSearchEngine"
 requires_api_key = false
 api_key_env = "NCBI_API_KEY"
-reliability = 0.
+reliability = 0.98
 strengths = [
     "biomedical literature", "medical research", "clinical studies",
     "life sciences", "health information", "scientific papers"
@@ -191,7 +191,7 @@ module_path = "local_deep_research.web_search_engines.engines.search_engine_sema
 class_name = "SemanticScholarSearchEngine"
 requires_api_key = false
 api_key_env = "S2_API_KEY"
-reliability = 0.
+reliability = 0.87
 strengths = [
     "comprehensive scientific literature",
     "extensive citation network",
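The two hunks above pin explicit reliability weights (0.98 for PubMed, 0.87 for Semantic Scholar); the previous values are elided in this diff view. For illustration only, a consumer of these defaults might read such a weight as below; the section name `pubmed` and this loader are assumptions for the sketch, not the package's actual config code.

```python
# Illustrative sketch (not the package's loader): reading an engine's
# reliability weight from search_engines.toml with the stdlib TOML parser.
import tomllib  # standard library in Python 3.11+

with open("search_engines.toml", "rb") as f:
    engines = tomllib.load(f)

# "pubmed" is a hypothetical section name used only for this sketch.
reliability = engines.get("pubmed", {}).get("reliability", 0.0)
print(reliability)  # 0.98 with the new defaults
```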
local_deep_research/search_system.py
CHANGED
@@ -28,7 +28,7 @@ class AdvancedSearchSystem:
 
         # Check if search is available, log warning if not
         if self.search is None:
-
+            logger.info("WARNING: Search system initialized with no search engine! Research will not be effective.")
             self._update_progress("WARNING: No search engine available", None, {"error": "No search engine configured properly"})
 
 
@@ -101,7 +101,7 @@ class AdvancedSearchSystem:
         self._update_progress("Knowledge compression complete", None)
         response = remove_think_tags(response.content)
         response = str(response) #+ "\n\n" + str(formatted_links)
-
+
         return response
 
     def analyze_topic(self, query: str) -> Dict:
@@ -165,7 +165,7 @@ class AdvancedSearchSystem:
                 search_results = self.search.run(question)
             except Exception as e:
                 error_msg = f"Error during search: {str(e)}"
-
+                logger.info(f"SEARCH ERROR: {error_msg}")
                 self._update_progress(error_msg,
                                       int(question_progress_base + 2),
                                       {"phase": "search_error", "error": str(e)})
@@ -190,7 +190,7 @@ class AdvancedSearchSystem:
             self._update_progress(f"Analyzing results for: {question}",
                                   int(question_progress_base + 5),
                                   {"phase": "analysis"})
-
+
 
             try:
                 result = self.citation_handler.analyze_followup(
@@ -203,7 +203,7 @@ class AdvancedSearchSystem:
                 if links:
                     formatted_links=format_links(links=links)
 
-                logger.
+                logger.info(f"Generated questions: {formatted_links}")
                 if result is not None:
                     results_with_links = str(result["content"])
                     findings.append(
@@ -219,7 +219,6 @@ class AdvancedSearchSystem:
                 if settings.general.knowledge_accumulation != str(KnowledgeAccumulationApproach.NO_KNOWLEDGE.value):
                     current_knowledge = current_knowledge + "\n\n\n New: \n" + results_with_links
 
-                logger.info(settings.general.knowledge_accumulation)
                 if settings.general.knowledge_accumulation == str(KnowledgeAccumulationApproach.QUESTION.value):
                     logger.info("Compressing knowledge")
                     self._update_progress(f"Compress Knowledge for: {question}",
@@ -232,7 +231,7 @@ class AdvancedSearchSystem:
                                           {"phase": "analysis_complete"})
             except Exception as e:
                 error_msg = f"Error analyzing results: {str(e)}"
-
+                logger.info(f"ANALYSIS ERROR: {error_msg}")
                 self._update_progress(error_msg,
                                       int(question_progress_base + 10),
                                       {"phase": "analysis_error", "error": str(e)})
@@ -251,7 +250,7 @@ class AdvancedSearchSystem:
             logger.info("FINISHED ITERATION - Compressing Knowledge")
         except Exception as e:
             error_msg = f"Error compressing knowledge: {str(e)}"
-
+            logger.info(f"COMPRESSION ERROR: {error_msg}")
             self._update_progress(error_msg,
                                   int((iteration / total_iterations) * 100 - 3),
                                   {"phase": "compression_error", "error": str(e)})
@@ -266,7 +265,7 @@ class AdvancedSearchSystem:
             formatted_findings = self._save_findings(findings, current_knowledge, query)
         except Exception as e:
             error_msg = f"Error saving findings: {str(e)}"
-
+            logger.info(f"SAVE ERROR: {error_msg}")
             self._update_progress(error_msg,
                                   int((iteration / total_iterations) * 100),
                                   {"phase": "save_error", "error": str(e)})
@@ -278,7 +277,7 @@ class AdvancedSearchSystem:
             "findings": findings,
             "iterations": iteration,
             "questions": self.questions_by_iteration,
-            "formatted_findings": formatted_findings
+            "formatted_findings": formatted_findings,
             "current_knowledge": current_knowledge
         }
 
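For reference, this is the shape of the dict `analyze_topic` now returns; as the diff shows, the trailing comma added after `formatted_findings` is what lets the `current_knowledge` entry on the next line parse. A sketch with placeholder values, not real output:

```python
# Placeholder values; the key names come from the hunk above.
results = {
    "findings": [],                # per-question findings collected so far
    "iterations": 2,               # number of completed search iterations
    "questions": {},               # questions_by_iteration mapping
    "formatted_findings": "...",   # trailing comma added in 0.1.16
    "current_knowledge": "...",    # accumulated knowledge text
}
```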
local_deep_research/web/app.py
CHANGED
@@ -1001,27 +1001,14 @@ def run_research_process(research_id, query, mode):
         if mode == 'quick':
             # Quick Summary
             if results.get('findings'):
-
-
-
-                # Safer access to formatted_findings with logging
-                print(f"Results keys: {list(results.keys())}")
-
-                # Check if formatted_findings exists in results
-                if 'formatted_findings' not in results:
-                    logger.info("WARNING: 'formatted_findings' not found in results, using fallback")
-                    # Create fallback formatted findings from available data
-                    raw_formatted_findings = "# Research Findings\n\n"
-                    raw_formatted_findings = raw_formatted_findings + str(results.get('current_knowledge'))
-                    for i, finding in enumerate(results.get('findings', [])):
-                        raw_formatted_findings += f"## Finding {i+1}\n\n{finding.get('content', '')}\n\n"
-                else:
-                    raw_formatted_findings = results['formatted_findings']
-                    logger.info(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
+
+                raw_formatted_findings = results['formatted_findings']
+                logger.info(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
 
                 try:
+                    clean_markdown = raw_formatted_findings
                     # ADDED CODE: Convert debug output to clean markdown
-                    clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
+                    #clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
                     print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
 
                     # First send a progress update for generating the summary
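0.1.16 drops the defensive fallback here and accesses `formatted_findings` directly, which is presumably safe because the `search_system.py` change above always includes that key in the results dict. The simplified access path, shown on placeholder data:

```python
# Placeholder results dict; mirrors the simplified access in the new code.
results = {"formatted_findings": "# Research Findings\n...", "findings": []}
raw_formatted_findings = results['formatted_findings']
print(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
```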
@@ -1693,10 +1680,7 @@ def convert_debug_to_markdown(raw_text, query):
     lines_after = len(content.split("\n"))
     print(f"Removed {lines_before - lines_after} divider lines")
 
-
-    if "COMPLETE RESEARCH OUTPUT" in content:
-        print("Found and removing COMPLETE RESEARCH OUTPUT section")
-        content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
+
 
     # Remove SEARCH QUESTIONS BY ITERATION section
     if "SEARCH QUESTIONS BY ITERATION:" in content:
local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py
CHANGED
@@ -64,6 +64,7 @@ class SemanticScholarSearchEngine(BaseSearchEngine):
         """
         # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
         super().__init__(llm=llm, max_filtered_results=max_filtered_results, max_results=max_results)
+
         self.api_key = api_key
         self.year_range = year_range
         self.get_abstracts = get_abstracts
@@ -82,13 +83,7 @@ class SemanticScholarSearchEngine(BaseSearchEngine):
         # Base API URLs
         self.base_url = "https://api.semanticscholar.org/graph/v1"
         self.paper_search_url = f"{self.base_url}/paper/search"
-        self.paper_bulk_search_url = f"{self.base_url}/paper/search/bulk"
-        self.paper_batch_url = f"{self.base_url}/paper/batch"
         self.paper_details_url = f"{self.base_url}/paper"
-        self.author_search_url = f"{self.base_url}/author/search"
-        self.author_details_url = f"{self.base_url}/author"
-        self.recommendations_url = "https://api.semanticscholar.org/recommendations/v1/papers"
-        self.datasets_url = "https://api.semanticscholar.org/datasets/v1"
 
         # Create a session with retry capabilities
         self.session = self._create_session()
@@ -133,15 +128,6 @@ class SemanticScholarSearchEngine(BaseSearchEngine):
 
         self.last_request_time = time.time()
 
-    def _get_headers(self) -> Dict[str, str]:
-        """Get the headers for API requests"""
-        headers = {"Accept": "application/json"}
-
-        if self.api_key:
-            headers["x-api-key"] = self.api_key
-
-        return headers
-
     def _make_request(self, url: str, params: Optional[Dict] = None, data: Optional[Dict] = None,
                      method: str = "GET") -> Dict:
         """
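With `_get_headers` removed, the `Accept` and `x-api-key` headers presumably live on the session built by `_create_session()`, which this diff does not show. A minimal sketch of such a session, assuming standard `requests`/`urllib3` retry wiring; this is not the package's implementation:

```python
# Sketch of a retrying session that carries the API key as a default header.
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def create_session(api_key=None):
    session = requests.Session()
    retries = Retry(total=3, backoff_factor=0.5,
                    status_forcelist=[429, 500, 502, 503, 504])
    session.mount("https://", HTTPAdapter(max_retries=retries))
    session.headers.update({"Accept": "application/json"})
    if api_key:
        # Semantic Scholar expects the key in the x-api-key header
        # (grounded by the removed _get_headers code above).
        session.headers["x-api-key"] = api_key
    return session
```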
@@ -226,16 +212,16 @@ Return ONLY the optimized search query with no explanation.
                 return query
 
             logger.info(f"Original query: '{query}'")
-            logger.info(f"Optimized for
+            logger.info(f"Optimized for search: '{optimized_query}'")
 
             return optimized_query
         except Exception as e:
             logger.error(f"Error optimizing query: {e}")
             return query  # Fall back to original query on error
 
-    def
+    def _direct_search(self, query: str) -> List[Dict[str, Any]]:
         """
-
+        Make a direct search request to the Semantic Scholar API.
 
         Args:
             query: The search query
@@ -244,6 +230,7 @@ Return ONLY the optimized search query with no explanation.
             List of paper dictionaries
         """
         try:
+            # Configure fields to retrieve
             fields = [
                 "paperId",
                 "externalIds",
@@ -260,7 +247,7 @@ Return ONLY the optimized search query with no explanation.
 
             params = {
                 "query": query,
-                "limit": min(self.max_results, 100), #
+                "limit": min(self.max_results, 100), # API limit is 100 per request
                 "fields": ",".join(fields)
             }
 
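Put together, the new `_direct_search` boils down to a single GET against `/paper/search` with a comma-joined field list and a limit capped at 100, as the hunks above show. A standalone sketch of that request with an example query; this is not the package's code:

```python
# Standalone sketch of the request _direct_search() builds.
import requests

base_url = "https://api.semanticscholar.org/graph/v1"
fields = ["paperId", "externalIds", "url", "title", "abstract",
          "venue", "year", "authors", "fieldsOfStudy"]
params = {
    "query": "retrieval augmented generation",  # example query
    "limit": min(20, 100),                      # API limit is 100 per request
    "fields": ",".join(fields),
}
resp = requests.get(f"{base_url}/paper/search", params=params, timeout=30)
papers = resp.json().get("data", [])
print(f"Found {len(papers)} papers")
```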
@@ -281,147 +268,29 @@ Return ONLY the optimized search query with no explanation.
 
             if "data" in response:
                 papers = response["data"]
-                logger.info(f"Found {len(papers)} papers
+                logger.info(f"Found {len(papers)} papers with direct search for query: '{query}'")
                 return papers
             else:
-                logger.warning(f"No data in response for query: '{query}'")
+                logger.warning(f"No data in response for direct search query: '{query}'")
                 return []
 
         except Exception as e:
-            logger.error(f"Error
-            return []
-
-    def _search_papers_bulk(self, query: str, limit: int = 1000) -> List[Dict[str, Any]]:
-        """
-        Search for papers using the bulk search API, which can return up to 1000 papers.
-
-        Args:
-            query: The search query
-            limit: Maximum number of results (up to 1000)
-
-        Returns:
-            List of paper dictionaries
-        """
-        try:
-            fields = [
-                "paperId",
-                "externalIds",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors",
-                "fieldsOfStudy"
-            ]
-
-            if self.get_tldr:
-                fields.append("tldr")
-
-            params = {
-                "query": query,
-                "limit": min(limit, 1000), # Bulk search API can return up to 1000 results
-                "fields": ",".join(fields)
-            }
-
-            # Add year filter if specified
-            if self.year_range:
-                start_year, end_year = self.year_range
-                params["year"] = f"{start_year}-{end_year}"
-
-            # Add fields of study filter if specified
-            if self.fields_of_study:
-                params["fieldsOfStudy"] = ",".join(self.fields_of_study)
-
-            # Add publication types filter if specified
-            if self.publication_types:
-                params["publicationTypes"] = ",".join(self.publication_types)
-
-            response = self._make_request(self.paper_bulk_search_url, params)
-
-            if "data" in response:
-                papers = response["data"]
-                logger.info(f"Found {len(papers)} papers using bulk search for query: '{query}'")
-                total_count = response.get("total", 0)
-                logger.info(f"Total available results: {total_count}")
-
-                # Handle continuation token for pagination if needed
-                if "token" in response and len(papers) < min(total_count, limit):
-                    token = response["token"]
-                    logger.info(f"Continuation token available: {token}")
-                    # The caller would need to handle continuation tokens for pagination
-
-                return papers
-            else:
-                logger.warning(f"No data in response for bulk query: '{query}'")
-                return []
-
-        except Exception as e:
-            logger.error(f"Error in bulk paper search: {e}")
+            logger.error(f"Error in direct search: {e}")
             return []
 
-    def _get_paper_details(self, paper_id: str) -> Dict[str, Any]:
-        """
-        Get detailed information about a specific paper.
-
-        Args:
-            paper_id: Semantic Scholar Paper ID
-
-        Returns:
-            Dictionary with paper details
-        """
-        try:
-            # Construct fields parameter
-            fields = [
-                "paperId",
-                "externalIds",
-                "corpusId",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors",
-                "fieldsOfStudy"
-            ]
-
-            if self.get_tldr:
-                fields.append("tldr")
-
-            if self.get_embeddings:
-                fields.append("embedding")
-
-            # Add citation and reference fields if requested
-            if self.get_citations:
-                fields.append(f"citations.limit({self.citation_limit})")
-
-            if self.get_references:
-                fields.append(f"references.limit({self.reference_limit})")
-
-            # Make the request
-            url = f"{self.paper_details_url}/{paper_id}"
-            params = {"fields": ",".join(fields)}
-
-            return self._make_request(url, params)
-
-        except Exception as e:
-            logger.error(f"Error getting paper details for {paper_id}: {e}")
-            return {}
-
-
     def _adaptive_search(self, query: str) -> Tuple[List[Dict[str, Any]], str]:
         """
         Perform an adaptive search that adjusts based on result volume.
         Uses LLM to generate better fallback queries when available.
 
         Args:
-           query: The search query
+            query: The search query
 
         Returns:
             Tuple of (list of paper results, search strategy used)
         """
         # Start with a standard search
-        papers = self.
+        papers = self._direct_search(query)
         strategy = "standard"
 
         # If no results, try different variations
@@ -430,7 +299,7 @@ Return ONLY the optimized search query with no explanation.
             if '"' in query:
                 unquoted_query = query.replace('"', '')
                 logger.info(f"No results with quoted terms, trying without quotes: {unquoted_query}")
-                papers = self.
+                papers = self._direct_search(unquoted_query)
 
                 if papers:
                     strategy = "unquoted"
@@ -440,21 +309,20 @@ Return ONLY the optimized search query with no explanation.
             if self.llm:
                 try:
                     # Generate alternate search queries focusing on core concepts
-                    prompt = f"""You are helping refine a search query
+                    prompt = f"""You are helping refine a search query that returned no results.
 
-
+Original query: "{query}"
 
-
+The query might be too specific or use natural language phrasing that doesn't match academic paper keywords.
 
-
-
-
-
-
-5. Format each as a concise keyword-focused search term (not a natural language question)
+Please provide THREE alternative search queries that:
+1. Focus on the core academic concepts
+2. Use precise terminology commonly found in academic papers
+3. Break down complex queries into more searchable components
+4. Format each as a concise keyword-focused search term (not a natural language question)
 
-
-
+Format each query on a new line with no numbering or explanation. Keep each query under 8 words and very focused.
+"""
                     # Get the LLM's response
                     response = self.llm.invoke(prompt)
 
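Because the rewritten prompt asks for one query per line with no numbering, the response can be parsed by simply splitting on newlines. A sketch of that parse; the helper name is hypothetical:

```python
# Hypothetical helper: split an LLM response into non-empty query lines.
def parse_alt_queries(response_text: str) -> list[str]:
    lines = [line.strip() for line in response_text.split("\n")]
    return [line for line in lines if line]

alt_queries = parse_alt_queries("cancer immunotherapy outcomes\nPD-1 inhibitor efficacy\n")
for alt_query in alt_queries[:3]:  # the engine also caps at three alternatives
    print(alt_query)
```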
@@ -469,7 +337,7 @@ Return ONLY the optimized search query with no explanation.
                     # Try each alternative query
                     for alt_query in alt_queries[:3]: # Limit to first 3 alternatives
                         logger.info(f"Trying LLM-suggested query: {alt_query}")
-                        alt_papers = self.
+                        alt_papers = self._direct_search(alt_query)
 
                         if alt_papers:
                             logger.info(f"Found {len(alt_papers)} papers using LLM-suggested query: {alt_query}")
@@ -479,53 +347,80 @@ Return ONLY the optimized search query with no explanation.
                     logger.error(f"Error using LLM for query refinement: {e}")
                     # Fall through to simpler strategies
 
-        # Fallback
-
-
-
-
-
-
-
-
-            important_query = ' '.join(important_terms[:5]) # Limit to 5 terms
-            logger.info(f"Trying with important cancer terms: {important_query}")
-            papers = self._search_papers(important_query)
+        # Fallback: Try with the longest words (likely specific terms)
+        words = re.findall(r'\w+', query)
+        longer_words = [word for word in words if len(word) > 6]
+        if longer_words:
+            # Use up to 3 of the longest words
+            longer_words = sorted(longer_words, key=len, reverse=True)[:3]
+            key_terms_query = ' '.join(longer_words)
+            logger.info(f"Trying with key terms: {key_terms_query}")
+            papers = self._direct_search(key_terms_query)
 
             if papers:
-                strategy = "
+                strategy = "key_terms"
                 return papers, strategy
+
+        # Final fallback: Try with just the longest word
+        if words:
+            longest_word = max(words, key=len)
+            if len(longest_word) > 5: # Only use if it's reasonably long
+                logger.info(f"Trying with single key term: {longest_word}")
+                papers = self._direct_search(longest_word)
 
-
-
-
-
-
+                if papers:
+                    strategy = "single_term"
+                    return papers, strategy
+
+        return papers, strategy
+
+    def _get_paper_details(self, paper_id: str) -> Dict[str, Any]:
+        """
+        Get detailed information about a specific paper.
+
+        Args:
+            paper_id: Semantic Scholar Paper ID
 
-
-
+        Returns:
+            Dictionary with paper details
+        """
+        try:
+            # Construct fields parameter
+            fields = [
+                "paperId",
+                "externalIds",
+                "corpusId",
+                "url",
+                "title",
+                "abstract",
+                "venue",
+                "year",
+                "authors",
+                "fieldsOfStudy"
+            ]
 
-            if
-
-            logger.info(f"Trying with specific cancer-treatment pair: {specific_query}")
-            papers = self._search_papers(specific_query)
+            if self.get_tldr:
+                fields.append("tldr")
 
-
-
-            return papers, strategy
-
-        # Fallback 3: Extract the longest word (likely a specific term)
-        longest_word = max(re.findall(r'\w+', query), key=len, default='')
-        if len(longest_word) > 6:
-            logger.info(f"Trying with primary keyword: {longest_word}")
-            papers = self._search_papers(longest_word)
+            if self.get_embeddings:
+                fields.append("embedding")
 
-
-
-
-
-
-
+            # Add citation and reference fields if requested
+            if self.get_citations:
+                fields.append(f"citations.limit({self.citation_limit})")
+
+            if self.get_references:
+                fields.append(f"references.limit({self.reference_limit})")
+
+            # Make the request
+            url = f"{self.paper_details_url}/{paper_id}"
+            params = {"fields": ",".join(fields)}
+
+            return self._make_request(url, params)
+
+        except Exception as e:
+            logger.error(f"Error getting paper details for {paper_id}: {e}")
+            return {}
 
     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
         """
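The new fallback chain is generic rather than hard-coded to cancer terms: first a query built from up to three words longer than six characters, then the single longest word. Condensed into a self-contained sketch (the helper name is hypothetical):

```python
# Condensed sketch of the keyword fallback chain shown in the hunk above.
import re

def fallback_queries(query: str) -> list[str]:
    words = re.findall(r'\w+', query)
    candidates = []
    # Up to three of the longest words over six characters.
    longer_words = sorted((w for w in words if len(w) > 6), key=len, reverse=True)[:3]
    if longer_words:
        candidates.append(' '.join(longer_words))
    # Final fallback: the single longest word, if reasonably long.
    if words:
        longest_word = max(words, key=len)
        if len(longest_word) > 5:
            candidates.append(longest_word)
    return candidates

print(fallback_queries("long-term effects of intermittent fasting on metabolism"))
# ['intermittent metabolism effects', 'intermittent']
```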
@@ -542,11 +437,11 @@ Return ONLY the optimized search query with no explanation.
         # Optimize the query if LLM is available
         optimized_query = self._optimize_query(query)
 
-        #
+        # Use the adaptive search approach
         papers, strategy = self._adaptive_search(optimized_query)
 
         if not papers:
-            logger.warning(f"No Semantic Scholar results found
+            logger.warning(f"No Semantic Scholar results found")
             return []
 
         # Format as previews
@@ -583,10 +478,10 @@ Return ONLY the optimized search query with no explanation.
                     "id": paper_id if paper_id else "",
                     "title": title if title else "",
                     "link": url if url else "",
-                    "snippet": snippet,
-                    "authors": authors,
+                    "snippet": snippet,
+                    "authors": authors,
                     "venue": venue if venue else "",
-                    "year": year,
+                    "year": year,
                     "external_ids": external_ids if external_ids else {},
                     "source": "Semantic Scholar",
                     "_paper_id": paper_id if paper_id else "",
@@ -602,6 +497,13 @@ Return ONLY the optimized search query with no explanation.
                 logger.error(f"Error processing paper preview: {e}")
                 # Continue with the next paper
 
+        # Sort by year (newer first) if available
+        previews = sorted(
+            previews,
+            key=lambda p: p.get("year", 0) if p.get("year") is not None else 0,
+            reverse=True
+        )
+
         logger.info(f"Found {len(previews)} Semantic Scholar previews using strategy: {strategy}")
         return previews
 
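The added sort keeps papers with no year from breaking the newest-first ordering by mapping `None` to 0. The same key, shown on toy data:

```python
# Toy data: a paper without a year sinks to the end of the ordering.
previews = [{"title": "A", "year": 2019}, {"title": "B", "year": None}, {"title": "C", "year": 2023}]
previews = sorted(
    previews,
    key=lambda p: p.get("year", 0) if p.get("year") is not None else 0,
    reverse=True,
)
print([p["title"] for p in previews])  # ['C', 'A', 'B']
```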
@@ -664,465 +566,4 @@ Return ONLY the optimized search query with no explanation.
 
         results.append(result)
 
-        return results
-
-    def search_by_author(self, author_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Search for papers by a specific author.
-
-        Args:
-            author_name: Name of the author
-            max_results: Maximum number of results (defaults to self.max_results)
-
-        Returns:
-            List of papers by the author
-        """
-        original_max_results = self.max_results
-
-        try:
-            if max_results:
-                self.max_results = max_results
-
-            # First search for the author
-            params = {
-                "query": author_name,
-                "limit": 5 # Limit to top 5 author matches
-            }
-
-            response = self._make_request(self.author_search_url, params)
-
-            if "data" not in response or not response["data"]:
-                logger.warning(f"No authors found matching: {author_name}")
-                return []
-
-            # Use the first (best) author match
-            author = response["data"][0]
-            author_id = author.get("authorId")
-
-            if not author_id:
-                logger.warning(f"No valid author ID found for: {author_name}")
-                return []
-
-            # Get the author's papers
-            fields = [
-                "papers.paperId",
-                "papers.title",
-                "papers.abstract",
-                "papers.venue",
-                "papers.year",
-                "papers.authors"
-            ]
-
-            if self.get_tldr:
-                fields.append("papers.tldr")
-
-            url = f"{self.author_details_url}/{author_id}"
-            author_params = {
-                "fields": ",".join(fields)
-            }
-
-            author_data = self._make_request(url, author_params)
-
-            if "papers" not in author_data or not author_data["papers"]:
-                logger.warning(f"No papers found for author: {author_name}")
-                return []
-
-            # Format as paper results
-            papers = author_data["papers"][:self.max_results]
-
-            # Convert to standard results format
-            results = []
-            for paper in papers:
-                # Format authors
-                authors = []
-                if "authors" in paper and paper["authors"]:
-                    authors = [author.get("name", "") for author in paper["authors"]]
-
-                result = {
-                    "id": paper.get("paperId", ""),
-                    "title": paper.get("title", ""),
-                    "link": f"https://www.semanticscholar.org/paper/{paper.get('paperId', '')}",
-                    "snippet": paper.get("abstract", "")[:250] + "..." if paper.get("abstract", "") and len(paper.get("abstract", "")) > 250 else paper.get("abstract", ""),
-                    "authors": authors,
-                    "venue": paper.get("venue", ""),
-                    "year": paper.get("year"),
-                    "source": "Semantic Scholar",
-
-                    # Include TLDR if available
-                    "tldr": paper.get("tldr", {}).get("text", "") if paper.get("tldr") else ""
-                }
-
-                results.append(result)
-
-            # Add citations and references if needed
-            if self.get_citations or self.get_references:
-                results = self._get_full_content(results)
-
-            return results
-
-        finally:
-            # Restore original value
-            self.max_results = original_max_results
-
-    def search_by_venue(self, venue_name: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Search for papers in a specific venue.
-
-        Args:
-            venue_name: Name of the venue (conference or journal)
-            max_results: Maximum number of results (defaults to self.max_results)
-
-        Returns:
-            List of papers from the venue
-        """
-        original_max_results = self.max_results
-
-        try:
-            if max_results:
-                self.max_results = max_results
-
-            # Semantic Scholar doesn't have a dedicated venue search API
-            # So we search for papers with the venue in the query
-            query = f'venue:"{venue_name}"'
-            return self.run(query)
-
-        finally:
-            # Restore original value
-            self.max_results = original_max_results
-
-    def search_by_year(self, query: str, year: int, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Search for papers from a specific year matching the query.
-
-        Args:
-            query: The search query
-            year: Publication year
-            max_results: Maximum number of results (defaults to self.max_results)
-
-        Returns:
-            List of papers from the specified year matching the query
-        """
-        original_max_results = self.max_results
-        original_year_range = self.year_range
-
-        try:
-            if max_results:
-                self.max_results = max_results
-
-            # Set year range for this search
-            self.year_range = (year, year)
-
-            return self.run(query)
-
-        finally:
-            # Restore original values
-            self.max_results = original_max_results
-            self.year_range = original_year_range
-
-    def search_by_field(self, query: str, field_of_study: str, max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Search for papers in a specific field of study.
-
-        Args:
-            query: The search query
-            field_of_study: Field of study (e.g., "Computer Science", "Medicine")
-            max_results: Maximum number of results (defaults to self.max_results)
-
-        Returns:
-            List of papers in the specified field matching the query
-        """
-        original_max_results = self.max_results
-
-        try:
-            if max_results:
-                self.max_results = max_results
-
-            # Add field of study to query
-            field_query = f'{query} fieldofstudy:"{field_of_study}"'
-            return self.run(field_query)
-
-        finally:
-            # Restore original value
-            self.max_results = original_max_results
-
-    def get_paper_by_id(self, paper_id: str) -> Dict[str, Any]:
-        """
-        Get a specific paper by its Semantic Scholar ID.
-
-        Args:
-            paper_id: Semantic Scholar paper ID
-
-        Returns:
-            Dictionary with paper information
-        """
-        paper_details = self._get_paper_details(paper_id)
-
-        if not paper_details:
-            return {}
-
-        # Format authors
-        authors = []
-        if "authors" in paper_details and paper_details["authors"]:
-            authors = [author.get("name", "") for author in paper_details["authors"]]
-
-        # Create formatted result
-        result = {
-            "id": paper_details.get("paperId", ""),
-            "title": paper_details.get("title", ""),
-            "link": paper_details.get("url", ""),
-            "abstract": paper_details.get("abstract", ""),
-            "authors": authors,
-            "venue": paper_details.get("venue", ""),
-            "year": paper_details.get("year"),
-            "fields_of_study": paper_details.get("fieldsOfStudy", []),
-            "external_ids": paper_details.get("externalIds", {}),
-            "source": "Semantic Scholar",
-
-            # Include TLDR if available
-            "tldr": paper_details.get("tldr", {}).get("text", "") if paper_details.get("tldr") else ""
-        }
-
-        # Add citations and references if requested
-        if self.get_citations and "citations" in paper_details:
-            result["citations"] = paper_details["citations"]
-
-        if self.get_references and "references" in paper_details:
-            result["references"] = paper_details["references"]
-
-        # Add embedding if requested
-        if self.get_embeddings and "embedding" in paper_details:
-            result["embedding"] = paper_details["embedding"]
-
-        return result
-
-    def get_paper_by_doi(self, doi: str) -> Dict[str, Any]:
-        """
-        Get a paper by its DOI.
-
-        Args:
-            doi: Digital Object Identifier
-
-        Returns:
-            Dictionary with paper information
-        """
-        try:
-            # The Semantic Scholar API supports DOI lookup
-            url = f"{self.paper_details_url}/DOI:{doi}"
-            fields = [
-                "paperId",
-                "externalIds",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors",
-                "fieldsOfStudy"
-            ]
-
-            if self.get_tldr:
-                fields.append("tldr")
-
-            if self.get_embeddings:
-                fields.append("embedding")
-
-            # Add citation and reference fields if requested
-            if self.get_citations:
-                fields.append(f"citations.limit({self.citation_limit})")
-
-            if self.get_references:
-                fields.append(f"references.limit({self.reference_limit})")
-
-            params = {"fields": ",".join(fields)}
-            paper_details = self._make_request(url, params)
-
-            if not paper_details:
-                return {}
-
-            # Format the paper info the same way as get_paper_by_id
-            # Format authors
-            authors = []
-            if "authors" in paper_details and paper_details["authors"]:
-                authors = [author.get("name", "") for author in paper_details["authors"]]
-
-            # Create formatted result
-            result = {
-                "id": paper_details.get("paperId", ""),
-                "title": paper_details.get("title", ""),
-                "link": paper_details.get("url", ""),
-                "abstract": paper_details.get("abstract", ""),
-                "authors": authors,
-                "venue": paper_details.get("venue", ""),
-                "year": paper_details.get("year"),
-                "fields_of_study": paper_details.get("fieldsOfStudy", []),
-                "external_ids": paper_details.get("externalIds", {}),
-                "source": "Semantic Scholar",
-
-                # Include TLDR if available
-                "tldr": paper_details.get("tldr", {}).get("text", "") if paper_details.get("tldr") else ""
-            }
-
-            # Add citations and references if requested
-            if self.get_citations and "citations" in paper_details:
-                result["citations"] = paper_details["citations"]
-
-            if self.get_references and "references" in paper_details:
-                result["references"] = paper_details["references"]
-
-            # Add embedding if requested
-            if self.get_embeddings and "embedding" in paper_details:
-                result["embedding"] = paper_details["embedding"]
-
-            return result
-
-        except Exception as e:
-            logger.error(f"Error getting paper by DOI {doi}: {e}")
-            return {}
-
-    def get_papers_batch(self, paper_ids: List[str], fields: Optional[List[str]] = None) -> List[Dict[str, Any]]:
-        """
-        Get details for multiple papers in a single batch request.
-
-        Args:
-            paper_ids: List of paper IDs (Semantic Scholar IDs, DOIs, arXiv IDs, etc.)
-            fields: Fields to include in the response
-
-        Returns:
-            List of paper details
-        """
-        if not paper_ids:
-            return []
-
-        if fields is None:
-            fields = [
-                "paperId",
-                "externalIds",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors",
-                "referenceCount",
-                "citationCount"
-            ]
-
-        if self.get_tldr:
-            fields.append("tldr")
-
-        try:
-            # Construct request params
-            params = {
-                "fields": ",".join(fields)
-            }
-
-            # Make POST request with paper IDs in the body
-            response = self._make_request(
-                self.paper_batch_url,
-                params=params,
-                data={"ids": paper_ids},
-                method="POST"
-            )
-
-            if isinstance(response, list):
-                return response
-            else:
-                logger.warning("Unexpected response format from batch API")
-                return []
-
-        except Exception as e:
-            logger.error(f"Error in batch paper lookup: {e}")
-            return []
-
-    def get_paper_recommendations(self,
-                                 positive_paper_ids: List[str],
-                                 negative_paper_ids: Optional[List[str]] = None,
-                                 max_results: Optional[int] = None) -> List[Dict[str, Any]]:
-        """
-        Get recommended papers based on positive and negative examples.
-
-        Args:
-            positive_paper_ids: List of paper IDs to use as positive examples
-            negative_paper_ids: Optional list of paper IDs to use as negative examples
-            max_results: Maximum number of recommendations to return
-
-        Returns:
-            List of recommended papers
-        """
-        if not positive_paper_ids:
-            return []
-
-        limit = max_results or self.max_results
-
-        try:
-            # Construct the request payload
-            payload = {
-                "positivePaperIds": positive_paper_ids
-            }
-
-            if negative_paper_ids:
-                payload["negativePaperIds"] = negative_paper_ids
-
-            # Define fields to include in the response
-            fields = [
-                "paperId",
-                "externalIds",
-                "url",
-                "title",
-                "abstract",
-                "venue",
-                "year",
-                "authors"
-            ]
-
-            if self.get_tldr:
-                fields.append("tldr")
-
-            # Request parameters
-            params = {
-                "fields": ",".join(fields),
-                "limit": limit
-            }
-
-            # Make POST request to recommendations endpoint
-            response = self._make_request(
-                self.recommendations_url,
-                params=params,
-                data=payload,
-                method="POST"
-            )
-
-            if "recommendedPapers" not in response:
-                return []
-
-            papers = response["recommendedPapers"]
-
-            # Format as standard results
-            results = []
-            for paper in papers:
-                # Format authors
-                authors = []
-                if "authors" in paper and paper["authors"]:
-                    authors = [author.get("name", "") for author in paper["authors"]]
-
-                result = {
-                    "id": paper.get("paperId", ""),
-                    "title": paper.get("title", ""),
-                    "link": paper.get("url", ""),
-                    "snippet": paper.get("abstract", "")[:250] + "..." if paper.get("abstract", "") and len(paper.get("abstract", "")) > 250 else paper.get("abstract", ""),
-                    "authors": authors,
-                    "venue": paper.get("venue", ""),
-                    "year": paper.get("year"),
-                    "source": "Semantic Scholar",
-
-                    # Include TLDR if available
-                    "tldr": paper.get("tldr", {}).get("text", "") if paper.get("tldr") else ""
-                }
-
-                results.append(result)
-
-            return results
-
-        except Exception as e:
-            logger.error(f"Error getting paper recommendations: {e}")
-            return []
+        return results
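This hunk removes all of the convenience lookups (author, venue, year, field, ID, DOI, batch, recommendations) along with the endpoints that served them. The kept `/paper` details endpoint still accepts `DOI:`-prefixed IDs, as the removed `get_paper_by_doi` code shows, so a caller could replicate that lookup directly; a hedged sketch, not the package's code:

```python
# Direct DOI lookup against the kept /paper endpoint.
import requests

base_url = "https://api.semanticscholar.org/graph/v1"
doi = "10.18653/v1/N18-3011"  # example DOI
resp = requests.get(
    f"{base_url}/paper/DOI:{doi}",
    params={"fields": "paperId,title,year,authors"},
    timeout=30,
)
print(resp.json().get("title"))
```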
local_deep_research/web_search_engines/search_engine_base.py
CHANGED
@@ -65,13 +65,8 @@ class BaseSearchEngine(ABC):
         filtered_items = self._filter_for_relevance(previews, query)
         if not filtered_items:
             logger.info(f"All preview results were filtered out as irrelevant for query: {query}")
-            #
-
-            from local_deep_research import config
-            if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
-                return previews[:self.max_filtered_results or 5] # Return unfiltered but limited results
-            else:
-                filtered_items = previews[:self.max_filtered_results or 5]
+            # Do not fall back to previews, return empty list instead
+            return []
 
         # Step 3: Get full content for filtered items
         # Import config inside the method to avoid circular import
@@ -166,17 +161,13 @@ Respond with ONLY the JSON array, no other text."""
 
                 return ranked_results
             else:
-                logger.info("Could not find JSON array in response, returning
-
-                return previews[:self.max_filtered_results]
-                return previews
+                logger.info("Could not find JSON array in response, returning no previews")
+                return []
 
         except Exception as e:
             logger.info(f"Relevance filtering error: {e}")
             # Fall back to returning all previews (or top N) on error
-
-            return previews[:self.max_filtered_results]
-            return previews
+            return[]
 
     @abstractmethod
     def _get_previews(self, query: str) -> List[Dict[str, Any]]:
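Both base-engine changes tighten the same contract: when relevance filtering yields nothing, when the LLM response contains no JSON array, or when filtering errors out, the engine now returns an empty list instead of falling back to unfiltered previews. A toy illustration of the new behavior; this stand-in function is not the real class:

```python
# Stand-in for the new filtering contract: no silent fallback to previews.
from typing import Any

def run_filter(previews: list[dict[str, Any]], relevant_ids: set[int]) -> list[dict[str, Any]]:
    filtered = [p for i, p in enumerate(previews) if i in relevant_ids]
    if not filtered:
        # 0.1.14 would return previews[:max_filtered_results or 5] here
        return []
    return filtered

print(run_filter([{"title": "x"}], set()))  # [] -- empty, not unfiltered previews
```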
{local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: local-deep-research
-Version: 0.1.
+Version: 0.1.16
 Summary: AI-powered research assistant with deep, iterative analysis using LLMs and web searches
 Author-email: LearningCircuit <185559241+LearningCircuit@users.noreply.github.com>, HashedViking <6432677+HashedViking@users.noreply.github.com>
 License: MIT License
{local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/RECORD
CHANGED
@@ -4,19 +4,19 @@ local_deep_research/config.py,sha256=lucqOE4KeNm1ynYdcHYWJLE5fJ0QN-1QKZpRcBPsHe8
 local_deep_research/local_collections.py,sha256=SB-fdptT7qS0klJUVx_Rs9OgDwafMUgI46984WlZGKI,6076
 local_deep_research/main.py,sha256=uQXtGQ6LtZNd5Qw63D5ke4Q_LjYimouWVSUknVsk3JQ,3645
 local_deep_research/report_generator.py,sha256=UOiSw_vPHgtUpI8L9_UaOlpBVBloPB-ilhAo-1d2B9M,8200
-local_deep_research/search_system.py,sha256=
+local_deep_research/search_system.py,sha256=KNSn_8ciEGfSRR0k0ggIzZF6jqNXoYsCnUmdKgtKO0E,15481
 local_deep_research/defaults/__init__.py,sha256=2Vvlkl-gmP_qPYWegE4JBgummypogl3VXrQ1XzptFDU,1381
 local_deep_research/defaults/llm_config.py,sha256=88IGWPPvikSKmAqfqsGovBx2Jac5eh2sBY_LIW624Ik,7910
 local_deep_research/defaults/local_collections.toml,sha256=_edVWVHrhunMfazjejhJlGPRkHKKIP51qQtNkMgNEiA,1406
 local_deep_research/defaults/main.toml,sha256=l_J9JAPhKEp63IsLBO0hQDVimxogEpnrEVnNjiOeUxg,1403
-local_deep_research/defaults/search_engines.toml,sha256
+local_deep_research/defaults/search_engines.toml,sha256=TYkOqVaZq9JPawz4fIPyGdkAtYa4t8F9H50VY-wv2ak,8101
 local_deep_research/utilties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/utilties/enums.py,sha256=TVAZiu9szNbdacfb7whgaQJJlSk7oYByADaAierD4CE,229
 local_deep_research/utilties/llm_utils.py,sha256=IGv-_gJWqLTpO3_op1NHIwxKaFEzmXhhVYSLTTSMnIA,4522
 local_deep_research/utilties/search_utilities.py,sha256=C8ycFd7blcq5vtnd6GxP8dkepZT6EEqHFtT3WYxF0Ck,4151
 local_deep_research/utilties/setup_utils.py,sha256=t6GNp7lK1nLPdPNCkYUk82IATGM62vqy8UBD-KqShOs,215
 local_deep_research/web/__init__.py,sha256=3oHMatNu8r24FBtpojriIVbHYOVSHj4Q-quycMKOuDk,62
-local_deep_research/web/app.py,sha256=
+local_deep_research/web/app.py,sha256=5_VLNdhJOqdgacucglUdS_lVURNgYNbXhK9vME6JmzA,72431
 local_deep_research/web/static/css/styles.css,sha256=mW217FfZNW1pzMtlbuXE2fRBJekeIdIoy4m-yXFirj4,23782
 local_deep_research/web/static/js/app.js,sha256=GPncdWpw2YNTs56JY-0tjTTr9JnX-fIZSZX0agwKZMU,172813
 local_deep_research/web/templates/api_keys_config.html,sha256=jA8Y-nfUGJ1dTvbw2jK_8xPy2x6UG_5gHpbrTJAex2g,3527
@@ -29,7 +29,7 @@ local_deep_research/web/templates/settings.html,sha256=S9A-tdpzMhP2Zw7kp2jxKlwaW
 local_deep_research/web/templates/settings_dashboard.html,sha256=De-v1KNdVvkXme5i3YZ6sIfU9aAKDc_N-AW9n4PZoso,9109
 local_deep_research/web_search_engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 local_deep_research/web_search_engines/full_search.py,sha256=3SSTvD12g4pNlZCSGh8jwsyYWpQglgqjADnq8dG1zyI,9756
-local_deep_research/web_search_engines/search_engine_base.py,sha256=
+local_deep_research/web_search_engines/search_engine_base.py,sha256=Knmf45pMYd7hYc9x8jG8gOtMnribsXDcOXGrA50LK3E,8100
 local_deep_research/web_search_engines/search_engine_factory.py,sha256=B_QaqoAwnVXCmHNdqGbo94LekWY6wpBw_PWNkI120qE,10728
 local_deep_research/web_search_engines/search_engines_config.py,sha256=bNCuR09NOk5cjnKIgDQfhPipqmvDKeE7WP_6p8LLZf0,1979
 local_deep_research/web_search_engines/engines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -45,13 +45,13 @@ local_deep_research/web_search_engines/engines/search_engine_local.py,sha256=uAs
 local_deep_research/web_search_engines/engines/search_engine_local_all.py,sha256=CRNcxBzNd9kanyIJYaUDB7qfXYxVCvd4L2mX8jL73v0,5955
 local_deep_research/web_search_engines/engines/search_engine_pubmed.py,sha256=MayfzM2R0XoI7cpXlG1XJ1ktfTN_6H-Xs9RmD89UAao,39236
 local_deep_research/web_search_engines/engines/search_engine_searxng.py,sha256=GMy6qDMSaVBtjWRm48XBu6TjLAy1HfcO2EFTwr8S9rk,18048
-local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=
+local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py,sha256=6VMymjFJ7pyV2nv5dRfFofXgg0kG82rkwbICVnNDNH4,23352
 local_deep_research/web_search_engines/engines/search_engine_serpapi.py,sha256=iy-QmT99Tf2cJlfCrPbEhtMB7a_zCKppvlUKi7VBrlE,9118
 local_deep_research/web_search_engines/engines/search_engine_wayback.py,sha256=astAvSLajDZ6rwgthJ3iBcHSWuDSYPO7uilIxaJhXmU,18132
 local_deep_research/web_search_engines/engines/search_engine_wikipedia.py,sha256=KSGJECbEcxZpVK-PhYsTCtzedSK0l1AjQmvGtx8KBks,9799
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
-local_deep_research-0.1.
+local_deep_research-0.1.16.dist-info/licenses/LICENSE,sha256=Qg2CaTdu6SWnSqk1_JtgBPp_Da-LdqJDhT1Vt1MUc5s,1072
+local_deep_research-0.1.16.dist-info/METADATA,sha256=KHrqDTRQmo_FAt1KFZLmYlbO0eQKfjqvpHFu_kRUd_w,15151
+local_deep_research-0.1.16.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+local_deep_research-0.1.16.dist-info/entry_points.txt,sha256=u-Y6Z3MWtR3dmsTDFYhXyfkPv7mALUA7YAnY4Fi1XDs,97
+local_deep_research-0.1.16.dist-info/top_level.txt,sha256=h6-uVE_wSuLOcoWwT9szhX23mBWufu77MqmM25UfbCY,20
+local_deep_research-0.1.16.dist-info/RECORD,,
|
File without changes
|
{local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/entry_points.txt
RENAMED
File without changes
|
{local_deep_research-0.1.14.dist-info → local_deep_research-0.1.16.dist-info}/licenses/LICENSE
RENAMED
File without changes
|
File without changes
|