cite-agent 1.3.5-py3-none-any.whl → 1.3.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cite-agent might be problematic. See the advisory on the package's registry page for more details.

Files changed (37)
  1. cite_agent/__version__.py +1 -1
  2. cite_agent/cli.py +22 -2
  3. cite_agent/enhanced_ai_agent.py +407 -82
  4. cite_agent/project_detector.py +148 -0
  5. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/METADATA +1 -1
  6. cite_agent-1.3.7.dist-info/RECORD +31 -0
  7. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/top_level.txt +0 -1
  8. cite_agent-1.3.5.dist-info/RECORD +0 -56
  9. src/__init__.py +0 -1
  10. src/services/__init__.py +0 -132
  11. src/services/auth_service/__init__.py +0 -3
  12. src/services/auth_service/auth_manager.py +0 -33
  13. src/services/graph/__init__.py +0 -1
  14. src/services/graph/knowledge_graph.py +0 -194
  15. src/services/llm_service/__init__.py +0 -5
  16. src/services/llm_service/llm_manager.py +0 -495
  17. src/services/paper_service/__init__.py +0 -5
  18. src/services/paper_service/openalex.py +0 -231
  19. src/services/performance_service/__init__.py +0 -1
  20. src/services/performance_service/rust_performance.py +0 -395
  21. src/services/research_service/__init__.py +0 -23
  22. src/services/research_service/chatbot.py +0 -2056
  23. src/services/research_service/citation_manager.py +0 -436
  24. src/services/research_service/context_manager.py +0 -1441
  25. src/services/research_service/conversation_manager.py +0 -597
  26. src/services/research_service/critical_paper_detector.py +0 -577
  27. src/services/research_service/enhanced_research.py +0 -121
  28. src/services/research_service/enhanced_synthesizer.py +0 -375
  29. src/services/research_service/query_generator.py +0 -777
  30. src/services/research_service/synthesizer.py +0 -1273
  31. src/services/search_service/__init__.py +0 -5
  32. src/services/search_service/indexer.py +0 -186
  33. src/services/search_service/search_engine.py +0 -342
  34. src/services/simple_enhanced_main.py +0 -287
  35. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/WHEEL +0 -0
  36. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/entry_points.txt +0 -0
  37. {cite_agent-1.3.5.dist-info → cite_agent-1.3.7.dist-info}/licenses/LICENSE +0 -0
@@ -1,577 +0,0 @@
1
- # src/services/research_service/critical_paper_detector.py
2
-
3
- import logging
4
- import re
5
- import math
6
- from typing import List, Dict, Any, Set, Optional
7
- from collections import Counter
8
- import networkx as nx # type: ignore[import]
9
- from datetime import datetime, timezone
10
-
11
- # Configure structured logging
12
- logger = logging.getLogger(__name__)
13
-
14
-
15
- def _utc_timestamp() -> str:
16
- return datetime.now(timezone.utc).isoformat()
17
-
18
- class CriticalPaperDetector:
19
- """
20
- Enhanced critical paper detector with comprehensive error handling, security, and observability.
21
-
22
- Features:
23
- - Secure paper analysis and scoring
24
- - Input validation and sanitization
25
- - Comprehensive error handling and fallback logic
26
- - Structured logging and monitoring
27
- - Protection against injection attacks
28
- - Multi-factor paper importance scoring
29
- """
30
-
31
- def __init__(self, db_operations=None):
32
- """
33
- Initialize detector with enhanced security and error handling.
34
-
35
- Args:
36
- db_operations: Optional database operations instance
37
-
38
- Raises:
39
- ValueError: If initialization fails
40
- """
41
- try:
42
- #logger.info("Initializing CriticalPaperDetector with enhanced security")
43
-
44
- self.db = db_operations
45
-
46
- # Define importance indicators with enhanced coverage
47
- self.method_terms = {
48
- "novel", "methodology", "approach", "framework", "technique",
49
- "algorithm", "protocol", "procedure", "process", "method",
50
- "implementation", "design", "architecture", "strategy",
51
- "paradigm", "model", "system", "mechanism", "solution"
52
- }
53
-
54
- self.result_terms = {
55
- "significant", "breakthrough", "discovery", "finding",
56
- "evidence", "proves", "demonstrates", "shows", "reveals",
57
- "establishes", "confirms", "validates", "supports", "indicates",
58
- "suggests", "implies", "concludes", "determines", "identifies"
59
- }
60
-
61
- self.contradiction_terms = {
62
- "contrary", "opposed", "conflict", "contradiction", "inconsistent",
63
- "challenge", "dispute", "unlike", "differs", "contrast",
64
- "disagreement", "debate", "controversy", "question", "doubt",
65
- "skepticism", "criticism", "limitation", "weakness", "flaw"
66
- }
67
-
68
- #logger.info("CriticalPaperDetector initialized successfully")
69
-
70
- except Exception as e:
71
- logger.error(f"Failed to initialize CriticalPaperDetector: {str(e)}")
72
- raise
73
-
74
- def _validate_papers(self, papers: List[Dict[str, Any]]) -> None:
75
- """
76
- Validate papers list for security and safety.
77
-
78
- Args:
79
- papers: Papers list to validate
80
-
81
- Raises:
82
- ValueError: If papers list is invalid
83
- """
84
- if not isinstance(papers, list):
85
- raise ValueError("Papers must be a list")
86
-
87
- if len(papers) > 1000: # Reasonable limit
88
- raise ValueError("Too many papers (max 1000)")
89
-
90
- for i, paper in enumerate(papers):
91
- if not isinstance(paper, dict):
92
- raise ValueError(f"Paper at index {i} must be a dictionary")
93
-
94
- # Validate required fields
95
- if "id" not in paper:
96
- raise ValueError(f"Paper at index {i} missing required 'id' field")
97
-
98
- paper_id = str(paper["id"])
99
- if len(paper_id) > 100:
100
- raise ValueError(f"Paper ID at index {i} too long (max 100 characters)")
101
-
102
- # Check for potentially dangerous content in text fields
103
- text_fields = ["title", "abstract", "summary"]
104
- for field in text_fields:
105
- if field in paper and paper[field]:
106
- content = str(paper[field])
107
- if len(content) > 10000: # Reasonable limit
108
- raise ValueError(f"Paper {field} at index {i} too long (max 10000 characters)")
109
-
110
- def _validate_threshold(self, threshold_percentage: int) -> None:
111
- """
112
- Validate threshold percentage.
113
-
114
- Args:
115
- threshold_percentage: Threshold percentage to validate
116
-
117
- Raises:
118
- ValueError: If threshold is invalid
119
- """
120
- if not isinstance(threshold_percentage, int):
121
- raise ValueError("Threshold percentage must be an integer")
122
-
123
- if threshold_percentage < 1 or threshold_percentage > 50:
124
- raise ValueError("Threshold percentage must be between 1 and 50")
125
-
126
- def _sanitize_text(self, text: str, max_length: int = 10000) -> str:
127
- """
128
- Sanitize text to prevent injection attacks.
129
-
130
- Args:
131
- text: Text to sanitize
132
- max_length: Maximum allowed length
133
-
134
- Returns:
135
- Sanitized text
136
- """
137
- if not isinstance(text, str):
138
- return ""
139
-
140
- if len(text) > max_length:
141
- text = text[:max_length]
142
-
143
- # Basic XSS protection
144
- sanitized = text.replace('<', '&lt;').replace('>', '&gt;')
145
-
146
- # Remove null bytes and other control characters
147
- sanitized = ''.join(char for char in sanitized if ord(char) >= 32 or char in '\n\r\t')
148
-
149
- return sanitized.strip()
150
-
151
- async def identify_critical_papers(self, papers: List[Dict[str, Any]],
152
- threshold_percentage: int = 20) -> List[Dict[str, Any]]:
153
- """
154
- Identify critical papers with enhanced error handling and security.
155
-
156
- Args:
157
- papers: List of paper dictionaries with metadata
158
- threshold_percentage: Percentage of papers to mark as critical
159
-
160
- Returns:
161
- List of critical papers with scores
162
-
163
- Raises:
164
- ValueError: If inputs are invalid
165
- """
166
- try:
167
- # Input validation
168
- self._validate_papers(papers)
169
- self._validate_threshold(threshold_percentage)
170
-
171
- if not papers:
172
- #logger.info("No papers provided for critical analysis")
173
- return []
174
-
175
- #logger.info(f"Analyzing {len(papers)} papers for critical importance (threshold: {threshold_percentage}%)")
176
-
177
- # Calculate scores for all papers with error handling
178
- paper_scores = {}
179
- for i, paper in enumerate(papers):
180
- try:
181
- score = self._calculate_paper_score(paper)
182
- paper_scores[paper["id"]] = score
183
- except Exception as e:
184
- logger.warning(f"Error calculating score for paper {i}: {str(e)}")
185
- paper_scores[paper["id"]] = 0.0 # Default score
186
-
187
- # Determine threshold based on percentage
188
- num_critical = max(1, int(len(papers) * threshold_percentage / 100))
189
-
190
- # Get top scoring papers
191
- top_papers = sorted(
192
- [(paper_id, score) for paper_id, score in paper_scores.items()],
193
- key=lambda x: x[1],
194
- reverse=True
195
- )[:num_critical]
196
-
197
- # Format results with error handling
198
- results = []
199
- for paper_id, score in top_papers:
200
- try:
201
- paper_data = next((p for p in papers if p["id"] == paper_id), None)
202
- if paper_data:
203
- results.append({
204
- "paper_id": paper_id,
205
- "title": self._sanitize_text(paper_data.get("title", "Unknown"), max_length=200),
206
- "score": round(score, 2),
207
- "factors": self._get_factor_breakdown(paper_data, score),
208
- "analyzed_at": _utc_timestamp()
209
- })
210
- except Exception as e:
211
- logger.warning(f"Error formatting result for paper {paper_id}: {str(e)}")
212
- continue
213
-
214
- #logger.info(f"Successfully identified {len(results)} critical papers")
215
- return results
216
-
217
- except ValueError as e:
218
- logger.error(f"Invalid input for critical paper identification: {str(e)}")
219
- raise
220
- except Exception as e:
221
- logger.error(f"Error identifying critical papers: {str(e)}")
222
- return []
223
-
224
- def _calculate_paper_score(self, paper: Dict[str, Any]) -> float:
225
- """
226
- Calculate importance score for a paper with enhanced error handling.
227
-
228
- Args:
229
- paper: Paper dictionary with metadata
230
-
231
- Returns:
232
- Numerical score (higher = more important)
233
- """
234
- try:
235
- score = 0.0
236
-
237
- # Factor 1: Citation impact (if available)
238
- try:
239
- citation_score = self._calculate_citation_score(paper)
240
- score += citation_score * 0.25 # 25% weight
241
- except Exception as e:
242
- logger.warning(f"Error calculating citation score: {str(e)}")
243
- score += 0.0
244
-
245
- # Factor 2: Recency
246
- try:
247
- recency_score = self._calculate_recency_score(paper)
248
- score += recency_score * 0.15 # 15% weight
249
- except Exception as e:
250
- logger.warning(f"Error calculating recency score: {str(e)}")
251
- score += 5.0 # Default middle score
252
-
253
- # Factor 3: Title and abstract significance
254
- try:
255
- significance_score = self._calculate_significance_score(paper)
256
- score += significance_score * 0.20 # 20% weight
257
- except Exception as e:
258
- logger.warning(f"Error calculating significance score: {str(e)}")
259
- score += 0.0
260
-
261
- # Factor 4: Methodology novelty
262
- try:
263
- methodology_score = self._calculate_methodology_score(paper)
264
- score += methodology_score * 0.20 # 20% weight
265
- except Exception as e:
266
- logger.warning(f"Error calculating methodology score: {str(e)}")
267
- score += 0.0
268
-
269
- # Factor 5: Contradiction potential
270
- try:
271
- contradiction_score = self._calculate_contradiction_score(paper)
272
- score += contradiction_score * 0.20 # 20% weight
273
- except Exception as e:
274
- logger.warning(f"Error calculating contradiction score: {str(e)}")
275
- score += 0.0
276
-
277
- return max(0.0, min(10.0, score)) # Ensure score is between 0 and 10
278
-
279
- except Exception as e:
280
- logger.error(f"Error calculating paper score: {str(e)}")
281
- return 0.0
282
-
283
- def _calculate_citation_score(self, paper: Dict[str, Any]) -> float:
284
- """
285
- Calculate score based on citation count with enhanced error handling.
286
-
287
- Args:
288
- paper: Paper dictionary
289
-
290
- Returns:
291
- Citation score
292
- """
293
- try:
294
- citation_count = paper.get("citation_count", 0)
295
-
296
- # Validate citation count
297
- if not isinstance(citation_count, (int, float)):
298
- return 0.0
299
-
300
- citation_count = max(0, int(citation_count))
301
-
302
- # Log-scale to prevent extremely cited papers from dominating
303
- if citation_count > 0:
304
- return min(10.0, 2.0 * math.log10(citation_count + 1))
305
- return 0.0
306
-
307
- except Exception as e:
308
- logger.warning(f"Error calculating citation score: {str(e)}")
309
- return 0.0
310
-
311
- def _calculate_recency_score(self, paper: Dict[str, Any]) -> float:
312
- """
313
- Calculate score based on paper recency with enhanced error handling.
314
-
315
- Args:
316
- paper: Paper dictionary
317
-
318
- Returns:
319
- Recency score
320
- """
321
- try:
322
- year = paper.get("year")
323
-
324
- # Try to extract year from various fields
325
- if not year:
326
- # Try published_date
327
- if paper.get("published_date"):
328
- year_match = re.search(r'20\d\d', str(paper.get("published_date", "")))
329
- if year_match:
330
- year = int(year_match.group(0))
331
-
332
- # Try publication_date
333
- if not year and paper.get("publication_date"):
334
- year_match = re.search(r'20\d\d', str(paper.get("publication_date", "")))
335
- if year_match:
336
- year = int(year_match.group(0))
337
-
338
- if not year:
339
- return 5.0 # Middle score if unknown
340
-
341
- # Convert to int if it's a string
342
- if isinstance(year, str):
343
- try:
344
- year = int(year)
345
- except ValueError:
346
- return 5.0
347
-
348
- # Validate year range
349
- current_year = datetime.now().year
350
- if year < 1900 or year > current_year + 1:
351
- return 5.0 # Invalid year, use middle score
352
-
353
- # Scoring by recency
354
- years_old = current_year - year
355
-
356
- if years_old <= 1:
357
- return 10.0 # Very recent (0-1 years)
358
- elif years_old <= 3:
359
- return 8.0 # Recent (1-3 years)
360
- elif years_old <= 5:
361
- return 6.0 # Somewhat recent (3-5 years)
362
- elif years_old <= 10:
363
- return 4.0 # Older but still relevant (5-10 years)
364
- else:
365
- return 2.0 # Much older (>10 years)
366
-
367
- except Exception as e:
368
- logger.warning(f"Error calculating recency score: {str(e)}")
369
- return 5.0 # Default middle score
370
-
371
- def _calculate_significance_score(self, paper: Dict[str, Any]) -> float:
372
- """
373
- Calculate score based on title and abstract significance indicators with enhanced error handling.
374
-
375
- Args:
376
- paper: Paper dictionary
377
-
378
- Returns:
379
- Significance score
380
- """
381
- try:
382
- title = self._sanitize_text(paper.get("title", ""), max_length=1000).lower()
383
- abstract = self._sanitize_text(paper.get("abstract", ""), max_length=5000).lower()
384
-
385
- combined_text = title + " " + abstract
386
-
387
- # Check for significant result terms
388
- result_count = sum(1 for term in self.result_terms if term in combined_text)
389
-
390
- # Score based on significance indicators
391
- score = min(10.0, result_count * 2.0)
392
-
393
- return score
394
-
395
- except Exception as e:
396
- logger.warning(f"Error calculating significance score: {str(e)}")
397
- return 0.0
398
-
399
- def _calculate_methodology_score(self, paper: Dict[str, Any]) -> float:
400
- """
401
- Calculate score based on methodology innovation indicators with enhanced error handling.
402
-
403
- Args:
404
- paper: Paper dictionary
405
-
406
- Returns:
407
- Methodology score
408
- """
409
- try:
410
- title = self._sanitize_text(paper.get("title", ""), max_length=1000).lower()
411
- abstract = self._sanitize_text(paper.get("abstract", ""), max_length=5000).lower()
412
-
413
- combined_text = title + " " + abstract
414
-
415
- # Check for methodology terms
416
- method_count = sum(1 for term in self.method_terms if term in combined_text)
417
-
418
- # Score based on methodology indicators
419
- score = min(10.0, method_count * 2.0)
420
-
421
- return score
422
-
423
- except Exception as e:
424
- logger.warning(f"Error calculating methodology score: {str(e)}")
425
- return 0.0
426
-
427
- def _calculate_contradiction_score(self, paper: Dict[str, Any]) -> float:
428
- """
429
- Calculate score based on contradiction/challenge indicators with enhanced error handling.
430
-
431
- Args:
432
- paper: Paper dictionary
433
-
434
- Returns:
435
- Contradiction score
436
- """
437
- try:
438
- title = self._sanitize_text(paper.get("title", ""), max_length=1000).lower()
439
- abstract = self._sanitize_text(paper.get("abstract", ""), max_length=5000).lower()
440
-
441
- combined_text = title + " " + abstract
442
-
443
- # Check for contradiction terms
444
- contradiction_count = sum(1 for term in self.contradiction_terms if term in combined_text)
445
-
446
- # Score based on contradiction indicators
447
- score = min(10.0, contradiction_count * 2.0)
448
-
449
- return score
450
-
451
- except Exception as e:
452
- logger.warning(f"Error calculating contradiction score: {str(e)}")
453
- return 0.0
454
-
455
- def _get_factor_breakdown(self, paper: Dict[str, Any], total_score: float) -> Dict[str, Any]:
456
- """
457
- Get detailed breakdown of scoring factors with enhanced error handling.
458
-
459
- Args:
460
- paper: Paper dictionary
461
- total_score: Total calculated score
462
-
463
- Returns:
464
- Factor breakdown dictionary
465
- """
466
- try:
467
- factors = {}
468
-
469
- # Calculate individual factor scores
470
- try:
471
- factors["citation_impact"] = round(self._calculate_citation_score(paper) * 0.25, 2)
472
- except Exception:
473
- factors["citation_impact"] = 0.0
474
-
475
- try:
476
- factors["recency"] = round(self._calculate_recency_score(paper) * 0.15, 2)
477
- except Exception:
478
- factors["recency"] = 0.0
479
-
480
- try:
481
- factors["significance"] = round(self._calculate_significance_score(paper) * 0.20, 2)
482
- except Exception:
483
- factors["significance"] = 0.0
484
-
485
- try:
486
- factors["methodology"] = round(self._calculate_methodology_score(paper) * 0.20, 2)
487
- except Exception:
488
- factors["methodology"] = 0.0
489
-
490
- try:
491
- factors["contradiction_potential"] = round(self._calculate_contradiction_score(paper) * 0.20, 2)
492
- except Exception:
493
- factors["contradiction_potential"] = 0.0
494
-
495
- # Add metadata
496
- factors["total_score"] = round(total_score, 2)
497
- factors["calculated_at"] = _utc_timestamp()
498
-
499
- return factors
500
-
501
- except Exception as e:
502
- logger.warning(f"Error getting factor breakdown: {str(e)}")
503
- return {
504
- "total_score": round(total_score, 2),
505
- "error": "Factor breakdown unavailable"
506
- }
507
-
508
- async def health_check(self) -> Dict[str, Any]:
509
- """
510
- Perform health check of the critical paper detector.
511
-
512
- Returns:
513
- Health status
514
- """
515
- try:
516
- health_status = {
517
- "status": "healthy",
518
- "timestamp": _utc_timestamp(),
519
- "components": {}
520
- }
521
-
522
- # Check term sets
523
- try:
524
- term_counts = {
525
- "method_terms": len(self.method_terms),
526
- "result_terms": len(self.result_terms),
527
- "contradiction_terms": len(self.contradiction_terms)
528
- }
529
- health_status["components"]["term_sets"] = {
530
- "status": "healthy",
531
- "counts": term_counts
532
- }
533
- except Exception as e:
534
- health_status["components"]["term_sets"] = {"status": "error", "error": str(e)}
535
- health_status["status"] = "degraded"
536
-
537
- # Check database operations if available
538
- if self.db:
539
- try:
540
- health_status["components"]["database"] = {"status": "available"}
541
- except Exception as e:
542
- health_status["components"]["database"] = {"status": "error", "error": str(e)}
543
- health_status["status"] = "degraded"
544
- else:
545
- health_status["components"]["database"] = {"status": "not_configured"}
546
-
547
- #logger.info(f"Health check completed: {health_status['status']}")
548
- return health_status
549
-
550
- except Exception as e:
551
- logger.error(f"Health check failed: {str(e)}")
552
- return {
553
- "status": "error",
554
- "error": str(e),
555
- "timestamp": _utc_timestamp()
556
- }
557
-
558
- def get_stats(self) -> Dict[str, Any]:
559
- """
560
- Get statistics about the detector.
561
-
562
- Returns:
563
- Statistics dictionary
564
- """
565
- try:
566
- stats = {
567
- "method_terms_count": len(self.method_terms),
568
- "result_terms_count": len(self.result_terms),
569
- "contradiction_terms_count": len(self.contradiction_terms),
570
- "database_configured": self.db is not None
571
- }
572
-
573
- return stats
574
-
575
- except Exception as e:
576
- logger.error(f"Error getting stats: {str(e)}")
577
- return {"error": str(e)}
@@ -1,121 +0,0 @@
1
- """High-level orchestration service combining search and synthesis."""
2
-
3
- from __future__ import annotations
4
-
5
- import logging
6
- from typing import Any, Dict, List, Optional
7
-
8
- from src.services.llm_service import LLMManager
9
- from src.services.paper_service import OpenAlexClient
10
- from src.services.performance_service.rust_performance import HighPerformanceService
11
- from src.services.research_service.enhanced_synthesizer import EnhancedSynthesizer
12
- from src.services.search_service import SearchEngine
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
-
17
- class EnhancedResearchService:
18
- """Bundle search + synthesis into a cohesive workflow."""
19
-
20
- def __init__(
21
- self,
22
- *,
23
- search_engine: Optional[SearchEngine] = None,
24
- synthesizer: Optional[EnhancedSynthesizer] = None,
25
- llm_manager: Optional[LLMManager] = None,
26
- openalex_client: Optional[OpenAlexClient] = None,
27
- performance_service: Optional[HighPerformanceService] = None,
28
- ) -> None:
29
- self.openalex = openalex_client or OpenAlexClient()
30
- self.llm = llm_manager or LLMManager()
31
- self.performance = performance_service or HighPerformanceService()
32
- self.search_engine = search_engine or SearchEngine(
33
- openalex_client=self.openalex,
34
- performance_service=self.performance,
35
- )
36
- self.synthesizer = synthesizer or EnhancedSynthesizer(
37
- llm_manager=self.llm,
38
- openalex_client=self.openalex,
39
- performance_service=self.performance,
40
- )
41
-
42
- async def conduct_research(
43
- self,
44
- query: str,
45
- *,
46
- limit: int = 10,
47
- max_words: int = 600,
48
- style: str = "comprehensive",
49
- include_advanced: bool = True,
50
- context: Optional[Dict[str, Any]] = None,
51
- ) -> Dict[str, Any]:
52
- if not query or not query.strip():
53
- raise ValueError("Query must be a non-empty string")
54
-
55
- context = context or {}
56
- context.setdefault("original_query", query)
57
-
58
- search_payload = await self.search_engine.search_papers(
59
- query,
60
- limit=limit,
61
- sources=("openalex", "pubmed") if include_advanced else ("openalex",),
62
- include_metadata=True,
63
- include_abstracts=True,
64
- )
65
-
66
- paper_ids = [paper["id"] for paper in search_payload.get("papers", [])]
67
- raw_papers: List[Dict[str, Any]] = []
68
- if paper_ids:
69
- raw_papers = await self.openalex.get_papers_bulk(paper_ids)
70
-
71
- if not raw_papers:
72
- # Fall back to lightly formatted payloads if bulk fetch fails
73
- raw_papers = [self._paper_stub(paper) for paper in search_payload.get("papers", [])]
74
-
75
- synthesis = await self.synthesizer.synthesize_research(
76
- papers=raw_papers,
77
- max_words=max_words,
78
- style=style,
79
- context=context,
80
- include_visualizations=True,
81
- include_topic_modeling=True,
82
- include_quality_assessment=True,
83
- )
84
-
85
- return {
86
- "query": query,
87
- "search": search_payload,
88
- "synthesis": synthesis,
89
- }
90
-
91
- async def get_health_status(self) -> Dict[str, Any]:
92
- search_stats = {
93
- "openalex": True,
94
- "web_search": True,
95
- }
96
- try:
97
- kg_stats = await self.synthesizer.kg.stats()
98
- except Exception as exc: # pragma: no cover - KG optional
99
- logger.info("Knowledge graph stats unavailable", extra={"error": str(exc)})
100
- kg_stats = {"entities": 0, "relationships": 0}
101
-
102
- llm_health = await self.llm.health_check()
103
- return {
104
- "search": search_stats,
105
- "knowledge_graph": kg_stats,
106
- "llm": llm_health,
107
- }
108
-
109
- def _paper_stub(self, payload: Dict[str, Any]) -> Dict[str, Any]:
110
- return {
111
- "id": payload.get("id"),
112
- "title": payload.get("title"),
113
- "abstract": payload.get("abstract", ""),
114
- "authors": payload.get("authors", []),
115
- "publication_year": payload.get("year"),
116
- "doi": payload.get("doi"),
117
- "concepts": [{"display_name": kw} for kw in payload.get("keywords", [])],
118
- }
119
-
120
-
121
- __all__ = ["EnhancedResearchService"]