cite-agent: cite_agent-1.3.9-py3-none-any.whl → cite_agent-1.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. cite_agent/__init__.py +13 -13
  2. cite_agent/__version__.py +1 -1
  3. cite_agent/action_first_mode.py +150 -0
  4. cite_agent/adaptive_providers.py +413 -0
  5. cite_agent/archive_api_client.py +186 -0
  6. cite_agent/auth.py +0 -1
  7. cite_agent/auto_expander.py +70 -0
  8. cite_agent/cache.py +379 -0
  9. cite_agent/circuit_breaker.py +370 -0
  10. cite_agent/citation_network.py +377 -0
  11. cite_agent/cli.py +8 -16
  12. cite_agent/cli_conversational.py +113 -3
  13. cite_agent/confidence_calibration.py +381 -0
  14. cite_agent/deduplication.py +325 -0
  15. cite_agent/enhanced_ai_agent.py +689 -371
  16. cite_agent/error_handler.py +228 -0
  17. cite_agent/execution_safety.py +329 -0
  18. cite_agent/full_paper_reader.py +239 -0
  19. cite_agent/observability.py +398 -0
  20. cite_agent/offline_mode.py +348 -0
  21. cite_agent/paper_comparator.py +368 -0
  22. cite_agent/paper_summarizer.py +420 -0
  23. cite_agent/pdf_extractor.py +350 -0
  24. cite_agent/proactive_boundaries.py +266 -0
  25. cite_agent/quality_gate.py +442 -0
  26. cite_agent/request_queue.py +390 -0
  27. cite_agent/response_enhancer.py +257 -0
  28. cite_agent/response_formatter.py +458 -0
  29. cite_agent/response_pipeline.py +295 -0
  30. cite_agent/response_style_enhancer.py +259 -0
  31. cite_agent/self_healing.py +418 -0
  32. cite_agent/similarity_finder.py +524 -0
  33. cite_agent/streaming_ui.py +13 -9
  34. cite_agent/thinking_blocks.py +308 -0
  35. cite_agent/tool_orchestrator.py +416 -0
  36. cite_agent/trend_analyzer.py +540 -0
  37. cite_agent/unpaywall_client.py +226 -0
  38. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
  39. cite_agent-1.4.3.dist-info/RECORD +62 -0
  40. cite_agent-1.3.9.dist-info/RECORD +0 -32
  41. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
  42. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
  43. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
  44. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0
cite_agent/trend_analyzer.py (new file)
@@ -0,0 +1,540 @@
+"""
+Research Trend Analyzer - Analyze research trends and predict future directions
+
+Provides tools for:
+- Topic evolution analysis
+- Emerging topic detection
+- Publication trend visualization
+- Research direction prediction
+"""
+
+from typing import List, Dict, Any, Optional, Tuple
+from collections import defaultdict, Counter
+from datetime import datetime
+import logging
+import re
+
+logger = logging.getLogger(__name__)
+
+
+class ResearchTrendAnalyzer:
+    """Analyze trends in academic research"""
+
+    def __init__(self, archive_client=None):
+        """
+        Initialize trend analyzer
+
+        Args:
+            archive_client: ArchiveAPIClient instance for fetching papers
+        """
+        self.archive_client = archive_client
+
+    def analyze_topic_evolution(self, topic: str, years: int = 10, granularity: str = "year") -> Dict[str, Any]:
+        """
+        Analyze how a research topic has evolved over time
+
+        Args:
+            topic: Research topic to analyze
+            years: Number of years to look back
+            granularity: Time granularity ("year" or "quarter")
+
+        Returns:
+            Evolution data with publication counts, citation trends, key papers
+        """
+        if not self.archive_client:
+            return {"error": "Archive client required"}
+
+        try:
+            current_year = datetime.now().year
+            start_year = current_year - years
+
+            # Fetch papers from each year
+            yearly_data = {}
+
+            for year in range(start_year, current_year + 1):
+                papers = self._fetch_papers_for_year(topic, year)
+
+                yearly_data[year] = {
+                    'year': year,
+                    'paper_count': len(papers),
+                    'total_citations': sum(p.get('citationCount', 0) for p in papers),
+                    'avg_citations': sum(p.get('citationCount', 0) for p in papers) / max(len(papers), 1),
+                    'top_papers': sorted(
+                        papers,
+                        key=lambda x: x.get('citationCount', 0),
+                        reverse=True
+                    )[:5],
+                    'keywords': self._extract_trending_keywords(papers)
+                }
+
+            # Calculate growth metrics
+            growth_rate = self._calculate_growth_rate(yearly_data)
+
+            # Detect inflection points
+            inflection_points = self._detect_inflection_points(yearly_data)
+
+            # Extract emerging keywords
+            emerging_keywords = self._identify_emerging_keywords(yearly_data)
+
+            return {
+                'topic': topic,
+                'time_range': f'{start_year}-{current_year}',
+                'yearly_data': yearly_data,
+                'growth_rate': growth_rate,
+                'inflection_points': inflection_points,
+                'emerging_keywords': emerging_keywords,
+                'trend': self._classify_trend(growth_rate)
+            }
+
+        except Exception as e:
+            logger.error(f"Error analyzing topic evolution: {e}")
+            return {"error": str(e)}
+
+    def emerging_topics(self, field: str, min_papers: int = 20, time_window: int = 2) -> List[Dict[str, Any]]:
+        """
+        Detect emerging research topics in a field
+
+        Args:
+            field: Research field to analyze
+            min_papers: Minimum papers for a topic to be considered
+            time_window: Years to look back for "emerging" status
+
+        Returns:
+            List of emerging topics with growth metrics
+        """
+        if not self.archive_client:
+            return []
+
+        try:
+            current_year = datetime.now().year
+            recent_years = range(current_year - time_window, current_year + 1)
+            older_years = range(current_year - time_window * 2, current_year - time_window)
+
+            # Fetch papers from both periods
+            recent_papers = []
+            older_papers = []
+
+            for year in recent_years:
+                papers = self._fetch_papers_for_year(field, year, limit=200)
+                recent_papers.extend(papers)
+
+            for year in older_years:
+                papers = self._fetch_papers_for_year(field, year, limit=200)
+                older_papers.extend(papers)
+
+            # Extract keywords/phrases from both periods
+            recent_keywords = self._extract_all_keywords(recent_papers)
+            older_keywords = self._extract_all_keywords(older_papers)
+
+            # Find keywords with significant growth
+            emerging = []
+
+            for keyword, recent_count in recent_keywords.items():
+                if recent_count < min_papers:
+                    continue
+
+                older_count = older_keywords.get(keyword, 0)
+
+                # Calculate growth
+                if older_count == 0:
+                    growth = float('inf') if recent_count > 0 else 0
+                else:
+                    growth = (recent_count - older_count) / older_count
+
+                # Filter for significant growth
+                if growth > 1.0:  # 100% growth
+                    emerging.append({
+                        'topic': keyword,
+                        'recent_papers': recent_count,
+                        'older_papers': older_count,
+                        'growth_rate': round(growth * 100, 1),
+                        'status': 'emerging' if older_count < 10 else 'accelerating'
+                    })
+
+            # Sort by growth rate
+            emerging.sort(key=lambda x: x['growth_rate'], reverse=True)
+
+            return emerging[:20]  # Top 20 emerging topics
+
+        except Exception as e:
+            logger.error(f"Error detecting emerging topics: {e}")
+            return []
+
+    def predict_next_papers(self, topic: str, limit: int = 10) -> List[Dict[str, Any]]:
+        """
+        Predict/recommend next papers to read based on trends
+
+        Args:
+            topic: Research topic
+            limit: Maximum papers to return
+
+        Returns:
+            List of recommended papers sorted by relevance and recency
+        """
+        if not self.archive_client:
+            return []
+
+        try:
+            # Get recent papers (last 2 years)
+            current_year = datetime.now().year
+            recent_papers = []
+
+            for year in range(current_year - 1, current_year + 1):
+                papers = self._fetch_papers_for_year(topic, year, limit=50)
+                recent_papers.extend(papers)
+
+            # Score papers by multiple factors
+            scored_papers = []
+
+            for paper in recent_papers:
+                score = self._calculate_relevance_score(paper)
+
+                scored_papers.append({
+                    'paper': paper,
+                    'score': score,
+                    'title': paper.get('title'),
+                    'authors': [a.get('name') for a in paper.get('authors', [])[:3]],
+                    'year': paper.get('year'),
+                    'citations': paper.get('citationCount', 0),
+                    'reason': self._generate_recommendation_reason(paper, score)
+                })
+
+            # Sort by score
+            scored_papers.sort(key=lambda x: x['score'], reverse=True)
+
+            return scored_papers[:limit]
+
+        except Exception as e:
+            logger.error(f"Error predicting next papers: {e}")
+            return []
+
+    def compare_research_trends(self, topics: List[str], years: int = 10) -> Dict[str, Any]:
+        """
+        Compare research trends across multiple topics
+
+        Args:
+            topics: List of topics to compare
+            years: Number of years to analyze
+
+        Returns:
+            Comparative trend data
+        """
+        if not self.archive_client:
+            return {"error": "Archive client required"}
+
+        try:
+            current_year = datetime.now().year
+            start_year = current_year - years
+
+            comparison = {
+                'topics': topics,
+                'time_range': f'{start_year}-{current_year}',
+                'data': {}
+            }
+
+            # Analyze each topic
+            for topic in topics:
+                yearly_counts = {}
+
+                for year in range(start_year, current_year + 1):
+                    papers = self._fetch_papers_for_year(topic, year, limit=100)
+                    yearly_counts[year] = len(papers)
+
+                comparison['data'][topic] = {
+                    'yearly_counts': yearly_counts,
+                    'total_papers': sum(yearly_counts.values()),
+                    'avg_per_year': sum(yearly_counts.values()) / len(yearly_counts),
+                    'peak_year': max(yearly_counts, key=yearly_counts.get),
+                    'trend': self._classify_trend(self._calculate_simple_growth_rate(yearly_counts))
+                }
+
+            # Determine leader
+            leader = max(
+                comparison['data'].items(),
+                key=lambda x: x[1]['total_papers']
+            )[0]
+
+            comparison['leader'] = leader
+            comparison['insights'] = self._generate_comparison_insights(comparison['data'])
+
+            return comparison
+
+        except Exception as e:
+            logger.error(f"Error comparing trends: {e}")
+            return {"error": str(e)}
+
+    def _fetch_papers_for_year(self, topic: str, year: int, limit: int = 100) -> List[Dict[str, Any]]:
+        """Fetch papers for a specific year"""
+        if not self.archive_client:
+            return []
+
+        try:
+            # Query with year filter
+            query = f"{topic} year:{year}"
+
+            results = self.archive_client.search_papers(
+                query=query,
+                limit=limit,
+                fields=['paperId', 'title', 'authors', 'year', 'citationCount', 'abstract']
+            )
+
+            papers = results.get('data', [])
+
+            # Filter by year (sometimes API returns adjacent years)
+            return [p for p in papers if p.get('year') == year]
+
+        except Exception as e:
+            logger.warning(f"Could not fetch papers for {year}: {e}")
+            return []
+
+    def _extract_trending_keywords(self, papers: List[Dict[str, Any]], top_n: int = 10) -> List[str]:
+        """Extract trending keywords from papers"""
+        all_words = []
+
+        for paper in papers:
+            # Extract from title and abstract
+            text = f"{paper.get('title', '')} {paper.get('abstract', '')}"
+            text = text.lower()
+
+            # Simple keyword extraction (could be enhanced with NLP)
+            words = re.findall(r'\b[a-z]{4,}\b', text)  # Words 4+ chars
+            all_words.extend(words)
+
+        # Count and filter common words
+        stop_words = {'that', 'this', 'with', 'from', 'have', 'been', 'using', 'which', 'their', 'they'}
+        word_counts = Counter(w for w in all_words if w not in stop_words)
+
+        return [word for word, count in word_counts.most_common(top_n)]
+
+    def _calculate_growth_rate(self, yearly_data: Dict[int, Dict]) -> float:
+        """Calculate overall growth rate"""
+        years = sorted(yearly_data.keys())
+
+        if len(years) < 2:
+            return 0.0
+
+        first_year_count = yearly_data[years[0]]['paper_count']
+        last_year_count = yearly_data[years[-1]]['paper_count']
+
+        if first_year_count == 0:
+            return float('inf') if last_year_count > 0 else 0.0
+
+        return (last_year_count - first_year_count) / first_year_count
+
+    def _calculate_simple_growth_rate(self, yearly_counts: Dict[int, int]) -> float:
+        """Calculate simple growth rate from year->count mapping"""
+        years = sorted(yearly_counts.keys())
+
+        if len(years) < 2:
+            return 0.0
+
+        first_count = yearly_counts[years[0]]
+        last_count = yearly_counts[years[-1]]
+
+        if first_count == 0:
+            return float('inf') if last_count > 0 else 0.0
+
+        return (last_count - first_count) / first_count
+
+    def _detect_inflection_points(self, yearly_data: Dict[int, Dict]) -> List[Dict[str, Any]]:
+        """Detect significant inflection points in trend"""
+        inflection_points = []
+        years = sorted(yearly_data.keys())
+
+        for i in range(1, len(years) - 1):
+            prev_year = years[i - 1]
+            curr_year = years[i]
+            next_year = years[i + 1]
+
+            prev_count = yearly_data[prev_year]['paper_count']
+            curr_count = yearly_data[curr_year]['paper_count']
+            next_count = yearly_data[next_year]['paper_count']
+
+            # Check for significant change in direction
+            if curr_count > prev_count * 1.5 and curr_count > next_count:
+                inflection_points.append({
+                    'year': curr_year,
+                    'type': 'peak',
+                    'paper_count': curr_count
+                })
+            elif curr_count < prev_count * 0.5 and curr_count < next_count:
+                inflection_points.append({
+                    'year': curr_year,
+                    'type': 'trough',
+                    'paper_count': curr_count
+                })
+
+        return inflection_points
+
+    def _identify_emerging_keywords(self, yearly_data: Dict[int, Dict]) -> List[Dict[str, Any]]:
+        """Identify keywords that emerged recently"""
+        years = sorted(yearly_data.keys())
+
+        if len(years) < 2:
+            return []
+
+        # Compare recent vs older keywords
+        recent_years = years[-3:] if len(years) >= 3 else years[-2:]
+        older_years = years[:-3] if len(years) >= 3 else years[:-2]
+
+        recent_keywords = Counter()
+        older_keywords = Counter()
+
+        for year in recent_years:
+            keywords = yearly_data[year]['keywords']
+            recent_keywords.update(keywords)
+
+        for year in older_years:
+            keywords = yearly_data[year]['keywords']
+            older_keywords.update(keywords)
+
+        # Find new keywords
+        emerging = []
+        for keyword, recent_count in recent_keywords.items():
+            older_count = older_keywords.get(keyword, 0)
+
+            if older_count == 0 and recent_count >= 2:
+                emerging.append({
+                    'keyword': keyword,
+                    'recent_mentions': recent_count,
+                    'status': 'new'
+                })
+            elif older_count > 0 and recent_count > older_count * 2:
+                emerging.append({
+                    'keyword': keyword,
+                    'recent_mentions': recent_count,
+                    'older_mentions': older_count,
+                    'growth': round((recent_count - older_count) / older_count * 100, 1),
+                    'status': 'growing'
+                })
+
+        return emerging[:10]
+
+    def _classify_trend(self, growth_rate: float) -> str:
+        """Classify trend based on growth rate"""
+        if growth_rate > 1.0:
+            return 'exponential_growth'
+        elif growth_rate > 0.5:
+            return 'strong_growth'
+        elif growth_rate > 0.2:
+            return 'moderate_growth'
+        elif growth_rate > -0.2:
+            return 'stable'
+        elif growth_rate > -0.5:
+            return 'declining'
+        else:
+            return 'strong_decline'
+
+    def _extract_all_keywords(self, papers: List[Dict[str, Any]]) -> Counter:
+        """Extract and count all keywords from papers"""
+        keywords = Counter()
+
+        for paper in papers:
+            text = f"{paper.get('title', '')} {paper.get('abstract', '')}"
+            text = text.lower()
+
+            # Extract bigrams and trigrams (more meaningful than single words)
+            words = re.findall(r'\b[a-z]+\b', text)
+
+            # Bigrams
+            for i in range(len(words) - 1):
+                bigram = f"{words[i]} {words[i+1]}"
+                if len(bigram) > 8:  # Filter very short bigrams
+                    keywords[bigram] += 1
+
+            # Trigrams
+            for i in range(len(words) - 2):
+                trigram = f"{words[i]} {words[i+1]} {words[i+2]}"
+                if len(trigram) > 12:
+                    keywords[trigram] += 1
+
+        return keywords
+
+    def _calculate_relevance_score(self, paper: Dict[str, Any]) -> float:
+        """Calculate relevance score for paper recommendation"""
+        score = 0.0
+
+        # Recency (papers from current year get boost)
+        current_year = datetime.now().year
+        year = paper.get('year', current_year - 10)
+        recency = max(0, 1 - (current_year - year) / 10)  # 0-1 score
+        score += recency * 30
+
+        # Citations (normalize to 0-40 range)
+        citations = paper.get('citationCount', 0)
+        citation_score = min(citations / 100, 1.0) * 40
+        score += citation_score
+
+        # Citation velocity (citations per year)
+        age = max(1, current_year - year)
+        velocity = citations / age
+        velocity_score = min(velocity / 50, 1.0) * 30
+        score += velocity_score
+
+        return round(score, 2)
+
+    def _generate_recommendation_reason(self, paper: Dict[str, Any], score: float) -> str:
+        """Generate human-readable reason for recommendation"""
+        current_year = datetime.now().year
+        year = paper.get('year', current_year)
+        citations = paper.get('citationCount', 0)
+
+        reasons = []
+
+        if year >= current_year:
+            reasons.append("Very recent")
+        elif year >= current_year - 1:
+            reasons.append("Recent")
+
+        if citations > 100:
+            reasons.append("Highly cited")
+        elif citations > 50:
+            reasons.append("Well cited")
+
+        age = max(1, current_year - year)
+        velocity = citations / age
+
+        if velocity > 50:
+            reasons.append("High impact")
+
+        if not reasons:
+            reasons.append("Relevant")
+
+        return " · ".join(reasons)
+
+    def _generate_comparison_insights(self, comparison_data: Dict[str, Any]) -> List[str]:
+        """Generate insights from comparison data"""
+        insights = []
+
+        # Find fastest growing
+        growth_rates = {
+            topic: self._calculate_simple_growth_rate(data['yearly_counts'])
+            for topic, data in comparison_data.items()
+        }
+
+        fastest = max(growth_rates, key=growth_rates.get)
+        insights.append(f"{fastest} shows the fastest growth")
+
+        # Find most established
+        total_papers = {
+            topic: data['total_papers']
+            for topic, data in comparison_data.items()
+        }
+
+        most_established = max(total_papers, key=total_papers.get)
+        insights.append(f"{most_established} has the most publications")
+
+        return insights
+
+
+def get_trend_analyzer(archive_client=None) -> ResearchTrendAnalyzer:
+    """
+    Get ResearchTrendAnalyzer instance
+
+    Args:
+        archive_client: ArchiveAPIClient instance
+
+    Returns:
+        ResearchTrendAnalyzer instance
+    """
+    return ResearchTrendAnalyzer(archive_client)
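
For orientation, a minimal usage sketch of the new module (an editor's illustration, not part of the package). It assumes the ArchiveAPIClient added in cite_agent/archive_api_client.py can be constructed with no arguments and exposes search_papers(query=..., limit=..., fields=...) returning a dict with a 'data' list, which is the interface trend_analyzer.py calls above; the import path and constructor are guesses.

    # Sketch only: import path and ArchiveAPIClient() constructor are assumptions
    from cite_agent.archive_api_client import ArchiveAPIClient
    from cite_agent.trend_analyzer import get_trend_analyzer

    client = ArchiveAPIClient()  # hypothetical no-arg construction
    analyzer = get_trend_analyzer(client)

    # Ten-year evolution of one topic: counts, growth rate, inflection points
    evolution = analyzer.analyze_topic_evolution("graph neural networks", years=10)
    print(evolution["trend"], evolution["growth_rate"])

    # Topics whose recent paper counts more than doubled vs. the prior window
    for t in analyzer.emerging_topics("machine learning", min_papers=20):
        print(t["topic"], f"{t['growth_rate']}%", t["status"])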
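
The recommendation score in _calculate_relevance_score is a 100-point weighted blend: up to 30 points for recency (linear decay over 10 years), up to 40 for citation count (saturating at 100 citations), and up to 30 for citation velocity (saturating at 50 citations/year). A worked example with made-up paper numbers, following the code above:

    # Hypothetical paper: published 2023, 80 citations, scored in 2025
    recency_score  = max(0, 1 - (2025 - 2023) / 10) * 30   # 0.8 * 30 = 24.0
    citation_score = min(80 / 100, 1.0) * 40               # 0.8 * 40 = 32.0
    velocity_score = min((80 / 2) / 50, 1.0) * 30          # 40/yr -> 0.8 * 30 = 24.0
    total = round(recency_score + citation_score + velocity_score, 2)  # 80.0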