cite-agent 1.3.9__py3-none-any.whl → 1.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. cite_agent/__init__.py +13 -13
  2. cite_agent/__version__.py +1 -1
  3. cite_agent/action_first_mode.py +150 -0
  4. cite_agent/adaptive_providers.py +413 -0
  5. cite_agent/archive_api_client.py +186 -0
  6. cite_agent/auth.py +0 -1
  7. cite_agent/auto_expander.py +70 -0
  8. cite_agent/cache.py +379 -0
  9. cite_agent/circuit_breaker.py +370 -0
  10. cite_agent/citation_network.py +377 -0
  11. cite_agent/cli.py +8 -16
  12. cite_agent/cli_conversational.py +113 -3
  13. cite_agent/confidence_calibration.py +381 -0
  14. cite_agent/deduplication.py +325 -0
  15. cite_agent/enhanced_ai_agent.py +689 -371
  16. cite_agent/error_handler.py +228 -0
  17. cite_agent/execution_safety.py +329 -0
  18. cite_agent/full_paper_reader.py +239 -0
  19. cite_agent/observability.py +398 -0
  20. cite_agent/offline_mode.py +348 -0
  21. cite_agent/paper_comparator.py +368 -0
  22. cite_agent/paper_summarizer.py +420 -0
  23. cite_agent/pdf_extractor.py +350 -0
  24. cite_agent/proactive_boundaries.py +266 -0
  25. cite_agent/quality_gate.py +442 -0
  26. cite_agent/request_queue.py +390 -0
  27. cite_agent/response_enhancer.py +257 -0
  28. cite_agent/response_formatter.py +458 -0
  29. cite_agent/response_pipeline.py +295 -0
  30. cite_agent/response_style_enhancer.py +259 -0
  31. cite_agent/self_healing.py +418 -0
  32. cite_agent/similarity_finder.py +524 -0
  33. cite_agent/streaming_ui.py +13 -9
  34. cite_agent/thinking_blocks.py +308 -0
  35. cite_agent/tool_orchestrator.py +416 -0
  36. cite_agent/trend_analyzer.py +540 -0
  37. cite_agent/unpaywall_client.py +226 -0
  38. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
  39. cite_agent-1.4.3.dist-info/RECORD +62 -0
  40. cite_agent-1.3.9.dist-info/RECORD +0 -32
  41. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
  42. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
  43. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
  44. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,368 @@
1
+ """
2
+ Smart Paper Comparison - Compare research papers systematically
3
+
4
+ Provides tools for:
5
+ - Comparing methodologies
6
+ - Comparing results/metrics
7
+ - Finding contradictions
8
+ - Analyzing methodology overlap
9
+ """
10
+
11
+ from typing import List, Dict, Any, Optional
12
+ import logging
13
+ import re
14
+ from collections import defaultdict
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class PaperComparator:
    """Compare papers across multiple dimensions.

    Every comparison is a keyword/regex heuristic over each paper's
    ``abstract`` text. Papers are plain dicts; the keys read here are
    ``paperId``, ``doi``, ``title``, ``year``, ``authors`` and ``abstract``.
    Missing or ``None`` values for ``title``, ``authors`` and ``abstract``
    are tolerated.
    """

    # Placeholder used wherever a paper has no usable title.
    _UNKNOWN_TITLE = 'Unknown'
    # Titles are truncated to this many characters when used as row labels.
    _TITLE_LABEL_LENGTH = 50
    # At most this many claims are kept per paper.
    _MAX_CLAIMS = 5

    # Matches "<word> dataset" or "<word> corpus".
    _DATASET_PATTERN = re.compile(r'(\w+\s+dataset|\w+\s+corpus)', re.IGNORECASE)

    # Checked in order; the first keyword found in the abstract wins.
    _MODEL_KEYWORDS = ('transformer', 'bert', 'gpt', 'lstm', 'cnn', 'neural network')

    _EVALUATION_METRICS = (
        'accuracy', 'f1', 'precision', 'recall', 'bleu', 'rouge', 'perplexity',
    )

    # Metric name -> pattern whose group 1 is the number following the metric word.
    _METRIC_PATTERNS = {
        'accuracy': re.compile(r'accuracy[:\s]+(\d+\.?\d*)%?', re.IGNORECASE),
        'f1_score': re.compile(r'f1[:\s]+(\d+\.?\d*)', re.IGNORECASE),
        'precision': re.compile(r'precision[:\s]+(\d+\.?\d*)%?', re.IGNORECASE),
        'recall': re.compile(r'recall[:\s]+(\d+\.?\d*)%?', re.IGNORECASE),
        'bleu': re.compile(r'bleu[:\s]+(\d+\.?\d*)', re.IGNORECASE),
        'rouge': re.compile(r'rouge[:\s]+(\d+\.?\d*)', re.IGNORECASE),
    }

    # Substrings whose presence marks a sentence as a claim.
    _CLAIM_VERBS = ('show', 'demonstrate', 'prove', 'achieve', 'outperform', 'improve')

    # Word pairs treated as opposing; matched as substrings in either order.
    _CONTRADICTORY_PAIRS = (
        ('outperform', 'underperform'),
        ('better', 'worse'),
        ('increase', 'decrease'),
        ('improve', 'degrade'),
        ('superior', 'inferior'),
    )

    # Common ML/NLP techniques searched for in lower-cased abstracts.
    _TECHNIQUE_KEYWORDS = (
        'transformer', 'attention', 'bert', 'gpt', 'lstm', 'rnn', 'cnn',
        'fine-tuning', 'pre-training', 'transfer learning',
        'neural network', 'deep learning', 'reinforcement learning',
        'supervised', 'unsupervised', 'semi-supervised',
        'embedding', 'representation learning',
        'data augmentation', 'regularization', 'dropout',
    )

    def __init__(self, paper_reader=None):
        """
        Initialize paper comparator

        Args:
            paper_reader: FullPaperReader instance for reading PDFs. It is
                only stored on the instance; no method of this class reads it.
        """
        self.paper_reader = paper_reader

    @staticmethod
    def _abstract_text(paper: Dict[str, Any]) -> str:
        """Return the paper's abstract, or '' when it is missing or None."""
        # ``.get('abstract', '')`` is not enough: the key may exist with a
        # None value, which would break every string operation downstream.
        return paper.get('abstract') or ''

    @classmethod
    def _title_label(cls, title: Optional[str]) -> str:
        """Return a truncated row label for a title, tolerating None/empty."""
        return (title or cls._UNKNOWN_TITLE)[:cls._TITLE_LABEL_LENGTH]

    def compare_methodologies(self, papers: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Compare methodologies across papers

        Args:
            papers: List of paper objects with metadata

        Returns:
            Dict with ``papers`` (per-paper info including the extracted
            ``methodology``), ``dimensions`` (sorted dimension names) and
            ``comparison_table`` (one row per dimension holding one value per
            paper). With fewer than two papers, ``{"error": ...}`` is
            returned instead.
        """
        if len(papers) < 2:
            return {"error": "Need at least 2 papers to compare"}

        paper_infos = []
        for paper in papers:
            # ``authors`` may be absent or None; only the first three are kept.
            authors = paper.get('authors') or []
            paper_infos.append({
                'id': paper.get('paperId') or paper.get('doi'),
                'title': paper.get('title'),
                'year': paper.get('year'),
                'authors': [a.get('name') for a in authors[:3]],
                'methodology': self._extract_methodology(paper),
            })

        dimensions = self._identify_methodology_dimensions(paper_infos)

        comparison_table = []
        for dimension in dimensions:
            values = [
                {
                    # The title may be None, so it is never sliced directly.
                    'paper': self._title_label(info['title']),
                    'value': self._extract_dimension_value(info['methodology'], dimension),
                }
                for info in paper_infos
            ]
            comparison_table.append({'dimension': dimension, 'values': values})

        return {
            'papers': paper_infos,
            'dimensions': dimensions,
            'comparison_table': comparison_table,
        }

    def compare_results(self, papers: List[Dict[str, Any]], metric: Optional[str] = None) -> Dict[str, Any]:
        """
        Compare numerical results/metrics across papers

        Args:
            papers: List of paper objects
            metric: Specific metric to compare (e.g., "accuracy", "F1");
                matched case-insensitively as a substring of the metric name

        Returns:
            Dict with ``papers`` (per-paper extracted metrics) and ``metrics``
            (metric name -> list of ``{paper, value, year, rank}`` entries,
            where rank 1 is the highest value)
        """
        results = {
            'papers': [],
            'metrics': defaultdict(list)
        }

        for paper in papers:
            title = paper.get('title') or self._UNKNOWN_TITLE
            year = paper.get('year')
            numbers = self._extract_numbers_from_paper(paper)

            results['papers'].append({
                'id': paper.get('paperId') or paper.get('doi'),
                'title': title,
                'year': year,
                'metrics': numbers
            })

            # Group by metric type
            for metric_name, value in numbers.items():
                results['metrics'][metric_name].append({
                    'paper': self._title_label(title),
                    'value': value,
                    'year': year
                })

        # If specific metric requested, filter
        if metric:
            wanted = metric.lower()
            results['metrics'] = {
                name: entries for name, entries in results['metrics'].items()
                if wanted in name.lower()
            }

        # Rank within each metric, higher value first. Sorting yields a new
        # list but the same entry dicts, so the rank lands on the stored entries.
        for entries in results['metrics'].values():
            ranked = sorted(entries, key=lambda entry: entry['value'], reverse=True)
            for rank, entry in enumerate(ranked, 1):
                entry['rank'] = rank

        return results

    def find_contradictions(self, papers: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Find contradicting findings across papers

        Args:
            papers: List of paper objects

        Returns:
            List of contradictions found; each entry names both papers, their
            years and the two claims that were flagged
        """
        paper_claims = [
            {
                'paper': paper.get('title') or self._UNKNOWN_TITLE,
                'year': paper.get('year'),
                'claims': self._extract_claims(paper)
            }
            for paper in papers
        ]

        contradictions = []
        # Compare each unordered pair of distinct papers exactly once.
        for index, first in enumerate(paper_claims):
            for second in paper_claims[index + 1:]:
                for claim1 in first['claims']:
                    for claim2 in second['claims']:
                        if self._are_contradictory(claim1, claim2):
                            contradictions.append({
                                'paper1': first['paper'],
                                'year1': first['year'],
                                'claim1': claim1,
                                'paper2': second['paper'],
                                'year2': second['year'],
                                'claim2': claim2,
                                'confidence': 'medium'  # Would need NLP for high confidence
                            })

        return contradictions

    def methodology_overlap(self, papers: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze methodology overlap across papers

        Args:
            papers: List of paper objects

        Returns:
            Dict with ``total_papers``, three disjoint technique groups
            (``common_techniques``, ``unique_techniques``, ``partial_overlap``;
            each maps technique -> truncated paper titles) and
            ``overlap_score``, the fraction of techniques that are common
        """
        techniques = defaultdict(list)

        for paper in papers:
            label = self._title_label(paper.get('title'))
            for technique in self._extract_techniques(paper):
                techniques[technique].append(label)

        half = len(papers) / 2
        # A technique seen in a single paper is never "common": without the
        # ``> 1`` guard, two papers with nothing shared would score 1.0.
        common_techniques = {
            name: users for name, users in techniques.items()
            if len(users) > 1 and len(users) >= half
        }
        unique_techniques = {
            name: users for name, users in techniques.items() if len(users) == 1
        }
        partial_overlap = {
            name: users for name, users in techniques.items()
            if 1 < len(users) < half
        }

        return {
            'total_papers': len(papers),
            'common_techniques': common_techniques,   # Shared by >= half of the papers (and >= 2)
            'unique_techniques': unique_techniques,   # Used by only one paper
            'partial_overlap': partial_overlap,       # Shared, but by fewer than half
            'overlap_score': len(common_techniques) / max(len(techniques), 1)
        }

    def _extract_methodology(self, paper: Dict[str, Any]) -> Dict[str, Any]:
        """Extract methodology information from the paper's abstract.

        Returns a dict with keys ``dataset``, ``model``, ``baseline`` and
        ``evaluation``; a value stays None when nothing was detected.
        """
        methodology = {
            'dataset': None,
            'model': None,
            'baseline': None,
            'evaluation': None
        }

        abstract = self._abstract_text(paper)
        lowered = abstract.lower()

        # The pattern covers both "... dataset" and "... corpus", so it is run
        # unconditionally rather than only when the word "dataset" appears.
        dataset_match = self._DATASET_PATTERN.search(abstract)
        if dataset_match:
            methodology['dataset'] = dataset_match.group(0)

        if 'model' in lowered or 'architecture' in lowered:
            methodology['model'] = next(
                (keyword for keyword in self._MODEL_KEYWORDS if keyword in lowered),
                None,
            )

        if 'baseline' in lowered:
            methodology['baseline'] = 'Yes (mentioned)'

        found_metrics = [m for m in self._EVALUATION_METRICS if m in lowered]
        if found_metrics:
            methodology['evaluation'] = ', '.join(found_metrics)

        return methodology

    def _identify_methodology_dimensions(self, papers: List[Dict[str, Any]]) -> List[str]:
        """Return the sorted union of methodology keys across paper infos."""
        dimensions = set()
        for paper in papers:
            dimensions.update(paper.get('methodology', {}))
        return sorted(dimensions)

    def _extract_dimension_value(self, methodology: Dict[str, Any], dimension: str) -> str:
        """Return the dimension's value as a string, or 'Not mentioned' if falsy."""
        value = methodology.get(dimension)
        return str(value) if value else 'Not mentioned'

    def _extract_numbers_from_paper(self, paper: Dict[str, Any]) -> Dict[str, float]:
        """Extract numerical results from the paper's abstract.

        Only the first occurrence of each metric is used. Values are returned
        as written in the text; percentages are not rescaled.
        """
        abstract = self._abstract_text(paper)
        numbers = {}

        for metric, pattern in self._METRIC_PATTERNS.items():
            match = pattern.search(abstract)
            if match:
                # Group 1 is ``\d+\.?\d*``, which float() always accepts.
                numbers[metric] = float(match.group(1))

        return numbers

    def _extract_claims(self, paper: Dict[str, Any]) -> List[str]:
        """Extract up to five claim sentences from the paper's abstract.

        A sentence (split on '.') is a claim when it contains one of the
        claim verbs as a substring.
        """
        claims = []
        for sentence in self._abstract_text(paper).split('.'):
            lowered = sentence.lower()
            if any(verb in lowered for verb in self._CLAIM_VERBS):
                claims.append(sentence.strip())

        return claims[:self._MAX_CLAIMS]

    def _are_contradictory(self, claim1: str, claim2: str) -> bool:
        """Check if two claims contradict each other (simple heuristic).

        True when one claim contains a word from an opposing pair and the
        other claim contains its counterpart; matching is by substring.
        """
        first = claim1.lower()
        second = claim2.lower()

        return any(
            (word1 in first and word2 in second)
            or (word2 in first and word1 in second)
            for word1, word2 in self._CONTRADICTORY_PAIRS
        )

    def _extract_techniques(self, paper: Dict[str, Any]) -> List[str]:
        """Return title-cased technique keywords found in the paper's abstract."""
        abstract = self._abstract_text(paper).lower()
        return [
            technique.title()
            for technique in self._TECHNIQUE_KEYWORDS
            if technique in abstract
        ]
356
+
357
+
358
def get_paper_comparator(paper_reader=None) -> PaperComparator:
    """
    Build a new PaperComparator.

    Args:
        paper_reader: FullPaperReader instance, passed straight through to
            the PaperComparator constructor (may be None)

    Returns:
        A freshly constructed PaperComparator; each call creates a new one
    """
    comparator = PaperComparator(paper_reader)
    return comparator