cite-agent 1.3.9__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cite_agent/__init__.py +13 -13
- cite_agent/__version__.py +1 -1
- cite_agent/action_first_mode.py +150 -0
- cite_agent/adaptive_providers.py +413 -0
- cite_agent/archive_api_client.py +186 -0
- cite_agent/auth.py +0 -1
- cite_agent/auto_expander.py +70 -0
- cite_agent/cache.py +379 -0
- cite_agent/circuit_breaker.py +370 -0
- cite_agent/citation_network.py +377 -0
- cite_agent/cli.py +8 -16
- cite_agent/cli_conversational.py +113 -3
- cite_agent/confidence_calibration.py +381 -0
- cite_agent/deduplication.py +325 -0
- cite_agent/enhanced_ai_agent.py +689 -371
- cite_agent/error_handler.py +228 -0
- cite_agent/execution_safety.py +329 -0
- cite_agent/full_paper_reader.py +239 -0
- cite_agent/observability.py +398 -0
- cite_agent/offline_mode.py +348 -0
- cite_agent/paper_comparator.py +368 -0
- cite_agent/paper_summarizer.py +420 -0
- cite_agent/pdf_extractor.py +350 -0
- cite_agent/proactive_boundaries.py +266 -0
- cite_agent/quality_gate.py +442 -0
- cite_agent/request_queue.py +390 -0
- cite_agent/response_enhancer.py +257 -0
- cite_agent/response_formatter.py +458 -0
- cite_agent/response_pipeline.py +295 -0
- cite_agent/response_style_enhancer.py +259 -0
- cite_agent/self_healing.py +418 -0
- cite_agent/similarity_finder.py +524 -0
- cite_agent/streaming_ui.py +13 -9
- cite_agent/thinking_blocks.py +308 -0
- cite_agent/tool_orchestrator.py +416 -0
- cite_agent/trend_analyzer.py +540 -0
- cite_agent/unpaywall_client.py +226 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
- cite_agent-1.4.3.dist-info/RECORD +62 -0
- cite_agent-1.3.9.dist-info/RECORD +0 -32
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Smart Paper Comparison - Compare research papers systematically
|
|
3
|
+
|
|
4
|
+
Provides tools for:
|
|
5
|
+
- Comparing methodologies
|
|
6
|
+
- Comparing results/metrics
|
|
7
|
+
- Finding contradictions
|
|
8
|
+
- Analyzing methodology overlap
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import List, Dict, Any, Optional
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PaperComparator:
    """Compare research papers across multiple dimensions.

    Supports methodology comparison, numerical-result comparison,
    contradiction detection, and methodology-overlap analysis.  All
    extraction is heuristic and keyword-based over paper abstracts;
    metadata fields coming from external APIs may be missing or None,
    so every accessor below normalizes them defensively.
    """

    def __init__(self, paper_reader=None):
        """
        Initialize paper comparator.

        Args:
            paper_reader: FullPaperReader instance for reading PDFs
                (stored for callers; the heuristics below operate on
                abstracts only).
        """
        self.paper_reader = paper_reader

    def compare_methodologies(self, papers: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Compare methodologies across papers.

        Args:
            papers: List of paper objects with metadata.

        Returns:
            Dict with 'papers' (per-paper info plus extracted
            methodology), 'dimensions' (sorted dimension names) and
            'comparison_table' (one row per dimension with one value
            per paper).  Returns {'error': ...} for fewer than 2 papers.
        """
        if len(papers) < 2:
            return {"error": "Need at least 2 papers to compare"}

        comparison: Dict[str, Any] = {
            'papers': [],
            'dimensions': [],
            'comparison_table': []
        }

        # Extract methodology from each paper
        for paper in papers:
            paper_info = {
                'id': paper.get('paperId') or paper.get('doi'),
                # Default missing/None titles so the [:50] slice below is safe
                'title': paper.get('title') or 'Unknown',
                'year': paper.get('year'),
                # 'authors' may be absent or None in some API payloads
                'authors': [a.get('name') for a in (paper.get('authors') or [])[:3]],
            }
            paper_info['methodology'] = self._extract_methodology(paper)
            comparison['papers'].append(paper_info)

        # Identify the union of methodology dimensions across all papers
        dimensions = self._identify_methodology_dimensions(comparison['papers'])
        comparison['dimensions'] = dimensions

        # Build comparison table: one row per dimension, one value per paper
        for dimension in dimensions:
            row = {'dimension': dimension, 'values': []}
            for paper_info in comparison['papers']:
                value = self._extract_dimension_value(paper_info['methodology'], dimension)
                row['values'].append({
                    'paper': paper_info['title'][:50],
                    'value': value
                })
            comparison['comparison_table'].append(row)

        return comparison

    def compare_results(self, papers: List[Dict[str, Any]], metric: Optional[str] = None) -> Dict[str, Any]:
        """
        Compare numerical results/metrics across papers.

        Args:
            papers: List of paper objects.
            metric: Specific metric to compare (e.g., "accuracy", "F1");
                when given, only metric names containing this substring
                (case-insensitive) are kept.

        Returns:
            Dict with 'papers' (per-paper extracted metrics) and
            'metrics' (metric name -> list of per-paper values, each
            annotated with a 1-based 'rank', highest value first).
        """
        results: Dict[str, Any] = {
            'papers': [],
            'metrics': defaultdict(list)
        }

        for paper in papers:
            paper_id = paper.get('paperId') or paper.get('doi')
            # Default missing/None titles so the [:50] slice below is safe
            title = paper.get('title') or 'Unknown'

            # Extract all numerical results mentioned in the abstract
            numbers = self._extract_numbers_from_paper(paper)

            results['papers'].append({
                'id': paper_id,
                'title': title,
                'year': paper.get('year'),
                'metrics': numbers
            })

            # Group values by metric type for cross-paper comparison
            for metric_name, value in numbers.items():
                results['metrics'][metric_name].append({
                    'paper': title[:50],
                    'value': value,
                    'year': paper.get('year')
                })

        # If specific metric requested, filter to matching metric names
        if metric:
            metric_lower = metric.lower()
            results['metrics'] = {
                k: v for k, v in results['metrics'].items()
                if metric_lower in k.lower()
            }
        else:
            # Freeze the defaultdict so later lookups can't autovivify keys
            results['metrics'] = dict(results['metrics'])

        # Add rankings (higher value = better, which holds for most metrics)
        for metric_name, values in results['metrics'].items():
            sorted_values = sorted(values, key=lambda x: x['value'], reverse=True)
            for i, item in enumerate(sorted_values, 1):
                item['rank'] = i

        return results

    def find_contradictions(self, papers: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Find contradicting findings across papers.

        Args:
            papers: List of paper objects.

        Returns:
            List of contradiction records, each naming both papers,
            both claims, and a heuristic confidence level.
        """
        contradictions: List[Dict[str, Any]] = []

        # Extract key claims from each paper's abstract
        paper_claims = [
            {
                'paper': paper.get('title') or 'Unknown',
                'year': paper.get('year'),
                'claims': self._extract_claims(paper)
            }
            for paper in papers
        ]

        # Compare claims pairwise across every pair of distinct papers
        for i in range(len(paper_claims)):
            for j in range(i + 1, len(paper_claims)):
                paper1 = paper_claims[i]
                paper2 = paper_claims[j]

                for claim1 in paper1['claims']:
                    for claim2 in paper2['claims']:
                        if self._are_contradictory(claim1, claim2):
                            contradictions.append({
                                'paper1': paper1['paper'],
                                'year1': paper1['year'],
                                'claim1': claim1,
                                'paper2': paper2['paper'],
                                'year2': paper2['year'],
                                'claim2': claim2,
                                'confidence': 'medium'  # Would need NLP for high confidence
                            })

        return contradictions

    def methodology_overlap(self, papers: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze methodology overlap across papers.

        Args:
            papers: List of paper objects.

        Returns:
            Dict categorizing techniques as common (used by at least
            half the papers), unique (exactly one paper), or partial
            overlap, plus an 'overlap_score' in [0, 1].
        """
        techniques: Dict[str, List[str]] = defaultdict(list)

        # Record which papers mention each technique
        for paper in papers:
            # Default missing/None titles so the [:50] slice below is safe
            paper_title = paper.get('title') or 'Unknown'
            for technique in self._extract_techniques(paper):
                techniques[technique].append(paper_title[:50])

        # Categorize by how many papers share each technique
        half = len(papers) / 2
        common_techniques = {k: v for k, v in techniques.items() if len(v) >= half}
        unique_techniques = {k: v for k, v in techniques.items() if len(v) == 1}
        partial_overlap = {k: v for k, v in techniques.items() if 1 < len(v) < half}

        return {
            'total_papers': len(papers),
            'common_techniques': common_techniques,  # Used by most papers
            'unique_techniques': unique_techniques,  # Used by only one paper
            'partial_overlap': partial_overlap,      # Used by some papers
            'overlap_score': len(common_techniques) / max(len(techniques), 1)
        }

    def _extract_methodology(self, paper: Dict[str, Any]) -> Dict[str, Any]:
        """Extract dataset/model/baseline/evaluation info from the abstract
        via keyword heuristics; absent fields stay None."""
        methodology: Dict[str, Any] = {
            'dataset': None,
            'model': None,
            'baseline': None,
            'evaluation': None
        }

        # Some APIs return abstract=None; normalize to '' so .lower() is safe
        abstract = paper.get('abstract') or ''
        abstract_lower = abstract.lower()

        # Look for common methodology keywords
        if 'dataset' in abstract_lower:
            dataset_match = re.search(r'(\w+\s+dataset|\w+\s+corpus)', abstract, re.IGNORECASE)
            if dataset_match:
                methodology['dataset'] = dataset_match.group(0)

        if 'model' in abstract_lower or 'architecture' in abstract_lower:
            # First matching architecture keyword wins
            model_keywords = ['transformer', 'bert', 'gpt', 'lstm', 'cnn', 'neural network']
            for keyword in model_keywords:
                if keyword in abstract_lower:
                    methodology['model'] = keyword
                    break

        if 'baseline' in abstract_lower:
            methodology['baseline'] = 'Yes (mentioned)'

        # Evaluation metrics mentioned anywhere in the abstract
        metrics = ['accuracy', 'f1', 'precision', 'recall', 'bleu', 'rouge', 'perplexity']
        found_metrics = [m for m in metrics if m in abstract_lower]
        if found_metrics:
            methodology['evaluation'] = ', '.join(found_metrics)

        return methodology

    def _identify_methodology_dimensions(self, papers: List[Dict[str, Any]]) -> List[str]:
        """Return the sorted union of methodology dimension names across papers."""
        dimensions: set = set()
        for paper in papers:
            dimensions.update(paper.get('methodology', {}).keys())
        return sorted(dimensions)

    def _extract_dimension_value(self, methodology: Dict[str, Any], dimension: str) -> str:
        """Return the string value for a methodology dimension, or a placeholder."""
        value = methodology.get(dimension)
        return str(value) if value else 'Not mentioned'

    def _extract_numbers_from_paper(self, paper: Dict[str, Any]) -> Dict[str, float]:
        """Extract numerical metric values (accuracy, F1, ...) from the abstract."""
        numbers: Dict[str, float] = {}
        # Some APIs return abstract=None; normalize to '' so regexes are safe
        abstract = paper.get('abstract') or ''

        # Common "<metric>: <number>" patterns (trailing percent sign optional)
        patterns = {
            'accuracy': r'accuracy[:\s]+(\d+\.?\d*)%?',
            'f1_score': r'f1[:\s]+(\d+\.?\d*)',
            'precision': r'precision[:\s]+(\d+\.?\d*)%?',
            'recall': r'recall[:\s]+(\d+\.?\d*)%?',
            'bleu': r'bleu[:\s]+(\d+\.?\d*)',
            'rouge': r'rouge[:\s]+(\d+\.?\d*)',
        }

        for metric, pattern in patterns.items():
            match = re.search(pattern, abstract, re.IGNORECASE)
            if match:
                try:
                    numbers[metric] = float(match.group(1))
                except ValueError:
                    pass  # Malformed number; skip this metric

        return numbers

    def _extract_claims(self, paper: Dict[str, Any]) -> List[str]:
        """Extract up to 5 key claims: sentences containing a strong claim verb."""
        claims: List[str] = []
        # Some APIs return abstract=None; normalize to '' so .split() is safe
        abstract = paper.get('abstract') or ''

        # Simple heuristic: sentences with strong verbs
        strong_verbs = ('show', 'demonstrate', 'prove', 'achieve', 'outperform', 'improve')

        for sentence in abstract.split('.'):
            if any(verb in sentence.lower() for verb in strong_verbs):
                claims.append(sentence.strip())

        return claims[:5]  # Top 5 claims

    def _are_contradictory(self, claim1: str, claim2: str) -> bool:
        """Check if two claims contradict each other (simple keyword heuristic)."""
        contradictory_pairs = [
            ('outperform', 'underperform'),
            ('better', 'worse'),
            ('increase', 'decrease'),
            ('improve', 'degrade'),
            ('superior', 'inferior')
        ]

        claim1_lower = claim1.lower()
        claim2_lower = claim2.lower()

        # Contradictory when one claim uses a word and the other claim
        # uses its opposite, in either direction.
        for word1, word2 in contradictory_pairs:
            if word1 in claim1_lower and word2 in claim2_lower:
                return True
            if word2 in claim1_lower and word1 in claim2_lower:
                return True

        return False

    def _extract_techniques(self, paper: Dict[str, Any]) -> List[str]:
        """Extract ML/NLP technique names mentioned in the abstract (Title Cased)."""
        # Some APIs return abstract=None; normalize to '' so .lower() is safe
        abstract = (paper.get('abstract') or '').lower()

        # Common ML/NLP techniques
        technique_keywords = [
            'transformer', 'attention', 'bert', 'gpt', 'lstm', 'rnn', 'cnn',
            'fine-tuning', 'pre-training', 'transfer learning',
            'neural network', 'deep learning', 'reinforcement learning',
            'supervised', 'unsupervised', 'semi-supervised',
            'embedding', 'representation learning',
            'data augmentation', 'regularization', 'dropout'
        ]

        return [t.title() for t in technique_keywords if t in abstract]
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def get_paper_comparator(paper_reader=None) -> PaperComparator:
    """
    Create and return a :class:`PaperComparator`.

    Args:
        paper_reader: Optional FullPaperReader instance used for
            reading PDFs.

    Returns:
        A freshly constructed PaperComparator.
    """
    comparator = PaperComparator(paper_reader)
    return comparator
|