cite-agent 1.3.9-py3-none-any.whl → 1.4.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cite_agent/__init__.py +13 -13
- cite_agent/__version__.py +1 -1
- cite_agent/action_first_mode.py +150 -0
- cite_agent/adaptive_providers.py +413 -0
- cite_agent/archive_api_client.py +186 -0
- cite_agent/auth.py +0 -1
- cite_agent/auto_expander.py +70 -0
- cite_agent/cache.py +379 -0
- cite_agent/circuit_breaker.py +370 -0
- cite_agent/citation_network.py +377 -0
- cite_agent/cli.py +8 -16
- cite_agent/cli_conversational.py +113 -3
- cite_agent/confidence_calibration.py +381 -0
- cite_agent/deduplication.py +325 -0
- cite_agent/enhanced_ai_agent.py +689 -371
- cite_agent/error_handler.py +228 -0
- cite_agent/execution_safety.py +329 -0
- cite_agent/full_paper_reader.py +239 -0
- cite_agent/observability.py +398 -0
- cite_agent/offline_mode.py +348 -0
- cite_agent/paper_comparator.py +368 -0
- cite_agent/paper_summarizer.py +420 -0
- cite_agent/pdf_extractor.py +350 -0
- cite_agent/proactive_boundaries.py +266 -0
- cite_agent/quality_gate.py +442 -0
- cite_agent/request_queue.py +390 -0
- cite_agent/response_enhancer.py +257 -0
- cite_agent/response_formatter.py +458 -0
- cite_agent/response_pipeline.py +295 -0
- cite_agent/response_style_enhancer.py +259 -0
- cite_agent/self_healing.py +418 -0
- cite_agent/similarity_finder.py +524 -0
- cite_agent/streaming_ui.py +13 -9
- cite_agent/thinking_blocks.py +308 -0
- cite_agent/tool_orchestrator.py +416 -0
- cite_agent/trend_analyzer.py +540 -0
- cite_agent/unpaywall_client.py +226 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
- cite_agent-1.4.3.dist-info/RECORD +62 -0
- cite_agent-1.3.9.dist-info/RECORD +0 -32
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
- {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0
cite_agent/similarity_finder.py
ADDED
@@ -0,0 +1,524 @@
+"""
+Similarity Finder - Discover similar papers and researchers
+
+Provides tools for:
+- Finding similar papers
+- Discovering related researchers
+- Institution rankings
+- Collaboration network analysis
+"""
+
+from typing import List, Dict, Any, Optional, Set
+from collections import defaultdict, Counter
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class SimilarityFinder:
+    """Find similar papers and researchers"""
+
+    def __init__(self, archive_client=None):
+        """
+        Initialize similarity finder
+
+        Args:
+            archive_client: ArchiveAPIClient instance
+        """
+        self.archive_client = archive_client
+
+    def find_similar_papers(self, paper_id: str, limit: int = 10, method: str = "citations") -> List[Dict[str, Any]]:
+        """
+        Find papers similar to a given paper
+
+        Args:
+            paper_id: Paper ID (DOI, arXiv, or Semantic Scholar ID)
+            limit: Maximum papers to return
+            method: Similarity method ("citations", "keywords", or "authors")
+
+        Returns:
+            List of similar papers with similarity scores
+        """
+        if not self.archive_client:
+            return []
+
+        try:
+            # Get base paper
+            base_paper = self.archive_client.get_paper(
+                paper_id,
+                fields=['paperId', 'title', 'authors', 'year', 'citationCount', 'abstract', 'references']
+            )
+
+            if not base_paper:
+                return []
+
+            similar = []
+
+            if method == "citations":
+                similar = self._find_similar_by_citations(base_paper, limit * 2)
+            elif method == "keywords":
+                similar = self._find_similar_by_keywords(base_paper, limit * 2)
+            elif method == "authors":
+                similar = self._find_similar_by_authors(base_paper, limit * 2)
+            else:
+                # Hybrid approach
+                citation_similar = self._find_similar_by_citations(base_paper, limit)
+                keyword_similar = self._find_similar_by_keywords(base_paper, limit)
+                similar = self._merge_similarity_results([citation_similar, keyword_similar])
+
+            # Score and rank
+            scored = []
+            for paper in similar:
+                score = self._calculate_similarity_score(base_paper, paper, method)
+                scored.append({
+                    **paper,
+                    'similarity_score': score,
+                    'similarity_reasons': self._explain_similarity(base_paper, paper)
+                })
+
+            # Sort by score and deduplicate
+            scored.sort(key=lambda x: x['similarity_score'], reverse=True)
+
+            # Remove base paper if present
+            base_id = base_paper.get('paperId')
+            scored = [p for p in scored if p.get('paperId') != base_id]
+
+            return scored[:limit]
+
+        except Exception as e:
+            logger.error(f"Error finding similar papers: {e}")
+            return []
+
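A minimal usage sketch of the method above, not part of the diff: the DOI is illustrative, and `ArchiveAPIClient` is assumed to come from the `archive_api_client` module added in this release, with its real construction possibly differing.

    from cite_agent.archive_api_client import ArchiveAPIClient
    from cite_agent.similarity_finder import SimilarityFinder

    client = ArchiveAPIClient()  # assumed default construction
    finder = SimilarityFinder(archive_client=client)

    # Any method name other than the three documented ones falls through
    # to the hybrid branch (citations + keywords, merged and deduplicated)
    for paper in finder.find_similar_papers("10.18653/v1/N19-1423", limit=5, method="hybrid"):
        print(paper["title"], paper["similarity_score"], "; ".join(paper["similarity_reasons"]))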
+    def find_similar_researchers(self, topic: str, limit: int = 20) -> List[Dict[str, Any]]:
+        """
+        Find researchers working on a topic
+
+        Args:
+            topic: Research topic
+            limit: Maximum researchers to return
+
+        Returns:
+            List of researchers with publication counts and metrics
+        """
+        if not self.archive_client:
+            return []
+
+        try:
+            # Search for papers on topic
+            results = self.archive_client.search_papers(
+                query=topic,
+                limit=100,
+                fields=['authors', 'year', 'citationCount']
+            )
+
+            papers = results.get('data', [])
+
+            # Aggregate by author
+            author_stats = defaultdict(lambda: {
+                'papers': [],
+                'total_citations': 0,
+                'years': set(),
+                'paper_count': 0
+            })
+
+            for paper in papers:
+                citations = paper.get('citationCount', 0)
+                year = paper.get('year')
+
+                for author in paper.get('authors', []):
+                    author_id = author.get('authorId')
+                    author_name = author.get('name')
+
+                    if not author_name:
+                        continue
+
+                    key = author_id or author_name
+
+                    author_stats[key]['name'] = author_name
+                    author_stats[key]['author_id'] = author_id
+                    author_stats[key]['papers'].append(paper)
+                    author_stats[key]['total_citations'] += citations
+                    if year:
+                        author_stats[key]['years'].add(year)
+                    author_stats[key]['paper_count'] += 1
+
+            # Calculate metrics and rank
+            researchers = []
+
+            for key, stats in author_stats.items():
+                if stats['paper_count'] < 2:  # Filter out one-time authors
+                    continue
+
+                years = sorted(list(stats['years']))
+                active_years = max(years) - min(years) + 1 if len(years) > 1 else 1
+
+                h_index = self._calculate_h_index([p.get('citationCount', 0) for p in stats['papers']])
+
+                researchers.append({
+                    'name': stats['name'],
+                    'author_id': stats['author_id'],
+                    'paper_count': stats['paper_count'],
+                    'total_citations': stats['total_citations'],
+                    'avg_citations': round(stats['total_citations'] / stats['paper_count'], 1),
+                    'h_index': h_index,
+                    'active_years': active_years,
+                    'first_year': min(years) if years else None,
+                    'latest_year': max(years) if years else None,
+                    'productivity': round(stats['paper_count'] / active_years, 2),
+                    'relevance_score': self._calculate_researcher_relevance(stats)
+                })
+
+            # Sort by relevance
+            researchers.sort(key=lambda x: x['relevance_score'], reverse=True)
+
+            return researchers[:limit]
+
+        except Exception as e:
+            logger.error(f"Error finding similar researchers: {e}")
+            return []
+
+    def find_collaborators(self, author_name: str, limit: int = 20) -> List[Dict[str, Any]]:
+        """
+        Find an author's collaborators
+
+        Args:
+            author_name: Author name
+            limit: Maximum collaborators to return
+
+        Returns:
+            List of co-authors with collaboration metrics
+        """
+        if not self.archive_client:
+            return []
+
+        try:
+            # Search for author's papers
+            results = self.archive_client.search_papers(
+                query=f'author:"{author_name}"',
+                limit=100,
+                fields=['authors', 'year', 'citationCount']
+            )
+
+            papers = results.get('data', [])
+
+            # Count co-authorships
+            collaborator_stats = defaultdict(lambda: {
+                'papers': [],
+                'years': set(),
+                'total_citations': 0
+            })
+
+            for paper in papers:
+                citations = paper.get('citationCount', 0)
+                year = paper.get('year')
+
+                for author in paper.get('authors', []):
+                    name = author.get('name', '')
+
+                    # Skip the author themselves
+                    if name.lower() == author_name.lower():
+                        continue
+
+                    collaborator_stats[name]['papers'].append(paper)
+                    collaborator_stats[name]['total_citations'] += citations
+                    if year:
+                        collaborator_stats[name]['years'].add(year)
+
+            # Format results
+            collaborators = []
+
+            for name, stats in collaborator_stats.items():
+                years = sorted(list(stats['years']))
+
+                collaborators.append({
+                    'name': name,
+                    'joint_papers': len(stats['papers']),
+                    'total_citations': stats['total_citations'],
+                    'avg_citations': round(stats['total_citations'] / len(stats['papers']), 1),
+                    'first_collaboration': min(years) if years else None,
+                    'latest_collaboration': max(years) if years else None,
+                    'collaboration_span': max(years) - min(years) + 1 if len(years) > 1 else 1
+                })
+
+            # Sort by number of joint papers
+            collaborators.sort(key=lambda x: x['joint_papers'], reverse=True)
+
+            return collaborators[:limit]
+
+        except Exception as e:
+            logger.error(f"Error finding collaborators: {e}")
+            return []
+
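The per-coauthor dicts returned by `find_collaborators` are enough to derive the weighted edge list behind the "collaboration network analysis" the module docstring promises; a sketch reusing the `finder` instance from the earlier example (the author name is illustrative):

    author = "Yoshua Bengio"  # illustrative query
    edges = [
        (author, c["name"], c["joint_papers"])  # (source, target, edge weight)
        for c in finder.find_collaborators(author, limit=50)
    ]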
+    def institution_rankings(self, topic: str, limit: int = 20) -> List[Dict[str, Any]]:
+        """
+        Rank institutions by research output in a topic
+
+        Args:
+            topic: Research topic
+            limit: Maximum institutions to return
+
+        Returns:
+            List of institutions with publication metrics
+        """
+        if not self.archive_client:
+            return []
+
+        try:
+            # Search for papers
+            results = self.archive_client.search_papers(
+                query=topic,
+                limit=200,
+                fields=['authors', 'year', 'citationCount']
+            )
+
+            papers = results.get('data', [])
+
+            # Extract institutions from author affiliations
+            institution_stats = defaultdict(lambda: {
+                'papers': [],
+                'authors': set(),
+                'total_citations': 0,
+                'years': set()
+            })
+
+            for paper in papers:
+                citations = paper.get('citationCount', 0)
+                year = paper.get('year')
+
+                for author in paper.get('authors', []):
+                    # Extract institution from author info
+                    # Note: Semantic Scholar doesn't always provide affiliations
+                    # This is a simplified version
+                    author_name = author.get('name', '')
+
+                    # In a real implementation, would need affiliation data
+                    # For now, we'll skip this or use heuristics
+
+                    continue
+
+            # Note: Without reliable affiliation data from API,
+            # this feature is limited. Would need to:
+            # 1. Use OpenAlex API which has better affiliation data
+            # 2. Parse affiliations from paper metadata
+            # 3. Use a separate affiliation database
+
+            logger.warning("Institution rankings require affiliation data not available in current API")
+
+            return []
+
+        except Exception as e:
+            logger.error(f"Error ranking institutions: {e}")
+            return []
+
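As its comments note, `institution_rankings` currently always returns an empty list because the Semantic Scholar responses used here carry no affiliations, and the suggested fix is OpenAlex. A standalone sketch of that route, not part of the package; the `search`/`per-page` parameters and the `authorships`/`institutions`/`display_name` fields follow OpenAlex's published works API:

    import requests
    from collections import Counter

    def rank_institutions_openalex(topic: str, limit: int = 20):
        """Rank institutions by publication count on a topic, via OpenAlex."""
        resp = requests.get(
            "https://api.openalex.org/works",
            params={"search": topic, "per-page": 200},
            timeout=30,
        )
        resp.raise_for_status()
        counts = Counter()
        for work in resp.json().get("results", []):
            # OpenAlex attaches institutions to each authorship of a work
            for authorship in work.get("authorships", []):
                for inst in authorship.get("institutions", []):
                    name = inst.get("display_name")
                    if name:
                        counts[name] += 1
        return counts.most_common(limit)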
+    def _find_similar_by_citations(self, base_paper: Dict[str, Any], limit: int) -> List[Dict[str, Any]]:
+        """Find papers with similar citation patterns"""
+        similar = []
+
+        try:
+            # Get papers cited by this paper (references)
+            references = base_paper.get('references', [])
+
+            if not references:
+                # Fallback: get references via API
+                paper_id = base_paper.get('paperId')
+                if paper_id:
+                    references = self.archive_client.get_paper_references(paper_id, limit=50)
+
+            # For each reference, find papers that also cite it
+            # (papers with similar references are likely similar)
+            for ref in references[:10]:  # Sample top references
+                ref_id = ref.get('paperId')
+                if ref_id:
+                    citing_papers = self.archive_client.get_paper_citations(ref_id, limit=20)
+                    similar.extend(citing_papers)
+
+        except Exception as e:
+            logger.warning(f"Error in citation-based similarity: {e}")
+
+        return similar
+
+    def _find_similar_by_keywords(self, base_paper: Dict[str, Any], limit: int) -> List[Dict[str, Any]]:
+        """Find papers with similar keywords/topics"""
+        similar = []
+
+        try:
+            # Extract keywords from title and abstract
+            title = base_paper.get('title', '')
+            abstract = base_paper.get('abstract', '')
+
+            # Simple keyword extraction (first 3-4 significant words)
+            keywords = []
+
+            # Extract from title
+            title_words = [w for w in title.lower().split() if len(w) > 4]
+            keywords.extend(title_words[:4])
+
+            # Build search query
+            query = " ".join(keywords)
+
+            # Search for similar papers
+            results = self.archive_client.search_papers(
+                query=query,
+                limit=limit,
+                fields=['paperId', 'title', 'authors', 'year', 'citationCount', 'abstract']
+            )
+
+            similar = results.get('data', [])
+
+        except Exception as e:
+            logger.warning(f"Error in keyword-based similarity: {e}")
+
+        return similar
+
+    def _find_similar_by_authors(self, base_paper: Dict[str, Any], limit: int) -> List[Dict[str, Any]]:
+        """Find papers by the same authors"""
+        similar = []
+
+        try:
+            authors = base_paper.get('authors', [])
+
+            if not authors:
+                return []
+
+            # Get papers by first author
+            first_author = authors[0].get('name')
+
+            if first_author:
+                results = self.archive_client.search_papers(
+                    query=f'author:"{first_author}"',
+                    limit=limit,
+                    fields=['paperId', 'title', 'authors', 'year', 'citationCount', 'abstract']
+                )
+
+                similar = results.get('data', [])
+
+        except Exception as e:
+            logger.warning(f"Error in author-based similarity: {e}")
+
+        return similar
+
+    def _merge_similarity_results(self, result_lists: List[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
+        """Merge and deduplicate similarity results from multiple methods"""
+        seen = set()
+        merged = []
+
+        for results in result_lists:
+            for paper in results:
+                paper_id = paper.get('paperId')
+
+                if not paper_id or paper_id in seen:
+                    continue
+
+                seen.add(paper_id)
+                merged.append(paper)
+
+        return merged
+
+    def _calculate_similarity_score(self, base_paper: Dict[str, Any], candidate: Dict[str, Any], method: str) -> float:
+        """Calculate similarity score between two papers"""
+        score = 0.0
+
+        # Year proximity (papers from similar years are more similar)
+        base_year = base_paper.get('year', 2020)
+        cand_year = candidate.get('year', 2020)
+        year_diff = abs(base_year - cand_year)
+        year_score = max(0, 1 - year_diff / 20) * 20
+        score += year_score
+
+        # Citation count similarity (papers with similar citation counts)
+        base_cites = base_paper.get('citationCount', 0)
+        cand_cites = candidate.get('citationCount', 0)
+
+        if base_cites > 0:
+            cite_ratio = min(cand_cites / base_cites, base_cites / max(cand_cites, 1))
+            cite_score = cite_ratio * 30
+            score += cite_score
+
+        # Author overlap
+        base_authors = {a.get('name', '').lower() for a in base_paper.get('authors', [])}
+        cand_authors = {a.get('name', '').lower() for a in candidate.get('authors', [])}
+
+        overlap = len(base_authors & cand_authors)
+        author_score = min(overlap * 25, 50)
+        score += author_score
+
+        return round(score, 2)
+
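The weighting is easiest to see with concrete numbers: for two papers published two years apart (year term `max(0, 1 - 2/20) * 20 = 18`), with citation counts 100 and 50 (ratio `0.5`, so `0.5 * 30 = 15`), sharing one author (`min(1 * 25, 50) = 25`), the score is `58.0` out of a nominal maximum of 100 (20 + 30 + 50).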
+    def _explain_similarity(self, base_paper: Dict[str, Any], candidate: Dict[str, Any]) -> List[str]:
+        """Generate human-readable similarity explanations"""
+        reasons = []
+
+        # Check author overlap
+        base_authors = {a.get('name', '').lower() for a in base_paper.get('authors', [])}
+        cand_authors = {a.get('name', '').lower() for a in candidate.get('authors', [])}
+
+        overlap = base_authors & cand_authors
+        if overlap:
+            reasons.append(f"Shared authors: {', '.join(overlap)}")
+
+        # Check year proximity
+        base_year = base_paper.get('year')
+        cand_year = candidate.get('year')
+
+        if base_year and cand_year and abs(base_year - cand_year) <= 2:
+            reasons.append(f"Published around same time ({cand_year})")
+
+        # Check citation similarity
+        base_cites = base_paper.get('citationCount', 0)
+        cand_cites = candidate.get('citationCount', 0)
+
+        if base_cites > 0 and cand_cites > 0:
+            ratio = cand_cites / base_cites
+            if 0.5 <= ratio <= 2.0:
+                reasons.append("Similar citation count")
+
+        if not reasons:
+            reasons.append("Related topic")
+
+        return reasons
+
+    def _calculate_h_index(self, citation_counts: List[int]) -> int:
+        """Calculate h-index from citation counts"""
+        citation_counts_sorted = sorted(citation_counts, reverse=True)
+
+        h = 0
+        for i, citations in enumerate(citation_counts_sorted, 1):
+            if citations >= i:
+                h = i
+            else:
+                break
+
+        return h
+
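For example, citation counts `[10, 8, 5, 4, 2]` give h = 4: at ranks 1 through 4 each paper has at least as many citations as its rank, but the fifth paper has only 2 < 5 citations, so the loop breaks.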
+    def _calculate_researcher_relevance(self, stats: Dict[str, Any]) -> float:
+        """Calculate relevance score for a researcher"""
+        score = 0.0
+
+        # Number of papers
+        paper_count = stats['paper_count']
+        score += min(paper_count * 10, 40)
+
+        # Total citations
+        citations = stats['total_citations']
+        score += min(citations / 10, 30)
+
+        # Productivity (papers per year)
+        years = len(stats['years'])
+        if years > 0:
+            productivity = paper_count / years
+            score += min(productivity * 10, 30)
+
+        return round(score, 2)
+
+
+def get_similarity_finder(archive_client=None) -> SimilarityFinder:
+    """
+    Get SimilarityFinder instance
+
+    Args:
+        archive_client: ArchiveAPIClient instance
+
+    Returns:
+        SimilarityFinder instance
+    """
+    return SimilarityFinder(archive_client)
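End to end, `get_similarity_finder` is the module's factory entry point; a hedged sketch, with `client` assumed to be an `ArchiveAPIClient` as in the first example:

    from cite_agent.similarity_finder import get_similarity_finder

    finder = get_similarity_finder(archive_client=client)
    for r in finder.find_similar_researchers("graph neural networks", limit=10):
        print(f"{r['name']}: {r['paper_count']} papers, h-index {r['h_index']}, relevance {r['relevance_score']}")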
cite_agent/streaming_ui.py
CHANGED
@@ -5,7 +5,6 @@ Minimal, clean, conversational interface for data analysis assistant
 """
 
 import sys
-import time
 import asyncio
 from typing import Optional, AsyncGenerator
 from rich.console import Console
@@ -31,7 +30,8 @@ class StreamingChatUI:
         self.app_name = app_name
         self.working_dir = working_dir
         self.console = Console()
-
+        # Stream responses as full chunks (no artificial typing delay)
+        self.typing_speed = 0.0
 
     def show_header(self):
         """Display minimal header on startup"""
@@ -61,12 +61,15 @@ class StreamingChatUI:
         # No prefix for agent - just stream naturally
         buffer = ""
 
-
-
-
-
-        self.
-
+        try:
+            async for chunk in content_generator:
+                buffer += chunk
+                self.console.print(chunk, end="", style="white")
+                if self.typing_speed:
+                    await asyncio.sleep(self.typing_speed)
+        except KeyboardInterrupt:
+            self.console.print("\n[dim]⏹️ Streaming interrupted by user.[/dim]")
+            return buffer
 
         self.console.print()  # Newline after response
         self.console.print()  # Extra space for readability
@@ -180,7 +183,8 @@ async def simulate_streaming(text: str, chunk_size: int = 5) -> AsyncGenerator[s
     for i in range(0, len(text), chunk_size):
         chunk = text[i:i + chunk_size]
         yield chunk
-
+        # No artificial delay; mimic immediate chunk availability
+        await asyncio.sleep(0)
 
 
 # Example usage
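The patched `simulate_streaming` can be driven directly; a minimal sketch, where the `await asyncio.sleep(0)` added above just yields control to the event loop without adding latency:

    import asyncio

    from cite_agent.streaming_ui import simulate_streaming

    async def main() -> None:
        # Chunks now arrive back-to-back instead of on a typing-effect timer
        async for chunk in simulate_streaming("Hello from cite-agent", chunk_size=5):
            print(chunk, end="", flush=True)
        print()

    asyncio.run(main())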
|