cite-agent: cite_agent-1.3.9-py3-none-any.whl → cite_agent-1.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. cite_agent/__init__.py +13 -13
  2. cite_agent/__version__.py +1 -1
  3. cite_agent/action_first_mode.py +150 -0
  4. cite_agent/adaptive_providers.py +413 -0
  5. cite_agent/archive_api_client.py +186 -0
  6. cite_agent/auth.py +0 -1
  7. cite_agent/auto_expander.py +70 -0
  8. cite_agent/cache.py +379 -0
  9. cite_agent/circuit_breaker.py +370 -0
  10. cite_agent/citation_network.py +377 -0
  11. cite_agent/cli.py +8 -16
  12. cite_agent/cli_conversational.py +113 -3
  13. cite_agent/confidence_calibration.py +381 -0
  14. cite_agent/deduplication.py +325 -0
  15. cite_agent/enhanced_ai_agent.py +689 -371
  16. cite_agent/error_handler.py +228 -0
  17. cite_agent/execution_safety.py +329 -0
  18. cite_agent/full_paper_reader.py +239 -0
  19. cite_agent/observability.py +398 -0
  20. cite_agent/offline_mode.py +348 -0
  21. cite_agent/paper_comparator.py +368 -0
  22. cite_agent/paper_summarizer.py +420 -0
  23. cite_agent/pdf_extractor.py +350 -0
  24. cite_agent/proactive_boundaries.py +266 -0
  25. cite_agent/quality_gate.py +442 -0
  26. cite_agent/request_queue.py +390 -0
  27. cite_agent/response_enhancer.py +257 -0
  28. cite_agent/response_formatter.py +458 -0
  29. cite_agent/response_pipeline.py +295 -0
  30. cite_agent/response_style_enhancer.py +259 -0
  31. cite_agent/self_healing.py +418 -0
  32. cite_agent/similarity_finder.py +524 -0
  33. cite_agent/streaming_ui.py +13 -9
  34. cite_agent/thinking_blocks.py +308 -0
  35. cite_agent/tool_orchestrator.py +416 -0
  36. cite_agent/trend_analyzer.py +540 -0
  37. cite_agent/unpaywall_client.py +226 -0
  38. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/METADATA +15 -1
  39. cite_agent-1.4.3.dist-info/RECORD +62 -0
  40. cite_agent-1.3.9.dist-info/RECORD +0 -32
  41. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/WHEEL +0 -0
  42. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/entry_points.txt +0 -0
  43. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/licenses/LICENSE +0 -0
  44. {cite_agent-1.3.9.dist-info → cite_agent-1.4.3.dist-info}/top_level.txt +0 -0
cite_agent/citation_network.py ADDED
@@ -0,0 +1,377 @@
+ """
+ Citation Network Mapper - Find foundational papers and research lineages
+
+ Provides tools for:
+ - Mapping citation networks
+ - Finding seminal papers
+ - Tracing research lineage
+ - Suggesting reading order
+ """
+
+ from typing import List, Dict, Any, Optional, Set, Tuple
+ import logging
+ from collections import defaultdict, deque
+
+ logger = logging.getLogger(__name__)
+
+
+ class CitationNetwork:
+     """Maps and analyzes citation networks between papers"""
+
+     def __init__(self, archive_client=None):
+         """
+         Initialize citation network mapper
+
+         Args:
+             archive_client: ArchiveAPIClient instance for fetching citation data
+         """
+         self.archive_client = archive_client
+         self._citation_cache = {}
+         self._reference_cache = {}
+
+     def map_citations(self, paper_id: str, depth: int = 1, max_papers: int = 50) -> Dict[str, Any]:
+         """
+         Map citation network for a paper
+
+         Args:
+             paper_id: DOI, arXiv ID, or Semantic Scholar ID
+             depth: How many citation levels to traverse (1-3)
+             max_papers: Maximum papers to include
+
+         Returns:
+             Dictionary with nodes (papers) and edges (citations)
+         """
+         if not self.archive_client:
+             logger.warning("No archive client provided - citation mapping unavailable")
+             return {"nodes": [], "edges": [], "error": "Archive client required"}
+
+         try:
+             # Get base paper
+             base_paper = self._fetch_paper(paper_id)
+             if not base_paper:
+                 return {"nodes": [], "edges": [], "error": f"Paper {paper_id} not found"}
+
+             nodes = []
+             edges = []
+             visited = set()
+
+             # BFS traversal
+             queue = deque([(base_paper, 0)])  # (paper, current_depth)
+
+             while queue and len(nodes) < max_papers:
+                 paper, current_depth = queue.popleft()
+                 paper_id = paper.get('paperId') or paper.get('id')
+
+                 if not paper_id or paper_id in visited:
+                     continue
+
+                 visited.add(paper_id)
+
+                 # Add node
+                 nodes.append({
+                     'id': paper_id,
+                     'title': paper.get('title', 'Unknown'),
+                     'year': paper.get('year'),
+                     'citationCount': paper.get('citationCount', 0),
+                     'authors': [a.get('name') for a in paper.get('authors', [])[:3]],
+                     'depth': current_depth
+                 })
+
+                 # Get citations if within depth limit
+                 if current_depth < depth:
+                     citations = self._fetch_citations(paper_id)
+                     for cited_paper in citations[:20]:  # Limit per paper
+                         cited_id = cited_paper.get('paperId') or cited_paper.get('id')
+                         if cited_id and cited_id not in visited:
+                             edges.append({
+                                 'source': paper_id,
+                                 'target': cited_id,
+                                 'type': 'cites'
+                             })
+                             queue.append((cited_paper, current_depth + 1))
+
+             return {
+                 'nodes': nodes,
+                 'edges': edges,
+                 'stats': {
+                     'total_papers': len(nodes),
+                     'total_citations': len(edges),
+                     'max_depth': depth,
+                     'most_cited': max(nodes, key=lambda x: x['citationCount']) if nodes else None
+                 }
+             }
+
+         except Exception as e:
+             logger.error(f"Error mapping citations: {e}")
+             return {"nodes": [], "edges": [], "error": str(e)}
+
+     def find_seminal_papers(self, topic: str, min_citations: int = 100, limit: int = 20) -> List[Dict[str, Any]]:
+         """
+         Find foundational/seminal papers in a field
+
+         Args:
+             topic: Research topic or query
+             min_citations: Minimum citation count
+             limit: Maximum papers to return
+
+         Returns:
+             List of highly-cited foundational papers
+         """
+         if not self.archive_client:
+             return []
+
+         try:
+             # Search for papers
+             results = self.archive_client.search_papers(
+                 query=topic,
+                 limit=limit * 2,  # Get more to filter
+                 fields=['title', 'authors', 'year', 'citationCount', 'abstract', 'paperId']
+             )
+
+             papers = results.get('data', [])
+
+             # Filter by citation count and sort
+             seminal = [p for p in papers if p.get('citationCount', 0) >= min_citations]
+             seminal.sort(key=lambda x: x.get('citationCount', 0), reverse=True)
+
+             # Enhance with network metrics
+             enhanced = []
+             for paper in seminal[:limit]:
+                 paper_id = paper.get('paperId') or paper.get('id')
+
+                 # Get citation velocity (citations per year)
+                 year = paper.get('year', 2024)
+                 age = max(1, 2025 - year)
+                 citations = paper.get('citationCount', 0)
+                 velocity = citations / age
+
+                 enhanced.append({
+                     'id': paper_id,
+                     'title': paper.get('title'),
+                     'authors': [a.get('name') for a in paper.get('authors', [])[:5]],
+                     'year': year,
+                     'citations': citations,
+                     'citation_velocity': round(velocity, 1),
+                     'abstract': paper.get('abstract', '')[:300],
+                     'influential': citations > min_citations * 2  # Highly influential
+                 })
+
+             return enhanced
+
+         except Exception as e:
+             logger.error(f"Error finding seminal papers: {e}")
+             return []
+
+     def trace_research_lineage(self, paper1_id: str, paper2_id: str, max_depth: int = 4) -> Dict[str, Any]:
+         """
+         Find citation path between two papers
+
+         Args:
+             paper1_id: First paper ID
+             paper2_id: Second paper ID
+             max_depth: Maximum path length to search
+
+         Returns:
+             Shortest citation path between papers
+         """
+         if not self.archive_client:
+             return {"path": [], "error": "Archive client required"}
+
+         try:
+             # BFS to find shortest path
+             queue = deque([(paper1_id, [paper1_id])])
+             visited = {paper1_id}
+
+             while queue:
+                 current_id, path = queue.popleft()
+
+                 if len(path) > max_depth:
+                     continue
+
+                 if current_id == paper2_id:
+                     # Found path! Get paper details
+                     detailed_path = []
+                     for pid in path:
+                         paper = self._fetch_paper(pid)
+                         if paper:
+                             detailed_path.append({
+                                 'id': pid,
+                                 'title': paper.get('title'),
+                                 'year': paper.get('year'),
+                                 'authors': [a.get('name') for a in paper.get('authors', [])[:3]]
+                             })
+
+                     return {
+                         'path': detailed_path,
+                         'length': len(path) - 1,
+                         'connection_type': 'direct' if len(path) == 2 else 'indirect'
+                     }
+
+                 # Explore citations and references
+                 citations = self._fetch_citations(current_id)
+                 references = self._fetch_references(current_id)
+
+                 for paper in citations + references:
+                     paper_id = paper.get('paperId') or paper.get('id')
+                     if paper_id and paper_id not in visited:
+                         visited.add(paper_id)
+                         queue.append((paper_id, path + [paper_id]))
+
+             return {
+                 'path': [],
+                 'error': f'No citation path found within {max_depth} steps'
+             }
+
+         except Exception as e:
+             logger.error(f"Error tracing lineage: {e}")
+             return {"path": [], "error": str(e)}
+
+     def suggest_reading_order(self, paper_ids: List[str]) -> List[Dict[str, Any]]:
+         """
+         Suggest optimal reading order based on citations
+
+         Args:
+             paper_ids: List of paper IDs to order
+
+         Returns:
+             Papers sorted by foundational-to-recent order
+         """
+         if not self.archive_client:
+             return []
+
+         try:
+             # Fetch all papers
+             papers = []
+             for pid in paper_ids:
+                 paper = self._fetch_paper(pid)
+                 if paper:
+                     papers.append(paper)
+
+             # Build citation graph
+             citation_graph = defaultdict(set)
+
+             for paper in papers:
+                 paper_id = paper.get('paperId') or paper.get('id')
+                 references = self._fetch_references(paper_id)
+
+                 for ref in references:
+                     ref_id = ref.get('paperId') or ref.get('id')
+                     if ref_id in paper_ids:
+                         citation_graph[paper_id].add(ref_id)
+
+             # Topological sort (foundational papers first)
+             ordered = []
+             visited = set()
+
+             def dfs(paper_id):
+                 if paper_id in visited:
+                     return
+                 visited.add(paper_id)
+
+                 # Visit dependencies first
+                 for dep_id in citation_graph.get(paper_id, []):
+                     dfs(dep_id)
+
+                 # Find paper details
+                 paper = next((p for p in papers if p.get('paperId') == paper_id or p.get('id') == paper_id), None)
+                 if paper and paper not in ordered:
+                     ordered.append(paper)
+
+             # Process all papers
+             for paper in papers:
+                 paper_id = paper.get('paperId') or paper.get('id')
+                 dfs(paper_id)
+
+             # Format output
+             result = []
+             for i, paper in enumerate(ordered, 1):
+                 result.append({
+                     'order': i,
+                     'id': paper.get('paperId') or paper.get('id'),
+                     'title': paper.get('title'),
+                     'year': paper.get('year'),
+                     'authors': [a.get('name') for a in paper.get('authors', [])[:3]],
+                     'reason': 'Foundational' if i <= len(ordered) // 3 else 'Recent' if i > 2 * len(ordered) // 3 else 'Core'
+                 })
+
+             return result
+
+         except Exception as e:
+             logger.error(f"Error suggesting reading order: {e}")
+             return []
+
+     def _fetch_paper(self, paper_id: str) -> Optional[Dict[str, Any]]:
+         """Fetch paper details from API or cache"""
+         if paper_id in self._citation_cache:
+             return self._citation_cache[paper_id]
+
+         if not self.archive_client:
+             return None
+
+         try:
+             result = self.archive_client.get_paper(
+                 paper_id,
+                 fields=['paperId', 'title', 'authors', 'year', 'citationCount', 'abstract']
+             )
+
+             if result:
+                 self._citation_cache[paper_id] = result
+                 return result
+
+         except Exception as e:
+             logger.warning(f"Could not fetch paper {paper_id}: {e}")
+
+         return None
+
+     def _fetch_citations(self, paper_id: str, limit: int = 50) -> List[Dict[str, Any]]:
+         """Fetch papers citing this paper"""
+         if paper_id in self._citation_cache:
+             cached = self._citation_cache[paper_id]
+             if 'citations' in cached:
+                 return cached['citations']
+
+         if not self.archive_client:
+             return []
+
+         try:
+             # Semantic Scholar API: /paper/{id}/citations
+             citations = self.archive_client.get_paper_citations(paper_id, limit=limit)
+             return citations if citations else []
+
+         except Exception as e:
+             logger.warning(f"Could not fetch citations for {paper_id}: {e}")
+             return []
+
+     def _fetch_references(self, paper_id: str, limit: int = 50) -> List[Dict[str, Any]]:
+         """Fetch papers referenced by this paper"""
+         if paper_id in self._reference_cache:
+             return self._reference_cache[paper_id]
+
+         if not self.archive_client:
+             return []
+
+         try:
+             # Semantic Scholar API: /paper/{id}/references
+             references = self.archive_client.get_paper_references(paper_id, limit=limit)
+
+             if references:
+                 self._reference_cache[paper_id] = references
+                 return references
+
+         except Exception as e:
+             logger.warning(f"Could not fetch references for {paper_id}: {e}")
+
+         return []
+
+
+ def get_citation_network(archive_client=None) -> CitationNetwork:
+     """
+     Get CitationNetwork instance
+
+     Args:
+         archive_client: ArchiveAPIClient instance
+
+     Returns:
+         CitationNetwork instance
+     """
+     return CitationNetwork(archive_client)
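
For orientation, here is a minimal usage sketch of the new module. It assumes an ArchiveAPIClient (added in this release as cite_agent/archive_api_client.py) exposing the search_papers, get_paper, get_paper_citations, and get_paper_references methods that CitationNetwork calls; the client construction and the example paper ID are illustrative, not taken from the package:

    from cite_agent.archive_api_client import ArchiveAPIClient
    from cite_agent.citation_network import get_citation_network

    client = ArchiveAPIClient()  # constructor arguments, if any, are not shown in this diff
    network = get_citation_network(client)

    # One-hop citation graph around a paper, capped at 50 nodes
    graph = network.map_citations("arXiv:1706.03762", depth=1, max_papers=50)
    print(graph["stats"]["total_papers"], "papers,",
          graph["stats"]["total_citations"], "citation edges")

    # Highly cited papers on a topic, ranked by raw citation count
    for paper in network.find_seminal_papers("citation network analysis",
                                             min_citations=200, limit=10):
        print(paper["year"], paper["citations"], paper["title"])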
cite_agent/cli.py CHANGED
@@ -225,7 +225,8 @@ class NocturnalCLI:
      def _show_ready_panel(self):
          panel = Panel(
              "Systems check complete.\n"
-             "Type [bold]help[/] for commands or [bold]tips[/] for power moves.",
+             "Type [bold]help[/] for commands or [bold]tips[/] for power moves.\n"
+             "[dim]Press Ctrl+C while the agent is thinking to interrupt and ask something else.[/dim]",
              title="✅ Cite Agent ready!",
              border_style="green",
              padding=(1, 2),
@@ -460,16 +461,9 @@ class NocturnalCLI:
              finally:
                  live.stop()

-             # Print response with typing effect for natural feel
+             # Print response immediately (no artificial typing delay)
              self.console.print("[bold violet]🤖 Agent[/]: ", end="", highlight=False)
-
-             # Character-by-character streaming (like ChatGPT) - faster for long responses
-             import time
-             for char in response.response:
-                 self.console.print(char, end="", style="white")
-                 time.sleep(0.003)  # 3ms per character (~333 chars/sec) - faster than before
-
-             self.console.print()  # Newline after response
+             self.console.print(response.response)

              # Save to history automatically
              self.workflow.save_query_result(
@@ -482,12 +476,10 @@ class NocturnalCLI:
                  }
              )

-             # Show usage stats occasionally
-             if hasattr(self.agent, 'daily_token_usage') and self.agent.daily_token_usage > 0:
-                 stats = self.agent.get_usage_stats()
-                 if stats['usage_percentage'] > 10:  # Show if >10% used
-                     self.console.print(f"\n📊 Usage: {stats['usage_percentage']:.1f}% of daily limit")
-
+         except KeyboardInterrupt:
+             live.stop()
+             self.console.print("\n[dim]⏹️ Interrupted. Ask another question when ready.[/dim]")
+             continue
          except Exception as e:
              self.console.print(f"\n[error]❌ Error: {e}[/error]")
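
The new KeyboardInterrupt branch follows the usual Rich pattern: stop the Live spinner first so the terminal state is restored, print a short notice, then continue the REPL loop. A minimal self-contained sketch of that shape (the loop and names below are illustrative, not the package's actual code):

    import time

    from rich.console import Console
    from rich.live import Live
    from rich.spinner import Spinner

    console = Console()

    while True:
        query = console.input("[bold]You[/]: ")
        if query.strip().lower() in {"quit", "exit"}:
            break

        live = Live(Spinner("dots", text="Thinking…"), console=console)
        live.start()
        try:
            time.sleep(5)  # stand-in for the real agent call
            live.stop()
            console.print("[bold violet]🤖 Agent[/]: (response here)")
        except KeyboardInterrupt:
            live.stop()  # restore the terminal before printing anything
            console.print("[dim]⏹️ Interrupted. Ask another question when ready.[/dim]")
            continue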
 
cite_agent/cli_conversational.py CHANGED
@@ -8,7 +8,7 @@ import asyncio
  import os
  import sys
  from pathlib import Path
- from typing import Optional
+ from typing import List, Optional

  # Add nocturnal_archive to path
  sys.path.insert(0, str(Path(__file__).parent))
@@ -93,6 +93,85 @@ Remember:

          # Store this for when we make requests
          self.jarvis_prompt = jarvis_system_prompt
+
+     async def _build_environment_snapshot(self, limit: int = 8) -> Optional[str]:
+         """Return a short summary of the current workspace."""
+         if not self.agent:
+             return None
+
+         try:
+             listing = await self.agent._get_workspace_listing(limit=limit)  # type: ignore[attr-defined]
+         except Exception:
+             listing = {"base": self.working_dir, "items": []}
+
+         base = listing.get("base") or self.working_dir
+         items = listing.get("items") or listing.get("entries") or []
+
+         lines: List[str] = [f"📂 Working directory: {base}"]
+
+         if items:
+             preview_count = min(len(items), 6)
+             preview_lines = [
+                 f"  • {item.get('name')} ({item.get('type', 'item')})"
+                 for item in items[:preview_count]
+             ]
+             if len(items) > preview_count:
+                 preview_lines.append(f"  • … {len(items) - preview_count} more")
+             lines.append("Contents snapshot:\n" + "\n".join(preview_lines))
+
+         if listing.get("error"):
+             lines.append(f"⚠️ Workspace note: {listing['error']}")
+
+         note = listing.get("note")
+         if note:
+             lines.append(note)
+
+         return "\n\n".join(lines)
+
+     @staticmethod
+     def _looks_like_grounding_question(text: str) -> bool:
+         lowered = text.lower().strip()
+         if not lowered:
+             return False
+         grounding_phrases = [
+             "where are we",
+             "where am i",
+             "what directory",
+             "current directory",
+             "pwd",
+             "show files",
+             "list files",
+             "where is this",
+         ]
+         return any(phrase in lowered for phrase in grounding_phrases)
+
+     @staticmethod
+     def _is_small_talk_probe(text: str) -> bool:
+         lowered = text.lower().strip()
+         return lowered in {"test", "hi", "hello", "hey", "ping"}
+
+     async def _respond_with_grounding(self) -> None:
+         snapshot = await self._build_environment_snapshot()
+         if not snapshot:
+             snapshot = "I can’t access the workspace details right now, but I’m ready to help."
+
+         async def snapshot_gen():
+             async for chunk in simulate_streaming(snapshot, chunk_size=4):
+                 yield chunk
+
+         await self.ui.stream_agent_response(snapshot_gen())
+
+     async def _respond_with_acknowledgement(self) -> None:
+         message = (
+             "Ready when you are. Try `help` for guidance or ask me to summarise a file like "
+             "`summarize README.md`."
+         )
+
+         async def ack_gen():
+             async for chunk in simulate_streaming(message, chunk_size=4):
+                 yield chunk
+
+         await self.ui.stream_agent_response(ack_gen())

      async def run(self):
          """Main conversation loop"""
@@ -121,6 +200,24 @@ Remember:
              yield chunk

          await self.ui.stream_agent_response(welcome_gen())
+
+         snapshot = await self._build_environment_snapshot()
+         if snapshot:
+             async def snapshot_gen():
+                 async for chunk in simulate_streaming(snapshot, chunk_size=4):
+                     yield chunk
+             await self.ui.stream_agent_response(snapshot_gen())
+
+         quick_tips = (
+             "Quick tips: `help` for options • `read_file README.md` to inspect docs • "
+             "`summarize docs/…` or `analyze data.csv` to get started."
+         )
+
+         async def tips_gen():
+             async for chunk in simulate_streaming(quick_tips, chunk_size=4):
+                 yield chunk
+
+         await self.ui.stream_agent_response(tips_gen())

          # Main conversation loop
          while self.conversation_active:
@@ -160,15 +257,28 @@ Remember:
          - Use appropriate tools
          - Stream response naturally
          """
+
+         stripped = user_input.strip()
+         if not stripped:
+             return
+
+         lowered = stripped.lower()
+
+         if self._is_small_talk_probe(stripped):
+             await self._respond_with_acknowledgement()
+             return
+         if self._looks_like_grounding_question(stripped):
+             await self._respond_with_grounding()
+             return

          # Determine if this is a web search request
-         is_web_search = any(keyword in user_input.lower() for keyword in [
+         is_web_search = any(keyword in lowered for keyword in [
              'google', 'search for', 'browse', 'look up', 'find on the web',
              'what does', 'who is', 'recent news'
          ])

          # Determine if this is a data analysis request
-         is_data_analysis = any(keyword in user_input.lower() for keyword in [
+         is_data_analysis = any(keyword in lowered for keyword in [
              'analyze', 'data', 'csv', 'plot', 'graph', 'test', 'regression',
              'correlation', 'statistics', 'mean', 'median', 'distribution'
          ])
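
Both checks are plain substring matching over the lowered input, the same heuristic the new grounding and small-talk probes use earlier in this file. The pattern generalises to a small keyword table; the dispatch helper below is an illustrative sketch, not code from the package (only the keyword lists are copied from the diff):

    INTENT_KEYWORDS = {
        "web_search": [
            'google', 'search for', 'browse', 'look up', 'find on the web',
            'what does', 'who is', 'recent news',
        ],
        "data_analysis": [
            'analyze', 'data', 'csv', 'plot', 'graph', 'test', 'regression',
            'correlation', 'statistics', 'mean', 'median', 'distribution',
        ],
    }

    def classify(user_input: str) -> str:
        # First matching keyword list wins; fall through to a general handler.
        lowered = user_input.lower().strip()
        for intent, keywords in INTENT_KEYWORDS.items():
            if any(keyword in lowered for keyword in keywords):
                return intent
        return "general"

    assert classify("please analyze results.csv") == "data_analysis"
    assert classify("recent news about open access") == "web_search"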