cite-agent 1.0.5__py3-none-any.whl → 1.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cite-agent might be problematic. Click here for more details.
- cite_agent/cli.py +374 -39
- cite_agent/cli_workflow.py +276 -0
- cite_agent/enhanced_ai_agent.py +527 -80
- cite_agent/session_manager.py +215 -0
- cite_agent/updater.py +50 -17
- cite_agent/workflow.py +427 -0
- cite_agent/workflow_integration.py +275 -0
- cite_agent-1.2.3.dist-info/METADATA +442 -0
- {cite_agent-1.0.5.dist-info → cite_agent-1.2.3.dist-info}/RECORD +13 -9
- cite_agent-1.0.5.dist-info/METADATA +0 -235
- {cite_agent-1.0.5.dist-info → cite_agent-1.2.3.dist-info}/WHEEL +0 -0
- {cite_agent-1.0.5.dist-info → cite_agent-1.2.3.dist-info}/entry_points.txt +0 -0
- {cite_agent-1.0.5.dist-info → cite_agent-1.2.3.dist-info}/licenses/LICENSE +0 -0
- {cite_agent-1.0.5.dist-info → cite_agent-1.2.3.dist-info}/top_level.txt +0 -0
cite_agent/workflow.py
ADDED
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Workflow Integration Module - Reduces context switching for scholars
|
|
4
|
+
Features:
|
|
5
|
+
- BibTeX export for citation managers
|
|
6
|
+
- Local paper library management
|
|
7
|
+
- Clipboard integration
|
|
8
|
+
- Markdown export for note-taking apps
|
|
9
|
+
- Session history and replay
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import hashlib
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import re
|
|
16
|
+
import subprocess
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Dict, List, Optional, Any
|
|
20
|
+
from dataclasses import dataclass, asdict
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class Paper:
    """Represents an academic paper stored in the local library.

    Only ``title``, ``authors`` and ``year`` are required; everything else is
    optional metadata. ``tags`` and ``added_date`` are normalized in
    ``__post_init__`` so the dataclass itself carries no mutable defaults.
    """
    title: str
    authors: List[str]
    year: int
    doi: Optional[str] = None
    url: Optional[str] = None
    abstract: Optional[str] = None
    venue: Optional[str] = None
    citation_count: int = 0
    paper_id: Optional[str] = None
    added_date: Optional[str] = None  # ISO-8601 timestamp, filled on creation
    notes: Optional[str] = None
    # BUG FIX: was annotated `List[str]` with a None default; Optional matches
    # the actual default. Normalized to [] in __post_init__.
    tags: Optional[List[str]] = None

    def __post_init__(self):
        """Fill defaults that cannot safely be dataclass field defaults."""
        if self.tags is None:
            self.tags = []
        if self.added_date is None:
            self.added_date = datetime.now().isoformat()

    def to_bibtex(self, citation_key: Optional[str] = None) -> str:
        """Convert paper to a BibTeX ``@article`` entry.

        Args:
            citation_key: explicit key; if omitted one is generated as
                ``<FirstAuthorLastName><Year><FirstTitleWord>``.

        Returns:
            The BibTeX entry as a string, terminated by a newline.
        """
        if not citation_key:
            # Generate citation key: FirstAuthorYearTitleWord.
            # Guard the split()s: an empty/whitespace-only author or title
            # would otherwise raise IndexError.
            first_author = "Unknown"
            if self.authors and self.authors[0].split():
                first_author = self.authors[0].split()[-1]
            title_word = "Paper"
            if self.title and self.title.split():
                title_word = self.title.split()[0]
            citation_key = f"{first_author}{self.year}{title_word}".replace(" ", "")

        bibtex = f"@article{{{citation_key},\n"
        bibtex += f"  title = {{{self.title}}},\n"

        if self.authors:
            authors_str = " and ".join(self.authors)
            bibtex += f"  author = {{{authors_str}}},\n"

        bibtex += f"  year = {{{self.year}}},\n"

        if self.venue:
            bibtex += f"  journal = {{{self.venue}}},\n"

        if self.doi:
            bibtex += f"  doi = {{{self.doi}}},\n"

        if self.url:
            bibtex += f"  url = {{{self.url}}},\n"

        if self.abstract:
            # Clean abstract for BibTeX: strip newlines and braces, which
            # would otherwise unbalance the entry.
            clean_abstract = self.abstract.replace("\n", " ").replace("{", "").replace("}", "")
            bibtex += f"  abstract = {{{clean_abstract}}},\n"

        bibtex += "}\n"
        return bibtex

    def to_apa_citation(self) -> str:
        """Convert paper to an APA-style citation string.

        Uses "A & B" for two authors and "A et al." for three or more;
        appends a DOI link when available, otherwise the plain URL.
        """
        authors_part = ""
        if len(self.authors) == 1:
            authors_part = self.authors[0]
        elif len(self.authors) == 2:
            authors_part = f"{self.authors[0]} & {self.authors[1]}"
        elif len(self.authors) > 2:
            authors_part = f"{self.authors[0]} et al."

        citation = f"{authors_part} ({self.year}). {self.title}."

        if self.venue:
            citation += f" {self.venue}."

        if self.doi:
            citation += f" https://doi.org/{self.doi}"
        elif self.url:
            citation += f" {self.url}"

        return citation

    def to_markdown(self) -> str:
        """Convert paper to a markdown document (title heading + metadata)."""
        md = f"# {self.title}\n\n"

        if self.authors:
            md += f"**Authors:** {', '.join(self.authors)}\n\n"

        md += f"**Year:** {self.year}\n\n"

        if self.venue:
            md += f"**Venue:** {self.venue}\n\n"

        if self.citation_count:
            md += f"**Citations:** {self.citation_count}\n\n"

        if self.doi:
            md += f"**DOI:** [{self.doi}](https://doi.org/{self.doi})\n\n"
        elif self.url:
            md += f"**URL:** {self.url}\n\n"

        if self.abstract:
            md += f"## Abstract\n\n{self.abstract}\n\n"

        if self.notes:
            md += f"## Notes\n\n{self.notes}\n\n"

        if self.tags:
            md += f"**Tags:** {', '.join(self.tags)}\n\n"

        md += f"*Added: {self.added_date}*\n"

        return md
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class WorkflowManager:
    """Manages scholar workflow integrations.

    All state is persisted under ``~/.cite_agent``:
      - ``library/``  one JSON document per saved paper
      - ``exports/``  BibTeX and markdown export targets
      - ``history/``  daily JSONL files of query/response history
    """

    def __init__(self):
        self.config_dir = Path.home() / ".cite_agent"
        self.library_dir = self.config_dir / "library"
        self.exports_dir = self.config_dir / "exports"
        self.history_dir = self.config_dir / "history"
        self.bibtex_file = self.exports_dir / "references.bib"

        # Create directories (parents=True keeps first-run setup robust)
        self.config_dir.mkdir(parents=True, exist_ok=True)
        self.library_dir.mkdir(exist_ok=True)
        self.exports_dir.mkdir(exist_ok=True)
        self.history_dir.mkdir(exist_ok=True)

    def add_paper(self, paper: "Paper") -> bool:
        """Add (or overwrite) a paper in the local library.

        Returns True on success, False on any I/O error (logged to stdout).
        """
        try:
            # Generate paper ID if not provided
            if not paper.paper_id:
                paper.paper_id = self._generate_paper_id(paper)

            # Save paper as JSON; explicit UTF-8 so non-ASCII titles survive
            # on platforms with a non-UTF-8 default encoding.
            paper_file = self.library_dir / f"{paper.paper_id}.json"
            with open(paper_file, 'w', encoding='utf-8') as f:
                json.dump(asdict(paper), f, indent=2)

            return True
        except Exception as e:
            print(f"Error adding paper: {e}")
            return False

    def get_paper(self, paper_id: str) -> "Optional[Paper]":
        """Retrieve a paper from the library; None if missing or unreadable."""
        try:
            paper_file = self.library_dir / f"{paper_id}.json"
            if not paper_file.exists():
                return None

            with open(paper_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
                return Paper(**data)
        except Exception as e:
            print(f"Error retrieving paper: {e}")
            return None

    def list_papers(self, tag: Optional[str] = None) -> "List[Paper]":
        """List all papers in library, optionally filtered by tag.

        Unreadable files are skipped with a warning; results are sorted by
        added date, newest first.
        """
        papers = []
        for paper_file in self.library_dir.glob("*.json"):
            try:
                with open(paper_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                    paper = Paper(**data)

                    if tag is None or tag in paper.tags:
                        papers.append(paper)
            except Exception as e:
                print(f"Error reading {paper_file}: {e}")

        # Sort by added date (newest first)
        papers.sort(key=lambda p: p.added_date or "", reverse=True)
        return papers

    def export_to_bibtex(self, papers: "Optional[List[Paper]]" = None, append: bool = True) -> bool:
        """Export papers to the shared BibTeX file.

        Args:
            papers: papers to export; defaults to the whole library.
            append: append to the existing file (True) or overwrite with a
                fresh header (False).
        """
        try:
            if papers is None:
                # Export all papers from library
                papers = self.list_papers()

            mode = 'a' if append else 'w'
            with open(self.bibtex_file, mode, encoding='utf-8') as f:
                if not append:
                    f.write("% Generated by Cite-Agent\n")
                    f.write(f"% Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

                for paper in papers:
                    f.write(paper.to_bibtex())
                    f.write("\n")

            return True
        except Exception as e:
            print(f"Error exporting to BibTeX: {e}")
            return False

    def export_to_markdown(self, papers: "Optional[List[Paper]]" = None, output_file: Optional[Path] = None) -> bool:
        """Export papers to a markdown file (timestamped name by default)."""
        try:
            if papers is None:
                papers = self.list_papers()

            if output_file is None:
                output_file = self.exports_dir / f"papers_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"

            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(f"# Research Library Export\n\n")
                f.write(f"*Exported: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n\n")
                f.write(f"Total papers: {len(papers)}\n\n")
                f.write("---\n\n")

                for i, paper in enumerate(papers, 1):
                    f.write(f"## {i}. {paper.title}\n\n")
                    f.write(paper.to_markdown())
                    f.write("\n---\n\n")

            return True
        except Exception as e:
            print(f"Error exporting to markdown: {e}")
            return False

    def copy_to_clipboard(self, text: str) -> bool:
        """Copy text to the system clipboard.

        Tries platform clipboard tools in order; if none is available the
        text is saved to ``~/.cite_agent/clipboard.txt`` and False is
        returned (best-effort fallback, not an error).
        """
        try:
            # Try multiple clipboard commands based on platform
            commands = [
                ['xclip', '-selection', 'clipboard'],  # Linux X11
                ['xsel', '--clipboard', '--input'],    # Linux alternative
                ['wl-copy'],                           # Linux Wayland
                ['pbcopy'],                            # macOS
                ['clip'],                              # Windows
            ]

            for cmd in commands:
                try:
                    subprocess.run(
                        cmd,
                        input=text.encode('utf-8'),
                        check=True,
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL
                    )
                    return True
                except (FileNotFoundError, subprocess.CalledProcessError):
                    continue

            # If all commands fail, save to temp file as fallback
            temp_file = self.config_dir / "clipboard.txt"
            with open(temp_file, 'w', encoding='utf-8') as f:
                f.write(text)
            print(f"⚠️  Clipboard unavailable. Saved to: {temp_file}")
            return False

        except Exception as e:
            print(f"Error copying to clipboard: {e}")
            return False

    def save_query_result(self, query: str, response: str, metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Append a query/response pair to today's JSONL history file."""
        try:
            timestamp = datetime.now()
            history_file = self.history_dir / f"{timestamp.strftime('%Y%m%d')}.jsonl"

            entry = {
                "timestamp": timestamp.isoformat(),
                "query": query,
                "response": response,
                "metadata": metadata or {}
            }

            with open(history_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(entry) + '\n')

            return True
        except Exception as e:
            print(f"Error saving query result: {e}")
            return False

    def get_history(self, days: int = 7) -> List[Dict[str, Any]]:
        """Retrieve query history from the last *days* days (max 100 entries)."""
        from datetime import timedelta  # local import: module deps unchanged

        history = []
        # BUG FIX: the original computed a cutoff but never applied it, so
        # the `days` argument was silently ignored. ISO-8601 timestamps of
        # the same format compare correctly as strings.
        cutoff = (datetime.now() - timedelta(days=days)).isoformat()

        for history_file in sorted(self.history_dir.glob("*.jsonl"), reverse=True):
            try:
                with open(history_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        entry = json.loads(line)
                        if entry.get("timestamp", "") >= cutoff:
                            history.append(entry)
            except Exception as e:
                print(f"Error reading history: {e}")

        return history[:100]  # Limit to last 100 queries

    def search_library(self, query: str) -> "List[Paper]":
        """Case-insensitive substring search over title, authors, abstract."""
        query_lower = query.lower()
        results = []

        for paper in self.list_papers():
            # Search in title
            if query_lower in paper.title.lower():
                results.append(paper)
                continue

            # Search in authors
            if any(query_lower in author.lower() for author in paper.authors):
                results.append(paper)
                continue

            # Search in abstract
            if paper.abstract and query_lower in paper.abstract.lower():
                results.append(paper)
                continue

        return results

    def add_note_to_paper(self, paper_id: str, note: str) -> bool:
        """Append a note to a paper; False if the paper does not exist."""
        paper = self.get_paper(paper_id)
        if not paper:
            return False

        if paper.notes:
            paper.notes += f"\n\n{note}"
        else:
            paper.notes = note

        return self.add_paper(paper)

    def tag_paper(self, paper_id: str, tags: List[str]) -> bool:
        """Add tags to a paper (deduplicated); False if paper is missing."""
        paper = self.get_paper(paper_id)
        if not paper:
            return False

        paper.tags = list(set(paper.tags + tags))
        return self.add_paper(paper)

    def _generate_paper_id(self, paper: "Paper") -> str:
        """Generate a stable, filesystem-safe paper ID.

        Prefers a sanitized DOI; otherwise a short SHA-256 of
        title + first author + year.
        """
        # Use DOI if available
        if paper.doi:
            return paper.doi.replace('/', '_').replace('.', '_')

        # Otherwise use hash of title + first author + year
        content = f"{paper.title}{paper.authors[0] if paper.authors else ''}{paper.year}"
        return hashlib.sha256(content.encode()).hexdigest()[:12]
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def parse_paper_from_response(response_text: str) -> Optional[Paper]:
    """
    Extract paper information from agent response text

    This is a helper to convert agent responses into Paper objects
    """
    # Lightweight pattern matching; a fuller NLP pass could replace this.
    try:
        found_title = re.search(r'(?:title|Title):\s*["\']?([^"\'\n]+)["\']?', response_text)
        if not found_title:
            # Without a title there is nothing worth turning into a Paper.
            return None

        found_authors = re.search(r'(?:author|Author)s?:\s*(.+?)(?:\n|Year)', response_text, re.IGNORECASE)
        found_year = re.search(r'(?:year|Year):\s*(\d{4})', response_text)
        found_doi = re.search(r'(?:doi|DOI):\s*(10\.\d+/[^\s\n]+)', response_text)

        author_names = []
        if found_authors:
            # Names may be separated by commas, ampersands, or the word "and".
            author_names = [part.strip() for part in re.split(r'[,&]|\sand\s', found_authors.group(1))]

        return Paper(
            title=found_title.group(1).strip(),
            authors=author_names,
            year=int(found_year.group(1)) if found_year else datetime.now().year,
            doi=found_doi.group(1) if found_doi else None
        )
    except Exception as e:
        print(f"Error parsing paper: {e}")
        return None
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
# Convenience function for quick exports
|
|
415
|
+
def quick_export_bibtex(paper_data: Dict[str, Any]) -> str:
    """Quick convert dict to BibTeX format"""
    # Build a transient Paper purely to reuse its BibTeX serializer;
    # missing optional fields fall back to the Paper defaults.
    transient = Paper(
        title=paper_data.get('title', ''),
        authors=paper_data.get('authors', []),
        year=paper_data.get('year', datetime.now().year),
        doi=paper_data.get('doi'),
        url=paper_data.get('url'),
        venue=paper_data.get('venue'),
        abstract=paper_data.get('abstract'),
    )
    return transient.to_bibtex()
|
|
427
|
+
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Workflow Integration Module
|
|
3
|
+
Reduces context switching for scholars by providing integrated tools
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Dict, List, Any, Optional
|
|
11
|
+
import logging
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
class WorkflowIntegration:
    """Handles workflow integration features to reduce context switching.

    All state lives on disk under ``data_dir`` (default ``~/.cite_agent``):
      - ``papers/``    per-user saved-paper JSON files
      - ``citations/`` BibTeX and markdown exports
      - ``sessions/``  per-query session history
    """

    def __init__(self, data_dir: str = "~/.cite_agent"):
        self.data_dir = Path(data_dir).expanduser()
        # parents=True so a nested custom data_dir does not raise
        # FileNotFoundError on first use.
        self.data_dir.mkdir(parents=True, exist_ok=True)

        # Initialize subdirectories
        (self.data_dir / "papers").mkdir(exist_ok=True)
        (self.data_dir / "citations").mkdir(exist_ok=True)
        (self.data_dir / "sessions").mkdir(exist_ok=True)

    def save_paper_to_library(self, paper: Dict[str, Any], user_id: str) -> str:
        """Save a paper to the user's local library; returns the new paper id."""
        paper_id = str(uuid.uuid4())
        paper_data = {
            "id": paper_id,
            "user_id": user_id,
            "saved_at": datetime.now().isoformat(),
            "paper": paper,
            "tags": [],
            "notes": ""
        }

        # Save to user's paper library
        paper_file = self.data_dir / "papers" / f"{user_id}_{paper_id}.json"
        with open(paper_file, 'w', encoding='utf-8') as f:
            json.dump(paper_data, f, indent=2)

        # BUG FIX: stdlib logging does not accept arbitrary keyword args
        # (the original structlog-style call raised TypeError at runtime).
        logger.info("Paper saved to library: paper_id=%s user_id=%s", paper_id, user_id)
        return paper_id

    def export_to_bibtex(self, papers: List[Dict[str, Any]], filename: Optional[str] = None) -> str:
        """Export papers to a BibTeX file under ``citations/``.

        Returns the path of the written file as a string.
        """
        if not filename:
            filename = f"citations_{datetime.now().strftime('%Y%m%d_%H%M%S')}.bib"

        bibtex_file = self.data_dir / "citations" / filename

        bibtex_entries = []
        for paper in papers:
            entry = self._format_bibtex_entry(paper)
            bibtex_entries.append(entry)

        with open(bibtex_file, 'w', encoding='utf-8') as f:
            f.write('\n\n'.join(bibtex_entries))

        logger.info("BibTeX exported: %s (%d papers)", filename, len(papers))
        return str(bibtex_file)

    def _format_bibtex_entry(self, paper: Dict[str, Any]) -> str:
        """Format a paper dict as a BibTeX ``@article`` entry string."""
        # Extract key information
        title = paper.get('title', 'Unknown Title')
        authors = paper.get('authors', [])
        year = paper.get('year', '2024')
        venue = paper.get('venue', paper.get('journal', 'Unknown Venue'))
        doi = paper.get('doi', '')

        # Generate citation key: <firstauthorlastname><year>
        first_author = authors[0].get('name', 'Unknown') if authors else 'Unknown'
        # Guard: an empty name string would make split()[-1] raise IndexError.
        last_name = first_author.split()[-1].lower() if first_author.split() else 'unknown'
        citation_key = f"{last_name}{year}"

        # Format authors
        author_list = " and ".join([author.get('name', 'Unknown') for author in authors])

        # Create BibTeX entry
        entry = f"""@article{{{citation_key},
    title = {{{title}}},
    author = {{{author_list}}},
    journal = {{{venue}}},
    year = {{{year}}}"""

        if doi:
            entry += f",\n    doi = {{{doi}}}"

        entry += "\n}"

        return entry

    def save_session_history(self, user_id: str, query: str, response: Dict[str, Any]) -> str:
        """Save a query and its response to session history; returns session id."""
        session_id = str(uuid.uuid4())
        session_data = {
            "session_id": session_id,
            "user_id": user_id,
            "timestamp": datetime.now().isoformat(),
            "query": query,
            "response": response,
            "papers_found": len(response.get('papers', [])),
            "tools_used": response.get('tools_used', [])
        }

        session_file = self.data_dir / "sessions" / f"{user_id}_{session_id}.json"
        with open(session_file, 'w', encoding='utf-8') as f:
            json.dump(session_data, f, indent=2)

        logger.info("Session saved: session_id=%s user_id=%s", session_id, user_id)
        return session_id

    def get_user_library(self, user_id: str) -> List[Dict[str, Any]]:
        """Get the user's saved paper library, newest first."""
        library = []
        papers_dir = self.data_dir / "papers"

        for paper_file in papers_dir.glob(f"{user_id}_*.json"):
            try:
                with open(paper_file, 'r', encoding='utf-8') as f:
                    paper_data = json.load(f)
                    library.append(paper_data)
            except Exception as e:
                # BUG FIX: stdlib logging rejects arbitrary kwargs; use %-args.
                logger.error("Error loading paper %s: %s", paper_file, e)

        # Sort by saved date
        library.sort(key=lambda x: x.get('saved_at', ''), reverse=True)
        return library

    def search_library(self, user_id: str, query: str) -> List[Dict[str, Any]]:
        """Case-insensitive substring search over title, authors and abstract."""
        library = self.get_user_library(user_id)
        results = []

        query_lower = query.lower()
        for paper_data in library:
            paper = paper_data.get('paper', {})

            # Search in title, authors, abstract
            title = paper.get('title', '').lower()
            authors = ' '.join([author.get('name', '') for author in paper.get('authors', [])]).lower()
            abstract = paper.get('abstract', '').lower()

            if (query_lower in title or
                query_lower in authors or
                query_lower in abstract):
                results.append(paper_data)

        return results

    def generate_citation_suggestions(self, paper: Dict[str, Any]) -> List[str]:
        """Generate follow-up suggestions (related keywords, citation formats)."""
        suggestions = []

        # Suggest related papers based on keywords
        title = paper.get('title', '')
        abstract = paper.get('abstract', '')

        # Extract potential keywords
        keywords = self._extract_keywords(title + ' ' + abstract)

        for keyword in keywords[:5]:  # Top 5 keywords
            suggestions.append(f"Find papers related to: {keyword}")

        # Suggest citation format options
        suggestions.append("Format citation in APA style")
        suggestions.append("Format citation in MLA style")
        suggestions.append("Format citation in Chicago style")

        # Suggest verification
        if paper.get('doi'):
            suggestions.append(f"Verify DOI: {paper['doi']}")

        return suggestions

    def _extract_keywords(self, text: str) -> List[str]:
        """Extract up to 10 candidate keywords by simple frequency counting."""
        # Simple keyword extraction (could be enhanced with NLP)
        words = text.lower().split()

        # Filter out common words
        stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'this', 'that', 'these', 'those'}

        keywords = [word for word in words if len(word) > 3 and word not in stop_words]

        # Count frequency and return top keywords
        from collections import Counter
        keyword_counts = Counter(keywords)
        return [word for word, count in keyword_counts.most_common(10)]

    def export_to_markdown(self, papers: List[Dict[str, Any]], filename: Optional[str] = None) -> str:
        """Export papers to Markdown (Obsidian/Notion friendly); returns the path."""
        if not filename:
            filename = f"papers_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"

        markdown_file = self.data_dir / "citations" / filename

        markdown_content = "# Research Papers\n\n"
        markdown_content += f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n"

        for i, paper in enumerate(papers, 1):
            markdown_content += f"## {i}. {paper.get('title', 'Unknown Title')}\n\n"

            # Authors
            authors = paper.get('authors', [])
            if authors:
                author_names = [author.get('name', 'Unknown') for author in authors]
                markdown_content += f"**Authors:** {', '.join(author_names)}\n\n"

            # Venue and year
            venue = paper.get('venue', paper.get('journal', 'Unknown Venue'))
            year = paper.get('year', 'Unknown Year')
            markdown_content += f"**Venue:** {venue} ({year})\n\n"

            # DOI
            if paper.get('doi'):
                markdown_content += f"**DOI:** {paper['doi']}\n\n"

            # Abstract
            if paper.get('abstract'):
                markdown_content += f"**Abstract:** {paper['abstract']}\n\n"

            # Citation count
            if paper.get('citation_count'):
                markdown_content += f"**Citations:** {paper['citation_count']}\n\n"

            markdown_content += "---\n\n"

        with open(markdown_file, 'w', encoding='utf-8') as f:
            f.write(markdown_content)

        logger.info("Markdown exported: %s (%d papers)", filename, len(papers))
        return str(markdown_file)

    def get_session_history(self, user_id: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Get the user's most recent sessions (newest first, up to *limit*)."""
        sessions = []
        sessions_dir = self.data_dir / "sessions"

        for session_file in sessions_dir.glob(f"{user_id}_*.json"):
            try:
                with open(session_file, 'r', encoding='utf-8') as f:
                    session_data = json.load(f)
                    sessions.append(session_data)
            except Exception as e:
                logger.error("Error loading session %s: %s", session_file, e)

        # Sort by timestamp and limit
        sessions.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
        return sessions[:limit]

    def create_citation_network(self, papers: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Create citation-network visualization data.

        NOTE: only nodes are populated; edge extraction from real citation
        relationships is not implemented yet, so ``edges`` is always empty.
        """
        network = {
            "nodes": [],
            "edges": []
        }

        for paper in papers:
            # Add paper as node
            node = {
                "id": paper.get('id', str(uuid.uuid4())),
                "label": paper.get('title', 'Unknown Title'),
                "year": paper.get('year', 2024),
                "citations": paper.get('citation_count', 0),
                "venue": paper.get('venue', 'Unknown Venue')
            }
            network["nodes"].append(node)

        # Add edges based on citations (simplified)
        # In a real implementation, you'd analyze actual citation relationships

        return network
|