nexus-cli 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexus/__init__.py +8 -0
- nexus/cli.py +1914 -0
- nexus/integrations/__init__.py +0 -0
- nexus/knowledge/__init__.py +13 -0
- nexus/knowledge/search.py +233 -0
- nexus/knowledge/vault.py +662 -0
- nexus/research/__init__.py +12 -0
- nexus/research/pdf.py +497 -0
- nexus/research/zotero.py +521 -0
- nexus/teaching/__init__.py +14 -0
- nexus/teaching/courses.py +388 -0
- nexus/teaching/quarto.py +385 -0
- nexus/utils/__init__.py +0 -0
- nexus/utils/config.py +157 -0
- nexus/writing/__init__.py +12 -0
- nexus/writing/bibliography.py +339 -0
- nexus/writing/manuscript.py +397 -0
- nexus_cli-0.3.0.dist-info/METADATA +369 -0
- nexus_cli-0.3.0.dist-info/RECORD +21 -0
- nexus_cli-0.3.0.dist-info/WHEEL +4 -0
- nexus_cli-0.3.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""Bibliography management for Nexus CLI."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class BibEntry:
    """A single bibliography record.

    ``key``, ``entry_type`` and ``title`` are required; every other field
    defaults to an empty value so sparsely populated sources still load.
    """

    key: str  # citation key used to reference the entry
    entry_type: str  # entry type name, e.g. "article"
    title: str
    authors: list[str] = field(default_factory=list)  # one display name per author
    year: str = ""
    journal: str = ""
    doi: str = ""
    url: str = ""
    abstract: str = ""

    def to_dict(self) -> dict:
        """Return the entry as a plain dictionary.

        Returns:
            A dict with one item per dataclass field. The abstract is cut to
            its first 200 characters followed by ``"..."`` when it is longer
            than 200 characters; all other values are passed through as-is.
        """
        abstract = self.abstract
        if len(abstract) > 200:
            abstract = abstract[:200] + "..."

        return {
            "key": self.key,
            "entry_type": self.entry_type,
            "title": self.title,
            "authors": self.authors,
            "year": self.year,
            "journal": self.journal,
            "doi": self.doi,
            "url": self.url,
            "abstract": abstract,
        }

    def format_apa(self) -> str:
        """Format the entry as a short APA-style citation.

        The author part is ``"Unknown"`` for no authors, the single name for
        one author, ``"A & B"`` for two, a comma-separated list ending in
        ``", & Last"`` for three to five, and ``"First et al."`` for six or
        more. A missing year is rendered as ``"n.d."``.

        Returns:
            ``"<authors> (<year>). <title>."`` -- the closing period is
            omitted when the title already ends with ``.``, ``?`` or ``!`` so
            the citation never carries doubled terminal punctuation.
        """
        authors = self.authors
        count = len(authors)
        if count == 0:
            author_str = "Unknown"
        elif count == 1:
            author_str = authors[0]
        elif count == 2:
            author_str = f"{authors[0]} & {authors[1]}"
        elif count <= 5:
            author_str = ", ".join(authors[:-1]) + f", & {authors[-1]}"
        else:
            author_str = f"{authors[0]} et al."

        year = self.year or "n.d."
        # A title that already ends a sentence must not receive a second mark.
        terminator = "" if self.title.endswith((".", "?", "!")) else "."
        return f"{author_str} ({year}). {self.title}{terminator}"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class BibFileParser:
    """Parse BibTeX files into :class:`BibEntry` objects.

    Entries and field values are delimited by counting braces instead of with
    a single regular expression, so arbitrarily nested braces, multi-line
    values, closing braces that share a line with the last field, and
    comma-less ``@string``/``@preamble``/``@comment`` blocks are all handled
    without corrupting neighbouring entries.
    """

    # Entry types that do not describe a bibliographic record.
    _SKIPPED_TYPES = ("preamble", "string", "comment")

    _ENTRY_HEADER = re.compile(r"@(\w+)\s*\{")
    _FIELD_NAME = re.compile(r"([\w-]+)\s*=\s*")
    _BARE_VALUE = re.compile(r"[\w.-]+")
    _BRACE = re.compile(r"[{}]")
    _BRACE_OR_QUOTE = re.compile(r'[{}"]')

    def parse_file(self, path: Path) -> list[BibEntry]:
        """Parse a .bib file and return its entries.

        Args:
            path: Location of the file; ``~`` is expanded.

        Returns:
            The parsed entries, or an empty list when the path is not a
            regular file or cannot be read. Undecodable bytes are dropped
            rather than treated as an error.
        """
        path = Path(path).expanduser()
        if not path.is_file():
            return []

        try:
            content = path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Best effort: an unreadable file contributes no entries.
            return []
        return self._parse_bibtex(content)

    def _parse_bibtex(self, content: str) -> list[BibEntry]:
        """Turn BibTeX source text into a list of entries.

        ``booktitle`` is used for the ``journal`` attribute when the entry
        has no ``journal`` field.
        """
        entries = []
        for entry_type, key, fields_text in self._iter_entries(content):
            fields = self._parse_fields(fields_text)
            entries.append(
                BibEntry(
                    key=key,
                    entry_type=entry_type,
                    title=fields.get("title", ""),
                    authors=self._parse_authors(fields.get("author", "")),
                    year=fields.get("year", ""),
                    journal=fields.get("journal", fields.get("booktitle", "")),
                    doi=fields.get("doi", ""),
                    url=fields.get("url", ""),
                    abstract=fields.get("abstract", ""),
                )
            )
        return entries

    def _iter_entries(self, content: str) -> list[tuple[str, str, str]]:
        """Split BibTeX text into ``(entry_type, key, fields_text)`` triples.

        ``entry_type`` is lower-cased. Blocks of a skipped type, blocks with
        no ``key,`` prefix and blocks whose braces never balance are left out.
        """
        found = []
        pos = 0
        while True:
            header = self._ENTRY_HEADER.search(content, pos)
            if header is None:
                break

            body_start = header.end()
            body_end = self._find_closing_brace(content, body_start)
            if body_end == -1:
                # Unterminated block: drop it but keep looking for later
                # entries that may still be well formed.
                pos = body_start
                continue

            # Resume after the whole block so "@" inside values is never
            # mistaken for a new entry.
            pos = body_end + 1

            entry_type = header.group(1).lower()
            if entry_type in self._SKIPPED_TYPES:
                continue

            key, separator, fields_text = content[body_start:body_end].partition(",")
            key = key.strip()
            if separator and key:
                found.append((entry_type, key, fields_text))
        return found

    def _find_closing_brace(self, text: str, start: int) -> int:
        """Return the index of the ``}`` matching a ``{`` opened before *start*.

        Returns ``-1`` when the braces never balance.
        """
        depth = 1
        for brace in self._BRACE.finditer(text, start):
            if brace.group() == "{":
                depth += 1
            else:
                depth -= 1
                if depth == 0:
                    return brace.start()
        return -1

    def _find_closing_quote(self, text: str, start: int) -> int:
        """Return the index of the ``"`` that ends a quoted value, or ``-1``.

        A quote nested inside braces does not terminate the value. When the
        braces inside the value are unbalanced the first quote after *start*
        is used instead.
        """
        depth = 0
        for mark in self._BRACE_OR_QUOTE.finditer(text, start):
            char = mark.group()
            if char == "{":
                depth += 1
            elif char == "}":
                depth = max(depth - 1, 0)
            elif depth == 0:
                return mark.start()
        return text.find('"', start)

    def _read_value(self, text: str, pos: int) -> tuple[str | None, int]:
        """Read one field value starting at *pos*.

        Returns:
            ``(raw_value, end)`` where *end* is the index just past the
            value, or ``(None, pos)`` when no value can be recognised.
        """
        if pos >= len(text):
            return None, pos

        opener = text[pos]
        if opener == "{":
            close = self._find_closing_brace(text, pos + 1)
        elif opener == '"':
            close = self._find_closing_quote(text, pos + 1)
        else:
            # Unquoted value: a number or a macro name such as ``jan``.
            bare = self._BARE_VALUE.match(text, pos)
            if bare is None:
                return None, pos
            return bare.group(), bare.end()

        if close == -1:
            return None, pos
        return text[pos + 1 : close], close + 1

    def _parse_fields(self, text: str) -> dict:
        """Parse the ``name = value`` pairs of one entry.

        Values may be brace-delimited (any nesting depth), double-quoted or
        bare. Field names are lower-cased; in each value runs of whitespace
        collapse to one space and all braces are removed. When a name is
        repeated the last value wins.
        """
        fields = {}
        pos = 0
        while True:
            name = self._FIELD_NAME.search(text, pos)
            if name is None:
                break

            raw, end = self._read_value(text, name.end())
            if raw is None:
                # Not a usable value; continue scanning after the "=".
                pos = name.end()
                continue
            pos = end

            value = re.sub(r"\s+", " ", raw.strip())
            value = value.replace("{", "").replace("}", "")
            fields[name.group(1).lower()] = value

        return fields

    def _parse_authors(self, author_str: str) -> list[str]:
        """Split a BibTeX author string into display names.

        Names are separated by the word ``and`` (any case). ``"Last, First"``
        becomes ``"First Last"`` and ``"Last, Jr, First"`` becomes
        ``"First Last Jr"``. Empty names are dropped.
        """
        if not author_str:
            return []

        cleaned = []
        for raw_name in re.split(r"\s+and\s+", author_str, flags=re.IGNORECASE):
            parts = [part.strip() for part in raw_name.split(",")]
            if len(parts) == 3:
                last, suffix, first = parts
                ordered = [first, last, suffix]
            elif len(parts) >= 2:
                last, first = raw_name.split(",", 1)
                ordered = [first.strip(), last.strip()]
            else:
                ordered = parts

            name = " ".join(piece for piece in ordered if piece)
            if name:
                cleaned.append(name)

        return cleaned
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class BibliographyManager:
    """Manage bibliographies for manuscripts."""

    # Quarto cross-reference prefixes: "@fig-plot" or "@tbl-results" look like
    # Pandoc citations but never refer to bibliography entries.
    _CROSSREF_PREFIXES = ("fig", "tbl", "eq", "sec", "lst")

    # Manuscript source files scanned for citations.
    _SOURCE_GLOBS = ("*.qmd", "*.tex")

    # \cite, \citet, \citep and the other natbib variants, optionally starred
    # and with optional [..] arguments before the key list.
    _LATEX_CITE = re.compile(
        r"\\cite(?:yearpar|year|author|alt|alp|t|p)?\*?(?:\[[^\]]*\])*\{([^}]+)\}"
    )

    # "@key" not preceded by a word character (e-mail address) or a backslash
    # (escaped "@" / LaTeX macro). Punctuation is only allowed inside a key.
    _PANDOC_CITE = re.compile(r"(?<![\w\\])@(\w+(?:[:.-]+\w+)*)")

    def __init__(self, zotero_db: Path | None = None):
        """Initialize bibliography manager.

        Args:
            zotero_db: Path to Zotero database for integration; ``~`` is
                expanded. ``None`` disables the Zotero lookups.
        """
        self.zotero_db = Path(zotero_db).expanduser() if zotero_db else None
        self._parser = BibFileParser()

    def parse_bib_file(self, path: Path) -> list[BibEntry]:
        """Parse a .bib file."""
        return self._parser.parse_file(path)

    def find_bib_files(self, manuscript_path: Path) -> list[Path]:
        """Find all .bib files below a manuscript directory.

        Returns:
            The sorted paths, or an empty list when the directory is missing.
        """
        manuscript_path = Path(manuscript_path).expanduser()
        if not manuscript_path.exists():
            return []
        return sorted(manuscript_path.rglob("*.bib"))

    def get_manuscript_bibliography(self, manuscript_path: Path) -> list[BibEntry]:
        """Get all bibliography entries for a manuscript.

        Entries from every .bib file are merged; when a key occurs more than
        once the first occurrence (in sorted file order) is kept.
        """
        unique = {}
        for bib_file in self.find_bib_files(manuscript_path):
            for entry in self.parse_bib_file(bib_file):
                unique.setdefault(entry.key, entry)
        return list(unique.values())

    def search_bibliography(
        self,
        manuscript_path: Path,
        query: str,
    ) -> list[BibEntry]:
        """Search bibliography entries for a manuscript.

        Args:
            manuscript_path: Directory containing the .bib files.
            query: Case-insensitive regular expression. A query that is not
                a valid regular expression (for example ``"C++"``) is
                matched literally instead of raising.

        Returns:
            Entries whose title, key or any author matches the query.
        """
        entries = self.get_manuscript_bibliography(manuscript_path)
        try:
            pattern = re.compile(query, re.IGNORECASE)
        except re.error:
            pattern = re.compile(re.escape(query), re.IGNORECASE)

        return [
            e
            for e in entries
            if (pattern.search(e.title) or pattern.search(e.key) or any(pattern.search(a) for a in e.authors))
        ]

    @staticmethod
    def _entry_from_zotero(item) -> BibEntry:
        """Build a BibEntry from a Zotero item; the year is the first four characters of its date."""
        return BibEntry(
            key=item.key,
            entry_type=item.item_type,
            title=item.title,
            authors=item.authors,
            year=item.date[:4] if item.date else "",
            doi=item.doi,
            url=item.url,
            abstract=item.abstract,
        )

    def get_from_zotero(self, key: str) -> BibEntry | None:
        """Get an entry from Zotero by key.

        Returns:
            The entry, or ``None`` when no database is configured, the
            database file is missing, the item is not found, or the lookup
            fails for any reason.
        """
        if not self.zotero_db or not self.zotero_db.exists():
            return None

        try:
            from nexus.research.zotero import ZoteroClient

            item = ZoteroClient(self.zotero_db).get(key)
            return self._entry_from_zotero(item) if item else None
        except Exception:
            # Zotero is an optional integration: any failure means "no result".
            return None

    def search_zotero(self, query: str, limit: int = 20) -> list[BibEntry]:
        """Search Zotero library.

        Returns:
            The matching entries (``limit`` is forwarded to the Zotero
            client), or an empty list when no database is configured, the
            database file is missing, or the search fails for any reason.
        """
        if not self.zotero_db or not self.zotero_db.exists():
            return []

        try:
            from nexus.research.zotero import ZoteroClient

            items = ZoteroClient(self.zotero_db).search(query, limit=limit)
            return [self._entry_from_zotero(item) for item in items]
        except Exception:
            # Zotero is an optional integration: any failure means "no result".
            return []

    def export_bibtex(self, entries: list[BibEntry]) -> str:
        """Export entries as BibTeX.

        The title is always written; author, year, journal, doi and url are
        written only when non-empty. Each entry is followed by a blank line.
        """
        lines = []

        for entry in entries:
            lines.append(f"@{entry.entry_type}{{{entry.key},")
            lines.append(f" title = {{{entry.title}}},")

            optional_fields = (
                ("author", " and ".join(entry.authors)),
                ("year", entry.year),
                ("journal", entry.journal),
                ("doi", entry.doi),
                ("url", entry.url),
            )
            for name, value in optional_fields:
                if value:
                    lines.append(f" {name} = {{{value}}},")

            lines.append("}")
            lines.append("")

        return "\n".join(lines)

    def find_cited_keys(self, content: str) -> list[str]:
        """Find citation keys used in content (LaTeX/Quarto format).

        Recognises LaTeX ``\\cite{a,b}`` and its natbib variants (with
        optional ``[..]`` arguments) as well as Pandoc/Quarto ``@key``
        citations. E-mail addresses, escaped ``\\@`` and Quarto
        cross-references such as ``@fig-plot`` are not reported.

        Returns:
            The distinct keys, sorted.
        """
        keys = set()

        # LaTeX format: \cite{key1,key2}, \citep[p.~5]{key}, \citeauthor{key}
        for match in self._LATEX_CITE.finditer(content):
            for key in match.group(1).split(","):
                key = key.strip()
                if key:
                    keys.add(key)

        # Pandoc/Quarto format: [@key1; @key2] or @key
        for match in self._PANDOC_CITE.finditer(content):
            key = match.group(1)
            # The text before the first "-" or ":" identifies cross-references.
            prefix = re.split(r"[:-]", key, maxsplit=1)[0]
            if prefix not in self._CROSSREF_PREFIXES:
                keys.add(key)

        return sorted(keys)

    def check_citations(self, manuscript_path: Path) -> dict:
        """Check for missing or unused citations in a manuscript.

        Citation keys found in every ``*.qmd`` and ``*.tex`` file below the
        manuscript directory are compared with the keys of its .bib files.
        Files are read as UTF-8 with undecodable bytes dropped; unreadable
        files are skipped.

        Returns:
            A dict with ``cited_count``, ``bibliography_count``, the sorted
            lists ``missing`` (cited but not defined) and ``unused`` (defined
            but not cited), and ``all_good`` which is true when nothing is
            missing.
        """
        manuscript_path = Path(manuscript_path).expanduser()

        bib_keys = {e.key for e in self.get_manuscript_bibliography(manuscript_path)}

        cited_keys = set()
        for glob in self._SOURCE_GLOBS:
            for source_file in manuscript_path.rglob(glob):
                try:
                    content = source_file.read_text(encoding="utf-8", errors="ignore")
                except OSError:
                    continue
                cited_keys.update(self.find_cited_keys(content))

        missing = cited_keys - bib_keys
        unused = bib_keys - cited_keys

        return {
            "cited_count": len(cited_keys),
            "bibliography_count": len(bib_keys),
            "missing": sorted(missing),
            "unused": sorted(unused),
            "all_good": len(missing) == 0,
        }
|