nexus-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,339 @@
1
+ """Bibliography management for Nexus CLI."""
2
+
3
+ import re
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+
7
+
8
@dataclass
class BibEntry:
    """One bibliographic record: identifying key, type and descriptive fields."""

    key: str
    entry_type: str
    title: str
    authors: list[str] = field(default_factory=list)
    year: str = ""
    journal: str = ""
    doi: str = ""
    url: str = ""
    abstract: str = ""

    def to_dict(self) -> dict:
        """Return the entry as a plain dictionary.

        Abstracts longer than 200 characters are cut down to their first
        200 characters followed by "..."; shorter ones are passed through.
        """
        summary = self.abstract
        if len(summary) > 200:
            summary = summary[:200] + "..."

        return dict(
            key=self.key,
            entry_type=self.entry_type,
            title=self.title,
            authors=self.authors,
            year=self.year,
            journal=self.journal,
            doi=self.doi,
            url=self.url,
            abstract=summary,
        )

    def format_apa(self) -> str:
        """Render the entry as a short "Authors (year). Title." citation.

        No authors gives "Unknown"; two are joined with "&"; three to five
        are comma-separated with ", &" before the last; more than five are
        reduced to the first author plus "et al.". A missing year is
        rendered as "n.d.".
        """
        names = self.authors

        if not names:
            byline = "Unknown"
        elif len(names) > 5:
            byline = f"{names[0]} et al."
        elif len(names) >= 3:
            byline = ", ".join(names[:-1]) + f", & {names[-1]}"
        elif len(names) == 2:
            byline = f"{names[0]} & {names[1]}"
        else:
            byline = names[0]

        when = self.year if self.year else "n.d."
        return f"{byline} ({when}). {self.title}."
51
+
52
+
53
class BibFileParser:
    """Parse BibTeX files into ``BibEntry`` objects.

    Entries and braced field values are delimited by brace matching rather
    than by a single regular expression. This handles values with any depth
    of nested braces, entries whose closing brace shares a line with the
    last field, and comma-less ``@string``/``@preamble`` blocks that must
    not swallow the entry following them.
    """

    # "@type{" opener of an entry; group 1 is the entry type.
    _ENTRY_START = re.compile(r"@(\w+)\s*\{")
    # "name =" prefix of a field; group 1 is the field name. Hyphens are
    # accepted so "file-url" is not mistaken for a "url" field.
    _FIELD_NAME = re.compile(r"([\w-]+)\s*=\s*")
    _BARE_NUMBER = re.compile(r"\d+")
    _BRACE = re.compile(r"[{}]")
    _BRACE_OR_QUOTE = re.compile(r'[{}"]')
    _WHITESPACE = re.compile(r"\s+")
    _AUTHOR_SEPARATOR = re.compile(r"\s+and\s+", re.IGNORECASE)
    # Block types that do not describe a bibliographic record.
    _NON_ENTRY_TYPES = ("preamble", "string", "comment")

    def parse_file(self, path: Path) -> "list[BibEntry]":
        """Parse a .bib file and return its entries.

        Args:
            path: Location of the file; ``~`` is expanded.

        Returns:
            The parsed entries, or an empty list when the file does not
            exist or cannot be read.
        """
        path = Path(path).expanduser()
        if not path.exists():
            return []

        try:
            # Undecodable bytes are dropped so one bad character does not
            # discard the whole file.
            content = path.read_text(encoding="utf-8", errors="ignore")
        except (OSError, ValueError):
            return []
        return self._parse_bibtex(content)

    def _parse_bibtex(self, content: str) -> "list[BibEntry]":
        """Build a ``BibEntry`` for every bibliographic entry in *content*.

        ``@preamble``, ``@string`` and ``@comment`` blocks are skipped. The
        ``journal`` attribute falls back to the ``booktitle`` field when the
        entry has no ``journal`` field.
        """
        entries = []

        for entry_type, key, fields_text in self._split_entries(content):
            if entry_type in self._NON_ENTRY_TYPES:
                continue

            fields = self._parse_fields(fields_text)
            entries.append(
                BibEntry(
                    key=key,
                    entry_type=entry_type,
                    title=fields.get("title", ""),
                    authors=self._parse_authors(fields.get("author", "")),
                    year=fields.get("year", ""),
                    journal=fields.get("journal", fields.get("booktitle", "")),
                    doi=fields.get("doi", ""),
                    url=fields.get("url", ""),
                    abstract=fields.get("abstract", ""),
                )
            )

        return entries

    def _split_entries(self, content: str) -> list[tuple[str, str, str]]:
        """Split *content* into ``(entry_type, key, fields_text)`` triples.

        ``entry_type`` is lower-cased and ``key`` is stripped. Blocks with
        no comma after the key (for example ``@string{a = "b"}``) or with an
        empty key are omitted. A block whose braces never balance is
        skipped, and scanning resumes just inside it so later well-formed
        entries are still found.
        """
        triples = []
        pos = 0

        while True:
            start = self._ENTRY_START.search(content, pos)
            if start is None:
                break

            open_pos = start.end() - 1
            close_pos = self._find_closing_brace(content, open_pos)
            if close_pos < 0:
                pos = open_pos + 1
                continue
            pos = close_pos + 1

            key, comma, fields_text = content[open_pos + 1:close_pos].partition(",")
            key = key.strip()
            if comma and key:
                triples.append((start.group(1).lower(), key, fields_text))

        return triples

    def _parse_fields(self, text: str) -> dict:
        """Parse ``name = value`` pairs from the body of one entry.

        Values may be brace-delimited, double-quoted, or a bare number;
        fields with any other kind of value are ignored. Whitespace runs in
        a value are collapsed to single spaces and all braces are removed.

        Returns:
            Mapping of lower-cased field name to cleaned value. When a name
            occurs more than once, the last occurrence wins.
        """
        fields = {}
        pos = 0

        while True:
            name_match = self._FIELD_NAME.search(text, pos)
            if name_match is None:
                break

            raw, pos = self._read_value(text, name_match.end())
            if raw is None:
                continue

            value = self._WHITESPACE.sub(" ", raw.strip())
            value = value.replace("{", "").replace("}", "")
            fields[name_match.group(1).lower()] = value

        return fields

    def _read_value(self, text: str, start: int) -> tuple:
        """Read the field value beginning at ``text[start]``.

        Returns:
            ``(value, next_pos)`` where ``next_pos`` is the index just past
            the value, or ``(None, start)`` when no supported value begins
            at *start*.
        """
        opener = text[start:start + 1]

        if opener == "{":
            end = self._find_closing_brace(text, start)
            if end >= 0:
                return text[start + 1:end], end + 1
        elif opener == '"':
            end = self._find_closing_quote(text, start)
            if end >= 0:
                return text[start + 1:end], end + 1
        else:
            number = self._BARE_NUMBER.match(text, start)
            if number is not None:
                return number.group(), number.end()

        return None, start

    @classmethod
    def _find_closing_brace(cls, text: str, open_pos: int) -> int:
        """Return the index of the brace closing the one at *open_pos*, or -1."""
        depth = 0
        for brace in cls._BRACE.finditer(text, open_pos):
            if brace.group() == "{":
                depth += 1
            else:
                depth -= 1
                if depth == 0:
                    return brace.start()
        return -1

    @classmethod
    def _find_closing_quote(cls, text: str, open_pos: int) -> int:
        """Return the index of the quote closing the one at *open_pos*, or -1.

        A quote nested inside braces does not end the value.
        """
        depth = 0
        for token in cls._BRACE_OR_QUOTE.finditer(text, open_pos + 1):
            char = token.group()
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
            elif depth <= 0:
                return token.start()
        # Braces inside the quotes never balanced: settle for the next quote.
        return text.find('"', open_pos + 1)

    def _parse_authors(self, author_str: str) -> list[str]:
        """Split a BibTeX author string into display names.

        Names are separated by " and " (case-insensitive). "Last, First"
        becomes "First Last" and "Last, Jr, First" becomes "First Last Jr".
        Empty names are dropped.
        """
        if not author_str:
            return []

        cleaned = []
        for raw_name in self._AUTHOR_SEPARATOR.split(author_str):
            name = raw_name.strip()
            if "," in name:
                parts = [part.strip() for part in name.split(",")]
                last, first, suffixes = parts[0], parts[-1], parts[1:-1]
                name = " ".join(part for part in (first, last, *suffixes) if part)
            if name:
                cleaned.append(name)

        return cleaned
140
+
141
+
142
+ class BibliographyManager:
143
+ """Manage bibliographies for manuscripts."""
144
+
145
+ def __init__(self, zotero_db: Path | None = None):
146
+ """Initialize bibliography manager.
147
+
148
+ Args:
149
+ zotero_db: Path to Zotero database for integration
150
+ """
151
+ self.zotero_db = Path(zotero_db).expanduser() if zotero_db else None
152
+ self._parser = BibFileParser()
153
+
154
+ def parse_bib_file(self, path: Path) -> list[BibEntry]:
155
+ """Parse a .bib file."""
156
+ return self._parser.parse_file(path)
157
+
158
+ def find_bib_files(self, manuscript_path: Path) -> list[Path]:
159
+ """Find all .bib files in a manuscript directory."""
160
+ manuscript_path = Path(manuscript_path).expanduser()
161
+ if not manuscript_path.exists():
162
+ return []
163
+
164
+ bib_files = list(manuscript_path.rglob("*.bib"))
165
+ return sorted(bib_files)
166
+
167
+ def get_manuscript_bibliography(self, manuscript_path: Path) -> list[BibEntry]:
168
+ """Get all bibliography entries for a manuscript."""
169
+ bib_files = self.find_bib_files(manuscript_path)
170
+ all_entries = []
171
+
172
+ for bib_file in bib_files:
173
+ entries = self.parse_bib_file(bib_file)
174
+ all_entries.extend(entries)
175
+
176
+ # Remove duplicates by key
177
+ seen = set()
178
+ unique = []
179
+ for entry in all_entries:
180
+ if entry.key not in seen:
181
+ seen.add(entry.key)
182
+ unique.append(entry)
183
+
184
+ return unique
185
+
186
+ def search_bibliography(
187
+ self,
188
+ manuscript_path: Path,
189
+ query: str,
190
+ ) -> list[BibEntry]:
191
+ """Search bibliography entries for a manuscript."""
192
+ entries = self.get_manuscript_bibliography(manuscript_path)
193
+ pattern = re.compile(query, re.IGNORECASE)
194
+
195
+ return [
196
+ e
197
+ for e in entries
198
+ if (pattern.search(e.title) or pattern.search(e.key) or any(pattern.search(a) for a in e.authors))
199
+ ]
200
+
201
+ def get_from_zotero(self, key: str) -> BibEntry | None:
202
+ """Get an entry from Zotero by key."""
203
+ if not self.zotero_db or not self.zotero_db.exists():
204
+ return None
205
+
206
+ try:
207
+ from nexus.research.zotero import ZoteroClient
208
+
209
+ client = ZoteroClient(self.zotero_db)
210
+ item = client.get(key)
211
+
212
+ if not item:
213
+ return None
214
+
215
+ return BibEntry(
216
+ key=item.key,
217
+ entry_type=item.item_type,
218
+ title=item.title,
219
+ authors=item.authors,
220
+ year=item.date[:4] if item.date else "",
221
+ doi=item.doi,
222
+ url=item.url,
223
+ abstract=item.abstract,
224
+ )
225
+ except Exception:
226
+ return None
227
+
228
+ def search_zotero(self, query: str, limit: int = 20) -> list[BibEntry]:
229
+ """Search Zotero library."""
230
+ if not self.zotero_db or not self.zotero_db.exists():
231
+ return []
232
+
233
+ try:
234
+ from nexus.research.zotero import ZoteroClient
235
+
236
+ client = ZoteroClient(self.zotero_db)
237
+ items = client.search(query, limit=limit)
238
+
239
+ return [
240
+ BibEntry(
241
+ key=item.key,
242
+ entry_type=item.item_type,
243
+ title=item.title,
244
+ authors=item.authors,
245
+ year=item.date[:4] if item.date else "",
246
+ doi=item.doi,
247
+ url=item.url,
248
+ abstract=item.abstract,
249
+ )
250
+ for item in items
251
+ ]
252
+ except Exception:
253
+ return []
254
+
255
+ def export_bibtex(self, entries: list[BibEntry]) -> str:
256
+ """Export entries as BibTeX."""
257
+ lines = []
258
+
259
+ for entry in entries:
260
+ # Build BibTeX entry
261
+ lines.append(f"@{entry.entry_type}{{{entry.key},")
262
+ lines.append(f" title = {{{entry.title}}},")
263
+
264
+ if entry.authors:
265
+ author_str = " and ".join(entry.authors)
266
+ lines.append(f" author = {{{author_str}}},")
267
+
268
+ if entry.year:
269
+ lines.append(f" year = {{{entry.year}}},")
270
+
271
+ if entry.journal:
272
+ lines.append(f" journal = {{{entry.journal}}},")
273
+
274
+ if entry.doi:
275
+ lines.append(f" doi = {{{entry.doi}}},")
276
+
277
+ if entry.url:
278
+ lines.append(f" url = {{{entry.url}}},")
279
+
280
+ lines.append("}")
281
+ lines.append("")
282
+
283
+ return "\n".join(lines)
284
+
285
+ def find_cited_keys(self, content: str) -> list[str]:
286
+ """Find citation keys used in content (LaTeX/Quarto format)."""
287
+ keys = set()
288
+
289
+ # LaTeX format: \cite{key1,key2} or \citep{key} or \citet{key}
290
+ latex_pattern = re.compile(r"\\cite[pt]?\{([^}]+)\}")
291
+ for match in latex_pattern.finditer(content):
292
+ for key in match.group(1).split(","):
293
+ keys.add(key.strip())
294
+
295
+ # Pandoc/Quarto format: [@key1; @key2] or @key
296
+ pandoc_pattern = re.compile(r"@([\w:-]+)")
297
+ for match in pandoc_pattern.finditer(content):
298
+ key = match.group(1)
299
+ # Skip common false positives
300
+ if key not in ("fig", "tbl", "eq", "sec", "lst"):
301
+ keys.add(key)
302
+
303
+ return sorted(keys)
304
+
305
+ def check_citations(self, manuscript_path: Path) -> dict:
306
+ """Check for missing or unused citations in a manuscript."""
307
+ manuscript_path = Path(manuscript_path).expanduser()
308
+
309
+ # Get all citation keys from bibliography
310
+ bib_entries = self.get_manuscript_bibliography(manuscript_path)
311
+ bib_keys = {e.key for e in bib_entries}
312
+
313
+ # Get all cited keys from manuscript files
314
+ cited_keys = set()
315
+ for qmd_file in manuscript_path.rglob("*.qmd"):
316
+ try:
317
+ content = qmd_file.read_text()
318
+ cited_keys.update(self.find_cited_keys(content))
319
+ except Exception:
320
+ pass
321
+
322
+ for tex_file in manuscript_path.rglob("*.tex"):
323
+ try:
324
+ content = tex_file.read_text()
325
+ cited_keys.update(self.find_cited_keys(content))
326
+ except Exception:
327
+ pass
328
+
329
+ # Find missing and unused
330
+ missing = cited_keys - bib_keys
331
+ unused = bib_keys - cited_keys
332
+
333
+ return {
334
+ "cited_count": len(cited_keys),
335
+ "bibliography_count": len(bib_keys),
336
+ "missing": sorted(missing),
337
+ "unused": sorted(unused),
338
+ "all_good": len(missing) == 0,
339
+ }