nexus-cli 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexus/__init__.py +8 -0
- nexus/cli.py +1914 -0
- nexus/integrations/__init__.py +0 -0
- nexus/knowledge/__init__.py +13 -0
- nexus/knowledge/search.py +233 -0
- nexus/knowledge/vault.py +662 -0
- nexus/research/__init__.py +12 -0
- nexus/research/pdf.py +497 -0
- nexus/research/zotero.py +521 -0
- nexus/teaching/__init__.py +14 -0
- nexus/teaching/courses.py +388 -0
- nexus/teaching/quarto.py +385 -0
- nexus/utils/__init__.py +0 -0
- nexus/utils/config.py +157 -0
- nexus/writing/__init__.py +12 -0
- nexus/writing/bibliography.py +339 -0
- nexus/writing/manuscript.py +397 -0
- nexus_cli-0.3.0.dist-info/METADATA +369 -0
- nexus_cli-0.3.0.dist-info/RECORD +21 -0
- nexus_cli-0.3.0.dist-info/WHEEL +4 -0
- nexus_cli-0.3.0.dist-info/entry_points.txt +2 -0
nexus/knowledge/vault.py
ADDED
|
@@ -0,0 +1,662 @@
|
|
|
1
|
+
"""Obsidian vault operations for Nexus CLI."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
import subprocess
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import date
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class SearchResult:
    """One matching line found while searching the vault."""

    path: str  # path of the note containing the match
    line_number: int  # line number of the matching line
    content: str  # text of the matching line
    match_text: str = ""  # matched fragment; empty when not reported

    def to_dict(self) -> dict:
        """Return the four fields as a plain dictionary."""
        keys = ("path", "line_number", "content", "match_text")
        return {key: getattr(self, key) for key in keys}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Note:
    """A single note loaded from the vault."""

    path: str
    title: str
    content: str
    frontmatter: dict = field(default_factory=dict)
    links: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the note's metadata plus a shortened content preview.

        The full ``content`` is not included. ``content_preview`` holds the
        first 200 characters followed by "..." when the content is longer
        than 200 characters, and the whole content otherwise.
        """
        preview = self.content
        if len(preview) > 200:
            preview = preview[:200] + "..."

        return {
            "path": self.path,
            "title": self.title,
            "frontmatter": self.frontmatter,
            "links": self.links,
            "tags": self.tags,
            "content_preview": preview,
        }
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class VaultManager:
    """Manages operations on an Obsidian vault.

    Notes are the ``*.md`` files below ``vault_path``. Files whose
    vault-relative path starts with "_" or "." are treated as system content
    and are left out of ``recent``, ``orphans`` and ``graph``.
    """

    # Read and write notes as UTF-8 instead of the locale's default encoding,
    # so the same vault behaves identically on every platform.
    _ENCODING = "utf-8"
    # [[target]] or [[target|alias]]; group 1 is the raw target text.
    _WIKI_LINK_RE = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]")
    # "#tag" at the start of the text or after whitespace, so "# Heading",
    # "[[note#section]]" and URL fragments are not mistaken for tags.
    _TAG_RE = re.compile(r"(?:^|\s)#([a-zA-Z][a-zA-Z0-9_/-]*)")

    def __init__(self, vault_path: Path, templates_path: Path | None = None):
        """Initialize with vault path.

        Args:
            vault_path: Path to the Obsidian vault root
            templates_path: Path to templates folder (defaults to vault/_SYSTEM/templates)
        """
        self.vault_path = Path(vault_path).expanduser()
        self.templates_path = (
            Path(templates_path).expanduser() if templates_path else self.vault_path / "_SYSTEM" / "templates"
        )

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _is_system_path(rel_path: Path) -> bool:
        """Return True when a vault-relative path starts with "_" or "."."""
        return str(rel_path).startswith(("_", "."))

    @staticmethod
    def _link_target(raw_link: str) -> str:
        """Reduce a raw wiki-link target to the bare note name it refers to.

        ``folder/note#heading`` and ``note.md`` both become ``note``. A link
        that only carries a heading (``#heading``) yields an empty string.
        """
        # Drop the "#heading" / "#^block" part; inside tables the alias pipe
        # is written as "\|", which leaves a trailing backslash on the target.
        target = raw_link.split("#", 1)[0].strip().rstrip("\\")
        target = target.rsplit("/", 1)[-1]
        if target.lower().endswith(".md"):
            target = target[:-3]
        return target

    @staticmethod
    def _count_clusters(nodes: list[dict], edges: list[dict]) -> int:
        """Count connected components, treating every edge as undirected."""
        parent = {node["id"]: node["id"] for node in nodes}

        def find(item: str) -> str:
            while parent[item] != item:
                parent[item] = parent[parent[item]]  # path halving
                item = parent[item]
            return item

        for edge in edges:
            root_a, root_b = find(edge["source"]), find(edge["target"])
            if root_a != root_b:
                parent[root_a] = root_b

        return sum(1 for node_id, root in parent.items() if node_id == root)

    def _resolve_path(self, note_path: str) -> Path:
        """Resolve a note path to full path, adding .md if needed.

        A path without any suffix gets ".md"; a relative path is anchored at
        the vault root, an absolute path is returned unchanged.
        """
        path = Path(note_path)
        if not path.suffix:
            path = path.with_suffix(".md")
        if not path.is_absolute():
            path = self.vault_path / path
        return path

    # ------------------------------------------------------------------
    # Basic vault access
    # ------------------------------------------------------------------

    def exists(self) -> bool:
        """Check if the vault exists (the path is an existing directory)."""
        return self.vault_path.is_dir()

    def note_count(self) -> int:
        """Count total notes in vault (all ``*.md`` files, system folders included)."""
        if not self.exists():
            return 0
        return sum(1 for _ in self.vault_path.rglob("*.md"))

    def read(self, note_path: str) -> Note:
        """Read a note from the vault.

        Args:
            note_path: Path relative to vault root (e.g., "projects/my-project.md")

        Returns:
            Note object with content and metadata. ``content`` is the body
            without frontmatter, ``links`` holds the raw wiki-link targets in
            order of appearance and ``tags`` the distinct tags, sorted.

        Raises:
            FileNotFoundError: If note doesn't exist
        """
        full_path = self._resolve_path(note_path)

        if not full_path.exists():
            raise FileNotFoundError(f"Note not found: {note_path}")

        content = full_path.read_text(encoding=self._ENCODING)

        # Parse frontmatter
        frontmatter: dict = {}
        body = content
        if content.startswith("---"):
            parts = content.split("---", 2)
            if len(parts) >= 3:
                import yaml

                try:
                    parsed = yaml.safe_load(parts[1])
                except yaml.YAMLError:
                    parsed = None
                # Valid YAML that is not a mapping (a bare string or list)
                # cannot serve as frontmatter; treat it as absent.
                if isinstance(parsed, dict):
                    frontmatter = parsed
                body = parts[2].strip()

        links = self._WIKI_LINK_RE.findall(content)
        tags = self._TAG_RE.findall(content)

        # Title from frontmatter, falling back to the file name when the key
        # is missing or empty.
        title = str(frontmatter.get("title") or full_path.stem)

        try:
            rel_path = str(full_path.relative_to(self.vault_path))
        except ValueError:
            # Absolute path outside the vault: report it as given.
            rel_path = str(full_path)

        return Note(
            path=rel_path,
            title=title,
            content=body,
            frontmatter=frontmatter,
            links=links,
            tags=sorted(set(tags)),  # sorted so the order is deterministic
        )

    def write(self, note_path: str, content: str, frontmatter: dict | None = None) -> Path:
        """Write content to a note, creating parent folders as needed.

        Args:
            note_path: Path relative to vault root
            content: Note content (markdown)
            frontmatter: Optional YAML frontmatter dict

        Returns:
            Full path to the created/updated note
        """
        full_path = self._resolve_path(note_path)

        # Ensure parent directory exists
        full_path.parent.mkdir(parents=True, exist_ok=True)

        # Build content with frontmatter
        if frontmatter:
            import yaml

            fm_str = yaml.dump(frontmatter, default_flow_style=False, sort_keys=False)
            full_content = f"---\n{fm_str}---\n\n{content}"
        else:
            full_content = content

        full_path.write_text(full_content, encoding=self._ENCODING)
        return full_path

    # ------------------------------------------------------------------
    # Searching
    # ------------------------------------------------------------------

    def search(self, query: str, limit: int = 20) -> list[SearchResult]:
        """Search vault using ripgrep.

        Falls back to a pure-Python scan when ``rg`` is not installed.

        Args:
            query: Search query (supports regex)
            limit: Maximum results to return

        Returns:
            List of SearchResult objects; empty when the vault does not exist
            or ripgrep times out
        """
        if not self.exists():
            return []

        try:
            result = subprocess.run(
                [
                    "rg",
                    "--json",
                    # --max-count caps matches per file, not overall; the
                    # overall cap is applied while parsing below.
                    "--max-count",
                    str(limit * 2),
                    "--glob",
                    "*.md",
                    "--ignore-case",
                    # Pass the pattern explicitly so a query that starts with
                    # "-" is not parsed as a ripgrep option.
                    "--regexp",
                    query,
                    "--",
                    str(self.vault_path),
                ],
                capture_output=True,
                text=True,
                encoding="utf-8",  # ripgrep's JSON output is UTF-8
                timeout=30,
            )
        except subprocess.TimeoutExpired:
            return []
        except FileNotFoundError:
            # ripgrep not installed
            return self._fallback_search(query, limit)

        results = []
        # Split on "\n" only: splitlines() would also break on separators
        # such as U+2028 that may appear unescaped inside a JSON string.
        for line in result.stdout.strip().split("\n"):
            if not line:
                continue
            try:
                data = json.loads(line)
                if data.get("type") != "match":
                    continue
                match_data = data["data"]
                path = Path(match_data["path"]["text"])
                rel_path = str(path.relative_to(self.vault_path))
                line_content = match_data["lines"]["text"].strip()

                match_text = ""
                if match_data.get("submatches"):
                    match_text = match_data["submatches"][0]["match"]["text"]

                results.append(
                    SearchResult(
                        path=rel_path,
                        line_number=match_data["line_number"],
                        content=line_content,
                        match_text=match_text,
                    )
                )
            except (KeyError, ValueError):
                # Malformed record (json.JSONDecodeError is a ValueError),
                # missing field, or a path outside the vault: skip it.
                continue

            if len(results) >= limit:
                break

        return results

    def _fallback_search(self, query: str, limit: int) -> list[SearchResult]:
        """Fallback search without ripgrep (slower).

        Scans every ``*.md`` file line by line with a case-insensitive regex.
        Unreadable files are skipped.
        """
        results = []
        pattern = re.compile(query, re.IGNORECASE)

        for md_file in self.vault_path.rglob("*.md"):
            try:
                content = md_file.read_text(encoding=self._ENCODING)
            except (OSError, UnicodeDecodeError):
                continue

            rel_path = str(md_file.relative_to(self.vault_path))
            for line_number, line in enumerate(content.split("\n"), 1):
                found = pattern.search(line)
                if not found:
                    continue
                results.append(
                    SearchResult(
                        path=rel_path,
                        line_number=line_number,
                        content=line.strip(),
                        # The text that matched, like the ripgrep path
                        # reports, rather than the pattern itself.
                        match_text=found.group(0),
                    )
                )
                if len(results) >= limit:
                    return results

        return results

    def search_files(self, query: str, limit: int = 20) -> list[str]:
        """Search for file names matching query.

        The case-insensitive regex is matched against each note's path
        relative to the vault root, so folders above the vault never
        influence the result.

        Args:
            query: Search query for file names
            limit: Maximum number of paths to return

        Returns:
            List of matching file paths
        """
        if not self.exists():
            return []

        pattern = re.compile(query, re.IGNORECASE)
        matches = []

        for md_file in self.vault_path.rglob("*.md"):
            rel_path = str(md_file.relative_to(self.vault_path))
            if pattern.search(rel_path):
                matches.append(rel_path)
                if len(matches) >= limit:
                    break

        return matches

    def backlinks(self, note_path: str) -> list[str]:
        """Find notes that link to this note.

        Recognizes ``[[name]]``, ``[[name|alias]]``, ``[[folder/name]]`` and
        ``[[name#heading]]``.

        Args:
            note_path: Path to note to find backlinks for

        Returns:
            List of paths to notes that link to this note
        """
        # Get the note name without extension for wiki link matching
        note_name = Path(note_path).stem

        # re.escape writes a space as "\ ", which some ripgrep versions
        # reject as an unknown escape; a bare space is equivalent in both
        # regex engines used by search().
        escaped_name = re.escape(note_name).replace("\\ ", " ")
        pattern = rf"\[\[(?:[^\]|#]*/)?{escaped_name}(?:#[^\]|]*)?(?:\|[^\]]*)?\]\]"

        results = self.search(pattern, limit=100)

        # Get unique file paths (excluding the source note)
        source_path = self._resolve_path(note_path)
        linking_notes = {result.path for result in results if self.vault_path / result.path != source_path}

        return sorted(linking_notes)

    # ------------------------------------------------------------------
    # Daily notes and templates
    # ------------------------------------------------------------------

    def daily(self, target_date: date | None = None) -> Path:
        """Get or create a daily note.

        The note lives at ``50-DAILY/<ISO date>.md``. When it does not exist
        it is created from the "daily" template (or a built-in default).

        Args:
            target_date: Date for the note (defaults to today)

        Returns:
            Path to the daily note
        """
        if target_date is None:
            target_date = date.today()

        iso_date = target_date.isoformat()
        daily_path = f"50-DAILY/{iso_date}.md"
        full_path = self._resolve_path(daily_path)

        if not full_path.exists():
            frontmatter: dict | None = {"type": "daily", "date": iso_date}

            # Create from template or default
            template_content = self._load_template("daily")
            if template_content:
                content = (
                    template_content.replace("{{date}}", iso_date)
                    .replace("{{date:YYYY-MM-DD}}", iso_date)
                    .replace("{{date:dddd}}", target_date.strftime("%A"))
                )
                if content.startswith("---"):
                    # The template ships its own frontmatter; adding ours
                    # would leave two frontmatter blocks in the note.
                    frontmatter = None
            else:
                content = f"# {iso_date}\n\n## Tasks\n\n- [ ] \n\n## Notes\n\n"

            self.write(daily_path, content, frontmatter=frontmatter)

        return full_path

    def _load_template(self, template_name: str) -> str | None:
        """Load a template by name.

        Looks in the templates folder for ``<name>.md`` and ``tpl-<name>.md``,
        then in the vault's ``templates`` and ``_templates`` folders.

        Args:
            template_name: Template name (without .md extension)

        Returns:
            Template content or None if not found
        """
        candidates = (
            self.templates_path / f"{template_name}.md",
            self.templates_path / f"tpl-{template_name}.md",
            self.vault_path / "templates" / f"{template_name}.md",
            self.vault_path / "_templates" / f"{template_name}.md",
        )
        for candidate in candidates:
            if candidate.is_file():
                return candidate.read_text(encoding=self._ENCODING)
        return None

    def template(
        self,
        template_name: str,
        dest_path: str,
        variables: dict | None = None,
    ) -> Path:
        """Create a note from a template.

        Args:
            template_name: Name of template to use
            dest_path: Destination path for new note
            variables: Variables to substitute in template (``{{key}}`` placeholders)

        Returns:
            Path to created note

        Raises:
            FileNotFoundError: If template doesn't exist
        """
        template_content = self._load_template(template_name)
        if template_content is None:
            raise FileNotFoundError(f"Template not found: {template_name}")

        # Substitute caller-supplied variables first so an explicit "date"
        # variable wins over the automatic one below.
        content = template_content
        for key, value in (variables or {}).items():
            content = content.replace(f"{{{{{key}}}}}", str(value))

        # Always substitute date
        today = date.today().isoformat()
        content = content.replace("{{date}}", today)
        content = content.replace("{{date:YYYY-MM-DD}}", today)

        # Write the note
        return self.write(dest_path, content)

    def list_templates(self) -> list[str]:
        """List available templates.

        Returns:
            Sorted list of template names from the templates folder, with a
            leading "tpl-" prefix removed
        """
        if not self.templates_path.exists():
            return []

        return sorted(md_file.stem.removeprefix("tpl-") for md_file in self.templates_path.glob("*.md"))

    # ------------------------------------------------------------------
    # Vault-wide analysis
    # ------------------------------------------------------------------

    def recent(self, limit: int = 10) -> list[str]:
        """Get recently modified notes.

        Args:
            limit: Maximum number of notes to return

        Returns:
            List of note paths, most recent first
        """
        if not self.exists():
            return []

        notes = []
        for md_file in self.vault_path.rglob("*.md"):
            rel_path = md_file.relative_to(self.vault_path)
            # Skip system folders
            if self._is_system_path(rel_path):
                continue
            try:
                modified = md_file.stat().st_mtime
            except OSError:
                # e.g. a broken symlink or a file removed during the scan
                continue
            notes.append((str(rel_path), modified))

        # Sort by modification time, most recent first
        notes.sort(key=lambda entry: entry[1], reverse=True)

        return [rel_path for rel_path, _ in notes[:limit]]

    def orphans(self) -> list[str]:
        """Find orphan notes (not linked from anywhere).

        A note counts as linked when any ``*.md`` file in the vault contains
        a wiki link whose target resolves to the note's file name, including
        links with a folder prefix, a heading anchor or an explicit ".md".

        Returns:
            List of paths to orphan notes
        """
        if not self.exists():
            return []

        candidates: list[Path] = []
        linked_notes: set[str] = set()

        for md_file in self.vault_path.rglob("*.md"):
            rel_path = md_file.relative_to(self.vault_path)
            if not self._is_system_path(rel_path):
                candidates.append(rel_path)

            # Links are collected from every file, system folders included.
            try:
                content = md_file.read_text(encoding=self._ENCODING)
            except (OSError, UnicodeDecodeError):
                continue
            for raw_link in self._WIKI_LINK_RE.findall(content):
                linked_notes.add(raw_link)
                linked_notes.add(self._link_target(raw_link))

        return sorted(str(rel_path) for rel_path in candidates if rel_path.stem not in linked_notes)

    def graph(self, limit: int | None = None, include_tags: bool = False) -> dict:
        """Generate graph data for vault visualization.

        Creates a graph representation of notes and their connections. Notes
        are identified by file name without extension; link targets are
        reduced to that form before matching.

        Args:
            limit: Optional limit on number of nodes (takes most connected)
            include_tags: Include tag nodes in the graph

        Returns:
            Graph dict with 'nodes' and 'edges' lists
        """
        if not self.exists():
            return {"nodes": [], "edges": []}

        # Collect all notes and their links
        note_data: dict[str, dict] = {}
        for md_file in self.vault_path.rglob("*.md"):
            rel_path = md_file.relative_to(self.vault_path)
            if self._is_system_path(rel_path):
                continue

            try:
                content = md_file.read_text(encoding=self._ENCODING)
            except (OSError, UnicodeDecodeError):
                continue

            targets = (self._link_target(raw) for raw in self._WIKI_LINK_RE.findall(content))
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # a tag repeated inside one note counts once.
            tags = list(dict.fromkeys(self._TAG_RE.findall(content))) if include_tags else []

            note_data[md_file.stem] = {
                "path": str(rel_path),
                "links": [target for target in targets if target],
                "tags": tags,
                "size": len(content),
            }

        # Connectivity score = outgoing links + number of notes linking here.
        incoming: dict[str, int] = {}
        for data in note_data.values():
            for target in set(data["links"]):
                incoming[target] = incoming.get(target, 0) + 1
        link_counts = {note_id: len(data["links"]) + incoming.get(note_id, 0) for note_id, data in note_data.items()}

        # Apply limit by taking most connected nodes
        if limit and len(note_data) > limit:
            ranked = sorted(link_counts.items(), key=lambda item: item[1], reverse=True)
            keep_notes = {note_id for note_id, _ in ranked[:limit]}
            note_data = {note_id: data for note_id, data in note_data.items() if note_id in keep_notes}

        # Build nodes list
        nodes = [
            {
                "id": note_id,
                "label": note_id.replace("-", " ").replace("_", " ").title(),
                "path": data["path"],
                "size": min(data["size"] / 100, 50),  # Scale size for visualization
                "connections": link_counts.get(note_id, 0),
                "tags": data["tags"],
            }
            for note_id, data in note_data.items()
        ]

        # Add tag nodes if requested, counting only notes that survived the
        # limit and only tags shared by more than one note.
        tag_node_ids: set[str] = set()
        if include_tags:
            tag_connections: dict[str, list[str]] = {}
            for note_id, data in note_data.items():
                for tag in data["tags"]:
                    tag_connections.setdefault(tag, []).append(note_id)

            for tag, connected_notes in tag_connections.items():
                if len(connected_notes) > 1:
                    tag_node_ids.add(f"tag:{tag}")
                    nodes.append(
                        {
                            "id": f"tag:{tag}",
                            "label": f"#{tag}",
                            "type": "tag",
                            "connections": len(connected_notes),
                        }
                    )

        # Build edges list; ids are sequential in insertion order.
        edges: list[dict] = []
        for note_id, data in note_data.items():
            for target in data["links"]:
                # Only include edge if target exists in our node set
                if target in note_data:
                    edges.append({"id": len(edges), "source": note_id, "target": target, "type": "link"})

            for tag in data["tags"]:
                tag_id = f"tag:{tag}"
                if tag_id in tag_node_ids:
                    edges.append({"id": len(edges), "source": note_id, "target": tag_id, "type": "tag"})

        return {"nodes": nodes, "edges": edges}

    def graph_stats(self) -> dict:
        """Get statistics about the vault graph.

        Returns:
            Dictionary with graph metrics. The same keys are present for
            empty and non-empty vaults: total_notes, total_tags,
            total_connections, avg_connections, most_connected (top 10),
            density and clusters (number of connected components).
        """
        graph_data = self.graph()
        nodes = graph_data["nodes"]
        edges = graph_data["edges"]

        if not nodes:
            return {
                "total_notes": 0,
                "total_tags": 0,
                "total_connections": 0,
                "avg_connections": 0,
                "most_connected": [],
                "density": 0,
                "clusters": 0,
            }

        # Calculate statistics
        connection_counts = [n["connections"] for n in nodes if "connections" in n]
        avg_connections = sum(connection_counts) / len(connection_counts) if connection_counts else 0

        # Find most connected notes
        sorted_nodes = sorted(nodes, key=lambda n: n.get("connections", 0), reverse=True)
        most_connected = [
            {
                "id": n["id"],
                "label": n["label"],
                "connections": n.get("connections", 0),
            }
            for n in sorted_nodes[:10]
        ]

        node_count = len(nodes)
        # Edge count relative to node_count * (node_count - 1) / 2 pairs.
        density = round(len(edges) / (node_count * (node_count - 1)) * 2, 4) if node_count > 1 else 0

        return {
            "total_notes": sum(1 for n in nodes if n.get("type") != "tag"),
            "total_tags": sum(1 for n in nodes if n.get("type") == "tag"),
            "total_connections": len(edges),
            "avg_connections": round(avg_connections, 2),
            "most_connected": most_connected,
            "density": density,
            "clusters": self._count_clusters(nodes, edges),
        }
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Research domain - Zotero, PDFs, literature."""
|
|
2
|
+
|
|
3
|
+
from nexus.research.pdf import PDFDocument, PDFExtractor, PDFSearchResult
|
|
4
|
+
from nexus.research.zotero import ZoteroClient, ZoteroItem
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"ZoteroClient",
|
|
8
|
+
"ZoteroItem",
|
|
9
|
+
"PDFExtractor",
|
|
10
|
+
"PDFDocument",
|
|
11
|
+
"PDFSearchResult",
|
|
12
|
+
]
|