nexus-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,662 @@
1
+ """Obsidian vault operations for Nexus CLI."""
2
+
3
+ import json
4
+ import re
5
+ import subprocess
6
+ from dataclasses import dataclass, field
7
+ from datetime import date
8
+ from pathlib import Path
9
+
10
+
11
@dataclass
class SearchResult:
    """One matching line reported by a vault search."""

    # Location of the note that contains the match.
    path: str
    # Line on which the match was found.
    line_number: int
    # Text of the matching line.
    content: str
    # The matched fragment; empty when none was recorded.
    match_text: str = ""

    def to_dict(self) -> dict:
        """Return the result's fields as a plain dictionary."""
        exported = ("path", "line_number", "content", "match_text")
        return {name: getattr(self, name) for name in exported}
28
+
29
+
30
@dataclass
class Note:
    """A single note together with the metadata extracted from it."""

    path: str
    title: str
    content: str
    frontmatter: dict = field(default_factory=dict)
    links: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a dictionary view of the note.

        The full ``content`` is not included; a ``content_preview`` entry
        carries at most the first 200 characters, followed by ``"..."``
        when the content was cut.
        """
        preview = self.content
        if len(preview) > 200:
            preview = preview[:200] + "..."
        return dict(
            path=self.path,
            title=self.title,
            frontmatter=self.frontmatter,
            links=self.links,
            tags=self.tags,
            content_preview=preview,
        )
51
+
52
+
53
+ class VaultManager:
54
+ """Manages operations on an Obsidian vault."""
55
+
56
+ def __init__(self, vault_path: Path, templates_path: Path | None = None):
57
+ """Initialize with vault path.
58
+
59
+ Args:
60
+ vault_path: Path to the Obsidian vault root
61
+ templates_path: Path to templates folder (defaults to vault/_SYSTEM/templates)
62
+ """
63
+ self.vault_path = Path(vault_path).expanduser()
64
+ self.templates_path = (
65
+ Path(templates_path).expanduser() if templates_path else self.vault_path / "_SYSTEM" / "templates"
66
+ )
67
+
68
+ def exists(self) -> bool:
69
+ """Check if the vault exists."""
70
+ return self.vault_path.exists() and self.vault_path.is_dir()
71
+
72
+ def note_count(self) -> int:
73
+ """Count total notes in vault."""
74
+ if not self.exists():
75
+ return 0
76
+ return len(list(self.vault_path.rglob("*.md")))
77
+
78
+ def _resolve_path(self, note_path: str) -> Path:
79
+ """Resolve a note path to full path, adding .md if needed."""
80
+ path = Path(note_path)
81
+ if not path.suffix:
82
+ path = path.with_suffix(".md")
83
+ if not path.is_absolute():
84
+ path = self.vault_path / path
85
+ return path
86
+
87
+ def read(self, note_path: str) -> Note:
88
+ """Read a note from the vault.
89
+
90
+ Args:
91
+ note_path: Path relative to vault root (e.g., "projects/my-project.md")
92
+
93
+ Returns:
94
+ Note object with content and metadata
95
+
96
+ Raises:
97
+ FileNotFoundError: If note doesn't exist
98
+ """
99
+ full_path = self._resolve_path(note_path)
100
+
101
+ if not full_path.exists():
102
+ raise FileNotFoundError(f"Note not found: {note_path}")
103
+
104
+ content = full_path.read_text()
105
+
106
+ # Parse frontmatter
107
+ frontmatter = {}
108
+ body = content
109
+ if content.startswith("---"):
110
+ parts = content.split("---", 2)
111
+ if len(parts) >= 3:
112
+ import yaml
113
+
114
+ try:
115
+ frontmatter = yaml.safe_load(parts[1]) or {}
116
+ except yaml.YAMLError:
117
+ pass
118
+ body = parts[2].strip()
119
+
120
+ # Extract wiki links [[link]]
121
+ links = re.findall(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]", content)
122
+
123
+ # Extract tags #tag
124
+ tags = re.findall(r"(?:^|\s)#([a-zA-Z][a-zA-Z0-9_/-]*)", content)
125
+
126
+ # Get title from frontmatter or filename
127
+ title = frontmatter.get("title", full_path.stem)
128
+
129
+ return Note(
130
+ path=str(full_path.relative_to(self.vault_path)),
131
+ title=title,
132
+ content=body,
133
+ frontmatter=frontmatter,
134
+ links=links,
135
+ tags=list(set(tags)),
136
+ )
137
+
138
+ def write(self, note_path: str, content: str, frontmatter: dict | None = None) -> Path:
139
+ """Write content to a note.
140
+
141
+ Args:
142
+ note_path: Path relative to vault root
143
+ content: Note content (markdown)
144
+ frontmatter: Optional YAML frontmatter dict
145
+
146
+ Returns:
147
+ Full path to the created/updated note
148
+ """
149
+ full_path = self._resolve_path(note_path)
150
+
151
+ # Ensure parent directory exists
152
+ full_path.parent.mkdir(parents=True, exist_ok=True)
153
+
154
+ # Build content with frontmatter
155
+ if frontmatter:
156
+ import yaml
157
+
158
+ fm_str = yaml.dump(frontmatter, default_flow_style=False, sort_keys=False)
159
+ full_content = f"---\n{fm_str}---\n\n{content}"
160
+ else:
161
+ full_content = content
162
+
163
+ full_path.write_text(full_content)
164
+ return full_path
165
+
166
+ def search(self, query: str, limit: int = 20) -> list[SearchResult]:
167
+ """Search vault using ripgrep.
168
+
169
+ Args:
170
+ query: Search query (supports regex)
171
+ limit: Maximum results to return
172
+
173
+ Returns:
174
+ List of SearchResult objects
175
+ """
176
+ if not self.exists():
177
+ return []
178
+
179
+ try:
180
+ result = subprocess.run(
181
+ [
182
+ "rg",
183
+ "--json",
184
+ "--max-count",
185
+ str(limit * 2), # Get extra to filter
186
+ "--glob",
187
+ "*.md",
188
+ "--ignore-case",
189
+ query,
190
+ str(self.vault_path),
191
+ ],
192
+ capture_output=True,
193
+ text=True,
194
+ timeout=30,
195
+ )
196
+ except subprocess.TimeoutExpired:
197
+ return []
198
+ except FileNotFoundError:
199
+ # ripgrep not installed
200
+ return self._fallback_search(query, limit)
201
+
202
+ results = []
203
+ for line in result.stdout.strip().split("\n"):
204
+ if not line:
205
+ continue
206
+ try:
207
+ data = json.loads(line)
208
+ if data.get("type") == "match":
209
+ match_data = data["data"]
210
+ path = Path(match_data["path"]["text"])
211
+ rel_path = str(path.relative_to(self.vault_path))
212
+
213
+ # Get line content
214
+ line_content = match_data["lines"]["text"].strip()
215
+
216
+ # Get match text
217
+ match_text = ""
218
+ if match_data.get("submatches"):
219
+ match_text = match_data["submatches"][0]["match"]["text"]
220
+
221
+ results.append(
222
+ SearchResult(
223
+ path=rel_path,
224
+ line_number=match_data["line_number"],
225
+ content=line_content,
226
+ match_text=match_text,
227
+ )
228
+ )
229
+
230
+ if len(results) >= limit:
231
+ break
232
+ except (json.JSONDecodeError, KeyError):
233
+ continue
234
+
235
+ return results
236
+
237
+ def _fallback_search(self, query: str, limit: int) -> list[SearchResult]:
238
+ """Fallback search without ripgrep (slower)."""
239
+ results = []
240
+ pattern = re.compile(query, re.IGNORECASE)
241
+
242
+ for md_file in self.vault_path.rglob("*.md"):
243
+ try:
244
+ content = md_file.read_text()
245
+ for i, line in enumerate(content.split("\n"), 1):
246
+ if pattern.search(line):
247
+ results.append(
248
+ SearchResult(
249
+ path=str(md_file.relative_to(self.vault_path)),
250
+ line_number=i,
251
+ content=line.strip(),
252
+ match_text=query,
253
+ )
254
+ )
255
+ if len(results) >= limit:
256
+ return results
257
+ except (OSError, UnicodeDecodeError):
258
+ continue
259
+
260
+ return results
261
+
262
+ def search_files(self, query: str, limit: int = 20) -> list[str]:
263
+ """Search for file names matching query.
264
+
265
+ Args:
266
+ query: Search query for file names
267
+
268
+ Returns:
269
+ List of matching file paths
270
+ """
271
+ if not self.exists():
272
+ return []
273
+
274
+ pattern = re.compile(query, re.IGNORECASE)
275
+ matches = []
276
+
277
+ for md_file in self.vault_path.rglob("*.md"):
278
+ if pattern.search(md_file.name) or pattern.search(str(md_file)):
279
+ matches.append(str(md_file.relative_to(self.vault_path)))
280
+ if len(matches) >= limit:
281
+ break
282
+
283
+ return matches
284
+
285
+ def backlinks(self, note_path: str) -> list[str]:
286
+ """Find notes that link to this note.
287
+
288
+ Args:
289
+ note_path: Path to note to find backlinks for
290
+
291
+ Returns:
292
+ List of paths to notes that link to this note
293
+ """
294
+ # Get the note name without extension for wiki link matching
295
+ note_name = Path(note_path).stem
296
+
297
+ # Search for [[note_name]] or [[note_name|alias]]
298
+ pattern = rf"\[\[{re.escape(note_name)}(?:\|[^\]]+)?\]\]"
299
+
300
+ results = self.search(pattern, limit=100)
301
+
302
+ # Get unique file paths (excluding the source note)
303
+ source_path = self._resolve_path(note_path)
304
+ linking_notes = set()
305
+
306
+ for result in results:
307
+ full_result_path = self.vault_path / result.path
308
+ if full_result_path != source_path:
309
+ linking_notes.add(result.path)
310
+
311
+ return sorted(linking_notes)
312
+
313
+ def daily(self, target_date: date | None = None) -> Path:
314
+ """Get or create a daily note.
315
+
316
+ Args:
317
+ target_date: Date for the note (defaults to today)
318
+
319
+ Returns:
320
+ Path to the daily note
321
+ """
322
+ if target_date is None:
323
+ target_date = date.today()
324
+
325
+ daily_path = f"50-DAILY/{target_date.isoformat()}.md"
326
+ full_path = self._resolve_path(daily_path)
327
+
328
+ if not full_path.exists():
329
+ # Create from template or default
330
+ template_content = self._load_template("daily")
331
+ if template_content:
332
+ content = template_content.replace("{{date}}", target_date.isoformat())
333
+ content = content.replace("{{date:YYYY-MM-DD}}", target_date.isoformat())
334
+ content = content.replace(
335
+ "{{date:dddd}}",
336
+ target_date.strftime("%A"),
337
+ )
338
+ else:
339
+ content = f"# {target_date.isoformat()}\n\n## Tasks\n\n- [ ] \n\n## Notes\n\n"
340
+
341
+ self.write(
342
+ daily_path,
343
+ content,
344
+ frontmatter={"type": "daily", "date": target_date.isoformat()},
345
+ )
346
+
347
+ return full_path
348
+
349
+ def _load_template(self, template_name: str) -> str | None:
350
+ """Load a template by name.
351
+
352
+ Args:
353
+ template_name: Template name (without .md extension)
354
+
355
+ Returns:
356
+ Template content or None if not found
357
+ """
358
+ template_path = self.templates_path / f"{template_name}.md"
359
+
360
+ if not template_path.exists():
361
+ # Try alternate locations
362
+ alt_paths = [
363
+ self.templates_path / f"tpl-{template_name}.md",
364
+ self.vault_path / "templates" / f"{template_name}.md",
365
+ self.vault_path / "_templates" / f"{template_name}.md",
366
+ ]
367
+ for alt in alt_paths:
368
+ if alt.exists():
369
+ template_path = alt
370
+ break
371
+ else:
372
+ return None
373
+
374
+ return template_path.read_text()
375
+
376
+ def template(
377
+ self,
378
+ template_name: str,
379
+ dest_path: str,
380
+ variables: dict | None = None,
381
+ ) -> Path:
382
+ """Create a note from a template.
383
+
384
+ Args:
385
+ template_name: Name of template to use
386
+ dest_path: Destination path for new note
387
+ variables: Variables to substitute in template
388
+
389
+ Returns:
390
+ Path to created note
391
+
392
+ Raises:
393
+ FileNotFoundError: If template doesn't exist
394
+ """
395
+ template_content = self._load_template(template_name)
396
+ if template_content is None:
397
+ raise FileNotFoundError(f"Template not found: {template_name}")
398
+
399
+ # Substitute variables
400
+ content = template_content
401
+ if variables:
402
+ for key, value in variables.items():
403
+ content = content.replace(f"{{{{{key}}}}}", str(value))
404
+
405
+ # Always substitute date
406
+ today = date.today()
407
+ content = content.replace("{{date}}", today.isoformat())
408
+ content = content.replace("{{date:YYYY-MM-DD}}", today.isoformat())
409
+
410
+ # Write the note
411
+ return self.write(dest_path, content)
412
+
413
+ def list_templates(self) -> list[str]:
414
+ """List available templates.
415
+
416
+ Returns:
417
+ List of template names
418
+ """
419
+ if not self.templates_path.exists():
420
+ return []
421
+
422
+ templates = []
423
+ for md_file in self.templates_path.glob("*.md"):
424
+ name = md_file.stem
425
+ # Remove common prefixes
426
+ if name.startswith("tpl-"):
427
+ name = name[4:]
428
+ templates.append(name)
429
+
430
+ return sorted(templates)
431
+
432
+ def recent(self, limit: int = 10) -> list[str]:
433
+ """Get recently modified notes.
434
+
435
+ Args:
436
+ limit: Maximum number of notes to return
437
+
438
+ Returns:
439
+ List of note paths, most recent first
440
+ """
441
+ if not self.exists():
442
+ return []
443
+
444
+ notes = []
445
+ for md_file in self.vault_path.rglob("*.md"):
446
+ # Skip system folders
447
+ rel_path = md_file.relative_to(self.vault_path)
448
+ if str(rel_path).startswith(("_", ".")):
449
+ continue
450
+ notes.append((md_file, md_file.stat().st_mtime))
451
+
452
+ # Sort by modification time, most recent first
453
+ notes.sort(key=lambda x: x[1], reverse=True)
454
+
455
+ return [str(n[0].relative_to(self.vault_path)) for n in notes[:limit]]
456
+
457
+ def orphans(self) -> list[str]:
458
+ """Find orphan notes (not linked from anywhere).
459
+
460
+ Returns:
461
+ List of paths to orphan notes
462
+ """
463
+ if not self.exists():
464
+ return []
465
+
466
+ # Get all notes
467
+ all_notes = set()
468
+ for md_file in self.vault_path.rglob("*.md"):
469
+ rel_path = md_file.relative_to(self.vault_path)
470
+ if not str(rel_path).startswith(("_", ".")):
471
+ all_notes.add(md_file.stem)
472
+
473
+ # Find all links
474
+ linked_notes = set()
475
+ for md_file in self.vault_path.rglob("*.md"):
476
+ try:
477
+ content = md_file.read_text()
478
+ links = re.findall(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]", content)
479
+ linked_notes.update(links)
480
+ except (OSError, UnicodeDecodeError):
481
+ continue
482
+
483
+ # Find orphans
484
+ orphans = all_notes - linked_notes
485
+
486
+ # Get full paths
487
+ orphan_paths = []
488
+ for md_file in self.vault_path.rglob("*.md"):
489
+ if md_file.stem in orphans:
490
+ rel_path = md_file.relative_to(self.vault_path)
491
+ if not str(rel_path).startswith(("_", ".")):
492
+ orphan_paths.append(str(rel_path))
493
+
494
+ return sorted(orphan_paths)
495
+
496
+ def graph(self, limit: int | None = None, include_tags: bool = False) -> dict:
497
+ """Generate graph data for vault visualization.
498
+
499
+ Creates a graph representation of notes and their connections.
500
+
501
+ Args:
502
+ limit: Optional limit on number of nodes (takes most connected)
503
+ include_tags: Include tag nodes in the graph
504
+
505
+ Returns:
506
+ Graph dict with 'nodes' and 'edges' lists
507
+ """
508
+ if not self.exists():
509
+ return {"nodes": [], "edges": []}
510
+
511
+ # Collect all notes and their links
512
+ note_data = {}
513
+ tag_connections = {}
514
+
515
+ for md_file in self.vault_path.rglob("*.md"):
516
+ rel_path = md_file.relative_to(self.vault_path)
517
+ if str(rel_path).startswith(("_", ".")):
518
+ continue
519
+
520
+ try:
521
+ content = md_file.read_text()
522
+ note_id = md_file.stem
523
+
524
+ # Extract links
525
+ links = re.findall(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]", content)
526
+
527
+ # Extract tags if requested
528
+ tags = []
529
+ if include_tags:
530
+ tags = re.findall(r"#([\w/-]+)", content)
531
+ for tag in tags:
532
+ if tag not in tag_connections:
533
+ tag_connections[tag] = []
534
+ tag_connections[tag].append(note_id)
535
+
536
+ note_data[note_id] = {
537
+ "path": str(rel_path),
538
+ "links": links,
539
+ "tags": tags,
540
+ "size": len(content),
541
+ }
542
+ except (OSError, UnicodeDecodeError):
543
+ continue
544
+
545
+ # Calculate connectivity scores
546
+ link_counts = {}
547
+ for note_id, data in note_data.items():
548
+ # Count outgoing links
549
+ out_links = len(data["links"])
550
+ # Count incoming links (how many notes link to this one)
551
+ in_links = sum(1 for other_data in note_data.values() if note_id in other_data["links"])
552
+ link_counts[note_id] = out_links + in_links
553
+
554
+ # Apply limit by taking most connected nodes
555
+ if limit and len(note_data) > limit:
556
+ sorted_notes = sorted(link_counts.items(), key=lambda x: x[1], reverse=True)
557
+ keep_notes = {note_id for note_id, _ in sorted_notes[:limit]}
558
+ note_data = {k: v for k, v in note_data.items() if k in keep_notes}
559
+
560
+ # Build nodes list
561
+ nodes = []
562
+ for note_id, data in note_data.items():
563
+ nodes.append(
564
+ {
565
+ "id": note_id,
566
+ "label": note_id.replace("-", " ").replace("_", " ").title(),
567
+ "path": data["path"],
568
+ "size": min(data["size"] / 100, 50), # Scale size for visualization
569
+ "connections": link_counts.get(note_id, 0),
570
+ "tags": data["tags"],
571
+ }
572
+ )
573
+
574
+ # Add tag nodes if requested
575
+ if include_tags:
576
+ for tag, connected_notes in tag_connections.items():
577
+ # Only include tags with multiple connections
578
+ if len(connected_notes) > 1:
579
+ nodes.append(
580
+ {
581
+ "id": f"tag:{tag}",
582
+ "label": f"#{tag}",
583
+ "type": "tag",
584
+ "connections": len(connected_notes),
585
+ }
586
+ )
587
+
588
+ # Build edges list
589
+ edges = []
590
+ edge_id = 0
591
+ for note_id, data in note_data.items():
592
+ for target in data["links"]:
593
+ # Only include edge if target exists in our node set
594
+ if target in note_data:
595
+ edges.append(
596
+ {
597
+ "id": edge_id,
598
+ "source": note_id,
599
+ "target": target,
600
+ "type": "link",
601
+ }
602
+ )
603
+ edge_id += 1
604
+
605
+ # Add tag edges if requested
606
+ if include_tags:
607
+ for tag in data["tags"]:
608
+ if f"tag:{tag}" in [n["id"] for n in nodes]:
609
+ edges.append(
610
+ {
611
+ "id": edge_id,
612
+ "source": note_id,
613
+ "target": f"tag:{tag}",
614
+ "type": "tag",
615
+ }
616
+ )
617
+ edge_id += 1
618
+
619
+ return {"nodes": nodes, "edges": edges}
620
+
621
+ def graph_stats(self) -> dict:
622
+ """Get statistics about the vault graph.
623
+
624
+ Returns:
625
+ Dictionary with graph metrics
626
+ """
627
+ graph_data = self.graph()
628
+ nodes = graph_data["nodes"]
629
+ edges = graph_data["edges"]
630
+
631
+ if not nodes:
632
+ return {
633
+ "total_notes": 0,
634
+ "total_connections": 0,
635
+ "avg_connections": 0,
636
+ "most_connected": [],
637
+ "clusters": 0,
638
+ }
639
+
640
+ # Calculate statistics
641
+ connection_counts = [n["connections"] for n in nodes if "connections" in n]
642
+ avg_connections = sum(connection_counts) / len(connection_counts) if connection_counts else 0
643
+
644
+ # Find most connected notes
645
+ sorted_nodes = sorted(nodes, key=lambda x: x.get("connections", 0), reverse=True)
646
+ most_connected = [
647
+ {
648
+ "id": n["id"],
649
+ "label": n["label"],
650
+ "connections": n.get("connections", 0),
651
+ }
652
+ for n in sorted_nodes[:10]
653
+ ]
654
+
655
+ return {
656
+ "total_notes": len([n for n in nodes if n.get("type") != "tag"]),
657
+ "total_tags": len([n for n in nodes if n.get("type") == "tag"]),
658
+ "total_connections": len(edges),
659
+ "avg_connections": round(avg_connections, 2),
660
+ "most_connected": most_connected,
661
+ "density": round(len(edges) / (len(nodes) * (len(nodes) - 1)) * 2, 4) if len(nodes) > 1 else 0,
662
+ }
@@ -0,0 +1,12 @@
1
+ """Research domain - Zotero, PDFs, literature."""
2
+
3
+ from nexus.research.pdf import PDFDocument, PDFExtractor, PDFSearchResult
4
+ from nexus.research.zotero import ZoteroClient, ZoteroItem
5
+
6
# Names re-exported by this package, grouped by the module they come from.
__all__ = [
    # nexus.research.zotero
    "ZoteroClient",
    "ZoteroItem",
    # nexus.research.pdf
    "PDFExtractor",
    "PDFDocument",
    "PDFSearchResult",
]