neurostack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neurostack/__init__.py +3 -0
- neurostack/__main__.py +5 -0
- neurostack/brief.py +159 -0
- neurostack/chunker.py +119 -0
- neurostack/cli.py +870 -0
- neurostack/community.py +226 -0
- neurostack/community_search.py +212 -0
- neurostack/config.py +88 -0
- neurostack/embedder.py +127 -0
- neurostack/graph.py +226 -0
- neurostack/leiden.py +180 -0
- neurostack/preflight.py +114 -0
- neurostack/reranker.py +63 -0
- neurostack/schema.py +348 -0
- neurostack/search.py +698 -0
- neurostack/server.py +381 -0
- neurostack/session_index.py +521 -0
- neurostack/summarizer.py +124 -0
- neurostack/triples.py +109 -0
- neurostack/watcher.py +552 -0
- neurostack-0.1.0.dist-info/METADATA +197 -0
- neurostack-0.1.0.dist-info/RECORD +25 -0
- neurostack-0.1.0.dist-info/WHEEL +4 -0
- neurostack-0.1.0.dist-info/entry_points.txt +2 -0
- neurostack-0.1.0.dist-info/licenses/LICENSE +201 -0
neurostack/__init__.py
ADDED
neurostack/__main__.py
ADDED
neurostack/brief.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Session brief generator: vault DB + external memory sources + git log."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import sqlite3
|
|
5
|
+
import subprocess
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from .config import get_config
|
|
10
|
+
from .schema import DB_PATH, get_db
|
|
11
|
+
|
|
12
|
+
# Optional external memory DB (e.g. engram) -- configurable via env var
_DEFAULT_EXTERNAL_MEMORY_DB = str(Path.home() / ".engram" / "engram.db")
EXTERNAL_MEMORY_DB = Path(
    os.environ.get("NEUROSTACK_MEMORY_DB", _DEFAULT_EXTERNAL_MEMORY_DB)
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_recent_vault_changes(conn: sqlite3.Connection, limit: int = 10) -> list[dict]:
    """Return the most recently updated notes, newest first.

    Each dict has path, title, summary_text (None when the note has no
    summary row -- the join is a LEFT JOIN) and updated_at keys.
    """
    cursor = conn.execute(
        """
        SELECT n.path, n.title, s.summary_text, n.updated_at
        FROM notes n
        LEFT JOIN summaries s ON s.note_path = n.path
        ORDER BY n.updated_at DESC
        LIMIT ?
        """,
        (limit,),
    )
    return [dict(record) for record in cursor.fetchall()]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_git_recent(vault_root: Path, limit: int = 5) -> list[str]:
    """Return up to *limit* recent commits from the vault as one-line strings.

    Best-effort: a missing git binary, a timeout, or a non-zero exit
    status (e.g. not a git repository) all yield an empty list.
    """
    cmd = ["git", "log", f"--max-count={limit}", "--oneline", "--no-decorate"]
    try:
        proc = subprocess.run(
            cmd,
            cwd=vault_root,
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return []
    if proc.returncode != 0:
        return []
    return [line.strip() for line in proc.stdout.strip().split("\n") if line.strip()]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def get_external_memories(limit: int = 5) -> list[dict]:
    """Get recent observations from the external memory DB (e.g. engram).

    Best-effort: returns [] when the DB is missing or any error occurs.

    Args:
        limit: Maximum number of memories to return, newest first.

    Returns:
        Dicts with topic_key, content and timestamp keys.
    """
    if not EXTERNAL_MEMORY_DB.exists():
        return []
    econn = None
    try:
        econn = sqlite3.connect(str(EXTERNAL_MEMORY_DB))
        econn.row_factory = sqlite3.Row
        rows = econn.execute(
            """
            SELECT topic_key, content, timestamp
            FROM memories
            ORDER BY timestamp DESC
            LIMIT ?
            """,
            (limit,),
        ).fetchall()
        return [dict(r) for r in rows]
    except Exception:
        # The external DB is optional: schema drift, locking, or
        # corruption degrades to "no memories" rather than crashing.
        return []
    finally:
        # Always release the handle -- previously the connection leaked
        # whenever the query raised before the explicit close().
        if econn is not None:
            econn.close()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_top_notes(conn: sqlite3.Connection, limit: int = 5) -> list[dict]:
    """Return the top *limit* notes ranked by PageRank score.

    Each dict has note_path, title, pagerank and in_degree keys, ordered
    from highest to lowest pagerank.
    """
    cursor = conn.execute(
        """
        SELECT gs.note_path, n.title, gs.pagerank, gs.in_degree
        FROM graph_stats gs
        JOIN notes n ON n.path = gs.note_path
        ORDER BY gs.pagerank DESC
        LIMIT ?
        """,
        (limit,),
    )
    return [dict(record) for record in cursor.fetchall()]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _time_of_day(hour: int) -> str:
    """Map an hour (0-23) to a coarse time-of-day label."""
    if 5 <= hour < 12:
        return "morning"
    if 12 <= hour < 17:
        return "afternoon"
    if 17 <= hour < 21:
        return "evening"
    return "night"


def generate_brief(vault_root: "Path | None" = None) -> str:
    """Generate a compact markdown session brief.

    Combines vault stats, recent note changes, git history, external
    memory observations, and the most-connected notes (by PageRank)
    into a single markdown document.

    Args:
        vault_root: Vault directory used for the git log; defaults to
            the configured vault root. (Annotation fixed: the default
            is None, so the implicit-Optional `Path = None` was wrong.)

    Returns:
        A markdown string, sections joined by newlines.
    """
    if vault_root is None:
        vault_root = get_config().vault_root
    conn = get_db(DB_PATH)

    now = datetime.now()
    time_ctx = _time_of_day(now.hour)

    parts = [f"## Session Brief ({now.strftime('%Y-%m-%d %H:%M')}, {time_ctx})\n"]

    # Vault stats
    note_count = conn.execute("SELECT COUNT(*) as c FROM notes").fetchone()["c"]
    chunk_count = conn.execute("SELECT COUNT(*) as c FROM chunks").fetchone()["c"]
    summary_count = conn.execute("SELECT COUNT(*) as c FROM summaries").fetchone()["c"]
    embedded_count = conn.execute(
        "SELECT COUNT(*) as c FROM chunks WHERE embedding IS NOT NULL"
    ).fetchone()["c"]
    parts.append(
        f"**Vault:** {note_count} notes, {chunk_count} chunks, "
        f"{embedded_count} embedded, {summary_count} summarized\n"
    )

    # Recent changes (summary preferred, title as fallback)
    changes = get_recent_vault_changes(conn, limit=5)
    if changes:
        parts.append("**Recent changes:**")
        for c in changes:
            summary = c.get("summary_text", "") or ""
            if summary:
                parts.append(f"- `{c['path']}`: {summary[:100]}")
            else:
                parts.append(f"- `{c['path']}` ({c['title']})")
        parts.append("")

    # Git history (best-effort; empty list when git is unavailable)
    commits = get_git_recent(vault_root, limit=3)
    if commits:
        parts.append("**Recent commits:**")
        for commit in commits:
            parts.append(f"- {commit}")
        parts.append("")

    # External memory observations
    memories = get_external_memories(limit=3)
    if memories:
        parts.append("**Recent memories:**")
        for e in memories:
            # Guard against NULL content; slicing None raised TypeError.
            content = (e.get("content") or "")[:100]
            parts.append(f"- [{e.get('topic_key', '?')}] {content}")
        parts.append("")

    # Top connected notes
    top = get_top_notes(conn, limit=5)
    if top:
        parts.append("**Most connected notes:**")
        for t in top:
            parts.append(
                f"- `{t['note_path']}` (PR: {t['pagerank']:.4f}, {t['in_degree']} inlinks)"
            )
        parts.append("")

    return "\n".join(parts)
|
neurostack/chunker.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Markdown heading-based chunker with frontmatter parser."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import re
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import yaml
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class Chunk:
    """A heading-delimited slice of a markdown note."""

    # Breadcrumb of the enclosing headings, e.g. "## Architecture > ### Networking";
    # "(intro)" when the content precedes any heading (see chunk_by_headings).
    heading_path: str
    content: str   # the chunk's markdown text (its heading line included)
    position: int  # 0-based order of this chunk within the note
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ParsedNote:
    """Structured result of parsing one markdown note (built by parse_note)."""

    path: str   # note path relative to the vault root
    title: str  # from frontmatter, first H1, or the filename stem
    # Parsed YAML frontmatter; {} when absent or invalid.
    frontmatter: dict = field(default_factory=dict)
    # First 16 hex chars of SHA-256 over the raw file text (change detection).
    content_hash: str = ""
    chunks: list[Chunk] = field(default_factory=list)
    # Unique [[wiki-link]] targets found in the note body.
    wiki_links: list[str] = field(default_factory=list)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# YAML frontmatter fence at the very start of the text: "---\n...\n---\n".
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)
# ATX headings of any level (# through ######), matched line-by-line.
HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
# [[Target]] or [[Target|alias]]; group 1 is the target, stopping at '|' or '#'
# so aliases and in-page anchors are excluded from the link target.
WIKI_LINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:\|[^\]]+?)?\]\]")

# Target max chunk size in characters (~500 tokens ≈ ~2000 chars)
MAX_CHUNK_CHARS = 2000
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def parse_frontmatter(text: str) -> tuple[dict, str]:
    """Split off leading YAML frontmatter.

    Returns a (metadata, body) pair. Metadata is {} when there is no
    frontmatter fence, the YAML is invalid, or the document is empty.
    """
    fence = FRONTMATTER_RE.match(text)
    if fence is None:
        return {}, text
    try:
        meta = yaml.safe_load(fence.group(1))
    except yaml.YAMLError:
        meta = None
    return meta or {}, text[fence.end():]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def chunk_by_headings(content: str) -> list[Chunk]:
    """Split markdown content into Chunks, one per heading section.

    A new chunk starts at every ATX heading (levels 1-6, per HEADING_RE);
    text before the first heading becomes an "(intro)" chunk.  Each
    chunk's heading_path is the breadcrumb of the headings currently on
    the stack, and sections longer than MAX_CHUNK_CHARS are split into
    fixed-size pieces sharing the same heading_path.
    """
    chunks = []
    # Stack of (level, heading text) for the currently-open headings;
    # used to build the "## A > ### B" breadcrumb for each chunk.
    heading_stack: list[tuple[int, str]] = []
    # Lines accumulated since the last heading (that heading line included).
    current_lines: list[str] = []
    position = 0

    def flush():
        # Emit the accumulated lines as one or more chunks.  Reads
        # current_lines and heading_stack via closure, so it always sees
        # the loop's latest rebinding of current_lines.
        nonlocal position
        text = "\n".join(current_lines).strip()
        if text:
            path = " > ".join(f"{'#' * lvl} {name}" for lvl, name in heading_stack) if heading_stack else "(intro)"
            # Split oversized chunks into MAX_CHUNK_CHARS-sized slices.
            if len(text) > MAX_CHUNK_CHARS:
                for i in range(0, len(text), MAX_CHUNK_CHARS):
                    sub = text[i : i + MAX_CHUNK_CHARS].strip()
                    if sub:
                        chunks.append(Chunk(heading_path=path, content=sub, position=position))
                        position += 1
            else:
                chunks.append(Chunk(heading_path=path, content=text, position=position))
                position += 1

    for line in content.split("\n"):
        m = HEADING_RE.match(line)
        if m:
            level = len(m.group(1))
            name = m.group(2).strip()
            # Flush the previous section before the stack changes, so its
            # chunk keeps the old breadcrumb.
            flush()
            current_lines = [line]
            # Pop headings at the same or deeper level, then push this one.
            while heading_stack and heading_stack[-1][0] >= level:
                heading_stack.pop()
            heading_stack.append((level, name))
        else:
            current_lines.append(line)

    # Emit whatever follows the final heading.
    flush()
    return chunks
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def extract_wiki_links(content: str) -> list[str]:
    """Extract unique wiki-link targets from markdown content.

    Deduplicates while preserving first-occurrence order; the previous
    set-based dedup returned the targets in nondeterministic order.
    """
    return list(dict.fromkeys(WIKI_LINK_RE.findall(content)))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def parse_note(path: Path, vault_root: Path) -> ParsedNote:
    """Parse a markdown note into structured data.

    Args:
        path: Path to the .md file (must be located under vault_root).
        vault_root: Vault root; the stored note path is relative to it.

    Returns:
        A ParsedNote with frontmatter, heading chunks, and wiki links.
    """
    text = path.read_text(encoding="utf-8", errors="replace")
    # Short hash is enough for change detection: 16 hex chars = 64 bits.
    content_hash = hashlib.sha256(text.encode()).hexdigest()[:16]

    frontmatter, body = parse_frontmatter(text)
    if not isinstance(frontmatter, dict):
        # safe_load can yield a scalar or list document; .get() on it
        # would raise, so treat non-mapping frontmatter as absent.
        frontmatter = {}

    # Title precedence: frontmatter > first H1 > filename stem.
    # Coerce to str (YAML titles may parse as dates/numbers) and strip,
    # since the H1 regex can capture trailing whitespace.
    title = str(frontmatter.get("title", "") or "").strip()
    if not title:
        h1 = re.search(r"^#\s+(.+)$", body, re.MULTILINE)
        title = h1.group(1).strip() if h1 else path.stem

    rel_path = str(path.relative_to(vault_root))
    chunks = chunk_by_headings(body)
    wiki_links = extract_wiki_links(body)

    return ParsedNote(
        path=rel_path,
        title=title,
        frontmatter=frontmatter,
        content_hash=content_hash,
        chunks=chunks,
        wiki_links=wiki_links,
    )
|