optulus 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ """ContextForge package."""
2
+
3
+ __all__ = ["__version__"]
4
+ __version__ = "0.1.0"
contextforge/cli.py ADDED
@@ -0,0 +1,185 @@
1
+ """Typer CLI entrypoints."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from pathlib import Path
8
+
9
+ import typer
10
+ from rich.console import Console
11
+ from rich.table import Table
12
+
13
+ from contextforge.graph.store import GraphStore
14
+ from contextforge.indexer.run import run_full_index
15
+ from contextforge.query.context import build_context
16
+ from contextforge.utils.config import load_repo_config
17
+
18
app = typer.Typer(help="ContextForge CLI")
# Rich output (tables, warnings, errors) goes to stderr; machine-readable
# output (e.g. `cf context --json`) is written to stdout via typer.echo.
console = Console(stderr=True)
20
+
21
+
22
def _err(msg: str) -> None:
    """Report *msg* as a red error line, then abort the command with exit code 1."""
    console.print(f"[red]Error:[/red] {msg}")
    raise typer.Exit(1)
25
+
26
+
27
+ def _repo_root(path: str) -> Path:
28
+ return Path(path).resolve()
29
+
30
+
31
def _require_index(repo_root: Path) -> Path:
    """Return the path to ``<repo_root>/.cf/index.db``, aborting if it is missing.

    Prints an extra hint when the ``.cf`` directory itself does not exist
    (repo never initialized), then exits with code 1 via ``_err`` in either
    missing case.
    """
    cf_dir = repo_root / ".cf"
    db_path = cf_dir / "index.db"
    # Single source of truth for the "missing index" message (it was previously
    # duplicated verbatim in both branches).
    missing_msg = f"No index at {db_path}. Run: cf index <repo> (creates <repo>/.cf/) then cd into that repo for cf context."
    if not cf_dir.exists():
        console.print("[yellow]ContextForge is not initialized in this repo yet.[/yellow]")
        _err(missing_msg)
    if not db_path.exists():
        _err(missing_msg)
    return db_path
44
+
45
+
46
@app.command()
def index(
    path: str,
    watch: bool = typer.Option(False, "--watch"),
    langs: str | None = typer.Option(None, "--langs"),
    reset: bool = typer.Option(False, "--reset"),
) -> None:
    """Build (or rebuild with --reset) the .cf index for the repo at *path*."""
    repo_root = _repo_root(path)
    if not repo_root.exists():
        _err(f"Path does not exist: {repo_root}")
    config = load_repo_config(repo_root)

    # --langs overrides the configured language set; None means "use config".
    lang_override: set[str] | None = None
    if langs:
        lang_override = {part.strip() for part in langs.split(",") if part.strip()}
    selected_langs = set(config.index_languages) if lang_override is None else lang_override

    run_full_index(repo_root, reset=reset, langs=lang_override, console=console)

    if watch:
        # Imported lazily so the watcher machinery is only loaded for --watch.
        from contextforge.indexer.watch import watch_and_reindex

        watch_and_reindex(repo_root, selected_langs, set(config.index_exclude), config.batch_size)
67
+
68
+
69
@app.command()
def context(
    task: str,
    max_tokens: int = typer.Option(8000, "--max-tokens"),
    top: int = typer.Option(20, "--top"),
    json_out: bool = typer.Option(False, "--json"),
) -> None:
    """Retrieve a ranked context block for *task* from the current repo's index."""
    repo_root = Path.cwd()
    db_path = _require_index(repo_root)
    store = GraphStore(db_path)
    result = build_context(task=task, store=store, repo_root=repo_root, top_n=top, max_tokens=max_tokens)

    if json_out:
        # Machine-readable mode: one JSON document on stdout, nothing else.
        payload = {
            "session_id": result.session_id,
            "task": result.task,
            "token_count": result.token_count,
            "baseline_tokens": result.baseline_tokens,
            "savings": result.baseline_tokens - result.token_count,
            "nodes": [
                {"id": s.node.id, "kind": s.node.kind, "name": s.node.name, "path": s.node.path, "score": s.score}
                for s in result.nodes
            ],
            "context_block": json.loads(result.context_block),
        }
        typer.echo(json.dumps(payload))
        return

    # Human-readable mode: a ranked table plus the raw context block.
    table = Table(title=f'Context for: "{task}"')
    for heading in ("#", "Kind", "Name", "Path", "Score", "Tokens"):
        table.add_column(heading)
    for rank, scored in enumerate(result.nodes, start=1):
        # Whitespace-split word count as a rough per-node token estimate.
        token_estimate = len((scored.node.snippet or "").split())
        table.add_row(
            str(rank),
            scored.node.kind,
            scored.node.name,
            scored.node.path,
            f"{scored.score:.2f}",
            str(token_estimate),
        )
    console.print(table)
    console.print(f"Token estimate: {result.token_count}")
    console.print(f"Baseline (est): {result.baseline_tokens}")
    console.print(f"Savings: {result.baseline_tokens - result.token_count}")
    console.print("\n--- CONTEXT BLOCK ---")
    console.print(result.context_block)
113
+
114
+
115
@app.command()
def stats(since: str | None = typer.Option(None, "--since")) -> None:
    """Show session counts, token/dollar savings, and the most-retrieved nodes."""
    repo_root = Path.cwd()
    db_path = _require_index(repo_root)
    store = GraphStore(db_path)
    sessions = store.list_sessions()

    # Only the "<N>d" (days) form of --since is recognized; anything else
    # leaves the session list unfiltered.
    if since:
        now = time.time()
        if since.endswith("d"):
            days = int(since[:-1] or "0")
            cutoff = now - days * 86400
            sessions = [s for s in sessions if (s.created_at or 0) >= cutoff]

    baseline = 12000  # assumed tokens a session would cost without ContextForge
    total_tokens = sum(s.token_count or 0 for s in sessions)
    total_baseline = len(sessions) * baseline
    savings = total_baseline - total_tokens
    dollars = (savings / 1_000_000) * 15  # $15 per million tokens

    table = Table(title="ContextForge Stats")
    table.add_column("Metric")
    table.add_column("Value")
    avg_tokens = f"{(total_tokens / len(sessions)):.1f}" if sessions else "0"
    table.add_row("Total sessions", str(len(sessions)))
    table.add_row("Avg tokens returned", avg_tokens)
    table.add_row("Cumulative token savings", str(savings))
    table.add_row("Estimated dollar savings", f"${dollars:.2f}")
    console.print(table)

    # Frequency of each node id across all sessions' returned lists.
    counts: dict[str, int] = {}
    for sess in sessions:
        for node_id in sess.nodes_returned or []:
            counts[node_id] = counts.get(node_id, 0) + 1
    ranked = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:10]

    top_table = Table(title="Top Retrieved Nodes")
    top_table.add_column("Node")
    top_table.add_column("Path")
    top_table.add_column("Count")
    for node_id, count in ranked:
        node = store.get_node(node_id)
        if node is None:
            # The node may have been removed by a later re-index; skip it.
            continue
        top_table.add_row(node.name, node.path, str(count))
    console.print(top_table)
155
+
156
+
157
@app.command()
def serve(
    transport: str = typer.Option(
        "stdio",
        "--transport",
        help="stdio for Claude Code / Cursor MCP; sse or streamable-http for HTTP clients",
    ),
    host: str = typer.Option("127.0.0.1", "--host", help="Bind address for sse/streamable-http"),
    port: int = typer.Option(8765, "--port", help="Port for sse/streamable-http (ignored for stdio)"),
    auto_index: bool | None = typer.Option(
        None,
        "--auto-index/--no-auto-index",
        help="Create or repair .cf/index.db on server start; default follows CONTEXTFORGE_AUTO_INDEX",
    ),
) -> None:
    """Run the MCP server on the chosen transport."""
    # Imported lazily so the server stack is only loaded for this command.
    from contextforge.mcp.server import run_server

    if transport not in {"stdio", "sse", "streamable-http"}:
        _err(f"Unknown transport {transport!r}; use stdio, sse, or streamable-http")
    run_server(
        transport=transport,  # type: ignore[arg-type]
        host=host,
        port=port,
        auto_index=auto_index,
    )
182
+
183
+
184
# Allow running the CLI module directly (e.g. `python -m contextforge.cli`).
if __name__ == "__main__":
    app()
File without changes
@@ -0,0 +1,50 @@
1
+ """SQLite schema and migrations for ContextForge."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sqlite3
6
+
7
+ SCHEMA_SQL = """
8
+ CREATE TABLE IF NOT EXISTS nodes (
9
+ id TEXT PRIMARY KEY,
10
+ kind TEXT NOT NULL,
11
+ name TEXT NOT NULL,
12
+ path TEXT NOT NULL,
13
+ language TEXT,
14
+ start_line INTEGER,
15
+ end_line INTEGER,
16
+ snippet TEXT,
17
+ file_hash TEXT,
18
+ embedding BLOB,
19
+ created_at REAL
20
+ );
21
+
22
+ CREATE TABLE IF NOT EXISTS edges (
23
+ src_id TEXT NOT NULL,
24
+ dst_id TEXT NOT NULL,
25
+ kind TEXT NOT NULL,
26
+ weight REAL DEFAULT 1.0,
27
+ PRIMARY KEY (src_id, dst_id, kind)
28
+ );
29
+
30
+ CREATE TABLE IF NOT EXISTS sessions (
31
+ id TEXT PRIMARY KEY,
32
+ task_text TEXT NOT NULL,
33
+ nodes_returned TEXT,
34
+ nodes_used TEXT,
35
+ outcome_signal INTEGER,
36
+ token_count INTEGER,
37
+ created_at REAL
38
+ );
39
+
40
+ CREATE INDEX IF NOT EXISTS idx_nodes_path ON nodes(path);
41
+ CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind);
42
+ CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src_id);
43
+ CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst_id);
44
+ """
45
+
46
+
47
def migrate(conn: sqlite3.Connection) -> None:
    """Apply the ContextForge schema to *conn* (safe to call repeatedly)."""
    cursor = conn.cursor()
    cursor.executescript(SCHEMA_SQL)
    cursor.close()
    conn.commit()
@@ -0,0 +1,224 @@
1
+ """Graph store abstraction over SQLite."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import sqlite3
7
+ import time
8
+ import uuid
9
+ from dataclasses import dataclass
10
+ from pathlib import Path
11
+
12
+ import numpy as np
13
+
14
+ from contextforge.graph.schema import migrate
15
+
16
+
17
+ def _pack_embedding(vector: np.ndarray | None) -> bytes | None:
18
+ if vector is None:
19
+ return None
20
+ return np.asarray(vector, dtype=np.float32).tobytes()
21
+
22
+
23
+ def _unpack_embedding(blob: bytes | None) -> np.ndarray | None:
24
+ if blob is None:
25
+ return None
26
+ return np.frombuffer(blob, dtype=np.float32)
27
+
28
+
29
@dataclass(slots=True)
class Node:
    """A graph node: a file, a symbol (function/class), or an imported module."""

    id: str  # stable hash-based identifier (see indexer.parse._node_id)
    kind: str  # "file", "symbol", or "module"
    name: str
    path: str  # repo-relative source path
    language: str | None = None
    start_line: int | None = None
    end_line: int | None = None
    snippet: str | None = None  # truncated source excerpt; feeds the embedding text
    file_hash: str | None = None  # SHA-1 of the owning file's content
    embedding: np.ndarray | None = None  # float32 vector, stored as a BLOB
    created_at: float | None = None  # unix timestamp, defaulted at upsert time
42
+
43
+
44
@dataclass(slots=True)
class Edge:
    """A typed, weighted relation between two graph nodes."""

    src_id: str
    dst_id: str
    kind: str  # e.g. "contains", "imports", "calls" (see indexer.parse)
    weight: float = 1.0  # currently always left at the default when created
50
+
51
+
52
@dataclass(slots=True)
class Session:
    """One recorded retrieval session and its (optional) outcome."""

    id: str  # uuid4 string
    task_text: str
    nodes_returned: list[str] | None = None  # node ids included in the context block
    nodes_used: list[str] | None = None  # node ids later reported as actually used
    outcome_signal: int | None = None  # 1 = success, 0 = failure, None = unknown
    token_count: int | None = None
    created_at: float | None = None  # unix timestamp
61
+
62
+
63
class GraphStore:
    """Read/write helper for nodes, edges, and sessions.

    Owns a single SQLite connection to *db_path*.  The schema is applied via
    ``migrate`` at construction time, so a fresh store is immediately usable.
    """

    def __init__(self, db_path: Path) -> None:
        self.db_path = db_path
        # Ensure the containing directory (e.g. <repo>/.cf) exists before
        # SQLite tries to create the database file.
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row
        migrate(self.conn)

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    @staticmethod
    def _node_from_row(row: sqlite3.Row) -> Node:
        """Convert a ``nodes`` table row into a Node.

        Shared by ``get_node`` and ``list_nodes`` (the mapping was previously
        duplicated in both).
        """
        return Node(
            id=row["id"],
            kind=row["kind"],
            name=row["name"],
            path=row["path"],
            language=row["language"],
            start_line=row["start_line"],
            end_line=row["end_line"],
            snippet=row["snippet"],
            file_hash=row["file_hash"],
            embedding=_unpack_embedding(row["embedding"]),
            created_at=row["created_at"],
        )

    def upsert_nodes(self, nodes: list[Node]) -> None:
        """Insert or update *nodes* by id.

        On conflict every column is refreshed except ``created_at``, which
        keeps the original insertion time.
        """
        self.conn.executemany(
            """
            INSERT INTO nodes (id, kind, name, path, language, start_line, end_line, snippet, file_hash, embedding, created_at)
            VALUES (:id, :kind, :name, :path, :language, :start_line, :end_line, :snippet, :file_hash, :embedding, :created_at)
            ON CONFLICT(id) DO UPDATE SET
                kind = excluded.kind,
                name = excluded.name,
                path = excluded.path,
                language = excluded.language,
                start_line = excluded.start_line,
                end_line = excluded.end_line,
                snippet = excluded.snippet,
                file_hash = excluded.file_hash,
                embedding = excluded.embedding
            """,
            [
                {
                    "id": n.id,
                    "kind": n.kind,
                    "name": n.name,
                    "path": n.path,
                    "language": n.language,
                    "start_line": n.start_line,
                    "end_line": n.end_line,
                    "snippet": n.snippet,
                    "file_hash": n.file_hash,
                    "embedding": _pack_embedding(n.embedding),
                    # Default the creation time at write time if the caller left it unset.
                    "created_at": n.created_at if n.created_at is not None else time.time(),
                }
                for n in nodes
            ],
        )
        self.conn.commit()

    def upsert_edges(self, edges: list[Edge]) -> None:
        """Insert or update *edges*; on conflict only the weight is refreshed."""
        self.conn.executemany(
            """
            INSERT INTO edges (src_id, dst_id, kind, weight)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(src_id, dst_id, kind) DO UPDATE SET
                weight = excluded.weight
            """,
            [(e.src_id, e.dst_id, e.kind, e.weight) for e in edges],
        )
        self.conn.commit()

    def get_node(self, node_id: str) -> Node | None:
        """Return the node with *node_id*, or None when absent."""
        row = self.conn.execute("SELECT * FROM nodes WHERE id = ?", (node_id,)).fetchone()
        if row is None:
            return None
        return self._node_from_row(row)

    def list_nodes(self) -> list[Node]:
        """Return every node in the store."""
        rows = self.conn.execute("SELECT * FROM nodes").fetchall()
        return [self._node_from_row(row) for row in rows]

    def get_edges_for_node(self, node_id: str) -> list[Edge]:
        """Return all edges touching *node_id* as source or destination."""
        rows = self.conn.execute(
            "SELECT * FROM edges WHERE src_id = ? OR dst_id = ?",
            (node_id, node_id),
        ).fetchall()
        return [Edge(src_id=r["src_id"], dst_id=r["dst_id"], kind=r["kind"], weight=r["weight"]) for r in rows]

    def file_hash(self, path: str) -> str | None:
        """Return the stored content hash of the file node at *path*, if any."""
        row = self.conn.execute(
            "SELECT file_hash FROM nodes WHERE kind='file' AND path = ? LIMIT 1",
            (path,),
        ).fetchone()
        return None if row is None else row["file_hash"]

    def create_session(self, task_text: str, nodes_returned: list[str], token_count: int) -> Session:
        """Record a new retrieval session (fresh uuid4 id) and return it."""
        session = Session(
            id=str(uuid.uuid4()),
            task_text=task_text,
            nodes_returned=nodes_returned,
            token_count=token_count,
            created_at=time.time(),
        )
        self.conn.execute(
            """
            INSERT INTO sessions (id, task_text, nodes_returned, nodes_used, outcome_signal, token_count, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                session.id,
                session.task_text,
                json.dumps(nodes_returned),
                None,  # nodes_used: unknown until update_session_outcome
                None,  # outcome_signal: unknown until update_session_outcome
                token_count,
                session.created_at,
            ),
        )
        self.conn.commit()
        return session

    def update_session_outcome(self, session_id: str, used_node_ids: list[str], success: bool) -> None:
        """Attach the used-node list and a success (1) / failure (0) flag to a session."""
        self.conn.execute(
            """
            UPDATE sessions
            SET nodes_used = ?, outcome_signal = ?
            WHERE id = ?
            """,
            (json.dumps(used_node_ids), 1 if success else 0, session_id),
        )
        self.conn.commit()

    def list_sessions(self) -> list[Session]:
        """Return all sessions, newest first."""
        rows = self.conn.execute("SELECT * FROM sessions ORDER BY created_at DESC").fetchall()
        return [
            Session(
                id=r["id"],
                task_text=r["task_text"],
                nodes_returned=json.loads(r["nodes_returned"]) if r["nodes_returned"] else None,
                nodes_used=json.loads(r["nodes_used"]) if r["nodes_used"] else None,
                outcome_signal=r["outcome_signal"],
                token_count=r["token_count"],
                created_at=r["created_at"],
            )
            for r in rows
        ]
File without changes
@@ -0,0 +1,64 @@
1
+ """Node embedding utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ from pathlib import Path
7
+
8
+ import numpy as np
9
+ from rich.console import Console
10
+ from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeRemainingColumn
11
+
12
+ from contextforge.graph.store import Node
13
+
14
+ MODEL = "BAAI/bge-small-en-v1.5"
15
+
16
+ console = Console(stderr=True)
17
+
18
+
19
+ def _fallback_embedding(text: str, size: int = 384) -> np.ndarray:
20
+ digest = hashlib.sha256(text.encode("utf-8")).digest()
21
+ seed = int.from_bytes(digest[:8], "little")
22
+ rng = np.random.default_rng(seed)
23
+ vec = rng.random(size, dtype=np.float32)
24
+ norm = np.linalg.norm(vec)
25
+ if norm == 0:
26
+ return vec
27
+ return vec / norm
28
+
29
+
30
def embed_nodes(nodes: list[Node], batch_size: int = 64, cache_dir: Path | None = None) -> list[Node]:
    """Attach a float32 embedding to every node, in place; returns the same list.

    Tries the sentence-transformers model named by ``MODEL``; if it cannot be
    loaded, falls back to deterministic hash-seeded vectors so indexing still
    works offline.
    """
    if not nodes:
        return nodes

    model = None
    try:
        from sentence_transformers import SentenceTransformer

        cache = cache_dir or (Path.home() / ".cache" / "contextforge")
        cache.mkdir(parents=True, exist_ok=True)
        model = SentenceTransformer(MODEL, cache_folder=str(cache))
    except Exception:
        # Deliberate broad catch: any import/download/load failure means
        # "use the fallback vectors", never "crash the indexer".
        console.print("[yellow]Warning:[/yellow] embedding model unavailable, using fallback vectors")

    texts = [f"{n.kind}: {n.name}. {n.snippet or ''}" for n in nodes]
    bar = Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TimeRemainingColumn(),
        console=console,
        transient=True,
    )
    with bar as progress:
        task = progress.add_task("Embedding nodes", total=len(nodes))
        for start in range(0, len(nodes), batch_size):
            chunk = nodes[start : start + batch_size]
            chunk_texts = texts[start : start + batch_size]
            if model is None:
                vectors = np.stack([_fallback_embedding(t) for t in chunk_texts], axis=0)
            else:
                vectors = model.encode(chunk_texts, convert_to_numpy=True, normalize_embeddings=True)
            for node, vector in zip(chunk, vectors, strict=True):
                node.embedding = np.asarray(vector, dtype=np.float32)
            progress.advance(task, len(chunk))
    return nodes
@@ -0,0 +1,166 @@
1
+ """Parse source files into graph nodes and edges."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ import hashlib
7
+ import re
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+
11
+ from contextforge.graph.store import Edge, Node
12
+ from contextforge.indexer.tree_sitter_extract import try_extract_symbols
13
+
14
+
15
+ def _node_id(kind: str, path: str, name: str) -> str:
16
+ return hashlib.sha1(f"{kind}:{path}:{name}".encode("utf-8")).hexdigest()
17
+
18
+
19
+ def _file_hash(content: str) -> str:
20
+ return hashlib.sha1(content.encode("utf-8")).hexdigest()
21
+
22
+
23
@dataclass(slots=True)
class ParseResult:
    """Everything extracted from one source file."""

    file_node: Node  # the file itself
    symbol_nodes: list[Node]  # functions/classes/etc. found in the file
    edges: list[Edge]  # contains/imports/calls edges (which kinds depends on language)
28
+
29
+
30
def _parse_python_with_ast(content: str, rel_path: str) -> tuple[list[Node], list[Edge]]:
    """Extract symbol nodes and contains/imports/calls edges from Python source.

    Raises:
        SyntaxError: propagated from ``ast.parse`` for invalid source.
    """
    tree = ast.parse(content)
    symbols: list[Node] = []
    edges: list[Edge] = []
    file_id = _node_id("file", rel_path, rel_path)
    known_symbol_names: set[str] = set()

    # Pass 1: declarations (functions/classes) and imports.
    for item in ast.walk(tree):
        if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = item.name
            symbol_id = _node_id("symbol", rel_path, name)
            known_symbol_names.add(name)
            snippet = ast.get_source_segment(content, item) or ""
            symbols.append(
                Node(
                    id=symbol_id,
                    kind="symbol",
                    name=name,
                    path=rel_path,
                    language="python",
                    start_line=getattr(item, "lineno", None),
                    end_line=getattr(item, "end_lineno", None),
                    snippet=snippet[:300],
                    file_hash=_file_hash(content),
                )
            )
            edges.append(Edge(src_id=file_id, dst_id=symbol_id, kind="contains"))
        if isinstance(item, ast.Import):
            for alias in item.names:
                mod_id = _node_id("module", rel_path, alias.name)
                edges.append(Edge(src_id=file_id, dst_id=mod_id, kind="imports"))
        if isinstance(item, ast.ImportFrom):
            module = item.module or ""
            if module:
                mod_id = _node_id("module", rel_path, module)
                edges.append(Edge(src_id=file_id, dst_id=mod_id, kind="imports"))

    # Pass 2: call edges.  Bug fix: previously both src and dst were computed
    # from the callee name, producing meaningless self-loop edges.  Each call
    # is now attributed to its innermost enclosing function; calls at module
    # or class level are attributed to the file node.
    parents: dict[ast.AST, ast.AST] = {}
    for parent in ast.walk(tree):
        for child in ast.iter_child_nodes(parent):
            parents[child] = parent

    for item in ast.walk(tree):
        if not isinstance(item, ast.Call):
            continue
        callee: str | None = None
        if isinstance(item.func, ast.Name):
            callee = item.func.id
        elif isinstance(item.func, ast.Attribute):
            callee = item.func.attr
        if not callee or callee not in known_symbol_names:
            continue
        # Climb to the nearest enclosing function definition, if any.
        scope = parents.get(item)
        while scope is not None and not isinstance(scope, (ast.FunctionDef, ast.AsyncFunctionDef)):
            scope = parents.get(scope)
        src_id = _node_id("symbol", rel_path, scope.name) if scope is not None else file_id
        edges.append(Edge(src_id=src_id, dst_id=_node_id("symbol", rel_path, callee), kind="calls"))

    return symbols, edges
80
+
81
+
82
def _parse_ts_js_regex(content: str, rel_path: str, language: str, file_id: str, file_hash: str) -> tuple[list[Node], list[Edge]]:
    """Regex fallback for TS/JS: function, class, and `const X = (` declarations."""
    found: list[Node] = []
    links: list[Edge] = []
    declaration_patterns = (
        re.compile(r"function\s+([A-Za-z_][A-Za-z0-9_]*)"),
        re.compile(r"class\s+([A-Za-z_][A-Za-z0-9_]*)"),
        re.compile(r"const\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*\("),
    )
    for pattern in declaration_patterns:
        for match in pattern.finditer(content):
            symbol = match.group(1)
            sid = _node_id("symbol", rel_path, symbol)
            found.append(
                Node(
                    id=sid,
                    kind="symbol",
                    name=symbol,
                    path=rel_path,
                    language=language,
                    snippet=symbol,
                    file_hash=file_hash,
                )
            )
            links.append(Edge(src_id=file_id, dst_id=sid, kind="contains"))
    return found, links
107
+
108
+
109
def _parse_go_regex(content: str, rel_path: str, language: str, file_id: str, file_hash: str) -> tuple[list[Node], list[Edge]]:
    """Regex fallback for Go: `func NAME` declarations.

    Methods with receivers (``func (r T) Name``) do not match this pattern.
    """
    found: list[Node] = []
    links: list[Edge] = []
    for match in re.finditer(r"func\s+([A-Za-z_][A-Za-z0-9_]*)", content):
        symbol = match.group(1)
        sid = _node_id("symbol", rel_path, symbol)
        found.append(
            Node(id=sid, kind="symbol", name=symbol, path=rel_path, language=language, snippet=symbol, file_hash=file_hash)
        )
        links.append(Edge(src_id=file_id, dst_id=sid, kind="contains"))
    return found, links
120
+
121
+
122
def _parse_rust_regex(content: str, rel_path: str, language: str, file_id: str, file_hash: str) -> tuple[list[Node], list[Edge]]:
    """Regex fallback for Rust: `fn`, `struct`, and `enum` declarations."""
    found: list[Node] = []
    links: list[Edge] = []
    for match in re.finditer(r"(fn|struct|enum)\s+([A-Za-z_][A-Za-z0-9_]*)", content):
        symbol = match.group(2)
        sid = _node_id("symbol", rel_path, symbol)
        found.append(
            Node(id=sid, kind="symbol", name=symbol, path=rel_path, language=language, snippet=symbol, file_hash=file_hash)
        )
        links.append(Edge(src_id=file_id, dst_id=sid, kind="contains"))
    return found, links
133
+
134
+
135
def parse_file(path: Path, rel_path: str, language: str) -> ParseResult:
    """Parse one source file into a file node, its symbol nodes, and edges.

    Python uses the stdlib ``ast``; other supported languages try the
    tree-sitter extractor first and fall back to per-language regexes.
    Unsupported languages yield the file node only.
    """
    content = path.read_text(encoding="utf-8", errors="ignore")
    file_id = _node_id("file", rel_path, rel_path)
    fh = _file_hash(content)
    file_node = Node(
        id=file_id,
        kind="file",
        name=path.name,
        path=rel_path,
        language=language,
        start_line=1,
        end_line=max(content.count("\n") + 1, 1),
        snippet=content[:300],
        file_hash=fh,
    )

    symbol_nodes: list[Node] = []
    edges: list[Edge] = []
    if language == "python":
        symbol_nodes, edges = _parse_python_with_ast(content, rel_path)
    elif language in {"typescript", "javascript", "go", "rust"}:
        extracted = try_extract_symbols(path, rel_path, content, language, file_id, fh)
        if extracted is not None:
            symbol_nodes, edges = extracted
        else:
            # tree-sitter unavailable: dispatch to the regex fallback.
            regex_fallbacks = {
                "typescript": _parse_ts_js_regex,
                "javascript": _parse_ts_js_regex,
                "go": _parse_go_regex,
                "rust": _parse_rust_regex,
            }
            symbol_nodes, edges = regex_fallbacks[language](content, rel_path, language, file_id, fh)

    return ParseResult(file_node=file_node, symbol_nodes=symbol_nodes, edges=edges)