onboarding-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "mcp__onboarding-agent__build_knowledge_graph"
5
+ ]
6
+ }
7
+ }
@@ -0,0 +1,8 @@
1
+ Claude.md
2
+ .venv/
3
+ .onboarding_agent/
4
+ __pycache__/
5
+ *.pyc
6
+ dist/
7
+ *.egg-info/
8
+ .claude/
@@ -0,0 +1,8 @@
1
+ {
2
+ "mcpServers": {
3
+ "onboarding-agent": {
4
+ "command": "uv",
5
+ "args": ["--directory", "/Users/abhirambanda/onboarding-agent", "run", "main.py"]
6
+ }
7
+ }
8
+ }
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,181 @@
1
+ Metadata-Version: 2.4
2
+ Name: onboarding-agent
3
+ Version: 0.1.0
4
+ Summary: An MCP server that onboards you to any codebase — point it at a repo and ask questions like a senior engineer who knows the project inside out.
5
+ License-Expression: MIT
6
+ Keywords: codebase,developer-tools,knowledge-graph,mcp,onboarding
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: gitpython>=3.1.50
9
+ Requires-Dist: mcp>=1.27.2
10
+ Description-Content-Type: text/markdown
11
+
12
+ # onboarding-agent
13
+
14
+ An MCP server that onboards you to any codebase. Point it at a repo, and it builds a knowledge graph of the project — files, functions, classes, imports, and their relationships. Then ask questions like you're talking to a senior engineer who knows the project inside out.
15
+
16
+ ## What it does
17
+
18
+ - **Ingests any local repo** — crawls the file tree, detects languages and frameworks
19
+ - **Builds a knowledge graph** — maps files, functions, classes, modules, and how they connect via imports
20
+ - **Answers onboarding questions** — "where does auth happen?", "what does this file do?", "who should I ask about the database?"
21
+ - **Analyzes git history** — finds the most-changed files (often the most important), recent activity, and contributors per file
22
+ - **Works 100% locally** — no API keys, no cloud, no data leaves your machine
23
+
24
+ ## Install
25
+
26
+ Requires Python 3.12+.
27
+
28
+ ```bash
29
+ # With uv (recommended)
30
+ uv pip install git+https://github.com/abab754/onboarding-agent.git
31
+
32
+ # Or clone and install locally
33
+ git clone https://github.com/abab754/onboarding-agent.git
34
+ cd onboarding-agent
35
+ uv sync
36
+ ```
37
+
38
+ ## Connect to an MCP client
39
+
40
+ ### Claude Desktop
41
+
42
+ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
43
+
44
+ ```json
45
+ {
46
+ "mcpServers": {
47
+ "onboarding-agent": {
48
+ "command": "uv",
49
+ "args": ["--directory", "/path/to/onboarding-agent", "run", "main.py"]
50
+ }
51
+ }
52
+ }
53
+ ```
54
+
55
+ ### Claude Code
56
+
57
+ Add a `.mcp.json` file to your project root (or the repo you want to onboard to):
58
+
59
+ ```json
60
+ {
61
+ "mcpServers": {
62
+ "onboarding-agent": {
63
+ "command": "uv",
64
+ "args": ["--directory", "/path/to/onboarding-agent", "run", "main.py"]
65
+ }
66
+ }
67
+ }
68
+ ```
69
+
70
+ Then restart Claude Code.
71
+
72
+ ### Any MCP-compatible client
73
+
74
+ The server uses stdio transport. Run it with:
75
+
76
+ ```bash
77
+ uv run main.py
78
+ ```
79
+
80
+ Any MCP client can connect via stdin/stdout.
81
+
82
+ ## Usage examples
83
+
84
+ Once connected, just talk to your AI assistant naturally:
85
+
86
+ **Full onboarding:**
87
+ > "Onboard me to the repo at /Users/me/projects/my-app"
88
+
89
+ **Understand a file:**
90
+ > "Explain what /Users/me/projects/my-app/src/auth.py does"
91
+
92
+ **Find relevant code:**
93
+ > "Where does database configuration happen in /Users/me/projects/my-app?"
94
+
95
+ **Check project activity:**
96
+ > "Who are the main contributors to /Users/me/projects/my-app and what files change the most?"
97
+
98
+ **Freeform questions:**
99
+ > "How is error handling done in this project?"
100
+
101
+ ## Tools
102
+
103
+ | Tool | Description |
104
+ |---|---|
105
+ | `ingest_repo` | Crawl a repo and return the file tree |
106
+ | `read_file` | Read a file's contents with metadata |
107
+ | `get_overview` | High-level summary: languages, frameworks, entry points |
108
+ | `explain_file` | File contents + imports, functions, classes |
109
+ | `explain_module` | Directory overview with per-file symbols |
110
+ | `build_knowledge_graph` | Index the repo into a queryable knowledge graph |
111
+ | `query_entities` | Search the graph for files, functions, classes, modules |
112
+ | `query_relationships` | Find how entities connect (imports, contains) |
113
+ | `find_relevant_code` | Search by topic and get ranked results |
114
+ | `get_architecture` | Import graph, module structure, coupling analysis |
115
+ | `ask` | Freeform Q&A — gathers context automatically |
116
+ | `get_git_history` | Recent commits and contributor summary |
117
+ | `get_hot_files` | Most frequently changed files |
118
+ | `get_file_contributors` | Who has worked on a specific file |
119
+
120
+ ## Resources
121
+
122
+ | URI | Description |
123
+ |---|---|
124
+ | `repo://overview` | Project summary (after a repo is loaded) |
125
+ | `repo://structure` | File tree |
126
+ | `repo://dependencies` | Import/dependency graph |
127
+
128
+ ## Prompts
129
+
130
+ | Prompt | Description |
131
+ |---|---|
132
+ | `onboard` | Full onboarding walkthrough |
133
+ | `explain_this_file` | Deep dive into a specific file |
134
+ | `find_code_for` | Find code related to a topic |
135
+ | `ask_question` | Freeform question answering |
136
+
137
+ ## How it works
138
+
139
+ 1. You point the server at a repo path
140
+ 2. It crawls the file tree, skipping noise directories (`.git`, `node_modules`, etc.)
141
+ 3. For Python files, it extracts functions, classes, and import statements
142
+ 4. Everything gets stored in a knowledge graph (saved to `.onboarding_agent/graph.json` in the repo)
143
+ 5. When you ask a question, it searches the graph, reads relevant files, and bundles the context for the LLM to answer
144
+
145
+ The knowledge graph persists between sessions, so re-analysis is only needed when the code changes.
146
+
147
+ ## Development
148
+
149
+ ```bash
150
+ git clone https://github.com/abab754/onboarding-agent.git
151
+ cd onboarding-agent
152
+ uv sync
153
+
154
+ # Run the server
155
+ uv run main.py
156
+
157
+ # Test with MCP Inspector
158
+ npx @modelcontextprotocol/inspector uv run main.py
159
+ ```
160
+
161
+ ## Project structure
162
+
163
+ ```
164
+ onboarding_agent/
165
+ ├── server.py # FastMCP instance and global state
166
+ ├── constants.py # Language maps, config signals, skip dirs
167
+ ├── helpers.py # File tree building, Python symbol extraction
168
+ ├── knowledge_graph.py # KnowledgeGraph class with JSON persistence
169
+ ├── resources.py # MCP resources (repo://overview, etc.)
170
+ ├── prompts.py # MCP prompt templates
171
+ └── tools/
172
+ ├── ingest.py # ingest_repo, read_file
173
+ ├── analysis.py # get_overview, explain_file, explain_module
174
+ ├── graph.py # build_knowledge_graph, query_entities, query_relationships
175
+ ├── search.py # find_relevant_code, get_architecture, ask
176
+ └── git_history.py # get_git_history, get_hot_files, get_file_contributors
177
+ ```
178
+
179
+ ## License
180
+
181
+ MIT
@@ -0,0 +1,170 @@
1
+ # onboarding-agent
2
+
3
+ An MCP server that onboards you to any codebase. Point it at a repo, and it builds a knowledge graph of the project — files, functions, classes, imports, and their relationships. Then ask questions like you're talking to a senior engineer who knows the project inside out.
4
+
5
+ ## What it does
6
+
7
+ - **Ingests any local repo** — crawls the file tree, detects languages and frameworks
8
+ - **Builds a knowledge graph** — maps files, functions, classes, modules, and how they connect via imports
9
+ - **Answers onboarding questions** — "where does auth happen?", "what does this file do?", "who should I ask about the database?"
10
+ - **Analyzes git history** — finds the most-changed files (often the most important), recent activity, and contributors per file
11
+ - **Works 100% locally** — no API keys, no cloud, no data leaves your machine
12
+
13
+ ## Install
14
+
15
+ Requires Python 3.12+.
16
+
17
+ ```bash
18
+ # With uv (recommended)
19
+ uv pip install git+https://github.com/abab754/onboarding-agent.git
20
+
21
+ # Or clone and install locally
22
+ git clone https://github.com/abab754/onboarding-agent.git
23
+ cd onboarding-agent
24
+ uv sync
25
+ ```
26
+
27
+ ## Connect to an MCP client
28
+
29
+ ### Claude Desktop
30
+
31
+ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
32
+
33
+ ```json
34
+ {
35
+ "mcpServers": {
36
+ "onboarding-agent": {
37
+ "command": "uv",
38
+ "args": ["--directory", "/path/to/onboarding-agent", "run", "main.py"]
39
+ }
40
+ }
41
+ }
42
+ ```
43
+
44
+ ### Claude Code
45
+
46
+ Add a `.mcp.json` file to your project root (or the repo you want to onboard to):
47
+
48
+ ```json
49
+ {
50
+ "mcpServers": {
51
+ "onboarding-agent": {
52
+ "command": "uv",
53
+ "args": ["--directory", "/path/to/onboarding-agent", "run", "main.py"]
54
+ }
55
+ }
56
+ }
57
+ ```
58
+
59
+ Then restart Claude Code.
60
+
61
+ ### Any MCP-compatible client
62
+
63
+ The server uses stdio transport. Run it with:
64
+
65
+ ```bash
66
+ uv run main.py
67
+ ```
68
+
69
+ Any MCP client can connect via stdin/stdout.
70
+
71
+ ## Usage examples
72
+
73
+ Once connected, just talk to your AI assistant naturally:
74
+
75
+ **Full onboarding:**
76
+ > "Onboard me to the repo at /Users/me/projects/my-app"
77
+
78
+ **Understand a file:**
79
+ > "Explain what /Users/me/projects/my-app/src/auth.py does"
80
+
81
+ **Find relevant code:**
82
+ > "Where does database configuration happen in /Users/me/projects/my-app?"
83
+
84
+ **Check project activity:**
85
+ > "Who are the main contributors to /Users/me/projects/my-app and what files change the most?"
86
+
87
+ **Freeform questions:**
88
+ > "How is error handling done in this project?"
89
+
90
+ ## Tools
91
+
92
+ | Tool | Description |
93
+ |---|---|
94
+ | `ingest_repo` | Crawl a repo and return the file tree |
95
+ | `read_file` | Read a file's contents with metadata |
96
+ | `get_overview` | High-level summary: languages, frameworks, entry points |
97
+ | `explain_file` | File contents + imports, functions, classes |
98
+ | `explain_module` | Directory overview with per-file symbols |
99
+ | `build_knowledge_graph` | Index the repo into a queryable knowledge graph |
100
+ | `query_entities` | Search the graph for files, functions, classes, modules |
101
+ | `query_relationships` | Find how entities connect (imports, contains) |
102
+ | `find_relevant_code` | Search by topic and get ranked results |
103
+ | `get_architecture` | Import graph, module structure, coupling analysis |
104
+ | `ask` | Freeform Q&A — gathers context automatically |
105
+ | `get_git_history` | Recent commits and contributor summary |
106
+ | `get_hot_files` | Most frequently changed files |
107
+ | `get_file_contributors` | Who has worked on a specific file |
108
+
109
+ ## Resources
110
+
111
+ | URI | Description |
112
+ |---|---|
113
+ | `repo://overview` | Project summary (after a repo is loaded) |
114
+ | `repo://structure` | File tree |
115
+ | `repo://dependencies` | Import/dependency graph |
116
+
117
+ ## Prompts
118
+
119
+ | Prompt | Description |
120
+ |---|---|
121
+ | `onboard` | Full onboarding walkthrough |
122
+ | `explain_this_file` | Deep dive into a specific file |
123
+ | `find_code_for` | Find code related to a topic |
124
+ | `ask_question` | Freeform question answering |
125
+
126
+ ## How it works
127
+
128
+ 1. You point the server at a repo path
129
+ 2. It crawls the file tree, skipping noise directories (`.git`, `node_modules`, etc.)
130
+ 3. For Python files, it extracts functions, classes, and import statements
131
+ 4. Everything gets stored in a knowledge graph (saved to `.onboarding_agent/graph.json` in the repo)
132
+ 5. When you ask a question, it searches the graph, reads relevant files, and bundles the context for the LLM to answer
133
+
134
+ The knowledge graph persists between sessions, so re-analysis is only needed when the code changes.
135
+
136
+ ## Development
137
+
138
+ ```bash
139
+ git clone https://github.com/abab754/onboarding-agent.git
140
+ cd onboarding-agent
141
+ uv sync
142
+
143
+ # Run the server
144
+ uv run main.py
145
+
146
+ # Test with MCP Inspector
147
+ npx @modelcontextprotocol/inspector uv run main.py
148
+ ```
149
+
150
+ ## Project structure
151
+
152
+ ```
153
+ onboarding_agent/
154
+ ├── server.py # FastMCP instance and global state
155
+ ├── constants.py # Language maps, config signals, skip dirs
156
+ ├── helpers.py # File tree building, Python symbol extraction
157
+ ├── knowledge_graph.py # KnowledgeGraph class with JSON persistence
158
+ ├── resources.py # MCP resources (repo://overview, etc.)
159
+ ├── prompts.py # MCP prompt templates
160
+ └── tools/
161
+ ├── ingest.py # ingest_repo, read_file
162
+ ├── analysis.py # get_overview, explain_file, explain_module
163
+ ├── graph.py # build_knowledge_graph, query_entities, query_relationships
164
+ ├── search.py # find_relevant_code, get_architecture, ask
165
+ └── git_history.py # get_git_history, get_hot_files, get_file_contributors
166
+ ```
167
+
168
+ ## License
169
+
170
+ MIT
@@ -0,0 +1,15 @@
1
+ """Entry point for the onboarding-agent MCP server."""
2
+
3
+ from onboarding_agent.server import mcp
4
+
5
+ # Import all tool/resource/prompt modules so they register with the mcp instance.
6
+ import onboarding_agent.tools.ingest # noqa: F401
7
+ import onboarding_agent.tools.analysis # noqa: F401
8
+ import onboarding_agent.tools.graph # noqa: F401
9
+ import onboarding_agent.tools.search # noqa: F401
10
+ import onboarding_agent.tools.git_history # noqa: F401
11
+ import onboarding_agent.resources # noqa: F401
12
+ import onboarding_agent.prompts # noqa: F401
13
+
14
def main() -> None:
    """Run the MCP server instance imported from ``onboarding_agent.server``.

    Exposed as a function so the server can be started from a console-script
    entry point or from other Python code, not only via ``python main.py``.
    The tool/resource/prompt modules imported above have already registered
    themselves with ``mcp`` by the time this is called.
    """
    mcp.run()


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,30 @@
1
# Directory names we never want to crawl — these are noise, not project structure.
# ".onboarding_agent" is the directory the persisted knowledge graph is written
# to inside the analysed repo; skipping it keeps the tool from indexing its own
# output on the next crawl.
SKIP_DIRS = {
    ".git",
    ".venv",
    "venv",
    "node_modules",
    "__pycache__",
    ".tox",
    ".mypy_cache",
    ".onboarding_agent",
}

# Maps (lower-case) file extensions to human-readable language names.
EXTENSION_TO_LANGUAGE = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".jsx": "JavaScript (React)",
    ".tsx": "TypeScript (React)",
    ".java": "Java",
    ".go": "Go",
    ".rs": "Rust",
    ".rb": "Ruby",
    ".cpp": "C++",
    ".c": "C",
    ".cs": "C#",
    ".swift": "Swift",
    ".kt": "Kotlin",
    ".scala": "Scala",
    ".php": "PHP",
    ".html": "HTML",
    ".css": "CSS",
    ".scss": "SCSS",
    ".sh": "Shell",
    ".sql": "SQL",
    ".r": "R",
}

# Config files that reveal which frameworks/tools the project uses.
# Keys are exact file names; values are the label reported for that signal.
CONFIG_SIGNALS = {
    "pyproject.toml": "Python (uv/pip)",
    "setup.py": "Python (setuptools)",
    "requirements.txt": "Python (pip)",
    "Pipfile": "Python (pipenv)",
    "package.json": "Node.js",
    "tsconfig.json": "TypeScript",
    "Cargo.toml": "Rust",
    "go.mod": "Go",
    "pom.xml": "Java (Maven)",
    "build.gradle": "Java (Gradle)",
    "Gemfile": "Ruby",
    "Makefile": "Make",
    "Dockerfile": "Docker",
    "docker-compose.yml": "Docker Compose",
    "docker-compose.yaml": "Docker Compose",
    ".eslintrc.json": "ESLint",
    ".prettierrc": "Prettier",
}

# File names that are treated as likely program entry points.
ENTRY_POINT_NAMES = {
    "main.py",
    "app.py",
    "manage.py",
    "server.py",
    "cli.py",
    "index.js",
    "index.ts",
    "main.go",
    "main.rs",
    "App.java",
}
@@ -0,0 +1,101 @@
1
+ """Shared helper functions used across multiple tools."""
2
+
3
+ from pathlib import Path
4
+
5
+ from onboarding_agent.constants import SKIP_DIRS, EXTENSION_TO_LANGUAGE, CONFIG_SIGNALS, ENTRY_POINT_NAMES
6
+
7
+
8
def build_file_tree(root: Path, max_depth: int = 5) -> dict:
    """Build a nested dict describing the files and directories under *root*.

    Directories map to ``{"type": "directory", "children": <subtree>}`` and
    files map to ``{"type": "file", "size": <bytes>}``. Entries whose name is
    in ``SKIP_DIRS`` are left out. Once ``max_depth`` is exhausted, a
    directory's ``children`` is the string ``"...truncated"`` instead of a
    subtree. If listing *root* raises ``PermissionError`` the result is
    ``{"error": "permission denied"}``.
    """
    try:
        # Directories sort before files (False < True), then by name.
        children = list(root.iterdir())
        children.sort(key=lambda p: (p.is_file(), p.name))
    except PermissionError:
        return {"error": "permission denied"}

    listing: dict = {}
    for child in children:
        label = child.name
        if label in SKIP_DIRS:
            continue

        if child.is_dir():
            if max_depth > 0:
                subtree = build_file_tree(child, max_depth - 1)
            else:
                subtree = "...truncated"
            listing[label] = {"type": "directory", "children": subtree}
        elif child.is_file():
            listing[label] = {"type": "file", "size": child.stat().st_size}

    return listing
40
+
41
+
42
def extract_python_symbols(content: str) -> dict:
    """Extract imports, function names, and class names from Python source code.

    Uses simple line-based parsing rather than the AST, so it also works on
    files with syntax errors. The trade-off is that any line whose stripped
    text starts with one of the keywords is matched, including lines that
    happen to sit inside a string or docstring.

    Args:
        content: Full text of a Python source file.

    Returns:
        A dict with three lists, each in order of appearance:
        ``"imports"`` (the stripped ``import``/``from`` lines),
        ``"functions"`` (names from ``def`` and ``async def`` lines) and
        ``"classes"`` (names from ``class`` lines).
    """
    imports: list[str] = []
    functions: list[str] = []
    classes: list[str] = []

    for line in content.splitlines():
        stripped = line.strip()

        if stripped.startswith(("import ", "from ")):
            imports.append(stripped)
            continue

        # Coroutines are declared as "async def name(...)"; drop the "async"
        # prefix so they are recorded like any other function.
        candidate = stripped
        if candidate.startswith("async "):
            candidate = candidate[len("async "):].lstrip()

        if candidate.startswith("def "):
            functions.append(candidate[4:].split("(")[0].strip())
        elif stripped.startswith("class "):
            # The name ends at "(" when bases are listed, otherwise at ":".
            classes.append(stripped[6:].split("(")[0].split(":")[0].strip())

    return {"imports": imports, "functions": functions, "classes": classes}
64
+
65
+
66
def collect_extensions(tree: dict) -> dict[str, int]:
    """Count files per lower-cased extension across the whole file tree.

    Files without an extension are not counted. Directory nodes are descended
    into only when their ``children`` value is a dict; tree values that are
    not dicts are ignored.
    """
    tally: dict[str, int] = {}

    def bump(suffix: str, amount: int) -> None:
        tally[suffix] = tally.get(suffix, 0) + amount

    for entry_name, node in tree.items():
        if not isinstance(node, dict):
            continue

        kind = node.get("type")
        if kind == "file":
            suffix = Path(entry_name).suffix.lower()
            if suffix:
                bump(suffix, 1)
        elif kind == "directory":
            nested = node.get("children")
            if isinstance(nested, dict):
                # Fold the subtree's totals into this level's totals.
                for suffix, amount in collect_extensions(nested).items():
                    bump(suffix, amount)

    return tally
82
+
83
+
84
def find_config_files(tree: dict) -> list[str]:
    """List the top-level names in *tree* that are known config/framework files.

    Only the keys of *tree* itself are checked against ``CONFIG_SIGNALS``;
    nested directories are not searched.
    """
    matches: list[str] = []
    for entry_name in tree:
        if entry_name in CONFIG_SIGNALS:
            matches.append(entry_name)
    return matches
87
+
88
+
89
def find_entry_points(tree: dict, root_path: str) -> list[str]:
    """Recursively collect paths of files whose name is in ``ENTRY_POINT_NAMES``.

    Paths are built by joining *root_path* and each nested name with "/".
    Directory nodes are descended into only when their ``children`` value is
    a dict; tree values that are not dicts are ignored.
    """
    hits: list[str] = []
    for entry_name, node in tree.items():
        if not isinstance(node, dict):
            continue

        full_path = f"{root_path}/{entry_name}"
        kind = node.get("type")
        if kind == "file":
            if entry_name in ENTRY_POINT_NAMES:
                hits.append(full_path)
        elif kind == "directory":
            nested = node.get("children")
            if isinstance(nested, dict):
                hits += find_entry_points(nested, full_path)

    return hits
@@ -0,0 +1,139 @@
1
+ """A simple file-backed knowledge graph for storing codebase entities and relationships.
2
+
3
+ Entities are things like files, functions, classes, and modules.
4
+ Relationships connect them: "file contains function", "file imports module", etc.
5
+
6
+ The graph persists to a JSON file so it survives server restarts.
7
+ """
8
+
9
+ import json
10
+ from pathlib import Path
11
+
12
+
13
+ class KnowledgeGraph:
14
+ """In-memory knowledge graph with JSON file persistence.
15
+
16
+ Each entity has:
17
+ - id: unique identifier (e.g., file path or "filepath::function_name")
18
+ - type: "file", "function", "class", "module"
19
+ - name: human-readable name
20
+ - metadata: dict of extra info (language, size, etc.)
21
+
22
+ Each relationship has:
23
+ - source: entity id
24
+ - target: entity id
25
+ - type: "contains", "imports", "calls", etc.
26
+ """
27
+
28
+ def __init__(self, storage_path: str | None = None):
29
+ self.entities: dict[str, dict] = {}
30
+ self.relationships: list[dict] = []
31
+ self.storage_path = Path(storage_path) if storage_path else None
32
+
33
+ if self.storage_path and self.storage_path.exists():
34
+ self._load()
35
+
36
+ def add_entity(self, entity_id: str, entity_type: str, name: str, metadata: dict | None = None) -> None:
37
+ """Add or update an entity in the graph."""
38
+ self.entities[entity_id] = {
39
+ "id": entity_id,
40
+ "type": entity_type,
41
+ "name": name,
42
+ "metadata": metadata or {},
43
+ }
44
+
45
+ def add_relationship(self, source: str, target: str, rel_type: str) -> None:
46
+ """Add a relationship between two entities. Skips duplicates."""
47
+ rel = {"source": source, "target": target, "type": rel_type}
48
+ if rel not in self.relationships:
49
+ self.relationships.append(rel)
50
+
51
+ def get_entity(self, entity_id: str) -> dict | None:
52
+ """Look up a single entity by id."""
53
+ return self.entities.get(entity_id)
54
+
55
+ def find_entities(self, entity_type: str | None = None, name_contains: str | None = None) -> list[dict]:
56
+ """Search entities by type and/or name substring."""
57
+ results = list(self.entities.values())
58
+ if entity_type:
59
+ results = [e for e in results if e["type"] == entity_type]
60
+ if name_contains:
61
+ query = name_contains.lower()
62
+ results = [e for e in results if query in e["name"].lower()]
63
+ return results
64
+
65
+ def find_relationships(
66
+ self,
67
+ source: str | None = None,
68
+ target: str | None = None,
69
+ rel_type: str | None = None,
70
+ ) -> list[dict]:
71
+ """Search relationships by source, target, and/or type."""
72
+ results = self.relationships
73
+ if source:
74
+ results = [r for r in results if r["source"] == source]
75
+ if target:
76
+ results = [r for r in results if r["target"] == target]
77
+ if rel_type:
78
+ results = [r for r in results if r["type"] == rel_type]
79
+ return results
80
+
81
+ def get_neighbors(self, entity_id: str) -> dict:
82
+ """Find all entities directly connected to the given entity."""
83
+ outgoing = self.find_relationships(source=entity_id)
84
+ incoming = self.find_relationships(target=entity_id)
85
+
86
+ connected_ids = set()
87
+ for r in outgoing:
88
+ connected_ids.add(r["target"])
89
+ for r in incoming:
90
+ connected_ids.add(r["source"])
91
+
92
+ return {
93
+ "entity": self.get_entity(entity_id),
94
+ "outgoing": outgoing,
95
+ "incoming": incoming,
96
+ "neighbors": [self.entities[eid] for eid in connected_ids if eid in self.entities],
97
+ }
98
+
99
+ def clear(self) -> None:
100
+ """Wipe the graph."""
101
+ self.entities.clear()
102
+ self.relationships.clear()
103
+
104
+ def save(self) -> None:
105
+ """Persist the graph to disk as JSON."""
106
+ if not self.storage_path:
107
+ return
108
+ self.storage_path.parent.mkdir(parents=True, exist_ok=True)
109
+ data = {"entities": self.entities, "relationships": self.relationships}
110
+ self.storage_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
111
+
112
+ def _load(self) -> None:
113
+ """Load graph from disk."""
114
+ try:
115
+ data = json.loads(self.storage_path.read_text(encoding="utf-8"))
116
+ self.entities = data.get("entities", {})
117
+ self.relationships = data.get("relationships", [])
118
+ except (json.JSONDecodeError, KeyError):
119
+ self.entities = {}
120
+ self.relationships = []
121
+
122
+ def stats(self) -> dict:
123
+ """Return summary stats about the graph."""
124
+ type_counts: dict[str, int] = {}
125
+ for e in self.entities.values():
126
+ t = e["type"]
127
+ type_counts[t] = type_counts.get(t, 0) + 1
128
+
129
+ rel_type_counts: dict[str, int] = {}
130
+ for r in self.relationships:
131
+ t = r["type"]
132
+ rel_type_counts[t] = rel_type_counts.get(t, 0) + 1
133
+
134
+ return {
135
+ "total_entities": len(self.entities),
136
+ "total_relationships": len(self.relationships),
137
+ "entities_by_type": type_counts,
138
+ "relationships_by_type": rel_type_counts,
139
+ }