codecompass-mcp 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graph/mcp_server.py ADDED
@@ -0,0 +1,280 @@
1
+ """CodeCompass MCP Server — exposes code graph queries as native opencode tools.
2
+
3
+ Registered in ~/.config/opencode/opencode.json as a local MCP server.
4
+ Available from any working directory — no need to cd to the codecompass project.
5
+
6
+ Tools exposed:
7
+ list_projects — list all ingested projects
8
+ blast_radius — all files reachable from a symbol/file (forward)
9
+ impact — what calls/uses a symbol (reverse)
10
+ deps — what a file imports (direct + transitive)
11
+ trace — forward call chain from a function
12
+ tree — folder/file hierarchy for a project
13
+ styles — CSS selectors that target an element
14
+ batch_impact — union blast radius for N targets (plan a PR)
15
+
16
+ Usage:
17
+ python -m graph.mcp_server
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import sys
23
+ from datetime import datetime, timezone
24
+ from pathlib import Path
25
+
26
# Put the directory two levels above this file on sys.path (if it is not
# already there) before the `graph.*` import that follows.
_project_root = Path(__file__).resolve().parent.parent
if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))
29
+
30
+ from mcp.server.fastmcp import FastMCP
31
+
32
+ from graph.code_graph_client import get_client
33
+
34
# Server instance; the tool functions in this module register on it via @mcp.tool().
mcp = FastMCP("codecompass")
# Default max_hops for impact, deps, blast_radius and batch_impact.
DEFAULT_HOPS = 3
# Index age, in hours, above which _stale_warning() emits a warning line.
STALE_WARN_HOURS = 24
37
+
38
+
39
def _stale_warning(project: str) -> str:
    """Return a staleness warning line for ``project``, or ``""``.

    Fetches the project's last-ingested timestamp from the graph client and
    compares its age with ``STALE_WARN_HOURS``.  The warning text starts with
    a newline so it appears as a separate paragraph when joined into tool
    output.  Any problem interpreting the timestamp yields ``""``: this is a
    best-effort hint and must never break the calling tool.
    """
    client = get_client(project)
    try:
        ts = client.get_project_last_ingested(project)
    finally:
        client.close()
    if not ts:
        return ""
    try:
        # fromisoformat() on older Pythons rejects a trailing "Z".
        dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
        if dt.tzinfo is None:
            # Subtracting a naive datetime from an aware "now" raises
            # TypeError, which previously suppressed the warning entirely.
            # assumes offset-less timestamps are UTC — TODO confirm against
            # whatever writes the ingest timestamp.
            dt = dt.replace(tzinfo=timezone.utc)
        age_hours = (datetime.now(timezone.utc) - dt).total_seconds() / 3600
        if age_hours > STALE_WARN_HOURS:
            return f"\nWARNING: index for '{project}' is {age_hours:.0f}h old — re-run ingest-code to refresh"
    except (ValueError, TypeError, AttributeError):
        # Unparseable or non-string timestamp: skip the hint rather than fail.
        pass
    return ""
55
+
56
+
57
+ # ── list_projects ────────────────────────────────────────────────────────────
58
+
59
+
60
@mcp.tool()
def list_projects() -> str:
    """List all projects currently ingested in the code graph."""
    # The client is opened under the fixed name "default" and always closed,
    # even when the query raises.
    graph = get_client("default")
    try:
        names = graph.get_all_projects()
    finally:
        graph.close()

    if names:
        listing = "\n".join(f" {name}" for name in names)
        return "Ingested projects:\n" + listing

    # Nothing ingested: point the user at the ingest command.
    return "No projects ingested yet.\n Run: codecompass ingest-code <repo_path> --project <name>"
73
+
74
+
75
+ # ── impact ───────────────────────────────────────────────────────────────────
76
+
77
+
78
@mcp.tool()
def impact(symbol: str, project: str, hops: int = DEFAULT_HOPS) -> str:
    """What calls or uses a symbol? Reverse traversal — find everything that
    references a function, class, CSS variable, or HTML element."""
    # NOTE(review): docstring kept verbatim — it is presumably what the MCP
    # server publishes as the tool description.
    client = get_client(project)
    try:
        rows = client.find_callers(symbol, project, max_hops=hops)
    finally:
        client.close()

    if not rows:
        return f"Nothing calls '{symbol}' within {hops} hops."

    lines = [f"Callers of '{symbol}':"]
    for r in rows:
        # The separating space belongs to the tag, so typed rows read
        # "name (type) in file" (previously "(type)in") and untyped rows
        # still read "name in file".
        caller_type = r.get("caller_type")
        tag = f"({caller_type}) " if caller_type else ""
        lines.append(f" {r['caller_name']} {tag}in {r['caller_file']} [depth {r['depth']}]")

    # _stale_warning() returns "" for a fresh index, which leaves a trailing newline.
    lines.append(_stale_warning(project))
    return "\n".join(lines)
98
+
99
+
100
+ # ── deps ─────────────────────────────────────────────────────────────────────
101
+
102
+
103
@mcp.tool()
def deps(file_path: str, project: str, hops: int = DEFAULT_HOPS) -> str:
    """What does a file import? Returns direct and transitive dependencies."""
    client = get_client(project)
    try:
        rows = client.find_dependencies(file_path, project, max_hops=hops)
    finally:
        client.close()

    if not rows:
        return f"No dependencies found for '{file_path}'."

    lines = [f"Dependencies of '{file_path}':"]
    for r in rows:
        # The separating space belongs to the tag, so typed rows read
        # "dep (type) [depth n]" (previously "(type)[depth n]") and untyped
        # rows still read "dep [depth n]".
        dep_type = r.get("dep_type")
        tag = f"({dep_type}) " if dep_type else ""
        lines.append(f" {r['dependency']} {tag}[depth {r['depth']}]")

    # _stale_warning() returns "" for a fresh index, which leaves a trailing newline.
    lines.append(_stale_warning(project))
    return "\n".join(lines)
122
+
123
+
124
+ # ── styles ───────────────────────────────────────────────────────────────────
125
+
126
+
127
@mcp.tool()
def styles(element_name: str, project: str) -> str:
    """CSS selectors that style an HTML element or web component."""
    graph = get_client(project)
    try:
        matches = graph.find_styles(element_name, project)
    finally:
        graph.close()

    if not matches:
        return f"No CSS selectors found for '{element_name}'."

    # One row per selector; the line number is appended only when present and truthy.
    body = [
        f" {m['selector']} in {m['source_file']}" + (f" line {m['line']}" if m.get("line") else "")
        for m in matches
    ]
    header = f"CSS selectors for '{element_name}':"
    return "\n".join([header, *body, _stale_warning(project)])
146
+
147
+
148
+ # ── trace ────────────────────────────────────────────────────────────────────
149
+
150
+
151
@mcp.tool()
def trace(start_name: str, project: str, hops: int = 4) -> str:
    """Forward call chain — what functions does this entry point call?"""
    client = get_client(project)
    try:
        rows = client.trace_calls(start_name, project, max_hops=hops)
    finally:
        client.close()

    if not rows:
        return f"No call chain found from '{start_name}' within {hops} hops."

    lines = [f"Call chain from '{start_name}':"]
    for r in rows:
        # The separating space belongs to the tag, so typed rows read
        # "name (type) in file" (previously "(type)in") and untyped rows
        # still read "name in file".
        callee_type = r.get("callee_type")
        tag = f"({callee_type}) " if callee_type else ""
        lines.append(f" {r['callee_name']} {tag}in {r['callee_file']} [depth {r['depth']}]")

    # _stale_warning() returns "" for a fresh index, which leaves a trailing newline.
    lines.append(_stale_warning(project))
    return "\n".join(lines)
170
+
171
+
172
+ # ── blast_radius ─────────────────────────────────────────────────────────────
173
+
174
+
175
@mcp.tool()
def blast_radius(target: str, project: str, hops: int = DEFAULT_HOPS) -> str:
    """All files reachable from a symbol or file via CALLS/IMPORTS/INHERITS.
    Use before editing — shows everything a change will touch."""
    graph = get_client(project)
    try:
        reachable, origin = graph.get_blast_radius(target, project, max_hops=hops)
    finally:
        graph.close()

    # No resolved file and no rows means the target is unknown to the graph.
    if origin is None and not reachable:
        return f"'{target}' not found in project '{project}'."

    report = [f"Blast radius for '{target}' (via {origin or 'unknown file'}):"]
    if not reachable:
        report.append(" (nothing reachable within hops)")

    # Each file is listed once, labelled with the edge type of its first row.
    listed = set()
    for row in reachable:
        path = row["file"]
        if path in listed:
            continue
        listed.add(path)
        report.append(f" {path} [via: {row.get('edge_type', '?')}]")

    report += [
        f"\n# blast radius: {len(listed)} files across {hops} hops",
        _stale_warning(project),
    ]
    return "\n".join(report)
202
+
203
+
204
+ # ── batch_impact ─────────────────────────────────────────────────────────────
205
+
206
+
207
@mcp.tool()
def batch_impact(targets: str, project: str, hops: int = DEFAULT_HOPS) -> str:
    """Union blast radius across multiple targets (comma-separated).
    Use when planning a PR — see the full set of files touched."""
    # Split on commas, trim each entry, and drop the empty ones.
    names = list(filter(None, map(str.strip, targets.split(","))))

    graph = get_client(project)
    try:
        report = [f"Batch impact for {len(names)} targets in '{project}':"]
        touched: set[str] = set()
        resolved = 0

        for name in names:
            rows, origin = graph.get_blast_radius(name, project, max_hops=hops)
            if origin is None and not rows:
                report.append(f" WARNING: '{name}' not found")
                continue
            resolved += 1
            # A file is attributed to the first target that reaches it.
            for row in rows:
                path = row["file"]
                if path in touched:
                    continue
                touched.add(path)
                report.append(f" {path} [via: {name}]")
    finally:
        graph.close()

    if resolved == 0:
        return f"None of the targets found in project '{project}'."

    report.append(f"\n# batch impact: {len(touched)} files, {len(names)} input targets, {hops} hops")
    report.append(_stale_warning(project))
    return "\n".join(report)
239
+
240
+
241
+ # ── tree ─────────────────────────────────────────────────────────────────────
242
+
243
+
244
@mcp.tool()
def tree(project: str) -> str:
    """Folder and file hierarchy for a project."""
    graph = get_client(project)
    try:
        nodes = graph.get_project_tree(project)
    finally:
        graph.close()

    if not nodes:
        return f"No hierarchy found for project '{project}'. Run ingest-code first."

    out = [project + "/"]
    for node in nodes:
        # A missing or None depth is treated as 0 (no indentation).
        pad = " " * (node.get("depth", 0) or 0)
        label = node["name"]
        # Only nodes typed "Folder" get a trailing slash.
        slash = "/" if node.get("node_type", "") == "Folder" else ""
        out.append(f"{pad}├── {label}{slash}")

    out.append(_stale_warning(project))
    return "\n".join(out)
266
+
267
+
268
+ # ── entry point ──────────────────────────────────────────────────────────────
269
+
270
def main() -> None:
    """Run the MCP server.

    ``sys.argv[1]`` selects the transport: ``"sse"`` serves over SSE, anything
    else (or nothing) uses the default ``mcp.run()`` transport.  For SSE,
    ``sys.argv[2]`` is the port (default 8000).

    Raises:
        ValueError: if the SSE port argument is not an integer.
    """
    transport = sys.argv[1] if len(sys.argv) > 1 else "stdio"
    if transport == "sse":
        port = int(sys.argv[2]) if len(sys.argv) > 2 else 8000
        # NOTE(review): in the `mcp` SDK, FastMCP.run() takes only the
        # transport (plus an optional mount path); passing host=/port= raises
        # TypeError.  The bind address is read from the server settings.
        # 0.0.0.0 listens on every interface, as the original call intended.
        mcp.settings.host = "0.0.0.0"
        mcp.settings.port = port
        mcp.run(transport="sse")
    else:
        mcp.run()


if __name__ == "__main__":
    main()
graph/setup.py ADDED
@@ -0,0 +1,255 @@
1
+ """CodeCompass setup wizard — writes all config files a pip-installed agent needs.
2
+
3
+ Usage:
4
+ codecompass setup
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ import sys
12
+ from pathlib import Path
13
+
14
# Markdown agent instructions; run_setup() writes this text to instructions.md.
# A trailing backslash inside the literal joins that line with the next one.
INSTRUCTIONS_MD = """\
# CodeCompass — opencode Instructions

A Neo4j-backed code dependency graph is available via MCP tools. \
**Always query it before editing code.** The graph knows what's connected — \
trust it over file exploration.

---

## Available tools (MCP)

All tools use the `codecompass` MCP server. Call them from any working directory.

| Tool | Purpose |
|---|---|
| `list_projects` | See all ingested projects |
| `blast_radius` | Every file a symbol/file touches (forward) |
| `impact` | What calls/uses a symbol (reverse) |
| `deps` | What a file imports |
| `trace` | Forward call chain from a function |
| `tree` | Folder/file hierarchy |
| `styles` | CSS selectors for an element |
| `batch_impact` | Union blast radius across N targets |

---

## When to use each tool

| Scenario | Tool to call first |
|---|---|
| About to edit one file or symbol | `blast_radius(symbol, project)` |
| Planning a PR touching N files | `batch_impact("file1, file2", project)` |
| Renaming or removing a function | `impact(function_name, project)` |
| Understanding what a file imports | `deps(file_path, project)` |
| Tracing a call chain forward | `trace(entry_point, project)` |
| Orienting in an unfamiliar project | `tree(project)` |
| Finding which CSS targets an element | `styles(element_name, project)` |
| Discovering ingested projects | `list_projects()` |

---

## Mandatory rules

1. **Before editing any file in an ingested project, call the codecompass tools first.**
2. Use `list_projects()` to discover what projects are available.
3. Use `blast_radius` to understand impact before making changes.
4. Use `impact` before renaming or removing anything.
5. If a tool returns a WARNING about stale index, suggest re-running `codecompass ingest-code`.
6. The graph provides **structural truth** (AST-parsed). Trust it. It cannot tell you what code *means* — only what's connected.

---

## Project memory

Session learnings are stored in `memory/learnings.md`. Design decisions are in \
`memory/decisions.md`. These accumulate across sessions — read them at session \
start if relevant to your task.
"""
72
+
73
# Starter .env content; run_setup() writes it to ./.env only when that file
# does not exist yet.  The key and password are placeholders to be replaced.
DOT_ENV_TEMPLATE = """\
ANTHROPIC_API_KEY=your_key_here
NEO4J_URI=bolt://localhost:7687
NEO4J_USER=neo4j
NEO4J_PASSWORD=password123
"""
79
+
80
+
81
+ def _memory_plugin_ts(script_dir: str) -> str:
82
+ return f"""\
83
+ import type {{ Plugin }} from "@opencode-ai/plugin"
84
+
85
+ const SAVE_SCRIPT = "{script_dir}/save_learnings.py"
86
+ const LOG_SCRIPT = "{script_dir}/log_session.py"
87
+
88
+ export const CodeCompassMemory: Plugin = async ({{ $, directory }}) => {{
89
+ return {{
90
+ "experimental.session.compacting": async (_input, output) => {{
91
+ output.context.push(`## CodeCompass Session Memory
92
+
93
+ Before generating the compaction summary, review this conversation and include:
94
+
95
+ ### Key Learnings
96
+ - Design decisions made and why
97
+ - Problems solved and how
98
+ - Constraints discovered
99
+ - Patterns established
100
+ - Non-obvious insights
101
+
102
+ ### Active Context
103
+ - Current task and its status
104
+ - Files being modified
105
+ - Blockers or dependencies
106
+
107
+ Format the learnings section so they can be extracted later if needed.`)
108
+ }},
109
+
110
+ event: async ({{ event }}) => {{
111
+ if (event.type === "session.idle") {{
112
+ await $`python ${{LOG_SCRIPT}} ${{directory}}`.quiet().nothrow()
113
+ }}
114
+ if (event.type === "session.compacted") {{
115
+ await $`python ${{SAVE_SCRIPT}} ${{directory}}`.quiet().nothrow()
116
+ }}
117
+ }},
118
+ }}
119
+ }}
120
+ """
121
+
122
+
123
+ def _save_learnings_py(memory_dir: str) -> str:
124
+ return f"""\
125
+ from __future__ import annotations
126
+
127
+ import subprocess, sys
128
+ from datetime import datetime
129
+ from pathlib import Path
130
+
131
+ MEMORY_DIR = Path("{memory_dir}")
132
+ LEARNINGS_FILE = MEMORY_DIR / "learnings.md"
133
+
134
+
135
+ def _get_changed_files(cwd: str) -> list[str]:
136
+ try:
137
+ r = subprocess.run(["git", "diff", "--name-only", "HEAD"],
138
+ capture_output=True, text=True, timeout=5, cwd=cwd)
139
+ return [f.strip() for f in r.stdout.strip().split("\\n") if f.strip()]
140
+ except Exception:
141
+ return []
142
+
143
+
144
+ def main() -> None:
145
+ cwd = sys.argv[1] if len(sys.argv) > 1 else __import__("os").getcwd()
146
+ changed = _get_changed_files(cwd)
147
+ date_key = datetime.now().strftime("%Y-%m-%d")
148
+ lines = [f"\\n\\n## {{date_key}} (post-compact)", f"cwd: {{cwd}}"]
149
+ if changed:
150
+ lines.append(f"Files changed: {{', '.join(changed)}}")
151
+ lines.append(f"- (review conversation for key learnings about: {{', '.join(changed[:3])}})")
152
+ else:
153
+ lines.append("Session compacted — no file changes detected.")
154
+ MEMORY_DIR.mkdir(parents=True, exist_ok=True)
155
+ with open(LEARNINGS_FILE, "a", encoding="utf-8") as f:
156
+ f.write("\\n".join(lines) + "\\n")
157
+
158
+
159
+ if __name__ == "__main__":
160
+ main()
161
+ """
162
+
163
+
164
+ def _log_session_py(memory_dir: str) -> str:
165
+ return f"""\
166
+ from __future__ import annotations
167
+
168
+ import subprocess, sys
169
+ from datetime import datetime
170
+ from pathlib import Path
171
+
172
+ MEMORY_DIR = Path("{memory_dir}")
173
+ SESSION_LOG = MEMORY_DIR / "session_log.md"
174
+
175
+
176
+ def _get_changed_files(cwd: str) -> list[str]:
177
+ try:
178
+ r = subprocess.run(["git", "diff", "--name-only", "HEAD"],
179
+ capture_output=True, text=True, timeout=5, cwd=cwd)
180
+ return [f.strip() for f in r.stdout.strip().split("\\n") if f.strip()]
181
+ except Exception:
182
+ return []
183
+
184
+
185
+ def main() -> None:
186
+ cwd = sys.argv[1] if len(sys.argv) > 1 else __import__("os").getcwd()
187
+ changed = _get_changed_files(cwd)
188
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
189
+ lines = [f"\\n\\n## {{timestamp}}", f"cwd: {{cwd}}"]
190
+ lines.append(f"files changed: {{', '.join(changed) if changed else 'none'}}")
191
+ MEMORY_DIR.mkdir(parents=True, exist_ok=True)
192
+ with open(SESSION_LOG, "a", encoding="utf-8") as f:
193
+ f.write("\\n".join(lines) + "\\n")
194
+
195
+
196
+ if __name__ == "__main__":
197
+ main()
198
+ """
199
+
200
+
201
def run_setup() -> None:
    """Write the CodeCompass opencode assets and print the config to add.

    Creates ``~/.config/opencode/codecompass/{plugins,scripts,memory}``,
    writes ``instructions.md``, the ``memory.ts`` plugin and the two helper
    scripts, creates ``./.env`` from the template unless it already exists,
    and prints the JSON block for ``opencode.json``.  ``opencode.json`` itself
    is never modified.
    """
    base_dir = Path.home() / ".config" / "opencode" / "codecompass"
    plugins_dir = base_dir / "plugins"
    scripts_dir = base_dir / "scripts"
    memory_dir = base_dir / "memory"

    for d in (plugins_dir, scripts_dir, memory_dir):
        d.mkdir(parents=True, exist_ok=True)

    # Every file is written as UTF-8 explicitly.  The templates contain
    # non-ASCII characters (em dashes); written in a non-UTF-8 locale
    # encoding, the generated .py scripts would fail to load as source.

    # 1. Write instructions
    instructions_path = base_dir / "instructions.md"
    instructions_path.write_text(INSTRUCTIONS_MD, encoding="utf-8")
    print(f"Wrote {instructions_path}")

    # 2. Write memory plugin
    plugin_path = plugins_dir / "memory.ts"
    plugin_path.write_text(_memory_plugin_ts(str(scripts_dir)), encoding="utf-8")
    print(f"Wrote {plugin_path}")

    # 3. Write helper scripts
    (scripts_dir / "save_learnings.py").write_text(
        _save_learnings_py(str(memory_dir)), encoding="utf-8"
    )
    (scripts_dir / "log_session.py").write_text(
        _log_session_py(str(memory_dir)), encoding="utf-8"
    )
    print(f"Wrote scripts to {scripts_dir}/")

    # 4. Write .env template (an existing .env is never overwritten)
    env_path = Path.cwd() / ".env"
    if env_path.exists():
        print(f".env exists at {env_path} — skipping")
    else:
        env_path.write_text(DOT_ENV_TEMPLATE, encoding="utf-8")
        print(f"Created {env_path}")

    # 5. Print opencode config
    config_block = {
        "instructions": [str(instructions_path)],
        "mcp": {
            "codecompass": {
                "type": "local",
                "command": ["codecompass-mcp"],
            }
        },
        "plugin": [str(plugin_path)],
    }

    opencode_config = Path.home() / ".config" / "opencode" / "opencode.json"
    print()
    if opencode_config.exists():
        print(f"Merge this into {opencode_config}:")
    else:
        print(f"Add this to {opencode_config}:")
    print()
    print(json.dumps(config_block, indent=2))
    print()
    print("Restart opencode. Then: opencode")
    print('Ask "what ingested projects are available?" — it should use list_projects.')
ingestion/__init__.py ADDED
File without changes
ingestion/chunker.py ADDED
@@ -0,0 +1,70 @@
1
+ import re
2
+ import PyPDF2
3
+
4
+ # Rough approximation: 1 token ≈ 4 characters (standard for English prose/code).
5
+ # Used when a tokeniser library is unavailable.
6
+ _CHARS_PER_TOKEN = 4
7
+
8
+
9
+ def _estimate_tokens(text: str) -> int:
10
+ return max(1, len(text) // _CHARS_PER_TOKEN)
11
+
12
+
13
+ def chunk_pdf(filepath: str, tokens_per_chunk: int = 500, overlap_tokens: int = 50) -> list[str]:
14
+ """Split PDF into overlapping token-sized chunks."""
15
+ with open(filepath, "rb") as f:
16
+ reader = PyPDF2.PdfReader(f)
17
+ full_text = " ".join(page.extract_text() or "" for page in reader.pages)
18
+ return chunk_text(full_text, tokens_per_chunk=tokens_per_chunk, overlap_tokens=overlap_tokens)
19
+
20
+
21
+ def chunk_text(text: str, tokens_per_chunk: int = 500, overlap_tokens: int = 50) -> list[str]:
22
+ """
23
+ Split text into chunks of approximately `tokens_per_chunk` tokens with
24
+ `overlap_tokens` of overlap between consecutive chunks.
25
+
26
+ Splitting prefers sentence boundaries so that a chunk never cuts mid-sentence.
27
+ Falls back to hard character splitting when no boundary is found.
28
+ """
29
+ chunk_chars = tokens_per_chunk * _CHARS_PER_TOKEN
30
+ overlap_chars = overlap_tokens * _CHARS_PER_TOKEN
31
+
32
+ # Normalise whitespace but preserve paragraph breaks as sentence boundaries.
33
+ text = re.sub(r"\r\n|\r", "\n", text)
34
+ text = re.sub(r"[ \t]+", " ", text).strip()
35
+
36
+ if not text:
37
+ return []
38
+
39
+ chunks: list[str] = []
40
+ start = 0
41
+ step = chunk_chars - overlap_chars
42
+
43
+ while start < len(text):
44
+ end = start + chunk_chars
45
+ segment = text[start:end]
46
+
47
+ # If this isn't the last chunk, trim to the last sentence boundary.
48
+ if end < len(text):
49
+ # Look for a sentence-ending punctuation followed by whitespace/newline.
50
+ boundary = _last_sentence_boundary(segment)
51
+ if boundary and boundary > chunk_chars // 2:
52
+ segment = segment[:boundary]
53
+
54
+ chunk = segment.strip()
55
+ if chunk:
56
+ chunks.append(chunk)
57
+
58
+ # Advance by the length of the actual segment taken (minus overlap).
59
+ advance = max(len(segment) - overlap_chars, step)
60
+ start += advance
61
+
62
+ return chunks
63
+
64
+
65
+ def _last_sentence_boundary(text: str) -> int | None:
66
+ """Return the index just after the last sentence-ending boundary in `text`."""
67
+ # Match '. ', '! ', '? ', or end of a paragraph ('\n\n').
68
+ for match in reversed(list(re.finditer(r"(?<=[.!?])\s+|(?<=\n)\n", text))):
69
+ return match.end()
70
+ return None