codecompass-mcp 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codecompass_mcp-2.0.0.dist-info/METADATA +368 -0
- codecompass_mcp-2.0.0.dist-info/RECORD +28 -0
- codecompass_mcp-2.0.0.dist-info/WHEEL +5 -0
- codecompass_mcp-2.0.0.dist-info/entry_points.txt +3 -0
- codecompass_mcp-2.0.0.dist-info/licenses/LICENSE +21 -0
- codecompass_mcp-2.0.0.dist-info/top_level.txt +6 -0
- config.py +16 -0
- graph/__init__.py +0 -0
- graph/cli.py +13 -0
- graph/code_graph_client.py +485 -0
- graph/code_query_cli.py +504 -0
- graph/mcp_server.py +280 -0
- graph/setup.py +255 -0
- ingestion/__init__.py +0 -0
- ingestion/chunker.py +70 -0
- ingestion/code_normalizer.py +158 -0
- ingestion/code_parser.py +709 -0
- ingestion/entity_resolver.py +179 -0
- ingestion/file_watcher.py +165 -0
- ingestion/graph_writer.py +17 -0
- ingestion/hierarchy_builder.py +148 -0
- ingestion/reader_agent.py +135 -0
- main.py +306 -0
- models/__init__.py +0 -0
- models/code_types.py +35 -0
- models/types.py +45 -0
- utils/__init__.py +0 -0
- utils/formatting.py +24 -0
graph/mcp_server.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""CodeCompass MCP Server — exposes code graph queries as native opencode tools.
|
|
2
|
+
|
|
3
|
+
Registered in ~/.config/opencode/opencode.json as a local MCP server.
|
|
4
|
+
Available from any working directory — no need to cd to the codecompass project.
|
|
5
|
+
|
|
6
|
+
Tools exposed:
|
|
7
|
+
list_projects — list all ingested projects
|
|
8
|
+
blast_radius — all files reachable from a symbol/file (forward)
|
|
9
|
+
impact — what calls/uses a symbol (reverse)
|
|
10
|
+
deps — what a file imports (direct + transitive)
|
|
11
|
+
trace — forward call chain from a function
|
|
12
|
+
tree — folder/file hierarchy for a project
|
|
13
|
+
styles — CSS selectors that target an element
|
|
14
|
+
batch_impact — union blast radius for N targets (plan a PR)
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
python -m graph.mcp_server
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import sys
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
_project_root = Path(__file__).resolve().parent.parent
|
|
27
|
+
if str(_project_root) not in sys.path:
|
|
28
|
+
sys.path.insert(0, str(_project_root))
|
|
29
|
+
|
|
30
|
+
from mcp.server.fastmcp import FastMCP
|
|
31
|
+
|
|
32
|
+
from graph.code_graph_client import get_client
|
|
33
|
+
|
|
34
|
+
# Server instance; the tool functions in this module register on it via @mcp.tool().
mcp = FastMCP("codecompass")
# Default traversal depth for the tools that accept a `hops` argument.
DEFAULT_HOPS = 3
# Index age, in hours, above which _stale_warning() emits a warning line.
STALE_WARN_HOURS = 24
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _stale_warning(project: str) -> str:
    """Return a staleness warning for ``project``'s index, or ``""``.

    Fetches the project's last-ingest timestamp through the graph client and
    compares its age with ``STALE_WARN_HOURS``.

    Args:
        project: Name of the ingested project to check.

    Returns:
        A warning line (prefixed with a newline) when the index is older than
        ``STALE_WARN_HOURS`` hours. An empty string when the index is fresh,
        when no timestamp is recorded, or when the timestamp cannot be parsed.
    """
    client = get_client(project)
    try:
        ts = client.get_project_last_ingested(project)
    finally:
        client.close()
    if not ts:
        return ""

    try:
        if isinstance(ts, datetime):
            dt = ts
        else:
            # presumably an ISO-8601 string; "Z" is rewritten because older
            # Python versions of fromisoformat() reject it.
            dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except (ValueError, TypeError, AttributeError):
        # Best effort: an unparseable timestamp simply yields no warning.
        return ""

    if dt.tzinfo is None:
        # A naive timestamp cannot be subtracted from an aware "now" (TypeError),
        # which previously suppressed the warning entirely. Assume UTC.
        dt = dt.replace(tzinfo=timezone.utc)

    age_hours = (datetime.now(timezone.utc) - dt).total_seconds() / 3600
    if age_hours > STALE_WARN_HOURS:
        return f"\nWARNING: index for '{project}' is {age_hours:.0f}h old — re-run ingest-code to refresh"
    return ""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ── list_projects ────────────────────────────────────────────────────────────
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@mcp.tool()
def list_projects() -> str:
    """List all projects currently ingested in the code graph."""
    # Returns a header plus one line per project name, or a hint on how to
    # ingest a project when the graph holds none.
    graph = get_client("default")
    try:
        names = graph.get_all_projects()
    finally:
        graph.close()

    if names:
        listing = "\n".join(f" {name}" for name in names)
        return "Ingested projects:\n" + listing

    return "No projects ingested yet.\n Run: codecompass ingest-code <repo_path> --project <name>"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ── impact ───────────────────────────────────────────────────────────────────
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@mcp.tool()
def impact(symbol: str, project: str, hops: int = DEFAULT_HOPS) -> str:
    """What calls or uses a symbol? Reverse traversal — find everything that
    references a function, class, CSS variable, or HTML element."""
    # symbol:  name to look up callers for.
    # project: ingested project to query.
    # hops:    passed to the client as max_hops.
    # Returns one line per caller row, followed by the stale-index warning
    # (an empty line when the index is fresh).
    client = get_client(project)
    try:
        rows = client.find_callers(symbol, project, max_hops=hops)
    finally:
        client.close()

    if not rows:
        return f"Nothing calls '{symbol}' within {hops} hops."

    lines = [f"Callers of '{symbol}':"]
    for r in rows:
        caller_type = r.get("caller_type")
        # The trailing space keeps "(type)" from running into the following "in".
        tag = f"({caller_type}) " if caller_type else ""
        lines.append(f" {r['caller_name']} {tag}in {r['caller_file']} [depth {r['depth']}]")

    lines.append(_stale_warning(project))
    return "\n".join(lines)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ── deps ─────────────────────────────────────────────────────────────────────
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@mcp.tool()
def deps(file_path: str, project: str, hops: int = DEFAULT_HOPS) -> str:
    """What does a file import? Returns direct and transitive dependencies."""
    # file_path: file whose dependencies are listed.
    # project:   ingested project to query.
    # hops:      passed to the client as max_hops.
    # Returns one line per dependency row, followed by the stale-index warning
    # (an empty line when the index is fresh).
    client = get_client(project)
    try:
        rows = client.find_dependencies(file_path, project, max_hops=hops)
    finally:
        client.close()

    if not rows:
        return f"No dependencies found for '{file_path}'."

    lines = [f"Dependencies of '{file_path}':"]
    for r in rows:
        dep_type = r.get("dep_type")
        # The trailing space keeps "(type)" from running into "[depth n]".
        tag = f"({dep_type}) " if dep_type else ""
        lines.append(f" {r['dependency']} {tag}[depth {r['depth']}]")

    lines.append(_stale_warning(project))
    return "\n".join(lines)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ── styles ───────────────────────────────────────────────────────────────────
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@mcp.tool()
def styles(element_name: str, project: str) -> str:
    """CSS selectors that style an HTML element or web component."""
    # Returns one line per selector row (with its source file and, when the
    # row carries one, its line), followed by the stale-index warning.
    graph = get_client(project)
    try:
        matches = graph.find_styles(element_name, project)
    finally:
        graph.close()

    if not matches:
        return f"No CSS selectors found for '{element_name}'."

    report = [f"CSS selectors for '{element_name}':"]
    for row in matches:
        position = ""
        if row.get("line"):
            position = f" line {row['line']}"
        report.append(f" {row['selector']} in {row['source_file']}{position}")

    report.append(_stale_warning(project))
    return "\n".join(report)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ── trace ────────────────────────────────────────────────────────────────────
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@mcp.tool()
def trace(start_name: str, project: str, hops: int = 4) -> str:
    """Forward call chain — what functions does this entry point call?"""
    # start_name: name the call chain starts from.
    # project:    ingested project to query.
    # hops:       passed to the client as max_hops.
    # Returns one line per callee row, followed by the stale-index warning
    # (an empty line when the index is fresh).
    client = get_client(project)
    try:
        rows = client.trace_calls(start_name, project, max_hops=hops)
    finally:
        client.close()

    if not rows:
        return f"No call chain found from '{start_name}' within {hops} hops."

    lines = [f"Call chain from '{start_name}':"]
    for r in rows:
        callee_type = r.get("callee_type")
        # The trailing space keeps "(type)" from running into the following "in".
        tag = f"({callee_type}) " if callee_type else ""
        lines.append(f" {r['callee_name']} {tag}in {r['callee_file']} [depth {r['depth']}]")

    lines.append(_stale_warning(project))
    return "\n".join(lines)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ── blast_radius ─────────────────────────────────────────────────────────────
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@mcp.tool()
def blast_radius(target: str, project: str, hops: int = DEFAULT_HOPS) -> str:
    """All files reachable from a symbol or file via CALLS/IMPORTS/INHERITS.
    Use before editing — shows everything a change will touch."""
    # Each reachable file is listed once, tagged with the edge type of the
    # first row that mentions it; a summary line and the stale-index warning
    # close the report.
    graph = get_client(project)
    try:
        reachable, origin_file = graph.get_blast_radius(target, project, max_hops=hops)
    finally:
        graph.close()

    if origin_file is None and not reachable:
        return f"'{target}' not found in project '{project}'."

    report = [f"Blast radius for '{target}' (via {origin_file or 'unknown file'}):"]
    if not reachable:
        report.append(" (nothing reachable within hops)")

    listed = set()
    for row in reachable:
        path = row["file"]
        if path in listed:
            continue
        listed.add(path)
        report.append(f" {path} [via: {row.get('edge_type', '?')}]")

    report.append(f"\n# blast radius: {len(listed)} files across {hops} hops")
    report.append(_stale_warning(project))
    return "\n".join(report)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# ── batch_impact ─────────────────────────────────────────────────────────────
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
@mcp.tool()
def batch_impact(targets: str, project: str, hops: int = DEFAULT_HOPS) -> str:
    """Union blast radius across multiple targets (comma-separated).
    Use when planning a PR — see the full set of files touched."""
    # targets: comma-separated symbols/files; blank entries are ignored.
    # project: ingested project to query.
    # hops:    passed to the client as max_hops for every target.
    target_list = [t.strip() for t in targets.split(",") if t.strip()]
    if not target_list:
        # Nothing to look up: say so instead of opening a connection and then
        # reporting that "none of the targets" were found.
        return "No targets provided. Pass a comma-separated list of symbols or files."

    client = get_client(project)
    try:
        all_files: set[str] = set()
        lines = [f"Batch impact for {len(target_list)} targets in '{project}':"]
        found_any = False

        for target in target_list:
            rows, target_file = client.get_blast_radius(target, project, max_hops=hops)
            if target_file is None and not rows:
                lines.append(f" WARNING: '{target}' not found")
                continue
            found_any = True
            for r in rows:
                # Report each file once, attributed to the first target that reaches it.
                if r["file"] not in all_files:
                    all_files.add(r["file"])
                    lines.append(f" {r['file']} [via: {target}]")

        if not found_any:
            return f"None of the targets found in project '{project}'."

        lines.append(f"\n# batch impact: {len(all_files)} files, {len(target_list)} input targets, {hops} hops")
    finally:
        client.close()

    lines.append(_stale_warning(project))
    return "\n".join(lines)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# ── tree ─────────────────────────────────────────────────────────────────────
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@mcp.tool()
def tree(project: str) -> str:
    """Folder and file hierarchy for a project."""
    # Renders one line per hierarchy row, indented by the row's depth; rows
    # whose node_type is "Folder" get a trailing slash. The stale-index
    # warning is appended last.
    graph = get_client(project)
    try:
        entries = graph.get_project_tree(project)
    finally:
        graph.close()

    if not entries:
        return f"No hierarchy found for project '{project}'. Run ingest-code first."

    rendered = [project + "/"]
    for entry in entries:
        # A missing or None depth is treated as 0.
        pad = " " * (entry.get("depth", 0) or 0)
        label = entry["name"]
        if entry.get("node_type", "") == "Folder":
            trailer = "/"
        else:
            trailer = ""
        rendered.append(f"{pad}├── {label}{trailer}")

    rendered.append(_stale_warning(project))
    return "\n".join(rendered)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# ── entry point ──────────────────────────────────────────────────────────────
|
|
269
|
+
|
|
270
|
+
def main() -> None:
    """Start the MCP server.

    Command line: ``[transport] [port]``. With ``sse`` as the first argument
    the server is started with the SSE transport on the given port (default
    8000). With any other value, or no argument, ``mcp.run()`` is called with
    its defaults.

    Raises:
        SystemExit: If the port argument is not an integer.
    """
    transport = sys.argv[1] if len(sys.argv) > 1 else "stdio"
    if transport != "sse":
        mcp.run()
        return

    raw_port = sys.argv[2] if len(sys.argv) > 2 else "8000"
    try:
        port = int(raw_port)
    except ValueError:
        sys.exit(f"Invalid port {raw_port!r}: expected an integer")

    # FastMCP.run() takes only the transport (and an optional mount path);
    # host and port are server settings, so passing them to run() raises
    # TypeError.
    # NOTE(review): 0.0.0.0 listens on every interface — confirm this is
    # intended, since the tools expose the code graph to any client that can
    # reach the port.
    mcp.settings.host = "0.0.0.0"
    mcp.settings.port = port
    mcp.run(transport="sse")


if __name__ == "__main__":
    main()
|
graph/setup.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""CodeCompass setup wizard — writes all config files a pip-installed agent needs.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
codecompass setup
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
INSTRUCTIONS_MD = """\
|
|
15
|
+
# CodeCompass — opencode Instructions
|
|
16
|
+
|
|
17
|
+
A Neo4j-backed code dependency graph is available via MCP tools. \
|
|
18
|
+
**Always query it before editing code.** The graph knows what's connected — \
|
|
19
|
+
trust it over file exploration.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Available tools (MCP)
|
|
24
|
+
|
|
25
|
+
All tools use the `codecompass` MCP server. Call them from any working directory.
|
|
26
|
+
|
|
27
|
+
| Tool | Purpose |
|
|
28
|
+
|---|---|
|
|
29
|
+
| `list_projects` | See all ingested projects |
|
|
30
|
+
| `blast_radius` | Every file a symbol/file touches (forward) |
|
|
31
|
+
| `impact` | What calls/uses a symbol (reverse) |
|
|
32
|
+
| `deps` | What a file imports |
|
|
33
|
+
| `trace` | Forward call chain from a function |
|
|
34
|
+
| `tree` | Folder/file hierarchy |
|
|
35
|
+
| `styles` | CSS selectors for an element |
|
|
36
|
+
| `batch_impact` | Union blast radius across N targets |
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## When to use each tool
|
|
41
|
+
|
|
42
|
+
| Scenario | Tool to call first |
|
|
43
|
+
|---|---|
|
|
44
|
+
| About to edit one file or symbol | `blast_radius(symbol, project)` |
|
|
45
|
+
| Planning a PR touching N files | `batch_impact("file1, file2", project)` |
|
|
46
|
+
| Renaming or removing a function | `impact(function_name, project)` |
|
|
47
|
+
| Understanding what a file imports | `deps(file_path, project)` |
|
|
48
|
+
| Tracing a call chain forward | `trace(entry_point, project)` |
|
|
49
|
+
| Orienting in an unfamiliar project | `tree(project)` |
|
|
50
|
+
| Finding which CSS targets an element | `styles(element_name, project)` |
|
|
51
|
+
| Discovering ingested projects | `list_projects()` |
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Mandatory rules
|
|
56
|
+
|
|
57
|
+
1. **Before editing any file in an ingested project, call the codecompass tools first.**
|
|
58
|
+
2. Use `list_projects()` to discover what projects are available.
|
|
59
|
+
3. Use `blast_radius` to understand impact before making changes.
|
|
60
|
+
4. Use `impact` before renaming or removing anything.
|
|
61
|
+
5. If a tool returns a WARNING about stale index, suggest re-running `codecompass ingest-code`.
|
|
62
|
+
6. The graph provides **structural truth** (AST-parsed). Trust it. It cannot tell you what code *means* — only what's connected.
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Project memory
|
|
67
|
+
|
|
68
|
+
Session learnings are stored in `memory/learnings.md`. Design decisions are in \
|
|
69
|
+
`memory/decisions.md`. These accumulate across sessions — read them at session \
|
|
70
|
+
start if relevant to your task.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
DOT_ENV_TEMPLATE = """\
|
|
74
|
+
ANTHROPIC_API_KEY=your_key_here
|
|
75
|
+
NEO4J_URI=bolt://localhost:7687
|
|
76
|
+
NEO4J_USER=neo4j
|
|
77
|
+
NEO4J_PASSWORD=password123
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _memory_plugin_ts(script_dir: str) -> str:
    """Return the TypeScript source of the opencode memory plugin.

    The generated plugin adds a "session memory" prompt to the compaction
    context and runs the helper scripts in ``script_dir`` on the
    ``session.idle`` and ``session.compacted`` events.

    Args:
        script_dir: Directory containing ``save_learnings.py`` and
            ``log_session.py``.

    Returns:
        The plugin source as a string.
    """
    # json.dumps() produces a double-quoted literal with backslashes and quotes
    # escaped, which is also a valid JS/TS string literal. Interpolating the
    # raw path broke on Windows paths (e.g. "C:\Users" contains "\U").
    save_script = json.dumps(f"{script_dir}/save_learnings.py")
    log_script = json.dumps(f"{script_dir}/log_session.py")
    return f"""\
import type {{ Plugin }} from "@opencode-ai/plugin"

const SAVE_SCRIPT = {save_script}
const LOG_SCRIPT = {log_script}

export const CodeCompassMemory: Plugin = async ({{ $, directory }}) => {{
  return {{
    "experimental.session.compacting": async (_input, output) => {{
      output.context.push(`## CodeCompass Session Memory

Before generating the compaction summary, review this conversation and include:

### Key Learnings
- Design decisions made and why
- Problems solved and how
- Constraints discovered
- Patterns established
- Non-obvious insights

### Active Context
- Current task and its status
- Files being modified
- Blockers or dependencies

Format the learnings section so they can be extracted later if needed.`)
    }},

    event: async ({{ event }}) => {{
      if (event.type === "session.idle") {{
        await $`python ${{LOG_SCRIPT}} ${{directory}}`.quiet().nothrow()
      }}
      if (event.type === "session.compacted") {{
        await $`python ${{SAVE_SCRIPT}} ${{directory}}`.quiet().nothrow()
      }}
    }},
  }}
}}
"""
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _save_learnings_py(memory_dir: str) -> str:
    """Return the source of the generated ``save_learnings.py`` helper script.

    The generated script appends a dated "post-compact" entry to
    ``learnings.md`` inside ``memory_dir``, listing the files that
    ``git diff --name-only HEAD`` reports for the directory given as its first
    command-line argument (or the current directory).

    Args:
        memory_dir: Directory the generated script writes ``learnings.md`` to.

    Returns:
        The script source as a string.
    """
    # {memory_dir!r} embeds the path as a valid Python literal. Interpolating
    # it raw inside quotes made the generated script a SyntaxError for Windows
    # paths such as C:\Users\... ("\U" starts a unicode escape).
    return f"""\
from __future__ import annotations

import subprocess, sys
from datetime import datetime
from pathlib import Path

MEMORY_DIR = Path({memory_dir!r})
LEARNINGS_FILE = MEMORY_DIR / "learnings.md"


def _get_changed_files(cwd: str) -> list[str]:
    try:
        r = subprocess.run(["git", "diff", "--name-only", "HEAD"],
                           capture_output=True, text=True, timeout=5, cwd=cwd)
        return [f.strip() for f in r.stdout.strip().split("\\n") if f.strip()]
    except Exception:
        return []


def main() -> None:
    cwd = sys.argv[1] if len(sys.argv) > 1 else __import__("os").getcwd()
    changed = _get_changed_files(cwd)
    date_key = datetime.now().strftime("%Y-%m-%d")
    lines = [f"\\n\\n## {{date_key}} (post-compact)", f"cwd: {{cwd}}"]
    if changed:
        lines.append(f"Files changed: {{', '.join(changed)}}")
        lines.append(f"- (review conversation for key learnings about: {{', '.join(changed[:3])}})")
    else:
        lines.append("Session compacted — no file changes detected.")
    MEMORY_DIR.mkdir(parents=True, exist_ok=True)
    with open(LEARNINGS_FILE, "a", encoding="utf-8") as f:
        f.write("\\n".join(lines) + "\\n")


if __name__ == "__main__":
    main()
"""
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _log_session_py(memory_dir: str) -> str:
    """Return the source of the generated ``log_session.py`` helper script.

    The generated script appends a timestamped entry to ``session_log.md``
    inside ``memory_dir``, listing the files that
    ``git diff --name-only HEAD`` reports for the directory given as its first
    command-line argument (or the current directory).

    Args:
        memory_dir: Directory the generated script writes ``session_log.md`` to.

    Returns:
        The script source as a string.
    """
    # {memory_dir!r} embeds the path as a valid Python literal. Interpolating
    # it raw inside quotes made the generated script a SyntaxError for Windows
    # paths such as C:\Users\... ("\U" starts a unicode escape).
    return f"""\
from __future__ import annotations

import subprocess, sys
from datetime import datetime
from pathlib import Path

MEMORY_DIR = Path({memory_dir!r})
SESSION_LOG = MEMORY_DIR / "session_log.md"


def _get_changed_files(cwd: str) -> list[str]:
    try:
        r = subprocess.run(["git", "diff", "--name-only", "HEAD"],
                           capture_output=True, text=True, timeout=5, cwd=cwd)
        return [f.strip() for f in r.stdout.strip().split("\\n") if f.strip()]
    except Exception:
        return []


def main() -> None:
    cwd = sys.argv[1] if len(sys.argv) > 1 else __import__("os").getcwd()
    changed = _get_changed_files(cwd)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    lines = [f"\\n\\n## {{timestamp}}", f"cwd: {{cwd}}"]
    lines.append(f"files changed: {{', '.join(changed) if changed else 'none'}}")
    MEMORY_DIR.mkdir(parents=True, exist_ok=True)
    with open(SESSION_LOG, "a", encoding="utf-8") as f:
        f.write("\\n".join(lines) + "\\n")


if __name__ == "__main__":
    main()
"""
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def run_setup() -> None:
    """Write the CodeCompass support files and print the opencode config snippet.

    Creates ``~/.config/opencode/codecompass/{plugins,scripts,memory}``, then
    writes ``instructions.md``, the ``memory.ts`` plugin and the two helper
    scripts there. Creates ``.env`` in the current working directory unless
    one already exists. Finally prints the JSON block to add to
    ``~/.config/opencode/opencode.json``; that file itself is not modified.
    """
    base_dir = Path.home() / ".config" / "opencode" / "codecompass"
    plugins_dir = base_dir / "plugins"
    scripts_dir = base_dir / "scripts"
    memory_dir = base_dir / "memory"

    for d in (plugins_dir, scripts_dir, memory_dir):
        d.mkdir(parents=True, exist_ok=True)

    # All files are written as UTF-8 explicitly: the templates contain
    # non-ASCII characters, and the platform default encoding is not UTF-8
    # everywhere.

    # 1. Write instructions
    instructions_path = base_dir / "instructions.md"
    instructions_path.write_text(INSTRUCTIONS_MD, encoding="utf-8")
    print(f"Wrote {instructions_path}")

    # 2. Write memory plugin
    plugin_path = plugins_dir / "memory.ts"
    plugin_path.write_text(_memory_plugin_ts(str(scripts_dir)), encoding="utf-8")
    print(f"Wrote {plugin_path}")

    # 3. Write helper scripts
    (scripts_dir / "save_learnings.py").write_text(
        _save_learnings_py(str(memory_dir)), encoding="utf-8"
    )
    (scripts_dir / "log_session.py").write_text(
        _log_session_py(str(memory_dir)), encoding="utf-8"
    )
    print(f"Wrote scripts to {scripts_dir}/")

    # 4. Write .env template (never overwrite an existing one)
    env_path = Path.cwd() / ".env"
    if env_path.exists():
        print(f".env exists at {env_path} — skipping")
    else:
        env_path.write_text(DOT_ENV_TEMPLATE, encoding="utf-8")
        print(f"Created {env_path}")

    # 5. Print opencode config
    config_block = {
        "instructions": [str(instructions_path)],
        "mcp": {
            "codecompass": {
                "type": "local",
                "command": ["codecompass-mcp"],
            },
        },
        "plugin": [str(plugin_path)],
    }

    opencode_config = Path.home() / ".config" / "opencode" / "opencode.json"
    print()
    if opencode_config.exists():
        print(f"Merge this into {opencode_config}:")
    else:
        print(f"Add this to {opencode_config}:")
    print()
    print(json.dumps(config_block, indent=2))
    print()
    print("Restart opencode. Then: opencode")
    print('Ask "what ingested projects are available?" — it should use list_projects.')
|
ingestion/__init__.py
ADDED
|
File without changes
|
ingestion/chunker.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import PyPDF2
|
|
3
|
+
|
|
4
|
+
# Rough approximation: 1 token ≈ 4 characters (standard for English prose/code).
|
|
5
|
+
# Used when a tokeniser library is unavailable.
|
|
6
|
+
_CHARS_PER_TOKEN = 4
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _estimate_tokens(text: str) -> int:
|
|
10
|
+
return max(1, len(text) // _CHARS_PER_TOKEN)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def chunk_pdf(filepath: str, tokens_per_chunk: int = 500, overlap_tokens: int = 50) -> list[str]:
|
|
14
|
+
"""Split PDF into overlapping token-sized chunks."""
|
|
15
|
+
with open(filepath, "rb") as f:
|
|
16
|
+
reader = PyPDF2.PdfReader(f)
|
|
17
|
+
full_text = " ".join(page.extract_text() or "" for page in reader.pages)
|
|
18
|
+
return chunk_text(full_text, tokens_per_chunk=tokens_per_chunk, overlap_tokens=overlap_tokens)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def chunk_text(text: str, tokens_per_chunk: int = 500, overlap_tokens: int = 50) -> list[str]:
|
|
22
|
+
"""
|
|
23
|
+
Split text into chunks of approximately `tokens_per_chunk` tokens with
|
|
24
|
+
`overlap_tokens` of overlap between consecutive chunks.
|
|
25
|
+
|
|
26
|
+
Splitting prefers sentence boundaries so that a chunk never cuts mid-sentence.
|
|
27
|
+
Falls back to hard character splitting when no boundary is found.
|
|
28
|
+
"""
|
|
29
|
+
chunk_chars = tokens_per_chunk * _CHARS_PER_TOKEN
|
|
30
|
+
overlap_chars = overlap_tokens * _CHARS_PER_TOKEN
|
|
31
|
+
|
|
32
|
+
# Normalise whitespace but preserve paragraph breaks as sentence boundaries.
|
|
33
|
+
text = re.sub(r"\r\n|\r", "\n", text)
|
|
34
|
+
text = re.sub(r"[ \t]+", " ", text).strip()
|
|
35
|
+
|
|
36
|
+
if not text:
|
|
37
|
+
return []
|
|
38
|
+
|
|
39
|
+
chunks: list[str] = []
|
|
40
|
+
start = 0
|
|
41
|
+
step = chunk_chars - overlap_chars
|
|
42
|
+
|
|
43
|
+
while start < len(text):
|
|
44
|
+
end = start + chunk_chars
|
|
45
|
+
segment = text[start:end]
|
|
46
|
+
|
|
47
|
+
# If this isn't the last chunk, trim to the last sentence boundary.
|
|
48
|
+
if end < len(text):
|
|
49
|
+
# Look for a sentence-ending punctuation followed by whitespace/newline.
|
|
50
|
+
boundary = _last_sentence_boundary(segment)
|
|
51
|
+
if boundary and boundary > chunk_chars // 2:
|
|
52
|
+
segment = segment[:boundary]
|
|
53
|
+
|
|
54
|
+
chunk = segment.strip()
|
|
55
|
+
if chunk:
|
|
56
|
+
chunks.append(chunk)
|
|
57
|
+
|
|
58
|
+
# Advance by the length of the actual segment taken (minus overlap).
|
|
59
|
+
advance = max(len(segment) - overlap_chars, step)
|
|
60
|
+
start += advance
|
|
61
|
+
|
|
62
|
+
return chunks
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _last_sentence_boundary(text: str) -> int | None:
|
|
66
|
+
"""Return the index just after the last sentence-ending boundary in `text`."""
|
|
67
|
+
# Match '. ', '! ', '? ', or end of a paragraph ('\n\n').
|
|
68
|
+
for match in reversed(list(re.finditer(r"(?<=[.!?])\s+|(?<=\n)\n", text))):
|
|
69
|
+
return match.end()
|
|
70
|
+
return None
|