context-mcp-server 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +464 -0
  2. package/codegraph/__init__.py +0 -0
  3. package/codegraph/__main__.py +24 -0
  4. package/codegraph/__pycache__/__init__.cpython-313.pyc +0 -0
  5. package/codegraph/__pycache__/__main__.cpython-313.pyc +0 -0
  6. package/codegraph/__pycache__/cache.cpython-313.pyc +0 -0
  7. package/codegraph/__pycache__/config.cpython-313.pyc +0 -0
  8. package/codegraph/__pycache__/report.cpython-313.pyc +0 -0
  9. package/codegraph/__pycache__/scanner.cpython-313.pyc +0 -0
  10. package/codegraph/__pycache__/server.cpython-313.pyc +0 -0
  11. package/codegraph/cache.py +137 -0
  12. package/codegraph/config.py +31 -0
  13. package/codegraph/extractors/__init__.py +0 -0
  14. package/codegraph/extractors/__pycache__/__init__.cpython-313.pyc +0 -0
  15. package/codegraph/extractors/__pycache__/ast_extractor.cpython-313.pyc +0 -0
  16. package/codegraph/extractors/__pycache__/audio_extractor.cpython-313.pyc +0 -0
  17. package/codegraph/extractors/__pycache__/doc_extractor.cpython-313.pyc +0 -0
  18. package/codegraph/extractors/__pycache__/image_extractor.cpython-313.pyc +0 -0
  19. package/codegraph/extractors/ast_extractor.py +222 -0
  20. package/codegraph/extractors/audio_extractor.py +8 -0
  21. package/codegraph/extractors/doc_extractor.py +34 -0
  22. package/codegraph/extractors/image_extractor.py +26 -0
  23. package/codegraph/graph/__init__.py +0 -0
  24. package/codegraph/graph/__pycache__/__init__.cpython-313.pyc +0 -0
  25. package/codegraph/graph/__pycache__/builder.cpython-313.pyc +0 -0
  26. package/codegraph/graph/__pycache__/clustering.cpython-313.pyc +0 -0
  27. package/codegraph/graph/__pycache__/query.cpython-313.pyc +0 -0
  28. package/codegraph/graph/builder.py +145 -0
  29. package/codegraph/graph/clustering.py +40 -0
  30. package/codegraph/graph/query.py +283 -0
  31. package/codegraph/report.py +115 -0
  32. package/codegraph/scanner.py +92 -0
  33. package/codegraph/server.py +514 -0
  34. package/package.json +62 -0
  35. package/src/cli.js +1010 -0
  36. package/src/config.js +89 -0
  37. package/src/db.js +786 -0
  38. package/src/guard.js +20 -0
  39. package/src/hooks/autoContext.js +17 -0
  40. package/src/hooks/autoLink.js +7 -0
  41. package/src/http.js +765 -0
  42. package/src/index.js +47 -0
  43. package/src/search.js +50 -0
  44. package/src/server.js +80 -0
  45. package/src/summarizer.js +124 -0
  46. package/src/templates/AGENTS.md +76 -0
  47. package/src/templates/CLAUDE.md +94 -0
  48. package/src/templates/GEMINI.md +76 -0
  49. package/src/templates/cursor-rules.mdc +41 -0
  50. package/src/templates/windsurf-rules.md +35 -0
  51. package/src/tools/codegraph.js +215 -0
  52. package/src/tools/context.js +188 -0
  53. package/src/tools/discussion.js +123 -0
  54. package/src/tools/errorCheck.js +65 -0
  55. package/src/tools/fileTools.js +185 -0
  56. package/src/tools/gitTools.js +259 -0
  57. package/src/tools/search.js +55 -0
  58. package/src/vector.js +153 -0
@@ -0,0 +1,283 @@
1
+ """
2
+ graph/query.py — natural language graph traversal for codegraph_query.
3
+
4
+ No LLM call on query — pure graph + keyword matching.
5
+ """
6
+
7
+ import re
8
+ from typing import Any
9
+
10
+
11
def answer(question: str, graph_dict: dict, token_budget: int = 2000) -> dict:
    """
    Answer a natural language question about the codebase using graph traversal.

    The question is lower-cased, reduced to search terms, matched against the
    graph's nodes and routed to one of the intent handlers (depends_on,
    used_by, path, list, god_nodes, community). Anything that does not fit an
    intent, or lacks the matched nodes the intent needs, falls back to a
    general search.

    Args:
        question: Free-form question text.
        graph_dict: Graph with optional "nodes", "edges", "communities" and
            "god_nodes" entries; missing entries are treated as empty.
        token_budget: Upper bound for the answer size (approx 4 chars = 1 token).

    Returns:
        { question, answer, nodes, confidence, tokens_used, truncated }.
        The answer text, including the truncation marker when one is added,
        never exceeds token_budget * 4 characters.
    """
    nodes = graph_dict.get("nodes", [])
    edges = graph_dict.get("edges", [])
    communities = graph_dict.get("communities", [])
    god_nodes = graph_dict.get("god_nodes", [])

    q_lower = question.lower()
    terms = _extract_terms(q_lower)

    # 1. Find relevant nodes by name/file match
    matched = _match_nodes(nodes, terms)

    # 2. Detect query intent
    intent = _detect_intent(q_lower)

    if intent == "depends_on" and matched:
        result = _depends_on(matched[0], edges, nodes)
    elif intent == "used_by" and matched:
        result = _used_by(matched[0], edges, nodes)
    elif intent == "path" and len(matched) >= 2:
        result = _shortest_path(matched[0], matched[1], edges, nodes)
    elif intent == "list":
        result = _list_nodes(nodes, terms)
    elif intent == "god_nodes":
        result = _describe_god_nodes(god_nodes, nodes)
    elif intent == "community" and matched:
        result = _describe_community(matched[0], communities, nodes)
    else:
        result = _general_search(matched, nodes, edges)

    # Render subgraph as structured text, truncated to token_budget
    subgraph_text = _render_subgraph(result.get("nodes", []), edges, token_budget)
    answer_text = result.get("text", "No answer found.")
    if subgraph_text:
        answer_text = f"{answer_text}\n\n{subgraph_text}"

    # Truncate to budget (4 chars ≈ 1 token). A negative budget is treated as
    # zero so the slice below cannot wrap around from the end of the string.
    char_limit = max(token_budget, 0) * 4
    truncated = len(answer_text) > char_limit
    if truncated:
        suffix = "\n…(truncated to token budget)"
        # Reserve room for the marker so the final text still fits the budget;
        # the outer slice covers budgets smaller than the marker itself.
        keep = max(char_limit - len(suffix), 0)
        answer_text = (answer_text[:keep] + suffix)[:char_limit]

    return {
        "question": question,
        "answer": answer_text,
        "nodes": result.get("nodes", []),
        "confidence": result.get("confidence", "low"),
        "tokens_used": len(answer_text) // 4,
        "truncated": truncated,
    }
67
+
68
+
69
def find_path(from_name: str, to_name: str, graph_dict: dict) -> dict:
    """
    Find shortest relationship path between two concepts.

    Both names are resolved with _find_by_name. When either one cannot be
    resolved the result is {path: [], found: False, message}; otherwise it is
    {path, found, text} built from the _shortest_path result.
    """
    all_nodes = graph_dict.get("nodes", [])
    all_edges = graph_dict.get("edges", [])

    source = _find_by_name(all_nodes, from_name)
    target = _find_by_name(all_nodes, to_name)
    if not (source and target):
        return {
            "path": [],
            "found": False,
            "message": f"Could not find '{from_name}' or '{to_name}' in graph.",
        }

    outcome = _shortest_path(source, target, all_edges, all_nodes)
    hops = outcome.get("nodes", [])
    return {
        "path": hops,
        "found": bool(hops),
        "text": outcome.get("text", ""),
    }
79
+
80
+
81
+ # ── Helpers ───────────────────────────────────────────────────────────────────
82
+
83
+ def _extract_terms(q: str) -> list[str]:
84
+ stop = {"what", "does", "how", "where", "is", "the", "a", "an", "to", "do",
85
+ "does", "which", "files", "modules", "functions", "classes", "list",
86
+ "show", "find", "get", "all", "me", "about"}
87
+ return [w for w in re.findall(r"\w+", q) if w not in stop and len(w) > 2]
88
+
89
+
90
+ def _match_nodes(nodes: list, terms: list) -> list:
91
+ scored = []
92
+ for n in nodes:
93
+ name = (n.get("name", "") or "").lower()
94
+ fpath = (n.get("file", "") or "").lower()
95
+ score = sum(1 for t in terms if t in name or t in fpath)
96
+ if score:
97
+ scored.append((score, n))
98
+ scored.sort(key=lambda x: -x[0])
99
+ return [n for _, n in scored]
100
+
101
+
102
+ def _find_by_name(nodes: list, name: str) -> dict | None:
103
+ name_l = name.lower()
104
+ for n in nodes:
105
+ if (n.get("name", "") or "").lower() == name_l:
106
+ return n
107
+ # partial match
108
+ for n in nodes:
109
+ if name_l in (n.get("name", "") or "").lower():
110
+ return n
111
+ return None
112
+
113
+
114
+ def _detect_intent(q: str) -> str:
115
+ if any(w in q for w in ("depend", "import", "use", "require")):
116
+ return "depends_on"
117
+ if any(w in q for w in ("used by", "who calls", "caller")):
118
+ return "used_by"
119
+ if any(w in q for w in ("path", "connect", "relate", "between")):
120
+ return "path"
121
+ if any(w in q for w in ("list", "all", "show all", "every")):
122
+ return "list"
123
+ if any(w in q for w in ("god", "central", "most connected", "hub")):
124
+ return "god_nodes"
125
+ if any(w in q for w in ("community", "cluster", "group", "module")):
126
+ return "community"
127
+ return "general"
128
+
129
+
130
+ def _depends_on(node: dict, edges: list, nodes: list) -> dict:
131
+ nid = node["id"]
132
+ targets = [e["to"] for e in edges if e["from"] == nid]
133
+ target_nodes = [n for n in nodes if n["id"] in targets]
134
+ names = [n["name"] for n in target_nodes]
135
+ return {
136
+ "text": f"{node['name']} depends on: {', '.join(names) or 'nothing found'}.",
137
+ "nodes": [node] + target_nodes,
138
+ "confidence": "high" if targets else "low",
139
+ }
140
+
141
+
142
+ def _used_by(node: dict, edges: list, nodes: list) -> dict:
143
+ nid = node["id"]
144
+ sources = [e["from"] for e in edges if e["to"] == nid]
145
+ source_nodes = [n for n in nodes if n["id"] in sources]
146
+ names = [n["name"] for n in source_nodes]
147
+ return {
148
+ "text": f"{node['name']} is used by: {', '.join(names) or 'nothing found'}.",
149
+ "nodes": [node] + source_nodes,
150
+ "confidence": "high" if sources else "low",
151
+ }
152
+
153
+
154
+ def _shortest_path(from_node: dict, to_node: dict, edges: list, nodes: list) -> dict:
155
+ # BFS
156
+ adj: dict[str, list[str]] = {}
157
+ for e in edges:
158
+ adj.setdefault(e["from"], []).append(e["to"])
159
+ adj.setdefault(e["to"], []).append(e["from"]) # undirected for path finding
160
+
161
+ start, end = from_node["id"], to_node["id"]
162
+ visited = {start: None}
163
+ queue = [start]
164
+ while queue:
165
+ cur = queue.pop(0)
166
+ if cur == end:
167
+ break
168
+ for nb in adj.get(cur, []):
169
+ if nb not in visited:
170
+ visited[nb] = cur
171
+ queue.append(nb)
172
+
173
+ if end not in visited:
174
+ return {"text": f"No path found between {from_node['name']} and {to_node['name']}.", "nodes": []}
175
+
176
+ path_ids = []
177
+ cur = end
178
+ while cur:
179
+ path_ids.append(cur)
180
+ cur = visited[cur]
181
+ path_ids.reverse()
182
+
183
+ node_map = {n["id"]: n for n in nodes}
184
+ path_nodes = [node_map[i] for i in path_ids if i in node_map]
185
+ names = [n["name"] for n in path_nodes]
186
+ return {
187
+ "text": f"Path: {' → '.join(names)}",
188
+ "nodes": path_nodes,
189
+ "confidence": "medium",
190
+ }
191
+
192
+
193
def _list_nodes(nodes: list, terms: list) -> dict:
    """
    List up to 20 nodes, one "name (type in file)" line each.

    With search terms the list comes from _match_nodes; without terms it is
    simply the first 20 nodes of the graph.
    """
    candidates = nodes[:20] if not terms else _match_nodes(nodes, terms)
    shown = candidates[:20]

    rows = []
    for item in shown:
        kind = item.get('type', '?')
        where = item.get('file', '?')
        rows.append(f"{item['name']} ({kind} in {where})")

    return {
        "text": "\n".join(rows) or "No matching nodes.",
        "nodes": shown,
        "confidence": "medium",
    }
201
+
202
+
203
+ def _describe_god_nodes(god_node_ids: list, nodes: list) -> dict:
204
+ node_map = {n["id"]: n for n in nodes}
205
+ god = [node_map[i] for i in god_node_ids if i in node_map]
206
+ names = [f"{n['name']} ({n.get('file','')})" for n in god]
207
+ return {
208
+ "text": f"God nodes (highest connectivity): {', '.join(names) or 'none identified'}.",
209
+ "nodes": god,
210
+ "confidence": "high",
211
+ }
212
+
213
+
214
+ def _describe_community(node: dict, communities: list, nodes: list) -> dict:
215
+ comm_id = node.get("community")
216
+ comm = next((c for c in communities if c["id"] == comm_id), None)
217
+ if not comm:
218
+ return {"text": f"{node['name']} has no community assignment.", "nodes": [node], "confidence": "low"}
219
+ node_map = {n["id"]: n for n in nodes}
220
+ members = [node_map[m] for m in comm["members"] if m in node_map]
221
+ return {
222
+ "text": f"{node['name']} is in community '{comm['label']}' with {len(members)} members.",
223
+ "nodes": members[:10],
224
+ "confidence": "high",
225
+ }
226
+
227
+
228
+ def _general_search(matched: list, nodes: list, edges: list) -> dict:
229
+ if not matched:
230
+ return {"text": "No matching nodes found.", "nodes": [], "confidence": "low"}
231
+ top = matched[:5]
232
+ lines = [f"• {n['name']} ({n.get('type','?')}) in {n.get('file','?')} line {n.get('line','?')}" for n in top]
233
+ return {
234
+ "text": "\n".join(lines),
235
+ "nodes": top,
236
+ "confidence": "medium",
237
+ }
238
+
239
+
240
+ def _render_subgraph(result_nodes: list, all_edges: list, token_budget: int) -> str:
241
+ """
242
+ Render a subgraph as structured plain text (graphify-style).
243
+ Format:
244
+ NODE name [type] src=file desc=...
245
+ EDGE from → to [relation]
246
+ Sorted by degree descending. Truncated to token_budget.
247
+ """
248
+ if not result_nodes:
249
+ return ""
250
+
251
+ node_ids = {n["id"] for n in result_nodes}
252
+ # Degree within subgraph
253
+ degree: dict[str, int] = {n["id"]: 0 for n in result_nodes}
254
+ subedges = []
255
+ for e in all_edges:
256
+ if e.get("from") in node_ids and e.get("to") in node_ids:
257
+ subedges.append(e)
258
+ degree[e["from"]] = degree.get(e["from"], 0) + 1
259
+ degree[e["to"]] = degree.get(e["to"], 0) + 1
260
+
261
+ sorted_nodes = sorted(result_nodes, key=lambda n: -degree.get(n["id"], 0))
262
+
263
+ char_limit = token_budget * 4
264
+ lines = []
265
+ chars = 0
266
+
267
+ for n in sorted_nodes:
268
+ desc = n.get("description", "")
269
+ desc_part = f" desc={desc[:80]}" if desc else ""
270
+ line = f"NODE {n.get('name','?')} [{n.get('type','?')}] src={n.get('file','?')}{desc_part}"
271
+ if chars + len(line) > char_limit:
272
+ break
273
+ lines.append(line)
274
+ chars += len(line)
275
+
276
+ for e in subedges:
277
+ line = f"EDGE {e.get('from','')} → {e.get('to','')} [{e.get('relation','?')}]"
278
+ if chars + len(line) > char_limit:
279
+ break
280
+ lines.append(line)
281
+ chars += len(line)
282
+
283
+ return "\n".join(lines)
@@ -0,0 +1,115 @@
1
+ """
2
+ report.py — generate REPORT.md from the built graph.
3
+ """
4
+
5
+ from pathlib import Path
6
+
7
+
8
def generate(graph_dict: dict, project_root: str) -> str:
    """
    Build the report and write it to disk.

    The markdown is written as UTF-8 to
    <project_root>/codegraph-cache/CODEGRAPH_REPORT.md (the cache directory is
    created if it does not exist yet) and is also returned to the caller.
    """
    report_text = _build_report(graph_dict)

    target_dir = Path(project_root).joinpath("codegraph-cache")
    target_dir.mkdir(exist_ok=True)
    target_dir.joinpath("CODEGRAPH_REPORT.md").write_text(report_text, encoding="utf-8")

    return report_text
16
+
17
+
18
def _build_report(g: dict) -> str:
    """
    Render the graph as a markdown report.

    Sections, in order: header with node/edge/community counts, God Nodes,
    Community Clusters (first 10, five member names each), Surprising
    Connections (up to 8), Suggested Questions, and a Confidence Breakdown
    counting edges per "confidence" value.
    """
    nodes = g.get("nodes", [])
    edges = g.get("edges", [])
    communities = g.get("communities", [])
    god_nodes = g.get("god_nodes", [])
    generated = g.get("generated_at", "")

    by_id = {}
    for entry in nodes:
        by_id[entry["id"]] = entry

    out = []
    out.append("# CodeGraph Report")
    out.append(f"_Generated: {generated}_")
    out.append("")
    out.append(f"**{len(nodes)} nodes** · **{len(edges)} edges** · **{len(communities)} communities**")
    out.append("")

    # God nodes
    out.extend(["## God Nodes", "", "_Highest-degree concepts everything flows through:_", ""])
    if not god_nodes:
        out.append("_No god nodes identified._")
    else:
        for gid in god_nodes:
            info = by_id.get(gid, {})
            label = info.get("name", gid)
            location = info.get("file", "")
            # An edge counts once even when it starts and ends on this node.
            touching = [e for e in edges if e.get("from") == gid or e.get("to") == gid]
            out.append(f"- **{label}** `{location}` — {len(touching)} connections")
    out.append("")

    # Communities
    out.extend(["## Community Clusters", ""])
    if not communities:
        out.append("_No communities detected._")
        out.append("")
    else:
        for cluster in communities[:10]:
            member_ids = cluster.get("members", [])
            preview = [by_id.get(m, {}).get("name", m) for m in member_ids[:5]]
            heading = cluster.get("label") or f"Cluster {cluster['id']}"
            more = " …" if len(member_ids) > 5 else ""
            out.append(f"### {heading} ({len(member_ids)} nodes)")
            out.append(f"Members: {', '.join(preview)}{more}")
            out.append("")

    # Surprising cross-module connections
    out.extend(["## Surprising Connections", ""])
    cross_links = _cross_module_edges(edges, by_id)
    if not cross_links:
        out.append("_None found._")
    else:
        for src_name, dst_name, relation in cross_links[:8]:
            out.append(f"- **{src_name}** →({relation})→ **{dst_name}**")
    out.append("")

    # Suggested questions
    out.extend(["## Suggested Questions", ""])
    for suggestion in _suggest_questions(nodes, god_nodes, by_id):
        out.append(f"- {suggestion}")
    out.append("")

    # Confidence breakdown
    tally = {}
    for e in edges:
        level = e.get("confidence", "UNKNOWN")
        tally[level] = tally.get(level, 0) + 1
    out.extend(["## Confidence Breakdown", ""])
    for level, total in sorted(tally.items()):
        out.append(f"- **{level}**: {total} edges")

    return "\n".join(out)
89
+
90
+
91
+ def _cross_module_edges(edges: list, node_map: dict) -> list[tuple]:
92
+ results = []
93
+ for e in edges:
94
+ u = node_map.get(e.get("from", ""), {})
95
+ v = node_map.get(e.get("to", ""), {})
96
+ u_file = (u.get("file", "") or "").split("/")[0]
97
+ v_file = (v.get("file", "") or "").split("/")[0]
98
+ if u_file and v_file and u_file != v_file:
99
+ results.append((u.get("name", "?"), v.get("name", "?"), e.get("relation", "?")))
100
+ return results[:8]
101
+
102
+
103
+ def _suggest_questions(nodes: list, god_node_ids: list, node_map: dict) -> list[str]:
104
+ questions = []
105
+ if god_node_ids:
106
+ name = node_map.get(god_node_ids[0], {}).get("name", "")
107
+ if name:
108
+ questions.append(f"What does {name} depend on?")
109
+ questions.append(f"What uses {name}?")
110
+ classes = [n for n in nodes if n.get("type") == "class"][:2]
111
+ for c in classes:
112
+ questions.append(f"What is the relationship between {c['name']} and other modules?")
113
+ questions.append("Which files have the most connections?")
114
+ questions.append("Are there any circular dependencies?")
115
+ return questions[:5]
@@ -0,0 +1,92 @@
1
+ """
2
+ scanner.py — walk a project directory, hash every file, detect what changed.
3
+ """
4
+
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Iterator
8
+
9
+ from .cache import file_hash, get_cached_nodes, set_cached_nodes, remove_deleted, load_cache, save_cache
10
+ from .config import (
11
+ DEFAULT_IGNORE, MAX_FILE_BYTES,
12
+ CODE_EXTENSIONS, SQL_EXTENSIONS, CONFIG_EXTENSIONS,
13
+ DOC_EXTENSIONS, PDF_EXTENSIONS, IMAGE_EXTENSIONS,
14
+ AUDIO_EXTENSIONS, VIDEO_EXTENSIONS,
15
+ )
16
+
17
+
18
+ def _should_ignore(name: str, ignore: set) -> bool:
19
+ return name.startswith(".") and name != ".env" or name in ignore
20
+
21
+
22
def walk_files(root: str, extra_ignore: set | None = None) -> Iterator[str]:
    """
    Yield paths to all non-ignored files under root.

    Paths are built by joining onto root, so they are absolute when root is.

    Args:
        root: Directory to walk.
        extra_ignore: Additional directory or file names to skip on top of
            DEFAULT_IGNORE.

    Skipped entries:
        - directories whose name is ignored or starts with "." (never
          descended into);
        - files whose name is in the ignore set;
        - files larger than MAX_FILE_BYTES or whose size cannot be read.
    """
    ignore = DEFAULT_IGNORE | (extra_ignore or set())
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune ignored dirs in-place so os.walk doesn't descend
        dirnames[:] = [d for d in dirnames if d not in ignore and not d.startswith(".")]
        for fname in filenames:
            # Honour the ignore set for file names too, not only directories.
            if fname in ignore:
                continue
            abs_path = os.path.join(dirpath, fname)
            try:
                if os.path.getsize(abs_path) > MAX_FILE_BYTES:
                    continue
            except OSError:
                # Unreadable or vanished file (e.g. broken symlink): skip it.
                continue
            yield abs_path
36
+
37
+
38
def classify_file(path: str) -> str:
    """
    Return extraction category for a file.

    The lower-cased suffix is looked up in the extension sets in a fixed
    order; the first set containing it decides the category. Files matching
    none of them are "unknown".
    """
    suffix = Path(path).suffix.lower()
    buckets = (
        ("code", CODE_EXTENSIONS),
        ("sql", SQL_EXTENSIONS),
        ("config", CONFIG_EXTENSIONS),
        ("doc", DOC_EXTENSIONS),
        ("pdf", PDF_EXTENSIONS),
        ("image", IMAGE_EXTENSIONS),
        ("audio", AUDIO_EXTENSIONS),
        ("video", VIDEO_EXTENSIONS),
    )
    for category, extensions in buckets:
        if suffix in extensions:
            return category
    return "unknown"
50
+
51
+
52
def scan(project_root: str, extra_ignore: set | None = None) -> dict:
    """
    Walk project, diff against cache.

    Every walked file is recorded as existing. Files of a known category are
    hashed and looked up in the cache: a hit goes to "cached", a miss to
    "changed". Files of unknown category are neither hashed nor reported.

    Returns:
        {
            "cached": { rel_path: [nodes] },    # cache lookup returned nodes for this hash
            "changed": { rel_path: abs_path },  # no cached nodes for this hash — needs extraction
            "deleted": ...,                     # result of remove_deleted(cache, existing paths)
            "cache": dict,                      # the loaded cache object passed to the helpers
            "root": str,                        # absolute project root
        }
        rel_path always uses "/" separators.
    """
    root = os.path.abspath(project_root)
    cache = load_cache(root)

    unchanged = {}
    stale = {}
    seen = set()

    for abs_path in walk_files(root, extra_ignore):
        rel_path = os.path.relpath(abs_path, root).replace("\\", "/")
        seen.add(rel_path)
        if classify_file(abs_path) == "unknown":
            continue
        digest = file_hash(abs_path)
        hit = get_cached_nodes(cache, rel_path, digest)
        if hit is None:
            stale[rel_path] = abs_path
        else:
            unchanged[rel_path] = hit

    gone = remove_deleted(cache, seen)

    return {
        "cached": unchanged,
        "changed": stale,
        "deleted": gone,
        "cache": cache,
        "root": root,
    }
+ }