context-mcp-server 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +464 -0
- package/codegraph/__init__.py +0 -0
- package/codegraph/__main__.py +24 -0
- package/codegraph/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/__main__.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/cache.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/config.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/report.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/scanner.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/server.cpython-313.pyc +0 -0
- package/codegraph/cache.py +137 -0
- package/codegraph/config.py +31 -0
- package/codegraph/extractors/__init__.py +0 -0
- package/codegraph/extractors/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/ast_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/audio_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/doc_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/image_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/ast_extractor.py +222 -0
- package/codegraph/extractors/audio_extractor.py +8 -0
- package/codegraph/extractors/doc_extractor.py +34 -0
- package/codegraph/extractors/image_extractor.py +26 -0
- package/codegraph/graph/__init__.py +0 -0
- package/codegraph/graph/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/builder.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/clustering.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/query.cpython-313.pyc +0 -0
- package/codegraph/graph/builder.py +145 -0
- package/codegraph/graph/clustering.py +40 -0
- package/codegraph/graph/query.py +283 -0
- package/codegraph/report.py +115 -0
- package/codegraph/scanner.py +92 -0
- package/codegraph/server.py +514 -0
- package/package.json +62 -0
- package/src/cli.js +1010 -0
- package/src/config.js +89 -0
- package/src/db.js +786 -0
- package/src/guard.js +20 -0
- package/src/hooks/autoContext.js +17 -0
- package/src/hooks/autoLink.js +7 -0
- package/src/http.js +765 -0
- package/src/index.js +47 -0
- package/src/search.js +50 -0
- package/src/server.js +80 -0
- package/src/summarizer.js +124 -0
- package/src/templates/AGENTS.md +76 -0
- package/src/templates/CLAUDE.md +94 -0
- package/src/templates/GEMINI.md +76 -0
- package/src/templates/cursor-rules.mdc +41 -0
- package/src/templates/windsurf-rules.md +35 -0
- package/src/tools/codegraph.js +215 -0
- package/src/tools/context.js +188 -0
- package/src/tools/discussion.js +123 -0
- package/src/tools/errorCheck.js +65 -0
- package/src/tools/fileTools.js +185 -0
- package/src/tools/gitTools.js +259 -0
- package/src/tools/search.js +55 -0
- package/src/vector.js +153 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""
|
|
2
|
+
graph/query.py — natural language graph traversal for codegraph_query.
|
|
3
|
+
|
|
4
|
+
No LLM call on query — pure graph + keyword matching.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def answer(question: str, graph_dict: dict, token_budget: int = 2000) -> dict:
    """
    Answer a natural-language question about the codebase via graph lookups.

    The question is lower-cased, reduced to keywords, matched against node
    names/files, and routed to one handler according to the detected intent.
    The handler's text is followed by a rendered subgraph of its nodes.

    Args:
        question: Free-form question text.
        graph_dict: Graph with optional "nodes", "edges", "communities" and
            "god_nodes" entries (each defaults to an empty list).
        token_budget: Output budget; one token is approximated as 4 chars.

    Returns:
        Dict with keys "question", "answer", "nodes", "confidence",
        "tokens_used" and "truncated".
    """
    all_nodes = graph_dict.get("nodes", [])
    all_edges = graph_dict.get("edges", [])

    lowered = question.lower()
    keywords = _extract_terms(lowered)
    hits = _match_nodes(all_nodes, keywords)
    intent = _detect_intent(lowered)

    # Most handlers work on the single best-scoring node, if there is one.
    has_hit = bool(hits)

    if intent == "depends_on" and has_hit:
        outcome = _depends_on(hits[0], all_edges, all_nodes)
    elif intent == "used_by" and has_hit:
        outcome = _used_by(hits[0], all_edges, all_nodes)
    elif intent == "path" and len(hits) >= 2:
        outcome = _shortest_path(hits[0], hits[1], all_edges, all_nodes)
    elif intent == "list":
        outcome = _list_nodes(all_nodes, keywords)
    elif intent == "god_nodes":
        outcome = _describe_god_nodes(graph_dict.get("god_nodes", []), all_nodes)
    elif intent == "community" and has_hit:
        outcome = _describe_community(hits[0], graph_dict.get("communities", []), all_nodes)
    else:
        # Unknown intent, or an intent that needs matches we do not have.
        outcome = _general_search(hits, all_nodes, all_edges)

    found_nodes = outcome.get("nodes", [])
    body = outcome.get("text", "No answer found.")
    rendered = _render_subgraph(found_nodes, all_edges, token_budget)
    if rendered:
        body = f"{body}\n\n{rendered}"

    # Hard cut at the character budget; the marker is appended after the cut,
    # so the final text can be slightly longer than the budget itself.
    max_chars = token_budget * 4
    truncated = len(body) > max_chars
    if truncated:
        body = body[:max_chars] + "\n…(truncated to token budget)"

    return {
        "question": question,
        "answer": body,
        "nodes": found_nodes,
        "confidence": outcome.get("confidence", "low"),
        "tokens_used": len(body) // 4,
        "truncated": truncated,
    }
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def find_path(from_name: str, to_name: str, graph_dict: dict) -> dict:
    """
    Find the shortest relationship path between two named concepts.

    Both names are resolved with ``_find_by_name``. If either one cannot be
    resolved, the result carries a "message" instead of a "text" entry.

    Returns:
        ``{"path": [...], "found": bool, "text": str}`` when both names
        resolve, otherwise ``{"path": [], "found": False, "message": str}``.
    """
    node_list = graph_dict.get("nodes", [])
    edge_list = graph_dict.get("edges", [])

    source = _find_by_name(node_list, from_name)
    target = _find_by_name(node_list, to_name)

    if source and target:
        outcome = _shortest_path(source, target, edge_list, node_list)
        hops = outcome.get("nodes", [])
        return {"path": hops, "found": bool(hops), "text": outcome.get("text", "")}

    return {
        "path": [],
        "found": False,
        "message": f"Could not find '{from_name}' or '{to_name}' in graph.",
    }
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
def _extract_terms(q: str) -> list[str]:
|
|
84
|
+
stop = {"what", "does", "how", "where", "is", "the", "a", "an", "to", "do",
|
|
85
|
+
"does", "which", "files", "modules", "functions", "classes", "list",
|
|
86
|
+
"show", "find", "get", "all", "me", "about"}
|
|
87
|
+
return [w for w in re.findall(r"\w+", q) if w not in stop and len(w) > 2]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _match_nodes(nodes: list, terms: list) -> list:
|
|
91
|
+
scored = []
|
|
92
|
+
for n in nodes:
|
|
93
|
+
name = (n.get("name", "") or "").lower()
|
|
94
|
+
fpath = (n.get("file", "") or "").lower()
|
|
95
|
+
score = sum(1 for t in terms if t in name or t in fpath)
|
|
96
|
+
if score:
|
|
97
|
+
scored.append((score, n))
|
|
98
|
+
scored.sort(key=lambda x: -x[0])
|
|
99
|
+
return [n for _, n in scored]
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _find_by_name(nodes: list, name: str) -> dict | None:
|
|
103
|
+
name_l = name.lower()
|
|
104
|
+
for n in nodes:
|
|
105
|
+
if (n.get("name", "") or "").lower() == name_l:
|
|
106
|
+
return n
|
|
107
|
+
# partial match
|
|
108
|
+
for n in nodes:
|
|
109
|
+
if name_l in (n.get("name", "") or "").lower():
|
|
110
|
+
return n
|
|
111
|
+
return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _detect_intent(q: str) -> str:
|
|
115
|
+
if any(w in q for w in ("depend", "import", "use", "require")):
|
|
116
|
+
return "depends_on"
|
|
117
|
+
if any(w in q for w in ("used by", "who calls", "caller")):
|
|
118
|
+
return "used_by"
|
|
119
|
+
if any(w in q for w in ("path", "connect", "relate", "between")):
|
|
120
|
+
return "path"
|
|
121
|
+
if any(w in q for w in ("list", "all", "show all", "every")):
|
|
122
|
+
return "list"
|
|
123
|
+
if any(w in q for w in ("god", "central", "most connected", "hub")):
|
|
124
|
+
return "god_nodes"
|
|
125
|
+
if any(w in q for w in ("community", "cluster", "group", "module")):
|
|
126
|
+
return "community"
|
|
127
|
+
return "general"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _depends_on(node: dict, edges: list, nodes: list) -> dict:
|
|
131
|
+
nid = node["id"]
|
|
132
|
+
targets = [e["to"] for e in edges if e["from"] == nid]
|
|
133
|
+
target_nodes = [n for n in nodes if n["id"] in targets]
|
|
134
|
+
names = [n["name"] for n in target_nodes]
|
|
135
|
+
return {
|
|
136
|
+
"text": f"{node['name']} depends on: {', '.join(names) or 'nothing found'}.",
|
|
137
|
+
"nodes": [node] + target_nodes,
|
|
138
|
+
"confidence": "high" if targets else "low",
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _used_by(node: dict, edges: list, nodes: list) -> dict:
|
|
143
|
+
nid = node["id"]
|
|
144
|
+
sources = [e["from"] for e in edges if e["to"] == nid]
|
|
145
|
+
source_nodes = [n for n in nodes if n["id"] in sources]
|
|
146
|
+
names = [n["name"] for n in source_nodes]
|
|
147
|
+
return {
|
|
148
|
+
"text": f"{node['name']} is used by: {', '.join(names) or 'nothing found'}.",
|
|
149
|
+
"nodes": [node] + source_nodes,
|
|
150
|
+
"confidence": "high" if sources else "low",
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _shortest_path(from_node: dict, to_node: dict, edges: list, nodes: list) -> dict:
|
|
155
|
+
# BFS
|
|
156
|
+
adj: dict[str, list[str]] = {}
|
|
157
|
+
for e in edges:
|
|
158
|
+
adj.setdefault(e["from"], []).append(e["to"])
|
|
159
|
+
adj.setdefault(e["to"], []).append(e["from"]) # undirected for path finding
|
|
160
|
+
|
|
161
|
+
start, end = from_node["id"], to_node["id"]
|
|
162
|
+
visited = {start: None}
|
|
163
|
+
queue = [start]
|
|
164
|
+
while queue:
|
|
165
|
+
cur = queue.pop(0)
|
|
166
|
+
if cur == end:
|
|
167
|
+
break
|
|
168
|
+
for nb in adj.get(cur, []):
|
|
169
|
+
if nb not in visited:
|
|
170
|
+
visited[nb] = cur
|
|
171
|
+
queue.append(nb)
|
|
172
|
+
|
|
173
|
+
if end not in visited:
|
|
174
|
+
return {"text": f"No path found between {from_node['name']} and {to_node['name']}.", "nodes": []}
|
|
175
|
+
|
|
176
|
+
path_ids = []
|
|
177
|
+
cur = end
|
|
178
|
+
while cur:
|
|
179
|
+
path_ids.append(cur)
|
|
180
|
+
cur = visited[cur]
|
|
181
|
+
path_ids.reverse()
|
|
182
|
+
|
|
183
|
+
node_map = {n["id"]: n for n in nodes}
|
|
184
|
+
path_nodes = [node_map[i] for i in path_ids if i in node_map]
|
|
185
|
+
names = [n["name"] for n in path_nodes]
|
|
186
|
+
return {
|
|
187
|
+
"text": f"Path: {' → '.join(names)}",
|
|
188
|
+
"nodes": path_nodes,
|
|
189
|
+
"confidence": "medium",
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _list_nodes(nodes: list, terms: list) -> dict:
|
|
194
|
+
matched = _match_nodes(nodes, terms) if terms else nodes[:20]
|
|
195
|
+
names = [f"{n['name']} ({n.get('type','?')} in {n.get('file','?')})" for n in matched[:20]]
|
|
196
|
+
return {
|
|
197
|
+
"text": "\n".join(names) or "No matching nodes.",
|
|
198
|
+
"nodes": matched[:20],
|
|
199
|
+
"confidence": "medium",
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _describe_god_nodes(god_node_ids: list, nodes: list) -> dict:
|
|
204
|
+
node_map = {n["id"]: n for n in nodes}
|
|
205
|
+
god = [node_map[i] for i in god_node_ids if i in node_map]
|
|
206
|
+
names = [f"{n['name']} ({n.get('file','')})" for n in god]
|
|
207
|
+
return {
|
|
208
|
+
"text": f"God nodes (highest connectivity): {', '.join(names) or 'none identified'}.",
|
|
209
|
+
"nodes": god,
|
|
210
|
+
"confidence": "high",
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _describe_community(node: dict, communities: list, nodes: list) -> dict:
|
|
215
|
+
comm_id = node.get("community")
|
|
216
|
+
comm = next((c for c in communities if c["id"] == comm_id), None)
|
|
217
|
+
if not comm:
|
|
218
|
+
return {"text": f"{node['name']} has no community assignment.", "nodes": [node], "confidence": "low"}
|
|
219
|
+
node_map = {n["id"]: n for n in nodes}
|
|
220
|
+
members = [node_map[m] for m in comm["members"] if m in node_map]
|
|
221
|
+
return {
|
|
222
|
+
"text": f"{node['name']} is in community '{comm['label']}' with {len(members)} members.",
|
|
223
|
+
"nodes": members[:10],
|
|
224
|
+
"confidence": "high",
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _general_search(matched: list, nodes: list, edges: list) -> dict:
|
|
229
|
+
if not matched:
|
|
230
|
+
return {"text": "No matching nodes found.", "nodes": [], "confidence": "low"}
|
|
231
|
+
top = matched[:5]
|
|
232
|
+
lines = [f"• {n['name']} ({n.get('type','?')}) in {n.get('file','?')} line {n.get('line','?')}" for n in top]
|
|
233
|
+
return {
|
|
234
|
+
"text": "\n".join(lines),
|
|
235
|
+
"nodes": top,
|
|
236
|
+
"confidence": "medium",
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _render_subgraph(result_nodes: list, all_edges: list, token_budget: int) -> str:
|
|
241
|
+
"""
|
|
242
|
+
Render a subgraph as structured plain text (graphify-style).
|
|
243
|
+
Format:
|
|
244
|
+
NODE name [type] src=file desc=...
|
|
245
|
+
EDGE from → to [relation]
|
|
246
|
+
Sorted by degree descending. Truncated to token_budget.
|
|
247
|
+
"""
|
|
248
|
+
if not result_nodes:
|
|
249
|
+
return ""
|
|
250
|
+
|
|
251
|
+
node_ids = {n["id"] for n in result_nodes}
|
|
252
|
+
# Degree within subgraph
|
|
253
|
+
degree: dict[str, int] = {n["id"]: 0 for n in result_nodes}
|
|
254
|
+
subedges = []
|
|
255
|
+
for e in all_edges:
|
|
256
|
+
if e.get("from") in node_ids and e.get("to") in node_ids:
|
|
257
|
+
subedges.append(e)
|
|
258
|
+
degree[e["from"]] = degree.get(e["from"], 0) + 1
|
|
259
|
+
degree[e["to"]] = degree.get(e["to"], 0) + 1
|
|
260
|
+
|
|
261
|
+
sorted_nodes = sorted(result_nodes, key=lambda n: -degree.get(n["id"], 0))
|
|
262
|
+
|
|
263
|
+
char_limit = token_budget * 4
|
|
264
|
+
lines = []
|
|
265
|
+
chars = 0
|
|
266
|
+
|
|
267
|
+
for n in sorted_nodes:
|
|
268
|
+
desc = n.get("description", "")
|
|
269
|
+
desc_part = f" desc={desc[:80]}" if desc else ""
|
|
270
|
+
line = f"NODE {n.get('name','?')} [{n.get('type','?')}] src={n.get('file','?')}{desc_part}"
|
|
271
|
+
if chars + len(line) > char_limit:
|
|
272
|
+
break
|
|
273
|
+
lines.append(line)
|
|
274
|
+
chars += len(line)
|
|
275
|
+
|
|
276
|
+
for e in subedges:
|
|
277
|
+
line = f"EDGE {e.get('from','')} → {e.get('to','')} [{e.get('relation','?')}]"
|
|
278
|
+
if chars + len(line) > char_limit:
|
|
279
|
+
break
|
|
280
|
+
lines.append(line)
|
|
281
|
+
chars += len(line)
|
|
282
|
+
|
|
283
|
+
return "\n".join(lines)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
report.py — generate REPORT.md from the built graph.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def generate(graph_dict: dict, project_root: str) -> str:
    """
    Build the Markdown report and persist it under the project.

    The report is written to ``<project_root>/codegraph-cache/CODEGRAPH_REPORT.md``
    (UTF-8). The ``codegraph-cache`` directory is created if it is missing;
    its parent must already exist.

    Returns:
        The report text that was written.
    """
    report_text = _build_report(graph_dict)

    target_dir = Path(project_root).joinpath("codegraph-cache")
    target_dir.mkdir(exist_ok=True)
    target_dir.joinpath("CODEGRAPH_REPORT.md").write_text(report_text, encoding="utf-8")

    return report_text
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _build_report(g: dict) -> str:
    """
    Assemble the Markdown report text for a built graph.

    Sections, in order: header with totals, God Nodes, Community Clusters
    (first 10, five member names each), Surprising Connections, Suggested
    Questions, and a Confidence Breakdown of edge confidence labels.
    Missing graph entries default to empty.
    """
    nodes = g.get("nodes", [])
    edges = g.get("edges", [])
    communities = g.get("communities", [])
    god_nodes = g.get("god_nodes", [])
    generated = g.get("generated_at", "")

    node_map = {}
    for entry in nodes:
        node_map[entry["id"]] = entry

    out = []
    out.append("# CodeGraph Report")
    out.append(f"_Generated: {generated}_")
    out.append("")
    out.append(f"**{len(nodes)} nodes** · **{len(edges)} edges** · **{len(communities)} communities**")
    out.append("")

    # ── God nodes ──
    out.extend(["## God Nodes", "", "_Highest-degree concepts everything flows through:_", ""])
    if not god_nodes:
        out.append("_No god nodes identified._")
    else:
        for nid in god_nodes:
            info = node_map.get(nid, {})
            name = info.get("name", nid)
            fpath = info.get("file", "")
            # Count every edge touching this node (a self-loop counts once).
            deg = 0
            for e in edges:
                if e.get("from") == nid or e.get("to") == nid:
                    deg += 1
            out.append(f"- **{name}** `{fpath}` — {deg} connections")
    out.append("")

    # ── Communities ──
    out.extend(["## Community Clusters", ""])
    if not communities:
        out.append("_No communities detected._")
    else:
        for comm in communities[:10]:
            members = comm.get("members", [])
            preview = [node_map.get(m, {}).get("name", m) for m in members[:5]]
            heading = comm.get("label") or f"Cluster {comm['id']}"
            overflow = " …" if len(members) > 5 else ""
            out.append(f"### {heading} ({len(members)} nodes)")
            out.append(f"Members: {', '.join(preview)}{overflow}")
            out.append("")
    out.append("")

    # ── Surprising cross-module connections ──
    out.extend(["## Surprising Connections", ""])
    cross = _cross_module_edges(edges, node_map)
    if not cross:
        out.append("_None found._")
    else:
        for u_name, v_name, rel in cross[:8]:
            out.append(f"- **{u_name}** →({rel})→ **{v_name}**")
    out.append("")

    # ── Suggested questions ──
    out.extend(["## Suggested Questions", ""])
    out.extend(f"- {q}" for q in _suggest_questions(nodes, god_nodes, node_map))
    out.append("")

    # ── Confidence breakdown ──
    tally = {}
    for e in edges:
        level = e.get("confidence", "UNKNOWN")
        tally[level] = tally.get(level, 0) + 1
    out.extend(["## Confidence Breakdown", ""])
    for level, count in sorted(tally.items()):
        out.append(f"- **{level}**: {count} edges")

    return "\n".join(out)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _cross_module_edges(edges: list, node_map: dict) -> list[tuple]:
|
|
92
|
+
results = []
|
|
93
|
+
for e in edges:
|
|
94
|
+
u = node_map.get(e.get("from", ""), {})
|
|
95
|
+
v = node_map.get(e.get("to", ""), {})
|
|
96
|
+
u_file = (u.get("file", "") or "").split("/")[0]
|
|
97
|
+
v_file = (v.get("file", "") or "").split("/")[0]
|
|
98
|
+
if u_file and v_file and u_file != v_file:
|
|
99
|
+
results.append((u.get("name", "?"), v.get("name", "?"), e.get("relation", "?")))
|
|
100
|
+
return results[:8]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _suggest_questions(nodes: list, god_node_ids: list, node_map: dict) -> list[str]:
|
|
104
|
+
questions = []
|
|
105
|
+
if god_node_ids:
|
|
106
|
+
name = node_map.get(god_node_ids[0], {}).get("name", "")
|
|
107
|
+
if name:
|
|
108
|
+
questions.append(f"What does {name} depend on?")
|
|
109
|
+
questions.append(f"What uses {name}?")
|
|
110
|
+
classes = [n for n in nodes if n.get("type") == "class"][:2]
|
|
111
|
+
for c in classes:
|
|
112
|
+
questions.append(f"What is the relationship between {c['name']} and other modules?")
|
|
113
|
+
questions.append("Which files have the most connections?")
|
|
114
|
+
questions.append("Are there any circular dependencies?")
|
|
115
|
+
return questions[:5]
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
scanner.py — walk a project directory, hash every file, detect what changed.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Iterator
|
|
8
|
+
|
|
9
|
+
from .cache import file_hash, get_cached_nodes, set_cached_nodes, remove_deleted, load_cache, save_cache
|
|
10
|
+
from .config import (
|
|
11
|
+
DEFAULT_IGNORE, MAX_FILE_BYTES,
|
|
12
|
+
CODE_EXTENSIONS, SQL_EXTENSIONS, CONFIG_EXTENSIONS,
|
|
13
|
+
DOC_EXTENSIONS, PDF_EXTENSIONS, IMAGE_EXTENSIONS,
|
|
14
|
+
AUDIO_EXTENSIONS, VIDEO_EXTENSIONS,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _should_ignore(name: str, ignore: set) -> bool:
|
|
19
|
+
return name.startswith(".") and name != ".env" or name in ignore
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def walk_files(root: str, extra_ignore: set | None = None) -> Iterator[str]:
    """
    Yield the path of every file under ``root`` that survives filtering.

    Directories whose name starts with "." or appears in
    ``DEFAULT_IGNORE | extra_ignore`` are pruned and never descended into.
    Files larger than ``MAX_FILE_BYTES``, or whose size cannot be read, are
    skipped. Paths are ``os.path.join`` of the walked directory and the file
    name, so they are absolute when ``root`` is.

    NOTE(review): only directory names are checked against the ignore set;
    file names are not filtered here.
    """
    skip = DEFAULT_IGNORE | (extra_ignore or set())

    for current_dir, subdirs, files in os.walk(root):
        # Assign through the slice so os.walk sees the pruned list.
        kept = []
        for sub in subdirs:
            if sub.startswith(".") or sub in skip:
                continue
            kept.append(sub)
        subdirs[:] = kept

        for filename in files:
            candidate = os.path.join(current_dir, filename)
            try:
                size = os.path.getsize(candidate)
            except OSError:
                continue
            if size <= MAX_FILE_BYTES:
                yield candidate
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def classify_file(path: str) -> str:
    """
    Map a file to its extraction category by lower-cased extension.

    Categories are tried in a fixed order (code, sql, config, doc, pdf,
    image, audio, video); the first extension set containing the suffix
    wins. Anything else is "unknown".
    """
    suffix = Path(path).suffix.lower()
    lookup_order = (
        (CODE_EXTENSIONS, "code"),
        (SQL_EXTENSIONS, "sql"),
        (CONFIG_EXTENSIONS, "config"),
        (DOC_EXTENSIONS, "doc"),
        (PDF_EXTENSIONS, "pdf"),
        (IMAGE_EXTENSIONS, "image"),
        (AUDIO_EXTENSIONS, "audio"),
        (VIDEO_EXTENSIONS, "video"),
    )
    for extensions, category in lookup_order:
        if suffix in extensions:
            return category
    return "unknown"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def scan(project_root: str, extra_ignore: set | None = None) -> dict:
    """
    Walk the project and diff it against the cache.

    Every walked file is recorded as existing; files whose category is
    "unknown" are not hashed or classified further.

    Returns:
        {
          "cached":  { rel_path: [nodes] },   # hash unchanged, nodes from cache
          "changed": { rel_path: abs_path },  # no cache hit, needs re-extraction
          "deleted": [rel_path],              # as reported by remove_deleted
          "cache":   dict,                    # the loaded cache object
          "root":    str,                     # absolute project root
        }
    """
    root = os.path.abspath(project_root)
    cache = load_cache(root)

    unchanged = {}
    stale = {}
    seen = set()

    for abs_path in walk_files(root, extra_ignore):
        # Cache keys use forward slashes regardless of platform.
        rel = os.path.relpath(abs_path, root).replace("\\", "/")
        seen.add(rel)

        if classify_file(abs_path) == "unknown":
            continue

        digest = file_hash(abs_path)
        hit = get_cached_nodes(cache, rel, digest)
        if hit is None:
            stale[rel] = abs_path
        else:
            unchanged[rel] = hit

    removed = remove_deleted(cache, seen)

    return {
        "cached": unchanged,
        "changed": stale,
        "deleted": removed,
        "cache": cache,
        "root": root,
    }
|