context-mcp-server 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +464 -0
- package/codegraph/__init__.py +0 -0
- package/codegraph/__main__.py +24 -0
- package/codegraph/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/__main__.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/cache.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/config.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/report.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/scanner.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/server.cpython-313.pyc +0 -0
- package/codegraph/cache.py +137 -0
- package/codegraph/config.py +31 -0
- package/codegraph/extractors/__init__.py +0 -0
- package/codegraph/extractors/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/ast_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/audio_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/doc_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/image_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/ast_extractor.py +222 -0
- package/codegraph/extractors/audio_extractor.py +8 -0
- package/codegraph/extractors/doc_extractor.py +34 -0
- package/codegraph/extractors/image_extractor.py +26 -0
- package/codegraph/graph/__init__.py +0 -0
- package/codegraph/graph/__pycache__/__init__.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/builder.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/clustering.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/query.cpython-313.pyc +0 -0
- package/codegraph/graph/builder.py +145 -0
- package/codegraph/graph/clustering.py +40 -0
- package/codegraph/graph/query.py +283 -0
- package/codegraph/report.py +115 -0
- package/codegraph/scanner.py +92 -0
- package/codegraph/server.py +514 -0
- package/package.json +62 -0
- package/src/cli.js +1010 -0
- package/src/config.js +89 -0
- package/src/db.js +786 -0
- package/src/guard.js +20 -0
- package/src/hooks/autoContext.js +17 -0
- package/src/hooks/autoLink.js +7 -0
- package/src/http.js +765 -0
- package/src/index.js +47 -0
- package/src/search.js +50 -0
- package/src/server.js +80 -0
- package/src/summarizer.js +124 -0
- package/src/templates/AGENTS.md +76 -0
- package/src/templates/CLAUDE.md +94 -0
- package/src/templates/GEMINI.md +76 -0
- package/src/templates/cursor-rules.mdc +41 -0
- package/src/templates/windsurf-rules.md +35 -0
- package/src/tools/codegraph.js +215 -0
- package/src/tools/context.js +188 -0
- package/src/tools/discussion.js +123 -0
- package/src/tools/errorCheck.js +65 -0
- package/src/tools/fileTools.js +185 -0
- package/src/tools/gitTools.js +259 -0
- package/src/tools/search.js +55 -0
- package/src/vector.js +153 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ast_extractor.py — extract nodes from code files using tree-sitter AST.
|
|
3
|
+
|
|
4
|
+
Falls back to regex if tree-sitter grammars aren't installed.
|
|
5
|
+
Each node: { id, name, type, file, line, docstring?, calls?, imports? }
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# ── tree-sitter setup (optional — graceful fallback) ─────────────────────────
|
|
13
|
+
|
|
14
|
+
def _try_load_ts():
|
|
15
|
+
try:
|
|
16
|
+
import tree_sitter_python as tspython
|
|
17
|
+
import tree_sitter_javascript as tsjavascript
|
|
18
|
+
from tree_sitter import Language, Parser
|
|
19
|
+
PY_LANG = Language(tspython.language())
|
|
20
|
+
JS_LANG = Language(tsjavascript.language())
|
|
21
|
+
return {"python": (PY_LANG, Parser(PY_LANG)), "javascript": (JS_LANG, Parser(JS_LANG))}
|
|
22
|
+
except ImportError:
|
|
23
|
+
return {}
|
|
24
|
+
|
|
25
|
+
_TS_PARSERS = _try_load_ts()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ── tree-sitter queries ───────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
# Each query tags the identifier of a definition as @name and the whole
# definition node as @class / @func. _ts_extract only consumes @name.

# Python: every `class` statement.
_PY_CLASS_QUERY = """
(class_definition name: (identifier) @name) @class
"""

# Python: every function_definition node.
_PY_FUNC_QUERY = """
(function_definition name: (identifier) @name) @func
"""

# JavaScript: class declarations.
_JS_CLASS_QUERY = """
(class_declaration name: (identifier) @name) @class
"""

# JavaScript: named function declarations and class/object methods
# (method names are property_identifier nodes rather than identifier nodes).
_JS_FUNC_QUERY = """
[
(function_declaration name: (identifier) @name)
(method_definition name: (property_identifier) @name)
] @func
"""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _ts_extract(source: bytes, lang_key: str, rel_path: str) -> list:
    """
    Extract class and function nodes from *source* with tree-sitter.

    Args:
        source: Raw file contents.
        lang_key: Key into ``_TS_PARSERS`` ("python" or "javascript").
        rel_path: Path stored in each node's "file" field and used as the
            prefix of its "id".

    Returns:
        A list of node dicts ``{id, name, type, file, line}`` — classes first,
        then functions — where ``line`` is the 1-based row of the name
        identifier. Returns ``[]`` when no parser is available for
        *lang_key*. Extraction is best-effort: a failing query pass is
        skipped rather than raised.
    """
    if lang_key not in _TS_PARSERS:
        return []
    lang, parser = _TS_PARSERS[lang_key]
    tree = parser.parse(source)

    is_python = lang_key == "python"
    # (node kind, query source, accepted node types for the @name capture)
    passes = (
        ("class",
         _PY_CLASS_QUERY if is_python else _JS_CLASS_QUERY,
         ("identifier",)),
        ("function",
         _PY_FUNC_QUERY if is_python else _JS_FUNC_QUERY,
         ("identifier", "property_identifier")),
    )

    def _iter_captures(query, root):
        """Yield (capture_name, tree_node) pairs from ``query.matches(root)``."""
        try:
            # matches() yields (pattern_idx, {capture_name: Node or [Node]});
            # both value shapes are accepted.
            for _pat_idx, caps in query.matches(root):
                for cap_name, cap_nodes in caps.items():
                    if not isinstance(cap_nodes, list):
                        cap_nodes = [cap_nodes]
                    for captured in cap_nodes:
                        yield cap_name, captured
        except Exception:
            # Best-effort: an unsupported matches() API yields nothing.
            return

    nodes = []
    for kind, query_src, name_types in passes:
        try:
            query = lang.query(query_src)
            for cap_name, node in _iter_captures(query, tree.root_node):
                if cap_name != "name" or node.type not in name_types:
                    continue
                # errors="replace" so a single undecodable byte in a name
                # does not abort the remainder of this pass.
                name = node.text.decode("utf-8", errors="replace")
                nodes.append({
                    "id": f"{rel_path}::{kind}::{name}",
                    "name": name,
                    "type": kind,
                    "file": rel_path,
                    "line": node.start_point[0] + 1,
                })
        except Exception:
            # Deliberate best-effort: keep whatever was collected so far and
            # move on to the next pass.
            continue

    return nodes
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ── Regex fallback ────────────────────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
# Per-language regex table used by _regex_extract.
# Every pattern captures the interesting name in group 1. The "import" kind is
# special: its matches become the module node's imports list, not child nodes.
_PATTERNS = {
    "python": {
        # Anchored at line start with no leading whitespace, so only
        # column-0 `class` / `def` statements match.
        "class": re.compile(r"^class\s+(\w+)", re.MULTILINE),
        "function": re.compile(r"^def\s+(\w+)", re.MULTILINE),
        "import": re.compile(r"^(?:import|from)\s+([\w.]+)", re.MULTILINE),
    },
    "javascript": {
        "class": re.compile(r"\bclass\s+(\w+)", re.MULTILINE),
        "function": re.compile(r"\bfunction\s+(\w+)", re.MULTILINE),
        # Only `import ... from '<module>'` statements that start a line.
        "import": re.compile(r"^import\s+.*?from\s+['\"](.+?)['\"]", re.MULTILINE),
    },
    "go": {
        # Optional `(recv Type)` / `(recv *Type)` receiver before the name.
        "function": re.compile(r"^func\s+(?:\(\w+\s+\*?\w+\)\s+)?(\w+)", re.MULTILINE),
        "struct": re.compile(r"^type\s+(\w+)\s+struct", re.MULTILINE),
        # Matches any double-quoted run of word chars, dots and slashes
        # anywhere in the file, not only inside import blocks.
        "import": re.compile(r'"([\w./]+)"', re.MULTILINE),
    },
    "rust": {
        "function": re.compile(r"^(?:pub\s+)?fn\s+(\w+)", re.MULTILINE),
        "struct": re.compile(r"^(?:pub\s+)?struct\s+(\w+)", re.MULTILINE),
        "import": re.compile(r"^use\s+([\w:]+)", re.MULTILINE),
    },
    "java": {
        "class": re.compile(r"\bclass\s+(\w+)", re.MULTILINE),
        # One or more modifiers/whitespace, a return type, then `name(`.
        "function": re.compile(r"(?:public|private|protected|static|\s)+[\w<>\[\]]+\s+(\w+)\s*\(", re.MULTILINE),
        "import": re.compile(r"^import\s+([\w.]+);", re.MULTILINE),
    },
    "ruby": {
        "class": re.compile(r"^class\s+(\w+)", re.MULTILINE),
        # Leading whitespace is allowed, so indented methods match too.
        "function": re.compile(r"^\s*def\s+(\w+)", re.MULTILINE),
    },
    "sql": {
        "table": re.compile(r"CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?[`\"]?(\w+)[`\"]?", re.IGNORECASE),
        # Group 1 is the table named after ON, not the index name.
        "index": re.compile(r"CREATE\s+(?:UNIQUE\s+)?INDEX\s+\w+\s+ON\s+[`\"]?(\w+)[`\"]?", re.IGNORECASE),
    },
    # config files: no node extraction — file node created by scanner
}
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _ext_to_lang(ext: str) -> str:
|
|
142
|
+
return {
|
|
143
|
+
".py": "python", ".pyw": "python",
|
|
144
|
+
".js": "javascript", ".mjs": "javascript", ".cjs": "javascript",
|
|
145
|
+
".jsx": "javascript", ".ts": "javascript", ".tsx": "javascript",
|
|
146
|
+
".go": "go", ".rs": "rust",
|
|
147
|
+
".java": "java", ".rb": "ruby",
|
|
148
|
+
".sql": "sql",
|
|
149
|
+
".yaml": "config", ".yml": "config", ".toml": "config",
|
|
150
|
+
".env": "config", ".ini": "config", ".cfg": "config",
|
|
151
|
+
}.get(ext, "")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _regex_extract(text: str, lang: str, rel_path: str) -> list:
    """
    Extract nodes from *text* using the regex table for *lang*.

    Args:
        text: Decoded file contents.
        lang: Key into ``_PATTERNS``; an unknown key yields no child nodes.
        rel_path: Path stored in each node's "file" field and used as the
            prefix of its "id".

    Returns:
        The list produced by ``_wrap_in_module``: a module node (carrying up
        to 30 captured imports) followed by one node per non-import match.
    """
    patterns = _PATTERNS.get(lang, {})
    children = []
    for kind, pat in patterns.items():
        if kind == "import":
            continue
        # finditer yields matches left to right, so newlines are counted
        # incrementally instead of re-scanning the text prefix per match.
        line = 1
        scanned = 0
        for m in pat.finditer(text):
            # Locate the captured name rather than the match start: patterns
            # that begin with \s can start matching on an earlier line.
            name_pos = m.start(1)
            line += text.count("\n", scanned, name_pos)
            scanned = name_pos
            children.append({
                "id": f"{rel_path}::{kind}::{m.group(1)}",
                "name": m.group(1),
                "type": kind,
                "file": rel_path,
                "line": line,
            })

    imp_pat = patterns.get("import")
    imports = [m.group(1) for m in imp_pat.finditer(text)][:30] if imp_pat else []

    return _wrap_in_module(rel_path, children, imports)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ── Module wrapper ───────────────────────────────────────────────────────────
|
|
177
|
+
|
|
178
|
+
def _wrap_in_module(rel_path: str, children: list, imports: list) -> list:
|
|
179
|
+
"""
|
|
180
|
+
Create a module node for the file, link all child nodes to it via defined_in.
|
|
181
|
+
The module node carries the imports so builder can create file-to-file edges.
|
|
182
|
+
"""
|
|
183
|
+
from pathlib import Path as _Path
|
|
184
|
+
stem = _Path(rel_path).stem
|
|
185
|
+
mod_id = f"{rel_path}::module::{stem}"
|
|
186
|
+
module = {
|
|
187
|
+
"id": mod_id,
|
|
188
|
+
"name": stem,
|
|
189
|
+
"type": "module",
|
|
190
|
+
"file": rel_path,
|
|
191
|
+
"line": 1,
|
|
192
|
+
"imports": imports,
|
|
193
|
+
}
|
|
194
|
+
for child in children:
|
|
195
|
+
child["relations"] = [{"id": mod_id, "relation": "defined-in", "confidence": "EXTRACTED"}]
|
|
196
|
+
return [module] + children
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
# ── Public API ────────────────────────────────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
def extract(abs_path: str, rel_path: str) -> list:
    """
    Extract nodes from a code/sql/config file.

    Args:
        abs_path: Location of the file to read; its suffix selects the language.
        rel_path: Path recorded in the produced nodes.

    Returns:
        A list of node dicts (module node first), or ``[]`` when the suffix
        is not recognised or the file cannot be read.
    """
    # NOTE(review): the language is chosen from Path.suffix, which is empty
    # for dotfiles such as ".env", so those return [] despite the ".env"
    # mapping entry — confirm whether that is intended.
    ext = Path(abs_path).suffix.lower()
    lang = _ext_to_lang(ext)
    if not lang:
        return []

    try:
        # read_bytes() closes the file handle deterministically.
        raw = Path(abs_path).read_bytes()
    except OSError:
        return []
    text = raw.decode("utf-8", errors="replace")

    # tree-sitter for Python and JS/TS if available
    if lang in ("python", "javascript") and lang in _TS_PARSERS:
        children = _ts_extract(raw, lang, rel_path)
        if children:
            # tree-sitter only yields definitions; imports still come from regex.
            imp_pat = _PATTERNS.get(lang, {}).get("import")
            imports = [m.group(1) for m in imp_pat.finditer(text)][:30] if imp_pat else []
            return _wrap_in_module(rel_path, children, imports)

    # No parser, or the parser found nothing: fall back to regex.
    return _regex_extract(text, lang, rel_path)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
doc_extractor.py — extract plain text from doc and PDF files.
|
|
3
|
+
PDF extraction uses pymupdf if installed; falls back to label-only otherwise.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def extract_text(path: str) -> str:
    """
    Return the text content of a doc/PDF file, truncated at DOC_MAX_CHARS.

    Paths ending in ".pdf" (case-insensitive) are delegated to
    ``_extract_pdf``. Anything else is read as UTF-8 with undecodable bytes
    replaced. Returns "" when the file cannot be read.
    """
    from ..config import DOC_MAX_CHARS
    if path.lower().endswith(".pdf"):
        return _extract_pdf(path, DOC_MAX_CHARS)
    try:
        # Read only the prefix that is kept instead of loading the whole file.
        # Assumes DOC_MAX_CHARS is a non-negative int — TODO confirm in config.
        with open(path, encoding="utf-8", errors="replace") as fh:
            return fh.read(DOC_MAX_CHARS)
    except OSError:
        return ""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _extract_pdf(path: str, max_chars: int) -> str:
|
|
21
|
+
try:
|
|
22
|
+
import pymupdf # optional dep
|
|
23
|
+
doc = pymupdf.open(path)
|
|
24
|
+
parts = []
|
|
25
|
+
for page in doc:
|
|
26
|
+
parts.append(page.get_text())
|
|
27
|
+
if sum(len(p) for p in parts) >= max_chars:
|
|
28
|
+
break
|
|
29
|
+
doc.close()
|
|
30
|
+
return "".join(parts)[:max_chars]
|
|
31
|
+
except ImportError:
|
|
32
|
+
return f"[PDF: {Path(path).name} — install pymupdf to extract text]"
|
|
33
|
+
except Exception:
|
|
34
|
+
return ""
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
image_extractor.py — encode images as base64 for AI vision.
|
|
3
|
+
No external deps — stdlib only.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import base64
|
|
7
|
+
import mimetypes
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def extract_image_b64(path: str) -> dict | None:
|
|
12
|
+
"""Return {"data": base64_str, "media_type": "image/png"} or None on failure."""
|
|
13
|
+
try:
|
|
14
|
+
data = Path(path).read_bytes()
|
|
15
|
+
media_type = mimetypes.guess_type(path)[0] or "image/png"
|
|
16
|
+
return {"data": base64.b64encode(data).decode(), "media_type": media_type}
|
|
17
|
+
except OSError:
|
|
18
|
+
return None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def extract_svg_text(path: str) -> str:
    """
    Return the first 4000 characters of an SVG file as text.

    The file is decoded as UTF-8 with undecodable bytes replaced. An
    unreadable file gives "".
    """
    limit = 4000
    try:
        content = Path(path).read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""
    return content[:limit]
|
|
File without changes
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""
|
|
2
|
+
graph/builder.py — build a NetworkX directed graph from extracted nodes.
|
|
3
|
+
|
|
4
|
+
Node attributes: id, name, type, file, line, community
|
|
5
|
+
Edge attributes: relation, confidence (EXTRACTED | INFERRED | AMBIGUOUS)
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import networkx as nx
|
|
13
|
+
_HAS_NX = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
_HAS_NX = False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def build(all_nodes: list[dict]) -> "nx.DiGraph | dict":
    """
    Build a graph from a flat node list.

    Returns an ``nx.DiGraph``, or the plain-dict fallback from
    ``_dict_graph`` when networkx is not installed.

    Graph nodes carry every key of the input dict except "imports", "calls"
    and "relations". Edges come from two sources:

    * each node's "relations" list (a target given by "id", or looked up by
      "name"), defaulting to relation "relates-to" and confidence "INFERRED";
    * each module node's "imports" list, resolved against the known module
      files (relation "imports", confidence "EXTRACTED").

    "calls" is stripped from node attributes but is not turned into edges.
    Nodes without an "id" are ignored entirely.
    """
    if not _HAS_NX:
        return _dict_graph(all_nodes)

    G = nx.DiGraph()

    node_by_name: dict[str, str] = {}     # name -> id (last one wins on clashes)
    module_by_file: dict[str, str] = {}   # rel_path -> module node id
    edge_only_keys = ("imports", "calls", "relations")

    for node in all_nodes:
        nid = node.get("id", "")
        if not nid:
            continue
        G.add_node(nid, **{k: v for k, v in node.items() if k not in edge_only_keys})
        node_by_name[node.get("name", "")] = nid
        if node.get("type") == "module":
            module_by_file[node.get("file", "")] = nid

    # Build file-path lookup from module nodes: stem, base name and full
    # (slash-normalised) path all point at the module; first file wins.
    file_node: dict[str, str] = {}
    for rel_path, mod_id in module_by_file.items():
        p = rel_path.replace("\\", "/")
        base = p.split("/")[-1]
        stem = base.split(".")[0]
        for key in (stem, base, p):
            file_node.setdefault(key, mod_id)

    # Relation edges (defined-in links and explicit concept relations), in a
    # single pass.
    for node in all_nodes:
        nid = node.get("id", "")
        if not nid:
            continue
        for rel in node.get("relations", []):
            target_id = rel.get("id") or node_by_name.get(rel.get("name", ""))
            if target_id and target_id != nid:
                G.add_edge(nid, target_id,
                           relation=rel.get("relation", "relates-to"),
                           confidence=rel.get("confidence", "INFERRED"))

    # Import edges: module → module
    for node in all_nodes:
        if node.get("type") != "module":
            continue
        src_id = node.get("id", "")
        if not src_id:
            continue
        for imp in node.get("imports", []):
            clean = imp.lstrip(".")
            last = clean.replace("\\", "/").split("/")[-1]
            stem = last.split(".")[0]
            candidates = [clean, last, stem]
            if "/" not in clean and "." in clean:
                # Dotted module path (e.g. "pkg.sub.mod"): as a last resort
                # try the final segment, since `stem` above is only the first.
                candidates.append(clean.rsplit(".", 1)[-1])
            for cand in candidates:
                if not cand:
                    continue
                target = file_node.get(cand) or node_by_name.get(cand)
                if target and target != src_id:
                    # An explicit relation between the same pair takes
                    # precedence over a plain "imports" edge.
                    if not G.has_edge(src_id, target):
                        G.add_edge(src_id, target, relation="imports", confidence="EXTRACTED")
                    break

    return G
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _dict_graph(all_nodes: list[dict]) -> dict:
|
|
95
|
+
"""Fallback when networkx not installed."""
|
|
96
|
+
nodes = []
|
|
97
|
+
edges = []
|
|
98
|
+
seen = set()
|
|
99
|
+
for node in all_nodes:
|
|
100
|
+
nid = node.get("id", "")
|
|
101
|
+
if nid in seen:
|
|
102
|
+
continue
|
|
103
|
+
seen.add(nid)
|
|
104
|
+
nodes.append({k: v for k, v in node.items() if k not in ("imports", "calls", "relations")})
|
|
105
|
+
for imp in node.get("imports", []):
|
|
106
|
+
edges.append({"from": nid, "to": imp, "relation": "imports", "confidence": "EXTRACTED"})
|
|
107
|
+
return {"nodes": nodes, "edges": edges, "communities": [], "god_nodes": []}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def to_json_dict(G) -> dict:
    """
    Serialize a graph to the graph.json schema.

    A dict argument (the no-networkx fallback) is returned unchanged.
    Otherwise the result has "nodes", "edges", "communities" (read from
    ``G.graph``), "god_nodes" and "generated_at" (a UTC ISO-8601 timestamp
    with a trailing "Z").
    """
    if isinstance(G, dict):
        return G  # fallback path

    from datetime import datetime, timezone

    nodes = [{"id": nid, **data} for nid, data in G.nodes(data=True)]
    edges = [{"from": u, "to": v, **data} for u, v, data in G.edges(data=True)]

    # God nodes = highest degree: at most five, each with degree above 2.
    degrees = sorted(G.degree(), key=lambda x: x[1], reverse=True)
    god_nodes = [n for n, d in degrees[:5] if d > 2]

    # datetime.utcnow() is deprecated; take an aware UTC time and drop tzinfo
    # so the rendered form stays "<naive iso>Z" as before.
    stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "nodes": nodes,
        "edges": edges,
        "communities": G.graph.get("communities", []),
        "god_nodes": god_nodes,
        "generated_at": stamp,
    }
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def save_graph(project_root: str, graph_dict: dict) -> str:
    """
    Write *graph_dict* to ``<project_root>/codegraph-cache/graph.json``.

    The cache directory is created if needed. The JSON is indented by two
    spaces and written as UTF-8.

    Returns:
        The path of the written file, as a string.
    """
    cache_dir = Path(project_root) / "codegraph-cache"
    cache_dir.mkdir(parents=True, exist_ok=True)

    target = cache_dir / "graph.json"
    payload = json.dumps(graph_dict, indent=2)
    target.write_text(payload, encoding="utf-8")
    return str(target)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def load_graph(project_root: str) -> dict | None:
|
|
139
|
+
p = Path(project_root) / "codegraph-cache" / "graph.json"
|
|
140
|
+
if not p.exists():
|
|
141
|
+
return None
|
|
142
|
+
try:
|
|
143
|
+
return json.loads(p.read_text(encoding="utf-8"))
|
|
144
|
+
except Exception:
|
|
145
|
+
return None
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
graph/clustering.py — community detection using NetworkX connected-components.
|
|
3
|
+
No external deps beyond networkx (already required).
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def detect_communities(G) -> list[dict]:
    """
    Assign community IDs to graph nodes and return the community list.

    A community is a connected component of the undirected view of *G*.
    Each node gets a "community" attribute holding its component index, and
    the list is also stored in ``G.graph["communities"]``.

    Returns:
        A list of ``{"id", "label", "members"}`` dicts, or ``[]`` when
        networkx is unavailable or the graph has no nodes.
    """
    try:
        import networkx as nx
    except ImportError:
        return []

    if G.number_of_nodes() == 0:
        return []

    undirected = G.to_undirected()
    communities = []
    for comm_id, component in enumerate(nx.connected_components(undirected)):
        # Components are sets; sort so the member order (and the serialized
        # graph) is reproducible between runs. key=str tolerates mixed id types.
        member_ids = sorted(component, key=str)
        label = _community_label(G, member_ids)
        communities.append({"id": comm_id, "label": label, "members": member_ids})
        for nid in member_ids:
            G.nodes[nid]["community"] = comm_id

    G.graph["communities"] = communities
    return communities
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _community_label(G, member_ids: list) -> str:
|
|
32
|
+
files = []
|
|
33
|
+
for nid in member_ids:
|
|
34
|
+
if G.has_node(nid):
|
|
35
|
+
f = G.nodes[nid].get("file", "")
|
|
36
|
+
if f:
|
|
37
|
+
files.append(f.split("/")[0])
|
|
38
|
+
if not files:
|
|
39
|
+
return "misc"
|
|
40
|
+
return max(set(files), key=files.count)
|