context-mcp-server 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -7
- package/codegraph/__pycache__/affected.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/cache.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/callflow_html.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/export.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/report.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/server.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/tree_html.cpython-313.pyc +0 -0
- package/codegraph/affected.py +233 -0
- package/codegraph/cache.py +51 -2
- package/codegraph/callflow_html.py +273 -0
- package/codegraph/export.py +544 -0
- package/codegraph/extractors/__pycache__/ast_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/ast_extractor.py +143 -16
- package/codegraph/graph/__pycache__/builder.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/clustering.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/query.cpython-313.pyc +0 -0
- package/codegraph/graph/__pycache__/symbol_resolution.cpython-313.pyc +0 -0
- package/codegraph/graph/builder.py +10 -0
- package/codegraph/graph/clustering.py +247 -10
- package/codegraph/graph/query.py +56 -0
- package/codegraph/graph/symbol_resolution.py +112 -0
- package/codegraph/report.py +53 -0
- package/codegraph/server.py +99 -10
- package/codegraph/tree_html.py +241 -0
- package/package.json +2 -2
- package/pyproject.toml +4 -1
- package/src/cli.js +277 -86
- package/src/server.js +7 -1
- package/src/templates/antigravity/GEMINI.md +96 -0
- package/src/templates/antigravity/hooks/context-mcp-post-tool-use.js +62 -0
- package/src/templates/antigravity/workflows/context-resume.md +20 -0
- package/src/templates/antigravity/workflows/graph-build.md +23 -0
- package/src/templates/antigravity/workflows/save-context.md +29 -0
- package/src/templates/{CLAUDE.md → claude/CLAUDE.md} +3 -0
- package/src/templates/claude/commands/graph-build.md +9 -0
- package/src/templates/claude/commands/save-context.md +19 -0
- package/src/templates/claude/hooks/context-mcp-post-tool-use.js +59 -0
- package/src/templates/claude/hooks/context-mcp-pre-tool-use.js +26 -0
- package/src/templates/{skills → claude/skills}/SKILL.md +3 -0
- package/src/templates/codex/AGENTS.md +107 -0
- package/src/templates/codex/hooks/context-mcp-post-tool-use.js +46 -0
- package/src/templates/codex/hooks/context-mcp-pre-tool-use.js +23 -0
- package/src/templates/codex/prompts/context-resume.md +15 -0
- package/src/templates/codex/prompts/graph-build.md +14 -0
- package/src/templates/codex/prompts/save-context.md +24 -0
- package/src/templates/cursor/commands/context-resume.md +7 -0
- package/src/templates/cursor/commands/graph-build.md +7 -0
- package/src/templates/cursor/commands/save-context.md +12 -0
- package/src/templates/{cursor-rules.mdc → cursor/cursor-rules.mdc} +13 -3
- package/src/templates/cursor/hooks/context-mcp-post-tool-use.js +55 -0
- package/src/templates/{GEMINI.md → gemini/GEMINI.md} +3 -1
- package/src/templates/gemini/commands/context-resume.toml +15 -0
- package/src/templates/gemini/commands/graph-build.toml +14 -0
- package/src/templates/gemini/commands/save-context.toml +24 -0
- package/src/templates/gemini/hooks/context-mcp-after-tool.js +59 -0
- package/src/templates/gemini/hooks/context-mcp-before-tool.js +26 -0
- package/src/templates/vscode/commands/context-resume.prompt.md +15 -0
- package/src/templates/vscode/commands/graph-build.prompt.md +10 -0
- package/src/templates/vscode/commands/save-context.prompt.md +16 -0
- package/src/templates/vscode/hooks/context-mcp-post-tool-use.js +58 -0
- package/src/templates/windsurf/hooks/context-mcp-post-run-command.js +57 -0
- package/src/templates/{windsurf-rules.md → windsurf/windsurf-rules.md} +6 -4
- package/src/templates/windsurf/workflows/context-resume.md +11 -0
- package/src/templates/windsurf/workflows/graph-build.md +11 -0
- package/src/templates/windsurf/workflows/save-context.md +18 -0
- package/src/tools/codegraph.js +37 -0
- package/uv.lock +1100 -3
- package/src/templates/AGENTS.md +0 -90
- package/src/templates/commands/graph-build.md +0 -5
- package/src/templates/commands/save-context.md +0 -12
- /package/src/templates/{commands → claude/commands}/context-resume.md +0 -0
|
@@ -282,18 +282,24 @@ def _extract_with_treesitter(source: bytes, rel_path: str, cfg: dict) -> list[di
|
|
|
282
282
|
|
|
283
283
|
nodes: list[dict] = []
|
|
284
284
|
seen: set[str] = set()
|
|
285
|
+
node_by_name: dict[str, dict] = {}
|
|
285
286
|
|
|
286
|
-
def _add(name: str, ntype: str, line: int):
|
|
287
|
+
def _add(name: str, ntype: str, line: int) -> dict | None:
|
|
287
288
|
if not name or name in seen:
|
|
288
|
-
return
|
|
289
|
+
return None
|
|
289
290
|
seen.add(name)
|
|
290
|
-
|
|
291
|
-
"id":
|
|
292
|
-
"name":
|
|
293
|
-
"type":
|
|
294
|
-
"file":
|
|
295
|
-
"line":
|
|
296
|
-
|
|
291
|
+
entry = {
|
|
292
|
+
"id": f"{rel_path}::{ntype}::{name}",
|
|
293
|
+
"name": name,
|
|
294
|
+
"type": ntype,
|
|
295
|
+
"file": rel_path,
|
|
296
|
+
"line": line + 1,
|
|
297
|
+
"calls": [],
|
|
298
|
+
"imports": [],
|
|
299
|
+
}
|
|
300
|
+
nodes.append(entry)
|
|
301
|
+
node_by_name[name] = entry
|
|
302
|
+
return entry
|
|
297
303
|
|
|
298
304
|
for node in _walk(root, cfg["function_types"]):
|
|
299
305
|
name = _get_name(node, cfg["name_field"])
|
|
@@ -305,6 +311,30 @@ def _extract_with_treesitter(source: bytes, rel_path: str, cfg: dict) -> list[di
|
|
|
305
311
|
if name:
|
|
306
312
|
_add(name, "class", node.start_point[0])
|
|
307
313
|
|
|
314
|
+
# Associate call expressions with their enclosing function
|
|
315
|
+
for node in _walk(root, cfg["call_types"]):
|
|
316
|
+
callee = _get_call_name(node, cfg["call_field"])
|
|
317
|
+
if not callee:
|
|
318
|
+
continue
|
|
319
|
+
enclosing = _find_enclosing_function(node, cfg["function_types"], cfg["name_field"])
|
|
320
|
+
if enclosing and enclosing in node_by_name:
|
|
321
|
+
calls_list = node_by_name[enclosing]["calls"]
|
|
322
|
+
if callee not in calls_list:
|
|
323
|
+
calls_list.append(callee)
|
|
324
|
+
|
|
325
|
+
# Collect file-level imports and attach to every node in this file
|
|
326
|
+
import_names: list[str] = []
|
|
327
|
+
for node in _walk(root, cfg["import_types"]):
|
|
328
|
+
text = node.text.decode("utf-8", errors="ignore").strip()
|
|
329
|
+
m = re.match(r'(?:import|from)\s+([\w./"\']+)', text)
|
|
330
|
+
if m:
|
|
331
|
+
raw = m.group(1).strip("\"'").split(".")[0].split("/")[-1]
|
|
332
|
+
if raw and raw not in import_names:
|
|
333
|
+
import_names.append(raw)
|
|
334
|
+
if import_names:
|
|
335
|
+
for entry in nodes:
|
|
336
|
+
entry["imports"] = import_names[:]
|
|
337
|
+
|
|
308
338
|
return nodes
|
|
309
339
|
|
|
310
340
|
|
|
@@ -382,16 +412,106 @@ _EXT_TO_LANG_NAME: dict[str, str] = {
|
|
|
382
412
|
}
|
|
383
413
|
|
|
384
414
|
|
|
415
|
+
# Generic call pattern: word immediately followed by (
|
|
416
|
+
_CALL_RE = re.compile(r'\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(')
|
|
417
|
+
|
|
418
|
+
# Language-specific import patterns (group 1 or 2 = module name)
|
|
419
|
+
_IMPORT_RE: dict[str, re.Pattern] = {
|
|
420
|
+
"javascript": re.compile(
|
|
421
|
+
r'(?:import\s+[\s\S]*?from\s+["\']([^"\']+)["\']'
|
|
422
|
+
r'|require\s*\(\s*["\']([^"\']+)["\']\s*\))',
|
|
423
|
+
re.MULTILINE,
|
|
424
|
+
),
|
|
425
|
+
"typescript": re.compile(
|
|
426
|
+
r'(?:import\s+[\s\S]*?from\s+["\']([^"\']+)["\']'
|
|
427
|
+
r'|require\s*\(\s*["\']([^"\']+)["\']\s*\))',
|
|
428
|
+
re.MULTILINE,
|
|
429
|
+
),
|
|
430
|
+
"python": re.compile(
|
|
431
|
+
r'^\s*(?:import\s+([\w.]+)|from\s+([\w.]+)\s+import)',
|
|
432
|
+
re.MULTILINE,
|
|
433
|
+
),
|
|
434
|
+
"go": re.compile(r'"([^"]+)"', re.MULTILINE),
|
|
435
|
+
"rust": re.compile(r'use\s+([\w:]+)', re.MULTILINE),
|
|
436
|
+
"java": re.compile(r'import\s+([\w.]+)', re.MULTILINE),
|
|
437
|
+
"csharp": re.compile(r'using\s+([\w.]+)', re.MULTILINE),
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
# Keywords that look like calls but aren't
|
|
441
|
+
_KEYWORD_BLACKLIST = {
|
|
442
|
+
"if", "for", "while", "switch", "catch", "function", "class",
|
|
443
|
+
"return", "typeof", "instanceof", "new", "await", "async",
|
|
444
|
+
"import", "export", "from", "let", "const", "var",
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _collect_imports_regex(source: str, lang: str) -> list[str]:
    """Return the distinct module stems imported by *source*.

    The pattern for *lang* is looked up in ``_IMPORT_RE``; languages without
    a pattern yield an empty list. For every match the first non-empty
    capture group is reduced to a stem: backslashes are normalised to ``/``,
    only the last path component is kept, leading dots are removed, and the
    first dotted segment is taken.

    Args:
        source: Full text of the file being scanned.
        lang: Language key used to select the import pattern.

    Returns:
        Stems in order of first appearance, without duplicates.
    """
    pat = _IMPORT_RE.get(lang)
    if pat is None:
        return []

    # A dict keeps first-seen order while giving O(1) duplicate checks.
    stems: dict[str, None] = {}
    for m in pat.finditer(source):
        raw = next((g for g in m.groups() if g), None)
        if not raw:
            continue
        last_component = raw.replace("\\", "/").split("/")[-1]
        # Strip leading dots first: without this a relative module such as
        # ".utils" (Python `from .utils import x`) splits to an empty first
        # segment and the import is silently dropped.
        stem = last_component.lstrip(".").split(".")[0].strip("'\"")
        if stem:
            stems.setdefault(stem)
    return list(stems)
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def _attach_calls_brace(lines: list[str], func_nodes: list[dict]) -> None:
    """Attribute call names found on each line to the innermost open function.

    Scope is approximated by counting ``{`` and ``}`` characters per line.
    Each function node is pushed when the scan reaches its ``"line"`` value,
    together with the brace depth measured before that line's own braces are
    counted. Call names matched by ``_CALL_RE`` are appended, without
    duplicates, to the ``"calls"`` list of the node on top of the stack.

    Args:
        lines: Source split into lines; the first element is line 1.
        func_nodes: Node dicts providing ``"line"``, ``"name"`` and ``"calls"``.

    Returns:
        None. The ``"calls"`` lists in *func_nodes* are mutated in place.
    """
    ordered = sorted(func_nodes, key=lambda node: node["line"])
    if not ordered:
        return

    # Each entry is (function node, brace depth when the function was pushed).
    open_scopes: list[tuple[dict, int]] = []
    brace_level = 0
    cursor = 0
    total = len(ordered)

    for number, text in enumerate(lines, start=1):
        # Open every function declared on this line.
        while cursor < total and ordered[cursor]["line"] == number:
            open_scopes.append((ordered[cursor], brace_level))
            cursor += 1

        brace_level += text.count("{") - text.count("}")

        # NOTE(review): a scope is dropped only when the depth falls *below*
        # the depth recorded at push time, so a function whose braces balance
        # stays on the stack until an enclosing scope closes or a later
        # function is pushed above it. Confirm this is intended.
        while open_scopes and open_scopes[-1][1] > brace_level:
            open_scopes.pop()

        if not open_scopes:
            continue

        owner = open_scopes[-1][0]
        for match in _CALL_RE.finditer(text):
            target = match.group(1)
            # Skip language keywords and the function's own name.
            if target in _KEYWORD_BLACKLIST or target == owner["name"]:
                continue
            recorded = owner["calls"]
            if target not in recorded:
                recorded.append(target)
|
|
500
|
+
|
|
501
|
+
|
|
385
502
|
def _extract_with_regex(source: str, rel_path: str, ext: str) -> list[dict]:
|
|
386
503
|
lang = _EXT_TO_LANG_NAME.get(ext.lower())
|
|
387
504
|
if not lang or lang not in _REGEX_PATTERNS:
|
|
388
505
|
return []
|
|
389
506
|
|
|
390
507
|
patterns = _REGEX_PATTERNS[lang]
|
|
508
|
+
lines = source.splitlines()
|
|
391
509
|
nodes: list[dict] = []
|
|
392
|
-
seen: set[str]
|
|
510
|
+
seen: set[str] = set()
|
|
393
511
|
|
|
394
|
-
|
|
512
|
+
import_names = _collect_imports_regex(source, lang)
|
|
513
|
+
|
|
514
|
+
for line_no, line in enumerate(lines, 1):
|
|
395
515
|
for ntype, pattern in patterns.items():
|
|
396
516
|
if not pattern:
|
|
397
517
|
continue
|
|
@@ -401,12 +521,19 @@ def _extract_with_regex(source: str, rel_path: str, ext: str) -> list[dict]:
|
|
|
401
521
|
if name and name not in seen:
|
|
402
522
|
seen.add(name)
|
|
403
523
|
nodes.append({
|
|
404
|
-
"id":
|
|
405
|
-
"name":
|
|
406
|
-
"type":
|
|
407
|
-
"file":
|
|
408
|
-
"line":
|
|
524
|
+
"id": f"{rel_path}::{ntype}::{name}",
|
|
525
|
+
"name": name,
|
|
526
|
+
"type": ntype,
|
|
527
|
+
"file": rel_path,
|
|
528
|
+
"line": line_no,
|
|
529
|
+
"calls": [],
|
|
530
|
+
"imports": import_names[:],
|
|
409
531
|
})
|
|
532
|
+
|
|
533
|
+
func_nodes = [n for n in nodes if n["type"] == "function"]
|
|
534
|
+
if func_nodes:
|
|
535
|
+
_attach_calls_brace(lines, func_nodes)
|
|
536
|
+
|
|
410
537
|
return nodes
|
|
411
538
|
|
|
412
539
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -88,6 +88,16 @@ def build(all_nodes: list[dict]) -> "nx.DiGraph | dict":
|
|
|
88
88
|
relation=rel.get("relation", "relates-to"),
|
|
89
89
|
confidence=rel.get("confidence", "INFERRED"))
|
|
90
90
|
|
|
91
|
+
# Resolve unresolved call targets from node['calls'] lists
|
|
92
|
+
try:
|
|
93
|
+
from codegraph.graph.symbol_resolution import resolve_calls
|
|
94
|
+
existing_keys = {(u, v) for u, v in G.edges()}
|
|
95
|
+
new_edges = resolve_calls(all_nodes, existing_keys)
|
|
96
|
+
for e in new_edges:
|
|
97
|
+
G.add_edge(e["from"], e["to"], relation=e["relation"], confidence=e["confidence"])
|
|
98
|
+
except Exception:
|
|
99
|
+
pass
|
|
100
|
+
|
|
91
101
|
return G
|
|
92
102
|
|
|
93
103
|
|
|
@@ -1,23 +1,260 @@
|
|
|
1
1
|
"""
|
|
2
|
-
graph/clustering.py — community detection using
|
|
3
|
-
|
|
2
|
+
graph/clustering.py — community detection using Leiden (graspologic) with Louvain fallback.
|
|
3
|
+
|
|
4
|
+
Replaces naive connected-components with production-grade algorithm:
|
|
5
|
+
- Hub exclusion before partitioning
|
|
6
|
+
- Oversized community splitting (>25% of graph)
|
|
7
|
+
- Low-cohesion re-splitting
|
|
8
|
+
- Stable IDs via size-desc lexical sort + previous-run remap
|
|
4
9
|
"""
|
|
10
|
+
from __future__ import annotations
|
|
5
11
|
|
|
12
|
+
import contextlib
|
|
13
|
+
import inspect
|
|
14
|
+
import io
|
|
15
|
+
import json
|
|
16
|
+
import sys
|
|
17
|
+
|
|
18
|
+
import networkx as nx
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ── Low-level partitioner ────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
def _suppress_output():
|
|
24
|
+
return contextlib.redirect_stdout(io.StringIO())
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _partition(G: nx.Graph, resolution: float = 1.0) -> dict[str, int]:
    """Assign every node of *G* to a community and return ``{node: community_id}``.

    The graph is first copied into a fresh ``nx.Graph`` with nodes and edges
    inserted in sorted order, so the partitioner sees the same input order
    on every run. Leiden from ``graspologic`` is used when that package can
    be imported; otherwise networkx Louvain is used. Both receive a fixed
    seed of 42 where they accept one.

    Args:
        G: Graph to partition.
        resolution: Resolution value forwarded to the partitioner.

    Returns:
        Mapping from node to integer community id.
    """
    def _edge_sort_key(row):
        src, tgt, attrs = row
        return (
            str(src),
            str(tgt),
            json.dumps(attrs, sort_keys=True, ensure_ascii=False, default=str),
        )

    stable = nx.Graph()
    stable.add_nodes_from(sorted(G.nodes(), key=str))
    for src, tgt, attrs in sorted(G.edges(data=True), key=_edge_sort_key):
        stable.add_edge(src, tgt, **attrs)

    try:
        from graspologic.partition import leiden

        # Pass only the options this leiden signature actually accepts.
        accepted = inspect.signature(leiden).parameters
        wanted = {"random_seed": 42, "trials": 1, "resolution": resolution}
        leiden_kwargs: dict = {
            name: value for name, value in wanted.items() if name in accepted
        }
        # Silence anything written to stderr or stdout while leiden runs.
        with contextlib.redirect_stderr(io.StringIO()), _suppress_output():
            return leiden(stable, **leiden_kwargs)
    except ImportError:
        pass

    louvain = nx.community.louvain_communities
    louvain_kwargs = {"seed": 42, "threshold": 1e-4, "resolution": resolution}
    if "max_level" in inspect.signature(louvain).parameters:
        louvain_kwargs["max_level"] = 10

    membership = {}
    for cid, members in enumerate(louvain(stable, **louvain_kwargs)):
        for node in members:
            membership[node] = cid
    return membership
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ── Constants ────────────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
_MAX_COMMUNITY_FRACTION = 0.25
|
|
77
|
+
_MIN_SPLIT_SIZE = 10
|
|
78
|
+
_COHESION_SPLIT_THRESHOLD = 0.05
|
|
79
|
+
_COHESION_SPLIT_MIN_SIZE = 50
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ── Public API ───────────────────────────────────────────────────────────────
|
|
83
|
+
|
|
84
|
+
def cluster(
    G: nx.Graph,
    resolution: float = 1.0,
    exclude_hubs_percentile: float | None = None,
) -> dict[int, list[str]]:
    """Run community detection and return ``{community_id: [node_ids]}``.

    Steps:
      1. Directed graphs are converted to undirected.
      2. Optionally, nodes whose degree exceeds the given percentile are
         held out as hubs before partitioning.
      3. The remaining connected nodes are partitioned with ``_partition``;
         nodes of degree zero each get their own community.
      4. Each hub joins the community most common among its neighbours
         (ties go to the lowest id), or its own community if no neighbour
         has one.
      5. Communities larger than ``max(_MIN_SPLIT_SIZE, 25% of nodes)`` are
         split, then large low-cohesion communities are split once more.
      6. Ids are assigned by size descending, then by sorted member names,
         so id 0 is the largest community.

    Args:
        G: Graph to cluster.
        resolution: Resolution forwarded to the first partitioning pass.
        exclude_hubs_percentile: Degree percentile above which nodes are
            treated as hubs. ``None`` disables hub exclusion. Values outside
            0-100 are clamped to the valid range.

    Returns:
        Mapping of community id to the sorted list of its member node ids.
        Empty when the graph has no nodes.
    """
    if G.number_of_nodes() == 0:
        return {}
    if G.is_directed():
        G = G.to_undirected()
    if G.number_of_edges() == 0:
        return {i: [n] for i, n in enumerate(sorted(G.nodes))}

    hub_nodes: set[str] = set()
    if exclude_hubs_percentile is not None:
        degrees = sorted(d for _, d in G.degree())
        if degrees:
            idx = int(len(degrees) * exclude_hubs_percentile / 100) - 1
            # Clamp on both sides: a percentile above 100 would otherwise
            # index past the end of `degrees` and raise IndexError.
            idx = min(max(idx, 0), len(degrees) - 1)
            threshold = degrees[idx]
            hub_nodes = {n for n, d in G.degree() if d > threshold}

    isolates = [n for n in G.nodes() if G.degree(n) == 0 and n not in hub_nodes]
    connected_nodes = [n for n in G.nodes() if G.degree(n) > 0 and n not in hub_nodes]
    connected = G.subgraph(connected_nodes)

    raw: dict[int, list[str]] = {}
    if connected.number_of_nodes() > 0:
        partition = _partition(connected, resolution=resolution)
        for node, cid in partition.items():
            raw.setdefault(cid, []).append(node)

    # Degree-zero nodes each form a singleton community.
    next_cid = max(raw.keys(), default=-1) + 1
    for node in isolates:
        raw[next_cid] = [node]
        next_cid += 1

    if hub_nodes:
        node_community: dict[str, int] = {n: cid for cid, nodes in raw.items() for n in nodes}
        # Sorted iteration keeps hub placement deterministic; a hub placed
        # earlier can influence the vote of a later hub.
        for hub in sorted(hub_nodes):
            votes: dict[int, int] = {}
            for nb in G.neighbors(hub):
                cid = node_community.get(nb)
                if cid is not None:
                    votes[cid] = votes.get(cid, 0) + 1
            if votes:
                # Most votes wins; ties go to the lowest community id.
                best = min(votes, key=lambda c: (-votes[c], c))
                raw.setdefault(best, []).append(hub)
                node_community[hub] = best
            else:
                raw[next_cid] = [hub]
                node_community[hub] = next_cid
                next_cid += 1

    # First pass: split oversized communities.
    max_size = max(_MIN_SPLIT_SIZE, int(G.number_of_nodes() * _MAX_COMMUNITY_FRACTION))
    final_communities: list[list[str]] = []
    for nodes in raw.values():
        if len(nodes) > max_size:
            final_communities.extend(_split_community(G, nodes))
        else:
            final_communities.append(nodes)

    # Second pass: re-split large communities with low cohesion.
    second_pass: list[list[str]] = []
    for nodes in final_communities:
        if len(nodes) >= _COHESION_SPLIT_MIN_SIZE and cohesion_score(G, nodes) < _COHESION_SPLIT_THRESHOLD:
            splits = _split_community(G, nodes)
            second_pass.extend(splits if len(splits) > 1 else [nodes])
        else:
            second_pass.append(nodes)
    final_communities = second_pass

    final_communities.sort(key=lambda nodes: (-len(nodes), tuple(sorted(map(str, nodes)))))
    return {i: sorted(nodes) for i, nodes in enumerate(final_communities)}
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _split_community(G: nx.Graph, nodes: list[str]) -> list[list[str]]:
    """Split one community into smaller ones by partitioning its subgraph.

    Args:
        G: Graph containing the community.
        nodes: Member node ids of the community to split.

    Returns:
        A list of sorted member lists. If the induced subgraph has no edges,
        every node becomes its own group. If partitioning fails or produces
        a single group, the original community is returned as one sorted
        list.
    """
    induced = G.subgraph(nodes)
    if not induced.number_of_edges():
        return [[member] for member in sorted(nodes)]

    try:
        groups: dict[int, list[str]] = {}
        for member, label in _partition(induced).items():
            groups.setdefault(label, []).append(member)
        if len(groups) > 1:
            return [sorted(group) for group in groups.values()]
    except Exception:
        # Best effort: any failure leaves the community unsplit.
        pass
    return [sorted(nodes)]
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def cohesion_score(G: nx.Graph, community_nodes: list[str]) -> float:
    """Return the edge density of a community.

    The score is the number of edges in the subgraph induced by
    *community_nodes* divided by ``n * (n - 1) / 2``, where ``n`` is
    ``len(community_nodes)``. Communities with zero or one member score 1.0.
    """
    size = len(community_nodes)
    if size < 2:
        return 1.0
    max_edges = size * (size - 1) / 2
    if max_edges <= 0:
        return 0.0
    return G.subgraph(community_nodes).number_of_edges() / max_edges
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def remap_communities_to_previous(
    communities: dict[int, list[str]],
    previous_node_community: dict[str, int],
) -> dict[int, list[str]]:
    """Relabel communities so their ids follow a previous run where possible.

    Old and new communities are paired greedily by shared member count,
    largest overlap first; ties are broken by old id, then new id. Each old
    id and each new community is used at most once. New communities left
    without a partner receive the smallest non-negative ids not already
    taken, handed out largest community first.

    Args:
        communities: Current ``{community_id: [node_ids]}``.
        previous_node_community: ``{node_id: community_id}`` from the
            previous run.

    Returns:
        ``{final_id: sorted node ids}`` ordered by ascending id, or an empty
        dict when *communities* is empty.
    """
    if not communities:
        return {}

    current = {cid: set(members) for cid, members in communities.items()}
    former: dict[int, set[str]] = {}
    for node, old_cid in previous_node_community.items():
        former.setdefault(old_cid, set()).add(node)

    # Every (shared count, old id, new id) triple with a non-empty overlap.
    candidates = sorted(
        (
            (len(old_members & new_members), old_cid, new_cid)
            for old_cid, old_members in former.items()
            for new_cid, new_members in current.items()
            if old_members & new_members
        ),
        key=lambda triple: (-triple[0], triple[1], triple[2]),
    )

    assignment: dict[int, int] = {}  # new id -> final id
    taken: set[int] = set()          # final ids already handed out
    for _shared, old_cid, new_cid in candidates:
        if old_cid in taken or new_cid in assignment:
            continue
        assignment[new_cid] = old_cid
        taken.add(old_cid)

    leftovers = sorted(
        (cid for cid in communities if cid not in assignment),
        key=lambda cid: (-len(communities[cid]), tuple(sorted(communities[cid]))),
    )
    candidate_id = 0
    for new_cid in leftovers:
        while candidate_id in taken:
            candidate_id += 1
        assignment[new_cid] = candidate_id
        taken.add(candidate_id)
        candidate_id += 1

    relabelled = {assignment[cid]: sorted(members) for cid, members in communities.items()}
    return {final_id: relabelled[final_id] for final_id in sorted(relabelled)}
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# ── Public wrapper maintaining the existing detect_communities() API ──────────
|
|
237
|
+
|
|
238
|
+
def detect_communities(G) -> list[dict]:
|
|
239
|
+
"""Assign community IDs to graph nodes. Returns list of community dicts.
|
|
240
|
+
|
|
241
|
+
Wraps cluster() so callers don't need to change. Falls back to connected
|
|
242
|
+
components if networkx community detection is unavailable.
|
|
243
|
+
"""
|
|
14
244
|
if G.number_of_nodes() == 0:
|
|
15
245
|
return []
|
|
16
246
|
|
|
17
|
-
|
|
247
|
+
try:
|
|
248
|
+
comm_map = cluster(G)
|
|
249
|
+
except Exception:
|
|
250
|
+
# Final fallback: connected components (original behaviour)
|
|
251
|
+
undirected = G.to_undirected()
|
|
252
|
+
comm_map = {}
|
|
253
|
+
for cid, component in enumerate(nx.connected_components(undirected)):
|
|
254
|
+
comm_map[cid] = sorted(component)
|
|
255
|
+
|
|
18
256
|
communities = []
|
|
19
|
-
for comm_id,
|
|
20
|
-
member_ids = list(component)
|
|
257
|
+
for comm_id, member_ids in comm_map.items():
|
|
21
258
|
label = _community_label(G, member_ids)
|
|
22
259
|
communities.append({"id": comm_id, "label": label, "members": member_ids})
|
|
23
260
|
for nid in member_ids:
|
|
@@ -34,7 +271,7 @@ def _community_label(G, member_ids: list) -> str:
|
|
|
34
271
|
if G.has_node(nid):
|
|
35
272
|
f = G.nodes[nid].get("file", "")
|
|
36
273
|
if f:
|
|
37
|
-
files.append(f.split("/")[0])
|
|
274
|
+
files.append(f.replace("\\", "/").split("/")[0])
|
|
38
275
|
if not files:
|
|
39
276
|
return "misc"
|
|
40
277
|
return max(set(files), key=files.count)
|
package/codegraph/graph/query.py
CHANGED
|
@@ -41,6 +41,8 @@ def answer(question: str, graph_dict: dict, token_budget: int = 2000) -> dict:
|
|
|
41
41
|
result = _describe_god_nodes(god_nodes, nodes)
|
|
42
42
|
elif intent == "community" and matched:
|
|
43
43
|
result = _describe_community(matched[0], communities, nodes)
|
|
44
|
+
elif intent == "circular":
|
|
45
|
+
result = _circular_imports(graph_dict)
|
|
44
46
|
else:
|
|
45
47
|
result = _general_search(matched, nodes, edges)
|
|
46
48
|
|
|
@@ -124,6 +126,8 @@ def _detect_intent(q: str) -> str:
|
|
|
124
126
|
return "god_nodes"
|
|
125
127
|
if any(w in q for w in ("community", "cluster", "group", "module")):
|
|
126
128
|
return "community"
|
|
129
|
+
if any(w in q for w in ("circular", "cycle", "cyclic", "recursive import")):
|
|
130
|
+
return "circular"
|
|
127
131
|
return "general"
|
|
128
132
|
|
|
129
133
|
|
|
@@ -225,6 +229,58 @@ def _describe_community(node: dict, communities: list, nodes: list) -> dict:
|
|
|
225
229
|
}
|
|
226
230
|
|
|
227
231
|
|
|
232
|
+
def _circular_imports(graph_dict: dict) -> dict:
|
|
233
|
+
"""Find circular import chains using iterative DFS on import edges."""
|
|
234
|
+
edges = graph_dict.get("edges", [])
|
|
235
|
+
nodes = graph_dict.get("nodes", [])
|
|
236
|
+
node_map = {n["id"]: n for n in nodes}
|
|
237
|
+
|
|
238
|
+
adj: dict[str, list[str]] = {}
|
|
239
|
+
for e in edges:
|
|
240
|
+
if e.get("relation") in ("imports", "imports_from"):
|
|
241
|
+
adj.setdefault(e["from"], []).append(e["to"])
|
|
242
|
+
|
|
243
|
+
cycles: list[list[str]] = []
|
|
244
|
+
visited: set[str] = set()
|
|
245
|
+
|
|
246
|
+
def dfs(start: str) -> None:
|
|
247
|
+
stack = [(start, [start], {start})]
|
|
248
|
+
while stack and len(cycles) < 5:
|
|
249
|
+
node, path, path_set = stack.pop()
|
|
250
|
+
for nb in adj.get(node, []):
|
|
251
|
+
if nb in path_set:
|
|
252
|
+
cycle_start = path.index(nb)
|
|
253
|
+
cycles.append(path[cycle_start:] + [nb])
|
|
254
|
+
if len(cycles) >= 5:
|
|
255
|
+
return
|
|
256
|
+
elif nb not in visited:
|
|
257
|
+
visited.add(nb)
|
|
258
|
+
stack.append((nb, path + [nb], path_set | {nb}))
|
|
259
|
+
|
|
260
|
+
for nid in list(adj.keys()):
|
|
261
|
+
if nid not in visited:
|
|
262
|
+
visited.add(nid)
|
|
263
|
+
dfs(nid)
|
|
264
|
+
if len(cycles) >= 5:
|
|
265
|
+
break
|
|
266
|
+
|
|
267
|
+
if not cycles:
|
|
268
|
+
return {"text": "No circular imports detected.", "nodes": [], "confidence": "high"}
|
|
269
|
+
|
|
270
|
+
cycle_node_ids: list[str] = []
|
|
271
|
+
lines = [f"Found {len(cycles)} circular import chain(s):"]
|
|
272
|
+
for cycle in cycles[:5]:
|
|
273
|
+
names = [node_map.get(nid, {}).get("name", nid) for nid in cycle]
|
|
274
|
+
lines.append(f" {' → '.join(names)}")
|
|
275
|
+
cycle_node_ids.extend(nid for nid in cycle if nid in node_map)
|
|
276
|
+
|
|
277
|
+
return {
|
|
278
|
+
"text": "\n".join(lines),
|
|
279
|
+
"nodes": [node_map[nid] for nid in dict.fromkeys(cycle_node_ids)][:20],
|
|
280
|
+
"confidence": "high",
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
|
|
228
284
|
def _general_search(matched: list, nodes: list, edges: list) -> dict:
|
|
229
285
|
if not matched:
|
|
230
286
|
return {"text": "No matching nodes found.", "nodes": [], "confidence": "low"}
|