code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_graph/__init__.py +20 -0
- code_review_graph/__main__.py +4 -0
- code_review_graph/analysis.py +410 -0
- code_review_graph/changes.py +409 -0
- code_review_graph/cli.py +1255 -0
- code_review_graph/communities.py +874 -0
- code_review_graph/constants.py +23 -0
- code_review_graph/context_savings.py +317 -0
- code_review_graph/custom_languages.py +322 -0
- code_review_graph/daemon.py +1009 -0
- code_review_graph/daemon_cli.py +320 -0
- code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
- code_review_graph/embeddings.py +1006 -0
- code_review_graph/enrich.py +303 -0
- code_review_graph/eval/__init__.py +33 -0
- code_review_graph/eval/benchmarks/__init__.py +1 -0
- code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
- code_review_graph/eval/benchmarks/build_performance.py +60 -0
- code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
- code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
- code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
- code_review_graph/eval/benchmarks/search_quality.py +59 -0
- code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
- code_review_graph/eval/configs/code-review-graph.yaml +50 -0
- code_review_graph/eval/configs/express.yaml +45 -0
- code_review_graph/eval/configs/fastapi.yaml +48 -0
- code_review_graph/eval/configs/flask.yaml +50 -0
- code_review_graph/eval/configs/gin.yaml +51 -0
- code_review_graph/eval/configs/httpx.yaml +48 -0
- code_review_graph/eval/reporter.py +301 -0
- code_review_graph/eval/runner.py +211 -0
- code_review_graph/eval/scorer.py +85 -0
- code_review_graph/eval/token_benchmark.py +182 -0
- code_review_graph/exports.py +409 -0
- code_review_graph/flows.py +698 -0
- code_review_graph/graph.py +1427 -0
- code_review_graph/graph_diff.py +122 -0
- code_review_graph/hints.py +384 -0
- code_review_graph/incremental.py +1245 -0
- code_review_graph/jedi_resolver.py +303 -0
- code_review_graph/main.py +1079 -0
- code_review_graph/memory.py +142 -0
- code_review_graph/migrations.py +284 -0
- code_review_graph/parser.py +6957 -0
- code_review_graph/postprocessing.py +134 -0
- code_review_graph/prompts.py +159 -0
- code_review_graph/refactor.py +852 -0
- code_review_graph/registry.py +319 -0
- code_review_graph/rescript_resolver.py +206 -0
- code_review_graph/search.py +447 -0
- code_review_graph/skills.py +1481 -0
- code_review_graph/spring_resolver.py +200 -0
- code_review_graph/temporal_resolver.py +199 -0
- code_review_graph/token_benchmark.py +125 -0
- code_review_graph/tools/__init__.py +156 -0
- code_review_graph/tools/_common.py +176 -0
- code_review_graph/tools/analysis_tools.py +184 -0
- code_review_graph/tools/build.py +541 -0
- code_review_graph/tools/community_tools.py +246 -0
- code_review_graph/tools/context.py +152 -0
- code_review_graph/tools/docs.py +274 -0
- code_review_graph/tools/flows_tools.py +176 -0
- code_review_graph/tools/query.py +692 -0
- code_review_graph/tools/refactor_tools.py +168 -0
- code_review_graph/tools/registry_tools.py +125 -0
- code_review_graph/tools/review.py +477 -0
- code_review_graph/tsconfig_resolver.py +257 -0
- code_review_graph/visualization.py +2184 -0
- code_review_graph/wiki.py +305 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Scoring metrics for evaluating graph-based code review quality.
|
|
2
|
+
|
|
3
|
+
Provides:
|
|
4
|
+
- Token efficiency: measures how many tokens the graph saves vs raw context.
|
|
5
|
+
- Mean Reciprocal Rank (MRR): evaluates ranking quality for search results.
|
|
6
|
+
- Precision / Recall / F1: evaluates set-based retrieval accuracy.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def compute_token_efficiency(raw_tokens: int, graph_tokens: int) -> dict:
    """Summarize how much smaller the graph context is than the raw source.

    Args:
        raw_tokens: Token count of the raw source code.
        graph_tokens: Token count of the graph-based context.

    Returns:
        Dict with keys:
        - raw_tokens: the raw token count, echoed back
        - graph_tokens: the graph token count, echoed back
        - ratio: graph_tokens / raw_tokens rounded to 4 places (lower is better)
        - reduction_percent: percent of tokens saved, rounded to 2 places
          (higher is better)
        Both derived values are 0.0 when raw_tokens is not positive.
    """
    metrics = {
        "raw_tokens": raw_tokens,
        "graph_tokens": graph_tokens,
        "ratio": 0.0,
        "reduction_percent": 0.0,
    }
    # Nothing to divide by: report the zeroed metrics as-is.
    if raw_tokens <= 0:
        return metrics

    fraction = graph_tokens / raw_tokens
    metrics["ratio"] = round(fraction, 4)
    metrics["reduction_percent"] = round((1.0 - fraction) * 100.0, 2)
    return metrics
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def compute_mrr(correct: str, results: list[str]) -> float:
    """Return the reciprocal rank of *correct* within *results*.

    Args:
        correct: The expected result identifier.
        results: Result identifiers ordered best first.

    Returns:
        1/rank (1-based) of the first occurrence of *correct*, or 0.0 when it
        does not appear in *results*.
    """
    first_hit = next(
        (
            position
            for position, candidate in enumerate(results, start=1)
            if candidate == correct
        ),
        None,
    )
    return 0.0 if first_hit is None else 1.0 / first_hit
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def compute_precision_recall(predicted: set, actual: set) -> dict:
    """Score a predicted set against the ground-truth set.

    Args:
        predicted: Items that were predicted/returned.
        actual: Ground-truth items.

    Returns:
        Dict with keys precision, recall and f1, each rounded to 4 places.
        When both sets are empty all three scores are 1.0.
    """
    # Predicting nothing when nothing was expected counts as a perfect score.
    if not (predicted or actual):
        return {"precision": 1.0, "recall": 1.0, "f1": 1.0}

    hits = len(predicted & actual)
    precision = hits / len(predicted) if predicted else 0.0
    recall = hits / len(actual) if actual else 0.0

    combined = precision + recall
    f1 = 2 * precision * recall / combined if combined > 0 else 0.0

    return {
        "precision": round(precision, 4),
        "recall": round(recall, 4),
        "f1": round(f1, 4),
    }
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Measures total tokens consumed by agent workflows against benchmark repos."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
from typing import Any, Callable
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def estimate_tokens(obj: Any) -> int:
    """Roughly estimate how many tokens *obj* costs once serialized.

    The object is dumped to JSON (values JSON cannot encode are stringified)
    and the character count is divided by four, a coarse approximation for
    English text mixed with code.
    """
    serialized = json.dumps(obj, default=str)
    return len(serialized) // 4
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def benchmark_review_workflow(repo_root: str, base: str = "HEAD~1") -> dict:
    """Run the review workflow's tool calls and total their estimated tokens.

    Calls get_minimal_context and then detect_changes_func (minimal detail)
    against *repo_root* / *base*, estimating the token size of each result.

    Returns:
        Dict with the workflow name, total_tokens, tool_calls (number of
        calls) and calls (per-tool token estimates, in call order).
    """
    from ..tools.context import get_minimal_context
    from ..tools.review import detect_changes_func

    # Step 1: get_minimal_context
    context_cost = estimate_tokens(
        get_minimal_context(task="review changes", repo_root=repo_root, base=base)
    )
    # Step 2: detect_changes (minimal)
    changes_cost = estimate_tokens(
        detect_changes_func(base=base, repo_root=repo_root, detail_level="minimal")
    )

    calls = [
        {"tool": "get_minimal_context", "tokens": context_cost},
        {"tool": "detect_changes_minimal", "tokens": changes_cost},
    ]
    return {
        "workflow": "review",
        "total_tokens": sum(call["tokens"] for call in calls),
        "tool_calls": len(calls),
        "calls": calls,
    }
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def benchmark_architecture_workflow(repo_root: str) -> dict:
    """Run the architecture-exploration tool calls and total their tokens.

    Calls get_minimal_context, list_communities_func and list_flows (the
    latter two at minimal detail) against *repo_root*, estimating the token
    size of each result.

    Returns:
        Dict with the workflow name, total_tokens, tool_calls (number of
        calls) and calls (per-tool token estimates, in call order).
    """
    from ..tools.community_tools import list_communities_func
    from ..tools.context import get_minimal_context
    from ..tools.flows_tools import list_flows

    context_cost = estimate_tokens(
        get_minimal_context(task="map architecture", repo_root=repo_root)
    )
    communities_cost = estimate_tokens(
        list_communities_func(repo_root=repo_root, detail_level="minimal")
    )
    flows_cost = estimate_tokens(
        list_flows(repo_root=repo_root, detail_level="minimal")
    )

    calls = [
        {"tool": "get_minimal_context", "tokens": context_cost},
        {"tool": "list_communities_minimal", "tokens": communities_cost},
        {"tool": "list_flows_minimal", "tokens": flows_cost},
    ]
    return {
        "workflow": "architecture",
        "total_tokens": sum(call["tokens"] for call in calls),
        "tool_calls": len(calls),
        "calls": calls,
    }
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def benchmark_debug_workflow(repo_root: str) -> dict:
    """Run the debug workflow's tool calls and total their estimated tokens.

    Calls get_minimal_context and then semantic_search_nodes (query "login",
    minimal detail) against *repo_root*, estimating the token size of each
    result.

    Returns:
        Dict with the workflow name, total_tokens, tool_calls (number of
        calls) and calls (per-tool token estimates, in call order).
    """
    from ..tools.context import get_minimal_context
    from ..tools.query import semantic_search_nodes

    context_cost = estimate_tokens(
        get_minimal_context(task="debug login bug", repo_root=repo_root)
    )
    search_cost = estimate_tokens(
        semantic_search_nodes(
            query="login", repo_root=repo_root, detail_level="minimal",
        )
    )

    calls = [
        {"tool": "get_minimal_context", "tokens": context_cost},
        {"tool": "semantic_search_minimal", "tokens": search_cost},
    ]
    return {
        "workflow": "debug",
        "total_tokens": sum(call["tokens"] for call in calls),
        "tool_calls": len(calls),
        "calls": calls,
    }
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def benchmark_onboard_workflow(repo_root: str) -> dict:
    """Run the onboarding workflow's tool calls and total their tokens.

    Calls get_minimal_context and then list_graph_stats against *repo_root*,
    estimating the token size of each result.

    Returns:
        Dict with the workflow name, total_tokens, tool_calls (number of
        calls) and calls (per-tool token estimates, in call order).
    """
    from ..tools.context import get_minimal_context
    from ..tools.query import list_graph_stats

    context_cost = estimate_tokens(
        get_minimal_context(task="onboard developer", repo_root=repo_root)
    )
    stats_cost = estimate_tokens(list_graph_stats(repo_root=repo_root))

    calls = [
        {"tool": "get_minimal_context", "tokens": context_cost},
        {"tool": "list_graph_stats", "tokens": stats_cost},
    ]
    return {
        "workflow": "onboard",
        "total_tokens": sum(call["tokens"] for call in calls),
        "tool_calls": len(calls),
        "calls": calls,
    }
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def benchmark_pre_merge_workflow(repo_root: str, base: str = "HEAD~1") -> dict:
    """Run the pre-merge check's tool calls and total their estimated tokens.

    Calls get_minimal_context and then detect_changes_func (minimal detail)
    against *repo_root* / *base*, estimating the token size of each result.

    Returns:
        Dict with the workflow name, total_tokens, tool_calls (number of
        calls) and calls (per-tool token estimates, in call order).
    """
    from ..tools.context import get_minimal_context
    from ..tools.review import detect_changes_func

    context_cost = estimate_tokens(
        get_minimal_context(task="pre-merge check", repo_root=repo_root, base=base)
    )
    changes_cost = estimate_tokens(
        detect_changes_func(base=base, repo_root=repo_root, detail_level="minimal")
    )

    calls = [
        {"tool": "get_minimal_context", "tokens": context_cost},
        {"tool": "detect_changes_minimal", "tokens": changes_cost},
    ]
    return {
        "workflow": "pre_merge",
        "total_tokens": sum(call["tokens"] for call in calls),
        "tool_calls": len(calls),
        "calls": calls,
    }
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Registry of workflow name -> benchmark function. run_all_benchmarks
# iterates this mapping in insertion order; every function accepts
# ``repo_root`` and some additionally accept ``base``.
ALL_WORKFLOWS: dict[str, Callable[..., dict]] = {
    "review": benchmark_review_workflow,
    "architecture": benchmark_architecture_workflow,
    "debug": benchmark_debug_workflow,
    "onboard": benchmark_onboard_workflow,
    "pre_merge": benchmark_pre_merge_workflow,
}
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def run_all_benchmarks(repo_root: str, base: str = "HEAD~1") -> list[dict]:
    """Run every registered workflow benchmark and collect the results.

    Args:
        repo_root: Repository path forwarded to each workflow.
        base: Git base ref, forwarded only to workflows that declare a
            ``base`` parameter.

    Returns:
        One dict per entry in ALL_WORKFLOWS, in registry order. A workflow
        that raises contributes ``{"workflow": name, "error": message}``
        instead of its result, so one failure does not abort the run.
    """
    # Local import keeps this block self-contained.
    import inspect

    results = []
    for name, fn in ALL_WORKFLOWS.items():
        try:
            kwargs = {"repo_root": repo_root}
            # Check the declared parameters, not co_varnames: the latter also
            # lists plain local variables and is missing on callables such as
            # functools.partial objects.
            if "base" in inspect.signature(fn).parameters:
                kwargs["base"] = base
            results.append(fn(**kwargs))
        except Exception as e:
            # Deliberate best-effort boundary: log and record the failure.
            logger.warning("Benchmark %s failed: %s", name, e)
            results.append({"workflow": name, "error": str(e)})
    return results
|
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
"""Additional export formats: GraphML, Neo4j Cypher, Obsidian vault, SVG."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import html
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from .graph import GraphStore, _sanitize_name
|
|
11
|
+
from .visualization import export_graph_data
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# -------------------------------------------------------------------
|
|
17
|
+
# GraphML export (for Gephi, yEd, Cytoscape)
|
|
18
|
+
# -------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
def export_graphml(store: GraphStore, output_path: Path) -> Path:
    """Export the graph as GraphML XML for Gephi/yEd/Cytoscape.

    Each node becomes a ``<node>`` whose id is its qualified name, with
    kind/file/language data and, when present, its community id. Each edge
    becomes an ``<edge>`` carrying its kind.

    Args:
        store: Graph store passed to export_graph_data.
        output_path: File to write (UTF-8).

    Returns:
        The path to the written file.
    """
    data = export_graph_data(store)
    nodes = data["nodes"]
    edges = data["edges"]

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        # The standard GraphML namespace; readers that match elements by
        # namespace do not recognise the document under any other URI.
        '<graphml xmlns="http://graphml.graphdrawing.org/xmlns"',
        ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        # schemaLocation is a "namespace schema-url" pair.
        ' xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns '
        'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">',
        ' <key id="kind" for="node" attr.name="kind" '
        'attr.type="string"/>',
        ' <key id="file" for="node" attr.name="file" '
        'attr.type="string"/>',
        ' <key id="language" for="node" attr.name="language" '
        'attr.type="string"/>',
        ' <key id="community" for="node" attr.name="community" '
        'attr.type="int"/>',
        ' <key id="edge_kind" for="edge" attr.name="kind" '
        'attr.type="string"/>',
        ' <graph id="code-review-graph" edgedefault="directed">',
    ]

    for n in nodes:
        nid = html.escape(n["qualified_name"], quote=True)
        # ``or ""`` guards against explicit None values, matching how the
        # language field has always been handled.
        node_kind = n.get("kind") or ""
        file_path = n.get("file_path") or ""
        lang = n.get("language", "") or ""
        lines.append(f' <node id="{nid}">')
        lines.append(f' <data key="kind">'
                     f'{html.escape(node_kind)}</data>')
        lines.append(f' <data key="file">'
                     f'{html.escape(file_path)}</data>')
        lines.append(f' <data key="language">'
                     f'{html.escape(lang)}</data>')
        cid = n.get("community_id")
        if cid is not None:
            lines.append(f' <data key="community">'
                         f'{cid}</data>')
        lines.append(' </node>')

    for i, e in enumerate(edges):
        src = html.escape(e["source"], quote=True)
        tgt = html.escape(e["target"], quote=True)
        kind = html.escape(e.get("kind") or "", quote=True)
        lines.append(
            f' <edge id="e{i}" source="{src}" target="{tgt}">'
        )
        lines.append(f' <data key="edge_kind">{kind}</data>')
        lines.append(' </edge>')

    lines.append(' </graph>')
    lines.append('</graphml>')

    output_path.write_text("\n".join(lines), encoding="utf-8")
    logger.info("GraphML exported to %s (%d nodes, %d edges)",
                output_path, len(nodes), len(edges))
    return output_path
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# -------------------------------------------------------------------
|
|
83
|
+
# Neo4j Cypher export
|
|
84
|
+
# -------------------------------------------------------------------
|
|
85
|
+
|
|
86
|
+
_CYPHER_PLAIN_NAME = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")


def _cypher_name(value: object, default: str) -> str:
    """Return *value* as a Cypher label/relationship type, quoted if needed."""
    text = str(value) if value else default
    if _CYPHER_PLAIN_NAME.fullmatch(text):
        return text
    # Anything else is only valid inside backticks; a literal backtick is
    # escaped by doubling it.
    return "`" + text.replace("`", "``") + "`"


def export_neo4j_cypher(store: GraphStore, output_path: Path) -> Path:
    """Export the graph as Neo4j Cypher CREATE statements.

    Emits one ``CREATE`` per node, labelled with the node kind, followed by
    one ``MATCH ... CREATE`` per edge that links nodes by qualified name.
    A missing or empty kind falls back to ``Node`` / ``RELATES_TO``; a kind
    that is not a plain identifier is backtick-quoted so the statement stays
    valid.

    Args:
        store: Graph store passed to export_graph_data.
        output_path: File to write (UTF-8).

    Returns:
        The path to the written file.
    """
    data = export_graph_data(store)
    nodes = data["nodes"]
    edges = data["edges"]

    lines = [
        "// Generated by code-review-graph",
        "// Import: paste into Neo4j Browser or run via cypher-shell",
        "",
    ]

    # Create nodes
    for n in nodes:
        label = _cypher_name(n.get("kind"), "Node")
        props = {
            "qualified_name": n["qualified_name"],
            "name": n.get("name", ""),
            "file_path": n.get("file_path", ""),
            "language": n.get("language", "") or "",
        }
        cid = n.get("community_id")
        if cid is not None:
            props["community_id"] = cid
        props_str = _cypher_props(props)
        lines.append(f"CREATE (:{label} {props_str});")

    lines.append("")

    # Create edges via MATCH
    for e in edges:
        rel_type = _cypher_name(e.get("kind"), "RELATES_TO")
        src_qn = _cypher_escape(e["source"])
        tgt_qn = _cypher_escape(e["target"])
        lines.append(
            f"MATCH (a {{qualified_name: '{src_qn}'}}), "
            f"(b {{qualified_name: '{tgt_qn}'}}) "
            f"CREATE (a)-[:{rel_type}]->(b);"
        )

    output_path.write_text("\n".join(lines), encoding="utf-8")
    logger.info("Neo4j Cypher exported to %s (%d nodes, %d edges)",
                output_path, len(nodes), len(edges))
    return output_path
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _cypher_escape(s: str) -> str:
|
|
136
|
+
"""Escape a string for Cypher single-quoted literals."""
|
|
137
|
+
return s.replace("\\", "\\\\").replace("'", "\\'")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _cypher_props(d: dict) -> str:
|
|
141
|
+
"""Format a dict as Cypher property map."""
|
|
142
|
+
parts = []
|
|
143
|
+
for k, v in d.items():
|
|
144
|
+
if isinstance(v, str):
|
|
145
|
+
parts.append(f"{k}: '{_cypher_escape(v)}'")
|
|
146
|
+
elif isinstance(v, (int, float)):
|
|
147
|
+
parts.append(f"{k}: {v}")
|
|
148
|
+
elif isinstance(v, bool):
|
|
149
|
+
parts.append(f"{k}: {'true' if v else 'false'}")
|
|
150
|
+
return "{" + ", ".join(parts) + "}"
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# -------------------------------------------------------------------
|
|
154
|
+
# Obsidian vault export
|
|
155
|
+
# -------------------------------------------------------------------
|
|
156
|
+
|
|
157
|
+
def export_obsidian_vault(
    store: GraphStore, output_dir: Path
) -> Path:
    """Export the graph as an Obsidian vault with wikilinks.

    Creates:
    - One .md per node with YAML frontmatter and [[wikilinks]]
    - _COMMUNITY_*.md overview notes per community
    - _INDEX.md with links to all nodes

    Args:
        store: Graph store passed to export_graph_data.
        output_dir: Directory to write into; created if missing.

    Returns:
        The output directory path.
    """
    data = export_graph_data(store)
    nodes = data["nodes"]
    edges = data["edges"]
    communities = data.get("communities", [])

    output_dir.mkdir(parents=True, exist_ok=True)

    # Build adjacency for wikilinks (both directions, so each page links to
    # callers as well as callees).
    neighbors: dict[str, list[dict]] = {}
    for e in edges:
        src = e["source"]
        tgt = e["target"]
        kind = e.get("kind", "RELATES_TO")
        neighbors.setdefault(src, []).append(
            {"target": tgt, "kind": kind}
        )
        neighbors.setdefault(tgt, []).append(
            {"target": src, "kind": kind}
        )

    # Node name -> slug mapping. Taken slugs are tracked in a set so the
    # collision check is O(1) per probe instead of a scan of every slug.
    slugs: dict[str, str] = {}
    used_slugs: set[str] = set()
    for n in nodes:
        qn = n["qualified_name"]
        slug = _obsidian_slug(n.get("name", qn))
        # Handle collisions by appending -1, -2, ...
        base_slug = slug
        counter = 1
        while slug in used_slugs:
            slug = f"{base_slug}-{counter}"
            counter += 1
        # A repeated qualified name replaces its earlier slug, which frees
        # that slug for later nodes.
        previous = slugs.get(qn)
        if previous is not None:
            used_slugs.discard(previous)
        used_slugs.add(slug)
        slugs[qn] = slug

    # Write node pages
    for n in nodes:
        qn = n["qualified_name"]
        slug = slugs[qn]
        name = n.get("name", qn)

        frontmatter = {
            "kind": n.get("kind", ""),
            "file": n.get("file_path", ""),
            "language": n.get("language", "") or "",
            "community": n.get("community_id"),
            "tags": [n.get("kind", "").lower()],
        }

        lines = ["---"]
        for k, v in frontmatter.items():
            if isinstance(v, list):
                lines.append(f"{k}:")
                for item in v:
                    lines.append(f" - {item}")
            elif v is not None:
                lines.append(f"{k}: {v}")
        lines.append("---")
        lines.append(f"# {_sanitize_name(name)}")
        lines.append("")
        lines.append(f"**Kind:** {n.get('kind', '')}")
        lines.append(f"**File:** `{n.get('file_path', '')}`")
        lines.append("")

        # Wikilinks to neighbors (each neighbor page is linked once)
        nbrs = neighbors.get(qn, [])
        if nbrs:
            lines.append("## Connections")
            lines.append("")
            seen = set()
            for nb in nbrs:
                tgt_slug = slugs.get(nb["target"])
                if tgt_slug and tgt_slug not in seen:
                    seen.add(tgt_slug)
                    tgt_name = tgt_slug.replace("-", " ").title()
                    lines.append(
                        f"- {nb['kind']}: "
                        f"[[{tgt_slug}|{tgt_name}]]"
                    )

        page_path = output_dir / f"{slug}.md"
        page_path.write_text("\n".join(lines), encoding="utf-8")

    # Write community overview pages
    community_map: dict[int, list[str]] = {}
    for n in nodes:
        cid = n.get("community_id")
        if cid is not None:
            community_map.setdefault(cid, []).append(
                n["qualified_name"]
            )

    for c in communities:
        cid = c.get("id")
        cname = c.get("name", f"community-{cid}")
        members = community_map.get(cid, [])

        lines = [f"# Community: {_sanitize_name(cname)}", ""]
        lines.append(f"**Size:** {c.get('size', len(members))}")
        lines.append(f"**Cohesion:** {c.get('cohesion', 0):.2f}")
        lang = c.get("dominant_language", "")
        if lang:
            lines.append(f"**Language:** {lang}")
        lines.append("")
        lines.append("## Members")
        lines.append("")
        # Only the first 50 members are listed on the overview page.
        for qn in members[:50]:
            slug = slugs.get(qn)
            if slug:
                lines.append(f"- [[{slug}]]")

        page_path = output_dir / f"_COMMUNITY_{cid}.md"
        page_path.write_text("\n".join(lines), encoding="utf-8")

    # Write index
    index_lines = ["# Code Graph Index", ""]
    index_lines.append(f"**Nodes:** {len(nodes)}")
    index_lines.append(f"**Edges:** {len(edges)}")
    index_lines.append(
        f"**Communities:** {len(communities)}"
    )
    index_lines.append("")
    index_lines.append("## All Nodes")
    index_lines.append("")
    for n in sorted(nodes, key=lambda x: x.get("name", "")):
        slug = slugs.get(n["qualified_name"])
        if slug:
            index_lines.append(
                f"- [[{slug}]] ({n.get('kind', '')})"
            )

    (output_dir / "_INDEX.md").write_text(
        "\n".join(index_lines), encoding="utf-8"
    )

    logger.info(
        "Obsidian vault exported to %s (%d pages)",
        output_dir, len(nodes)
    )
    return output_dir
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _obsidian_slug(name: str) -> str:
|
|
309
|
+
"""Convert a name to an Obsidian-friendly filename slug."""
|
|
310
|
+
slug = re.sub(r"[^\w\s-]", "", name.lower())
|
|
311
|
+
slug = re.sub(r"[\s_]+", "-", slug).strip("-")
|
|
312
|
+
return slug[:100] or "unnamed"
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# -------------------------------------------------------------------
|
|
316
|
+
# SVG export (matplotlib-based)
|
|
317
|
+
# -------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
def export_svg(store: GraphStore, output_path: Path) -> Path:
    """Export a static SVG graph visualization.

    Nodes are coloured by kind and laid out with a seeded spring layout, so
    repeated exports of the same graph use the same layout seed. Node labels
    are drawn only for graphs of at most 100 nodes.

    Args:
        store: Graph store passed to export_graph_data.
        output_path: SVG file to write.

    Returns:
        The path to the written file.

    Raises:
        ImportError: If matplotlib (optional dependency) is not installed.
        ValueError: If the graph has no nodes.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
    except ImportError as err:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "matplotlib is required for SVG export. "
            "Install with: pip install matplotlib"
        ) from err

    import networkx as nx

    data = export_graph_data(store)
    nodes_data = data["nodes"]
    edges_data = data["edges"]

    nxg: nx.DiGraph = nx.DiGraph()  # type: ignore[type-arg]
    for n in nodes_data:
        nxg.add_node(
            n["qualified_name"],
            label=n.get("name", ""),
            kind=n.get("kind", ""),
        )
    for e in edges_data:
        # Skip edges whose endpoints are not exported nodes.
        if e["source"] in nxg and e["target"] in nxg:
            nxg.add_edge(e["source"], e["target"])

    if nxg.number_of_nodes() == 0:
        raise ValueError("Graph is empty, nothing to export")

    # Color by kind
    kind_colors = {
        "File": "#6c757d",
        "Class": "#0d6efd",
        "Function": "#198754",
        "Type": "#ffc107",
        "Test": "#dc3545",
    }
    colors = [
        kind_colors.get(
            nxg.nodes[n].get("kind", ""), "#adb5bd"
        )
        for n in nxg.nodes()
    ]

    fig, ax = plt.subplots(1, 1, figsize=(16, 12))
    try:
        pos = nx.spring_layout(
            nxg, k=2 / (nxg.number_of_nodes() ** 0.5),
            iterations=50, seed=42
        )

        # Limit labels to avoid clutter
        labels = {}
        if nxg.number_of_nodes() <= 100:
            # ``label`` is always set on the node (possibly to ""), so use
            # ``or`` to fall back to the last ``::`` segment of the name.
            labels = {
                n: nxg.nodes[n].get("label") or n.split("::")[-1]
                for n in nxg.nodes()
            }

        nx.draw_networkx_nodes(
            nxg, pos, ax=ax, node_color=colors,
            node_size=30, alpha=0.8
        )
        nx.draw_networkx_edges(
            nxg, pos, ax=ax, alpha=0.2,
            arrows=True, arrowsize=5
        )
        if labels:
            nx.draw_networkx_labels(
                nxg, pos, labels=labels, ax=ax,
                font_size=6
            )

        ax.set_title("Code Review Graph", fontsize=14)
        ax.axis("off")

        fig.savefig(
            str(output_path), format="svg",
            bbox_inches="tight", dpi=150
        )
    finally:
        # Always release the figure, even if layout/draw/save raised.
        plt.close(fig)

    logger.info("SVG exported to %s (%d nodes)",
                output_path, nxg.number_of_nodes())
    return output_path
|