code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_graph/__init__.py +20 -0
- code_review_graph/__main__.py +4 -0
- code_review_graph/analysis.py +410 -0
- code_review_graph/changes.py +409 -0
- code_review_graph/cli.py +1255 -0
- code_review_graph/communities.py +874 -0
- code_review_graph/constants.py +23 -0
- code_review_graph/context_savings.py +317 -0
- code_review_graph/custom_languages.py +322 -0
- code_review_graph/daemon.py +1009 -0
- code_review_graph/daemon_cli.py +320 -0
- code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
- code_review_graph/embeddings.py +1006 -0
- code_review_graph/enrich.py +303 -0
- code_review_graph/eval/__init__.py +33 -0
- code_review_graph/eval/benchmarks/__init__.py +1 -0
- code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
- code_review_graph/eval/benchmarks/build_performance.py +60 -0
- code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
- code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
- code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
- code_review_graph/eval/benchmarks/search_quality.py +59 -0
- code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
- code_review_graph/eval/configs/code-review-graph.yaml +50 -0
- code_review_graph/eval/configs/express.yaml +45 -0
- code_review_graph/eval/configs/fastapi.yaml +48 -0
- code_review_graph/eval/configs/flask.yaml +50 -0
- code_review_graph/eval/configs/gin.yaml +51 -0
- code_review_graph/eval/configs/httpx.yaml +48 -0
- code_review_graph/eval/reporter.py +301 -0
- code_review_graph/eval/runner.py +211 -0
- code_review_graph/eval/scorer.py +85 -0
- code_review_graph/eval/token_benchmark.py +182 -0
- code_review_graph/exports.py +409 -0
- code_review_graph/flows.py +698 -0
- code_review_graph/graph.py +1427 -0
- code_review_graph/graph_diff.py +122 -0
- code_review_graph/hints.py +384 -0
- code_review_graph/incremental.py +1245 -0
- code_review_graph/jedi_resolver.py +303 -0
- code_review_graph/main.py +1079 -0
- code_review_graph/memory.py +142 -0
- code_review_graph/migrations.py +284 -0
- code_review_graph/parser.py +6957 -0
- code_review_graph/postprocessing.py +134 -0
- code_review_graph/prompts.py +159 -0
- code_review_graph/refactor.py +852 -0
- code_review_graph/registry.py +319 -0
- code_review_graph/rescript_resolver.py +206 -0
- code_review_graph/search.py +447 -0
- code_review_graph/skills.py +1481 -0
- code_review_graph/spring_resolver.py +200 -0
- code_review_graph/temporal_resolver.py +199 -0
- code_review_graph/token_benchmark.py +125 -0
- code_review_graph/tools/__init__.py +156 -0
- code_review_graph/tools/_common.py +176 -0
- code_review_graph/tools/analysis_tools.py +184 -0
- code_review_graph/tools/build.py +541 -0
- code_review_graph/tools/community_tools.py +246 -0
- code_review_graph/tools/context.py +152 -0
- code_review_graph/tools/docs.py +274 -0
- code_review_graph/tools/flows_tools.py +176 -0
- code_review_graph/tools/query.py +692 -0
- code_review_graph/tools/refactor_tools.py +168 -0
- code_review_graph/tools/registry_tools.py +125 -0
- code_review_graph/tools/review.py +477 -0
- code_review_graph/tsconfig_resolver.py +257 -0
- code_review_graph/visualization.py +2184 -0
- code_review_graph/wiki.py +305 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Tools 13, 14, 15: community listing, detail, architecture overview."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import Counter
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from ..communities import get_architecture_overview, get_communities
|
|
9
|
+
from ..context_savings import attach_context_savings
|
|
10
|
+
from ..graph import node_to_dict
|
|
11
|
+
from ..hints import generate_hints, get_session
|
|
12
|
+
from ._common import _get_store
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Tool 13: list_communities [EXPLORE]
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def list_communities_func(
    repo_root: str | None = None,
    sort_by: str = "size",
    min_size: int = 0,
    detail_level: str = "standard",
) -> dict[str, Any]:
    """Return the communities stored in the knowledge graph.

    [EXPLORE] A community is a cluster of related code entities
    (functions, classes) detected via the Leiden algorithm or
    file-based grouping.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
        sort_by: Sort column: size, cohesion, or name.
        min_size: Minimum community size to include (default: 0).
        detail_level: "standard" (default) returns each community as
            stored; "minimal" keeps only name, size, and cohesion.

    Returns:
        ``{"status": "ok", "summary", "communities", "_hints"}`` on
        success, or ``{"status": "error", "error"}`` if anything raised
        while reading or shaping the data.
    """
    store, _root = _get_store(repo_root)
    try:
        found = get_communities(store, sort_by=sort_by, min_size=min_size)
        if detail_level == "minimal":
            slim_keys = ("name", "size", "cohesion")
            found = [{key: entry[key] for key in slim_keys} for entry in found]

        payload: dict[str, object] = {
            "status": "ok",
            "summary": f"Found {len(found)} communities",
            "communities": found,
        }
        # Hints are derived from the payload built so far, then attached to it.
        payload["_hints"] = generate_hints(
            "list_communities", payload, get_session()
        )
    except Exception as exc:
        # Tool boundary: report failures as a structured error, not a traceback.
        return {"status": "error", "error": str(exc)}
    else:
        return payload
    finally:
        store.close()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# Tool 14: get_community [EXPLORE]
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _select_community(
    all_communities: list[dict[str, Any]],
    community_id: int | None,
    community_name: str | None,
) -> dict[str, Any] | None:
    """Pick one community by ID, or by name when no ID is given.

    Name lookup is case-insensitive. An exact name match wins over a
    substring match so that a community called "auth" stays reachable
    even when "auth-utils" is listed before it; otherwise the first
    community whose name contains the query is returned.
    """
    if community_id is not None:
        # An explicit ID takes precedence; the name is ignored entirely.
        return next(
            (c for c in all_communities if c.get("id") == community_id), None
        )
    if community_name is None:
        return None

    needle = community_name.lower()
    first_partial: dict[str, Any] | None = None
    for candidate in all_communities:
        name = candidate["name"].lower()
        if name == needle:
            return candidate
        if first_partial is None and needle in name:
            first_partial = candidate
    return first_partial


def get_community_func(
    community_name: str | None = None,
    community_id: int | None = None,
    include_members: bool = False,
    repo_root: str | None = None,
) -> dict[str, Any]:
    """Get details of a single code community.

    [EXPLORE] Retrieves a community by its database ID or by name match.
    Optionally includes the full list of member nodes.

    Args:
        community_name: Name to search for. An exact (case-insensitive)
            match is preferred; otherwise the first partial match is
            used. Ignored if community_id is given.
        community_id: Database ID of the community.
        include_members: If True, include full member node details
            under the ``member_details`` key.
        repo_root: Repository root path. Auto-detected if omitted.

    Returns:
        ``{"status": "ok", "summary", "community", "_hints"}`` when a
        community is found, ``{"status": "not_found", "summary"}`` when
        nothing matches, or ``{"status": "error", "error"}`` on failure.
    """
    store, _root = _get_store(repo_root)
    try:
        community = _select_community(
            get_communities(store), community_id, community_name
        )
        if community is None:
            return {
                "status": "not_found",
                "summary": (
                    "No community found matching the given criteria."
                ),
            }

        if include_members:
            cid = community.get("id")
            # Members are looked up by database ID; skip when it is absent.
            if cid is not None:
                member_nodes = store.get_nodes_by_community_id(cid)
                community["member_details"] = [
                    node_to_dict(n) for n in member_nodes
                ]

        result = {
            "status": "ok",
            "summary": (
                f"Community '{community['name']}': "
                f"{community['size']} nodes, "
                f"cohesion {community['cohesion']:.4f}"
            ),
            "community": community,
        }
        result["_hints"] = generate_hints(
            "get_community", result, get_session()
        )
        return result
    except Exception as exc:
        # Tool boundary: report failures as a structured error, not a traceback.
        return {"status": "error", "error": str(exc)}
    finally:
        store.close()
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
# Tool 15: get_architecture_overview [EXPLORE]
|
|
146
|
+
# ---------------------------------------------------------------------------
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# Per-community keys that survive ``detail_level="minimal"``.
_MINIMAL_COMMUNITY_FIELDS = ("id", "name", "size", "cohesion", "dominant_language")


def _minimal_overview(overview: dict[str, Any]) -> dict[str, Any]:
    """Shrink an architecture overview for ``detail_level="minimal"``.

    Each community is reduced to the keys in ``_MINIMAL_COMMUNITY_FIELDS``
    (whichever are present), which drops member lists. Cross-community
    edges are collapsed to one row per unordered community pair carrying
    the edge count and up to three most frequent edge kinds. Rows are
    ordered by descending edge count. Warnings are passed through as-is.
    """
    slim_communities = [
        {field: full[field] for field in _MINIMAL_COMMUNITY_FIELDS if field in full}
        for full in overview.get("communities", [])
    ]
    names_by_id = {
        entry["id"]: entry["name"] for entry in slim_communities if "id" in entry
    }

    # One Counter of edge kinds per pair; the pair's edge count is its total.
    kinds_by_pair: dict[tuple[int, int], Counter[str]] = {}
    for edge in overview.get("cross_community_edges", []):
        src, dst = edge["source_community"], edge["target_community"]
        # Canonical (low, high) ordering so A->B and B->A land in one bucket.
        key = (src, dst) if src <= dst else (dst, src)
        tally = kinds_by_pair.get(key)
        if tally is None:
            tally = kinds_by_pair[key] = Counter()
        tally[edge["edge_kind"]] += 1

    # Stable sort: pairs with equal counts keep first-seen order.
    ranked = sorted(
        kinds_by_pair.items(),
        key=lambda item: sum(item[1].values()),
        reverse=True,
    )
    pair_rows = []
    for (low, high), tally in ranked:
        pair_rows.append(
            {
                "source_community": names_by_id.get(low, f"community-{low}"),
                "target_community": names_by_id.get(high, f"community-{high}"),
                "edge_count": sum(tally.values()),
                "top_kinds": [kind for kind, _ in tally.most_common(3)],
            }
        )

    return {
        "communities": slim_communities,
        "cross_community_edges": pair_rows,
        "warnings": overview.get("warnings", []),
    }
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def get_architecture_overview_func(
    repo_root: str | None = None,
    detail_level: str = "minimal",
) -> dict[str, Any]:
    """Summarise the codebase architecture from its community structure.

    [EXPLORE] Reports communities, the coupling between them, and any
    warnings produced by the overview builder.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
        detail_level: "minimal" (default) drops community member lists
            and aggregates edges to one row per community pair;
            any other value returns the full overview including
            per-edge cross-community detail.

    Returns:
        ``{"status": "ok", "summary", ...overview keys..., "_hints"}``
        on success (minimal mode additionally has context-savings data
        attached), or ``{"status": "error", "error"}`` on failure.
    """
    store, _root = _get_store(repo_root)
    try:
        full_overview = get_architecture_overview(store)
        minimal = detail_level == "minimal"
        overview = _minimal_overview(full_overview) if minimal else full_overview

        community_total = len(overview["communities"])
        cross_total = len(overview["cross_community_edges"])
        warning_total = len(overview["warnings"])
        # In minimal mode each row is an aggregated pair, not a single edge.
        cross_label = "community pairs" if minimal else "cross-community edges"

        result = {
            "status": "ok",
            "summary": (
                f"Architecture: {community_total} communities, "
                f"{cross_total} {cross_label}, "
                f"{warning_total} warning(s)"
            ),
            **overview,
        }
        result["_hints"] = generate_hints(
            "get_architecture_overview", result, get_session()
        )
        if minimal:
            # Record the reduction relative to the uncompressed overview.
            attach_context_savings(result, original_context=full_overview)
        return result
    except Exception as exc:
        # Tool boundary: report failures as a structured error, not a traceback.
        return {"status": "error", "error": str(exc)}
    finally:
        store.close()
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Tool: get_minimal_context — ultra-compact context for token-efficient workflows."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sqlite3
|
|
7
|
+
import subprocess
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from ._common import _get_store, compact_response
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _has_git_changes(root: Path, base: str) -> bool:
    """Quick, best-effort check for diffed or uncommitted changes.

    Runs ``git diff --name-only <base> --`` in ``root`` and, if that
    reports nothing, ``git status --porcelain``.

    Args:
        root: Directory in which the git commands are run.
        base: Git ref to diff against.

    Returns:
        True if either command lists at least one path. False when no
        changes are listed, when ``base`` looks like a command-line
        option, or when git cannot be run (missing binary, unusable
        working directory, timeout).
    """
    # A ref starting with "-" would be parsed by git as an option
    # (e.g. "--output=<file>"), so refuse it instead of passing it on.
    if base.startswith("-"):
        return False

    def _git_stdout(*args: str) -> tuple[int, str]:
        """Run one git command in ``root``; return (exit code, stripped stdout)."""
        proc = subprocess.run(
            ["git", *args],
            capture_output=True, stdin=subprocess.DEVNULL, text=True,
            cwd=str(root), timeout=10,
        )
        return proc.returncode, proc.stdout.strip()

    try:
        code, listed = _git_stdout("diff", "--name-only", base, "--")
        if code == 0 and listed:
            return True
        # Also check staged/unstaged
        _, listed = _git_stdout("status", "--porcelain")
        return bool(listed)
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing git binary or an unusable cwd;
        # SubprocessError covers the timeout. Either way: no answer.
        return False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _suggest_next_tools(task: str) -> list[str]:
    """Map a free-text task description to three suggested follow-up tools.

    Keyword groups are tried in order (review, debug, refactor,
    onboarding); the first group with a hit wins, and a generic trio is
    returned when nothing matches.
    """
    import re  # local import, as elsewhere in this package's tool modules

    text = (task or "").lower()
    # "pr" only counts as a standalone token ("PR #42", "pr42", "open PRs").
    # A plain substring test would also fire on "problem", "improve",
    # "project", "print", ... and hijack debug/refactor tasks.
    mentions_pr = re.search(r"(?<![a-z])prs?(?![a-z])", text) is not None

    if mentions_pr or any(w in text for w in ("review", "merge", "diff")):
        return ["detect_changes", "get_affected_flows", "get_review_context"]
    if any(w in text for w in ("debug", "bug", "error", "fix")):
        return ["semantic_search_nodes", "query_graph", "get_flow"]
    if any(w in text for w in ("refactor", "rename", "dead", "clean")):
        return ["refactor", "find_large_functions", "get_architecture_overview"]
    if any(w in text for w in ("onboard", "understand", "explore", "arch")):
        return ["get_architecture_overview", "list_communities", "list_flows"]
    return ["detect_changes", "semantic_search_nodes", "get_architecture_overview"]


def get_minimal_context(
    task: str = "",
    changed_files: list[str] | None = None,
    repo_root: str | None = None,
    base: str = "HEAD~1",
) -> dict[str, Any]:
    """Return minimum context an agent needs to start any task (~100 tokens).

    Combines graph stats, top communities, top flows, risk score,
    and suggested next tools into an ultra-compact response.

    Args:
        task: Natural language description of what the agent is doing
            (e.g. "review PR #42", "debug login timeout").
        changed_files: Explicit changed files. Auto-detected from git if None.
        repo_root: Repository root path. Auto-detected if None.
        base: Git ref for diff comparison.

    Returns:
        The dict built by ``compact_response`` from the summary, risk
        label, top affected entities, top communities, top flows, and
        next-tool suggestions. Empty lists are passed as None.
    """
    store, root = _get_store(repo_root)
    try:
        # 1. Quick stats
        stats = store.get_stats()

        # 2. Risk from changed files
        risk = "unknown"
        risk_score = 0.0
        top_affected: list[str] = []
        test_gap_count = 0
        if changed_files or _has_git_changes(root, base):
            try:
                # Imported lazily so an import failure only disables the
                # risk section instead of the whole tool.
                from ..changes import analyze_changes
                from ..incremental import get_changed_files as _get_changed

                files = changed_files
                if not files:
                    files = _get_changed(root, base)
                if files:
                    abs_files = [str(root / f) for f in files]
                    analysis = analyze_changes(
                        store, abs_files, repo_root=str(root), base=base,
                    )
                    risk_score = analysis.get("risk_score", 0.0)
                    risk = (
                        "high" if risk_score > 0.7
                        else "medium" if risk_score > 0.4
                        else "low"
                    )
                    top_affected = [
                        f.get("name", "")
                        for f in analysis.get("changed_functions", [])[:5]
                    ]
                    test_gap_count = len(analysis.get("test_gaps", []))
            except (
                ImportError, OSError, ValueError,
                sqlite3.Error, subprocess.SubprocessError,
            ):
                # Best effort: risk stays "unknown" when analysis fails.
                logger.debug("Risk analysis failed in get_minimal_context", exc_info=True)

        # 3. Top 3 communities
        communities: list[str] = []
        try:
            rows = store._conn.execute(
                "SELECT name FROM communities ORDER BY size DESC LIMIT 3"
            ).fetchall()
            communities = [r[0] for r in rows]
        except sqlite3.OperationalError:  # nosec B110 — table may not exist yet
            logger.debug("communities table not yet populated")

        # 4. Top 3 critical flows
        flows: list[str] = []
        try:
            rows = store._conn.execute(
                "SELECT name FROM flows ORDER BY criticality DESC LIMIT 3"
            ).fetchall()
            flows = [r[0] for r in rows]
        except sqlite3.OperationalError:  # nosec B110 — table may not exist yet
            logger.debug("flows table not yet populated")

        # 5. Suggest next tools based on task keywords
        suggestions = _suggest_next_tools(task)

        # Build summary
        summary_parts = [
            f"{stats.total_nodes} nodes, {stats.total_edges} edges"
            f" across {stats.files_count} files.",
        ]
        if risk != "unknown":
            summary_parts.append(f"Risk: {risk} ({risk_score:.2f}).")
        if test_gap_count:
            summary_parts.append(f"{test_gap_count} test gaps.")

        return compact_response(
            summary=" ".join(summary_parts),
            key_entities=top_affected or None,
            risk=risk,
            communities=communities or None,
            flows_affected=flows or None,
            next_tool_suggestions=suggestions,
        )
    finally:
        store.close()
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""Tools 7, 8, 19, 20: embed_graph, get_docs_section, wiki tools."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ..embeddings import EmbeddingStore, embed_all_nodes
|
|
10
|
+
from ..incremental import find_project_root, get_db_path
|
|
11
|
+
from ._common import _get_store, _resolve_root, _validate_repo_root
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# ---------------------------------------------------------------------------
|
|
16
|
+
# Tool 7: embed_graph
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def embed_graph(
    repo_root: str | None = None,
    model: str | None = None,
    provider: str | None = None,
) -> dict[str, Any]:
    """Compute vector embeddings for graph nodes to enable semantic search.

    Requires: ``pip install code-review-graph[embeddings]`` (local provider only;
    cloud providers like ``openai`` / ``google`` / ``minimax`` use stdlib ``urllib``).
    Default model: all-MiniLM-L6-v2. Override via ``model`` param or
    CRG_EMBEDDING_MODEL env var.
    Changing the model or provider re-embeds all nodes automatically.

    Only embeds nodes that don't already have up-to-date embeddings.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
        model: Embedding model name. For local: HuggingFace ID or path;
            for openai: model ID (e.g. ``text-embedding-3-small``);
            for google: Gemini model ID. Falls back to
            CRG_EMBEDDING_MODEL / CRG_OPENAI_MODEL env vars as appropriate.
        provider: Provider name: ``local`` (default), ``openai``, ``google``,
            or ``minimax``. ``openai`` requires CRG_OPENAI_BASE_URL +
            CRG_OPENAI_API_KEY + CRG_OPENAI_MODEL env vars and accepts
            any OpenAI-compatible endpoint (real OpenAI, Azure, new-api,
            LiteLLM, vLLM, LocalAI, Ollama openai-mode, etc.).

    Returns:
        On success, ``status="ok"`` with ``newly_embedded`` and
        ``total_embeddings`` counts. If the embedding store rejects its
        configuration (ValueError) or reports itself unavailable,
        ``status="error"`` with an explanatory ``error`` message.
    """
    store, root = _get_store(repo_root)
    try:
        try:
            emb_store = EmbeddingStore(
                get_db_path(root), provider=provider, model=model
            )
        except ValueError as exc:
            # Unknown provider name or missing provider env vars — surface
            # as a structured error rather than a traceback.
            logger.error("embed_graph: %s", exc)
            return {"status": "error", "error": str(exc)}

        try:
            if emb_store.available:
                fresh = embed_all_nodes(store, emb_store)
                overall = emb_store.count()
                return {
                    "status": "ok",
                    "summary": (
                        f"Embedded {fresh} new node(s). "
                        f"Total embeddings: {overall}. "
                        "Semantic search is now active."
                    ),
                    "newly_embedded": fresh,
                    "total_embeddings": overall,
                }

            # Unavailable: tailor the advice to cloud vs. local providers.
            if provider in ("openai", "google", "minimax"):
                problem = (
                    f"The '{provider}' embedding provider is not available. "
                    "Check the required environment variables "
                    "(see README and `get_provider()` docstring) and that "
                    "the endpoint is reachable."
                )
            else:
                problem = (
                    "The local embedding provider needs sentence-transformers. "
                    "Install with: pip install code-review-graph[embeddings] — "
                    "or switch provider to 'openai' / 'google' / 'minimax'."
                )
            return {"status": "error", "error": problem}
        finally:
            emb_store.close()
    finally:
        store.close()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# Tool 8: get_docs_section
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _extract_section(content: str, section_name: str) -> str | None:
    """Return the stripped body of ``<section name="...">`` or None if absent.

    The name is matched literally (regex metacharacters are escaped) and
    case-insensitively; the body may span multiple lines.
    """
    import re

    match = re.search(
        rf'<section name="{re.escape(section_name)}">'
        r"(.*?)</section>",
        content,
        re.DOTALL | re.IGNORECASE,
    )
    return match.group(1).strip() if match else None


def get_docs_section(
    section_name: str, repo_root: str | None = None
) -> dict[str, Any]:
    """Return a specific section from the LLM-optimized reference.

    Used by skills and Claude Code to load only the exact documentation
    section needed, keeping token usage minimal (90%+ savings).

    ``docs/LLM-OPTIMIZED-REFERENCE.md`` is looked up, in order, under:
    the given ``repo_root`` (if it validates) or otherwise the
    auto-detected project root; the installed package directory (wheel
    install); and the directory above the package (source tree). The
    first file that contains the section wins.

    Args:
        section_name: Exact section name. One of: usage, review-delta,
            review-pr, commands, legal, watch, embeddings,
            languages, troubleshooting.
        repo_root: Repository root path. Auto-detected from current
            directory if omitted.

    Returns:
        ``{"status": "ok", "section", "content"}`` when found, otherwise
        ``{"status": "not_found", "error"}`` listing the known sections.
    """
    docs_name = "LLM-OPTIMIZED-REFERENCE.md"
    package_dir = Path(__file__).parent.parent

    search_roots: list[Path] = []

    def _remember(root: Path) -> None:
        """Append a search root once, preserving first-seen order."""
        if root not in search_roots:
            search_roots.append(root)

    # Wheel install: docs are packaged inside code_review_graph/docs.
    packaged_docs_present = (package_dir / "docs" / docs_name).exists()

    if repo_root:
        try:
            _remember(_validate_repo_root(Path(repo_root)))
        except ValueError:
            # Invalid root: fall through to the packaged copies below.
            pass
        # The packaged docs must remain a fallback when a repo root is
        # supplied; otherwise a wheel install finds nothing unless the
        # repository happens to ship its own copy of the reference.
        if packaged_docs_present:
            _remember(package_dir)
    else:
        if packaged_docs_present:
            _remember(package_dir)
        _remember(find_project_root())

    # Editable/source-tree fallback: docs live next to code_review_graph/.
    source_tree_root = package_dir.parent
    if (source_tree_root / "docs" / docs_name).exists():
        _remember(source_tree_root)

    for search_root in search_roots:
        candidate = search_root / "docs" / docs_name
        if candidate.exists():
            content = candidate.read_text(encoding="utf-8", errors="replace")
            section = _extract_section(content, section_name)
            if section is not None:
                return {
                    "status": "ok",
                    "section": section_name,
                    "content": section,
                }

    available = [
        "usage", "review-delta", "review-pr", "commands",
        "legal", "watch", "embeddings", "languages", "troubleshooting",
    ]
    return {
        "status": "not_found",
        "error": (
            f"Section '{section_name}' not found. "
            f"Available: {', '.join(available)}"
        ),
    }
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
# Tool 19: generate_wiki [DOCS]
|
|
186
|
+
# ---------------------------------------------------------------------------
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def generate_wiki_func(
    repo_root: str | None = None,
    force: bool = False,
) -> dict[str, Any]:
    """Generate a markdown wiki from the community structure.

    [DOCS] Creates a wiki page for each detected community and an index
    page. Pages are written to the ``wiki`` subdirectory of the
    repository's data directory. Only regenerates pages whose content
    has changed unless force=True.

    Args:
        repo_root: Repository root path. Auto-detected if omitted.
        force: If True, regenerate all pages even if content is unchanged.

    Returns:
        ``status="ok"`` with a summary, ``wiki_dir``, and the generator's
        own result keys (pages_generated, pages_updated, pages_unchanged
        among them), or ``{"status": "error", "error"}`` on failure.
    """
    from ..incremental import get_data_dir
    from ..wiki import generate_wiki

    store, root = _get_store(repo_root)
    try:
        wiki_dir = get_data_dir(root) / "wiki"
        outcome = generate_wiki(store, wiki_dir, force=force)

        created = outcome["pages_generated"]
        updated = outcome["pages_updated"]
        unchanged = outcome["pages_unchanged"]
        page_total = created + updated + unchanged

        response = {
            "status": "ok",
            "summary": (
                f"Wiki generated: {created} new, "
                f"{updated} updated, "
                f"{unchanged} unchanged "
                f"({page_total} total pages)"
            ),
            "wiki_dir": str(wiki_dir),
        }
        # Generator keys are merged last, so they win on any name clash.
        response.update(outcome)
        return response
    except Exception as exc:
        # Tool boundary: report failures as a structured error, not a traceback.
        return {"status": "error", "error": str(exc)}
    finally:
        store.close()
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# ---------------------------------------------------------------------------
|
|
237
|
+
# Tool 20: get_wiki_page [DOCS]
|
|
238
|
+
# ---------------------------------------------------------------------------
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def get_wiki_page_func(
    community_name: str,
    repo_root: str | None = None,
) -> dict[str, Any]:
    """Retrieve a specific wiki page by community name.

    [DOCS] Returns the markdown content of the wiki page for the given
    community. The wiki must have been generated first via generate_wiki.

    Args:
        community_name: Community name to look up (slugified for filename).
        repo_root: Repository root path. Auto-detected if omitted.

    Returns:
        ``{"status": "ok", "summary", "content"}`` when the page exists,
        otherwise ``{"status": "not_found", "summary"}``.
    """
    from ..incremental import get_data_dir
    from ..wiki import get_wiki_page

    wiki_dir = get_data_dir(_resolve_root(repo_root)) / "wiki"
    page = get_wiki_page(wiki_dir, community_name)

    if page is not None:
        return {
            "status": "ok",
            "summary": (
                f"Wiki page for '{community_name}' ({len(page)} chars)"
            ),
            "content": page,
        }
    # The lookup yields None when there is no page for this community.
    return {
        "status": "not_found",
        "summary": f"No wiki page found for '{community_name}'.",
    }
|