code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_graph/__init__.py +20 -0
- code_review_graph/__main__.py +4 -0
- code_review_graph/analysis.py +410 -0
- code_review_graph/changes.py +409 -0
- code_review_graph/cli.py +1255 -0
- code_review_graph/communities.py +874 -0
- code_review_graph/constants.py +23 -0
- code_review_graph/context_savings.py +317 -0
- code_review_graph/custom_languages.py +322 -0
- code_review_graph/daemon.py +1009 -0
- code_review_graph/daemon_cli.py +320 -0
- code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
- code_review_graph/embeddings.py +1006 -0
- code_review_graph/enrich.py +303 -0
- code_review_graph/eval/__init__.py +33 -0
- code_review_graph/eval/benchmarks/__init__.py +1 -0
- code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
- code_review_graph/eval/benchmarks/build_performance.py +60 -0
- code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
- code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
- code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
- code_review_graph/eval/benchmarks/search_quality.py +59 -0
- code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
- code_review_graph/eval/configs/code-review-graph.yaml +50 -0
- code_review_graph/eval/configs/express.yaml +45 -0
- code_review_graph/eval/configs/fastapi.yaml +48 -0
- code_review_graph/eval/configs/flask.yaml +50 -0
- code_review_graph/eval/configs/gin.yaml +51 -0
- code_review_graph/eval/configs/httpx.yaml +48 -0
- code_review_graph/eval/reporter.py +301 -0
- code_review_graph/eval/runner.py +211 -0
- code_review_graph/eval/scorer.py +85 -0
- code_review_graph/eval/token_benchmark.py +182 -0
- code_review_graph/exports.py +409 -0
- code_review_graph/flows.py +698 -0
- code_review_graph/graph.py +1427 -0
- code_review_graph/graph_diff.py +122 -0
- code_review_graph/hints.py +384 -0
- code_review_graph/incremental.py +1245 -0
- code_review_graph/jedi_resolver.py +303 -0
- code_review_graph/main.py +1079 -0
- code_review_graph/memory.py +142 -0
- code_review_graph/migrations.py +284 -0
- code_review_graph/parser.py +6957 -0
- code_review_graph/postprocessing.py +134 -0
- code_review_graph/prompts.py +159 -0
- code_review_graph/refactor.py +852 -0
- code_review_graph/registry.py +319 -0
- code_review_graph/rescript_resolver.py +206 -0
- code_review_graph/search.py +447 -0
- code_review_graph/skills.py +1481 -0
- code_review_graph/spring_resolver.py +200 -0
- code_review_graph/temporal_resolver.py +199 -0
- code_review_graph/token_benchmark.py +125 -0
- code_review_graph/tools/__init__.py +156 -0
- code_review_graph/tools/_common.py +176 -0
- code_review_graph/tools/analysis_tools.py +184 -0
- code_review_graph/tools/build.py +541 -0
- code_review_graph/tools/community_tools.py +246 -0
- code_review_graph/tools/context.py +152 -0
- code_review_graph/tools/docs.py +274 -0
- code_review_graph/tools/flows_tools.py +176 -0
- code_review_graph/tools/query.py +692 -0
- code_review_graph/tools/refactor_tools.py +168 -0
- code_review_graph/tools/registry_tools.py +125 -0
- code_review_graph/tools/review.py +477 -0
- code_review_graph/tsconfig_resolver.py +257 -0
- code_review_graph/visualization.py +2184 -0
- code_review_graph/wiki.py +305 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
"""Tool 1: build_or_update_graph + run_postprocess."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sqlite3
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from ..incremental import full_build, incremental_update
|
|
11
|
+
from ._common import _get_store
|
|
12
|
+
|
|
13
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _run_postprocess(
|
|
17
|
+
store: Any,
|
|
18
|
+
build_result: dict[str, Any],
|
|
19
|
+
postprocess: str,
|
|
20
|
+
full_rebuild: bool = False,
|
|
21
|
+
changed_files: list[str] | None = None,
|
|
22
|
+
) -> list[str]:
|
|
23
|
+
"""Run post-build steps based on *postprocess* level.
|
|
24
|
+
|
|
25
|
+
When *full_rebuild* is False and *changed_files* are available,
|
|
26
|
+
uses incremental flow/community detection for faster updates.
|
|
27
|
+
|
|
28
|
+
Returns a list of warning strings (empty on success).
|
|
29
|
+
"""
|
|
30
|
+
warnings: list[str] = []
|
|
31
|
+
build_result["postprocess_level"] = postprocess
|
|
32
|
+
|
|
33
|
+
if postprocess == "none":
|
|
34
|
+
return warnings
|
|
35
|
+
|
|
36
|
+
# -- Signatures + FTS (fast, always run unless "none") --
|
|
37
|
+
try:
|
|
38
|
+
rows = store.get_nodes_without_signature()
|
|
39
|
+
for row in rows:
|
|
40
|
+
node_id, name, kind, params, ret = (
|
|
41
|
+
row[0],
|
|
42
|
+
row[1],
|
|
43
|
+
row[2],
|
|
44
|
+
row[3],
|
|
45
|
+
row[4],
|
|
46
|
+
)
|
|
47
|
+
if kind in ("Function", "Test"):
|
|
48
|
+
sig = f"def {name}({params or ''})"
|
|
49
|
+
if ret:
|
|
50
|
+
sig += f" -> {ret}"
|
|
51
|
+
elif kind == "Class":
|
|
52
|
+
sig = f"class {name}"
|
|
53
|
+
else:
|
|
54
|
+
sig = name
|
|
55
|
+
store.update_node_signature(node_id, sig[:512])
|
|
56
|
+
store.commit()
|
|
57
|
+
build_result["signatures_updated"] = True
|
|
58
|
+
except (sqlite3.OperationalError, TypeError, KeyError) as e:
|
|
59
|
+
logger.warning("Signature computation failed: %s", e)
|
|
60
|
+
warnings.append(f"Signature computation failed: {type(e).__name__}: {e}")
|
|
61
|
+
|
|
62
|
+
try:
|
|
63
|
+
from code_review_graph.search import rebuild_fts_index
|
|
64
|
+
|
|
65
|
+
fts_count = rebuild_fts_index(store)
|
|
66
|
+
build_result["fts_indexed"] = fts_count
|
|
67
|
+
build_result["fts_rebuilt"] = True
|
|
68
|
+
except (sqlite3.OperationalError, ImportError) as e:
|
|
69
|
+
logger.warning("FTS index rebuild failed: %s", e)
|
|
70
|
+
warnings.append(f"FTS index rebuild failed: {type(e).__name__}: {e}")
|
|
71
|
+
|
|
72
|
+
if postprocess == "minimal":
|
|
73
|
+
return warnings
|
|
74
|
+
|
|
75
|
+
# -- Expensive: flows + communities (only for "full") --
|
|
76
|
+
use_incremental = not full_rebuild and bool(changed_files)
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
if use_incremental:
|
|
80
|
+
from code_review_graph.flows import incremental_trace_flows
|
|
81
|
+
|
|
82
|
+
count = incremental_trace_flows(store, changed_files)
|
|
83
|
+
else:
|
|
84
|
+
from code_review_graph.flows import store_flows as _store_flows
|
|
85
|
+
from code_review_graph.flows import trace_flows as _trace_flows
|
|
86
|
+
|
|
87
|
+
flows = _trace_flows(store)
|
|
88
|
+
count = _store_flows(store, flows)
|
|
89
|
+
build_result["flows_detected"] = count
|
|
90
|
+
except (sqlite3.OperationalError, ImportError) as e:
|
|
91
|
+
logger.warning("Flow detection failed: %s", e)
|
|
92
|
+
warnings.append(f"Flow detection failed: {type(e).__name__}: {e}")
|
|
93
|
+
|
|
94
|
+
try:
|
|
95
|
+
if use_incremental:
|
|
96
|
+
from code_review_graph.communities import (
|
|
97
|
+
incremental_detect_communities,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
count = incremental_detect_communities(store, changed_files)
|
|
101
|
+
else:
|
|
102
|
+
from code_review_graph.communities import (
|
|
103
|
+
detect_communities as _detect_communities,
|
|
104
|
+
)
|
|
105
|
+
from code_review_graph.communities import (
|
|
106
|
+
store_communities as _store_communities,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
comms = _detect_communities(store)
|
|
110
|
+
count = _store_communities(store, comms)
|
|
111
|
+
build_result["communities_detected"] = count
|
|
112
|
+
except (sqlite3.OperationalError, ImportError) as e:
|
|
113
|
+
logger.warning("Community detection failed: %s", e)
|
|
114
|
+
warnings.append(f"Community detection failed: {type(e).__name__}: {e}")
|
|
115
|
+
|
|
116
|
+
# -- Compute pre-computed summary tables --
|
|
117
|
+
try:
|
|
118
|
+
_compute_summaries(store)
|
|
119
|
+
build_result["summaries_computed"] = True
|
|
120
|
+
except (sqlite3.OperationalError, Exception) as e:
|
|
121
|
+
logger.warning("Summary computation failed: %s", e)
|
|
122
|
+
warnings.append(f"Summary computation failed: {type(e).__name__}: {e}")
|
|
123
|
+
|
|
124
|
+
store.set_metadata(
|
|
125
|
+
"last_postprocessed_at",
|
|
126
|
+
time.strftime("%Y-%m-%dT%H:%M:%S"),
|
|
127
|
+
)
|
|
128
|
+
store.set_metadata("postprocess_level", postprocess)
|
|
129
|
+
|
|
130
|
+
return warnings
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _compute_summaries(store: Any) -> None:
    """Populate community_summaries, flow_snapshots, and risk_index tables.

    Uses batched aggregate queries and in-memory grouping instead of
    per-community/per-node loops, so the work is a handful of ``GROUP BY``
    queries rather than one SQLite round trip per node or community.

    Each summary table is rebuilt inside its own explicit transaction so the
    DELETE + INSERT sequence is atomic:

    * ``sqlite3.OperationalError`` (for example the table does not exist
      yet) rolls that block back, is logged at debug level, and the next
      block still runs.
    * Any other exception also rolls the block back -- so a half-rewritten
      table can never be committed by a later statement -- and is then
      re-raised to the caller.

    Args:
        store: Object exposing the SQLite connection as ``_conn``.
    """
    conn = store._conn
    blocks = (
        ("community_summaries", _rebuild_community_summaries),
        ("flow_snapshots", _rebuild_flow_snapshots),
        ("risk_index", _rebuild_risk_index),
    )
    for table, rebuild in blocks:
        try:
            conn.execute("BEGIN IMMEDIATE")
            rebuild(conn)
            conn.commit()
        except sqlite3.OperationalError as exc:
            conn.rollback()  # Table may not exist yet
            logger.debug("Skipped %s rebuild: %s", table, exc)
        except Exception:
            conn.rollback()
            raise


def _rebuild_community_summaries(conn: Any) -> None:
    """Rewrite community_summaries from the communities, nodes and edges tables."""
    import json as _json
    from collections import defaultdict
    from os.path import commonprefix

    conn.execute("DELETE FROM community_summaries")

    # Total degree (in + out) per qualified name, computed once up front.
    edge_counts: dict[str, int] = defaultdict(int)
    for row in conn.execute(
        "SELECT source_qualified, COUNT(*) FROM edges GROUP BY source_qualified"
    ):
        edge_counts[row[0]] += row[1]
    for row in conn.execute(
        "SELECT target_qualified, COUNT(*) FROM edges GROUP BY target_qualified"
    ):
        edge_counts[row[0]] += row[1]

    # Group non-File nodes per community for top-symbol selection.
    nodes_by_comm: dict[int, list[tuple[str, int]]] = defaultdict(list)
    for row in conn.execute(
        "SELECT community_id, name, qualified_name FROM nodes "
        "WHERE community_id IS NOT NULL AND kind != 'File'"
    ):
        cid, name, qn = row[0], row[1], row[2]
        nodes_by_comm[cid].append((name, edge_counts.get(qn, 0)))

    # Distinct file paths per community, in first-seen order.
    files_by_comm: dict[int, list[str]] = defaultdict(list)
    seen_files: dict[int, set[str]] = defaultdict(set)
    for row in conn.execute(
        "SELECT community_id, file_path FROM nodes WHERE community_id IS NOT NULL"
    ):
        cid, fp = row[0], row[1]
        if fp not in seen_files[cid]:
            seen_files[cid].add(fp)
            files_by_comm[cid].append(fp)

    community_rows = conn.execute(
        "SELECT id, name, size, dominant_language FROM communities"
    ).fetchall()
    for r in community_rows:
        cid, cname, csize, clang = r[0], r[1], r[2], r[3]

        # Top 5 symbols by total edge count.  sorted() is stable, so ties
        # keep their original row order.
        members = sorted(
            nodes_by_comm.get(cid, []),
            key=lambda nc: nc[1],
            reverse=True,
        )
        key_syms = _json.dumps([m[0] for m in members[:5]])

        # Purpose = last directory component of the character-wise common
        # prefix of (up to 20 of) the community's file paths.
        paths = files_by_comm.get(cid, [])[:20]
        purpose = ""
        if paths:
            prefix = commonprefix(paths)
            if "/" in prefix:
                purpose = prefix.rsplit("/", 1)[0].split("/")[-1]

        conn.execute(
            "INSERT OR REPLACE INTO community_summaries "
            "(community_id, name, purpose, key_symbols, size, dominant_language) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (cid, cname, purpose, key_syms, csize, clang or ""),
        )


def _rebuild_flow_snapshots(conn: Any) -> None:
    """Rewrite flow_snapshots with a short named critical path per flow."""
    import json as _json

    conn.execute("DELETE FROM flow_snapshots")
    flow_rows = conn.execute(
        "SELECT id, name, entry_point_id, criticality, node_count, "
        "file_count, path_json FROM flows"
    ).fetchall()

    # Collect every node id a snapshot will name, then resolve them in
    # batched queries instead of per-flow, per-node lookups.
    needed_ids: set[int] = set()
    parsed_paths: list[list[int]] = []
    for r in flow_rows:
        needed_ids.add(r[2])  # entry_point_id
        path_ids = _json.loads(r[6]) if r[6] else []
        parsed_paths.append(path_ids)
        # entry + up to 3 intermediates + last
        needed_ids.update(path_ids[1:4])
        if path_ids:
            needed_ids.add(path_ids[-1])

    id_to_name: dict[int, str] = {}
    if needed_ids:
        # Chunks of 450 stay under SQLite's default
        # SQLITE_MAX_VARIABLE_NUMBER (999).
        id_list = list(needed_ids)
        for i in range(0, len(id_list), 450):
            batch = id_list[i : i + 450]
            placeholders = ",".join("?" for _ in batch)
            node_rows = conn.execute(
                f"SELECT id, qualified_name FROM nodes WHERE id IN ({placeholders})",  # nosec B608
                batch,
            ).fetchall()
            for nr in node_rows:
                id_to_name[nr[0]] = nr[1]

    for r, path_ids in zip(flow_rows, parsed_paths):
        fid, fname, ep_id = r[0], r[1], r[2]
        crit, ncount, fcount = r[3], r[4], r[5]
        # Fall back to the stringified id when the entry node is unknown.
        ep_name = id_to_name.get(ep_id, str(ep_id))
        critical_path: list[str] = []
        if path_ids:
            critical_path.append(ep_name)
            if len(path_ids) > 2:
                for nid in path_ids[1:4]:
                    nm = id_to_name.get(nid)
                    if nm:
                        critical_path.append(nm)
            if len(path_ids) > 1:
                last = id_to_name.get(path_ids[-1])
                if last and last not in critical_path:
                    critical_path.append(last)
        conn.execute(
            "INSERT OR REPLACE INTO flow_snapshots "
            "(flow_id, name, entry_point, critical_path, criticality, "
            "node_count, file_count) VALUES (?, ?, ?, ?, ?, ?, ?)",
            (fid, fname, ep_name, _json.dumps(critical_path), crit, ncount, fcount),
        )


def _rebuild_risk_index(conn: Any) -> None:
    """Rewrite risk_index with a heuristic score per Function/Class/Test node."""
    conn.execute("DELETE FROM risk_index")

    # Caller and test-coverage counts come from two aggregate queries
    # rather than two COUNT(*) queries per candidate node.
    caller_counts: dict[str, int] = {}
    for row in conn.execute(
        "SELECT target_qualified, COUNT(*) FROM edges "
        "WHERE kind = 'CALLS' GROUP BY target_qualified"
    ):
        caller_counts[row[0]] = row[1]

    tested_counts: dict[str, int] = {}
    for row in conn.execute(
        "SELECT source_qualified, COUNT(*) FROM edges "
        "WHERE kind = 'TESTED_BY' GROUP BY source_qualified"
    ):
        tested_counts[row[0]] = row[1]

    risk_nodes = conn.execute(
        "SELECT id, qualified_name, name FROM nodes WHERE kind IN ('Function', 'Class', 'Test')"
    ).fetchall()
    # Substrings of a node name that mark it as security relevant.
    security_kw = (
        "auth",
        "login",
        "password",
        "token",
        "session",
        "crypt",
        "secret",
        "credential",
        "permission",
        "sql",
        "execute",
    )
    for n in risk_nodes:
        nid, qn, name = n[0], n[1], n[2]
        caller_count = caller_counts.get(qn, 0)
        coverage = "tested" if tested_counts.get(qn, 0) > 0 else "untested"
        # A missing name simply matches no keyword instead of raising.
        name_lower = (name or "").lower()
        sec_relevant = 1 if any(kw in name_lower for kw in security_kw) else 0

        # Additive score: fan-in (+0.15 / +0.3), untested (+0.3),
        # security keyword (+0.4), capped at 1.0.
        risk = 0.0
        if caller_count > 10:
            risk += 0.3
        elif caller_count > 3:
            risk += 0.15
        if coverage == "untested":
            risk += 0.3
        if sec_relevant:
            risk += 0.4
        risk = min(risk, 1.0)
        conn.execute(
            "INSERT OR REPLACE INTO risk_index "
            "(node_id, qualified_name, risk_score, caller_count, "
            "test_coverage, security_relevant, last_computed) "
            "VALUES (?, ?, ?, ?, ?, ?, datetime('now'))",
            (nid, qn, risk, caller_count, coverage, sec_relevant),
        )
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def build_or_update_graph(
    full_rebuild: bool = False,
    repo_root: str | None = None,
    base: str = "HEAD~1",
    postprocess: str = "full",
    recurse_submodules: bool | None = None,
) -> dict[str, Any]:
    """Build the code knowledge graph from scratch or update it incrementally.

    Args:
        full_rebuild: True re-parses every file; False (default) re-parses
            only the files changed since ``base``.
        repo_root: Repository root path. Auto-detected if omitted.
        base: Git ref the incremental diff is taken against
            (default: HEAD~1).
        postprocess: How much post-processing to run after the build:
            ``"full"`` (default) — signatures, FTS, flows, communities.
            ``"minimal"`` — signatures + FTS only (fast, keeps search working).
            ``"none"`` — skip all post-processing (raw parse only).
        recurse_submodules: If True, include files from git submodules
            via ``git ls-files --recurse-submodules``. When None
            (default), falls back to the CRG_RECURSE_SUBMODULES
            environment variable. Default: disabled.  Only forwarded to
            the full build.

    Returns:
        Summary with files_parsed/updated, node/edge counts, and errors.
        An incremental run that updated no files returns early, before any
        post-processing.  Post-processing warnings, if any, are returned
        under ``"warnings"``.
    """
    store, root = _get_store(repo_root)
    try:
        if not full_rebuild:
            outcome = incremental_update(root, store, base=base)

            # Nothing was re-parsed: report and skip post-processing.
            if outcome["files_updated"] == 0:
                return {
                    "status": "ok",
                    "build_type": "incremental",
                    "summary": "No changes detected. Graph is up to date.",
                    "postprocess_level": postprocess,
                    **outcome,
                }

            build_type = "incremental"
            summary_text = (
                f"Incremental update: {outcome['files_updated']} files re-parsed, "
                f"{outcome['total_nodes']} nodes and "
                f"{outcome['total_edges']} edges updated. "
                f"Changed: {outcome['changed_files']}. "
                f"Dependents also updated: {outcome['dependent_files']}."
            )
        else:
            outcome = full_build(root, store, recurse_submodules)
            build_type = "full"
            summary_text = (
                f"Full build complete: parsed {outcome['files_parsed']} files, "
                f"created {outcome['total_nodes']} nodes and "
                f"{outcome['total_edges']} edges."
            )

        # Keys from the build outcome are merged last, as before.
        build_result = {
            "status": "ok",
            "build_type": build_type,
            "summary": summary_text,
            **outcome,
        }

        # Only an incremental build passes its changed files on, which lets
        # post-processing use incremental flow/community detection.
        if full_rebuild:
            changed = None
        else:
            changed = outcome.get("changed_files")

        post_warnings = _run_postprocess(
            store,
            build_result,
            postprocess,
            full_rebuild=full_rebuild,
            changed_files=changed,
        )
        if post_warnings:
            build_result["warnings"] = post_warnings
        return build_result
    finally:
        store.close()
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def run_postprocess(
    flows: bool = True,
    communities: bool = True,
    fts: bool = True,
    repo_root: str | None = None,
) -> dict[str, Any]:
    """Re-run post-processing against a graph that has already been built.

    Lets the expensive steps (flows, communities) run separately from the
    build, or after a build that used ``postprocess="none"``.  Missing node
    signatures are always filled in; the remaining steps are opt-out.  Each
    step is best-effort: a failure is logged and reported under
    ``"warnings"`` while the other steps still run.

    Args:
        flows: Run flow detection. Default: True.
        communities: Run community detection. Default: True.
        fts: Rebuild FTS index. Default: True.
        repo_root: Repository root path. Auto-detected if omitted.

    Returns:
        Summary of what was computed.
    """
    store, _root = _get_store(repo_root)
    report: dict[str, Any] = {"status": "ok"}
    problems: list[str] = []

    try:
        # -- Signatures (always) --
        try:
            for record in store.get_nodes_without_signature():
                node_id = record[0]
                name = record[1]
                kind = record[2]
                params = record[3]
                ret = record[4]

                if kind == "Class":
                    sig = f"class {name}"
                elif kind in ("Function", "Test"):
                    sig = f"def {name}({params or ''})"
                    if ret:
                        sig += f" -> {ret}"
                else:
                    sig = name
                # Stored signatures are capped at 512 characters.
                store.update_node_signature(node_id, sig[:512])
            store.commit()
            report["signatures_updated"] = True
        except (sqlite3.OperationalError, TypeError, KeyError) as err:
            logger.warning("Signature computation failed: %s", err)
            problems.append(f"Signature computation failed: {type(err).__name__}: {err}")

        # -- Full-text search index --
        if fts:
            try:
                from code_review_graph.search import rebuild_fts_index

                report["fts_indexed"] = rebuild_fts_index(store)
            except (sqlite3.OperationalError, ImportError) as err:
                store.rollback()
                logger.warning("FTS index rebuild failed: %s", err)
                problems.append(f"FTS index rebuild failed: {type(err).__name__}: {err}")

        # -- Execution flows (always a full re-trace here) --
        if flows:
            try:
                from code_review_graph.flows import store_flows as _store_flows
                from code_review_graph.flows import trace_flows as _trace_flows

                report["flows_detected"] = _store_flows(store, _trace_flows(store))
            except (sqlite3.OperationalError, ImportError) as err:
                store.rollback()
                logger.warning("Flow detection failed: %s", err)
                problems.append(f"Flow detection failed: {type(err).__name__}: {err}")

        # -- Communities (always a full detection here) --
        if communities:
            try:
                from code_review_graph.communities import (
                    detect_communities as _detect_communities,
                )
                from code_review_graph.communities import (
                    store_communities as _store_communities,
                )

                report["communities_detected"] = _store_communities(
                    store, _detect_communities(store)
                )
            except (sqlite3.OperationalError, ImportError) as err:
                store.rollback()
                logger.warning("Community detection failed: %s", err)
                problems.append(f"Community detection failed: {type(err).__name__}: {err}")

        # Record when post-processing last ran (local time, no UTC offset).
        stamp = time.strftime("%Y-%m-%dT%H:%M:%S")
        store.set_metadata("last_postprocessed_at", stamp)

        report["summary"] = "Post-processing complete."
        if problems:
            report["warnings"] = problems
        return report
    finally:
        store.close()
|