code-review-graph 2.2.2__tar.gz → 2.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/PKG-INFO +8 -3
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/README.md +5 -2
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/changes.py +2 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/cli.py +20 -6
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/communities.py +136 -142
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/embeddings.py +4 -1
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/flows.py +39 -33
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/graph.py +17 -3
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/incremental.py +94 -10
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/main.py +4 -1
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/parser.py +543 -12
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/refactor.py +16 -4
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/registry.py +4 -1
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/skills.py +172 -54
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/build.py +21 -7
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/visualization.py +1 -1
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/USAGE.md +4 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/architecture.md +1 -1
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/pyproject.toml +10 -1
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/.gitignore +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/LICENSE +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code-review-graph-vscode/LICENSE +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code-review-graph-vscode/README.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/__init__.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/__main__.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/constants.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/__init__.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/benchmarks/__init__.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/benchmarks/build_performance.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/benchmarks/flow_completeness.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/benchmarks/impact_accuracy.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/benchmarks/search_quality.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/benchmarks/token_efficiency.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/configs/express.yaml +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/configs/fastapi.yaml +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/configs/flask.yaml +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/configs/gin.yaml +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/configs/httpx.yaml +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/configs/nextjs.yaml +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/reporter.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/runner.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/scorer.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/eval/token_benchmark.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/hints.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/migrations.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/prompts.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/search.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/__init__.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/_common.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/community_tools.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/context.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/docs.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/flows_tools.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/query.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/refactor_tools.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/registry_tools.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tools/review.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/tsconfig_resolver.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/code_review_graph/wiki.py +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/COMMANDS.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/FEATURES.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/INDEX.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/LEGAL.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/LLM-OPTIMIZED-REFERENCE.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/ROADMAP.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/TROUBLESHOOTING.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/docs/schema.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/hooks/hooks.json +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/hooks/session-start.sh +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/skills/build-graph/SKILL.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/skills/review-delta/SKILL.md +0 -0
- {code_review_graph-2.2.2 → code_review_graph-2.2.3}/skills/review-pr/SKILL.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: code-review-graph
|
|
3
|
-
Version: 2.2.2
|
|
3
|
+
Version: 2.2.3
|
|
4
4
|
Summary: Persistent incremental knowledge graph for token-efficient, context-aware code reviews with Claude Code
|
|
5
5
|
Project-URL: Homepage, https://code-review-graph.com
|
|
6
6
|
Project-URL: Repository, https://github.com/tirth8205/code-review-graph
|
|
@@ -38,8 +38,10 @@ Provides-Extra: communities
|
|
|
38
38
|
Requires-Dist: igraph>=0.11.0; extra == 'communities'
|
|
39
39
|
Provides-Extra: dev
|
|
40
40
|
Requires-Dist: pytest-asyncio<1,>=0.23; extra == 'dev'
|
|
41
|
+
Requires-Dist: pytest-cov<8,>=4.0; extra == 'dev'
|
|
41
42
|
Requires-Dist: pytest<9,>=8.0; extra == 'dev'
|
|
42
43
|
Requires-Dist: ruff<1,>=0.3.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: tomli>=2.0; (python_version < '3.11') and extra == 'dev'
|
|
43
45
|
Provides-Extra: embeddings
|
|
44
46
|
Requires-Dist: numpy<3,>=1.26; extra == 'embeddings'
|
|
45
47
|
Requires-Dist: sentence-transformers<4,>=3.0.0; extra == 'embeddings'
|
|
@@ -90,12 +92,13 @@ code-review-graph build # parse your codebase
|
|
|
90
92
|
One command sets up everything. `install` detects which AI coding tools you have, writes the correct MCP configuration for each one, and injects graph-aware instructions into your platform rules. It auto-detects whether you installed via `uvx` or `pip`/`pipx` and generates the right config. Restart your editor/tool after installing.
|
|
91
93
|
|
|
92
94
|
<p align="center">
|
|
93
|
-
<img src="diagrams/diagram8_supported_platforms.png" alt="One Install, Every Platform: auto-detects Claude Code, Cursor, Windsurf, Zed, Continue, OpenCode, and Antigravity" width="85%" />
|
|
95
|
+
<img src="diagrams/diagram8_supported_platforms.png" alt="One Install, Every Platform: auto-detects Codex, Claude Code, Cursor, Windsurf, Zed, Continue, OpenCode, and Antigravity" width="85%" />
|
|
94
96
|
</p>
|
|
95
97
|
|
|
96
98
|
To target a specific platform:
|
|
97
99
|
|
|
98
100
|
```bash
|
|
101
|
+
code-review-graph install --platform codex # configure only Codex
|
|
99
102
|
code-review-graph install --platform cursor # configure only Cursor
|
|
100
103
|
code-review-graph install --platform claude-code # configure only Claude Code
|
|
101
104
|
```
|
|
@@ -341,6 +344,8 @@ vendor/**
|
|
|
341
344
|
node_modules/**
|
|
342
345
|
```
|
|
343
346
|
|
|
347
|
+
Note: in git repos, only tracked files are indexed (`git ls-files`), so gitignored files are skipped automatically. Use `.code-review-graphignore` to exclude tracked files or when git isn't available.
|
|
348
|
+
|
|
344
349
|
Optional dependency groups:
|
|
345
350
|
|
|
346
351
|
```bash
|
|
@@ -382,5 +387,5 @@ MIT. See [LICENSE](LICENSE).
|
|
|
382
387
|
<br>
|
|
383
388
|
<a href="https://code-review-graph.com">code-review-graph.com</a><br><br>
|
|
384
389
|
<code>pip install code-review-graph && code-review-graph install</code><br>
|
|
385
|
-
<sub>Works with Claude Code, Cursor, Windsurf, Zed, Continue, OpenCode, and Antigravity</sub>
|
|
390
|
+
<sub>Works with Codex, Claude Code, Cursor, Windsurf, Zed, Continue, OpenCode, and Antigravity</sub>
|
|
386
391
|
</p>
|
|
@@ -36,12 +36,13 @@ code-review-graph build # parse your codebase
|
|
|
36
36
|
One command sets up everything. `install` detects which AI coding tools you have, writes the correct MCP configuration for each one, and injects graph-aware instructions into your platform rules. It auto-detects whether you installed via `uvx` or `pip`/`pipx` and generates the right config. Restart your editor/tool after installing.
|
|
37
37
|
|
|
38
38
|
<p align="center">
|
|
39
|
-
<img src="diagrams/diagram8_supported_platforms.png" alt="One Install, Every Platform: auto-detects Claude Code, Cursor, Windsurf, Zed, Continue, OpenCode, and Antigravity" width="85%" />
|
|
39
|
+
<img src="diagrams/diagram8_supported_platforms.png" alt="One Install, Every Platform: auto-detects Codex, Claude Code, Cursor, Windsurf, Zed, Continue, OpenCode, and Antigravity" width="85%" />
|
|
40
40
|
</p>
|
|
41
41
|
|
|
42
42
|
To target a specific platform:
|
|
43
43
|
|
|
44
44
|
```bash
|
|
45
|
+
code-review-graph install --platform codex # configure only Codex
|
|
45
46
|
code-review-graph install --platform cursor # configure only Cursor
|
|
46
47
|
code-review-graph install --platform claude-code # configure only Claude Code
|
|
47
48
|
```
|
|
@@ -287,6 +288,8 @@ vendor/**
|
|
|
287
288
|
node_modules/**
|
|
288
289
|
```
|
|
289
290
|
|
|
291
|
+
Note: in git repos, only tracked files are indexed (`git ls-files`), so gitignored files are skipped automatically. Use `.code-review-graphignore` to exclude tracked files or when git isn't available.
|
|
292
|
+
|
|
290
293
|
Optional dependency groups:
|
|
291
294
|
|
|
292
295
|
```bash
|
|
@@ -328,5 +331,5 @@ MIT. See [LICENSE](LICENSE).
|
|
|
328
331
|
<br>
|
|
329
332
|
<a href="https://code-review-graph.com">code-review-graph.com</a><br><br>
|
|
330
333
|
<code>pip install code-review-graph && code-review-graph install</code><br>
|
|
331
|
-
<sub>Works with Claude Code, Cursor, Windsurf, Zed, Continue, OpenCode, and Antigravity</sub>
|
|
334
|
+
<sub>Works with Codex, Claude Code, Cursor, Windsurf, Zed, Continue, OpenCode, and Antigravity</sub>
|
|
332
335
|
</p>
|
|
@@ -96,7 +96,7 @@ def _print_banner() -> None:
|
|
|
96
96
|
|
|
97
97
|
def _handle_init(args: argparse.Namespace) -> None:
|
|
98
98
|
"""Set up MCP config for detected AI coding platforms."""
|
|
99
|
-
from .incremental import find_repo_root
|
|
99
|
+
from .incremental import ensure_repo_gitignore_excludes_crg, find_repo_root
|
|
100
100
|
from .skills import install_platform_configs
|
|
101
101
|
|
|
102
102
|
repo_root = Path(args.repo) if args.repo else find_repo_root()
|
|
@@ -117,9 +117,18 @@ def _handle_init(args: argparse.Namespace) -> None:
|
|
|
117
117
|
print(f"\nConfigured {len(configured)} platform(s): {', '.join(configured)}")
|
|
118
118
|
|
|
119
119
|
if dry_run:
|
|
120
|
+
print("[dry-run] Would ensure .gitignore ignores .code-review-graph/.")
|
|
120
121
|
print("\n[dry-run] No files were modified.")
|
|
121
122
|
return
|
|
122
123
|
|
|
124
|
+
gitignore_state = ensure_repo_gitignore_excludes_crg(repo_root)
|
|
125
|
+
if gitignore_state == "created":
|
|
126
|
+
print("Created .gitignore and added .code-review-graph/.")
|
|
127
|
+
elif gitignore_state == "updated":
|
|
128
|
+
print("Updated .gitignore with .code-review-graph/.")
|
|
129
|
+
else:
|
|
130
|
+
print(".gitignore already contains .code-review-graph/.")
|
|
131
|
+
|
|
123
132
|
# Skills and hooks are installed by default so Claude actually uses the
|
|
124
133
|
# graph tools proactively. Use --no-skills / --no-hooks to opt out.
|
|
125
134
|
skip_skills = getattr(args, "no_skills", False)
|
|
@@ -130,20 +139,25 @@ def _handle_init(args: argparse.Namespace) -> None:
|
|
|
130
139
|
generate_skills,
|
|
131
140
|
inject_claude_md,
|
|
132
141
|
inject_platform_instructions,
|
|
142
|
+
install_git_hook,
|
|
133
143
|
install_hooks,
|
|
134
144
|
)
|
|
135
145
|
|
|
136
146
|
if not skip_skills:
|
|
137
147
|
skills_dir = generate_skills(repo_root)
|
|
138
148
|
print(f"Generated skills in {skills_dir}")
|
|
139
|
-
|
|
140
|
-
|
|
149
|
+
if target in ("claude", "all"):
|
|
150
|
+
inject_claude_md(repo_root)
|
|
151
|
+
updated = inject_platform_instructions(repo_root, target=target)
|
|
141
152
|
if updated:
|
|
142
153
|
print(f"Injected graph instructions into: {', '.join(updated)}")
|
|
143
154
|
|
|
144
|
-
if not skip_hooks:
|
|
155
|
+
if not skip_hooks and target in ("claude", "all"):
|
|
145
156
|
install_hooks(repo_root)
|
|
146
157
|
print(f"Installed hooks in {repo_root / '.claude' / 'settings.json'}")
|
|
158
|
+
git_hook = install_git_hook(repo_root)
|
|
159
|
+
if git_hook:
|
|
160
|
+
print(f"Installed git pre-commit hook in {git_hook}")
|
|
147
161
|
|
|
148
162
|
print()
|
|
149
163
|
print("Next steps:")
|
|
@@ -187,7 +201,7 @@ def main() -> None:
|
|
|
187
201
|
install_cmd.add_argument(
|
|
188
202
|
"--platform",
|
|
189
203
|
choices=[
|
|
190
|
-
"claude", "claude-code", "cursor", "windsurf", "zed",
|
|
204
|
+
"codex", "claude", "claude-code", "cursor", "windsurf", "zed",
|
|
191
205
|
"continue", "opencode", "antigravity", "all",
|
|
192
206
|
],
|
|
193
207
|
default="all",
|
|
@@ -217,7 +231,7 @@ def main() -> None:
|
|
|
217
231
|
init_cmd.add_argument(
|
|
218
232
|
"--platform",
|
|
219
233
|
choices=[
|
|
220
|
-
"claude", "claude-code", "cursor", "windsurf", "zed",
|
|
234
|
+
"codex", "claude", "claude-code", "cursor", "windsurf", "zed",
|
|
221
235
|
"continue", "opencode", "antigravity", "all",
|
|
222
236
|
],
|
|
223
237
|
default="all",
|
|
@@ -149,24 +149,62 @@ def _to_slug(s: str) -> str:
|
|
|
149
149
|
# ---------------------------------------------------------------------------
|
|
150
150
|
|
|
151
151
|
|
|
152
|
+
def _compute_cohesion_batch(
|
|
153
|
+
community_member_qns: list[set[str]],
|
|
154
|
+
all_edges: list[GraphEdge],
|
|
155
|
+
) -> list[float]:
|
|
156
|
+
"""Compute cohesion for multiple communities in a single O(edges) pass.
|
|
157
|
+
|
|
158
|
+
Builds a ``qualified_name -> community_index`` reverse map (each node
|
|
159
|
+
appears in at most one community since all callers produce partitions),
|
|
160
|
+
then walks every edge exactly once, bucketing it into internal/external
|
|
161
|
+
counters per community.
|
|
162
|
+
|
|
163
|
+
Total work: O(edges + sum(|members|)) instead of
|
|
164
|
+
O(edges * communities) for naive per-community cohesion.
|
|
165
|
+
|
|
166
|
+
Returns a list of cohesion scores aligned with ``community_member_qns``.
|
|
167
|
+
"""
|
|
168
|
+
qn_to_idx: dict[str, int] = {}
|
|
169
|
+
for idx, members in enumerate(community_member_qns):
|
|
170
|
+
for qn in members:
|
|
171
|
+
qn_to_idx[qn] = idx
|
|
172
|
+
|
|
173
|
+
n = len(community_member_qns)
|
|
174
|
+
internal = [0] * n
|
|
175
|
+
external = [0] * n
|
|
176
|
+
|
|
177
|
+
for e in all_edges:
|
|
178
|
+
sc = qn_to_idx.get(e.source_qualified)
|
|
179
|
+
tc = qn_to_idx.get(e.target_qualified)
|
|
180
|
+
if sc is None and tc is None:
|
|
181
|
+
continue
|
|
182
|
+
if sc == tc:
|
|
183
|
+
# Safe: sc is not None here (sc == tc and not both None).
|
|
184
|
+
assert sc is not None
|
|
185
|
+
internal[sc] += 1
|
|
186
|
+
else:
|
|
187
|
+
if sc is not None:
|
|
188
|
+
external[sc] += 1
|
|
189
|
+
if tc is not None:
|
|
190
|
+
external[tc] += 1
|
|
191
|
+
|
|
192
|
+
results: list[float] = []
|
|
193
|
+
for i in range(n):
|
|
194
|
+
total = internal[i] + external[i]
|
|
195
|
+
results.append(internal[i] / total if total > 0 else 0.0)
|
|
196
|
+
return results
|
|
197
|
+
|
|
198
|
+
|
|
152
199
|
def _compute_cohesion(
|
|
153
200
|
member_qns: set[str], all_edges: list[GraphEdge]
|
|
154
201
|
) -> float:
|
|
155
|
-
"""Compute cohesion: internal_edges / (internal_edges + external_edges).
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
if src_in or tgt_in:
|
|
162
|
-
if src_in and tgt_in:
|
|
163
|
-
internal += 1
|
|
164
|
-
else:
|
|
165
|
-
external += 1
|
|
166
|
-
total = internal + external
|
|
167
|
-
if total == 0:
|
|
168
|
-
return 0.0
|
|
169
|
-
return internal / total
|
|
202
|
+
"""Compute cohesion: internal_edges / (internal_edges + external_edges).
|
|
203
|
+
|
|
204
|
+
For multiple communities, prefer :func:`_compute_cohesion_batch`, which
|
|
205
|
+
runs in O(edges) total instead of O(edges) per community.
|
|
206
|
+
"""
|
|
207
|
+
return _compute_cohesion_batch([member_qns], all_edges)[0]
|
|
170
208
|
|
|
171
209
|
|
|
172
210
|
# ---------------------------------------------------------------------------
|
|
@@ -177,11 +215,15 @@ def _compute_cohesion(
|
|
|
177
215
|
def _detect_leiden(
|
|
178
216
|
nodes: list[GraphNode], edges: list[GraphEdge], min_size: int
|
|
179
217
|
) -> list[dict[str, Any]]:
|
|
180
|
-
"""Detect communities using Leiden algorithm via igraph."""
|
|
218
|
+
"""Detect communities using Leiden algorithm via igraph.
|
|
219
|
+
|
|
220
|
+
Caps Leiden at ``n_iterations=2`` (sufficient for code dependency graphs)
|
|
221
|
+
and skips the recursive sub-community splitting pass that caused
|
|
222
|
+
exponential blow-up on large repos (>100k nodes).
|
|
223
|
+
"""
|
|
181
224
|
if ig is None:
|
|
182
225
|
return []
|
|
183
226
|
|
|
184
|
-
# Build mapping from qualified_name to index
|
|
185
227
|
qn_to_idx: dict[str, int] = {}
|
|
186
228
|
idx_to_node: dict[int, GraphNode] = {}
|
|
187
229
|
for i, node in enumerate(nodes):
|
|
@@ -191,7 +233,8 @@ def _detect_leiden(
|
|
|
191
233
|
if not qn_to_idx:
|
|
192
234
|
return []
|
|
193
235
|
|
|
194
|
-
|
|
236
|
+
logger.info("Building igraph with %d nodes...", len(qn_to_idx))
|
|
237
|
+
|
|
195
238
|
g = ig.Graph(n=len(qn_to_idx), directed=False)
|
|
196
239
|
edge_list: list[tuple[int, int]] = []
|
|
197
240
|
weights: list[float] = []
|
|
@@ -208,20 +251,28 @@ def _detect_leiden(
|
|
|
208
251
|
weights.append(EDGE_WEIGHTS.get(e.kind, 0.5))
|
|
209
252
|
|
|
210
253
|
if not edge_list:
|
|
211
|
-
# No edges — fall back to file grouping
|
|
212
254
|
return _detect_file_based(nodes, edges, min_size)
|
|
213
255
|
|
|
214
256
|
g.add_edges(edge_list)
|
|
215
257
|
g.es["weight"] = weights
|
|
216
258
|
|
|
217
|
-
|
|
259
|
+
logger.info(
|
|
260
|
+
"Running Leiden on %d nodes, %d edges...",
|
|
261
|
+
g.vcount(), g.ecount(),
|
|
262
|
+
)
|
|
263
|
+
|
|
218
264
|
partition = g.community_leiden(
|
|
219
265
|
objective_function="modularity",
|
|
220
266
|
weights="weight",
|
|
267
|
+
n_iterations=2,
|
|
221
268
|
)
|
|
222
269
|
|
|
223
|
-
|
|
224
|
-
|
|
270
|
+
logger.info(
|
|
271
|
+
"Leiden complete, found %d partitions. Computing cohesion...",
|
|
272
|
+
len(partition),
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
pending: list[tuple[list[GraphNode], set[str]]] = []
|
|
225
276
|
for cluster_ids in partition:
|
|
226
277
|
if len(cluster_ids) < min_size:
|
|
227
278
|
continue
|
|
@@ -229,7 +280,12 @@ def _detect_leiden(
|
|
|
229
280
|
if len(members) < min_size:
|
|
230
281
|
continue
|
|
231
282
|
member_qns = {m.qualified_name for m in members}
|
|
232
|
-
|
|
283
|
+
pending.append((members, member_qns))
|
|
284
|
+
|
|
285
|
+
cohesions = _compute_cohesion_batch([p[1] for p in pending], edges)
|
|
286
|
+
|
|
287
|
+
communities: list[dict[str, Any]] = []
|
|
288
|
+
for (members, member_qns), cohesion in zip(pending, cohesions):
|
|
233
289
|
lang_counts = Counter(m.language for m in members if m.language)
|
|
234
290
|
dominant_lang = lang_counts.most_common(1)[0][0] if lang_counts else ""
|
|
235
291
|
name = _generate_community_name(members)
|
|
@@ -245,94 +301,8 @@ def _detect_leiden(
|
|
|
245
301
|
"member_qns": member_qns,
|
|
246
302
|
})
|
|
247
303
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
for comm in communities:
|
|
251
|
-
if comm["size"] > 50:
|
|
252
|
-
sub_nodes = [n for n in nodes if n.qualified_name in comm["member_qns"]]
|
|
253
|
-
sub_edges = [
|
|
254
|
-
e for e in edges
|
|
255
|
-
if e.source_qualified in comm["member_qns"]
|
|
256
|
-
and e.target_qualified in comm["member_qns"]
|
|
257
|
-
]
|
|
258
|
-
subs = _detect_leiden_sub(sub_nodes, sub_edges, min_size, parent_name=comm["name"])
|
|
259
|
-
if len(subs) >= 2:
|
|
260
|
-
final.extend(subs)
|
|
261
|
-
else:
|
|
262
|
-
final.append(comm)
|
|
263
|
-
else:
|
|
264
|
-
final.append(comm)
|
|
265
|
-
|
|
266
|
-
return final
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
def _detect_leiden_sub(
|
|
270
|
-
nodes: list[GraphNode],
|
|
271
|
-
edges: list[GraphEdge],
|
|
272
|
-
min_size: int,
|
|
273
|
-
parent_name: str,
|
|
274
|
-
) -> list[dict[str, Any]]:
|
|
275
|
-
"""Second-pass Leiden on a large community for sub-communities."""
|
|
276
|
-
if ig is None:
|
|
277
|
-
return []
|
|
278
|
-
|
|
279
|
-
qn_to_idx: dict[str, int] = {}
|
|
280
|
-
idx_to_node: dict[int, GraphNode] = {}
|
|
281
|
-
for i, node in enumerate(nodes):
|
|
282
|
-
qn_to_idx[node.qualified_name] = i
|
|
283
|
-
idx_to_node[i] = node
|
|
284
|
-
|
|
285
|
-
g = ig.Graph(n=len(qn_to_idx), directed=False)
|
|
286
|
-
edge_list: list[tuple[int, int]] = []
|
|
287
|
-
weights: list[float] = []
|
|
288
|
-
seen_edges: set[tuple[int, int]] = set()
|
|
289
|
-
|
|
290
|
-
for e in edges:
|
|
291
|
-
src_idx = qn_to_idx.get(e.source_qualified)
|
|
292
|
-
tgt_idx = qn_to_idx.get(e.target_qualified)
|
|
293
|
-
if src_idx is not None and tgt_idx is not None and src_idx != tgt_idx:
|
|
294
|
-
pair = (min(src_idx, tgt_idx), max(src_idx, tgt_idx))
|
|
295
|
-
if pair not in seen_edges:
|
|
296
|
-
seen_edges.add(pair)
|
|
297
|
-
edge_list.append(pair)
|
|
298
|
-
weights.append(EDGE_WEIGHTS.get(e.kind, 0.5))
|
|
299
|
-
|
|
300
|
-
if not edge_list:
|
|
301
|
-
return []
|
|
302
|
-
|
|
303
|
-
g.add_edges(edge_list)
|
|
304
|
-
g.es["weight"] = weights
|
|
305
|
-
|
|
306
|
-
partition = g.community_leiden(
|
|
307
|
-
objective_function="modularity",
|
|
308
|
-
weights="weight",
|
|
309
|
-
)
|
|
310
|
-
|
|
311
|
-
subs: list[dict[str, Any]] = []
|
|
312
|
-
for idx, cluster_ids in enumerate(partition):
|
|
313
|
-
if len(cluster_ids) < min_size:
|
|
314
|
-
continue
|
|
315
|
-
members = [idx_to_node[i] for i in cluster_ids if i in idx_to_node]
|
|
316
|
-
if len(members) < min_size:
|
|
317
|
-
continue
|
|
318
|
-
member_qns = {m.qualified_name for m in members}
|
|
319
|
-
cohesion = _compute_cohesion(member_qns, edges)
|
|
320
|
-
lang_counts = Counter(m.language for m in members if m.language)
|
|
321
|
-
dominant_lang = lang_counts.most_common(1)[0][0] if lang_counts else ""
|
|
322
|
-
name = _generate_community_name(members)
|
|
323
|
-
|
|
324
|
-
subs.append({
|
|
325
|
-
"name": f"{parent_name}/{name}",
|
|
326
|
-
"level": 1,
|
|
327
|
-
"size": len(members),
|
|
328
|
-
"cohesion": round(cohesion, 4),
|
|
329
|
-
"dominant_language": dominant_lang,
|
|
330
|
-
"description": f"Sub-community of {len(members)} nodes within {parent_name}",
|
|
331
|
-
"members": [m.qualified_name for m in members],
|
|
332
|
-
"member_qns": member_qns,
|
|
333
|
-
})
|
|
334
|
-
|
|
335
|
-
return subs
|
|
304
|
+
logger.info("Community detection complete: %d communities", len(communities))
|
|
305
|
+
return communities
|
|
336
306
|
|
|
337
307
|
|
|
338
308
|
# ---------------------------------------------------------------------------
|
|
@@ -348,12 +318,21 @@ def _detect_file_based(
|
|
|
348
318
|
for n in nodes:
|
|
349
319
|
by_file[n.file_path].append(n)
|
|
350
320
|
|
|
351
|
-
communities: list[dict[str, Any]] = []
|
|
321
|
+
# Pre-filter to communities meeting min_size and collect their member
|
|
322
|
+
# sets so we can batch-compute all cohesions in a single O(edges) pass.
|
|
323
|
+
# Without this, per-community cohesion is O(edges * files), which makes
|
|
324
|
+
# community detection effectively hang on large repos.
|
|
325
|
+
pending: list[tuple[str, list[GraphNode], set[str]]] = []
|
|
352
326
|
for file_path, members in by_file.items():
|
|
353
327
|
if len(members) < min_size:
|
|
354
328
|
continue
|
|
355
329
|
member_qns = {m.qualified_name for m in members}
|
|
356
|
-
|
|
330
|
+
pending.append((file_path, members, member_qns))
|
|
331
|
+
|
|
332
|
+
cohesions = _compute_cohesion_batch([p[2] for p in pending], edges)
|
|
333
|
+
|
|
334
|
+
communities: list[dict[str, Any]] = []
|
|
335
|
+
for (file_path, members, member_qns), cohesion in zip(pending, cohesions):
|
|
357
336
|
lang_counts = Counter(m.language for m in members if m.language)
|
|
358
337
|
dominant_lang = lang_counts.most_common(1)[0][0] if lang_counts else ""
|
|
359
338
|
name = _generate_community_name(members)
|
|
@@ -397,6 +376,8 @@ def detect_communities(
|
|
|
397
376
|
all_edges = store.get_all_edges()
|
|
398
377
|
all_files = store.get_all_files()
|
|
399
378
|
|
|
379
|
+
logger.info("Loading nodes from %d files...", len(all_files))
|
|
380
|
+
|
|
400
381
|
nodes: list[GraphNode] = []
|
|
401
382
|
for fp in all_files:
|
|
402
383
|
nodes.extend(store.get_nodes_by_file(fp))
|
|
@@ -416,6 +397,11 @@ def detect_communities(
|
|
|
416
397
|
seen_qns.add(n.qualified_name)
|
|
417
398
|
unique_nodes.append(n)
|
|
418
399
|
|
|
400
|
+
logger.info(
|
|
401
|
+
"Loaded %d unique nodes, %d edges",
|
|
402
|
+
len(unique_nodes), len(all_edges),
|
|
403
|
+
)
|
|
404
|
+
|
|
419
405
|
if IGRAPH_AVAILABLE:
|
|
420
406
|
logger.info("Detecting communities with Leiden algorithm (igraph)")
|
|
421
407
|
results = _detect_leiden(unique_nodes, all_edges, min_size)
|
|
@@ -493,36 +479,44 @@ def store_communities(
|
|
|
493
479
|
# that are tightly coupled to the DB transaction lifecycle.
|
|
494
480
|
conn = store._conn
|
|
495
481
|
|
|
496
|
-
#
|
|
497
|
-
|
|
498
|
-
conn.execute("
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
(
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
member_qns = comm.get("members", [])
|
|
518
|
-
for qn in member_qns:
|
|
519
|
-
conn.execute(
|
|
520
|
-
"UPDATE nodes SET community_id = ? WHERE qualified_name = ?",
|
|
521
|
-
(community_id, qn),
|
|
482
|
+
# Wrap in explicit transaction so the DELETE + INSERT + UPDATE
|
|
483
|
+
# sequence is atomic — no partial community data on crash.
|
|
484
|
+
conn.execute("BEGIN IMMEDIATE")
|
|
485
|
+
try:
|
|
486
|
+
conn.execute("DELETE FROM communities")
|
|
487
|
+
conn.execute("UPDATE nodes SET community_id = NULL")
|
|
488
|
+
|
|
489
|
+
count = 0
|
|
490
|
+
for comm in communities:
|
|
491
|
+
cursor = conn.execute(
|
|
492
|
+
"""INSERT INTO communities
|
|
493
|
+
(name, level, cohesion, size, dominant_language, description)
|
|
494
|
+
VALUES (?, ?, ?, ?, ?, ?)""",
|
|
495
|
+
(
|
|
496
|
+
comm["name"],
|
|
497
|
+
comm.get("level", 0),
|
|
498
|
+
comm.get("cohesion", 0.0),
|
|
499
|
+
comm["size"],
|
|
500
|
+
comm.get("dominant_language", ""),
|
|
501
|
+
comm.get("description", ""),
|
|
502
|
+
),
|
|
522
503
|
)
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
504
|
+
community_id = cursor.lastrowid
|
|
505
|
+
|
|
506
|
+
# Batch update community_id on member nodes
|
|
507
|
+
member_qns = comm.get("members", [])
|
|
508
|
+
if member_qns:
|
|
509
|
+
placeholders = ",".join("?" * len(member_qns))
|
|
510
|
+
conn.execute(
|
|
511
|
+
f"UPDATE nodes SET community_id = ? WHERE qualified_name IN ({placeholders})", # nosec B608
|
|
512
|
+
[community_id] + member_qns,
|
|
513
|
+
)
|
|
514
|
+
count += 1
|
|
515
|
+
|
|
516
|
+
conn.commit()
|
|
517
|
+
except BaseException:
|
|
518
|
+
conn.rollback()
|
|
519
|
+
raise
|
|
526
520
|
return count
|
|
527
521
|
|
|
528
522
|
|
|
@@ -366,7 +366,10 @@ class EmbeddingStore:
|
|
|
366
366
|
self.provider = get_provider(provider, model=model)
|
|
367
367
|
self.available = self.provider is not None
|
|
368
368
|
self.db_path = Path(db_path)
|
|
369
|
-
self._conn = sqlite3.connect(
|
|
369
|
+
self._conn = sqlite3.connect(
|
|
370
|
+
str(self.db_path), timeout=30, check_same_thread=False,
|
|
371
|
+
isolation_level=None,
|
|
372
|
+
)
|
|
370
373
|
self._conn.row_factory = sqlite3.Row
|
|
371
374
|
self._conn.executescript(_EMBEDDINGS_SCHEMA)
|
|
372
375
|
|
|
@@ -314,41 +314,47 @@ def store_flows(store: GraphStore, flows: list[dict]) -> int:
|
|
|
314
314
|
# tightly coupled to the DB transaction lifecycle.
|
|
315
315
|
conn = store._conn
|
|
316
316
|
|
|
317
|
-
#
|
|
318
|
-
|
|
319
|
-
conn.execute("
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
criticality, path_json)
|
|
328
|
-
VALUES (?, ?, ?, ?, ?, ?, ?)""",
|
|
329
|
-
(
|
|
330
|
-
flow["name"],
|
|
331
|
-
flow["entry_point_id"],
|
|
332
|
-
flow["depth"],
|
|
333
|
-
flow["node_count"],
|
|
334
|
-
flow["file_count"],
|
|
335
|
-
flow["criticality"],
|
|
336
|
-
path_json,
|
|
337
|
-
),
|
|
338
|
-
)
|
|
339
|
-
flow_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
|
|
340
|
-
|
|
341
|
-
# Insert memberships.
|
|
342
|
-
node_ids = flow.get("path", [])
|
|
343
|
-
for position, node_id in enumerate(node_ids):
|
|
317
|
+
# Wrap the full DELETE + INSERT sequence in an explicit transaction
|
|
318
|
+
# so partial writes cannot occur if an exception interrupts the loop.
|
|
319
|
+
conn.execute("BEGIN IMMEDIATE")
|
|
320
|
+
try:
|
|
321
|
+
conn.execute("DELETE FROM flow_memberships")
|
|
322
|
+
conn.execute("DELETE FROM flows")
|
|
323
|
+
|
|
324
|
+
count = 0
|
|
325
|
+
for flow in flows:
|
|
326
|
+
path_json = json.dumps(flow.get("path", []))
|
|
344
327
|
conn.execute(
|
|
345
|
-
"INSERT OR IGNORE INTO flow_memberships (flow_id, node_id, position) "
|
|
346
|
-
|
|
347
|
-
|
|
328
|
+
"""INSERT INTO flows
|
|
329
|
+
(name, entry_point_id, depth, node_count, file_count,
|
|
330
|
+
criticality, path_json)
|
|
331
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)""",
|
|
332
|
+
(
|
|
333
|
+
flow["name"],
|
|
334
|
+
flow["entry_point_id"],
|
|
335
|
+
flow["depth"],
|
|
336
|
+
flow["node_count"],
|
|
337
|
+
flow["file_count"],
|
|
338
|
+
flow["criticality"],
|
|
339
|
+
path_json,
|
|
340
|
+
),
|
|
348
341
|
)
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
342
|
+
flow_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
|
|
343
|
+
|
|
344
|
+
# Insert memberships.
|
|
345
|
+
node_ids = flow.get("path", [])
|
|
346
|
+
for position, node_id in enumerate(node_ids):
|
|
347
|
+
conn.execute(
|
|
348
|
+
"INSERT OR IGNORE INTO flow_memberships (flow_id, node_id, position) "
|
|
349
|
+
"VALUES (?, ?, ?)",
|
|
350
|
+
(flow_id, node_id, position),
|
|
351
|
+
)
|
|
352
|
+
count += 1
|
|
353
|
+
|
|
354
|
+
conn.commit()
|
|
355
|
+
except BaseException:
|
|
356
|
+
conn.rollback()
|
|
357
|
+
raise
|
|
352
358
|
return count
|
|
353
359
|
|
|
354
360
|
|