superlocalmemory 3.3.20 → 3.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +9 -1
- package/src/superlocalmemory/cli/commands.py +138 -22
- package/src/superlocalmemory/cli/daemon.py +372 -0
- package/src/superlocalmemory/cli/main.py +8 -0
- package/src/superlocalmemory/cli/pending_store.py +158 -0
- package/src/superlocalmemory/cli/setup_wizard.py +39 -6
- package/src/superlocalmemory/code_graph/__init__.py +46 -0
- package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
- package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
- package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
- package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
- package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
- package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
- package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
- package/src/superlocalmemory/code_graph/changes.py +363 -0
- package/src/superlocalmemory/code_graph/communities.py +299 -0
- package/src/superlocalmemory/code_graph/config.py +88 -0
- package/src/superlocalmemory/code_graph/database.py +482 -0
- package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
- package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
- package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
- package/src/superlocalmemory/code_graph/flows.py +350 -0
- package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
- package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
- package/src/superlocalmemory/code_graph/graph_store.py +158 -0
- package/src/superlocalmemory/code_graph/incremental.py +200 -0
- package/src/superlocalmemory/code_graph/models.py +130 -0
- package/src/superlocalmemory/code_graph/parser.py +507 -0
- package/src/superlocalmemory/code_graph/resolver.py +321 -0
- package/src/superlocalmemory/code_graph/search.py +460 -0
- package/src/superlocalmemory/code_graph/service.py +95 -0
- package/src/superlocalmemory/code_graph/watcher.py +207 -0
- package/src/superlocalmemory/core/embedding_worker.py +4 -2
- package/src/superlocalmemory/core/embeddings.py +8 -2
- package/src/superlocalmemory/core/engine.py +32 -0
- package/src/superlocalmemory/core/engine_wiring.py +5 -0
- package/src/superlocalmemory/core/store_pipeline.py +23 -1
- package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
- package/src/superlocalmemory/infra/event_bus.py +5 -0
- package/src/superlocalmemory/mcp/server.py +23 -0
- package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
- package/src/superlocalmemory/retrieval/engine.py +137 -2
- package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
- package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
- package/src/superlocalmemory/retrieval/strategy.py +16 -0
- package/src/superlocalmemory/server/api.py +4 -2
- package/src/superlocalmemory/server/ui.py +5 -2
- package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
- package/src/superlocalmemory/ui/index.html +1879 -0
- package/src/superlocalmemory/ui/js/agents.js +192 -0
- package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
- package/src/superlocalmemory/ui/js/behavioral.js +276 -0
- package/src/superlocalmemory/ui/js/clusters.js +206 -0
- package/src/superlocalmemory/ui/js/compliance.js +252 -0
- package/src/superlocalmemory/ui/js/core.js +246 -0
- package/src/superlocalmemory/ui/js/dashboard.js +110 -0
- package/src/superlocalmemory/ui/js/events.js +178 -0
- package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
- package/src/superlocalmemory/ui/js/feedback.js +333 -0
- package/src/superlocalmemory/ui/js/graph-core.js +447 -0
- package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
- package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
- package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
- package/src/superlocalmemory/ui/js/ide-status.js +102 -0
- package/src/superlocalmemory/ui/js/init.js +45 -0
- package/src/superlocalmemory/ui/js/learning.js +435 -0
- package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
- package/src/superlocalmemory/ui/js/math-health.js +98 -0
- package/src/superlocalmemory/ui/js/memories.js +264 -0
- package/src/superlocalmemory/ui/js/modal.js +357 -0
- package/src/superlocalmemory/ui/js/patterns.js +93 -0
- package/src/superlocalmemory/ui/js/profiles.js +236 -0
- package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
- package/src/superlocalmemory/ui/js/search.js +59 -0
- package/src/superlocalmemory/ui/js/settings.js +224 -0
- package/src/superlocalmemory/ui/js/timeline.js +32 -0
- package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""ChangeAnalyzer — git diff to risk-scored change analysis.
|
|
6
|
+
|
|
7
|
+
Parses git diff output, maps changed line ranges to graph nodes,
|
|
8
|
+
computes 5-factor risk scores, and produces review context.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import re
|
|
15
|
+
import subprocess
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from superlocalmemory.code_graph.database import CodeGraphDatabase
|
|
21
|
+
from superlocalmemory.code_graph.models import EdgeKind
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Constants
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
SECURITY_KEYWORDS: frozenset[str] = frozenset([
|
|
31
|
+
"auth", "login", "password", "token", "session", "crypt", "secret",
|
|
32
|
+
"credential", "permission", "sql", "query", "execute", "connect",
|
|
33
|
+
"socket", "request", "http", "sanitize", "validate", "encrypt",
|
|
34
|
+
"decrypt", "hash", "sign", "verify", "admin", "privilege",
|
|
35
|
+
])
|
|
36
|
+
|
|
37
|
+
_GIT_TIMEOUT_SECONDS = 30
|
|
38
|
+
_HUNK_HEADER_RE = re.compile(r'\+(\d+)(?:,(\d+))?')
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Result dataclasses
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class DiffHunk:
    """Immutable record of one contiguous run of changed lines in a file.

    Instances are built by ``_parse_diff_output`` from the new-file side of
    each unified-diff hunk header.
    """

    # Path of the changed file, taken from the diff's ``+++ b/`` header.
    file_path: str
    # First changed line in the new version of the file.
    start_line: int
    # Last changed line, inclusive (start + count - 1).
    end_line: int
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
class ChangedNode:
    """Immutable snapshot of a graph node touched by a change, plus its risk.

    All fields except ``risk_score`` are copied from the node's
    ``graph_nodes`` row by ``ChangeAnalyzer.analyze_changes``.
    """

    # Identifier of the node in ``graph_nodes``.
    node_id: str
    # Node name as stored in the graph.
    name: str
    # Node kind string (the analyzer special-cases "file" and "module").
    kind: str
    # File the node is stored under.
    file_path: str
    # First and last source line recorded for the node.
    line_start: int
    line_end: int
    # Sum of the five risk factors computed by ``compute_risk_score``.
    risk_score: float
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class ReviewContext:
    """Immutable result of a change analysis, compact enough for review tools."""

    # One-line human-readable summary of the analysis.
    summary: str
    # Every affected node, ordered from highest to lowest risk score.
    changed_nodes: tuple[ChangedNode, ...]
    # Affected nodes for which no test coverage was found.
    test_gaps: tuple[ChangedNode, ...]
    # The highest-risk nodes (the analyzer keeps the first ten).
    review_priorities: tuple[ChangedNode, ...]
    # Highest individual risk score, or 0.0 when nothing changed.
    overall_risk: float
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
# ChangeAnalyzer
|
|
77
|
+
# ---------------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
class ChangeAnalyzer:
    """Map changed files to code-graph nodes and score their review risk.

    Graph lookups go through the ``CodeGraphDatabase`` supplied at
    construction. The only git interaction is :meth:`parse_git_diff`, which
    runs ``git diff`` in a subprocess with a timeout.
    """

    def __init__(self, db: CodeGraphDatabase) -> None:
        self._db = db

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def analyze_changes(
        self, changed_files: list[str], repo_root: Path | None = None
    ) -> ReviewContext:
        """Analyze changed files and produce risk-scored review context.

        Every graph node stored for a changed file is treated as changed;
        line ranges are not consulted here.

        Args:
            changed_files: Relative file paths that changed. Each one is
                matched exactly against ``graph_nodes.file_path``.
            repo_root: Accepted for API compatibility; not used by the
                analysis itself.

        Returns:
            ReviewContext with nodes sorted by descending risk, the subset
            lacking test coverage, the ten highest-risk nodes and the
            maximum risk score.
        """
        if not changed_files:
            return ReviewContext(
                summary="No changes detected.",
                changed_nodes=(),
                test_gaps=(),
                review_priorities=(),
                overall_risk=0.0,
            )

        # Keyed by node id so a file listed twice does not duplicate nodes.
        all_changed: dict[str, ChangedNode] = {}
        # Ids of non-test nodes for which no covering test was found.
        gap_candidates: set[str] = set()
        for file_path in changed_files:
            for node in self._find_nodes_in_file(file_path):
                node_id = node["node_id"]
                # Look coverage up once; it feeds both the score and the
                # test-gap list.
                has_test = self._has_test_coverage(node_id)
                all_changed[node_id] = ChangedNode(
                    node_id=node_id,
                    name=node["name"],
                    kind=node["kind"],
                    file_path=node["file_path"],
                    line_start=node["line_start"],
                    line_end=node["line_end"],
                    risk_score=self._score_node(node, has_test),
                )
                # Test code is not itself a coverage gap.
                if not has_test and not node.get("is_test"):
                    gap_candidates.add(node_id)

        changed_list = tuple(sorted(
            all_changed.values(), key=lambda n: -n.risk_score
        ))

        # Test gaps: changed non-test nodes without coverage. File and
        # module container nodes are never reported.
        test_gaps = tuple(
            n for n in changed_list
            if n.kind not in ("file", "module")
            and n.node_id in gap_candidates
        )

        # Top review priorities
        review_priorities = changed_list[:10]

        # Overall risk is the single worst node, not an average.
        overall_risk = max(
            (n.risk_score for n in changed_list), default=0.0
        )

        summary = (
            f"{len(changed_list)} changed nodes across "
            f"{len(changed_files)} files. "
            f"{len(test_gaps)} untested changes. "
            f"Overall risk: {overall_risk:.2f}."
        )

        return ReviewContext(
            summary=summary,
            changed_nodes=changed_list,
            test_gaps=test_gaps,
            review_priorities=review_priorities,
            overall_risk=overall_risk,
        )

    def compute_risk_score(self, node: dict[str, Any]) -> float:
        """5-factor risk scoring for a single node.

        Factors:
            1. flow_participation (max 0.25)
            2. community_crossing (max 0.15)
            3. test_coverage (0.05 if tested, 0.30 if untested)
            4. security_keywords (0 or 0.20)
            5. caller_count (max 0.10)

        Args:
            node: Mapping with at least ``node_id``; ``name`` is used for
                the security-keyword factor when present.

        Returns:
            Sum of the five factors.
        """
        return self._score_node(node, self._has_test_coverage(node["node_id"]))

    def get_review_context(
        self, changed_files: list[str], repo_root: Path | None = None
    ) -> ReviewContext:
        """Token-optimized review context.

        Same as analyze_changes but designed for LLM consumption.
        """
        return self.analyze_changes(changed_files, repo_root)

    # ------------------------------------------------------------------
    # Static: git diff parsing
    # ------------------------------------------------------------------

    @staticmethod
    def parse_git_diff(
        repo_root: Path, base: str = "HEAD~1", timeout: int = _GIT_TIMEOUT_SECONDS
    ) -> list[DiffHunk]:
        """Parse git diff to get changed line ranges.

        Args:
            repo_root: Path to the git repository root.
            base: Git ref to diff against. Values that are empty or start
                with ``-`` are rejected so they cannot be interpreted by
                git as command-line options.
            timeout: Subprocess timeout in seconds.

        Returns:
            List of DiffHunk with file paths and line ranges. Empty when
            the ref is rejected, git cannot be run, it times out, or it
            exits with a non-zero status.
        """
        if not base or base.startswith("-"):
            logger.warning("git diff skipped: invalid base ref %r", base)
            return []

        try:
            result = subprocess.run(
                [
                    "git", "diff",
                    # Pin the output format so user configuration (external
                    # diff drivers, forced colour, custom or missing path
                    # prefixes) cannot break the "+++ b/" parser.
                    "--no-ext-diff",
                    "--no-color",
                    "--src-prefix=a/",
                    "--dst-prefix=b/",
                    "--unified=0",
                    base,
                    "--",
                ],
                capture_output=True,
                text=True,
                cwd=str(repo_root),
                timeout=timeout,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as exc:
            logger.warning("git diff failed: %s", exc)
            return []

        if result.returncode != 0:
            logger.debug("git diff returned %d: %s", result.returncode, result.stderr)
            return []

        return _parse_diff_output(result.stdout)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _score_node(self, node: dict[str, Any], has_test: bool) -> float:
        """Sum the five risk factors, given an already-known coverage flag."""
        node_id = node["node_id"]
        lowered_name = (node.get("name") or "").lower()

        # 1. Flow participation
        flow_score = self._flow_participation_score(node_id)

        # 2. Community crossing
        cross_score = self._community_crossing_score(node_id)

        # 3. Test coverage
        test_score = 0.05 if has_test else 0.30

        # 4. Security sensitivity (substring match on the node name)
        security_score = (
            0.20
            if any(kw in lowered_name for kw in SECURITY_KEYWORDS)
            else 0.0
        )

        # 5. Caller count
        caller_score = self._caller_count_score(node_id)

        return flow_score + cross_score + test_score + security_score + caller_score

    def _find_nodes_in_file(self, file_path: str) -> list[dict[str, Any]]:
        """Return all graph nodes stored for ``file_path``, ordered by start line."""
        rows = self._db.execute(
            """SELECT node_id, name, kind, file_path, line_start, line_end,
                      is_test
               FROM graph_nodes
               WHERE file_path = ?
               ORDER BY line_start""",
            (file_path,),
        )
        return [dict(row) for row in rows]

    def _has_test_coverage(self, node_id: str) -> bool:
        """Check if a node has TESTED_BY edges or is called by test nodes."""
        # Outgoing TESTED_BY edge from this node.
        rows = self._db.execute(
            """SELECT COUNT(*) as cnt FROM graph_edges
               WHERE source_node_id = ? AND kind = ?""",
            (node_id, EdgeKind.TESTED_BY.value),
        )
        if rows and rows[0]["cnt"] > 0:
            return True

        # Incoming CALLS edge whose source node is flagged as a test.
        rows = self._db.execute(
            """SELECT COUNT(*) as cnt FROM graph_edges ge
               JOIN graph_nodes gn ON ge.source_node_id = gn.node_id
               WHERE ge.target_node_id = ?
                 AND ge.kind = ?
                 AND gn.is_test = 1""",
            (node_id, EdgeKind.CALLS.value),
        )
        return bool(rows and rows[0]["cnt"] > 0)

    def _flow_participation_score(self, node_id: str) -> float:
        """Score based on how many flows this node participates in.

        Uses the JSON stored under the ``flows`` metadata key: 0.05 per flow
        whose ``path_node_ids`` list contains the node, capped at 0.25.
        Missing, unparsable or unexpectedly shaped metadata scores 0.0.
        """
        import json
        raw = self._db.get_metadata("flows")
        if not raw:
            return 0.0
        try:
            flows = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return 0.0
        if not isinstance(flows, list):
            return 0.0

        count = 0
        for flow in flows:
            if not isinstance(flow, dict):
                continue
            path = flow.get("path_node_ids")
            if isinstance(path, list) and node_id in path:
                count += 1
        return min(count * 0.05, 0.25)

    def _community_crossing_score(self, node_id: str) -> float:
        """Score based on cross-community callers. Max 0.15."""
        # Get this node's community
        node_rows = self._db.execute(
            "SELECT community_id FROM graph_nodes WHERE node_id = ?",
            (node_id,),
        )
        if not node_rows:
            return 0.0
        my_community = node_rows[0]["community_id"]

        # Get callers and their communities
        callers = self._db.execute(
            """SELECT gn.community_id
               FROM graph_edges ge
               JOIN graph_nodes gn ON ge.source_node_id = gn.node_id
               WHERE ge.target_node_id = ?
                 AND ge.kind = ?""",
            (node_id, EdgeKind.CALLS.value),
        )
        # Callers with no community assigned are not counted as crossing.
        cross_count = sum(
            1 for row in callers
            if row["community_id"] is not None
            and row["community_id"] != my_community
        )
        return min(cross_count * 0.05, 0.15)

    def _caller_count_score(self, node_id: str) -> float:
        """Score based on number of callers. Max 0.10."""
        rows = self._db.execute(
            """SELECT COUNT(*) as cnt FROM graph_edges
               WHERE target_node_id = ? AND kind = ?""",
            (node_id, EdgeKind.CALLS.value),
        )
        count = rows[0]["cnt"] if rows else 0
        # 0.05 per caller, so the cap is reached at two callers.
        return min(count / 20.0, 0.10)
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
# ---------------------------------------------------------------------------
|
|
340
|
+
# Module-level helpers
|
|
341
|
+
# ---------------------------------------------------------------------------
|
|
342
|
+
|
|
343
|
+
def _parse_diff_output(diff_text: str) -> list[DiffHunk]:
|
|
344
|
+
"""Parse unified diff output into DiffHunk list."""
|
|
345
|
+
hunks: list[DiffHunk] = []
|
|
346
|
+
current_file: str | None = None
|
|
347
|
+
|
|
348
|
+
for line in diff_text.splitlines():
|
|
349
|
+
if line.startswith("+++ b/"):
|
|
350
|
+
current_file = line[6:]
|
|
351
|
+
elif line.startswith("@@"):
|
|
352
|
+
match = _HUNK_HEADER_RE.search(line)
|
|
353
|
+
if match and current_file:
|
|
354
|
+
start = int(match.group(1))
|
|
355
|
+
count = int(match.group(2) or "1")
|
|
356
|
+
if count > 0:
|
|
357
|
+
hunks.append(DiffHunk(
|
|
358
|
+
file_path=current_file,
|
|
359
|
+
start_line=start,
|
|
360
|
+
end_line=start + count - 1,
|
|
361
|
+
))
|
|
362
|
+
|
|
363
|
+
return hunks
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory v3.4 — CodeGraph Module
|
|
4
|
+
|
|
5
|
+
"""CommunityDetector — file-based community detection.
|
|
6
|
+
|
|
7
|
+
Groups nodes by file path prefix / directory for MVP.
|
|
8
|
+
igraph/Leiden can be added later for more sophisticated detection.
|
|
9
|
+
Stores community_id on graph_nodes via UPDATE.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
from collections import Counter, defaultdict
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from pathlib import PurePosixPath
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from superlocalmemory.code_graph.database import CodeGraphDatabase
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Result dataclasses
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class CommunityInfo:
    """Immutable description of one detected code community."""

    # Numeric id, also written to ``graph_nodes.community_id``.
    community_id: int
    # Human-readable label produced by ``_generate_community_name``.
    name: str
    # Directory the community was derived from ("root" for top-level files).
    directory: str
    # Number of graph nodes in the community.
    size: int
    # Most frequent language among the nodes, or None when none is recorded.
    dominant_language: str | None
    # Number of distinct files contributing nodes.
    file_count: int
    # Cohesion score; the directory-based detector always stores 1.0.
    cohesion: float
    # Ids of all member nodes.
    node_ids: tuple[str, ...]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(frozen=True)
class CouplingWarning:
    """Immutable report of graph edges that connect two different communities."""

    # Names of the two communities involved.
    source_community: str
    target_community: str
    # Number of edges between the two communities.
    edge_count: int
    # One of "low", "medium" or "high".
    severity: str
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(frozen=True)
class ArchitectureOverview:
    """Immutable bundle of detected communities and their coupling warnings."""

    # All communities included in the overview.
    communities: tuple[CommunityInfo, ...]
    # Cross-community coupling warnings.
    coupling_warnings: tuple[CouplingWarning, ...]
    # Sum of the sizes of all communities.
    total_nodes: int
    # Number of communities.
    total_communities: int
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
# CommunityDetector
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
class CommunityDetector:
    """File-based community detection.

    Groups nodes by directory (file path prefix).
    Each unique directory becomes a community.
    """

    # Node ids bound per UPDATE statement. Kept well below SQLite's
    # host-parameter limit so a very large directory cannot overflow it.
    _UPDATE_BATCH_SIZE = 500

    def __init__(self, db: CodeGraphDatabase) -> None:
        self._db = db

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def detect_communities(self) -> list[CommunityInfo]:
        """Detect communities by grouping nodes by directory.

        Updates community_id on graph_nodes and stores the result under the
        ``communities`` metadata key.

        Returns:
            List of CommunityInfo sorted by size (largest first).
        """
        # Load all nodes
        rows = self._db.execute(
            "SELECT node_id, name, kind, file_path, language FROM graph_nodes",
            (),
        )
        if not rows:
            return []

        # Group by directory
        dir_groups: dict[str, list[dict[str, Any]]] = defaultdict(list)
        for row in rows:
            directory = _extract_directory(row["file_path"])
            dir_groups[directory].append(dict(row))

        # Build communities; ids follow the sorted directory order.
        communities: list[CommunityInfo] = []
        for comm_id, (directory, nodes) in enumerate(sorted(dir_groups.items())):
            node_ids = tuple(n["node_id"] for n in nodes)
            languages = [n["language"] for n in nodes if n.get("language")]
            dominant_lang = _dominant_language(languages)
            file_paths = {n["file_path"] for n in nodes}

            community = CommunityInfo(
                community_id=comm_id,
                name=_generate_community_name(directory, nodes),
                directory=directory,
                size=len(nodes),
                dominant_language=dominant_lang,
                file_count=len(file_paths),
                cohesion=1.0,  # Trivially cohesive for file-based
                node_ids=node_ids,
            )
            communities.append(community)

        # Update community_id on nodes
        self._update_node_communities(communities)

        # Sort by size descending
        communities.sort(key=lambda c: -c.size)

        # Store in metadata
        self._store_communities(communities)

        return communities

    def get_architecture_overview(self) -> ArchitectureOverview:
        """Return community summary with coupling warnings.

        Uses the communities stored in metadata when available and runs
        detection otherwise.

        Returns:
            ArchitectureOverview with communities and cross-community coupling.
        """
        # Load or detect communities
        communities = self._load_communities()
        if not communities:
            communities = self.detect_communities()

        # Compute coupling warnings
        warnings = self._compute_coupling_warnings(communities)

        total_nodes = sum(c.size for c in communities)

        return ArchitectureOverview(
            communities=tuple(communities),
            coupling_warnings=tuple(warnings),
            total_nodes=total_nodes,
            total_communities=len(communities),
        )

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _update_node_communities(
        self, communities: list[CommunityInfo]
    ) -> None:
        """Update community_id on graph_nodes.

        Node ids are written in batches of ``_UPDATE_BATCH_SIZE`` so that no
        single statement binds an unbounded number of parameters.
        """
        batch_size = self._UPDATE_BATCH_SIZE
        for community in communities:
            node_ids = community.node_ids
            for offset in range(0, len(node_ids), batch_size):
                batch = node_ids[offset:offset + batch_size]
                placeholders = ",".join("?" for _ in batch)
                self._db.execute_write(
                    f"""UPDATE graph_nodes SET community_id = ?
                        WHERE node_id IN ({placeholders})""",
                    (community.community_id, *batch),
                )

    def _compute_coupling_warnings(
        self, communities: list[CommunityInfo]
    ) -> list[CouplingWarning]:
        """Find cross-community edges and generate coupling warnings.

        Communities are told apart by ``community_id`` rather than by name,
        because different directories can produce the same display name.
        Each unordered pair of communities yields at most one warning,
        ordered by descending edge count. Severity is "high" from 10 edges,
        "medium" from 5, otherwise "low".
        """
        # Build node_id -> community_id and community_id -> name maps.
        node_to_community: dict[str, int] = {}
        names: dict[int, str] = {}
        for comm in communities:
            names[comm.community_id] = comm.name
            for nid in comm.node_ids:
                node_to_community[nid] = comm.community_id

        # Count cross-community edges per unordered pair.
        cross_edges: dict[tuple[int, int], int] = defaultdict(int)

        edges = self._db.execute(
            "SELECT source_node_id, target_node_id FROM graph_edges",
            (),
        )
        for row in edges:
            src_comm = node_to_community.get(row["source_node_id"])
            tgt_comm = node_to_community.get(row["target_node_id"])
            # Compare against None explicitly: community id 0 is valid.
            if src_comm is None or tgt_comm is None or src_comm == tgt_comm:
                continue
            # Normalise direction: order the pair by name, id as tie-breaker.
            first, second = sorted(
                (src_comm, tgt_comm), key=lambda cid: (names[cid], cid)
            )
            cross_edges[(first, second)] += 1

        # Generate warnings
        warnings: list[CouplingWarning] = []
        for (src, tgt), count in sorted(
            cross_edges.items(), key=lambda x: -x[1]
        ):
            if count >= 10:
                severity = "high"
            elif count >= 5:
                severity = "medium"
            else:
                severity = "low"

            warnings.append(CouplingWarning(
                source_community=names[src],
                target_community=names[tgt],
                edge_count=count,
                severity=severity,
            ))

        return warnings

    def _store_communities(self, communities: list[CommunityInfo]) -> None:
        """Store communities in graph_metadata as JSON."""
        data = [
            {
                "community_id": c.community_id,
                "name": c.name,
                "directory": c.directory,
                "size": c.size,
                "dominant_language": c.dominant_language,
                "file_count": c.file_count,
                "cohesion": c.cohesion,
                "node_ids": list(c.node_ids),
            }
            for c in communities
        ]
        self._db.set_metadata("communities", json.dumps(data))

    def _load_communities(self) -> list[CommunityInfo]:
        """Load communities from graph_metadata.

        Returns:
            The stored communities, or an empty list when nothing is stored
            or the stored value cannot be parsed or has an unexpected shape.
        """
        raw = self._db.get_metadata("communities")
        if not raw:
            return []
        try:
            data = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return []
        if not isinstance(data, list):
            return []

        try:
            return [
                CommunityInfo(
                    community_id=c["community_id"],
                    name=c["name"],
                    directory=c["directory"],
                    size=c["size"],
                    dominant_language=c.get("dominant_language"),
                    file_count=c["file_count"],
                    cohesion=c["cohesion"],
                    node_ids=tuple(c["node_ids"]),
                )
                for c in data
            ]
        except (KeyError, TypeError, AttributeError) as exc:
            # Well-formed JSON with the wrong shape: treat as "nothing stored".
            logger.debug("Ignoring malformed stored communities: %s", exc)
            return []
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# ---------------------------------------------------------------------------
|
|
264
|
+
# Module-level helpers
|
|
265
|
+
# ---------------------------------------------------------------------------
|
|
266
|
+
|
|
267
|
+
def _extract_directory(file_path: str) -> str:
|
|
268
|
+
"""Extract the directory from a file path."""
|
|
269
|
+
parent = str(PurePosixPath(file_path).parent)
|
|
270
|
+
return parent if parent != "." else "root"
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _generate_community_name(
|
|
274
|
+
directory: str, nodes: list[dict[str, Any]]
|
|
275
|
+
) -> str:
|
|
276
|
+
"""Generate a human-readable community name.
|
|
277
|
+
|
|
278
|
+
Uses directory name + most common class name (if any).
|
|
279
|
+
"""
|
|
280
|
+
# Extract most common class name
|
|
281
|
+
class_names = [
|
|
282
|
+
n["name"] for n in nodes
|
|
283
|
+
if n.get("kind") == "class"
|
|
284
|
+
]
|
|
285
|
+
if class_names:
|
|
286
|
+
most_common = Counter(class_names).most_common(1)[0][0]
|
|
287
|
+
return f"{directory}/{most_common}"
|
|
288
|
+
|
|
289
|
+
# Fall back to directory name
|
|
290
|
+
parts = directory.rstrip("/").split("/")
|
|
291
|
+
return parts[-1] if parts else directory
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _dominant_language(languages: list[str]) -> str | None:
|
|
295
|
+
"""Find the most common language."""
|
|
296
|
+
if not languages:
|
|
297
|
+
return None
|
|
298
|
+
counts = Counter(languages)
|
|
299
|
+
return counts.most_common(1)[0][0]
|