superlocalmemory 3.3.19 → 3.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/package.json +1 -1
  2. package/pyproject.toml +9 -1
  3. package/src/superlocalmemory/cli/commands.py +140 -23
  4. package/src/superlocalmemory/cli/daemon.py +372 -0
  5. package/src/superlocalmemory/cli/main.py +10 -2
  6. package/src/superlocalmemory/cli/pending_store.py +158 -0
  7. package/src/superlocalmemory/cli/setup_wizard.py +39 -6
  8. package/src/superlocalmemory/code_graph/__init__.py +46 -0
  9. package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
  10. package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
  11. package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
  12. package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
  13. package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
  14. package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
  15. package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
  16. package/src/superlocalmemory/code_graph/changes.py +363 -0
  17. package/src/superlocalmemory/code_graph/communities.py +299 -0
  18. package/src/superlocalmemory/code_graph/config.py +88 -0
  19. package/src/superlocalmemory/code_graph/database.py +482 -0
  20. package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
  21. package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
  22. package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
  23. package/src/superlocalmemory/code_graph/flows.py +350 -0
  24. package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
  25. package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
  26. package/src/superlocalmemory/code_graph/graph_store.py +158 -0
  27. package/src/superlocalmemory/code_graph/incremental.py +200 -0
  28. package/src/superlocalmemory/code_graph/models.py +130 -0
  29. package/src/superlocalmemory/code_graph/parser.py +507 -0
  30. package/src/superlocalmemory/code_graph/resolver.py +321 -0
  31. package/src/superlocalmemory/code_graph/search.py +460 -0
  32. package/src/superlocalmemory/code_graph/service.py +95 -0
  33. package/src/superlocalmemory/code_graph/watcher.py +207 -0
  34. package/src/superlocalmemory/core/config.py +4 -3
  35. package/src/superlocalmemory/core/embedding_worker.py +4 -2
  36. package/src/superlocalmemory/core/embeddings.py +8 -2
  37. package/src/superlocalmemory/core/engine.py +32 -0
  38. package/src/superlocalmemory/core/engine_wiring.py +5 -0
  39. package/src/superlocalmemory/core/recall_pipeline.py +7 -3
  40. package/src/superlocalmemory/core/store_pipeline.py +23 -1
  41. package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
  42. package/src/superlocalmemory/infra/event_bus.py +5 -0
  43. package/src/superlocalmemory/mcp/server.py +23 -0
  44. package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
  45. package/src/superlocalmemory/retrieval/agentic.py +89 -17
  46. package/src/superlocalmemory/retrieval/engine.py +137 -2
  47. package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
  48. package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
  49. package/src/superlocalmemory/retrieval/strategy.py +16 -0
  50. package/src/superlocalmemory/server/api.py +4 -2
  51. package/src/superlocalmemory/server/ui.py +5 -2
  52. package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
  53. package/src/superlocalmemory/ui/index.html +1879 -0
  54. package/src/superlocalmemory/ui/js/agents.js +192 -0
  55. package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
  56. package/src/superlocalmemory/ui/js/behavioral.js +276 -0
  57. package/src/superlocalmemory/ui/js/clusters.js +206 -0
  58. package/src/superlocalmemory/ui/js/compliance.js +252 -0
  59. package/src/superlocalmemory/ui/js/core.js +246 -0
  60. package/src/superlocalmemory/ui/js/dashboard.js +110 -0
  61. package/src/superlocalmemory/ui/js/events.js +178 -0
  62. package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
  63. package/src/superlocalmemory/ui/js/feedback.js +333 -0
  64. package/src/superlocalmemory/ui/js/graph-core.js +447 -0
  65. package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
  66. package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
  67. package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
  68. package/src/superlocalmemory/ui/js/ide-status.js +102 -0
  69. package/src/superlocalmemory/ui/js/init.js +45 -0
  70. package/src/superlocalmemory/ui/js/learning.js +435 -0
  71. package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
  72. package/src/superlocalmemory/ui/js/math-health.js +98 -0
  73. package/src/superlocalmemory/ui/js/memories.js +264 -0
  74. package/src/superlocalmemory/ui/js/modal.js +357 -0
  75. package/src/superlocalmemory/ui/js/patterns.js +93 -0
  76. package/src/superlocalmemory/ui/js/profiles.js +236 -0
  77. package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
  78. package/src/superlocalmemory/ui/js/search.js +59 -0
  79. package/src/superlocalmemory/ui/js/settings.js +224 -0
  80. package/src/superlocalmemory/ui/js/timeline.js +32 -0
  81. package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0
@@ -0,0 +1,363 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4 — CodeGraph Module
4
+
5
+ """ChangeAnalyzer — git diff to risk-scored change analysis.
6
+
7
+ Parses git diff output, maps changed line ranges to graph nodes,
8
+ computes 5-factor risk scores, and produces review context.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ import re
15
+ import subprocess
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ from superlocalmemory.code_graph.database import CodeGraphDatabase
21
+ from superlocalmemory.code_graph.models import EdgeKind
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Constants
28
+ # ---------------------------------------------------------------------------
29
+
30
+ SECURITY_KEYWORDS: frozenset[str] = frozenset([
31
+ "auth", "login", "password", "token", "session", "crypt", "secret",
32
+ "credential", "permission", "sql", "query", "execute", "connect",
33
+ "socket", "request", "http", "sanitize", "validate", "encrypt",
34
+ "decrypt", "hash", "sign", "verify", "admin", "privilege",
35
+ ])
36
+
37
+ _GIT_TIMEOUT_SECONDS = 30
38
+ _HUNK_HEADER_RE = re.compile(r'\+(\d+)(?:,(\d+))?')
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # Result dataclasses
43
+ # ---------------------------------------------------------------------------
44
+
45
@dataclass(frozen=True)
class DiffHunk:
    """One contiguous range of changed lines on the new side of a diff."""

    # Path of the changed file, taken from the diff's "+++ b/<path>" header.
    file_path: str
    # First changed line (as given by the hunk header's "+start").
    start_line: int
    # Last changed line, inclusive (start + count - 1).
    end_line: int
51
+
52
+
53
@dataclass(frozen=True)
class ChangedNode:
    """Immutable snapshot of a graph node touched by a change, plus its risk."""

    # Identity and location, copied from the node's graph_nodes row.
    node_id: str
    name: str
    kind: str
    file_path: str
    line_start: int
    line_end: int
    # Sum of the five risk factors computed for this node.
    risk_score: float
63
+
64
+
65
@dataclass(frozen=True)
class ReviewContext:
    """Compact result of a change analysis, intended for review tooling."""

    # One-line human-readable digest (node/file counts, gaps, overall risk).
    summary: str
    # Every affected node, ordered by descending risk score.
    changed_nodes: tuple[ChangedNode, ...]
    # Changed nodes reported as lacking test coverage.
    test_gaps: tuple[ChangedNode, ...]
    # The highest-risk nodes a reviewer should look at first.
    review_priorities: tuple[ChangedNode, ...]
    # Highest single-node risk score, or 0.0 when nothing changed.
    overall_risk: float
73
+
74
+
75
+ # ---------------------------------------------------------------------------
76
+ # ChangeAnalyzer
77
+ # ---------------------------------------------------------------------------
78
+
79
class ChangeAnalyzer:
    """Map changed files to graph nodes and attach a 5-factor risk score.

    Node, edge and flow data are read through the injected database handle.
    Git is only invoked by :meth:`parse_git_diff`, through ``subprocess``
    with a timeout.
    """

    def __init__(self, db: CodeGraphDatabase) -> None:
        self._db = db

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def analyze_changes(
        self, changed_files: list[str], repo_root: Path | None = None
    ) -> ReviewContext:
        """Analyze changed files and produce risk-scored review context.

        Args:
            changed_files: File paths that changed. Each one is matched
                exactly against ``graph_nodes.file_path``.
            repo_root: Accepted for interface compatibility; this method
                does not use it.

        Returns:
            ReviewContext whose ``changed_nodes`` are sorted by descending
            risk, ``review_priorities`` are the first ten of those,
            ``test_gaps`` are the uncovered non-test, non-file/module
            nodes, and ``overall_risk`` is the highest single-node score.
        """
        if not changed_files:
            return ReviewContext(
                summary="No changes detected.",
                changed_nodes=(),
                test_gaps=(),
                review_priorities=(),
                overall_risk=0.0,
            )

        # Flow metadata is identical for every node: fetch and parse it once
        # instead of once per node.
        flows = self._load_flows()

        scored: dict[str, ChangedNode] = {}
        gap_ids: set[str] = set()
        for file_path in changed_files:
            for node in self._find_nodes_in_file(file_path):
                node_id = node["node_id"]
                # One coverage lookup feeds both the score and the gap list.
                has_test = self._has_test_coverage(node_id)
                scored[node_id] = ChangedNode(
                    node_id=node_id,
                    name=node["name"],
                    kind=node["kind"],
                    file_path=node["file_path"],
                    line_start=node["line_start"],
                    line_end=node["line_end"],
                    risk_score=self._score_node(node, flows, has_test),
                )
                # A test gap is an uncovered, non-test code node. Test nodes
                # themselves are not expected to be "tested by" anything.
                is_gap = (
                    not has_test
                    and node["kind"] not in ("file", "module")
                    and not node.get("is_test")
                )
                if is_gap:
                    gap_ids.add(node_id)
                else:
                    gap_ids.discard(node_id)

        changed_list = tuple(sorted(
            scored.values(), key=lambda n: -n.risk_score
        ))
        test_gaps = tuple(n for n in changed_list if n.node_id in gap_ids)

        # Top review priorities
        review_priorities = changed_list[:10]

        # Overall risk is driven by the single riskiest node.
        overall_risk = max(
            (n.risk_score for n in changed_list), default=0.0
        )

        summary = (
            f"{len(changed_list)} changed nodes across "
            f"{len(changed_files)} files. "
            f"{len(test_gaps)} untested changes. "
            f"Overall risk: {overall_risk:.2f}."
        )

        return ReviewContext(
            summary=summary,
            changed_nodes=changed_list,
            test_gaps=test_gaps,
            review_priorities=review_priorities,
            overall_risk=overall_risk,
        )

    def compute_risk_score(self, node: dict[str, Any]) -> float:
        """5-factor risk scoring for a single node.

        Factors:
            1. flow_participation (max 0.25)
            2. community_crossing (max 0.15)
            3. test_coverage (0.05 if tested, 0.30 if untested)
            4. security_keywords (0 or 0.20)
            5. caller_count (max 0.10)

        Args:
            node: Mapping with at least ``node_id``; ``name`` is optional.

        Returns:
            Sum of the five factor scores.
        """
        return self._score_node(
            node,
            self._load_flows(),
            self._has_test_coverage(node["node_id"]),
        )

    def get_review_context(
        self, changed_files: list[str], repo_root: Path | None = None
    ) -> ReviewContext:
        """Token-optimized review context.

        Same as analyze_changes but designed for LLM consumption.
        """
        return self.analyze_changes(changed_files, repo_root)

    # ------------------------------------------------------------------
    # Static: git diff parsing
    # ------------------------------------------------------------------

    @staticmethod
    def parse_git_diff(
        repo_root: Path, base: str = "HEAD~1", timeout: int = _GIT_TIMEOUT_SECONDS
    ) -> list[DiffHunk]:
        """Parse git diff to get changed line ranges.

        Args:
            repo_root: Path to the git repository root.
            base: Git ref to diff against. Empty values and values that
                start with "-" are rejected.
            timeout: Subprocess timeout in seconds.

        Returns:
            List of DiffHunk with file paths and line ranges. Empty when
            the ref is rejected, git cannot be run, times out, or exits
            with a non-zero status.
        """
        # ``base`` is placed before the "--" separator, so a value starting
        # with "-" would be parsed by git as an option rather than a ref.
        if not base or base.startswith("-"):
            logger.warning("Refusing to diff against invalid git ref: %r", base)
            return []

        try:
            result = subprocess.run(
                ["git", "diff", "--unified=0", base, "--"],
                capture_output=True,
                text=True,
                cwd=str(repo_root),
                timeout=timeout,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as exc:
            logger.warning("git diff failed: %s", exc)
            return []

        if result.returncode != 0:
            logger.debug("git diff returned %d: %s", result.returncode, result.stderr)
            return []

        return _parse_diff_output(result.stdout)

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _score_node(
        self, node: dict[str, Any], flows: list[dict[str, Any]], has_test: bool
    ) -> float:
        """Combine the five risk factors using pre-fetched flow/coverage data."""
        node_id = node["node_id"]
        # A missing or NULL name simply never matches a security keyword.
        lowered_name = (node.get("name") or "").lower()

        flow_score = self._flow_participation_score(node_id, flows)
        cross_score = self._community_crossing_score(node_id)
        test_score = 0.05 if has_test else 0.30
        security_score = (
            0.20
            if any(kw in lowered_name for kw in SECURITY_KEYWORDS)
            else 0.0
        )
        caller_score = self._caller_count_score(node_id)

        return flow_score + cross_score + test_score + security_score + caller_score

    def _find_nodes_in_file(self, file_path: str) -> list[dict[str, Any]]:
        """Return every graph node whose file_path equals *file_path*.

        Rows are ordered by line_start and include the is_test flag so
        callers can tell test nodes from production nodes.
        """
        rows = self._db.execute(
            """SELECT node_id, name, kind, file_path, line_start, line_end,
                      is_test
               FROM graph_nodes
               WHERE file_path = ?
               ORDER BY line_start""",
            (file_path,),
        )
        return [dict(row) for row in rows]

    def _has_test_coverage(self, node_id: str) -> bool:
        """Check if a node has TESTED_BY edges or is called by test nodes."""
        # Outgoing TESTED_BY edge from this node.
        rows = self._db.execute(
            """SELECT COUNT(*) as cnt FROM graph_edges
               WHERE source_node_id = ? AND kind = ?""",
            (node_id, EdgeKind.TESTED_BY.value),
        )
        if rows and rows[0]["cnt"] > 0:
            return True

        # Incoming CALLS edge whose source node is flagged as a test.
        rows = self._db.execute(
            """SELECT COUNT(*) as cnt FROM graph_edges ge
               JOIN graph_nodes gn ON ge.source_node_id = gn.node_id
               WHERE ge.target_node_id = ?
                 AND ge.kind = ?
                 AND gn.is_test = 1""",
            (node_id, EdgeKind.CALLS.value),
        )
        return bool(rows and rows[0]["cnt"] > 0)

    def _load_flows(self) -> list[dict[str, Any]]:
        """Read the "flows" metadata entry and return its dict items.

        Missing, unparseable or wrongly-shaped metadata yields an empty
        list, so flow participation then scores 0.
        """
        import json

        raw = self._db.get_metadata("flows")
        if not raw:
            return []
        try:
            flows = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return []
        if not isinstance(flows, list):
            return []
        return [flow for flow in flows if isinstance(flow, dict)]

    def _flow_participation_score(
        self, node_id: str, flows: list[dict[str, Any]] | None = None
    ) -> float:
        """Score based on how many flows this node participates in.

        Args:
            node_id: Node to look up in each flow's ``path_node_ids``.
            flows: Already-parsed flows; loaded from graph_metadata when
                omitted.

        Returns:
            0.05 per flow containing the node, capped at 0.25.
        """
        if flows is None:
            flows = self._load_flows()
        count = sum(
            1 for flow in flows
            if node_id in (flow.get("path_node_ids") or ())
        )
        return min(count * 0.05, 0.25)

    def _community_crossing_score(self, node_id: str) -> float:
        """Score based on cross-community callers. Max 0.15."""
        node_rows = self._db.execute(
            "SELECT community_id FROM graph_nodes WHERE node_id = ?",
            (node_id,),
        )
        if not node_rows:
            return 0.0
        my_community = node_rows[0]["community_id"]

        # Community of every node that CALLS this one.
        callers = self._db.execute(
            """SELECT gn.community_id
               FROM graph_edges ge
               JOIN graph_nodes gn ON ge.source_node_id = gn.node_id
               WHERE ge.target_node_id = ?
                 AND ge.kind = ?""",
            (node_id, EdgeKind.CALLS.value),
        )
        # Callers without a community assignment are not counted.
        cross_count = sum(
            1 for row in callers
            if row["community_id"] is not None
            and row["community_id"] != my_community
        )
        return min(cross_count * 0.05, 0.15)

    def _caller_count_score(self, node_id: str) -> float:
        """Score based on number of callers: callers / 20, capped at 0.10."""
        rows = self._db.execute(
            """SELECT COUNT(*) as cnt FROM graph_edges
               WHERE target_node_id = ? AND kind = ?""",
            (node_id, EdgeKind.CALLS.value),
        )
        count = rows[0]["cnt"] if rows else 0
        return min(count / 20.0, 0.10)
337
+
338
+
339
+ # ---------------------------------------------------------------------------
340
+ # Module-level helpers
341
+ # ---------------------------------------------------------------------------
342
+
343
+ def _parse_diff_output(diff_text: str) -> list[DiffHunk]:
344
+ """Parse unified diff output into DiffHunk list."""
345
+ hunks: list[DiffHunk] = []
346
+ current_file: str | None = None
347
+
348
+ for line in diff_text.splitlines():
349
+ if line.startswith("+++ b/"):
350
+ current_file = line[6:]
351
+ elif line.startswith("@@"):
352
+ match = _HUNK_HEADER_RE.search(line)
353
+ if match and current_file:
354
+ start = int(match.group(1))
355
+ count = int(match.group(2) or "1")
356
+ if count > 0:
357
+ hunks.append(DiffHunk(
358
+ file_path=current_file,
359
+ start_line=start,
360
+ end_line=start + count - 1,
361
+ ))
362
+
363
+ return hunks
@@ -0,0 +1,299 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory v3.4 — CodeGraph Module
4
+
5
+ """CommunityDetector — file-based community detection.
6
+
7
+ Groups nodes by file path prefix / directory for MVP.
8
+ igraph/Leiden can be added later for more sophisticated detection.
9
+ Stores community_id on graph_nodes via UPDATE.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import logging
16
+ from collections import Counter, defaultdict
17
+ from dataclasses import dataclass
18
+ from pathlib import PurePosixPath
19
+ from typing import Any
20
+
21
+ from superlocalmemory.code_graph.database import CodeGraphDatabase
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Result dataclasses
28
+ # ---------------------------------------------------------------------------
29
+
30
@dataclass(frozen=True)
class CommunityInfo:
    """Immutable description of one detected code community."""

    # Numeric id that is also written to graph_nodes.community_id.
    community_id: int
    # Human-readable label derived from the directory and its classes.
    name: str
    # Directory the member nodes were grouped by.
    directory: str
    # Number of member nodes.
    size: int
    # Most frequent language among member nodes, or None if none is set.
    dominant_language: str | None
    # Number of distinct files contributing nodes.
    file_count: int
    # Cohesion score for the community.
    cohesion: float
    # Ids of all member nodes.
    node_ids: tuple[str, ...]
41
+
42
+
43
@dataclass(frozen=True)
class CouplingWarning:
    """Report of edges that connect two different communities."""

    # Names of the two communities involved.
    source_community: str
    target_community: str
    # Number of graph edges running between the two communities.
    edge_count: int
    # One of "low", "medium" or "high".
    severity: str
50
+
51
+
52
@dataclass(frozen=True)
class ArchitectureOverview:
    """Snapshot of all communities together with their coupling warnings."""

    # Every known community.
    communities: tuple[CommunityInfo, ...]
    # Cross-community coupling findings.
    coupling_warnings: tuple[CouplingWarning, ...]
    # Sum of the sizes of all communities.
    total_nodes: int
    # Number of communities.
    total_communities: int
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # CommunityDetector
63
+ # ---------------------------------------------------------------------------
64
+
65
class CommunityDetector:
    """File-based community detection.

    Groups nodes by directory (file path prefix).
    Each unique directory becomes a community.
    """

    # Maximum number of node ids bound into a single UPDATE ... IN (...)
    # statement. SQLite rejects statements with too many bound parameters
    # (historically 999 by default), so large communities are written in
    # several statements.
    _UPDATE_CHUNK_SIZE = 500

    def __init__(self, db: CodeGraphDatabase) -> None:
        self._db = db

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def detect_communities(self) -> list[CommunityInfo]:
        """Detect communities by grouping nodes by directory.

        Writes community_id to graph_nodes and stores the result in
        graph_metadata under "communities".

        Returns:
            List of CommunityInfo sorted by size (largest first). Empty
            when the graph has no nodes.
        """
        rows = self._db.execute(
            "SELECT node_id, name, kind, file_path, language FROM graph_nodes",
            (),
        )
        if not rows:
            return []

        # Group node rows by the directory of their file.
        dir_groups: dict[str, list[dict[str, Any]]] = defaultdict(list)
        for row in rows:
            dir_groups[_extract_directory(row["file_path"])].append(dict(row))

        # Ids are assigned in sorted-directory order, before the size sort.
        communities: list[CommunityInfo] = []
        for comm_id, (directory, nodes) in enumerate(sorted(dir_groups.items())):
            languages = [n["language"] for n in nodes if n.get("language")]
            communities.append(CommunityInfo(
                community_id=comm_id,
                name=_generate_community_name(directory, nodes),
                directory=directory,
                size=len(nodes),
                dominant_language=_dominant_language(languages),
                file_count=len({n["file_path"] for n in nodes}),
                cohesion=1.0,  # Trivially cohesive for file-based
                node_ids=tuple(n["node_id"] for n in nodes),
            ))

        self._update_node_communities(communities)

        communities.sort(key=lambda c: -c.size)

        self._store_communities(communities)

        return communities

    def get_architecture_overview(self) -> ArchitectureOverview:
        """Return community summary with coupling warnings.

        Uses the communities stored in graph_metadata when present and
        readable; otherwise runs detection first.

        Returns:
            ArchitectureOverview with communities and cross-community coupling.
        """
        communities = self._load_communities()
        if not communities:
            communities = self.detect_communities()

        warnings = self._compute_coupling_warnings(communities)

        return ArchitectureOverview(
            communities=tuple(communities),
            coupling_warnings=tuple(warnings),
            total_nodes=sum(c.size for c in communities),
            total_communities=len(communities),
        )

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _update_node_communities(
        self, communities: list[CommunityInfo]
    ) -> None:
        """Write each community's id to its member rows in graph_nodes.

        Member ids are bound in chunks of ``_UPDATE_CHUNK_SIZE`` so that a
        very large community cannot exceed the database's limit on bound
        parameters. Communities without members issue no statement.
        """
        chunk_size = self._UPDATE_CHUNK_SIZE
        for community in communities:
            node_ids = community.node_ids
            for offset in range(0, len(node_ids), chunk_size):
                chunk = node_ids[offset:offset + chunk_size]
                placeholders = ",".join("?" * len(chunk))
                self._db.execute_write(
                    f"""UPDATE graph_nodes SET community_id = ?
                        WHERE node_id IN ({placeholders})""",
                    (community.community_id, *chunk),
                )

    def _compute_coupling_warnings(
        self, communities: list[CommunityInfo]
    ) -> list[CouplingWarning]:
        """Find cross-community edges and generate coupling warnings.

        Communities are told apart by community_id, not by name, because
        two directories can produce the same display name.

        Returns:
            One warning per unordered community pair with at least one
            connecting edge, most-coupled first. Severity is "high" from
            10 edges, "medium" from 5, otherwise "low".
        """
        names: dict[int, str] = {}
        node_to_community: dict[str, int] = {}
        for comm in communities:
            names[comm.community_id] = comm.name
            for nid in comm.node_ids:
                node_to_community[nid] = comm.community_id

        cross_edges: dict[tuple[int, int], int] = defaultdict(int)

        edges = self._db.execute(
            "SELECT source_node_id, target_node_id FROM graph_edges",
            (),
        )
        for row in edges:
            src_id = node_to_community.get(row["source_node_id"])
            tgt_id = node_to_community.get(row["target_node_id"])
            # Compare against None explicitly: community id 0 is valid.
            if src_id is None or tgt_id is None or src_id == tgt_id:
                continue
            # Direction is ignored; order the pair by name (then id) so
            # both directions land on the same key.
            first, second = sorted(
                (src_id, tgt_id), key=lambda cid: (names[cid], cid)
            )
            cross_edges[(first, second)] += 1

        warnings: list[CouplingWarning] = []
        for (first, second), count in sorted(
            cross_edges.items(), key=lambda item: -item[1]
        ):
            if count >= 10:
                severity = "high"
            elif count >= 5:
                severity = "medium"
            else:
                severity = "low"

            warnings.append(CouplingWarning(
                source_community=names[first],
                target_community=names[second],
                edge_count=count,
                severity=severity,
            ))

        return warnings

    def _store_communities(self, communities: list[CommunityInfo]) -> None:
        """Store communities in graph_metadata as JSON."""
        data = [
            {
                "community_id": c.community_id,
                "name": c.name,
                "directory": c.directory,
                "size": c.size,
                "dominant_language": c.dominant_language,
                "file_count": c.file_count,
                "cohesion": c.cohesion,
                "node_ids": list(c.node_ids),
            }
            for c in communities
        ]
        self._db.set_metadata("communities", json.dumps(data))

    def _load_communities(self) -> list[CommunityInfo]:
        """Load communities from graph_metadata.

        Returns:
            The stored communities, or an empty list when nothing is
            stored, the JSON cannot be parsed, or an entry does not have
            the expected shape.
        """
        raw = self._db.get_metadata("communities")
        if not raw:
            return []
        try:
            data = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            return []

        try:
            return [
                CommunityInfo(
                    community_id=c["community_id"],
                    name=c["name"],
                    directory=c["directory"],
                    size=c["size"],
                    dominant_language=c.get("dominant_language"),
                    file_count=c["file_count"],
                    cohesion=c["cohesion"],
                    node_ids=tuple(c["node_ids"]),
                )
                for c in data
            ]
        except (KeyError, TypeError, AttributeError) as exc:
            # Well-formed JSON with an unexpected structure: treat it like
            # missing data so the caller can fall back to detection.
            logger.warning("Ignoring malformed stored communities: %s", exc)
            return []
261
+
262
+
263
+ # ---------------------------------------------------------------------------
264
+ # Module-level helpers
265
+ # ---------------------------------------------------------------------------
266
+
267
+ def _extract_directory(file_path: str) -> str:
268
+ """Extract the directory from a file path."""
269
+ parent = str(PurePosixPath(file_path).parent)
270
+ return parent if parent != "." else "root"
271
+
272
+
273
+ def _generate_community_name(
274
+ directory: str, nodes: list[dict[str, Any]]
275
+ ) -> str:
276
+ """Generate a human-readable community name.
277
+
278
+ Uses directory name + most common class name (if any).
279
+ """
280
+ # Extract most common class name
281
+ class_names = [
282
+ n["name"] for n in nodes
283
+ if n.get("kind") == "class"
284
+ ]
285
+ if class_names:
286
+ most_common = Counter(class_names).most_common(1)[0][0]
287
+ return f"{directory}/{most_common}"
288
+
289
+ # Fall back to directory name
290
+ parts = directory.rstrip("/").split("/")
291
+ return parts[-1] if parts else directory
292
+
293
+
294
+ def _dominant_language(languages: list[str]) -> str | None:
295
+ """Find the most common language."""
296
+ if not languages:
297
+ return None
298
+ counts = Counter(languages)
299
+ return counts.most_common(1)[0][0]