codexa 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. codexa-0.4.0.dist-info/METADATA +650 -0
  2. codexa-0.4.0.dist-info/RECORD +189 -0
  3. codexa-0.4.0.dist-info/WHEEL +5 -0
  4. codexa-0.4.0.dist-info/entry_points.txt +2 -0
  5. codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
  6. codexa-0.4.0.dist-info/top_level.txt +1 -0
  7. semantic_code_intelligence/__init__.py +5 -0
  8. semantic_code_intelligence/analysis/__init__.py +21 -0
  9. semantic_code_intelligence/analysis/ai_features.py +351 -0
  10. semantic_code_intelligence/bridge/__init__.py +28 -0
  11. semantic_code_intelligence/bridge/context_provider.py +245 -0
  12. semantic_code_intelligence/bridge/protocol.py +167 -0
  13. semantic_code_intelligence/bridge/server.py +348 -0
  14. semantic_code_intelligence/bridge/vscode.py +271 -0
  15. semantic_code_intelligence/ci/__init__.py +13 -0
  16. semantic_code_intelligence/ci/hooks.py +98 -0
  17. semantic_code_intelligence/ci/hotspots.py +272 -0
  18. semantic_code_intelligence/ci/impact.py +246 -0
  19. semantic_code_intelligence/ci/metrics.py +591 -0
  20. semantic_code_intelligence/ci/pr.py +412 -0
  21. semantic_code_intelligence/ci/quality.py +557 -0
  22. semantic_code_intelligence/ci/templates.py +164 -0
  23. semantic_code_intelligence/ci/trace.py +224 -0
  24. semantic_code_intelligence/cli/__init__.py +0 -0
  25. semantic_code_intelligence/cli/commands/__init__.py +0 -0
  26. semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
  27. semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
  28. semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
  29. semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
  30. semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
  31. semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
  32. semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
  33. semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
  34. semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
  35. semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
  36. semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
  37. semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
  38. semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
  39. semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
  40. semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
  41. semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
  42. semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
  43. semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
  44. semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
  45. semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
  46. semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
  47. semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
  48. semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
  49. semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
  50. semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
  51. semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
  52. semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
  53. semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
  54. semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
  55. semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
  56. semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
  57. semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
  58. semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
  59. semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
  60. semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
  61. semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
  62. semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
  63. semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
  64. semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
  65. semantic_code_intelligence/cli/main.py +65 -0
  66. semantic_code_intelligence/cli/router.py +92 -0
  67. semantic_code_intelligence/config/__init__.py +0 -0
  68. semantic_code_intelligence/config/settings.py +260 -0
  69. semantic_code_intelligence/context/__init__.py +19 -0
  70. semantic_code_intelligence/context/engine.py +429 -0
  71. semantic_code_intelligence/context/memory.py +253 -0
  72. semantic_code_intelligence/daemon/__init__.py +1 -0
  73. semantic_code_intelligence/daemon/watcher.py +515 -0
  74. semantic_code_intelligence/docs/__init__.py +1080 -0
  75. semantic_code_intelligence/embeddings/__init__.py +0 -0
  76. semantic_code_intelligence/embeddings/enhanced.py +131 -0
  77. semantic_code_intelligence/embeddings/generator.py +149 -0
  78. semantic_code_intelligence/embeddings/model_registry.py +100 -0
  79. semantic_code_intelligence/evolution/__init__.py +1 -0
  80. semantic_code_intelligence/evolution/budget_guard.py +111 -0
  81. semantic_code_intelligence/evolution/commit_manager.py +88 -0
  82. semantic_code_intelligence/evolution/context_builder.py +131 -0
  83. semantic_code_intelligence/evolution/engine.py +249 -0
  84. semantic_code_intelligence/evolution/patch_generator.py +229 -0
  85. semantic_code_intelligence/evolution/task_selector.py +214 -0
  86. semantic_code_intelligence/evolution/test_runner.py +111 -0
  87. semantic_code_intelligence/indexing/__init__.py +0 -0
  88. semantic_code_intelligence/indexing/chunker.py +174 -0
  89. semantic_code_intelligence/indexing/parallel.py +86 -0
  90. semantic_code_intelligence/indexing/scanner.py +146 -0
  91. semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
  92. semantic_code_intelligence/llm/__init__.py +62 -0
  93. semantic_code_intelligence/llm/cache.py +219 -0
  94. semantic_code_intelligence/llm/cached_provider.py +145 -0
  95. semantic_code_intelligence/llm/conversation.py +190 -0
  96. semantic_code_intelligence/llm/cross_refactor.py +272 -0
  97. semantic_code_intelligence/llm/investigation.py +274 -0
  98. semantic_code_intelligence/llm/mock_provider.py +77 -0
  99. semantic_code_intelligence/llm/ollama_provider.py +122 -0
  100. semantic_code_intelligence/llm/openai_provider.py +100 -0
  101. semantic_code_intelligence/llm/provider.py +92 -0
  102. semantic_code_intelligence/llm/rate_limiter.py +164 -0
  103. semantic_code_intelligence/llm/reasoning.py +438 -0
  104. semantic_code_intelligence/llm/safety.py +110 -0
  105. semantic_code_intelligence/llm/streaming.py +251 -0
  106. semantic_code_intelligence/lsp/__init__.py +609 -0
  107. semantic_code_intelligence/mcp/__init__.py +393 -0
  108. semantic_code_intelligence/parsing/__init__.py +19 -0
  109. semantic_code_intelligence/parsing/parser.py +375 -0
  110. semantic_code_intelligence/plugins/__init__.py +255 -0
  111. semantic_code_intelligence/plugins/examples/__init__.py +1 -0
  112. semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
  113. semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
  114. semantic_code_intelligence/scalability/__init__.py +205 -0
  115. semantic_code_intelligence/search/__init__.py +0 -0
  116. semantic_code_intelligence/search/formatter.py +123 -0
  117. semantic_code_intelligence/search/grep.py +361 -0
  118. semantic_code_intelligence/search/hybrid_search.py +170 -0
  119. semantic_code_intelligence/search/keyword_search.py +311 -0
  120. semantic_code_intelligence/search/section_expander.py +103 -0
  121. semantic_code_intelligence/services/__init__.py +0 -0
  122. semantic_code_intelligence/services/indexing_service.py +630 -0
  123. semantic_code_intelligence/services/search_service.py +269 -0
  124. semantic_code_intelligence/storage/__init__.py +0 -0
  125. semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
  126. semantic_code_intelligence/storage/hash_store.py +66 -0
  127. semantic_code_intelligence/storage/index_manifest.py +85 -0
  128. semantic_code_intelligence/storage/index_stats.py +138 -0
  129. semantic_code_intelligence/storage/query_history.py +160 -0
  130. semantic_code_intelligence/storage/symbol_registry.py +209 -0
  131. semantic_code_intelligence/storage/vector_store.py +297 -0
  132. semantic_code_intelligence/tests/__init__.py +0 -0
  133. semantic_code_intelligence/tests/test_ai_features.py +351 -0
  134. semantic_code_intelligence/tests/test_chunker.py +119 -0
  135. semantic_code_intelligence/tests/test_cli.py +188 -0
  136. semantic_code_intelligence/tests/test_config.py +154 -0
  137. semantic_code_intelligence/tests/test_context.py +381 -0
  138. semantic_code_intelligence/tests/test_embeddings.py +73 -0
  139. semantic_code_intelligence/tests/test_endtoend.py +1142 -0
  140. semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
  141. semantic_code_intelligence/tests/test_hash_store.py +79 -0
  142. semantic_code_intelligence/tests/test_logging.py +55 -0
  143. semantic_code_intelligence/tests/test_new_cli.py +138 -0
  144. semantic_code_intelligence/tests/test_parser.py +495 -0
  145. semantic_code_intelligence/tests/test_phase10.py +355 -0
  146. semantic_code_intelligence/tests/test_phase11.py +593 -0
  147. semantic_code_intelligence/tests/test_phase12.py +375 -0
  148. semantic_code_intelligence/tests/test_phase13.py +663 -0
  149. semantic_code_intelligence/tests/test_phase14.py +568 -0
  150. semantic_code_intelligence/tests/test_phase15.py +814 -0
  151. semantic_code_intelligence/tests/test_phase16.py +792 -0
  152. semantic_code_intelligence/tests/test_phase17.py +815 -0
  153. semantic_code_intelligence/tests/test_phase18.py +934 -0
  154. semantic_code_intelligence/tests/test_phase19.py +986 -0
  155. semantic_code_intelligence/tests/test_phase20.py +2753 -0
  156. semantic_code_intelligence/tests/test_phase20b.py +2058 -0
  157. semantic_code_intelligence/tests/test_phase20c.py +962 -0
  158. semantic_code_intelligence/tests/test_phase21.py +428 -0
  159. semantic_code_intelligence/tests/test_phase22.py +799 -0
  160. semantic_code_intelligence/tests/test_phase23.py +783 -0
  161. semantic_code_intelligence/tests/test_phase24.py +715 -0
  162. semantic_code_intelligence/tests/test_phase25.py +496 -0
  163. semantic_code_intelligence/tests/test_phase26.py +251 -0
  164. semantic_code_intelligence/tests/test_phase27.py +531 -0
  165. semantic_code_intelligence/tests/test_phase8.py +592 -0
  166. semantic_code_intelligence/tests/test_phase9.py +643 -0
  167. semantic_code_intelligence/tests/test_plugins.py +293 -0
  168. semantic_code_intelligence/tests/test_priority_features.py +727 -0
  169. semantic_code_intelligence/tests/test_router.py +41 -0
  170. semantic_code_intelligence/tests/test_scalability.py +138 -0
  171. semantic_code_intelligence/tests/test_scanner.py +125 -0
  172. semantic_code_intelligence/tests/test_search.py +160 -0
  173. semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
  174. semantic_code_intelligence/tests/test_tools.py +182 -0
  175. semantic_code_intelligence/tests/test_vector_store.py +151 -0
  176. semantic_code_intelligence/tests/test_watcher.py +211 -0
  177. semantic_code_intelligence/tools/__init__.py +442 -0
  178. semantic_code_intelligence/tools/executor.py +232 -0
  179. semantic_code_intelligence/tools/protocol.py +200 -0
  180. semantic_code_intelligence/tui/__init__.py +454 -0
  181. semantic_code_intelligence/utils/__init__.py +0 -0
  182. semantic_code_intelligence/utils/logging.py +112 -0
  183. semantic_code_intelligence/version.py +3 -0
  184. semantic_code_intelligence/web/__init__.py +11 -0
  185. semantic_code_intelligence/web/api.py +289 -0
  186. semantic_code_intelligence/web/server.py +397 -0
  187. semantic_code_intelligence/web/ui.py +659 -0
  188. semantic_code_intelligence/web/visualize.py +226 -0
  189. semantic_code_intelligence/workspace/__init__.py +427 -0
@@ -0,0 +1,98 @@
1
+ """Pre-commit validation hook support.
2
+
3
+ Provides a lightweight validation pipeline that can be invoked as a
4
+ pre-commit hook or standalone CLI command. Hooks run the safety
5
+ validator and optionally dispatch ``CUSTOM_VALIDATION`` plugin hooks.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from semantic_code_intelligence.llm.safety import SafetyValidator, SafetyReport
15
+ from semantic_code_intelligence.utils.logging import get_logger
16
+
17
+ logger = get_logger("ci.hooks")
18
+
19
+
20
@dataclass
class HookResult:
    """Outcome of one pre-commit validation pass.

    ``to_dict`` flattens the result into plain built-in containers.
    """

    # Starts out True; flipped to False by a failed check.
    passed: bool = True
    # Number of file paths handed to the check.
    files_checked: int = 0
    # Report produced by the safety validator, if it has run.
    safety: SafetyReport | None = None
    # One entry per file for which a plugin reported issues.
    plugin_results: list[dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary."""
        safety_payload = self.safety.to_dict() if self.safety else None
        return dict(
            passed=self.passed,
            files_checked=self.files_checked,
            safety=safety_payload,
            plugin_results=self.plugin_results,
        )
36
+
37
+
38
def run_precommit_check(
    files: list[str],
    *,
    project_root: Path | None = None,
    run_plugins: bool = True,
) -> HookResult:
    """Run pre-commit safety and validation checks on *files*.

    1. Safety validation: the readable files are concatenated and scanned
       with a single ``SafetyValidator.validate`` call.
    2. Plugin hooks: dispatches ``CUSTOM_VALIDATION`` on each readable file
       (only when *run_plugins* is true and *project_root* is given).

    Each file is read from disk exactly once, so both stages validate the
    same content. Files that cannot be read are logged at debug level and
    skipped; they still count towards ``files_checked``.

    Args:
        files: Paths of the files to validate.
        project_root: Project directory. Plugins are discovered from
            ``<project_root>/.codexa/plugins`` when that directory exists.
        run_plugins: Set to ``False`` to skip the plugin stage.

    Returns:
        A ``HookResult`` whose ``passed`` flag is ``False`` when the safety
        report is not safe or when any plugin reported issues.
    """
    result = HookResult(files_checked=len(files))

    # Read every file once and keep (path, text) pairs in input order.
    # Reading is deliberately best-effort: an unreadable file must not
    # abort the whole hook.
    sources: list[tuple[str, str]] = []
    for fpath in files:
        try:
            text = Path(fpath).read_text(encoding="utf-8", errors="replace")
        except Exception as exc:
            logger.debug("Could not read %s: %s", fpath, exc)
            continue
        sources.append((fpath, text))

    # Aggregate file content for the safety scan (newline after each file).
    all_code = "".join(text + "\n" for _, text in sources)

    # Run safety validator
    validator = SafetyValidator()
    result.safety = validator.validate(all_code)
    if not result.safety.safe:
        result.passed = False

    # Run plugin CUSTOM_VALIDATION hooks if enabled
    if run_plugins and project_root:
        try:
            # Imported lazily so the plugin system is only loaded on demand.
            from semantic_code_intelligence.plugins import PluginManager, PluginHook

            mgr = PluginManager()
            plugin_dir = project_root / ".codexa" / "plugins"
            if plugin_dir.is_dir():
                mgr.discover_from_directory(plugin_dir)
                for name in mgr.registered_plugins:
                    mgr.activate(name)

            for fpath, content in sources:
                data = {
                    "file_path": fpath,
                    "content": content,
                    "issues": [],
                }
                out = mgr.dispatch(PluginHook.CUSTOM_VALIDATION, data)
                if out.get("issues"):
                    result.plugin_results.append({
                        "file": fpath,
                        "issues": out["issues"],
                    })
                    result.passed = False
        except Exception as exc:
            # Any plugin-side failure skips the remaining plugin checks
            # without affecting the safety verdict.
            logger.debug("Plugin validation skipped: %s", exc)

    return result
@@ -0,0 +1,272 @@
1
+ """Hotspot detection engine — identifies high-risk, high-impact code areas.
2
+
3
+ Computes a weighted risk score per file and symbol using:
4
+ - Cyclomatic complexity
5
+ - Duplication density
6
+ - Dependency fan-in / fan-out
7
+ - Historical change frequency (git log, if available)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import subprocess
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from semantic_code_intelligence.context.engine import CallGraph, DependencyMap
18
+ from semantic_code_intelligence.parsing.parser import Symbol
19
+ from semantic_code_intelligence.utils.logging import get_logger
20
+
21
+ logger = get_logger("ci.hotspots")
22
+
23
+ # ── Weight defaults ──────────────────────────────────────────────────
24
+
25
+ _W_COMPLEXITY = 0.30
26
+ _W_DUPLICATION = 0.20
27
+ _W_FAN_IN = 0.15
28
+ _W_FAN_OUT = 0.15
29
+ _W_CHURN = 0.20
30
+
31
+
32
@dataclass
class HotspotFactor:
    """One weighted input that contributes to a hotspot's risk score."""

    name: str
    raw_value: float  # value before normalisation
    normalized: float  # 0-1
    weight: float

    def to_dict(self) -> dict[str, Any]:
        """Return the factor as a plain dict with rounded numeric values."""
        rounded_raw = round(self.raw_value, 2)
        rounded_norm = round(self.normalized, 3)
        return dict(
            name=self.name,
            raw_value=rounded_raw,
            normalized=rounded_norm,
            weight=self.weight,
        )
48
+
49
+
50
@dataclass
class Hotspot:
    """A scored hotspot entry together with the factors behind its score."""

    name: str
    file_path: str
    kind: str  # "file" or "symbol"
    risk_score: float  # 0-100
    factors: list[HotspotFactor] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the hotspot as a plain dict; the score is rounded to 1 decimal."""
        serialised_factors = []
        for factor in self.factors:
            serialised_factors.append(factor.to_dict())
        return dict(
            name=self.name,
            file_path=self.file_path,
            kind=self.kind,
            risk_score=round(self.risk_score, 1),
            factors=serialised_factors,
        )
68
+
69
+
70
@dataclass
class HotspotReport:
    """Summary of a hotspot analysis run plus the ranked hotspots."""

    files_analyzed: int
    symbols_analyzed: int
    hotspots: list[Hotspot] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a plain dict, including a hotspot count."""
        entries = []
        for hotspot in self.hotspots:
            entries.append(hotspot.to_dict())
        return dict(
            files_analyzed=self.files_analyzed,
            symbols_analyzed=self.symbols_analyzed,
            hotspot_count=len(self.hotspots),
            hotspots=entries,
        )
85
+
86
+
87
+ # ── Git churn ────────────────────────────────────────────────────────
88
+
89
+
90
+ def _git_change_counts(project_root: Path) -> dict[str, int]:
91
+ """Return commit-count-per-file via ``git log --name-only``.
92
+
93
+ Returns an empty dict if git is unavailable or the directory is not
94
+ a repository.
95
+ """
96
+ try:
97
+ result = subprocess.run(
98
+ ["git", "log", "--name-only", "--pretty=format:"],
99
+ cwd=str(project_root),
100
+ capture_output=True,
101
+ text=True,
102
+ timeout=15,
103
+ )
104
+ if result.returncode != 0:
105
+ return {}
106
+ except Exception:
107
+ return {}
108
+
109
+ counts: dict[str, int] = {}
110
+ for line in result.stdout.splitlines():
111
+ line = line.strip()
112
+ if line:
113
+ counts[line] = counts.get(line, 0) + 1
114
+ return counts
115
+
116
+
117
+ # ── Normalisation helpers ────────────────────────────────────────────
118
+
119
+
120
+ def _normalise(value: float, max_val: float) -> float:
121
+ """Normalise *value* to [0, 1] given *max_val*."""
122
+ if max_val <= 0:
123
+ return 0.0
124
+ return min(value / max_val, 1.0)
125
+
126
+
127
+ # ── Core analyser ────────────────────────────────────────────────────
128
+
129
+
130
def analyze_hotspots(
    symbols: list[Symbol],
    call_graph: CallGraph,
    dep_map: DependencyMap,
    project_root: Path,
    *,
    top_n: int = 20,
    include_git: bool = True,
    weights: dict[str, float] | None = None,
) -> HotspotReport:
    """Detect hotspots across callable symbols in the project.

    Every symbol whose kind is ``"function"`` or ``"method"`` gets a risk
    score: the weighted sum of its normalised complexity, fan-in, fan-out,
    the duplicate count of its file and (when git data is available) the
    commit count of its file, multiplied by 100. Each metric is normalised
    against the maximum observed across all callable symbols.

    Args:
        symbols: All parsed symbols.
        call_graph: Pre-built call graph.
        dep_map: Pre-built dependency map. Not read by this function;
            kept in the signature for callers.
        project_root: Project root for git and path resolution.
        top_n: Maximum hotspots to return; values below 0 are treated as 0.
        include_git: Whether to factor git churn.
        weights: Override default factor weights. Recognised keys are
            ``complexity``, ``duplication``, ``fan_in``, ``fan_out`` and
            ``churn``.

    Returns:
        HotspotReport with hotspots ranked by descending risk score.
    """
    from semantic_code_intelligence.ci.quality import compute_complexity

    w = weights or {}
    w_complexity = w.get("complexity", _W_COMPLEXITY)
    w_duplication = w.get("duplication", _W_DUPLICATION)
    w_fan_in = w.get("fan_in", _W_FAN_IN)
    w_fan_out = w.get("fan_out", _W_FAN_OUT)
    w_churn = w.get("churn", _W_CHURN)

    # If no git data, spread the churn weight evenly over the other factors.
    churn_map: dict[str, int] = {}
    if include_git:
        churn_map = _git_change_counts(project_root)
    if not churn_map:
        extra = w_churn / 4
        w_complexity += extra
        w_duplication += extra
        w_fan_in += extra
        w_fan_out += extra
        w_churn = 0.0

    callable_symbols = [s for s in symbols if s.kind in ("function", "method")]

    # ── Per-symbol raw metrics ───────────────────────────────────
    # Symbols are keyed by "file_path:name".
    sym_complexity: dict[str, int] = {}
    sym_fan_in: dict[str, int] = {}
    sym_fan_out: dict[str, int] = {}
    for s in callable_symbols:
        key = f"{s.file_path}:{s.name}"
        sym_complexity[key] = compute_complexity(s).complexity
        # Callers are looked up by bare name, callees by the full key.
        sym_fan_in[key] = len(call_graph.callers_of(s.name))
        sym_fan_out[key] = len(call_graph.callees_of(key))

    max_complexity = max(sym_complexity.values(), default=1)
    max_fan_in = max(sym_fan_in.values(), default=1)
    max_fan_out = max(sym_fan_out.values(), default=1)

    # Per-file aggregate: duplication density
    # Count how many duplicate pairs touch each file
    file_dup_count: dict[str, int] = {}
    try:
        from semantic_code_intelligence.ci.quality import detect_duplicates

        dups = detect_duplicates(callable_symbols, threshold=0.70, min_lines=3)
        for d in dups:
            file_dup_count[d.file_a] = file_dup_count.get(d.file_a, 0) + 1
            file_dup_count[d.file_b] = file_dup_count.get(d.file_b, 0) + 1
    except Exception as exc:
        # Duplication is an optional signal; scoring continues without it.
        logger.debug("Duplicate detection skipped: %s", exc)

    max_dup = max(file_dup_count.values(), default=1)

    # Git churn — resolve relative paths
    root = project_root.resolve()
    max_churn = max(churn_map.values(), default=1)
    # Cache of file_path -> root-relative posix path, so each file is
    # resolved on disk once rather than once per symbol.
    rel_paths: dict[str, str] = {}

    # ── Score each callable symbol ───────────────────────────────
    hotspots: list[Hotspot] = []
    unique_files: set[str] = set()

    for s in callable_symbols:
        unique_files.add(s.file_path)
        key = f"{s.file_path}:{s.name}"

        cc = sym_complexity.get(key, 1)
        fi = sym_fan_in.get(key, 0)
        fo = sym_fan_out.get(key, 0)
        dp = file_dup_count.get(s.file_path, 0)

        # Resolve relative path for git churn lookup
        rel = rel_paths.get(s.file_path)
        if rel is None:
            try:
                rel = str(Path(s.file_path).resolve().relative_to(root)).replace("\\", "/")
            except ValueError:
                # File lies outside the project root: no churn data for it.
                rel = ""
            rel_paths[s.file_path] = rel
        ch = churn_map.get(rel, 0)

        n_cc = _normalise(float(cc), float(max_complexity))
        n_fi = _normalise(float(fi), float(max_fan_in))
        n_fo = _normalise(float(fo), float(max_fan_out))
        n_dp = _normalise(float(dp), float(max_dup))
        n_ch = _normalise(float(ch), float(max_churn))

        score = (
            w_complexity * n_cc
            + w_fan_in * n_fi
            + w_fan_out * n_fo
            + w_duplication * n_dp
            + w_churn * n_ch
        ) * 100

        factors = [
            HotspotFactor("complexity", float(cc), n_cc, w_complexity),
            HotspotFactor("fan_in", float(fi), n_fi, w_fan_in),
            HotspotFactor("fan_out", float(fo), n_fo, w_fan_out),
            HotspotFactor("duplication", float(dp), n_dp, w_duplication),
        ]
        # Churn is only reported when it actually contributed to the score.
        if w_churn > 0:
            factors.append(HotspotFactor("churn", float(ch), n_ch, w_churn))

        hotspots.append(Hotspot(
            name=s.name,
            file_path=s.file_path,
            kind="symbol",
            risk_score=score,
            factors=factors,
        ))

    hotspots.sort(key=lambda h: h.risk_score, reverse=True)
    # Clamp so a negative top_n cannot slice from the end of the ranking.
    limit = max(top_n, 0)
    return HotspotReport(
        files_analyzed=len(unique_files),
        symbols_analyzed=len(callable_symbols),
        hotspots=hotspots[:limit],
    )
@@ -0,0 +1,246 @@
1
+ """Impact analysis engine — predicts blast radius of code changes.
2
+
3
+ Given a file path or symbol name, determines which parts of the codebase
4
+ are directly and transitively affected via call graph edges, dependency
5
+ map imports, and symbol cross-references.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections import deque
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from semantic_code_intelligence.context.engine import (
16
+ CallGraph,
17
+ DependencyMap,
18
+ )
19
+ from semantic_code_intelligence.parsing.parser import Symbol
20
+ from semantic_code_intelligence.utils.logging import get_logger
21
+
22
+ logger = get_logger("ci.impact")
23
+
24
+
25
@dataclass
class AffectedSymbol:
    """A symbol reached from the change target, with how it was reached."""

    name: str
    file_path: str
    kind: str  # "function", "method", "class"
    relationship: str  # "direct_caller", "transitive_caller", "import_dep"
    depth: int  # hops from source

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict, in declaration order."""
        exported = ("name", "file_path", "kind", "relationship", "depth")
        return {attr: getattr(self, attr) for attr in exported}
43
+
44
+
45
@dataclass
class AffectedModule:
    """A file affected by a change, with the reason and distance."""

    file_path: str
    relationship: str  # "imports_target", "transitive_import", "contains_caller"
    depth: int

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict."""
        return dict(
            file_path=self.file_path,
            relationship=self.relationship,
            depth=self.depth,
        )
59
+
60
+
61
@dataclass
class DependencyChain:
    """An ordered list of names explaining why something is affected."""

    path: list[str]  # list of file/symbol names forming the chain

    def to_dict(self) -> dict[str, Any]:
        """Return the chain wrapped in a single-key dict."""
        payload: dict[str, Any] = {}
        payload["path"] = self.path
        return payload
69
+
70
+
71
@dataclass
class ImpactReport:
    """Everything found to be affected by changing one target."""

    target: str
    target_kind: str  # "file" or "symbol"
    direct_symbols: list[AffectedSymbol] = field(default_factory=list)
    transitive_symbols: list[AffectedSymbol] = field(default_factory=list)
    affected_modules: list[AffectedModule] = field(default_factory=list)
    chains: list[DependencyChain] = field(default_factory=list)

    @property
    def total_affected(self) -> int:
        """Number of direct plus transitive symbols (modules are not counted)."""
        direct_count = len(self.direct_symbols)
        transitive_count = len(self.transitive_symbols)
        return direct_count + transitive_count

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a plain dict, serialising every nested entry."""
        payload: dict[str, Any] = {
            "target": self.target,
            "target_kind": self.target_kind,
        }
        # The four list fields are serialised the same way, in this order.
        for attr in ("direct_symbols", "transitive_symbols", "affected_modules", "chains"):
            payload[attr] = [entry.to_dict() for entry in getattr(self, attr)]
        payload["total_affected"] = self.total_affected
        return payload
96
+
97
+
98
+ def _resolve_target_symbols(
99
+ target: str,
100
+ symbols: list[Symbol],
101
+ project_root: Path,
102
+ ) -> tuple[str, list[Symbol]]:
103
+ """Resolve a target (file path or symbol name) to matching symbols.
104
+
105
+ Returns (target_kind, matched_symbols).
106
+ """
107
+ root = project_root.resolve()
108
+
109
+ # Check if it looks like a file path
110
+ candidate = Path(target)
111
+ if not candidate.is_absolute():
112
+ candidate = root / target
113
+
114
+ if candidate.exists() and candidate.is_file():
115
+ resolved = str(candidate.resolve())
116
+ matched = [s for s in symbols if str(Path(s.file_path).resolve()) == resolved]
117
+ return "file", matched
118
+
119
+ # Treat as symbol name
120
+ matched = [s for s in symbols if s.name == target and s.kind != "import"]
121
+ return "symbol", matched
122
+
123
+
124
def analyze_impact(
    target: str,
    symbols: list[Symbol],
    call_graph: CallGraph,
    dep_map: DependencyMap,
    project_root: Path,
    *,
    max_depth: int = 5,
) -> ImpactReport:
    """Analyze the impact of modifying a file or symbol.

    BFS over call graph callers and dependency map importers to find
    the blast radius of a change.

    Args:
        target: File path or symbol name to analyse.
        symbols: All parsed symbols.
        call_graph: Pre-built call graph; queried with ``callers_of``.
        dep_map: Pre-built dependency map; queried with ``get_dependents``.
        project_root: Root used to resolve *target* when it is a path.
        max_depth: Callers are followed while their depth is below this
            value. Direct callers (depth 1) are always reported.

    Returns:
        An ``ImpactReport``. It is empty (target and kind only) when
        *target* matches no symbol. Callers at depth 1 are listed as
        direct, deeper ones as transitive. Modules are reported at depth 1
        only: files importing a seed file's module (by file stem) and
        files containing a direct caller.
    """
    target_kind, target_syms = _resolve_target_symbols(target, symbols, project_root)

    if not target_syms:
        return ImpactReport(target=target, target_kind=target_kind)

    # Collect seed symbol names and the files that define them.
    seed_names: set[str] = set()
    seed_files: set[str] = set()
    for s in target_syms:
        seed_names.add(s.name)
        seed_files.add(s.file_path)

    # Two lookups over non-import symbols: by exact "file_path:name" key
    # and by bare name (first definition wins in both).
    sym_by_key: dict[str, Symbol] = {}
    sym_by_name: dict[str, Symbol] = {}
    for s in symbols:
        if s.kind != "import":
            sym_by_key.setdefault(f"{s.file_path}:{s.name}", s)
            sym_by_name.setdefault(s.name, s)

    def _symbol_for(caller_key: str) -> Symbol | None:
        """Resolve a call-graph caller key to a Symbol, or None."""
        # Prefer an exact match so same-named symbols in different files
        # are not confused with each other.
        # NOTE(review): assumes caller keys are "<file_path>:<name>" —
        # confirm against CallGraph; the bare-name fallback covers other forms.
        exact = sym_by_key.get(caller_key)
        if exact is not None:
            return exact
        parts = caller_key.rsplit(":", 1)
        bare_name = parts[-1] if len(parts) == 2 else caller_key
        return sym_by_name.get(bare_name)

    # ── BFS over call graph (callers of target symbols) ──────────
    direct: list[AffectedSymbol] = []
    transitive: list[AffectedSymbol] = []
    visited_callers: set[str] = set()  # caller keys visited
    queue: deque[tuple[str, int, str]] = deque()  # (caller_key, depth, relationship)

    for name in seed_names:
        for edge in call_graph.callers_of(name):
            caller_key = edge.caller
            if caller_key in visited_callers:
                continue
            visited_callers.add(caller_key)
            queue.append((caller_key, 1, "direct_caller"))

    while queue:
        caller_key, depth, relationship = queue.popleft()
        sym = _symbol_for(caller_key)
        if sym is None:
            # Caller is not a known symbol; it is neither reported nor expanded.
            continue

        affected = AffectedSymbol(
            name=sym.name,
            file_path=sym.file_path,
            kind=sym.kind,
            relationship=relationship,
            depth=depth,
        )
        if depth == 1:
            direct.append(affected)
        else:
            transitive.append(affected)

        # Continue BFS if within depth limit
        if depth < max_depth:
            for edge in call_graph.callers_of(sym.name):
                if edge.caller not in visited_callers:
                    visited_callers.add(edge.caller)
                    queue.append((edge.caller, depth + 1, "transitive_caller"))

    # ── Module-level impact via dependency map ────────────────────
    affected_modules: list[AffectedModule] = []
    visited_modules: set[str] = set()

    for fpath in seed_files:
        # The module name is taken to be the file stem.
        module_name = Path(fpath).stem

        for dep in dep_map.get_dependents(module_name):
            if dep.source_file not in visited_modules and dep.source_file not in seed_files:
                visited_modules.add(dep.source_file)
                affected_modules.append(AffectedModule(
                    file_path=dep.source_file,
                    relationship="imports_target",
                    depth=1,
                ))

    # Add files containing direct callers
    for af in direct:
        if af.file_path not in visited_modules and af.file_path not in seed_files:
            visited_modules.add(af.file_path)
            affected_modules.append(AffectedModule(
                file_path=af.file_path,
                relationship="contains_caller",
                depth=1,
            ))

    # ── Build dependency chains (first 10 direct, first 5 transitive) ──
    chains: list[DependencyChain] = []
    for af in direct[:10]:
        chains.append(DependencyChain(path=[target, af.name]))

    for af in transitive[:5]:
        # Intermediate hops are not tracked, so they are elided as "...".
        chains.append(DependencyChain(path=[target, "...", af.name]))

    return ImpactReport(
        target=target,
        target_kind=target_kind,
        direct_symbols=direct,
        transitive_symbols=transitive,
        affected_modules=affected_modules,
        chains=chains,
    )