codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Pre-commit validation hook support.
|
|
2
|
+
|
|
3
|
+
Provides a lightweight validation pipeline that can be invoked as a
|
|
4
|
+
pre-commit hook or standalone CLI command. Hooks run the safety
|
|
5
|
+
validator and optionally dispatch ``CUSTOM_VALIDATION`` plugin hooks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from semantic_code_intelligence.llm.safety import SafetyValidator, SafetyReport
|
|
15
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
16
|
+
|
|
17
|
+
logger = get_logger("ci.hooks")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class HookResult:
    """Outcome of a single pre-commit validation pass.

    A freshly created result is passing, has checked zero files, carries no
    safety report and holds an empty (per-instance) list of plugin results.
    """

    passed: bool = True
    files_checked: int = 0
    safety: SafetyReport | None = None
    plugin_results: list[dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary.

        The ``"safety"`` entry is ``self.safety.to_dict()`` when a truthy
        report is attached and ``None`` otherwise.
        """
        safety_payload = None
        if self.safety:
            safety_payload = self.safety.to_dict()
        return dict(
            passed=self.passed,
            files_checked=self.files_checked,
            safety=safety_payload,
            plugin_results=self.plugin_results,
        )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def run_precommit_check(
    files: list[str],
    *,
    project_root: Path | None = None,
    run_plugins: bool = True,
) -> HookResult:
    """Run pre-commit safety and validation checks on *files*.

    1. Safety validation: the text of every readable file is concatenated
       (each followed by a newline) and handed to ``SafetyValidator.validate``
       in a single call. An unsafe report marks the result as failed.
    2. Plugin hooks (only when *run_plugins* is true and *project_root* is
       given): plugins found in ``<project_root>/.codexa/plugins`` are
       activated and ``CUSTOM_VALIDATION`` is dispatched once per readable
       file. Any reported issues mark the result as failed.

    Args:
        files: Paths of the files to check.
        project_root: Project root used to locate the plugin directory.
        run_plugins: Whether to run the plugin phase at all.

    Returns:
        A ``HookResult``; ``files_checked`` is ``len(files)`` regardless of
        how many files could actually be read.
    """
    result = HookResult(files_checked=len(files))

    # Read each file exactly once; both phases below reuse the cached text
    # instead of hitting the disk a second time.
    contents: dict[str, str] = {}
    for fpath in files:
        try:
            contents[fpath] = Path(fpath).read_text(encoding="utf-8", errors="replace")
        except Exception as exc:
            # Deliberately best-effort: an unreadable file is skipped, not fatal.
            logger.debug("Could not read %s: %s", fpath, exc)

    # Aggregate file content for the safety scan (join avoids quadratic +=).
    all_code = "".join(contents[fpath] + "\n" for fpath in files if fpath in contents)

    # Run safety validator
    validator = SafetyValidator()
    result.safety = validator.validate(all_code)
    if not result.safety.safe:
        result.passed = False

    # Run plugin CUSTOM_VALIDATION hooks if enabled
    if run_plugins and project_root:
        try:
            from semantic_code_intelligence.plugins import PluginManager, PluginHook

            mgr = PluginManager()
            plugin_dir = project_root / ".codexa" / "plugins"
            if plugin_dir.is_dir():
                mgr.discover_from_directory(plugin_dir)
                for name in mgr.registered_plugins:
                    mgr.activate(name)

                for fpath in files:
                    content = contents.get(fpath)
                    if content is None:
                        logger.debug("Failed to read %s for plugin validation", fpath)
                        continue
                    data = {
                        "file_path": fpath,
                        "content": content,
                        "issues": [],
                    }
                    out = mgr.dispatch(PluginHook.CUSTOM_VALIDATION, data)
                    if out.get("issues"):
                        result.plugin_results.append({
                            "file": fpath,
                            "issues": out["issues"],
                        })
                        result.passed = False
        except Exception as exc:
            # Plugin support is optional; any failure here skips the phase
            # without affecting the safety verdict already recorded.
            logger.debug("Plugin validation skipped: %s", exc)

    return result
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
"""Hotspot detection engine — identifies high-risk, high-impact code areas.
|
|
2
|
+
|
|
3
|
+
Computes a weighted risk score per file and symbol using:
|
|
4
|
+
- Cyclomatic complexity
|
|
5
|
+
- Duplication density
|
|
6
|
+
- Dependency fan-in / fan-out
|
|
7
|
+
- Historical change frequency (git log, if available)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import subprocess
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from semantic_code_intelligence.context.engine import CallGraph, DependencyMap
|
|
18
|
+
from semantic_code_intelligence.parsing.parser import Symbol
|
|
19
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
20
|
+
|
|
21
|
+
logger = get_logger("ci.hotspots")
|
|
22
|
+
|
|
23
|
+
# ── Weight defaults ──────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
# Default share of each factor in the combined risk score. The five values
# sum to 1.0; callers of analyze_hotspots() may override any of them through
# its ``weights`` argument.
_W_COMPLEXITY = 0.30  # cyclomatic complexity
_W_DUPLICATION = 0.20  # duplication density
_W_FAN_IN = 0.15  # dependency fan-in
_W_FAN_OUT = 0.15  # dependency fan-out
_W_CHURN = 0.20  # historical change frequency (git)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class HotspotFactor:
    """One weighted ingredient of a hotspot's risk score."""

    name: str
    raw_value: float
    normalized: float  # 0-1
    weight: float

    def to_dict(self) -> dict[str, Any]:
        """Serialise the factor.

        ``raw_value`` is rounded to two decimals and ``normalized`` to three;
        ``name`` and ``weight`` are passed through unchanged.
        """
        rounded_raw = round(self.raw_value, 2)
        rounded_norm = round(self.normalized, 3)
        return dict(
            name=self.name,
            raw_value=rounded_raw,
            normalized=rounded_norm,
            weight=self.weight,
        )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class Hotspot:
    """A scored hotspot together with the factors behind its score."""

    name: str
    file_path: str
    kind: str  # "file" or "symbol"
    risk_score: float  # 0-100
    factors: list[HotspotFactor] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialise the hotspot.

        ``risk_score`` is rounded to one decimal and every factor is
        serialised through its own ``to_dict``.
        """
        factor_dicts = []
        for factor in self.factors:
            factor_dicts.append(factor.to_dict())
        return dict(
            name=self.name,
            file_path=self.file_path,
            kind=self.kind,
            risk_score=round(self.risk_score, 1),
            factors=factor_dicts,
        )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class HotspotReport:
    """Summary of a hotspot analysis run."""

    files_analyzed: int
    symbols_analyzed: int
    hotspots: list[Hotspot] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Serialise the report.

        Besides the stored counters the dictionary carries ``hotspot_count``
        (the length of ``hotspots``) and the serialised hotspots themselves.
        """
        serialised = []
        for spot in self.hotspots:
            serialised.append(spot.to_dict())
        return dict(
            files_analyzed=self.files_analyzed,
            symbols_analyzed=self.symbols_analyzed,
            hotspot_count=len(self.hotspots),
            hotspots=serialised,
        )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# ── Git churn ────────────────────────────────────────────────────────
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _git_change_counts(project_root: Path) -> dict[str, int]:
|
|
91
|
+
"""Return commit-count-per-file via ``git log --name-only``.
|
|
92
|
+
|
|
93
|
+
Returns an empty dict if git is unavailable or the directory is not
|
|
94
|
+
a repository.
|
|
95
|
+
"""
|
|
96
|
+
try:
|
|
97
|
+
result = subprocess.run(
|
|
98
|
+
["git", "log", "--name-only", "--pretty=format:"],
|
|
99
|
+
cwd=str(project_root),
|
|
100
|
+
capture_output=True,
|
|
101
|
+
text=True,
|
|
102
|
+
timeout=15,
|
|
103
|
+
)
|
|
104
|
+
if result.returncode != 0:
|
|
105
|
+
return {}
|
|
106
|
+
except Exception:
|
|
107
|
+
return {}
|
|
108
|
+
|
|
109
|
+
counts: dict[str, int] = {}
|
|
110
|
+
for line in result.stdout.splitlines():
|
|
111
|
+
line = line.strip()
|
|
112
|
+
if line:
|
|
113
|
+
counts[line] = counts.get(line, 0) + 1
|
|
114
|
+
return counts
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# ── Normalisation helpers ────────────────────────────────────────────
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _normalise(value: float, max_val: float) -> float:
|
|
121
|
+
"""Normalise *value* to [0, 1] given *max_val*."""
|
|
122
|
+
if max_val <= 0:
|
|
123
|
+
return 0.0
|
|
124
|
+
return min(value / max_val, 1.0)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ── Core analyser ────────────────────────────────────────────────────
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def analyze_hotspots(
    symbols: list[Symbol],
    call_graph: CallGraph,
    dep_map: DependencyMap,
    project_root: Path,
    *,
    top_n: int = 20,
    include_git: bool = True,
    weights: dict[str, float] | None = None,
) -> HotspotReport:
    """Detect hotspots across files in the project.

    Every symbol whose ``kind`` is ``"function"`` or ``"method"`` is scored
    as the weighted sum of its normalised complexity, fan-in, fan-out,
    per-file duplication count and per-file git churn, multiplied by 100.
    When no churn data is available (git disabled or nothing returned) the
    churn weight is split evenly over the other four factors.

    Args:
        symbols: All parsed symbols.
        call_graph: Pre-built call graph.
        dep_map: Pre-built dependency map (accepted for interface
            compatibility; not consulted by this function).
        project_root: Project root for git and path resolution.
        top_n: Maximum hotspots to return; values below zero are treated
            as zero.
        include_git: Whether to factor git churn.
        weights: Override default factor weights. Recognised keys are
            ``complexity``, ``duplication``, ``fan_in``, ``fan_out`` and
            ``churn``.

    Returns:
        HotspotReport with ranked hotspots.
    """
    from semantic_code_intelligence.ci.quality import compute_complexity

    overrides = weights or {}
    w_complexity = overrides.get("complexity", _W_COMPLEXITY)
    w_duplication = overrides.get("duplication", _W_DUPLICATION)
    w_fan_in = overrides.get("fan_in", _W_FAN_IN)
    w_fan_out = overrides.get("fan_out", _W_FAN_OUT)
    w_churn = overrides.get("churn", _W_CHURN)

    # If no git data, redistribute the churn weight over the other factors.
    churn_map: dict[str, int] = _git_change_counts(project_root) if include_git else {}
    if not churn_map:
        extra = w_churn / 4
        w_complexity += extra
        w_duplication += extra
        w_fan_in += extra
        w_fan_out += extra
        w_churn = 0.0

    callable_symbols = [s for s in symbols if s.kind in ("function", "method")]

    # ── Per-symbol raw metrics (single pass) ─────────────────────
    sym_complexity: dict[str, int] = {}
    sym_fan_in: dict[str, int] = {}
    sym_fan_out: dict[str, int] = {}
    for s in callable_symbols:
        key = f"{s.file_path}:{s.name}"
        sym_complexity[key] = compute_complexity(s).complexity
        # Callers are looked up by bare name, callees by the "file:name" key.
        sym_fan_in[key] = len(call_graph.callers_of(s.name))
        sym_fan_out[key] = len(call_graph.callees_of(key))

    max_complexity = max(sym_complexity.values(), default=1)
    max_fan_in = max(sym_fan_in.values(), default=1)
    max_fan_out = max(sym_fan_out.values(), default=1)

    # ── Per-file duplication: how many duplicate pairs touch each file ──
    file_dup_count: dict[str, int] = {}
    try:
        from semantic_code_intelligence.ci.quality import detect_duplicates

        dups = detect_duplicates(callable_symbols, threshold=0.70, min_lines=3)
        for d in dups:
            file_dup_count[d.file_a] = file_dup_count.get(d.file_a, 0) + 1
            file_dup_count[d.file_b] = file_dup_count.get(d.file_b, 0) + 1
    except Exception as exc:
        # Duplication is an optional signal; scoring continues without it.
        logger.debug("Duplicate detection skipped: %s", exc)

    max_dup = max(file_dup_count.values(), default=1)

    # ── Git churn ────────────────────────────────────────────────
    root = project_root.resolve()
    max_churn = max(churn_map.values(), default=1)
    # Resolving a path touches the filesystem; do it once per file rather
    # than once per symbol.
    rel_paths: dict[str, str] = {}

    # ── Score each callable symbol ───────────────────────────────
    hotspots: list[Hotspot] = []
    unique_files: set[str] = set()

    for s in callable_symbols:
        unique_files.add(s.file_path)
        key = f"{s.file_path}:{s.name}"

        cc = sym_complexity.get(key, 1)
        fi = sym_fan_in.get(key, 0)
        fo = sym_fan_out.get(key, 0)
        dp = file_dup_count.get(s.file_path, 0)

        # Project-relative, forward-slash path for the churn lookup.
        rel = rel_paths.get(s.file_path)
        if rel is None:
            try:
                rel = Path(s.file_path).resolve().relative_to(root).as_posix()
            except ValueError:
                rel = ""  # file lies outside project_root: no churn data
            rel_paths[s.file_path] = rel
        ch = churn_map.get(rel, 0)

        n_cc = _normalise(float(cc), float(max_complexity))
        n_fi = _normalise(float(fi), float(max_fan_in))
        n_fo = _normalise(float(fo), float(max_fan_out))
        n_dp = _normalise(float(dp), float(max_dup))
        n_ch = _normalise(float(ch), float(max_churn))

        score = (
            w_complexity * n_cc
            + w_fan_in * n_fi
            + w_fan_out * n_fo
            + w_duplication * n_dp
            + w_churn * n_ch
        ) * 100

        factors = [
            HotspotFactor("complexity", float(cc), n_cc, w_complexity),
            HotspotFactor("fan_in", float(fi), n_fi, w_fan_in),
            HotspotFactor("fan_out", float(fo), n_fo, w_fan_out),
            HotspotFactor("duplication", float(dp), n_dp, w_duplication),
        ]
        # Churn is only reported when it actually contributed to the score.
        if w_churn > 0:
            factors.append(HotspotFactor("churn", float(ch), n_ch, w_churn))

        hotspots.append(Hotspot(
            name=s.name,
            file_path=s.file_path,
            kind="symbol",
            risk_score=score,
            factors=factors,
        ))

    hotspots.sort(key=lambda h: h.risk_score, reverse=True)
    return HotspotReport(
        files_analyzed=len(unique_files),
        symbols_analyzed=len(callable_symbols),
        # A negative top_n would otherwise slice off the tail instead of
        # returning nothing.
        hotspots=hotspots[: max(top_n, 0)],
    )
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Impact analysis engine — predicts blast radius of code changes.
|
|
2
|
+
|
|
3
|
+
Given a file path or symbol name, determines which parts of the codebase
|
|
4
|
+
are directly and transitively affected via call graph edges, dependency
|
|
5
|
+
map imports, and symbol cross-references.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections import deque
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from semantic_code_intelligence.context.engine import (
|
|
16
|
+
CallGraph,
|
|
17
|
+
DependencyMap,
|
|
18
|
+
)
|
|
19
|
+
from semantic_code_intelligence.parsing.parser import Symbol
|
|
20
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
21
|
+
|
|
22
|
+
logger = get_logger("ci.impact")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class AffectedSymbol:
    """A symbol reached from the change target."""

    name: str
    file_path: str
    kind: str  # "function", "method", "class"
    relationship: str  # "direct_caller", "transitive_caller", "import_dep"
    depth: int  # hops from source

    def to_dict(self) -> dict[str, Any]:
        """Return all five fields, unmodified, as a plain dictionary."""
        return dict(
            name=self.name,
            file_path=self.file_path,
            kind=self.kind,
            relationship=self.relationship,
            depth=self.depth,
        )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class AffectedModule:
    """A file reached from the change target."""

    file_path: str
    relationship: str  # "imports_target", "transitive_import", "contains_caller"
    depth: int

    def to_dict(self) -> dict[str, Any]:
        """Return the three fields, unmodified, as a plain dictionary."""
        return dict(
            file_path=self.file_path,
            relationship=self.relationship,
            depth=self.depth,
        )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class DependencyChain:
    """One chain of names explaining why something is affected."""

    path: list[str]  # list of file/symbol names forming the chain

    def to_dict(self) -> dict[str, Any]:
        """Return ``{"path": ...}`` holding the very same list object."""
        return dict(path=self.path)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class ImpactReport:
    """Everything an impact analysis found for one target."""

    target: str
    target_kind: str  # "file" or "symbol"
    direct_symbols: list[AffectedSymbol] = field(default_factory=list)
    transitive_symbols: list[AffectedSymbol] = field(default_factory=list)
    affected_modules: list[AffectedModule] = field(default_factory=list)
    chains: list[DependencyChain] = field(default_factory=list)

    @property
    def total_affected(self) -> int:
        """Number of direct plus transitive symbols (modules are not counted)."""
        groups = (self.direct_symbols, self.transitive_symbols)
        return sum(len(group) for group in groups)

    def to_dict(self) -> dict[str, Any]:
        """Serialise the report, delegating to each entry's ``to_dict``."""

        def dump(entries: list[Any]) -> list[Any]:
            return [entry.to_dict() for entry in entries]

        return dict(
            target=self.target,
            target_kind=self.target_kind,
            direct_symbols=dump(self.direct_symbols),
            transitive_symbols=dump(self.transitive_symbols),
            affected_modules=dump(self.affected_modules),
            chains=dump(self.chains),
            total_affected=self.total_affected,
        )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _resolve_target_symbols(
|
|
99
|
+
target: str,
|
|
100
|
+
symbols: list[Symbol],
|
|
101
|
+
project_root: Path,
|
|
102
|
+
) -> tuple[str, list[Symbol]]:
|
|
103
|
+
"""Resolve a target (file path or symbol name) to matching symbols.
|
|
104
|
+
|
|
105
|
+
Returns (target_kind, matched_symbols).
|
|
106
|
+
"""
|
|
107
|
+
root = project_root.resolve()
|
|
108
|
+
|
|
109
|
+
# Check if it looks like a file path
|
|
110
|
+
candidate = Path(target)
|
|
111
|
+
if not candidate.is_absolute():
|
|
112
|
+
candidate = root / target
|
|
113
|
+
|
|
114
|
+
if candidate.exists() and candidate.is_file():
|
|
115
|
+
resolved = str(candidate.resolve())
|
|
116
|
+
matched = [s for s in symbols if str(Path(s.file_path).resolve()) == resolved]
|
|
117
|
+
return "file", matched
|
|
118
|
+
|
|
119
|
+
# Treat as symbol name
|
|
120
|
+
matched = [s for s in symbols if s.name == target and s.kind != "import"]
|
|
121
|
+
return "symbol", matched
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def analyze_impact(
    target: str,
    symbols: list[Symbol],
    call_graph: CallGraph,
    dep_map: DependencyMap,
    project_root: Path,
    *,
    max_depth: int = 5,
) -> ImpactReport:
    """Analyze the impact of modifying a file or symbol.

    BFS over call graph callers and dependency map importers to find
    the full blast radius of a change.

    Args:
        target: File path (absolute or relative to *project_root*) or
            symbol name.
        symbols: All parsed symbols; entries of kind ``"import"`` are never
            reported as affected.
        call_graph: Queried through ``callers_of(name)``; each returned edge
            must expose a ``caller`` key.
        dep_map: Queried through ``get_dependents(module_name)`` with the
            stem of each seed file; each result must expose ``source_file``.
        project_root: Root used to resolve *target* as a path.
        max_depth: Callers are expanded while their depth is below this
            value. Direct callers (depth 1) are always reported.

    Returns:
        ImpactReport; empty apart from ``target``/``target_kind`` when the
        target matches no symbol.
    """
    target_kind, target_syms = _resolve_target_symbols(target, symbols, project_root)

    if not target_syms:
        return ImpactReport(target=target, target_kind=target_kind)

    # Ordered, de-duplicated seeds: iterating a set of strings would make the
    # order of the reported symbols vary from run to run.
    seed_names = list(dict.fromkeys(s.name for s in target_syms))
    seed_files = list(dict.fromkeys(s.file_path for s in target_syms))
    seed_file_set = set(seed_files)

    # Symbol lookups: exact "file:name" key first, bare name as a fallback.
    by_key: dict[str, Symbol] = {}
    by_name: dict[str, Symbol] = {}
    for s in symbols:
        if s.kind != "import":
            by_key.setdefault(f"{s.file_path}:{s.name}", s)
            by_name.setdefault(s.name, s)

    def _symbol_for(caller_key: str) -> Symbol | None:
        """Map a call-graph caller key to its Symbol, if one is known."""
        # NOTE(review): assumes the file part of the key uses the same form
        # as Symbol.file_path. When it does not, the name-only fallback
        # below gives the first symbol with that name (previous behaviour).
        exact = by_key.get(caller_key)
        if exact is not None:
            return exact
        # Text after the last ":" is the symbol name; a key without ":" is
        # used as the name as-is.
        return by_name.get(caller_key.rpartition(":")[2])

    # ── BFS over call graph (callers of target symbols) ──────────
    direct: list[AffectedSymbol] = []
    transitive: list[AffectedSymbol] = []
    visited_callers: set[str] = set()  # caller keys already queued
    queue: deque[tuple[str, int, str]] = deque()  # (caller_key, depth, relationship)

    def _enqueue_callers(callee_name: str, depth: int, relationship: str) -> None:
        """Queue every not-yet-seen caller of *callee_name*."""
        for edge in call_graph.callers_of(callee_name):
            if edge.caller in visited_callers:
                continue
            visited_callers.add(edge.caller)
            queue.append((edge.caller, depth, relationship))

    for name in seed_names:
        _enqueue_callers(name, 1, "direct_caller")

    while queue:
        caller_key, depth, relationship = queue.popleft()
        sym = _symbol_for(caller_key)
        if sym is None:
            continue  # caller is not among the parsed symbols

        affected = AffectedSymbol(
            name=sym.name,
            file_path=sym.file_path,
            kind=sym.kind,
            relationship=relationship,
            depth=depth,
        )
        (direct if depth == 1 else transitive).append(affected)

        # Continue BFS if within depth limit
        if depth < max_depth:
            _enqueue_callers(sym.name, depth + 1, "transitive_caller")

    # ── Module-level impact via dependency map ────────────────────
    affected_modules: list[AffectedModule] = []
    visited_modules: set[str] = set()

    for fpath in seed_files:
        # The dependency map is queried by bare module name (file stem).
        module_name = Path(fpath).stem
        for dep in dep_map.get_dependents(module_name):
            if dep.source_file in visited_modules or dep.source_file in seed_file_set:
                continue
            visited_modules.add(dep.source_file)
            affected_modules.append(AffectedModule(
                file_path=dep.source_file,
                relationship="imports_target",
                depth=1,
            ))

    # Add files containing direct callers
    for af in direct:
        if af.file_path in visited_modules or af.file_path in seed_file_set:
            continue
        visited_modules.add(af.file_path)
        affected_modules.append(AffectedModule(
            file_path=af.file_path,
            relationship="contains_caller",
            depth=1,
        ))

    # ── Build dependency chains (first 10 direct, first 5 transitive) ──
    chains: list[DependencyChain] = [
        DependencyChain(path=[target, af.name]) for af in direct[:10]
    ]
    # Intermediate hops are not tracked, hence the "..." placeholder.
    chains.extend(
        DependencyChain(path=[target, "...", af.name]) for af in transitive[:5]
    )

    return ImpactReport(
        target=target,
        target_kind=target_kind,
        direct_symbols=direct,
        transitive_symbols=transitive,
        affected_modules=affected_modules,
        chains=chains,
    )
|