codexa 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codexa-0.4.0.dist-info/METADATA +650 -0
- codexa-0.4.0.dist-info/RECORD +189 -0
- codexa-0.4.0.dist-info/WHEEL +5 -0
- codexa-0.4.0.dist-info/entry_points.txt +2 -0
- codexa-0.4.0.dist-info/licenses/LICENSE +21 -0
- codexa-0.4.0.dist-info/top_level.txt +1 -0
- semantic_code_intelligence/__init__.py +5 -0
- semantic_code_intelligence/analysis/__init__.py +21 -0
- semantic_code_intelligence/analysis/ai_features.py +351 -0
- semantic_code_intelligence/bridge/__init__.py +28 -0
- semantic_code_intelligence/bridge/context_provider.py +245 -0
- semantic_code_intelligence/bridge/protocol.py +167 -0
- semantic_code_intelligence/bridge/server.py +348 -0
- semantic_code_intelligence/bridge/vscode.py +271 -0
- semantic_code_intelligence/ci/__init__.py +13 -0
- semantic_code_intelligence/ci/hooks.py +98 -0
- semantic_code_intelligence/ci/hotspots.py +272 -0
- semantic_code_intelligence/ci/impact.py +246 -0
- semantic_code_intelligence/ci/metrics.py +591 -0
- semantic_code_intelligence/ci/pr.py +412 -0
- semantic_code_intelligence/ci/quality.py +557 -0
- semantic_code_intelligence/ci/templates.py +164 -0
- semantic_code_intelligence/ci/trace.py +224 -0
- semantic_code_intelligence/cli/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/__init__.py +0 -0
- semantic_code_intelligence/cli/commands/ask_cmd.py +153 -0
- semantic_code_intelligence/cli/commands/benchmark_cmd.py +303 -0
- semantic_code_intelligence/cli/commands/chat_cmd.py +252 -0
- semantic_code_intelligence/cli/commands/ci_gen_cmd.py +74 -0
- semantic_code_intelligence/cli/commands/context_cmd.py +120 -0
- semantic_code_intelligence/cli/commands/cross_refactor_cmd.py +113 -0
- semantic_code_intelligence/cli/commands/deps_cmd.py +91 -0
- semantic_code_intelligence/cli/commands/docs_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/doctor_cmd.py +147 -0
- semantic_code_intelligence/cli/commands/evolve_cmd.py +171 -0
- semantic_code_intelligence/cli/commands/explain_cmd.py +112 -0
- semantic_code_intelligence/cli/commands/gate_cmd.py +135 -0
- semantic_code_intelligence/cli/commands/grep_cmd.py +234 -0
- semantic_code_intelligence/cli/commands/hotspots_cmd.py +119 -0
- semantic_code_intelligence/cli/commands/impact_cmd.py +131 -0
- semantic_code_intelligence/cli/commands/index_cmd.py +138 -0
- semantic_code_intelligence/cli/commands/init_cmd.py +152 -0
- semantic_code_intelligence/cli/commands/investigate_cmd.py +163 -0
- semantic_code_intelligence/cli/commands/languages_cmd.py +101 -0
- semantic_code_intelligence/cli/commands/lsp_cmd.py +49 -0
- semantic_code_intelligence/cli/commands/mcp_cmd.py +50 -0
- semantic_code_intelligence/cli/commands/metrics_cmd.py +264 -0
- semantic_code_intelligence/cli/commands/models_cmd.py +157 -0
- semantic_code_intelligence/cli/commands/plugin_cmd.py +275 -0
- semantic_code_intelligence/cli/commands/pr_summary_cmd.py +178 -0
- semantic_code_intelligence/cli/commands/quality_cmd.py +208 -0
- semantic_code_intelligence/cli/commands/refactor_cmd.py +103 -0
- semantic_code_intelligence/cli/commands/review_cmd.py +88 -0
- semantic_code_intelligence/cli/commands/search_cmd.py +236 -0
- semantic_code_intelligence/cli/commands/serve_cmd.py +117 -0
- semantic_code_intelligence/cli/commands/suggest_cmd.py +100 -0
- semantic_code_intelligence/cli/commands/summary_cmd.py +78 -0
- semantic_code_intelligence/cli/commands/tool_cmd.py +282 -0
- semantic_code_intelligence/cli/commands/trace_cmd.py +123 -0
- semantic_code_intelligence/cli/commands/tui_cmd.py +58 -0
- semantic_code_intelligence/cli/commands/viz_cmd.py +127 -0
- semantic_code_intelligence/cli/commands/watch_cmd.py +72 -0
- semantic_code_intelligence/cli/commands/web_cmd.py +61 -0
- semantic_code_intelligence/cli/commands/workspace_cmd.py +250 -0
- semantic_code_intelligence/cli/main.py +65 -0
- semantic_code_intelligence/cli/router.py +92 -0
- semantic_code_intelligence/config/__init__.py +0 -0
- semantic_code_intelligence/config/settings.py +260 -0
- semantic_code_intelligence/context/__init__.py +19 -0
- semantic_code_intelligence/context/engine.py +429 -0
- semantic_code_intelligence/context/memory.py +253 -0
- semantic_code_intelligence/daemon/__init__.py +1 -0
- semantic_code_intelligence/daemon/watcher.py +515 -0
- semantic_code_intelligence/docs/__init__.py +1080 -0
- semantic_code_intelligence/embeddings/__init__.py +0 -0
- semantic_code_intelligence/embeddings/enhanced.py +131 -0
- semantic_code_intelligence/embeddings/generator.py +149 -0
- semantic_code_intelligence/embeddings/model_registry.py +100 -0
- semantic_code_intelligence/evolution/__init__.py +1 -0
- semantic_code_intelligence/evolution/budget_guard.py +111 -0
- semantic_code_intelligence/evolution/commit_manager.py +88 -0
- semantic_code_intelligence/evolution/context_builder.py +131 -0
- semantic_code_intelligence/evolution/engine.py +249 -0
- semantic_code_intelligence/evolution/patch_generator.py +229 -0
- semantic_code_intelligence/evolution/task_selector.py +214 -0
- semantic_code_intelligence/evolution/test_runner.py +111 -0
- semantic_code_intelligence/indexing/__init__.py +0 -0
- semantic_code_intelligence/indexing/chunker.py +174 -0
- semantic_code_intelligence/indexing/parallel.py +86 -0
- semantic_code_intelligence/indexing/scanner.py +146 -0
- semantic_code_intelligence/indexing/semantic_chunker.py +337 -0
- semantic_code_intelligence/llm/__init__.py +62 -0
- semantic_code_intelligence/llm/cache.py +219 -0
- semantic_code_intelligence/llm/cached_provider.py +145 -0
- semantic_code_intelligence/llm/conversation.py +190 -0
- semantic_code_intelligence/llm/cross_refactor.py +272 -0
- semantic_code_intelligence/llm/investigation.py +274 -0
- semantic_code_intelligence/llm/mock_provider.py +77 -0
- semantic_code_intelligence/llm/ollama_provider.py +122 -0
- semantic_code_intelligence/llm/openai_provider.py +100 -0
- semantic_code_intelligence/llm/provider.py +92 -0
- semantic_code_intelligence/llm/rate_limiter.py +164 -0
- semantic_code_intelligence/llm/reasoning.py +438 -0
- semantic_code_intelligence/llm/safety.py +110 -0
- semantic_code_intelligence/llm/streaming.py +251 -0
- semantic_code_intelligence/lsp/__init__.py +609 -0
- semantic_code_intelligence/mcp/__init__.py +393 -0
- semantic_code_intelligence/parsing/__init__.py +19 -0
- semantic_code_intelligence/parsing/parser.py +375 -0
- semantic_code_intelligence/plugins/__init__.py +255 -0
- semantic_code_intelligence/plugins/examples/__init__.py +1 -0
- semantic_code_intelligence/plugins/examples/code_quality.py +73 -0
- semantic_code_intelligence/plugins/examples/search_annotator.py +56 -0
- semantic_code_intelligence/scalability/__init__.py +205 -0
- semantic_code_intelligence/search/__init__.py +0 -0
- semantic_code_intelligence/search/formatter.py +123 -0
- semantic_code_intelligence/search/grep.py +361 -0
- semantic_code_intelligence/search/hybrid_search.py +170 -0
- semantic_code_intelligence/search/keyword_search.py +311 -0
- semantic_code_intelligence/search/section_expander.py +103 -0
- semantic_code_intelligence/services/__init__.py +0 -0
- semantic_code_intelligence/services/indexing_service.py +630 -0
- semantic_code_intelligence/services/search_service.py +269 -0
- semantic_code_intelligence/storage/__init__.py +0 -0
- semantic_code_intelligence/storage/chunk_hash_store.py +86 -0
- semantic_code_intelligence/storage/hash_store.py +66 -0
- semantic_code_intelligence/storage/index_manifest.py +85 -0
- semantic_code_intelligence/storage/index_stats.py +138 -0
- semantic_code_intelligence/storage/query_history.py +160 -0
- semantic_code_intelligence/storage/symbol_registry.py +209 -0
- semantic_code_intelligence/storage/vector_store.py +297 -0
- semantic_code_intelligence/tests/__init__.py +0 -0
- semantic_code_intelligence/tests/test_ai_features.py +351 -0
- semantic_code_intelligence/tests/test_chunker.py +119 -0
- semantic_code_intelligence/tests/test_cli.py +188 -0
- semantic_code_intelligence/tests/test_config.py +154 -0
- semantic_code_intelligence/tests/test_context.py +381 -0
- semantic_code_intelligence/tests/test_embeddings.py +73 -0
- semantic_code_intelligence/tests/test_endtoend.py +1142 -0
- semantic_code_intelligence/tests/test_enhanced_embeddings.py +92 -0
- semantic_code_intelligence/tests/test_hash_store.py +79 -0
- semantic_code_intelligence/tests/test_logging.py +55 -0
- semantic_code_intelligence/tests/test_new_cli.py +138 -0
- semantic_code_intelligence/tests/test_parser.py +495 -0
- semantic_code_intelligence/tests/test_phase10.py +355 -0
- semantic_code_intelligence/tests/test_phase11.py +593 -0
- semantic_code_intelligence/tests/test_phase12.py +375 -0
- semantic_code_intelligence/tests/test_phase13.py +663 -0
- semantic_code_intelligence/tests/test_phase14.py +568 -0
- semantic_code_intelligence/tests/test_phase15.py +814 -0
- semantic_code_intelligence/tests/test_phase16.py +792 -0
- semantic_code_intelligence/tests/test_phase17.py +815 -0
- semantic_code_intelligence/tests/test_phase18.py +934 -0
- semantic_code_intelligence/tests/test_phase19.py +986 -0
- semantic_code_intelligence/tests/test_phase20.py +2753 -0
- semantic_code_intelligence/tests/test_phase20b.py +2058 -0
- semantic_code_intelligence/tests/test_phase20c.py +962 -0
- semantic_code_intelligence/tests/test_phase21.py +428 -0
- semantic_code_intelligence/tests/test_phase22.py +799 -0
- semantic_code_intelligence/tests/test_phase23.py +783 -0
- semantic_code_intelligence/tests/test_phase24.py +715 -0
- semantic_code_intelligence/tests/test_phase25.py +496 -0
- semantic_code_intelligence/tests/test_phase26.py +251 -0
- semantic_code_intelligence/tests/test_phase27.py +531 -0
- semantic_code_intelligence/tests/test_phase8.py +592 -0
- semantic_code_intelligence/tests/test_phase9.py +643 -0
- semantic_code_intelligence/tests/test_plugins.py +293 -0
- semantic_code_intelligence/tests/test_priority_features.py +727 -0
- semantic_code_intelligence/tests/test_router.py +41 -0
- semantic_code_intelligence/tests/test_scalability.py +138 -0
- semantic_code_intelligence/tests/test_scanner.py +125 -0
- semantic_code_intelligence/tests/test_search.py +160 -0
- semantic_code_intelligence/tests/test_semantic_chunker.py +255 -0
- semantic_code_intelligence/tests/test_tools.py +182 -0
- semantic_code_intelligence/tests/test_vector_store.py +151 -0
- semantic_code_intelligence/tests/test_watcher.py +211 -0
- semantic_code_intelligence/tools/__init__.py +442 -0
- semantic_code_intelligence/tools/executor.py +232 -0
- semantic_code_intelligence/tools/protocol.py +200 -0
- semantic_code_intelligence/tui/__init__.py +454 -0
- semantic_code_intelligence/utils/__init__.py +0 -0
- semantic_code_intelligence/utils/logging.py +112 -0
- semantic_code_intelligence/version.py +3 -0
- semantic_code_intelligence/web/__init__.py +11 -0
- semantic_code_intelligence/web/api.py +289 -0
- semantic_code_intelligence/web/server.py +397 -0
- semantic_code_intelligence/web/ui.py +659 -0
- semantic_code_intelligence/web/visualize.py +226 -0
- semantic_code_intelligence/workspace/__init__.py +427 -0
|
@@ -0,0 +1,557 @@
|
|
|
1
|
+
"""Code quality analyzers — dead code, duplicate logic, complexity, security.
|
|
2
|
+
|
|
3
|
+
All analyzers operate on parsed ``Symbol`` lists and raw file content,
|
|
4
|
+
returning structured reports that are both human-readable (via Rich) and
|
|
5
|
+
machine-parsable (``to_dict()`` → JSON).
|
|
6
|
+
|
|
7
|
+
Uses `radon <https://radon.readthedocs.io/>`_ for AST-based cyclomatic
|
|
8
|
+
complexity analysis on Python files, with a regex fallback for other languages.
|
|
9
|
+
Optionally integrates `bandit <https://bandit.readthedocs.io/>`_ for Python
|
|
10
|
+
security linting.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import math
|
|
16
|
+
import re
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from semantic_code_intelligence.parsing.parser import Symbol, parse_file
|
|
22
|
+
from semantic_code_intelligence.context.engine import CallGraph, ContextBuilder
|
|
23
|
+
from semantic_code_intelligence.llm.safety import SafetyValidator, SafetyReport
|
|
24
|
+
from semantic_code_intelligence.utils.logging import get_logger
|
|
25
|
+
|
|
26
|
+
logger = get_logger("ci.quality")
|
|
27
|
+
|
|
28
|
+
# ── Radon (optional — used for Python AST-based complexity) ──────────
|
|
29
|
+
|
|
30
|
+
try:
|
|
31
|
+
from radon.complexity import cc_visit
|
|
32
|
+
from radon.metrics import mi_visit
|
|
33
|
+
|
|
34
|
+
_HAS_RADON = True
|
|
35
|
+
except ImportError: # pragma: no cover
|
|
36
|
+
_HAS_RADON = False
|
|
37
|
+
|
|
38
|
+
# ── Cyclomatic complexity ────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
# Decision keywords/operators — regex fallback used for non-Python files.
# Each pattern stands for one kind of branch point.
_DECISION_PATTERNS: list[re.Pattern[str]] = [
    # ``\bif\b`` already matches the ``if`` of a C-style ``else if``, so no
    # separate ``else if`` pattern is listed: it would count that single
    # branch twice, unlike Python's ``elif`` which is counted once.
    re.compile(r"\bif\b"),
    re.compile(r"\belif\b"),
    re.compile(r"\bfor\b"),
    re.compile(r"\bwhile\b"),
    re.compile(r"\bcatch\b"),
    re.compile(r"\bexcept\b"),
    re.compile(r"\bcase\b"),
    # No leading ``\b`` here: ``??`` usually follows a space or a closing
    # bracket, and a word boundary never matches between two non-word chars.
    re.compile(r"\?\?"),  # null coalescing
    re.compile(r"\?\s*\."),  # optional chaining
    re.compile(r"\band\b"),
    re.compile(r"\bor\b"),
    re.compile(r"&&"),
    re.compile(r"\|\|"),
]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class ComplexityResult:
    """Cyclomatic-complexity score recorded for one symbol."""

    symbol_name: str
    file_path: str
    start_line: int
    end_line: int
    complexity: int
    rating: str  # one of "low", "moderate", "high", "very_high"

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dict, in declaration order."""
        exported = (
            "symbol_name",
            "file_path",
            "start_line",
            "end_line",
            "complexity",
            "rating",
        )
        return {key: getattr(self, key) for key in exported}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _rate_complexity(score: int) -> str:
    """Translate a complexity score into its rating band.

    Scores up to 5 are "low", up to 10 "moderate", up to 20 "high";
    anything larger is "very_high".
    """
    bands = ((5, "low"), (10, "moderate"), (20, "high"))
    for upper_bound, label in bands:
        if score <= upper_bound:
            return label
    return "very_high"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _compute_complexity_regex(symbol: Symbol) -> ComplexityResult:
    """Estimate complexity by pattern-matching the symbol's body text.

    The score starts at 1 and grows by one for every decision pattern that
    matches a given line at least once. Blank lines and lines beginning with
    ``#`` or ``//`` are ignored.
    """
    trimmed_lines = (raw.strip() for raw in (symbol.body or "").splitlines())
    branch_hits = sum(
        1
        for text in trimmed_lines
        if text and not text.startswith(("#", "//"))
        for matcher in _DECISION_PATTERNS
        if matcher.search(text)
    )
    total = 1 + branch_hits  # 1 accounts for the base path

    return ComplexityResult(
        symbol_name=symbol.name,
        file_path=symbol.file_path,
        start_line=symbol.start_line,
        end_line=symbol.end_line,
        complexity=total,
        rating=_rate_complexity(total),
    )
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _compute_complexity_radon(symbol: Symbol) -> ComplexityResult:
    """Measure the complexity of a Python symbol with radon's ``cc_visit``.

    The body is dedented first so that a uniformly indented snippet still
    parses. A body that does not open with a definition (or with a decorator
    leading into one) is wrapped in a throwaway function so radon has a block
    to score. If the snippet cannot be parsed, the regex estimate is returned
    instead.

    Args:
        symbol: Parsed symbol; its ``body``, ``name``, ``file_path``,
            ``start_line`` and ``end_line`` attributes are read.

    Returns:
        A ``ComplexityResult`` holding the highest score among the blocks
        radon reported, or 1 when it reported none.
    """
    import textwrap

    body = textwrap.dedent(symbol.body or "")

    # Symbols store body text that may or may not include the ``def`` line.
    # A leading ``@`` means the definition is preceded by decorators and is
    # already complete, so it must not be nested inside the wrapper.
    code = body
    if not body.lstrip().startswith(("def ", "class ", "async def ", "@")):
        indented = "\n".join("    " + ln for ln in body.splitlines())
        code = f"def _wrapper():\n{indented}\n"

    try:
        results = cc_visit(code)
    except (SyntaxError, ValueError):
        # Unparsable snippet (ValueError presumably covers sources that older
        # interpreters reject for containing NUL bytes) — use the regex count.
        return _compute_complexity_regex(symbol)

    # Take the largest score reported (normally there is a single block).
    score = max((r.complexity for r in results), default=1)

    return ComplexityResult(
        symbol_name=symbol.name,
        file_path=symbol.file_path,
        start_line=symbol.start_line,
        end_line=symbol.end_line,
        complexity=score,
        rating=_rate_complexity(score),
    )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def compute_complexity(symbol: Symbol) -> ComplexityResult:
    """Return the cyclomatic complexity of one symbol.

    Python files (``.py``) are scored with radon when it is installed;
    everything else goes through the regex-based counter.
    """
    use_radon = _HAS_RADON and symbol.file_path.endswith(".py")
    scorer = _compute_complexity_radon if use_radon else _compute_complexity_regex
    return scorer(symbol)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def analyze_complexity(
    symbols: list[Symbol],
    *,
    threshold: int = 10,
) -> list[ComplexityResult]:
    """Score every function/method and keep those at or above *threshold*.

    The returned list is ordered from most to least complex.
    """
    measured = (
        compute_complexity(candidate)
        for candidate in symbols
        if candidate.kind in ("function", "method")
    )
    flagged = [outcome for outcome in measured if outcome.complexity >= threshold]
    return sorted(flagged, key=lambda outcome: outcome.complexity, reverse=True)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ── Dead code detection ──────────────────────────────────────────────
|
|
173
|
+
|
|
174
|
+
@dataclass
class DeadCodeResult:
    """One symbol that appears to have no references."""

    symbol_name: str
    kind: str
    file_path: str
    start_line: int

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dict, in declaration order."""
        exported = ("symbol_name", "kind", "file_path", "start_line")
        return {key: getattr(self, key) for key in exported}
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# Symbol names treated as reachable even when nothing references them.
_ENTRY_NAMES: set[str] = {
    # script entry point
    "main",
    # object lifecycle / representation
    "__init__",
    "__new__",
    "__del__",
    "__str__",
    "__repr__",
    # context manager, call and attribute protocols
    "__enter__",
    "__exit__",
    "__call__",
    "__getattr__",
    "__setattr__",
    # container / iterator protocols
    "__getitem__",
    "__setitem__",
    "__len__",
    "__iter__",
    "__next__",
    # comparison and hashing
    "__eq__",
    "__hash__",
    "__lt__",
    "__le__",
    "__gt__",
    "__ge__",
    # arithmetic
    "__add__",
    "__sub__",
    "__mul__",
    "__truediv__",
    # test fixtures
    "setUp",
    "tearDown",
    "setUpClass",
    "tearDownClass",
    "setup_method",
    "teardown_method",
    # CodexA plugin factory
    "create_plugin",
}

# Name prefixes that mark entry points / framework hooks.
_ENTRY_PREFIXES: tuple[str, ...] = (
    "test_",  # pytest
    "Test",  # pytest
)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def detect_dead_code(
    symbols: list[Symbol],
    call_graph: CallGraph | None = None,
) -> list[DeadCodeResult]:
    """Detect functions, methods and classes that are never referenced.

    A symbol is kept out of the report when any of the following holds:
    its name is a known entry point (``_ENTRY_NAMES`` / ``_ENTRY_PREFIXES``),
    its name both starts and ends with an underscore, it is the callee of
    some call-graph edge, or its name occurs as a whole word in body text
    beyond its own body.

    Args:
        symbols: Parsed symbols to inspect; ``kind``, ``name``, ``body``,
            ``file_path`` and ``start_line`` are read.
        call_graph: Optional call graph whose ``edges`` supply callee names.

    Returns:
        One ``DeadCodeResult`` per unreferenced symbol, in input order.
    """
    import collections

    if not symbols:
        return []

    # Build reference set from call graph edges.
    referenced: set[str] = set()
    if call_graph:
        for edge in call_graph.edges:
            # Keep only the part after the last ":" (the whole string if none).
            referenced.add(edge.callee.rsplit(":", 1)[-1])

    # Count every word once up front, so each symbol needs only a lookup
    # instead of copying and rescanning the concatenation of all bodies.
    word = re.compile(r"\w+")
    bodies = [s.body for s in symbols if s.body]
    mentions = collections.Counter(
        token for text in bodies for token in word.findall(text)
    )
    all_bodies: str | None = None  # joined lazily, only for non-word names

    results: list[DeadCodeResult] = []
    for sym in symbols:
        if sym.kind not in ("function", "method", "class"):
            continue
        name = sym.name
        if name in _ENTRY_NAMES or name.startswith(_ENTRY_PREFIXES):
            continue
        # Names wrapped in underscores (dunders not listed in _ENTRY_NAMES,
        # and ``_name_`` forms) are treated as implicitly reachable.
        if name.startswith("_") and name.endswith("_"):
            continue

        # Check call graph.
        if name in referenced:
            continue

        # Heuristic: the name is mentioned somewhere other than the
        # symbol's own body.
        if word.fullmatch(name):
            own_mentions = word.findall(sym.body or "").count(name)
            if mentions[name] > own_mentions:
                continue
        else:
            # Names containing non-word characters cannot be looked up in
            # the word counter; search the text with this symbol's body
            # removed once instead.
            if all_bodies is None:
                all_bodies = "\n".join(bodies)
            remainder = all_bodies.replace(sym.body, "", 1) if sym.body else all_bodies
            if re.search(rf"\b{re.escape(name)}\b", remainder):
                continue

        results.append(DeadCodeResult(
            symbol_name=name,
            kind=sym.kind,
            file_path=sym.file_path,
            start_line=sym.start_line,
        ))

    return results
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# ── Duplicate logic detection ────────────────────────────────────────
|
|
268
|
+
|
|
269
|
+
@dataclass
class DuplicateResult:
    """Two symbols whose bodies look alike, with their similarity score."""

    symbol_a: str
    file_a: str
    line_a: int
    symbol_b: str
    file_b: str
    line_b: int
    similarity: float  # 0.0 – 1.0

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dict; ``similarity`` is rounded to 3 places."""
        verbatim = ("symbol_a", "file_a", "line_a", "symbol_b", "file_b", "line_b")
        payload: dict[str, Any] = {key: getattr(self, key) for key in verbatim}
        payload["similarity"] = round(self.similarity, 3)
        return payload
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _normalize_body(body: str) -> str:
    """Return *body* with each line trimmed and blank/comment lines removed.

    A line counts as a comment when, after trimming, it starts with ``#``
    or ``//``.
    """
    trimmed = (raw.strip() for raw in body.splitlines())
    return "\n".join(
        text for text in trimmed if text and not text.startswith(("#", "//"))
    )
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _trigram_set(text: str) -> set[str]:
    """Collect every 3-character window of *text* into a set.

    Text shorter than three characters yields a set holding the text itself,
    or an empty set when the text is empty.
    """
    size = len(text)
    if size >= 3:
        return {text[start : start + 3] for start in range(size - 2)}
    return {text} if text else set()
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _jaccard(a: set[str], b: set[str]) -> float:
    """Return the Jaccard similarity ``|a ∩ b| / |a ∪ b|`` of two sets.

    Two empty sets score 1.0; exactly one empty set scores 0.0.
    """
    if a and b:
        return len(a & b) / len(a | b)
    # At least one side is empty: they are alike only if both are.
    return 0.0 if (a or b) else 1.0
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def detect_duplicates(
    symbols: list[Symbol],
    *,
    threshold: float = 0.75,
    min_lines: int = 4,
) -> list[DuplicateResult]:
    """Detect similar function/method bodies using trigram Jaccard similarity.

    Only symbols whose normalised body has at least *min_lines* lines are
    compared. Symbols are identified by ``(file_path, name, start_line)``,
    so same-named methods in one file are still compared with each other,
    while a symbol listed twice is never paired with itself.

    Args:
        symbols: Parsed symbols; only ``function`` and ``method`` kinds are used.
        threshold: Minimum similarity (inclusive) for a pair to be reported.
        min_lines: Minimum number of normalised body lines to be considered.

    Returns:
        Matching pairs ordered from most to least similar.
    """
    # Pre-filter: keep each eligible symbol with its identity and shingles.
    candidates: list[tuple[Symbol, tuple[str, str, int], set[str]]] = []
    for sym in symbols:
        if sym.kind not in ("function", "method"):
            continue
        norm = _normalize_body(sym.body or "")
        if norm.count("\n") + 1 < min_lines:
            continue
        identity = (sym.file_path, sym.name, sym.start_line)
        candidates.append((sym, identity, _trigram_set(norm)))

    results: list[DuplicateResult] = []
    seen: set[tuple[tuple[str, str, int], tuple[str, str, int]]] = set()

    for i, (sym_a, id_a, tri_a) in enumerate(candidates):
        for j in range(i + 1, len(candidates)):
            sym_b, id_b, tri_b = candidates[j]
            if id_a == id_b:
                # Same symbol present twice in the input — not a duplicate.
                continue
            key = (id_a, id_b)
            if key in seen:
                continue
            sim = _jaccard(tri_a, tri_b)
            if sim >= threshold:
                seen.add(key)
                results.append(DuplicateResult(
                    symbol_a=sym_a.name,
                    file_a=sym_a.file_path,
                    line_a=sym_a.start_line,
                    symbol_b=sym_b.name,
                    file_b=sym_b.file_path,
                    line_b=sym_b.start_line,
                    similarity=sim,
                ))

    results.sort(key=lambda r: r.similarity, reverse=True)
    return results
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
# ── Bandit security linting (optional) ───────────────────────────────
|
|
367
|
+
|
|
368
|
+
try:
|
|
369
|
+
from bandit.core import manager as _bandit_manager
|
|
370
|
+
from bandit.core import config as _bandit_config
|
|
371
|
+
|
|
372
|
+
_HAS_BANDIT = True
|
|
373
|
+
except ImportError: # pragma: no cover
|
|
374
|
+
_HAS_BANDIT = False
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
@dataclass
class BanditIssue:
    """One security finding reported by Bandit."""

    test_id: str
    severity: str  # LOW / MEDIUM / HIGH
    confidence: str  # LOW / MEDIUM / HIGH
    text: str
    file_path: str
    line: int

    def to_dict(self) -> dict[str, Any]:
        """Return every field as a plain dict, in declaration order."""
        exported = ("test_id", "severity", "confidence", "text", "file_path", "line")
        return {key: getattr(self, key) for key in exported}
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def run_bandit_scan(file_paths: list[str]) -> list[BanditIssue]:
    """Scan the ``.py`` entries of *file_paths* with Bandit.

    Yields an empty list when Bandit is not installed, when none of the
    paths ends in ``.py``, or when the scan raises (the error is logged at
    debug level).
    """
    if not _HAS_BANDIT:
        return []

    python_targets = [path for path in file_paths if path.endswith(".py")]
    if not python_targets:
        return []

    findings: list[BanditIssue] = []
    try:
        manager = _bandit_manager.BanditManager(_bandit_config.BanditConfig(), "file")
        manager.discover_files(python_targets)
        manager.run_tests()
        for raw in manager.get_issue_list():
            findings.append(
                BanditIssue(
                    test_id=raw.test_id,
                    severity=str(raw.severity).upper(),
                    confidence=str(raw.confidence).upper(),
                    text=raw.text,
                    file_path=raw.fname,
                    line=raw.lineno,
                )
            )
    except Exception as exc:
        logger.debug("Bandit scan failed: %s", exc)
        return []
    return findings
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
# ── Quality report (aggregate) ───────────────────────────────────────
|
|
434
|
+
|
|
435
|
+
@dataclass
class QualityReport:
    """Combined findings of all analyzers for a project or set of files."""

    files_analyzed: int = 0
    symbol_count: int = 0
    complexity_issues: list[ComplexityResult] = field(default_factory=list)
    dead_code: list[DeadCodeResult] = field(default_factory=list)
    duplicates: list[DuplicateResult] = field(default_factory=list)
    safety: SafetyReport | None = None
    bandit_issues: list[BanditIssue] = field(default_factory=list)
    maintainability_index: float | None = None

    @property
    def issue_count(self) -> int:
        """Total number of findings across all analyzers, safety included."""
        groups = (
            self.complexity_issues,
            self.dead_code,
            self.duplicates,
            self.bandit_issues,
        )
        total = sum(len(group) for group in groups)
        if self.safety:
            total += len(self.safety.issues)
        return total

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly dict; the index is rounded to 2 places."""
        index = self.maintainability_index
        return {
            "files_analyzed": self.files_analyzed,
            "symbol_count": self.symbol_count,
            "issue_count": self.issue_count,
            "complexity_issues": [item.to_dict() for item in self.complexity_issues],
            "dead_code": [item.to_dict() for item in self.dead_code],
            "duplicates": [item.to_dict() for item in self.duplicates],
            "safety": self.safety.to_dict() if self.safety else None,
            "bandit_issues": [item.to_dict() for item in self.bandit_issues],
            "maintainability_index": None if index is None else round(index, 2),
        }
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def analyze_project(
    project_root: Path,
    *,
    file_paths: list[str] | None = None,
    complexity_threshold: int = 10,
    duplicate_threshold: float = 0.75,
    run_safety: bool = True,
) -> QualityReport:
    """Run all quality analyzers on a project or a subset of files.

    Args:
        project_root: Repository root directory.
        file_paths: Optional specific file list. When *None* or empty, the
            project tree is walked for files with a supported extension,
            skipping hidden paths, ``__pycache__`` and ``node_modules``.
        complexity_threshold: Minimum complexity score to report.
        duplicate_threshold: Minimum Jaccard similarity to report as duplicate.
        run_safety: Whether to run the safety validator.

    Returns:
        Aggregated ``QualityReport``. ``files_analyzed`` counts every
        candidate file, including ones skipped because indexing or reading
        failed.
    """
    builder = ContextBuilder()
    all_symbols: list[Symbol] = []
    # Collected per file and joined once, instead of growing one string in
    # a loop.
    code_parts: list[str] = []

    if file_paths:
        files = [str(Path(f).resolve()) for f in file_paths]
    else:
        # Walk project for supported files.
        from semantic_code_intelligence.parsing.parser import EXTENSION_TO_LANGUAGE

        files = []
        for f in project_root.rglob("*"):
            # Cheap string checks first; the filesystem stat comes last.
            if f.suffix not in EXTENSION_TO_LANGUAGE:
                continue
            # Skip hidden dirs, .codexa, __pycache__, node_modules.
            parts = f.relative_to(project_root).parts
            if any(p.startswith(".") or p in ("__pycache__", "node_modules", ".codexa") for p in parts):
                continue
            if f.is_file():
                files.append(str(f))

    for fpath in files:
        try:
            syms = builder.index_file(fpath)
            all_symbols.extend(syms)
            content = Path(fpath).read_text(encoding="utf-8", errors="replace")
            code_parts.append(content)
        except Exception as exc:
            logger.debug("Skipping %s: %s", fpath, exc)

    # Call graph for dead-code analysis.
    call_graph = CallGraph()
    call_graph.build(all_symbols)

    report = QualityReport(
        files_analyzed=len(files),
        symbol_count=len(all_symbols),
    )

    report.complexity_issues = analyze_complexity(
        all_symbols, threshold=complexity_threshold
    )
    report.dead_code = detect_dead_code(all_symbols, call_graph=call_graph)
    report.duplicates = detect_duplicates(
        all_symbols, threshold=duplicate_threshold
    )

    # Bandit security scan (Python files only).
    report.bandit_issues = run_bandit_scan(files)

    # Maintainability index (average across Python files).
    if _HAS_RADON:
        mi_scores: list[float] = []
        for fpath in files:
            if not fpath.endswith(".py"):
                continue
            try:
                code = Path(fpath).read_text(encoding="utf-8", errors="replace")
                score = mi_visit(code, True)
                if isinstance(score, (int, float)):
                    mi_scores.append(float(score))
            except Exception as exc:
                # Best effort: a file radon cannot score is left out of the
                # average, but the reason is logged rather than dropped.
                logger.debug("Maintainability index skipped for %s: %s", fpath, exc)
        if mi_scores:
            report.maintainability_index = sum(mi_scores) / len(mi_scores)

    if run_safety:
        validator = SafetyValidator()
        # Every file's text is followed by a newline, as one combined input.
        all_code = "".join(f"{part}\n" for part in code_parts)
        report.safety = validator.validate(all_code)

    return report
|