python-checkup 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- python_checkup/__init__.py +9 -0
- python_checkup/__main__.py +3 -0
- python_checkup/analysis_request.py +35 -0
- python_checkup/analyzer_catalog.py +100 -0
- python_checkup/analyzers/__init__.py +54 -0
- python_checkup/analyzers/bandit.py +158 -0
- python_checkup/analyzers/basedpyright.py +103 -0
- python_checkup/analyzers/cached.py +106 -0
- python_checkup/analyzers/dependency_vulns.py +298 -0
- python_checkup/analyzers/deptry.py +142 -0
- python_checkup/analyzers/detect_secrets.py +101 -0
- python_checkup/analyzers/mypy.py +217 -0
- python_checkup/analyzers/radon.py +150 -0
- python_checkup/analyzers/registry.py +69 -0
- python_checkup/analyzers/ruff.py +256 -0
- python_checkup/analyzers/typos.py +80 -0
- python_checkup/analyzers/vulture.py +151 -0
- python_checkup/cache.py +244 -0
- python_checkup/cli.py +763 -0
- python_checkup/config.py +87 -0
- python_checkup/dedup.py +119 -0
- python_checkup/dependencies/discovery.py +192 -0
- python_checkup/detection.py +298 -0
- python_checkup/diff.py +130 -0
- python_checkup/discovery.py +180 -0
- python_checkup/formatters/__init__.py +0 -0
- python_checkup/formatters/badge.py +38 -0
- python_checkup/formatters/json_fmt.py +22 -0
- python_checkup/formatters/terminal.py +396 -0
- python_checkup/mcp/__init__.py +3 -0
- python_checkup/mcp/installer.py +119 -0
- python_checkup/mcp/server.py +411 -0
- python_checkup/models.py +114 -0
- python_checkup/plan.py +109 -0
- python_checkup/progress.py +95 -0
- python_checkup/runner.py +438 -0
- python_checkup/scoring/__init__.py +0 -0
- python_checkup/scoring/engine.py +397 -0
- python_checkup/skills/SKILL.md +416 -0
- python_checkup/skills/__init__.py +0 -0
- python_checkup/skills/agents.py +98 -0
- python_checkup/skills/installer.py +248 -0
- python_checkup/skills/rule_db.py +806 -0
- python_checkup/web/__init__.py +0 -0
- python_checkup/web/server.py +285 -0
- python_checkup/web/static/__init__.py +0 -0
- python_checkup/web/static/index.html +959 -0
- python_checkup/web/template.py +26 -0
- python_checkup-0.0.1.dist-info/METADATA +250 -0
- python_checkup-0.0.1.dist-info/RECORD +53 -0
- python_checkup-0.0.1.dist-info/WHEEL +4 -0
- python_checkup-0.0.1.dist-info/entry_points.txt +14 -0
- python_checkup-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from python_checkup.config import CheckupConfig
|
|
4
|
+
from python_checkup.models import (
|
|
5
|
+
Category,
|
|
6
|
+
CategoryScore,
|
|
7
|
+
CoverageInfo,
|
|
8
|
+
Diagnostic,
|
|
9
|
+
HealthReport,
|
|
10
|
+
ProjectInfo,
|
|
11
|
+
Severity,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# --- Critical security rules that cap the security score ---
# Both Bandit ("Bxxx") and the equivalent Ruff flake8-bandit ("Sxxx") IDs
# are listed so the cap applies no matter which analyzer reported the
# finding. Any diagnostic whose rule_id appears here caps the security
# category score at 25 (see _score_security).
CRITICAL_RULES: set[str] = {
    "B608",
    "S608",  # SQL injection
    "B105",
    "S105",  # Hardcoded password string
    "B106",
    "S106",  # Hardcoded password in function arg
    "B107",
    "S107",  # Hardcoded password in default
    "B602",
    "S602",  # subprocess with shell=True
    "B301",
    "S301",  # pickle (arbitrary code exec)
    "B614",  # torch.load
    "B615",  # HuggingFace unsafe
    "B701",
    "S701",  # Jinja2 autoescape false
    "B501",
    "S501",  # requests with no cert validation
}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def compute_health_report(
|
|
38
|
+
diagnostics: list[Diagnostic],
|
|
39
|
+
project: ProjectInfo,
|
|
40
|
+
config: CheckupConfig,
|
|
41
|
+
duration_ms: int,
|
|
42
|
+
analyzers_used: list[str],
|
|
43
|
+
analyzers_skipped: list[str],
|
|
44
|
+
mi_scores: list[float] | None = None,
|
|
45
|
+
cache_stats: dict[str, int] | None = None,
|
|
46
|
+
coverage: CoverageInfo | None = None,
|
|
47
|
+
) -> HealthReport:
|
|
48
|
+
"""Compute the full health report from diagnostics and metadata.
|
|
49
|
+
|
|
50
|
+
This is the main entry point for scoring. It:
|
|
51
|
+
1. Determines which categories are active (have analyzers)
|
|
52
|
+
2. Redistributes weights for missing categories
|
|
53
|
+
3. Computes per-category scores
|
|
54
|
+
4. Computes the weighted overall score
|
|
55
|
+
5. Assembles the HealthReport
|
|
56
|
+
"""
|
|
57
|
+
# Determine available categories from the tools that ran
|
|
58
|
+
available_categories = _categories_from_analyzers(analyzers_used)
|
|
59
|
+
|
|
60
|
+
# Redistribute weights
|
|
61
|
+
redistributed = redistribute_weights(config.weights, available_categories)
|
|
62
|
+
|
|
63
|
+
# Compute per-category scores
|
|
64
|
+
kloc = max(project.total_lines / 1000.0, 0.1)
|
|
65
|
+
category_scores: list[CategoryScore] = []
|
|
66
|
+
|
|
67
|
+
for cat, weight in redistributed.items():
|
|
68
|
+
cat_diags = [d for d in diagnostics if d.category == cat]
|
|
69
|
+
errors = sum(1 for d in cat_diags if d.severity == Severity.ERROR)
|
|
70
|
+
warnings = sum(1 for d in cat_diags if d.severity == Severity.WARNING)
|
|
71
|
+
|
|
72
|
+
score: float
|
|
73
|
+
match cat:
|
|
74
|
+
case Category.QUALITY:
|
|
75
|
+
score = _score_quality(cat_diags, kloc)
|
|
76
|
+
case Category.TYPE_SAFETY:
|
|
77
|
+
score = _score_type_safety(cat_diags, project.total_files)
|
|
78
|
+
case Category.SECURITY:
|
|
79
|
+
score = _score_security(cat_diags)
|
|
80
|
+
case Category.COMPLEXITY:
|
|
81
|
+
score = _score_complexity(mi_scores or [], cat_diags)
|
|
82
|
+
case Category.DEAD_CODE:
|
|
83
|
+
score = _score_dead_code(cat_diags, project.total_lines)
|
|
84
|
+
case Category.DEPENDENCIES:
|
|
85
|
+
score = _score_dependencies(cat_diags)
|
|
86
|
+
|
|
87
|
+
details = _build_details(errors, warnings, len(cat_diags))
|
|
88
|
+
coverage_note = ""
|
|
89
|
+
status = "scored"
|
|
90
|
+
if coverage is not None:
|
|
91
|
+
for item in coverage.category_coverage:
|
|
92
|
+
if item.category == cat:
|
|
93
|
+
status = item.status
|
|
94
|
+
coverage_note = item.reason
|
|
95
|
+
break
|
|
96
|
+
category_scores.append(
|
|
97
|
+
CategoryScore(
|
|
98
|
+
category=cat,
|
|
99
|
+
score=int(score),
|
|
100
|
+
weight=weight,
|
|
101
|
+
issue_count=len(cat_diags),
|
|
102
|
+
error_count=errors,
|
|
103
|
+
warning_count=warnings,
|
|
104
|
+
details=details,
|
|
105
|
+
status=status,
|
|
106
|
+
coverage_note=coverage_note,
|
|
107
|
+
)
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# Overall score
|
|
111
|
+
overall, label = _compute_overall(category_scores, config.thresholds)
|
|
112
|
+
|
|
113
|
+
return HealthReport(
|
|
114
|
+
score=overall,
|
|
115
|
+
label=label,
|
|
116
|
+
category_scores=sorted(
|
|
117
|
+
category_scores,
|
|
118
|
+
key=lambda cs: cs.weight,
|
|
119
|
+
reverse=True,
|
|
120
|
+
),
|
|
121
|
+
diagnostics=diagnostics,
|
|
122
|
+
project=project,
|
|
123
|
+
duration_ms=duration_ms,
|
|
124
|
+
analyzers_used=analyzers_used,
|
|
125
|
+
analyzers_skipped=analyzers_skipped,
|
|
126
|
+
cache_stats=cache_stats,
|
|
127
|
+
coverage=coverage,
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# --- Per-category scoring functions ---
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _score_quality(diagnostics: list[Diagnostic], kloc: float) -> float:
    """Score code quality from Ruff diagnostics.

    Counts are normalized per KLOC so that a 500-line project and a
    50,000-line project land on the same scale. Each error costs 3
    points per KLOC; each warning costs 1 point per KLOC. The result
    is clamped to [0, 100].
    """
    n_errors = 0
    n_warnings = 0
    for diag in diagnostics:
        if diag.severity == Severity.ERROR:
            n_errors += 1
        elif diag.severity == Severity.WARNING:
            n_warnings += 1
    penalty = (3.0 * n_errors / kloc) + (1.0 * n_warnings / kloc)
    return min(100.0, max(0.0, 100.0 - penalty))
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _score_type_safety(diagnostics: list[Diagnostic], total_files: int) -> float:
    """Score type safety from mypy diagnostics.

    The score is the file-level pass rate: the percentage of files with
    zero type errors. Scoring per file (not per error) rewards gradual
    typing adoption.
    """
    if total_files <= 0:
        # No files analyzed: nothing can fail, report a perfect score.
        return 100.0
    failing_files: set[str] = set()
    for diag in diagnostics:
        if diag.severity == Severity.ERROR:
            failing_files.add(diag.file_path)
    pass_rate = 1.0 - (len(failing_files) / total_files)
    return min(100.0, max(0.0, pass_rate * 100.0))
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _score_security(diagnostics: list[Diagnostic]) -> float:
    """Score security from Bandit/Ruff S-rule diagnostics.

    Only errors (-15 each) and warnings (-5 each) affect the score.
    INFO-level findings (e.g. B404 "import subprocess", B603
    "subprocess without shell") are advisory notices, not actionable
    problems, so they are shown in the report but never penalised.

    A finding whose rule is in CRITICAL_RULES (SQL injection, hardcoded
    secrets, pickle, ...) caps the score at 25 regardless of totals.
    """
    if not diagnostics:
        return 100.0

    running = 100.0
    critical_seen = False

    for finding in diagnostics:
        sev = finding.severity
        if sev == Severity.ERROR:
            running -= 15.0
        elif sev == Severity.WARNING:
            running -= 5.0
        # INFO: advisory — no penalty.
        if finding.rule_id in CRITICAL_RULES:
            critical_seen = True

    if critical_seen:
        running = min(running, 25.0)

    return max(0.0, running)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _score_complexity(mi_scores: list[float], diagnostics: list[Diagnostic]) -> float:
|
|
198
|
+
"""Score complexity using Radon's Maintainability Index.
|
|
199
|
+
|
|
200
|
+
MI is a 0-100 scale where higher is better, but it is *not*
|
|
201
|
+
directly usable as a quality score. Industry convention (Microsoft
|
|
202
|
+
Visual Studio, Radon docs) treats MI >= 20 as "maintainable" and
|
|
203
|
+
typical well-written code lands at 40-70. Using raw MI would make
|
|
204
|
+
healthy codebases look mediocre.
|
|
205
|
+
|
|
206
|
+
We rescale so the practical MI range maps to an intuitive score:
|
|
207
|
+
|
|
208
|
+
MI >= 65 → 100 (excellent)
|
|
209
|
+
MI 20-65 → 60-100 (linear, good to excellent)
|
|
210
|
+
MI < 20 → 0-60 (linear, needs work)
|
|
211
|
+
|
|
212
|
+
Falls back to CC-based scoring from diagnostics when MI is not
|
|
213
|
+
available.
|
|
214
|
+
"""
|
|
215
|
+
if mi_scores:
|
|
216
|
+
avg_mi = sum(mi_scores) / len(mi_scores)
|
|
217
|
+
return _mi_to_score(avg_mi)
|
|
218
|
+
|
|
219
|
+
# Fallback: use complexity diagnostics (C901 from Ruff)
|
|
220
|
+
if not diagnostics:
|
|
221
|
+
return 100.0
|
|
222
|
+
|
|
223
|
+
penalty = len(diagnostics) * 5.0
|
|
224
|
+
return max(0.0, min(100.0, 100.0 - penalty))
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _mi_to_score(avg_mi: float) -> float:
|
|
228
|
+
"""Convert a Maintainability Index average to a 0-100 health score."""
|
|
229
|
+
if avg_mi >= 65.0:
|
|
230
|
+
return 100.0
|
|
231
|
+
if avg_mi >= 20.0:
|
|
232
|
+
# Linear: MI 20 → score 60, MI 65 → score 100
|
|
233
|
+
return 60.0 + (avg_mi - 20.0) * (40.0 / 45.0)
|
|
234
|
+
# Below 20: MI 0 → score 0, MI 20 → score 60
|
|
235
|
+
return max(0.0, avg_mi * 3.0)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _score_dead_code(diagnostics: list[Diagnostic], total_lines: int) -> float:
|
|
239
|
+
"""Score dead code from Vulture diagnostics.
|
|
240
|
+
|
|
241
|
+
Formula: 100 - (dead_code_percentage * 2). A project with 10%
|
|
242
|
+
dead code scores 80. >50% dead code scores 0.
|
|
243
|
+
"""
|
|
244
|
+
if total_lines <= 0 or not diagnostics:
|
|
245
|
+
return 100.0
|
|
246
|
+
|
|
247
|
+
dead_lines = 0
|
|
248
|
+
for d in diagnostics:
|
|
249
|
+
if d.end_line and d.line:
|
|
250
|
+
dead_lines += d.end_line - d.line + 1
|
|
251
|
+
else:
|
|
252
|
+
dead_lines += 1 # Conservative: count as 1 line
|
|
253
|
+
|
|
254
|
+
dead_pct = (dead_lines / total_lines) * 100.0
|
|
255
|
+
return max(0.0, min(100.0, 100.0 - (dead_pct * 2.0)))
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _score_dependencies(diagnostics: list[Diagnostic]) -> float:
|
|
259
|
+
"""Score dependency health from dependency advisories and dep hygiene.
|
|
260
|
+
|
|
261
|
+
Deductions:
|
|
262
|
+
- 10 points per known vulnerability (CVE/GHSA/PYSEC)
|
|
263
|
+
- 5 points per unused dependency (DEP002)
|
|
264
|
+
- 3 points per missing dependency (DEP001)
|
|
265
|
+
- 2 points per transitive/misplaced dep (DEP003/DEP004)
|
|
266
|
+
"""
|
|
267
|
+
if not diagnostics:
|
|
268
|
+
return 100.0
|
|
269
|
+
|
|
270
|
+
score = 100.0
|
|
271
|
+
for d in diagnostics:
|
|
272
|
+
rule = d.rule_id.upper()
|
|
273
|
+
if rule.startswith(("CVE", "GHSA", "PYSEC")):
|
|
274
|
+
score -= 10.0
|
|
275
|
+
elif rule == "DEP002":
|
|
276
|
+
score -= 5.0
|
|
277
|
+
elif rule == "DEP001":
|
|
278
|
+
score -= 3.0
|
|
279
|
+
elif rule in ("DEP003", "DEP004"):
|
|
280
|
+
score -= 2.0
|
|
281
|
+
else:
|
|
282
|
+
score -= 3.0
|
|
283
|
+
|
|
284
|
+
return max(0.0, score)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# --- Helpers ---
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _compute_overall(
|
|
291
|
+
category_scores: list[CategoryScore],
|
|
292
|
+
thresholds: dict[str, int],
|
|
293
|
+
) -> tuple[int, str]:
|
|
294
|
+
active = [cs for cs in category_scores if cs.weight > 0]
|
|
295
|
+
if not active:
|
|
296
|
+
return 100, "Healthy"
|
|
297
|
+
|
|
298
|
+
total_weight = sum(cs.weight for cs in active)
|
|
299
|
+
if total_weight == 0:
|
|
300
|
+
return 100, "Healthy"
|
|
301
|
+
|
|
302
|
+
score = sum(cs.score * cs.weight for cs in active) / total_weight
|
|
303
|
+
score = int(max(0, min(100, score)))
|
|
304
|
+
|
|
305
|
+
healthy = thresholds.get("healthy", 75)
|
|
306
|
+
needs_work = thresholds.get("needs_work", 50)
|
|
307
|
+
|
|
308
|
+
if score >= healthy:
|
|
309
|
+
label = "Healthy"
|
|
310
|
+
elif score >= needs_work:
|
|
311
|
+
label = "Needs work"
|
|
312
|
+
else:
|
|
313
|
+
label = "Critical"
|
|
314
|
+
|
|
315
|
+
return score, label
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def redistribute_weights(
    configured_weights: dict[str, int],
    available_categories: set[Category],
) -> dict[Category, int]:
    """Redistribute weights from unavailable categories.

    Categories with no analyzer are dropped and their weight is spread
    proportionally across the remaining categories so that the result
    still sums to exactly 100.
    """
    name_to_category = {
        "quality": Category.QUALITY,
        "types": Category.TYPE_SAFETY,
        "security": Category.SECURITY,
        "complexity": Category.COMPLEXITY,
        "dead_code": Category.DEAD_CODE,
        "dependencies": Category.DEPENDENCIES,
    }

    # Keep only weights whose category actually has an analyzer.
    active = {
        cat: weight
        for key, weight in configured_weights.items()
        if (cat := name_to_category.get(key)) and cat in available_categories
    }
    if not active:
        return {}

    combined = sum(active.values())
    if combined == 0:
        return active

    # Scale proportionally so the active weights total ~100.
    factor = 100.0 / combined
    scaled = {cat: round(weight * factor) for cat, weight in active.items()}

    # round() can leave the total a point or two off 100; absorb the
    # difference into the heaviest category.
    remainder = 100 - sum(scaled.values())
    if remainder != 0:
        heaviest = max(scaled, key=lambda c: scaled[c])
        scaled[heaviest] += remainder

    return scaled
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def _categories_from_analyzers(
    analyzers_used: list[str],
) -> set[Category]:
    """Return the union of score categories covered by the analyzers that ran."""
    tool_categories: dict[str, set[Category]] = {
        "ruff": {
            Category.QUALITY,
            Category.SECURITY,
            Category.COMPLEXITY,
        },
        "mypy": {Category.TYPE_SAFETY},
        "bandit": {Category.SECURITY},
        "radon": {Category.COMPLEXITY},
        "vulture": {Category.DEAD_CODE},
        "deptry": {Category.DEPENDENCIES},
        "dependency-vulns": {Category.DEPENDENCIES},
        "detect-secrets": {Category.SECURITY},
        "basedpyright": {Category.TYPE_SAFETY},
        "typos": {Category.QUALITY},
    }
    covered: set[Category] = set()
    for tool in analyzers_used:
        # Unknown tools contribute nothing rather than raising.
        covered |= tool_categories.get(tool, set())
    return covered
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _build_details(errors: int, warnings: int, total: int) -> str:
|
|
389
|
+
parts = []
|
|
390
|
+
if errors:
|
|
391
|
+
parts.append(f"{errors} error{'s' if errors != 1 else ''}")
|
|
392
|
+
if warnings:
|
|
393
|
+
parts.append(f"{warnings} warning{'s' if warnings != 1 else ''}")
|
|
394
|
+
infos = total - errors - warnings
|
|
395
|
+
if infos > 0:
|
|
396
|
+
parts.append(f"{infos} info")
|
|
397
|
+
return ", ".join(parts) if parts else "No issues"
|