preflight-scan 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- preflight/__init__.py +13 -0
- preflight/__main__.py +3 -0
- preflight/checks.py +764 -0
- preflight/cli.py +102 -0
- preflight/report.py +79 -0
- preflight_scan-0.1.0.dist-info/METADATA +116 -0
- preflight_scan-0.1.0.dist-info/RECORD +11 -0
- preflight_scan-0.1.0.dist-info/WHEEL +5 -0
- preflight_scan-0.1.0.dist-info/entry_points.txt +2 -0
- preflight_scan-0.1.0.dist-info/licenses/LICENSE +21 -0
- preflight_scan-0.1.0.dist-info/top_level.txt +1 -0
preflight/__init__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""preflight — is your vibe-coded project ready to ship?
|
|
2
|
+
|
|
3
|
+
A zero-dependency static scanner that reads a codebase the way a senior
|
|
4
|
+
engineer skims it on day one: where's the dead weight, what's duplicated,
|
|
5
|
+
what's going to break in production, and what's missing before this can be
|
|
6
|
+
deployed as a real product.
|
|
7
|
+
|
|
8
|
+
Runs entirely offline. No LLM calls. Nothing leaves your machine.
|
|
9
|
+
"""
|
|
10
|
+
__version__ = "0.1.0"
|
|
11
|
+
|
|
12
|
+
from .checks import scan_project, Finding, ScanResult # noqa: F401
|
|
13
|
+
from .report import render_markdown, render_terminal # noqa: F401
|
preflight/__main__.py
ADDED
preflight/checks.py
ADDED
|
@@ -0,0 +1,764 @@
|
|
|
1
|
+
"""All analyzers. Each check reads the shared FileIndex and yields Findings.
|
|
2
|
+
|
|
3
|
+
Design rules (the same ones the report enforces on your code):
|
|
4
|
+
- stdlib only, no network, no LLM;
|
|
5
|
+
- conservative by default — a finding should be worth a human's time;
|
|
6
|
+
- every finding carries concrete advice, not just a complaint.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import ast
|
|
11
|
+
import hashlib
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import sys
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
|
|
17
|
+
# Directories that are never the user's source code.
|
|
18
|
+
SKIP_DIRS = {
|
|
19
|
+
".git", ".hg", ".svn", "__pycache__", ".pytest_cache", ".mypy_cache",
|
|
20
|
+
".ruff_cache", ".tox", ".venv", "venv", "env", "node_modules", "dist",
|
|
21
|
+
"build", ".next", ".nuxt", "coverage", ".idea", ".vscode", ".eggs",
|
|
22
|
+
"site-packages", ".cache", "htmlcov",
|
|
23
|
+
}
|
|
24
|
+
TEXT_EXTS = {".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", ".json",
|
|
25
|
+
".md", ".txt", ".toml", ".cfg", ".ini", ".yml", ".yaml", ".env",
|
|
26
|
+
".sh", ".bat", ".html", ".css"}
|
|
27
|
+
CODE_EXTS = {".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"}
|
|
28
|
+
MAX_TEXT_BYTES = 1_500_000
|
|
29
|
+
|
|
30
|
+
SEVERITY_ORDER = {"high": 0, "medium": 1, "low": 2, "info": 3}
|
|
31
|
+
SEVERITY_PENALTY = {"high": 10, "medium": 4, "low": 1, "info": 0}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class Finding:
    """One issue reported by a check.

    Carries the name of the check that produced it, a severity label, a
    short title, a longer detail string, concrete advice, and the list of
    paths the finding refers to.
    """

    check: str
    severity: str  # high | medium | low | info
    title: str
    detail: str
    advice: str
    paths: list = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the finding as a plain dict; ``paths`` is a fresh copy."""
        as_dict = dict(
            check=self.check,
            severity=self.severity,
            title=self.title,
            detail=self.detail,
            advice=self.advice,
        )
        as_dict["paths"] = list(self.paths)
        return as_dict
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class ScanResult:
    """Outcome of one scan: the scanned root, counters, score/grade and findings."""

    root: str
    findings: list
    files_scanned: int
    code_files: int
    total_lines: int
    score: int
    grade: str

    def to_dict(self) -> dict:
        """Return a plain-dict view; each finding is converted via its ``to_dict``."""
        scalar_fields = ("root", "files_scanned", "code_files",
                         "total_lines", "score", "grade")
        summary = {name: getattr(self, name) for name in scalar_fields}
        summary["findings"] = [item.to_dict() for item in self.findings]
        return summary
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# ── File index ────────────────────────────────────────────────────────────────
|
|
67
|
+
|
|
68
|
+
class FileIndex:
    """One walk of the tree; everything else reads from here.

    After construction:
      - ``files``       every file found (repo-relative, ``/``-separated);
      - ``text``        rel path -> decoded content, for files whose extension
                        is in ``TEXT_EXTS`` and whose size is within
                        ``MAX_TEXT_BYTES``;
      - ``py_ast``      rel path -> parsed ``ast.Module`` for ``.py`` files;
      - ``py_bad``      ``.py`` files that could not be parsed;
      - ``total_lines`` line count summed over files in ``CODE_EXTS``.
    """

    def __init__(self, root: str):
        self.root = os.path.abspath(root)
        self.files: list = []        # rel paths, all files
        self.text: dict = {}         # rel path -> str content (text files)
        self.py_ast: dict = {}       # rel path -> ast.Module (parse ok)
        self.py_bad: list = []       # rel paths that fail to parse
        self.total_lines = 0
        self._walk()

    def _walk(self) -> None:
        """Populate the index with a single sorted ``os.walk`` of ``self.root``."""
        for dirpath, dirnames, filenames in os.walk(self.root):
            # Prune in place so os.walk never descends into skipped dirs.
            dirnames[:] = [d for d in sorted(dirnames) if d not in SKIP_DIRS]
            for name in sorted(filenames):
                full = os.path.join(dirpath, name)
                rel = os.path.relpath(full, self.root).replace(os.sep, "/")
                self.files.append(rel)
                ext = os.path.splitext(name)[1].lower()
                if not ext and name.startswith("."):
                    # splitext(".env") yields ("", "") for the extension, so a
                    # bare dotfile would never match the ".env" entry in
                    # TEXT_EXTS; use the whole name as its extension.
                    ext = name.lower()
                if ext not in TEXT_EXTS:
                    continue
                try:
                    if os.path.getsize(full) > MAX_TEXT_BYTES:
                        continue
                    with open(full, encoding="utf-8", errors="replace") as fh:
                        content = fh.read()
                except OSError:
                    continue
                self.text[rel] = content
                if ext in CODE_EXTS:
                    self.total_lines += content.count("\n") + 1
                if ext == ".py":
                    try:
                        self.py_ast[rel] = ast.parse(content)
                    except (SyntaxError, ValueError, RecursionError):
                        # ValueError: source containing NUL bytes on older
                        # interpreters; RecursionError: pathologically nested
                        # source. Either way the file is unusable, and one bad
                        # file must not abort the whole index.
                        self.py_bad.append(rel)

    def code_paths(self) -> list:
        """Return the indexed text files whose extension is in ``CODE_EXTS``."""
        return [p for p in self.text
                if os.path.splitext(p)[1].lower() in CODE_EXTS]

    def is_testish(self, rel: str) -> bool:
        """Heuristic: True when the path looks like test code.

        Matches a basename containing ``test``, anything under a ``tests/``
        directory, or any path containing ``conftest`` (case-insensitive).
        """
        low = rel.lower()
        return ("test" in low.split("/")[-1] or low.startswith("tests/")
                or "/tests/" in low or "conftest" in low)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# ── Python import graph (dead modules + cycles) ───────────────────────────────
|
|
117
|
+
|
|
118
|
+
def _py_module_name(rel: str) -> str:
|
|
119
|
+
return rel[:-3].replace("/", ".").removesuffix(".__init__")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _build_import_graph(idx: FileIndex, top_level_only: bool = False) -> dict:
    """module name -> set of imported module names (only project-local).

    top_level_only=True ignores imports inside functions/methods — those are
    the *standard fix* for circular imports, so the cycle check must not
    count them.  The dead-module check uses the full graph (a lazy import
    still keeps a module alive).

    Each import target is resolved to the longest dotted prefix that names a
    module in the index; targets with no such prefix (stdlib, third-party)
    are dropped, as are self-references.
    """
    local = {_py_module_name(p): p for p in idx.py_ast}
    graph: dict = {m: set() for m in local}
    for rel, tree in idx.py_ast.items():
        me = _py_module_name(rel)
        # for __init__.py the module name IS the package, so relative imports
        # resolve against it directly; for plain modules, against the parent.
        pkg_parts = me.split(".") if rel.endswith("__init__.py") \
            else me.split(".")[:-1]
        if top_level_only:
            # Manual walk that does not descend into function bodies.
            nodes = []
            stack = list(ast.iter_child_nodes(tree))
            while stack:
                n = stack.pop()
                if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    continue
                nodes.append(n)
                stack.extend(ast.iter_child_nodes(n))
        else:
            nodes = list(ast.walk(tree))
        for node in nodes:
            targets = []
            if isinstance(node, ast.Import):
                targets = [a.name for a in node.names]
            elif isinstance(node, ast.ImportFrom):
                if node.level:                     # relative import
                    keep = len(pkg_parts) - node.level + 1
                    if keep < 0:
                        # More leading dots than enclosing packages: a
                        # negative slice bound would silently resolve against
                        # the wrong ancestor, so treat it as unresolvable.
                        continue
                    base = pkg_parts[:keep]
                    mod = ".".join(base + ([node.module] if node.module else []))
                    # With an empty base ("from . import x" at the scan root)
                    # the name must be "x", not ".x".
                    targets = [mod] + [f"{mod}.{a.name}" if mod else a.name
                                       for a in node.names]
                elif node.module:
                    targets = [node.module] + \
                        [f"{node.module}.{a.name}" for a in node.names]
            for t in targets:
                # Strip trailing components until a local module matches.
                while t:
                    if t in local and t != me:
                        graph[me].add(t)
                        break
                    t = t.rpartition(".")[0]
    return graph
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
_ENTRYISH = {"__init__", "__main__", "cli", "main", "app", "setup",
|
|
170
|
+
"conftest", "manage", "wsgi", "asgi", "run", "server"}
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def check_dead_modules(idx: FileIndex) -> list:
    """Report Python modules that nothing in the project appears to import.

    A module is exempt when it is imported somewhere (full import graph), has
    an entry-point-like basename, looks like a test, sits at the repo root or
    under examples/ scripts/ tools/, or is mentioned by name in a packaging
    manifest.  Returns a single high-severity Finding, or an empty list.
    """
    imported = set().union(*_build_import_graph(idx).values())
    # entry points declared in pyproject (and friends) count as roots
    manifest_text = "".join(
        idx.text.get(name, "")
        for name in ("pyproject.toml", "setup.py", "setup.cfg", "package.json"))
    modules = {_py_module_name(p): p for p in idx.py_ast}
    dead = []
    for mod in sorted(modules):
        rel = modules[mod]
        base = mod.split(".")[-1]
        reachable = mod in imported or base in _ENTRYISH
        # repo-root scripts, tests and example/tooling trees are skipped
        exempt_location = (idx.is_testish(rel) or "/" not in rel
                           or "examples/" in rel or "scripts/" in rel
                           or "tools/" in rel)
        named_in_manifest = base in manifest_text or mod in manifest_text
        if not (reachable or exempt_location or named_in_manifest):
            dead.append(rel)
    if not dead:
        return []
    finding = Finding(
        check="dead_modules", severity="high",
        title=f"{len(dead)} module(s) appear unreachable",
        detail="No other module imports these, they are not entry points, "
               "tests, or examples. Dead modules confuse contributors, hide "
               "in security audits, and rot silently.",
        advice="Verify with your test suite, then delete them (keep them in "
               "git history or an attic/ folder outside the package).",
        paths=dead)
    return [finding]
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def check_import_cycles(idx: FileIndex) -> list:
    """Report circular import chains among module-level imports.

    Uses the top-level-only import graph, so imports deferred into functions
    do not count.  Returns one medium-severity Finding listing up to five
    cycles, or an empty list.
    """
    graph = _build_import_graph(idx, top_level_only=True)
    # Iterative DFS; a back edge to a node on the current path is a cycle.
    # Cycles are deduplicated by their member set.
    cycles, cycle_keys, seen = [], set(), set()
    for start in graph:
        if start in seen:
            continue
        # Neighbours are visited in sorted order: the adjacency values are
        # sets of str, whose iteration order changes between interpreter runs
        # under hash randomisation, which would make the report unstable.
        stack = [(start, iter(sorted(graph[start])))]
        path, on_path = [start], {start}
        while stack:
            node, it = stack[-1]
            advanced = False
            for nxt in it:
                if nxt in on_path:
                    cyc = path[path.index(nxt):] + [nxt]
                    key = frozenset(cyc)
                    if key not in cycle_keys:
                        cycle_keys.add(key)
                        cycles.append(cyc)
                elif nxt not in seen:
                    stack.append((nxt, iter(sorted(graph[nxt]))))
                    path.append(nxt)
                    on_path.add(nxt)
                    advanced = True
                    break
            if not advanced:
                # All neighbours exhausted: retire the node and backtrack.
                seen.add(node)
                stack.pop()
                path.pop()
                on_path.discard(node)
    if not cycles:
        return []
    shown = [" -> ".join(c) for c in cycles[:5]]
    return [Finding(
        check="import_cycles", severity="medium",
        title=f"{len(cycles)} circular import chain(s)",
        detail="Cycles: " + "; ".join(shown) +
               (" (more not shown)" if len(cycles) > 5 else ""),
        advice="Break each cycle by moving the shared piece into its own "
               "module, or defer one import into the function that needs it.")]
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# ── Duplication ───────────────────────────────────────────────────────────────
|
|
248
|
+
|
|
249
|
+
def _normalized_lines(content: str, ext: str) -> list:
|
|
250
|
+
out = []
|
|
251
|
+
for line in content.splitlines():
|
|
252
|
+
s = line.strip()
|
|
253
|
+
if not s:
|
|
254
|
+
continue
|
|
255
|
+
if ext == ".py" and s.startswith("#"):
|
|
256
|
+
continue
|
|
257
|
+
if ext != ".py" and (s.startswith("//") or s.startswith("*")):
|
|
258
|
+
continue
|
|
259
|
+
out.append(s)
|
|
260
|
+
return out
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
WINDOW = 8
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def check_duplication(idx: FileIndex) -> list:
    """Report copy-pasted code between files and byte-identical code files.

    Every run of ``WINDOW`` consecutive normalized lines is hashed.  A file
    pair is reported once it shares at least three such windows; a file is
    reported against itself when a window occurs three or more times in it
    alone.  Separately, code files with identical content are grouped.
    """
    occurrences: dict = {}
    for rel in idx.code_paths():
        ext = os.path.splitext(rel)[1].lower()
        lines = _normalized_lines(idx.text[rel], ext)
        for i in range(len(lines) - WINDOW + 1):
            chunk = "\n".join(lines[i:i + WINDOW]).encode()
            # md5 is only a fingerprint here; flag it as non-security so the
            # call also works on FIPS-restricted builds.
            h = hashlib.md5(chunk, usedforsecurity=False).hexdigest()
            occurrences.setdefault(h, []).append(rel)
    pair_hits: dict = {}
    for paths in occurrences.values():
        uniq = sorted(set(paths))
        if len(uniq) >= 2:
            for i, a in enumerate(uniq):
                for b in uniq[i + 1:]:
                    pair_hits[(a, b)] = pair_hits.get((a, b), 0) + 1
        elif len(paths) >= 3:                      # heavy self-duplication
            key = (uniq[0], uniq[0])
            pair_hits[key] = pair_hits.get(key, 0) + 1
    findings = []
    serious = {k: v for k, v in pair_hits.items() if v >= 3}
    if serious:
        top = sorted(serious.items(), key=lambda kv: -kv[1])[:10]
        lines = [(f"{a} <-> {b}" if a != b else f"{a} (within itself)") +
                 f": ~{n * WINDOW}+ duplicated lines" for (a, b), n in top]
        findings.append(Finding(
            check="duplication", severity="medium",
            title=f"{len(serious)} file pair(s) share substantial copy-paste",
            detail="\n".join(lines),
            advice="Extract the shared logic into one function/module and "
                   "import it. Copy-paste forks silently: the next bug fix "
                   "will land in one copy and not the other.",
            paths=sorted({p for pair in serious for p in pair})))
    # byte-identical files
    by_hash: dict = {}
    for rel in idx.code_paths():
        content = idx.text[rel]
        if not content.strip():
            # Empty files (typically several empty __init__.py) are trivially
            # identical and not worth a human's time.
            continue
        digest = hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()
        by_hash.setdefault(digest, []).append(rel)
    twins = [sorted(v) for v in by_hash.values() if len(v) > 1]
    if twins:
        findings.append(Finding(
            check="identical_files", severity="medium",
            title=f"{len(twins)} set(s) of byte-identical code files",
            detail="; ".join(" == ".join(t) for t in twins[:5]),
            advice="Keep one canonical copy; delete or symlink the rest.",
            paths=[p for t in twins for p in t]))
    return findings
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# ── Junk, stale artifacts, vibe-residue filenames ────────────────────────────
|
|
316
|
+
|
|
317
|
+
_JUNK_BASENAMES = {".DS_Store", "Thumbs.db", "desktop.ini"}
|
|
318
|
+
_JUNK_EXTS = {".pyc", ".pyo", ".swp", ".swo", ".orig", ".rej", ".tmp", ".bak"}
|
|
319
|
+
_VIBE_NAME_RE = re.compile(
|
|
320
|
+
r"(copy ?\d*|final|final_v\d+|v\d+_final|untitled\d*|new\d+|test123|"
|
|
321
|
+
r"asdf|temp|old|backup|deleteme|donotuse)", re.IGNORECASE)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def check_junk_files(idx: FileIndex) -> list:
    """Report OS/editor artifacts and code files with leftover working names.

    A file is junk when its basename is in ``_JUNK_BASENAMES`` or its
    extension is in ``_JUNK_EXTS``.  Otherwise a code file whose whole stem
    matches ``_VIBE_NAME_RE`` is listed as a leftover name.
    """
    junk = []
    vibey = []
    for rel in idx.files:
        base = os.path.basename(rel)
        stem, ext = os.path.splitext(base)
        suffix = ext.lower()
        if base in _JUNK_BASENAMES or suffix in _JUNK_EXTS:
            junk.append(rel)
            continue
        if suffix in CODE_EXTS and _VIBE_NAME_RE.fullmatch(stem):
            vibey.append(rel)

    findings = []
    if junk:
        shown = ", ".join(junk[:15])
        if len(junk) > 15:
            shown += " …"
        findings.append(Finding(
            check="junk_files", severity="low",
            title=f"{len(junk)} junk/OS artifact file(s) in the tree",
            detail=shown,
            advice="Delete them and add the patterns to .gitignore.",
            paths=junk))
    if vibey:
        findings.append(Finding(
            check="leftover_names", severity="medium",
            title=f"{len(vibey)} file(s) with leftover working names",
            detail=", ".join(vibey[:15]),
            advice="'final_v2.py' and friends are how the wrong file gets "
                   "deployed. Rename to what the file actually does, or "
                   "delete it.",
            paths=vibey))
    return findings
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# ── Oversized units / deep nesting (Python AST; line counts for JS) ──────────
|
|
354
|
+
|
|
355
|
+
def check_oversized(idx: FileIndex) -> list:
    """Report very large files, very long functions and deep nesting.

    - code files (non-test) with more than 700 lines;
    - Python functions (non-test files) whose start/end line span exceeds 80;
    - Python files (non-test) whose ``_max_nesting`` depth exceeds 5.

    Paths are tracked alongside the display strings rather than re-parsed
    from them, so paths containing spaces or colons are reported intact.
    """
    big_files, big_funcs, deep = [], [], []
    big_file_paths, big_func_paths, deep_paths = [], [], []
    for rel in idx.code_paths():
        n = idx.text[rel].count("\n") + 1
        if n > 700 and not idx.is_testish(rel):
            big_files.append(f"{rel} ({n} lines)")
            big_file_paths.append(rel)
    for rel, tree in idx.py_ast.items():
        if idx.is_testish(rel):
            continue
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                span = (getattr(node, "end_lineno", node.lineno) - node.lineno)
                if span > 80:
                    big_funcs.append(f"{rel}:{node.lineno} "
                                     f"{node.name}() ({span} lines)")
                    # list each file once even if it has several long functions
                    if rel not in big_func_paths:
                        big_func_paths.append(rel)
        depth = _max_nesting(tree)
        if depth > 5:
            deep.append(f"{rel} (nesting depth {depth})")
            deep_paths.append(rel)
    out = []
    if big_files:
        out.append(Finding(
            check="oversized_files", severity="low",
            title=f"{len(big_files)} very large source file(s)",
            detail=", ".join(big_files[:10]),
            advice="Split by responsibility. A 1,000-line file is where "
                   "duplicate logic and dead code go to hide.",
            paths=big_file_paths))
    if big_funcs:
        out.append(Finding(
            check="oversized_functions", severity="medium",
            title=f"{len(big_funcs)} function(s) over 80 lines",
            detail="\n".join(big_funcs[:10]) +
                   ("\n…" if len(big_funcs) > 10 else ""),
            advice="Extract the distinct phases into named helpers — "
                   "long functions are untestable and unreviewable.",
            paths=big_func_paths))
    if deep:
        out.append(Finding(
            check="deep_nesting", severity="low",
            title=f"{len(deep)} file(s) with deeply nested control flow",
            detail=", ".join(deep[:10]),
            advice="Invert conditions and return early; extract inner loops.",
            paths=deep_paths))
    return out
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _max_nesting(tree: ast.AST) -> int:
|
|
402
|
+
worst = 0
|
|
403
|
+
|
|
404
|
+
def walk(node, depth):
|
|
405
|
+
nonlocal worst
|
|
406
|
+
for child in ast.iter_child_nodes(node):
|
|
407
|
+
d = depth + isinstance(child, (ast.If, ast.For, ast.While,
|
|
408
|
+
ast.With, ast.Try))
|
|
409
|
+
worst = max(worst, d)
|
|
410
|
+
walk(child, d)
|
|
411
|
+
walk(tree, 0)
|
|
412
|
+
return worst
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
# ── Error-handling slop + debug residue (Python AST / JS regex) ──────────────
|
|
416
|
+
|
|
417
|
+
def check_error_handling(idx: FileIndex) -> list:
    """Report bare ``except:`` clauses and silently swallowed broad excepts.

    Test files are skipped.  A handler with no exception type is "bare"; a
    handler for ``Exception``/``BaseException`` whose body is a single
    ``pass`` is "swallowed".
    """
    bare = []
    swallowed = []
    for rel, tree in idx.py_ast.items():
        if idx.is_testish(rel):
            continue
        handlers = (n for n in ast.walk(tree)
                    if isinstance(n, ast.ExceptHandler))
        for handler in handlers:
            where = f"{rel}:{handler.lineno}"
            if handler.type is None:
                bare.append(where)
                continue
            catches_everything = (
                isinstance(handler.type, ast.Name)
                and handler.type.id in ("Exception", "BaseException"))
            only_pass = (len(handler.body) == 1
                         and isinstance(handler.body[0], ast.Pass))
            if catches_everything and only_pass:
                swallowed.append(where)

    findings = []
    if bare:
        findings.append(Finding(
            check="bare_except", severity="high",
            title=f"{len(bare)} bare `except:` clause(s)",
            detail=", ".join(bare[:10]),
            advice="Catch the specific exception you expect. Bare except "
                   "swallows KeyboardInterrupt, SystemExit, and every bug "
                   "you'll ever need to see.",
            paths=[entry.split(":")[0] for entry in bare]))
    if swallowed:
        findings.append(Finding(
            check="swallowed_exceptions", severity="medium",
            title=f"{len(swallowed)} `except Exception: pass` block(s)",
            detail=", ".join(swallowed[:10]),
            advice="At minimum log the exception. Silent failure in "
                   "production is the most expensive bug class there is.",
            paths=[entry.split(":")[0] for entry in swallowed]))
    return findings
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
_CONSOLE_RE = re.compile(r"^\s*console\.log\(", re.MULTILINE)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def check_debug_residue(idx: FileIndex) -> list:
    """Report JS/TS files (non-test) with five or more ``console.log`` lines.

    Only lines that start with ``console.log(`` after optional whitespace are
    counted (see ``_CONSOLE_RE``).  Paths are tracked separately from the
    display strings so a path containing a space is reported intact.
    """
    js_hits, js_paths = [], []
    for rel in idx.code_paths():
        ext = os.path.splitext(rel)[1].lower()
        if ext in (".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs") \
                and not idx.is_testish(rel):
            n = len(_CONSOLE_RE.findall(idx.text[rel]))
            if n >= 5:
                js_hits.append(f"{rel} ({n}x console.log)")
                js_paths.append(rel)
    if not js_hits:
        return []
    return [Finding(
        check="debug_residue", severity="low",
        title=f"{len(js_hits)} file(s) full of console.log debugging",
        detail=", ".join(js_hits[:10]),
        advice="Replace with a real logger (or remove). Shipping debug spew "
               "leaks internals and drowns real errors.",
        paths=js_paths)]
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
# ── Secrets ───────────────────────────────────────────────────────────────────
|
|
478
|
+
|
|
479
|
+
_SECRET_PATTERNS = [
|
|
480
|
+
("OpenAI/Anthropic-style key", re.compile(r"\bsk-(?:ant-)?[A-Za-z0-9_-]{20,}")),
|
|
481
|
+
("AWS access key", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
|
|
482
|
+
("GitHub token", re.compile(r"\bgh[pousr]_[A-Za-z0-9]{30,}\b")),
|
|
483
|
+
("Slack token", re.compile(r"\bxox[baprs]-[A-Za-z0-9-]{10,}")),
|
|
484
|
+
("Google API key", re.compile(r"\bAIza[0-9A-Za-z_-]{35}\b")),
|
|
485
|
+
("Private key block", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")),
|
|
486
|
+
]
|
|
487
|
+
_PLACEHOLDER_RE = re.compile(
|
|
488
|
+
r"(your|example|placeholder|changeme|dummy|xxx+|<.*>|\.\.\.)", re.IGNORECASE)
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def check_secrets(idx: FileIndex) -> list:
    """Report strings in any indexed text file that look like credentials.

    A match is dropped when placeholder wording (``_PLACEHOLDER_RE``) appears
    in the 40 characters before or 10 after it, or when the line it starts on
    contains ``preflight:ignore``.
    """
    hits = []
    for rel, content in idx.text.items():
        for label, pat in _SECRET_PATTERNS:
            for m in pat.finditer(content):
                window = content[max(0, m.start() - 40): m.end() + 10]
                if _PLACEHOLDER_RE.search(window):
                    continue
                # inline suppression for known-fake fixtures:
                #   KEY = "sk-..."  # preflight:ignore
                # The line is sliced out by "\n" boundaries — the same
                # delimiter the line number below counts — instead of indexing
                # str.splitlines(), which also breaks on form feeds and other
                # separators and could inspect the wrong line.
                line_start = content.rfind("\n", 0, m.start()) + 1
                line_end = content.find("\n", m.start())
                if line_end == -1:
                    line_end = len(content)
                if "preflight:ignore" in content[line_start:line_end]:
                    continue
                line = content.count("\n", 0, m.start()) + 1
                hits.append((rel, line, label))
    if not hits:
        return []
    return [Finding(
        check="secrets", severity="high",
        title=f"{len(hits)} possible hardcoded secret(s)",
        detail="\n".join(f"{r}:{ln} — {lbl}" for r, ln, lbl in hits[:10]),
        advice="Move secrets to environment variables NOW, rotate any real "
               "key that was committed (git history remembers), and add the "
               "files to .gitignore.",
        paths=sorted({r for r, _, _ in hits}))]
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
# ── Dependency hygiene (Python) ───────────────────────────────────────────────
|
|
520
|
+
|
|
521
|
+
_IMPORT_TO_DIST = {
|
|
522
|
+
"pil": "pillow", "cv2": "opencv-python", "sklearn": "scikit-learn",
|
|
523
|
+
"yaml": "pyyaml", "dotenv": "python-dotenv", "bs4": "beautifulsoup4",
|
|
524
|
+
"attr": "attrs", "dateutil": "python-dateutil", "jose": "python-jose",
|
|
525
|
+
"opentelemetry": "opentelemetry-api", "google": "google-api-core",
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
|
|
529
|
+
def _declared_deps(idx: FileIndex) -> set:
|
|
530
|
+
"""Every package name declared anywhere in pyproject (core deps AND
|
|
531
|
+
optional extras — an extra is still a declaration) or requirements files.
|
|
532
|
+
Deliberately permissive: this set only ever *suppresses* findings."""
|
|
533
|
+
declared = set()
|
|
534
|
+
py = idx.text.get("pyproject.toml", "")
|
|
535
|
+
for m in re.finditer(r'"([A-Za-z][A-Za-z0-9_.-]*)\s*(?:[><=~!^\[ ]|")', py):
|
|
536
|
+
name = m.group(1)
|
|
537
|
+
if ":" not in name and "/" not in name:
|
|
538
|
+
declared.add(name.lower().replace("-", "_"))
|
|
539
|
+
for req in ("requirements.txt", "requirements-dev.txt", "requirements.in"):
|
|
540
|
+
for raw in idx.text.get(req, "").splitlines():
|
|
541
|
+
line = raw.split("#")[0].strip()
|
|
542
|
+
m = re.match(r"([A-Za-z0-9_.-]+)", line)
|
|
543
|
+
if m:
|
|
544
|
+
declared.add(m.group(1).lower().replace("-", "_"))
|
|
545
|
+
return declared
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def check_dependencies(idx: FileIndex) -> list:
    """Report third-party imports that no manifest declares.

    Imports are collected from library code only (tests, examples/, tools/,
    scripts/, docs/, setup.py and conftest.py are skipped) and imports inside
    a ``try`` body that has handlers are treated as optional.  What is left
    after removing stdlib and project-local names is compared against
    ``_declared_deps``.

    Returns an ``undeclared_deps`` finding when a manifest exists but misses
    some imports, a ``no_dependency_manifest`` finding when third-party
    imports exist with no manifest at all, or an empty list.
    """
    if not idx.py_ast:
        return []
    stdlib = getattr(sys, "stdlib_module_names", None)
    if not stdlib:
        # sys.stdlib_module_names only exists on Python 3.10+.  Without it
        # stdlib and third-party imports cannot be told apart, and every
        # `import os` would be reported as an undeclared dependency — stay
        # silent rather than emit a wrong high-severity finding.
        return []
    has_manifest = any(p in idx.text for p in
                       ("pyproject.toml", "requirements.txt", "setup.py"))
    local_tops = {p.split("/")[0] for p in idx.py_ast} | \
                 {_py_module_name(p).split(".")[0] for p in idx.py_ast}
    used = set()
    for rel, tree in idx.py_ast.items():
        # tests, examples, tools, and packaging scripts have their own dep
        # conventions (pytest, setuptools, demo-only libs) — library code is
        # what install-time declarations must cover.
        if (idx.is_testish(rel) or rel.startswith(("examples/", "tools/",
                                                   "scripts/", "docs/"))
                or rel in ("setup.py", "conftest.py")):
            continue
        # imports wrapped in try/except are the deliberate optional-dependency
        # pattern — they must not count as hard requirements.
        guarded = []
        for node in ast.walk(tree):
            if isinstance(node, ast.Try) and node.handlers and node.body:
                start = node.body[0].lineno
                end = getattr(node.body[-1], "end_lineno", start)
                guarded.append((start, end))

        for node in ast.walk(tree):
            ln = getattr(node, "lineno", None)
            if ln is not None and any(s <= ln <= e for s, e in guarded):
                continue
            if isinstance(node, ast.Import):
                used |= {a.name.split(".")[0] for a in node.names}
            elif isinstance(node, ast.ImportFrom) and node.module \
                    and not node.level:
                used.add(node.module.split(".")[0])
    third_party = {u for u in used
                   if u not in stdlib and u not in local_tops
                   and u != "__future__"}
    if not third_party:
        return []
    declared = _declared_deps(idx)
    # An import counts as declared under its own name or under the
    # distribution name it maps to (e.g. yaml -> pyyaml).
    undeclared = sorted(
        u for u in third_party
        if u.lower() not in declared
        and _IMPORT_TO_DIST.get(u.lower(), u.lower()).replace("-", "_")
        not in declared)
    out = []
    if undeclared and has_manifest:
        out.append(Finding(
            check="undeclared_deps", severity="high",
            title=f"{len(undeclared)} imported package(s) not declared",
            detail="Imported but missing from pyproject/requirements: "
                   + ", ".join(undeclared),
            advice="Declare them with versions. 'Works on my machine' is "
                   "exactly this bug.",
            paths=[]))
    elif third_party and not has_manifest:
        out.append(Finding(
            check="no_dependency_manifest", severity="high",
            title="Third-party imports but no dependency manifest",
            detail="Uses " + ", ".join(sorted(third_party)[:10]) +
                   " but there is no pyproject.toml or requirements.txt.",
            advice="Add a pyproject.toml (preferred) listing dependencies "
                   "with version bounds so anyone can install and run this.",
            paths=[]))
    return out
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
# ── Single source of truth ────────────────────────────────────────────────────
|
|
619
|
+
|
|
620
|
+
_VERSION_RE = re.compile(
|
|
621
|
+
r"""(?:__version__|version)\s*[=:]\s*["'](\d+\.\d+[^"']*)["']""")
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def check_version_truth(idx: FileIndex) -> list:
|
|
625
|
+
seen: dict = {}
|
|
626
|
+
for rel in ("pyproject.toml", "setup.py", "setup.cfg", "package.json"):
|
|
627
|
+
if rel in idx.text:
|
|
628
|
+
m = _VERSION_RE.search(idx.text[rel])
|
|
629
|
+
if m:
|
|
630
|
+
seen[rel] = m.group(1)
|
|
631
|
+
for rel, tree in idx.py_ast.items():
|
|
632
|
+
if rel.endswith("__init__.py"):
|
|
633
|
+
m = _VERSION_RE.search(idx.text[rel])
|
|
634
|
+
if m:
|
|
635
|
+
seen[rel] = m.group(1)
|
|
636
|
+
if len(seen) <= 1:
|
|
637
|
+
return []
|
|
638
|
+
values = set(seen.values())
|
|
639
|
+
if len(values) > 1:
|
|
640
|
+
return [Finding(
|
|
641
|
+
check="version_mismatch", severity="high",
|
|
642
|
+
title="Version strings disagree across files",
|
|
643
|
+
detail="; ".join(f"{k} = {v}" for k, v in sorted(seen.items())),
|
|
644
|
+
advice="Pick one source of truth (pyproject.toml) and read it "
|
|
645
|
+
"everywhere else. A mismatched release is a support "
|
|
646
|
+
"nightmare.",
|
|
647
|
+
paths=sorted(seen))]
|
|
648
|
+
if len(seen) > 2:
|
|
649
|
+
return [Finding(
|
|
650
|
+
check="version_duplication", severity="low",
|
|
651
|
+
title=f"Version string duplicated in {len(seen)} files",
|
|
652
|
+
detail="; ".join(sorted(seen)) + f" (all = {values.pop()})",
|
|
653
|
+
advice="They agree today; they won't forever. Single-source it.",
|
|
654
|
+
paths=sorted(seen))]
|
|
655
|
+
return []
|
|
656
|
+
|
|
657
|
+
|
|
658
|
+
# ── Ship essentials ───────────────────────────────────────────────────────────
|
|
659
|
+
|
|
660
|
+
def check_ship_essentials(idx: FileIndex) -> list:
    """Report missing project basics: README, LICENSE, .gitignore, tests, CI.

    README/LICENSE/.gitignore are looked for among top-level entries only.
    Tests are any test-looking file with a code extension.  CI is only
    reported missing when tests exist.  One Finding is returned per missing
    item.
    """
    tops = {f.split("/")[0].lower() for f in idx.files}
    missing = []
    if not any(h.startswith("readme") for h in tops):
        missing.append(("README", "No README — the first thing every user, "
                        "contributor, and future-you needs."))
    if not any(h.startswith("license") for h in tops):
        missing.append(("LICENSE", "No license file — legally, nobody can "
                        "use or contribute to this."))
    if ".gitignore" not in tops:
        missing.append((".gitignore", "No .gitignore — caches, venvs, and "
                        "secrets will end up committed."))
    # Accept every code extension the scanner knows, so a project whose only
    # tests are e.g. App.test.tsx is not told it has no tests.
    code_suffixes = tuple(CODE_EXTS)
    has_tests = any(idx.is_testish(p) and p.lower().endswith(code_suffixes)
                    for p in idx.files)
    if idx.code_paths() and not has_tests:
        missing.append(("tests", "No tests found anywhere. Untested code "
                        "can't be changed safely, which means it can't be "
                        "maintained."))
    # GitHub Actions workflows plus the usual config files of other CI systems.
    ci_files = {".gitlab-ci.yml", ".travis.yml", "azure-pipelines.yml",
                "bitbucket-pipelines.yml", "jenkinsfile",
                ".circleci/config.yml"}
    has_ci = any(p.startswith(".github/workflows/") or p.lower() in ci_files
                 for p in idx.files)
    if has_tests and not has_ci:
        missing.append(("CI workflow", "Tests exist but nothing runs them "
                        "automatically — they will rot."))
    out = []
    for name, why in missing:
        sev = "high" if name in ("LICENSE", "tests", "README") else "medium"
        out.append(Finding(
            check=f"missing_{name.lower().replace(' ', '_').lstrip('.')}",
            severity=sev, title=f"Missing: {name}", detail=why,
            advice=f"Add {name} before sharing or deploying this project."))
    return out
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
# ── TODO debt + broken Python ─────────────────────────────────────────────────
|
|
694
|
+
|
|
695
|
+
_TODO_RE = re.compile(r"\b(TODO|FIXME|HACK|XXX)\b")


def check_todo_debt(idx: FileIndex) -> list:
    """Flag a tree that carries a large number of TODO-style markers.

    Counts every ``_TODO_RE`` match in the text of each path returned by
    ``idx.code_paths()``.  Returns an empty list while the total is below 10;
    otherwise a single low-severity ``Finding`` listing up to 15 of the
    affected files in sorted order.
    """
    hits_by_file = {}
    for rel in idx.code_paths():
        hits = len(_TODO_RE.findall(idx.text[rel]))
        if hits:
            hits_by_file[rel] = hits_by_file.get(rel, 0) + hits

    total = sum(hits_by_file.values())
    if total < 10:
        return []

    flagged = sorted(hits_by_file)
    return [Finding(
        check="todo_debt",
        severity="low",
        title=f"{total} TODO/FIXME/HACK markers across {len(flagged)} files",
        detail=("High marker density usually means known problems are being "
                "carried instead of fixed or ticketed."),
        advice=("Triage: fix the quick ones, file issues for the real ones, "
                "delete the stale ones."),
        paths=flagged[:15],
    )]
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
def check_broken_python(idx: FileIndex) -> list:
    """Report the Python files recorded in ``idx.py_bad``.

    Returns an empty list when ``idx.py_bad`` is empty; otherwise a single
    high-severity ``Finding`` whose detail names the first 10 files and whose
    ``paths`` carries the full list.
    """
    broken = idx.py_bad
    if broken:
        return [Finding(
            check="syntax_errors",
            severity="high",
            title=f"{len(broken)} Python file(s) do not even parse",
            detail=", ".join(broken[:10]),
            advice=("These can't run. Fix or delete them — shipped trees "
                    "should contain only working code."),
            paths=broken,
        )]
    return []
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
# ── Orchestration ─────────────────────────────────────────────────────────────
|
|
730
|
+
|
|
731
|
+
# Every analyzer, in the order scan_project() runs them.
ALL_CHECKS = [
    check_broken_python,
    check_secrets,
    check_dead_modules,
    check_dependencies,
    check_version_truth,
    check_error_handling,
    check_duplication,
    check_import_cycles,
    check_oversized,
    check_junk_files,
    check_debug_residue,
    check_ship_essentials,
    check_todo_debt,
]
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def scan_project(root: str) -> ScanResult:
    """Index *root*, run every check in ``ALL_CHECKS`` and score the result.

    A check that raises is reported as an ``info`` "scanner_error" finding
    instead of aborting the scan.  Findings are ordered by
    ``SEVERITY_ORDER`` (unknown severities last) and then by check name.
    The score starts at 100, loses ``SEVERITY_PENALTY`` per finding
    (unknown severities cost nothing) and never drops below 5.
    """
    idx = FileIndex(root)
    findings: list = []

    for check in ALL_CHECKS:
        try:
            # Kept inside the try so that a check which produces its findings
            # lazily and fails midway is still contained.
            findings.extend(check(idx))
        except Exception as exc:  # a check must never kill the scan
            failure = Finding(
                check="scanner_error",
                severity="info",
                title=f"Check {check.__name__} failed on this tree",
                detail=f"{type(exc).__name__}: {exc}",
                advice="Please report this at the preflight issue tracker.",
            )
            findings.append(failure)

    findings.sort(
        key=lambda item: (SEVERITY_ORDER.get(item.severity, 9), item.check))

    score = 100
    for item in findings:
        score -= SEVERITY_PENALTY.get(item.severity, 0)
    score = max(5, score)

    # Highest threshold first; the first one the score reaches wins.
    grade = "not ready to ship"
    for floor, label in (
        (90, "ready to ship"),
        (75, "close — one cleanup pass"),
        (50, "needs real cleanup before shipping"),
    ):
        if score >= floor:
            grade = label
            break

    return ScanResult(
        root=idx.root,
        findings=findings,
        files_scanned=len(idx.files),
        code_files=len(idx.code_paths()),
        total_lines=idx.total_lines,
        score=score,
        grade=grade,
    )
|
preflight/cli.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""preflight CLI.
|
|
2
|
+
|
|
3
|
+
preflight # scan the current directory
|
|
4
|
+
preflight path/to/project # scan a project
|
|
5
|
+
preflight demo # build a deliberately messy demo project & scan it
|
|
6
|
+
preflight . --json out.json --out report.md
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import sys
|
|
14
|
+
import tempfile
|
|
15
|
+
|
|
16
|
+
from . import __version__
|
|
17
|
+
from .checks import scan_project
|
|
18
|
+
from .report import render_markdown, render_terminal
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _make_demo_project() -> str:
    """Build a tiny, deliberately messy project so `preflight demo` shows real output.

    Creates a fresh temporary directory (prefix ``preflight_demo_``)
    containing a small ``myapp`` package with a bare ``except``, leftover
    files, a fake hardcoded key, disagreeing version strings and a
    ``.DS_Store``.

    Returns:
        The path of the created directory.  It is not removed afterwards.
    """
    root = tempfile.mkdtemp(prefix="preflight_demo_")
    os.makedirs(os.path.join(root, "myapp"))

    def write(rel: str, content: str) -> None:
        # A context manager closes the handle deterministically; the explicit
        # encoding keeps the output independent of the platform default.
        with open(os.path.join(root, rel), "w", encoding="utf-8") as fh:
            fh.write(content)

    write("myapp/__init__.py", "__version__ = '1.0.0'\n")
    # The generated module must be valid Python: the bare ``except`` is what
    # the demo is meant to showcase, not a syntax error.
    write("myapp/main.py",
          "from . import utils\n\ndef run():\n    try:\n        utils.go()\n"
          "    except:\n        pass\n")
    write("myapp/utils.py", "def go():\n    return 1\n")
    write("myapp/old_helpers.py",
          "def forgotten():\n    return 'dead code'\n")
    write("myapp/final_v2.py", "x = 1\n")
    # the demo project deliberately contains a fake committed key so the
    # secrets check has something to find (preflight:ignore on THIS line
    # only — the generated file itself must be flagged)
    write("config.py",
          "API_KEY = 'sk-" + "demo0" * 6 + "'\n")  # preflight:ignore
    write("setup.py", "version='1.2.0'\n")
    write(".DS_Store", "")
    return root
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def main(argv=None) -> int:
    """Run the preflight command-line interface.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The process exit status: ``0`` on success, ``1`` when
        ``--fail-under`` is given and the score is below it, ``2`` when the
        target is not a directory or the requested ``--json`` file could not
        be written.  A markdown report that cannot be written is only
        reported on stderr and does not change the status.
    """
    ap = argparse.ArgumentParser(
        prog="preflight",
        description="Scan a codebase the way a senior engineer skims it: "
                    "dead weight, duplication, slop, and what's missing "
                    "before it can ship. Offline, zero dependencies, no LLM.")
    ap.add_argument("path", nargs="?", default=".",
                    help="project directory to scan (or 'demo')")
    ap.add_argument("--out", default=None,
                    help="markdown report path "
                         "(default: preflight_report.md in the scanned dir)")
    ap.add_argument("--json", dest="json_path", default=None,
                    help="also write machine-readable JSON here")
    ap.add_argument("--quiet", action="store_true",
                    help="print only the score line")
    ap.add_argument("--fail-under", type=int, default=None, metavar="N",
                    help="exit nonzero if score < N (for CI gates)")
    ap.add_argument("--version", action="version",
                    version=f"preflight {__version__}")
    args = ap.parse_args(argv)

    target = args.path
    if target == "demo":
        target = _make_demo_project()
        print(f"Built a deliberately messy demo project at {target}\n")

    if not os.path.isdir(target):
        print(f"preflight: not a directory: {target}", file=sys.stderr)
        return 2

    result = scan_project(target)

    # The markdown report is best-effort: a read-only tree must not prevent
    # the terminal summary from being shown.
    out_path = args.out or os.path.join(result.root, "preflight_report.md")
    try:
        with open(out_path, "w", encoding="utf-8") as fh:
            fh.write(render_markdown(result))
        wrote = out_path
    except OSError as exc:
        wrote = None
        print(f"preflight: could not write report ({exc})", file=sys.stderr)

    # The JSON file is only written on explicit request, so a failure is
    # reported and reflected in the exit status instead of surfacing as a
    # traceback after the scan has already finished.
    json_failed = False
    if args.json_path:
        try:
            with open(args.json_path, "w", encoding="utf-8") as fh:
                json.dump(result.to_dict(), fh, indent=2)
        except OSError as exc:
            json_failed = True
            print(f"preflight: could not write JSON ({exc})", file=sys.stderr)

    if args.quiet:
        print(f"{result.score}/100 — {result.grade}")
    else:
        print(render_terminal(result))
        if wrote:
            print(f"\n Markdown report written to: {wrote}\n")

    if json_failed:
        return 2
    if args.fail_under is not None and result.score < args.fail_under:
        return 1
    return 0
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
if __name__ == "__main__":
|
|
102
|
+
raise SystemExit(main())
|
preflight/report.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Render a ScanResult for terminals and as a markdown report file."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from .checks import ScanResult
|
|
5
|
+
|
|
6
|
+
# Fixed-width severity tags for terminal output.
_SEV_LABEL = {
    "high": "HIGH",
    "medium": "MED ",
    "low": "low ",
    "info": "info",
}
# Severity markers for the markdown report.
_SEV_EMOJI = {
    "high": "🔴",
    "medium": "🟠",
    "low": "🟡",
    "info": "ℹ️",
}


def render_terminal(result: ScanResult) -> str:
    """Format *result* as the plain-text summary shown in a terminal.

    The output starts with the project root, file/line counts and the score
    line.  Each finding then contributes its tagged title, one line per line
    of its detail, and its advice; a closing line totals the high and medium
    findings.  With no findings a single "clean tree" line is shown instead.
    """
    header = [
        "",
        f"preflight — {result.root}",
        f"{result.files_scanned} files · {result.code_files} code "
        f"files · {result.total_lines:,} lines of code",
        "",
        f" SHIP-READINESS SCORE: {result.score}/100 — {result.grade}",
        "",
    ]
    if not result.findings:
        return "\n".join(
            header + [" No findings. Genuinely clean tree — ship it."])

    body = []
    severities = []
    for finding in result.findings:
        severities.append(finding.severity)
        tag = _SEV_LABEL.get(finding.severity, '????')
        body.append(f" [{tag}] {finding.title}")
        body.extend(f" {row}" for row in finding.detail.splitlines())
        body.append(f" fix -> {finding.advice}")
        body.append("")

    n_high = severities.count("high")
    n_medium = severities.count("medium")
    footer = (f" {len(result.findings)} finding(s): {n_high} high, "
              f"{n_medium} medium. Full report: see the markdown file.")
    return "\n".join(header + body + [footer])
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def render_markdown(result: ScanResult) -> str:
    """Render *result* as a standalone markdown report.

    The report contains a header (project root, counts, score and grade), a
    severity/title summary table, and one section per finding with its
    detail, its advice and a collapsible list of at most 20 affected paths.
    With no findings only the header and a "clean tree" line are produced.

    Returns:
        The markdown text, always terminated by a newline.
    """
    out = [
        "# preflight report",
        "",
        # Two trailing spaces are a markdown hard line break; with fewer,
        # the Project and Scanned lines are rendered as one run-on line.
        f"**Project:** `{result.root}`  ",
        f"**Scanned:** {result.files_scanned} files, "
        f"{result.code_files} code files, "
        f"{result.total_lines:,} lines of code",
        "",
        f"## Ship-readiness: {result.score}/100 — {result.grade}",
        "",
    ]
    if not result.findings:
        out.append("No findings. Clean tree.")
        return "\n".join(out) + "\n"

    out.append("| Severity | Finding |")
    out.append("|---|---|")
    for f in result.findings:
        # An unescaped pipe in a title would be read as a cell boundary.
        title_cell = f.title.replace("|", "\\|")
        out.append(f"| {_SEV_EMOJI.get(f.severity, '')} {f.severity} | "
                   f"{title_cell} |")
    out.append("")
    out.append("## Findings and how to fix them")
    for f in result.findings:
        out.append("")
        out.append(f"### {_SEV_EMOJI.get(f.severity, '')} {f.title}")
        out.append("")
        out.append(f.detail)
        out.append("")
        out.append(f"**Fix:** {f.advice}")
        if f.paths:
            out.append("")
            shown = f.paths[:20]
            out.append("<details><summary>Files "
                       f"({len(f.paths)})</summary>\n")
            for p in shown:
                out.append(f"- `{p}`")
            if len(f.paths) > 20:
                out.append(f"- … and {len(f.paths) - 20} more")
            out.append("\n</details>")
    out.append("")
    out.append("---")
    # Link to the published package page rather than a template placeholder.
    out.append("*Generated by "
               "[preflight](https://pypi.org/project/preflight-scan/) "
               "— static heuristics, zero LLM calls, runs entirely on your "
               "machine. A finding is a prompt for human judgment, not a "
               "verdict.*")
    return "\n".join(out) + "\n"
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: preflight-scan
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Is your vibe-coded project ready to ship? A zero-dependency architecture and cleanliness scanner.
|
|
5
|
+
Author: preflight
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: code-quality,vibe-coding,static-analysis,cleanup,refactoring
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Dynamic: license-file
|
|
18
|
+
|
|
19
|
+
# Preflight
|
|
20
|
+
|
|
21
|
+
**Is your vibe-coded project ready to ship?**
|
|
22
|
+
|
|
23
|
+
You built something with AI — it works on your machine, and now you want it to
|
|
24
|
+
be a real product. `preflight` reads your codebase the way a senior engineer
|
|
25
|
+
skims it on day one and tells you, concretely, what stands between "it runs"
|
|
26
|
+
and "it ships": dead code, copy-paste forks, hardcoded keys, missing licenses
|
|
27
|
+
and tests, leftover `final_v2.py` files, functions nobody can review.
|
|
28
|
+
|
|
29
|
+
- **Zero dependencies.** Pure Python stdlib. Nothing to break.
|
|
30
|
+
- **Zero LLM calls. Runs entirely offline.** Your code never leaves your machine.
|
|
31
|
+
- **Every finding comes with a concrete fix**, not just a complaint.
|
|
32
|
+
- **A ship-readiness score (5–100; the score is floored at 5)** you can put in CI and watch improve.
|
|
33
|
+
|
|
34
|
+
```
|
|
35
|
+
SHIP-READINESS SCORE: 21/100 — not ready to ship
|
|
36
|
+
|
|
37
|
+
[HIGH] 1 possible hardcoded secret(s)
|
|
38
|
+
config.py:1 — OpenAI/Anthropic-style key
|
|
39
|
+
fix -> Move secrets to environment variables NOW, rotate any real
|
|
40
|
+
key that was committed, and add the files to .gitignore.
|
|
41
|
+
|
|
42
|
+
[HIGH] 2 module(s) appear unreachable
|
|
43
|
+
fix -> Verify with your test suite, then delete them.
|
|
44
|
+
|
|
45
|
+
[HIGH] Version strings disagree across files
|
|
46
|
+
myapp/__init__.py = 1.0.0; setup.py = 1.2.0
|
|
47
|
+
fix -> Pick one source of truth (pyproject.toml).
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## One-click (no terminal)
|
|
51
|
+
|
|
52
|
+
Download this repo, open the `launchers/` folder, and double-click:
|
|
53
|
+
|
|
54
|
+
- **Mac:** `Check My Code (Mac).command` — first run may need right-click → Open
|
|
55
|
+
- **Windows:** `Check My Code (Windows).bat`
|
|
56
|
+
|
|
57
|
+
It installs itself the first time, asks you to drag your project folder in,
|
|
58
|
+
and writes `preflight_report.md` next to your project with everything it
|
|
59
|
+
found and how to fix each one. That's it.
|
|
60
|
+
|
|
61
|
+
## 30 seconds in a terminal
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install preflight-scan
|
|
65
|
+
preflight demo # builds a deliberately messy project and scans it
|
|
66
|
+
preflight path/to/your/project
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The terminal shows the summary; the full report (with file lists and fixes)
|
|
70
|
+
is written to `preflight_report.md` in the scanned folder.
|
|
71
|
+
|
|
72
|
+
## One line in CI
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
preflight . --fail-under 75 # exit nonzero if the score drops below 75
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## What it checks
|
|
79
|
+
|
|
80
|
+
| Check | What it catches |
|
|
81
|
+
|---|---|
|
|
82
|
+
| Dead modules | Files no code path can reach (verified by import graph) |
|
|
83
|
+
| Hardcoded secrets | OpenAI/Anthropic/AWS/GitHub/Slack/Google keys, private key blocks |
|
|
84
|
+
| Copy-paste forks | Substantial duplicated blocks across files; byte-identical files |
|
|
85
|
+
| Dependency hygiene | Imports you never declared; no manifest at all |
|
|
86
|
+
| Version truth | Version strings that disagree (or are duplicated) across files |
|
|
87
|
+
| Error-handling slop | Bare `except:`; `except Exception: pass` silent failures |
|
|
88
|
+
| Circular imports | Module-level import cycles (lazy in-function imports excluded — that's the fix, not the bug) |
|
|
89
|
+
| Oversized units | 700+ line files, 80+ line functions, deeply nested control flow |
|
|
90
|
+
| Leftover working files | `final_v2.py`, `untitled3.py`, `.DS_Store`, `*.pyc`, editor swap files |
|
|
91
|
+
| Ship essentials | Missing README, LICENSE, .gitignore, tests, CI |
|
|
92
|
+
| Broken files | Python that doesn't even parse |
|
|
93
|
+
| TODO debt | Heavy TODO/FIXME/HACK density |
|
|
94
|
+
|
|
95
|
+
False-positive suppression for known-fake fixtures: append `# preflight:ignore`
|
|
96
|
+
to the line.
|
|
97
|
+
|
|
98
|
+
## Honesty notes (read before trusting the score)
|
|
99
|
+
|
|
100
|
+
- These are **static heuristics**, deliberately conservative. A finding is a
|
|
101
|
+
prompt for human judgment, not a verdict; the absence of findings is not a
|
|
102
|
+
security audit or a code review.
|
|
103
|
+
- Python gets the deepest analysis (AST-based). JavaScript/TypeScript gets
|
|
104
|
+
duplication, secrets, junk, size, and debug-residue checks — not import
|
|
105
|
+
graphs.
|
|
106
|
+
- preflight flags its own test suite's fake keys when scanned. A scanner that
|
|
107
|
+
special-cased itself would be lying; use `# preflight:ignore` like everyone
|
|
108
|
+
else (we do).
|
|
109
|
+
- Dogfooded for real: preflight's first run against a 17,500-line production
|
|
110
|
+
package found 3 dead modules, an undeclared-extras parser gap (in itself —
|
|
111
|
+
fixed), and a core scoring function duplicated across two entry points that
|
|
112
|
+
had already begun to drift. All fixed; that package went 62 → 90/100.
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
|
|
116
|
+
MIT.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
preflight/__init__.py,sha256=GGsFsgouFCR9ELj2SRNV0lxAPvyqAUWg7uFYSwj3ey8,539
|
|
2
|
+
preflight/__main__.py,sha256=k1ocEWawweo1qCJWNFAAvyxz3tcY13dzvCenHszij30,48
|
|
3
|
+
preflight/checks.py,sha256=c-BHsUGAbz0cVgWvylKz0IOXCY_6gfOr1Pvv4sEcSkI,32793
|
|
4
|
+
preflight/cli.py,sha256=NAU2t2e7Ah0ueej8KU5JBPv_hJRu7ffFXSXU7igWkNw,3814
|
|
5
|
+
preflight/report.py,sha256=SwKv4-H6R4m8ZRrtUyry9wwERGgA1mixhVeojIhtnnY,3216
|
|
6
|
+
preflight_scan-0.1.0.dist-info/licenses/LICENSE,sha256=O38j1kjHtcE2yoFaMd_m-9FBKJ0u0q6HXeCZ1_YDIMo,1072
|
|
7
|
+
preflight_scan-0.1.0.dist-info/METADATA,sha256=HQlmhL3HI_b9DmJlTvqSN7QjtLZgen3VkrEXHmeQnew,4809
|
|
8
|
+
preflight_scan-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
9
|
+
preflight_scan-0.1.0.dist-info/entry_points.txt,sha256=VcW1tXHanw5ExFiAv-nHrUeDFl0-p85MiGw2YhnnI30,49
|
|
10
|
+
preflight_scan-0.1.0.dist-info/top_level.txt,sha256=ounnrNnQ8yH4uQJrLFyVEB5GGGMW4lQUBqjti6ra2VE,10
|
|
11
|
+
preflight_scan-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Wellness Agents
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
preflight
|