audit-test 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. audit_code/__init__.py +3 -0
  2. audit_code/__main__.py +5 -0
  3. audit_code/adapters/__init__.py +34 -0
  4. audit_code/adapters/base.py +202 -0
  5. audit_code/adapters/cpp/__init__.py +1 -0
  6. audit_code/adapters/cpp/adapter.py +114 -0
  7. audit_code/adapters/csharp/__init__.py +1 -0
  8. audit_code/adapters/csharp/adapter.py +85 -0
  9. audit_code/adapters/go/__init__.py +1 -0
  10. audit_code/adapters/go/adapter.py +53 -0
  11. audit_code/adapters/html/__init__.py +1 -0
  12. audit_code/adapters/html/adapter.py +177 -0
  13. audit_code/adapters/java/__init__.py +1 -0
  14. audit_code/adapters/java/adapter.py +101 -0
  15. audit_code/adapters/javascript/__init__.py +1 -0
  16. audit_code/adapters/javascript/adapter.py +128 -0
  17. audit_code/adapters/python/__init__.py +1 -0
  18. audit_code/adapters/python/adapter.py +40 -0
  19. audit_code/adapters/rust/__init__.py +1 -0
  20. audit_code/adapters/rust/adapter.py +60 -0
  21. audit_code/adapters/sql/__init__.py +1 -0
  22. audit_code/adapters/sql/adapter.py +71 -0
  23. audit_code/audit_config.py +21 -0
  24. audit_code/audit_deps.py +426 -0
  25. audit_code/audit_gate.py +596 -0
  26. audit_code/audit_phd.py +1478 -0
  27. audit_code/audit_quality.py +574 -0
  28. audit_code/audit_runtime.py +1262 -0
  29. audit_code/audit_shared.py +63 -0
  30. audit_code/audit_suite.py +322 -0
  31. audit_code/audit_wiring.py +998 -0
  32. audit_code/cli.py +353 -0
  33. audit_code/config.py +107 -0
  34. audit_code/deps.py +49 -0
  35. audit_code/gate.py +52 -0
  36. audit_code/integrations/__init__.py +15 -0
  37. audit_code/integrations/codeql.py +27 -0
  38. audit_code/integrations/dependency_scan.py +27 -0
  39. audit_code/integrations/megalinter.py +27 -0
  40. audit_code/integrations/secret_scan.py +27 -0
  41. audit_code/integrations/semgrep.py +27 -0
  42. audit_code/models.py +73 -0
  43. audit_code/phd.py +84 -0
  44. audit_code/profiles/__init__.py +10 -0
  45. audit_code/profiles/agent_engine/__init__.py +1 -0
  46. audit_code/profiles/agent_engine/config_checks.py +14 -0
  47. audit_code/profiles/agent_engine/profile.py +16 -0
  48. audit_code/profiles/agent_engine/prompt_checks.py +14 -0
  49. audit_code/profiles/agent_engine/stdout_checks.py +14 -0
  50. audit_code/profiles/agent_engine/tool_registry_checks.py +14 -0
  51. audit_code/project.py +28 -0
  52. audit_code/quality.py +748 -0
  53. audit_code/reporting/__init__.py +19 -0
  54. audit_code/reporting/json_report.py +42 -0
  55. audit_code/reporting/junit.py +47 -0
  56. audit_code/reporting/sarif.py +50 -0
  57. audit_code/run_all_audits.py +121 -0
  58. audit_code/runner.py +343 -0
  59. audit_code/runtime.py +74 -0
  60. audit_code/suite.py +337 -0
  61. audit_code/wiring.py +90 -0
  62. audit_test-0.1.0.dist-info/METADATA +239 -0
  63. audit_test-0.1.0.dist-info/RECORD +67 -0
  64. audit_test-0.1.0.dist-info/WHEEL +5 -0
  65. audit_test-0.1.0.dist-info/entry_points.txt +4 -0
  66. audit_test-0.1.0.dist-info/licenses/LICENSE +26 -0
  67. audit_test-0.1.0.dist-info/top_level.txt +1 -0
audit_code/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """audit-code: code and test verification orchestrator."""
2
+
3
+ __version__ = "0.1.0"
audit_code/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow `python -m audit_code`."""
2
+
3
+ from audit_code.cli import main
4
+
5
+ main()
@@ -0,0 +1,34 @@
1
+ """Language adapters — one per supported language."""
2
+
3
+ from pathlib import Path
4
+
5
+ from audit_code.adapters.cpp.adapter import CppAdapter
6
+ from audit_code.adapters.csharp.adapter import CsharpAdapter
7
+ from audit_code.adapters.go.adapter import GoAdapter
8
+ from audit_code.adapters.html.adapter import HtmlAdapter
9
+ from audit_code.adapters.java.adapter import JavaAdapter
10
+ from audit_code.adapters.javascript.adapter import JavaScriptAdapter
11
+ from audit_code.adapters.python.adapter import PythonAdapter
12
+ from audit_code.adapters.rust.adapter import RustAdapter
13
+ from audit_code.adapters.sql.adapter import SqlAdapter
14
+
15
+ ALL = [
16
+ PythonAdapter,
17
+ JavaScriptAdapter,
18
+ JavaAdapter,
19
+ GoAdapter,
20
+ RustAdapter,
21
+ CsharpAdapter,
22
+ CppAdapter,
23
+ HtmlAdapter,
24
+ SqlAdapter,
25
+ ]
26
+
27
+
28
def discover(target_root: Path) -> list:
    """Return every adapter class whose language is present in the target.

    This only performs detection. Running syntax checks and test suites is
    left to the runner so that their results end up in the report.
    """
    detected = []
    for adapter in ALL:
        if adapter.detect(target_root):
            detected.append(adapter)
    return detected
@@ -0,0 +1,202 @@
1
+ """Shared machinery for language adapters.
2
+
3
+ Contract (fail-closed, honest):
4
+ - detect() marker file at the root OR any source file anywhere.
5
+ - syntax_check() runs a REAL check. Three honest outcomes only:
6
+ PASS files were actually checked and are clean
7
+ FAIL/WARN files were checked and have findings
8
+ SKIP no files, or the needed tool is missing
9
+ A missing tool is never reported as PASS.
10
+ - test_command() the native test invocation for the project, or None.
11
+ """
12
+
13
+ import os
14
+ import shutil
15
+ import subprocess
16
+ import time
17
+ from pathlib import Path
18
+
19
+ from audit_code.config import ADAPTER_EXCLUDE_DIRS, TOOL_TIMEOUT
20
+ from audit_code.models import AuditResult, AuditStatus, Finding, Severity
21
+
22
+ # Per-file checkers (one subprocess per file) stop after this many files so a
23
+ # huge repo cannot stall the audit; the result notes the truncation.
24
+ MAX_PER_FILE_CHECKS = 400 # audit: ok
25
+
26
+
27
+ def which(name: str) -> str | None:
28
+ """Locate an executable on PATH (indirection point for tests)."""
29
+ return shutil.which(name)
30
+
31
+
32
def _load_project_excludes(root: Path) -> set[str]:
    """Load extra exclude names from ``<root>/.audit-test-ignore``.

    Every line is stripped of surrounding whitespace; blank lines and lines
    starting with ``#`` are dropped. The remaining lines are returned as
    exact names (not substrings or globs).

    Returns an empty set when the file is missing, cannot be read, or is not
    valid UTF-8. The ignore file is optional, so a broken one must never
    abort the audit.
    """
    ignore_file = root / ".audit-test-ignore"
    try:
        # utf-8-sig transparently drops a leading BOM, which would otherwise
        # become part of the first pattern and stop it from ever matching.
        text = ignore_file.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        # OSError covers "missing", "is a directory" and permission problems;
        # UnicodeDecodeError covers a file saved in another encoding.
        return set()
    stripped = (line.strip() for line in text.splitlines())
    return {line for line in stripped if line and not line.startswith("#")}
51
+
52
+
53
def run_tool(cmd: list, cwd: Path, timeout: int = TOOL_TIMEOUT) -> tuple[int, str, str]:
    """Run an external tool and capture its output. Never raises.

    Args:
        cmd: argument vector passed to ``subprocess.run`` (no shell).
        cwd: working directory for the child process.
        timeout: seconds to wait before giving up.

    Returns:
        ``(rc, stdout, stderr)``. ``rc`` is the tool's exit status, ``-1``
        when the timeout expired, or ``-2`` when the process could not be
        started. In the two failure cases stdout is empty and stderr holds a
        bracketed explanation.
    """
    try:
        proc = subprocess.run(
            cmd,
            cwd=str(cwd),
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",  # tool output is not guaranteed to be UTF-8
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return -1, "", f"[timed out after {timeout}s]"
    except (OSError, ValueError) as e:
        # ValueError is what subprocess raises for invalid arguments (for
        # example an embedded NUL byte in a path); report it like any other
        # launch failure so the "never raises" contract actually holds.
        return -2, "", f"[failed to launch: {e}]"
    return proc.returncode, proc.stdout or "", proc.stderr or ""
70
+
71
+
72
def iter_source_files(
    root: Path, extensions: tuple, extra_excludes: set[str] | None = None
):
    """Yield files under root (root-level included) with pruned walk.

    Args:
        root: directory to walk.
        extensions: file-name suffixes to accept (matched with
            ``str.endswith``).
        extra_excludes: additional exact names to skip on top of
            ``ADAPTER_EXCLUDE_DIRS``. A name matching a directory prunes the
            whole subtree; a name matching a file skips that file. This is
            what makes file entries in ``.audit-test-ignore`` take effect.

    Yields:
        ``Path`` objects in ``os.walk`` order (callers sort if they need a
        stable order).
    """
    excluded_dirs = ADAPTER_EXCLUDE_DIRS
    excluded_files: set[str] = set()
    if extra_excludes:
        # NOTE(review): relies on ADAPTER_EXCLUDE_DIRS being a set/frozenset
        # so that ``|`` builds a union, as in the original code.
        excluded_dirs = excluded_dirs | extra_excludes
        excluded_files = extra_excludes
    exts = tuple(extensions)
    for dirpath, dirnames, filenames in os.walk(root):
        # In-place assignment is what tells os.walk not to descend further.
        dirnames[:] = [d for d in dirnames if d not in excluded_dirs]
        for fn in filenames:
            if fn.endswith(exts) and fn not in excluded_files:
                yield Path(dirpath) / fn
85
+
86
+
87
class LanguageAdapter:
    """Base adapter. Subclasses set language/extensions/markers and implement
    check_files() with a real syntax check."""

    language: str = ""
    extensions: tuple = ()
    marker_files: tuple = ()
    tool_hint: str = ""  # how to install the checker, shown on SKIP

    @classmethod
    def audit_id(cls) -> str:
        """Return the audit/rule identifier, ``"<language>-syntax"``."""
        return f"{cls.language}-syntax"

    @classmethod
    def detect(cls, target_root: Path) -> bool:
        """Tell whether this language is present in the target.

        True when any marker file exists directly under ``target_root``, or
        when at least one file with a matching extension is found by the
        pruned walk (project excludes applied).
        """
        for marker in cls.marker_files:
            if (target_root / marker).exists():
                return True
        root = target_root.resolve()
        extras = _load_project_excludes(root)
        # next() on the generator stops the walk at the first hit.
        return next(iter_source_files(root, cls.extensions, extras), None) is not None

    @classmethod
    def collect_files(cls, target_root: Path) -> list:
        """Return the sorted list of source files for this language."""
        root = target_root.resolve()
        extras = _load_project_excludes(root)
        return sorted(iter_source_files(root, cls.extensions, extras))

    @classmethod
    def syntax_check(cls, target_root: Path) -> AuditResult:
        """Collect the language's files and hand them to check_files().

        Returns a SKIP result when no source files are found.
        """
        root = target_root.resolve()
        files = cls.collect_files(root)
        if not files:
            return cls.skip(f"no {cls.language} source files found")
        return cls.check_files(root, files)

    @classmethod
    def check_files(cls, root: Path, files: list) -> AuditResult:
        """Run the real syntax check; must be implemented by subclasses."""
        raise NotImplementedError(f"{cls.__name__} must implement check_files()")

    @staticmethod
    def test_command(target_root: Path) -> list | None:
        """Return the native test invocation for the project, or None."""
        return None

    # ── result helpers ──

    @classmethod
    def skip(cls, reason: str, tool_missing: bool = False) -> AuditResult:
        """Build a SKIP result carrying ``reason`` as its stdout.

        When ``tool_missing`` is true and the adapter defines ``tool_hint``,
        the hint is appended in parentheses.
        """
        msg = reason
        if tool_missing and cls.tool_hint:
            msg = f"{reason} ({cls.tool_hint})"
        return AuditResult(
            audit_id=cls.audit_id(),
            status=AuditStatus.SKIP,
            completed=True,
            tool_missing=tool_missing,
            stdout=msg,
        )

    @classmethod
    def finding(
        cls,
        message: str,
        file: str | None = None,
        line: int | None = None,
        severity: Severity = Severity.HIGH,
    ) -> Finding:
        """Build a Finding for this adapter; the message is cut to 300 chars."""
        return Finding(
            rule_id=cls.audit_id(),
            severity=severity,
            message=message[:300],
            file=file,
            line=line,
            language=cls.language,
            source="adapter",
        )

    @classmethod
    def result(cls, findings: list, notes: list) -> AuditResult:
        """Fold findings and notes into a completed AuditResult.

        Status is FAIL when any finding is HIGH, WARN when any is MEDIUM,
        otherwise PASS. The stdout text is the notes followed by at most 50
        rendered findings and, if there are more, a count of the remainder.
        """
        has_high = any(f.severity == Severity.HIGH for f in findings)
        has_med = any(f.severity == Severity.MEDIUM for f in findings)
        status = (
            AuditStatus.FAIL
            if has_high
            else (AuditStatus.WARN if has_med else AuditStatus.PASS)
        )
        lines = list(notes)
        for f in findings[:50]:
            if not f.file:
                loc = ""
            elif f.line is None:
                # File-level findings have no line; do not print "file:None".
                loc = f"{f.file}"
            else:
                loc = f"{f.file}:{f.line}"
            lines.append(f"  [{f.severity.value}] {loc} {f.message}".rstrip())
        if len(findings) > 50:
            lines.append(f"  ... {len(findings) - 50} more finding(s)")
        return AuditResult(
            audit_id=cls.audit_id(),
            status=status,
            findings=findings,
            completed=True,
            stdout="\n".join(lines),
        )
186
+
187
+
188
class TimeBudget:
    """Deadline tracker that bounds how long a per-file checker loop may run."""

    def __init__(self, seconds: int = TOOL_TIMEOUT):
        # The deadline is fixed once, relative to the monotonic clock.
        started = time.monotonic()
        self.deadline = started + seconds

    def exhausted(self) -> bool:
        """Tell whether the monotonic clock has moved past the deadline."""
        now = time.monotonic()
        return self.deadline < now
196
+
197
+
198
def rel(path: Path, root: Path) -> str:
    """Render *path* relative to *root*, or unchanged when it is not below it."""
    try:
        relative = path.relative_to(root)
    except ValueError:
        # relative_to() refuses paths outside root; fall back to the input.
        return str(path)
    return str(relative)
@@ -0,0 +1 @@
1
+ """Cpp adapter."""
@@ -0,0 +1,114 @@
1
+ """C/C++ adapter — per-file `-fsyntax-only` (gcc/clang) or `/Zs` (MSVC cl).
2
+ This genuinely parses each translation unit. Missing project headers are
3
+ counted but not judged (they need the build system's include paths). Headers
4
+ are not compiled standalone — most need a surrounding translation unit."""
5
+
6
+ import re
7
+ from pathlib import Path
8
+
9
+ from audit_code.adapters.base import (
10
+ MAX_PER_FILE_CHECKS,
11
+ LanguageAdapter,
12
+ TimeBudget,
13
+ rel,
14
+ run_tool,
15
+ which,
16
+ )
17
+
18
+ _UNIT_EXTS = (".c", ".cc", ".cpp", ".cxx")
19
+ _GCC_ERR = re.compile(r"^(.*?):(\d+):(?:\d+:)?\s*(?:fatal )?error:\s*(.*)$")
20
+ _CL_ERR = re.compile(r"^(.*?)\((\d+)\):\s*(?:fatal )?error\s+C\d+:\s*(.*)$")
21
+ _MISSING_HDR = ("No such file or directory", "file not found", "Cannot open include")
22
+
23
+
24
class CppAdapter(LanguageAdapter):
    """Language adapter for C/C++ projects."""

    language = "cpp"
    extensions = _UNIT_EXTS + (".h", ".hpp", ".hh")
    marker_files = ("CMakeLists.txt", "Makefile", "meson.build")
    tool_hint = "install gcc/clang (or MSVC cl.exe on PATH)"

    @staticmethod
    def _syntax_command(
        unit: Path, cc_c: str | None, cc_cxx: str | None, cl: str | None
    ) -> tuple:
        """Return ``(argv, error_regex)`` for syntax-checking one unit.

        MSVC is used only when no gcc/clang driver exists. Otherwise the
        driver matching the unit's language is preferred; when only the other
        driver is installed the language is forced with ``-x`` so that a C
        file is never parsed as C++ (or the reverse).
        """
        if not cc_c and not cc_cxx:
            return [cl, "/Zs", "/nologo", str(unit)], _CL_ERR
        is_c = unit.suffix == ".c"
        preferred = cc_c if is_c else cc_cxx
        if preferred:
            cmd = [preferred, "-fsyntax-only", "-I", str(unit.parent), str(unit)]
        else:
            other = cc_cxx if is_c else cc_c
            cmd = [
                other,
                "-x",
                "c" if is_c else "c++",
                "-fsyntax-only",
                "-I",
                str(unit.parent),
                str(unit),
            ]
        return cmd, _GCC_ERR

    @classmethod
    def check_files(cls, root: Path, files: list):
        """Syntax-check every translation unit, one compiler run per file.

        Headers are counted but not compiled. Missing-include errors are
        counted but not turned into findings. A unit whose compiler run
        timed out, could not start, or exited non-zero without a diagnostic
        this adapter can parse is reported as "not judged" instead of being
        counted as clean. If no unit could be judged at all the result is a
        SKIP, never a PASS.
        """
        cc_cxx = which("g++") or which("clang++")
        # Deliberately no fallback to the C++ driver here: g++/clang++ treat
        # a plain ".c" argument as C++ unless told otherwise with "-x c".
        cc_c = which("gcc") or which("clang")
        cl = which("cl")
        if not cc_cxx and not cc_c and not cl:
            return cls.skip("no C/C++ compiler found — cannot check syntax", True)

        units = [f for f in files if f.suffix in _UNIT_EXTS]
        headers = len(files) - len(units)
        if not units:
            return cls.skip(
                f"{headers} header file(s) only — headers are not compiled "
                "standalone (no translation units found)"
            )

        findings, notes = [], []
        missing_hdrs = 0
        unjudged = 0
        checked = 0
        budget = TimeBudget()
        for f in units[:MAX_PER_FILE_CHECKS]:
            if budget.exhausted():
                notes.append(
                    f"time budget exhausted after {checked}/{len(units)} units"
                )
                break
            cmd, err_re = cls._syntax_command(f, cc_c, cc_cxx, cl)
            rc, out, err = run_tool(cmd, root, timeout=60)
            if rc == -2 and not checked:
                # The compiler cannot even be started: nothing was checked.
                return cls.skip(f"compiler failed to launch: {err}", True)
            if rc == 0:
                checked += 1
                continue
            diagnostics = 0
            for ln in (err + "\n" + out).splitlines():
                m = err_re.match(ln.strip())
                if not m:
                    continue
                diagnostics += 1
                msg = m.group(3)
                if any(marker in msg for marker in _MISSING_HDR):
                    missing_hdrs += 1
                    continue
                findings.append(
                    cls.finding(msg, file=rel(f, root), line=int(m.group(2)))
                )
            if diagnostics:
                checked += 1
            else:
                # Timeout, launch failure or unrecognised output: the unit
                # was not actually judged, so it must not count as clean.
                unjudged += 1

        if not checked:
            return cls.skip(
                f"compiler could not judge any of {len(units)} translation "
                "unit(s) (timeouts or unparseable compiler output)"
            )

        notes.insert(
            0,
            f"{checked}/{len(units)} translation unit(s) parsed "
            f"({headers} header(s) not compiled standalone)",
        )
        if missing_hdrs:
            notes.append(
                f"{missing_hdrs} missing-include error(s) not judged "
                "(needs the build system's include paths)"
            )
        if unjudged:
            notes.append(
                f"{unjudged} unit(s) not judged (compiler timed out or "
                "failed without a parseable diagnostic)"
            )
        if len(units) > MAX_PER_FILE_CHECKS:
            notes.append(f"capped at {MAX_PER_FILE_CHECKS} files")
        return cls.result(findings, notes)

    @staticmethod
    def test_command(target_root: Path) -> list | None:
        """Return a ctest invocation when ctest and a ``build`` dir exist."""
        ctest = which("ctest")
        if ctest and (target_root / "build").is_dir():
            return [ctest, "--test-dir", "build", "--output-on-failure"]
        return None
@@ -0,0 +1 @@
1
+ """Csharp adapter."""
@@ -0,0 +1,85 @@
1
+ """C# adapter — `dotnet build` is the real gate (there is no lightweight
2
+ syntax-only checker on a normal PATH). Restore failures (offline, missing
3
+ feed) are an honest SKIP, because the compiler never ran."""
4
+
5
+ import re
6
+ from pathlib import Path
7
+
8
+ from audit_code.adapters.base import (
9
+ LanguageAdapter,
10
+ iter_source_files,
11
+ run_tool,
12
+ which,
13
+ )
14
+
15
+ # MSBuild: path(line,col): error CS1002: ; expected [proj.csproj]
16
+ _ERR = re.compile(r"^(.*?)\((\d+),\d+\):\s*error\s+(CS\d+):\s*(.*?)(\s*\[.*\])?$")
17
+
18
+
19
class CsharpAdapter(LanguageAdapter):
    """Language adapter for C# projects."""

    language = "csharp"
    extensions = (".cs",)
    marker_files = ()
    tool_hint = "install the .NET SDK from dotnet.microsoft.com"

    # Single source of truth for what counts as a buildable project, so that
    # check_files() and test_command() cannot drift apart.
    _project_exts = (".csproj", ".sln", ".slnx")

    @classmethod
    def check_files(cls, root: Path, files: list):
        """Build the project with ``dotnet build`` and collect CS errors.

        SKIP when dotnet is missing or cannot start, when there is no project
        or solution file, when the build times out, or when NuGet restore
        failed (the compiler never ran). Otherwise each distinct
        ``(file, line, code)`` compiler error becomes a finding. A failing
        build with no parseable error yields one finding holding the last
        five lines of output.
        """
        dotnet = which("dotnet")
        if not dotnet:
            return cls.skip("dotnet not found — cannot check syntax", True)
        projects = list(iter_source_files(root, cls._project_exts))
        if not projects:
            return cls.skip(
                f"{len(files)} .cs file(s) but no .csproj/.sln — "
                "bare files cannot be built"
            )

        rc, out, err = run_tool([dotnet, "build", "--nologo", "-v:q"], root)
        if rc == -1:
            return cls.skip("dotnet build timed out")
        if rc == -2:
            return cls.skip(f"dotnet failed to launch: {err}", True)

        text = out + "\n" + err
        if rc != 0 and ("error NU" in text or "Restore failed" in text):
            return cls.skip(
                "NuGet restore failed — compiler never ran, cannot judge "
                "(check network/feeds)"
            )

        findings = []
        seen = set()  # the same error can be printed more than once
        for ln in text.splitlines():
            m = _ERR.match(ln.strip())
            if not m:
                continue
            key = (m.group(1), m.group(2), m.group(3))
            if key in seen:
                continue
            seen.add(key)
            findings.append(
                cls.finding(
                    f"{m.group(3)}: {m.group(4)}",
                    file=m.group(1),
                    line=int(m.group(2)),
                )
            )
        if rc != 0 and not findings:
            # Fail closed: a broken build must never be reported as clean.
            tail = "\n".join(text.strip().splitlines()[-5:])
            findings.append(cls.finding(f"dotnet build failed (rc={rc}): {tail}"))
        notes = [
            f"{len(files)} C# file(s) across {len(projects)} project(s) "
            "checked via dotnet build"
        ]
        return cls.result(findings, notes)

    @staticmethod
    def test_command(target_root: Path) -> list | None:
        """Return ``dotnet test`` when dotnet and a project/solution exist."""
        dotnet = which("dotnet")
        if not dotnet:
            return None
        # Only existence matters, so stop the walk at the first match.
        first = next(
            iter_source_files(target_root, CsharpAdapter._project_exts), None
        )
        if first is None:
            return None
        return [dotnet, "test", "--nologo"]
@@ -0,0 +1 @@
1
+ """Go adapter."""
@@ -0,0 +1,53 @@
1
+ """Go adapter — `gofmt -l -e` parses every file (real syntax check); parse
2
+ errors land on stderr as path:line:col, unformatted files on stdout."""
3
+
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from audit_code.adapters.base import LanguageAdapter, run_tool, which
8
+ from audit_code.models import Severity
9
+
10
+ _ERR = re.compile(r"^(.*?\.go):(\d+):(\d+):\s*(.*)$")
11
+
12
+
13
class GoAdapter(LanguageAdapter):
    """Language adapter for Go projects."""

    language = "go"
    extensions = (".go",)
    marker_files = ("go.mod",)
    tool_hint = "install the Go toolchain from go.dev"

    @classmethod
    def check_files(cls, root: Path, files: list):
        """Parse the tree with ``gofmt -l -e .`` and report what it prints.

        Lines on stderr shaped like ``path.go:line:col: message`` become HIGH
        findings. Every non-blank stdout line is a file with formatting drift
        and becomes a MEDIUM finding. SKIP when gofmt is missing, cannot
        start, or times out. A non-zero exit with no parseable error is
        reported as a finding rather than passing silently.
        """
        gofmt = which("gofmt")
        if not gofmt:
            return cls.skip("gofmt not found — cannot check syntax", True)

        rc, out, err = run_tool([gofmt, "-l", "-e", "."], root)
        if rc == -1:
            # Nothing was judged; reporting PASS here would be dishonest.
            return cls.skip("gofmt timed out")
        if rc == -2:
            return cls.skip(f"gofmt failed to launch: {err}", True)

        findings = []
        for ln in err.splitlines():
            m = _ERR.match(ln.strip())
            if m:
                findings.append(
                    cls.finding(m.group(4), file=m.group(1), line=int(m.group(2)))
                )
        if rc != 0 and not findings:
            # gofmt failed for a reason this adapter could not parse; keep
            # the tail of stderr so the report shows what went wrong.
            tail = "\n".join(err.strip().splitlines()[-5:])
            findings.append(cls.finding(f"gofmt failed (rc={rc}): {tail}"))
        unformatted = [ln.strip() for ln in out.splitlines() if ln.strip()]
        for uf in unformatted:
            findings.append(
                cls.finding("not gofmt-formatted", file=uf, severity=Severity.MEDIUM)
            )
        notes = [f"{len(files)} Go file(s) parsed via gofmt -l -e"]
        if unformatted:
            notes.append(f"{len(unformatted)} file(s) have formatting drift")
        return cls.result(findings, notes)

    @staticmethod
    def test_command(target_root: Path) -> list | None:
        """Return ``go test ./...`` when go is on PATH and go.mod exists."""
        go = which("go")
        if go and (target_root / "go.mod").exists():
            return [go, "test", "./..."]
        return None
@@ -0,0 +1 @@
1
+ """Html adapter."""
@@ -0,0 +1,177 @@
1
+ """HTML/CSS adapter — self-contained structural well-formedness checks.
2
+
3
+ HTML: stdlib HTMLParser drives a tag-balance check (stray closing tags,
4
+ unclosed elements) with void-element awareness. CSS/SCSS: brace balance,
5
+ string/comment aware. These catch genuinely broken files; they are NOT a
6
+ full spec validator, and the result says so. Findings are MEDIUM (browsers
7
+ tolerate malformed markup), so the audit warns rather than hard-fails.
8
+ """
9
+
10
+ from html.parser import HTMLParser
11
+ from pathlib import Path
12
+
13
+ from audit_code.adapters.base import LanguageAdapter, rel
14
+ from audit_code.models import Severity
15
+
16
+ _VOID = {
17
+ "area",
18
+ "base",
19
+ "br",
20
+ "col",
21
+ "embed",
22
+ "hr",
23
+ "img",
24
+ "input",
25
+ "link",
26
+ "meta",
27
+ "param",
28
+ "source",
29
+ "track",
30
+ "wbr",
31
+ }
32
+ # Elements the HTML spec auto-closes; unclosed is legal, not a defect.
33
+ _OPTIONAL_CLOSE = {
34
+ "p",
35
+ "li",
36
+ "td",
37
+ "tr",
38
+ "th",
39
+ "dt",
40
+ "dd",
41
+ "option",
42
+ "html",
43
+ "body",
44
+ "head",
45
+ }
46
+
47
+
48
class _TagBalancer(HTMLParser):
    """HTMLParser subclass that tracks open elements and records imbalance.

    ``stack`` holds ``(tag, line)`` for every element still open;
    ``problems`` collects ``(line, message)`` pairs.
    """

    def __init__(self):
        super().__init__(convert_charrefs=True)
        self.stack: list = []  # (tag, line)
        self.problems: list = []  # (line, message)

    def handle_starttag(self, tag, attrs):
        # Void elements never take a closing tag, so they are not tracked.
        if tag in _VOID:
            return
        self.stack.append((tag, self.getpos()[0]))

    def handle_startendtag(self, tag, attrs):
        # Self-closing syntax opens and closes at once: nothing to balance.
        return None

    def handle_endtag(self, tag):
        if tag in _VOID:
            return
        line = self.getpos()[0]
        if not any(name == tag for name, _ in self.stack):
            self.problems.append((line, f"stray closing tag </{tag}>"))
            return
        # Unwind to the matching opener; anything popped on the way was left
        # open, which is only a problem for elements that require a close.
        while self.stack:
            open_tag, open_line = self.stack.pop()
            if open_tag == tag:
                return
            if open_tag in _OPTIONAL_CLOSE:
                continue
            self.problems.append(
                (
                    open_line,
                    f"<{open_tag}> never closed (implicitly "
                    f"closed by </{tag}> at line {line})",
                )
            )

    def finish(self):
        """Report every element still open that requires a closing tag."""
        self.problems.extend(
            (open_line, f"<{open_tag}> never closed")
            for open_tag, open_line in self.stack
            if open_tag not in _OPTIONAL_CLOSE
        )
86
+
87
+
88
def _check_css_braces(text: str) -> list:
    """Return ``(line, message)`` pairs for unbalanced braces in CSS/SCSS.

    Braces inside quoted strings, ``/* ... */`` comments and ``//`` line
    comments are ignored. ``//`` is only treated as a comment outside
    parentheses, so an unquoted ``url(http://host/x.png)`` does not swallow
    the rest of its line. An unterminated ``/*`` stops the scan at that
    point.

    A stray ``}`` is reported on its own line and the depth is reset to zero
    so later rules are still checked. Braces left open are reported once, on
    the last line scanned.
    """
    problems = []
    depth = 0
    parens = 0
    line = 1
    i = 0
    n = len(text)
    in_str: str | None = None
    while i < n:
        ch = text[i]
        if ch == "\n":
            line += 1
            in_str = None  # CSS strings do not span raw newlines
        elif in_str:
            if ch == "\\":
                # An escaped newline continues the string but is still a
                # line break for reporting purposes.
                if i + 1 < n and text[i + 1] == "\n":
                    line += 1
                i += 1  # skip the escaped character
            elif ch == in_str:
                in_str = None
        elif ch in "\"'":
            in_str = ch
        elif ch == "/" and i + 1 < n and text[i + 1] == "*":
            end = text.find("*/", i + 2)
            if end == -1:
                break
            line += text.count("\n", i, end)
            i = end + 1
        elif ch == "/" and parens == 0 and i + 1 < n and text[i + 1] == "/":
            # Jump to just before the newline so the next iteration counts it.
            nl = text.find("\n", i)
            i = (nl - 1) if nl != -1 else n
        elif ch == "(":
            parens += 1
        elif ch == ")":
            if parens:
                parens -= 1
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth < 0:
                problems.append((line, "unmatched closing brace '}'"))
                depth = 0
        i += 1
    if depth > 0:
        problems.append((line, f"{depth} unclosed brace(s) '{{' at end of file"))
    return problems
128
+
129
+
130
class HtmlAdapter(LanguageAdapter):
    """Language adapter for HTML/CSS projects."""

    language = "html"
    extensions = (".html", ".htm", ".css", ".scss")
    marker_files = ()

    @classmethod
    def check_files(cls, root: Path, files: list):
        """Run the structural checks on every HTML and CSS/SCSS file.

        ``.html``/``.htm`` files go through the tag balancer; everything else
        gets the brace-balance check. At most 20 problems per file are
        reported, all as MEDIUM findings. A file that cannot be read yields a
        MEDIUM finding and is left out of the checked counts instead of
        aborting the whole audit.
        """
        findings = []
        html_n = css_n = unreadable = 0
        for f in files:
            try:
                text = f.read_text(encoding="utf-8", errors="replace")
            except OSError as e:
                unreadable += 1
                findings.append(
                    cls.finding(
                        f"could not read file: {e}",
                        file=rel(f, root),
                        severity=Severity.MEDIUM,
                    )
                )
                continue
            if f.suffix in (".html", ".htm"):
                html_n += 1
                parser = _TagBalancer()
                try:
                    parser.feed(text)
                    parser.close()
                except Exception as e:  # HTMLParser rarely raises; fail closed
                    parser.problems.append((0, f"parser error: {e}"))
                parser.finish()
                problems = parser.problems[:20]
            else:
                css_n += 1
                problems = _check_css_braces(text)[:20]
            for line, msg in problems:
                findings.append(
                    cls.finding(
                        msg,
                        file=rel(f, root),
                        # Line 0 marks "no position" (parser error).
                        line=line or None,
                        severity=Severity.MEDIUM,
                    )
                )
        notes = [
            f"{html_n} HTML file(s) tag-balance checked, "
            f"{css_n} CSS/SCSS file(s) brace-balance checked "
            "(structural check — not a full spec validator)"
        ]
        if unreadable:
            notes.append(f"{unreadable} file(s) could not be read")
        return cls.result(findings, notes)
@@ -0,0 +1 @@
1
+ """Java adapter."""