proofctl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
proofctl/__init__.py ADDED
File without changes
proofctl/baseline.py ADDED
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections import Counter
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ if TYPE_CHECKING:
10
+ from .models import Finding
11
+
12
# Name of the baseline file written at (and loaded from) the scan root.
_BASELINE_FILE = ".proofctl-baseline.json"
13
+
14
+
15
+ def _fingerprint(f: Finding) -> tuple[str, str, str]:
16
+ # Stable across line-number shifts: same bug at a new line still matches.
17
+ return (f.rule_id, f.file, f.message)
18
+
19
+
20
def save_baseline(findings: list[Finding], root: Path) -> Path:
    """Write the current findings to the baseline file under *root*.

    The baseline records (rule_id, file, message) per finding — line numbers
    are omitted so entries stay stable across line shifts.

    Returns the path of the written baseline file.
    """
    baseline_path = root / _BASELINE_FILE
    data = {
        "created": datetime.now(timezone.utc).isoformat(),
        "proofctl_version": "0.1.0",
        "count": len(findings),
        "findings": [
            {"rule_id": f.rule_id, "file": f.file, "message": f.message}
            for f in findings
        ],
    }
    # Explicit UTF-8 so output does not depend on the process locale;
    # ensure_ascii=False keeps non-ASCII paths/messages readable on disk.
    baseline_path.write_text(
        json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    return baseline_path
33
+
34
+
35
def load_baseline(root: Path) -> list[dict] | None:
    """Load baseline findings from *root*.

    Returns the list of baseline finding dicts, or None when the baseline
    file is absent, unreadable, or malformed (the caller then treats every
    finding as new).
    """
    baseline_path = root / _BASELINE_FILE
    if not baseline_path.exists():
        return None
    try:
        # OSError guards the exists()/read race and permission errors.
        data = json.loads(baseline_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None
    # A valid-JSON file whose top level is not an object (e.g. a bare list)
    # previously crashed on .get(); treat it as corrupt instead.
    if not isinstance(data, dict):
        return None
    findings = data.get("findings", [])
    return findings if isinstance(findings, list) else None
44
+
45
+
46
def filter_new_findings(findings: list[Finding], baseline: list[dict]) -> list[Finding]:
    """Return only findings not accounted for in the baseline.

    Baseline entries form a multiset: N identical entries suppress exactly
    N matching occurrences in the new run, so repeated findings are handled
    correctly.
    """
    budget: Counter[tuple] = Counter(
        (entry["rule_id"], entry["file"], entry["message"]) for entry in baseline
    )
    used: Counter[tuple] = Counter()
    fresh: list[Finding] = []
    for finding in findings:
        # Same identity key as the saved baseline: stable across line shifts.
        key = (finding.rule_id, finding.file, finding.message)
        if used[key] < budget[key]:
            used[key] += 1
        else:
            fresh.append(finding)
    return fresh
File without changes
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from abc import ABC, abstractmethod
5
+ from pathlib import Path
6
+
7
+ from ..models import Finding
8
+
9
+
10
class FileChecker(ABC):
    """Per-file Python checker.

    Invoked once for every Python file with its path, raw source text, and
    parsed AST (``None`` when the file failed to parse with a SyntaxError).
    """

    @abstractmethod
    def check(self, path: Path, source: str, tree: ast.Module | None) -> list[Finding]:
        """Analyse one Python file and return any findings."""
16
+
17
+
18
class DirectoryChecker(ABC):
    """Per-scan-root checker used for cross-file Python analysis."""

    @abstractmethod
    def check(self, root: Path, py_files: list[Path]) -> list[Finding]:
        """Analyse the whole scan root and return any findings."""
24
+
25
+
26
class HclFileChecker(ABC):
    """Per-file HCL checker (.tf, .tfvars, .hcl).

    Subclasses declare which extensions they handle via the ``extensions``
    class variable. An empty tuple means "run on all HCL files".
    """

    # File suffixes this checker applies to; () = every HCL file.
    extensions: tuple[str, ...] = ()

    @abstractmethod
    def check(self, path: Path, source: str) -> list[Finding]:
        """Analyse one HCL file and return any findings."""
38
+
39
+
40
class HclDirChecker(ABC):
    """Per-scan-root checker for cross-file HCL analysis."""

    @abstractmethod
    def check(self, root: Path, hcl_files: list[Path]) -> list[Finding]:
        """Analyse all HCL files under the scan root and return any findings."""
46
+
47
+
48
class DockerfileChecker(ABC):
    """Checker for Dockerfile* files (plain text — no AST is provided)."""

    @abstractmethod
    def check(self, path: Path, source: str) -> list[Finding]:
        """Analyse one Dockerfile and return any findings."""
54
+
55
+
56
class YamlFileChecker(ABC):
    """Checker for .yml / .yaml files (parsed via PyYAML)."""

    @abstractmethod
    def check(self, path: Path, source: str) -> list[Finding]:
        """Analyse one YAML file and return any findings."""
@@ -0,0 +1,452 @@
1
+ """Dockerfile static analysis — PROOFCTL-DF-* rules."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from ..models import Finding, Severity
8
+ from .base import DockerfileChecker
9
+
10
# ── helpers ───────────────────────────────────────────────────────────────────

# DF-001: an image reference pinned by content digest (immutable).
_SHA_DIGEST_RE = re.compile(r"@sha256:[a-f0-9]{64}")
# NOTE(review): _HAS_TAG_RE is not referenced anywhere in this module —
# DF-001 tests for a tag with a plain ":" check instead. Confirm and remove.
_HAS_TAG_RE = re.compile(r":[a-zA-Z0-9][\w.\-]*$")

# DF-007: curl/wget piped to a shell
_CURL_PIPE_RE = re.compile(r"\b(curl|wget)\b.+\|\s*(bash|sh|zsh|python[23]?|perl|ruby)")

# DF-009: dependency install patterns in RUN args
_DEP_INSTALL_RE = re.compile(
    r"\b(pip3?\s+install|npm\s+install|yarn\s+install|poetry\s+install|pipenv\s+install)\b"
)

# DF-009: broad COPY source tokens (whole build context copied at once)
_BROAD_COPY_SOURCES = {".", "./"}

# DF-010: OCI label prefix
_OCI_LABEL_PREFIX = "org.opencontainers.image."

# DF-005: ENV/ARG variable names that suggest a secret is being baked into
# the image (matched case-insensitively against the full variable name).
_SECRET_VAR_RE = re.compile(
    r"^(password|passwd|secret[_-]?key|secret|api[_-]?key|auth[_-]?token|"
    r"access[_-]?key|private[_-]?key|credentials?|client[_-]?secret|"
    r"db[_-]?pass(?:word)?|database[_-]?pass(?:word)?|jwt[_-]?secret|"
    r"encryption[_-]?key|bearer[_-]?token|session[_-]?secret)$",
    re.IGNORECASE,
)

# Archive extensions that make ADD legitimate (auto-extract)
_ARCHIVE_RE = re.compile(r"\.(tar(\.(gz|bz2|xz|lz4|zst))?|tgz|tbz2)$", re.IGNORECASE)
# URLs that make ADD legitimate (remote fetch)
_URL_RE = re.compile(r"^https?://")
41
+
42
+
43
+ def _parse_dockerfile(source: str) -> list[tuple[int, str, str]]:
44
+ """Return [(1-based lineno, INSTRUCTION, arguments), ...]."""
45
+ instructions: list[tuple[int, str, str]] = []
46
+ lines = source.splitlines()
47
+ i = 0
48
+ while i < len(lines):
49
+ raw = lines[i]
50
+ stripped = raw.strip()
51
+ if not stripped or stripped.startswith("#"):
52
+ i += 1
53
+ continue
54
+ lineno = i + 1
55
+ # Collect continuation lines
56
+ joined = stripped
57
+ while joined.endswith("\\") and i + 1 < len(lines):
58
+ joined = joined[:-1].rstrip()
59
+ i += 1
60
+ cont = lines[i].strip()
61
+ if cont.startswith("#"):
62
+ i += 1
63
+ continue
64
+ joined += " " + cont
65
+ parts = joined.split(None, 1)
66
+ if parts:
67
+ instr = parts[0].upper()
68
+ args = parts[1] if len(parts) > 1 else ""
69
+ instructions.append((lineno, instr, args))
70
+ i += 1
71
+ return instructions
72
+
73
+
74
+ def _from_image_ref(args: str) -> tuple[str, str | None]:
75
+ """Parse 'FROM [--platform=...] <image> [AS <name>]' → (image_ref, stage_name)."""
76
+ tokens = args.split()
77
+ # Strip --platform= flag
78
+ tokens = [t for t in tokens if not t.startswith("--")]
79
+ if not tokens:
80
+ return ("", None)
81
+ image = tokens[0]
82
+ stage = tokens[2] if len(tokens) >= 3 and tokens[1].upper() == "AS" else None
83
+ return image, stage
84
+
85
+
86
+ # ── rule implementations ──────────────────────────────────────────────────────
87
+
88
def _df001(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-001 — Unpinned base image.

    Severity scales with mutability: no tag (implicit :latest) and explicit
    :latest are errors; tag-only pinning is a warning; digest pinning is
    clean. Stage aliases and scratch/busybox bases are exempt.
    """
    # First pass: collect multi-stage aliases so `FROM <alias>` is skipped.
    aliases: set[str] = set()
    for _, instr, args in instructions:
        if instr == "FROM":
            _, stage = _from_image_ref(args)
            if stage:
                aliases.add(stage.lower())

    results: list[Finding] = []
    for lineno, instr, args in instructions:
        if instr != "FROM":
            continue
        image, _ = _from_image_ref(args)
        if not image:
            continue

        # Special base images / references to earlier build stages.
        base = re.split(r"[@:]", image)[0].lower()
        if base in ("scratch", "busybox") or base in aliases:
            continue

        # Digest-pinned references are immutable — nothing to report.
        if _SHA_DIGEST_RE.search(image):
            continue

        # Strip any digest portion before inspecting the tag.
        tag_part = image.split("@")[0]

        if ":" not in tag_part:
            severity = Severity.ERROR
            message = f"Base image '{image}' has no tag — resolves to :latest implicitly"
            hint = "Pin to a specific version and digest: FROM python:3.12@sha256:<digest>"
        elif tag_part.endswith(":latest"):
            severity = Severity.ERROR
            message = f"Base image '{image}' uses the mutable ':latest' tag"
            hint = "Replace :latest with a pinned version and digest."
        else:
            severity = Severity.WARNING
            message = f"Base image '{image}' is pinned to a tag but not a digest (still mutable)"
            hint = "Add digest pinning: FROM python:3.12@sha256:<digest>"

        results.append(Finding(
            file=str(path),
            line=lineno,
            col=0,
            rule_id="PROOFCTL-DF-001",
            rule_name="Unpinned base image",
            severity=severity,
            message=message,
            hint=hint,
            authority="SLSA Supply Chain – Base image provenance",
        ))
    return results
143
+
144
+
145
def _df002(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-002 — Running as root.

    One finding when there is no USER instruction at all; otherwise one per
    USER instruction that explicitly selects root (by name or uid 0).
    """
    users = [(ln, args) for ln, instr, args in instructions if instr == "USER"]
    if not users:
        return [Finding(
            file=str(path),
            line=None,
            col=None,
            rule_id="PROOFCTL-DF-002",
            rule_name="Running as root",
            severity=Severity.WARNING,
            message="No USER instruction — container runs as root by default",
            hint="Add 'USER nonroot' (or a named user) before the final CMD/ENTRYPOINT.",
            authority="CIS Docker Benchmark 4.1 – Do not run containers as root",
        )]

    results: list[Finding] = []
    for ln, args in users:
        tokens = args.split()
        user = tokens[0] if tokens else ""
        if user not in ("root", "0"):
            continue
        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-002",
            rule_name="Running as root",
            severity=Severity.WARNING,
            message=f"USER instruction sets user to '{user}' (root)",
            hint="Use a non-root user: USER appuser",
            authority="CIS Docker Benchmark 4.1 – Do not run containers as root",
        ))
    return results
177
+
178
+
179
def _df003(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-003 — ADD used for local files (use COPY instead).

    Remote URLs and local archives are the two legitimate ADD uses and are
    not flagged.
    """
    results: list[Finding] = []
    for ln, instr, args in instructions:
        if instr != "ADD":
            continue
        tokens = args.split()
        if not tokens:
            continue
        # ADD may take several sources; the first token is the src we judge.
        src = tokens[0]
        if _URL_RE.match(src) or _ARCHIVE_RE.search(src):
            continue
        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-003",
            rule_name="ADD used for local files",
            severity=Severity.WARNING,
            message=f"ADD '{src}' copies a local path — use COPY for predictable behaviour",
            hint="Replace ADD with COPY unless you need URL fetch or archive auto-extraction.",
            authority="Docker Best Practices – Prefer COPY over ADD",
        ))
    return results
204
+
205
+
206
def _df004(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-004 — apt-get install without --no-install-recommends."""
    results: list[Finding] = []
    for ln, instr, args in instructions:
        installs = "apt-get install" in args or "apt install" in args
        if instr != "RUN" or not installs:
            continue
        if "--no-install-recommends" in args:
            continue
        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-004",
            rule_name="apt-get install without --no-install-recommends",
            severity=Severity.INFO,
            message="apt-get install missing --no-install-recommends — installs unnecessary packages",
            hint="Add --no-install-recommends to keep image layers lean.",
            authority="Docker Best Practices – Minimise image layers",
        ))
    return results
228
+
229
+
230
def _df005(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-005 — Secret variable name in ENV or ARG.

    ENV is an error (the value is baked into image metadata); ARG is a
    warning (still visible in build history).
    """
    results: list[Finding] = []
    for ln, instr, args in instructions:
        if instr not in ("ENV", "ARG"):
            continue
        text = args.strip()
        if not text:
            continue
        # The variable name is the first token, terminated by '=' or space
        # (covers ENV KEY=VAL, legacy ENV KEY VAL, ARG KEY[=DEFAULT]).
        key = re.split(r"[=\s]", text)[0]
        if not key or not _SECRET_VAR_RE.match(key):
            continue
        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-005",
            rule_name="Secret in Dockerfile instruction",
            severity=Severity.ERROR if instr == "ENV" else Severity.WARNING,
            message=f"{instr} exposes a secret-named variable '{key}' — visible in image metadata",
            hint=(
                "Use Docker BuildKit secrets (--secret) or pass via runtime environment, "
                "never bake into the image layer."
            ),
            authority="OWASP Docker Security – Never store secrets in images",
        ))
    return results
258
+
259
+
260
def _df006(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-006 — Missing HEALTHCHECK instruction.

    Only runnable images (those with CMD/ENTRYPOINT) are checked, and
    scratch-based images are exempt.
    """
    runnable = any(instr in ("CMD", "ENTRYPOINT") for _, instr, _ in instructions)
    has_check = any(instr == "HEALTHCHECK" for _, instr, _ in instructions)
    scratch_based = any(
        instr == "FROM" and _from_image_ref(args)[0].lower() == "scratch"
        for _, instr, args in instructions
    )
    if not runnable or has_check or scratch_based:
        return []
    return [Finding(
        file=str(path),
        line=None,
        col=None,
        rule_id="PROOFCTL-DF-006",
        rule_name="Missing HEALTHCHECK",
        severity=Severity.INFO,
        message="Dockerfile has no HEALTHCHECK — orchestrators cannot detect unhealthy containers",
        hint="Add HEALTHCHECK --interval=30s --timeout=3s CMD curl -f http://localhost/health || exit 1",
        authority="Docker Best Practices – Container health checks",
    )]
282
+
283
+
284
def _df007(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-007 — curl|sh / wget|sh (remote script execution in RUN)."""
    return [
        Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-007",
            rule_name="Remote script execution via pipe",
            severity=Severity.ERROR,
            message="RUN instruction pipes a remote download directly to a shell — arbitrary code execution risk",
            hint=(
                "Download to a file, verify its checksum (sha256sum), then execute: "
                "RUN curl -fsSL https://... -o /tmp/script && sha256sum -c /tmp/script.sha256 && sh /tmp/script"
            ),
            authority="OWASP CICD-SEC-3 – Dependency Chain Abuse / CIS Docker Benchmark 4.6",
        )
        for ln, instr, args in instructions
        if instr == "RUN" and _CURL_PIPE_RE.search(args)
    ]
306
+
307
+
308
def _df008(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-008 — ADD <url> without checksum verification.

    Accepted as verified: BuildKit's inline --checksum=sha256:..., or a
    sha256sum / shasum / gpg verification RUN within the next three
    instructions.
    """
    def _verifies(run_args: str) -> bool:
        # Any of the common integrity-check commands counts.
        return (
            "sha256sum" in run_args
            or "shasum -a 256" in run_args
            or "gpg --verify" in run_args
        )

    results: list[Finding] = []
    for pos, (ln, instr, args) in enumerate(instructions):
        if instr != "ADD":
            continue
        tokens = args.split()
        if not tokens:
            continue
        url = tokens[0]
        if not _URL_RE.match(url):
            continue

        # BuildKit inline checksum syntax is sufficient on its own.
        if "--checksum=sha256:" in args:
            continue

        # Look ahead up to three instructions for a verification RUN.
        lookahead = instructions[pos + 1: pos + 4]
        if any(ni == "RUN" and _verifies(na) for _, ni, na in lookahead):
            continue

        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-008",
            rule_name="ADD URL without checksum verification",
            severity=Severity.ERROR,
            message=f"ADD fetches '{url}' without checksum verification — supply chain integrity risk",
            hint="Use ADD with --checksum=sha256:<hash>, or RUN curl ... && sha256sum -c to verify before using.",
            authority="OWASP CICD-SEC-6 – Insufficient Artifact Integrity Validation",
        ))
    return results
350
+
351
+
352
def _df009(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-009 — COPY . . before dependency installation.

    Flags the first broad COPY (source '.' or './') when a dependency-install
    RUN appears after it: every source change then invalidates the install
    layer's build cache. (The unused local `_broad_dests` set from the
    previous revision — dead code, the destination is never inspected — has
    been removed.)
    """
    findings: list[Finding] = []

    # Find the first broad COPY (source is the whole build context).
    copy_lineno: int | None = None
    for lineno, instr, args in instructions:
        if instr != "COPY":
            continue
        tokens = args.split()
        if tokens and tokens[0] in _BROAD_COPY_SOURCES:
            copy_lineno = lineno
            break

    if copy_lineno is None:
        return findings

    # Find the first dependency-install RUN; flag only if the COPY precedes it.
    for lineno, instr, args in instructions:
        if instr != "RUN" or not _DEP_INSTALL_RE.search(args):
            continue
        if copy_lineno < lineno:
            findings.append(Finding(
                file=str(path),
                line=copy_lineno,
                col=0,
                rule_id="PROOFCTL-DF-009",
                rule_name="COPY . before dependency installation",
                severity=Severity.INFO,
                message="COPY . precedes dependency installation — every source change invalidates the dependency layer cache",
                hint=(
                    "Copy only dependency files first (requirements.txt / package.json), "
                    "run the install, then COPY . to maximise layer cache hits."
                ),
                authority="Docker Best Practices – Leverage build cache",
            ))
        break

    return findings
401
+
402
+
403
def _df010(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-010 — Missing OCI image labels.

    Only runnable images (with CMD/ENTRYPOINT) are expected to carry
    org.opencontainers.image.* labels.
    """
    if not any(instr in ("CMD", "ENTRYPOINT") for _, instr, _ in instructions):
        return []
    labelled = any(
        instr == "LABEL" and _OCI_LABEL_PREFIX in args
        for _, instr, args in instructions
    )
    if labelled:
        return []
    return [Finding(
        file=str(path),
        line=None,
        col=None,
        rule_id="PROOFCTL-DF-010",
        rule_name="Missing OCI image labels",
        severity=Severity.INFO,
        message="Dockerfile has no OCI image labels — SBOM generation and provenance tracking are impaired",
        hint=(
            "Add LABEL org.opencontainers.image.source=https://github.com/org/repo "
            "org.opencontainers.image.version=1.0.0"
        ),
        authority="OCI Image Spec – Annotations / OWASP CICD-SEC-9",
    )]
430
+
431
+
432
+ # ── main checker ──────────────────────────────────────────────────────────────
433
+
434
class _DockerfileCheckerImpl(DockerfileChecker):
    """Aggregates every PROOFCTL-DF-* rule over one parsed Dockerfile."""

    # Rules run in ID order; findings are returned in that same order.
    _RULES = (
        _df001, _df002, _df003, _df004, _df005,
        _df006, _df007, _df008, _df009, _df010,
    )

    def check(self, path: Path, source: str) -> list[Finding]:
        parsed = _parse_dockerfile(source)
        results: list[Finding] = []
        for rule in self._RULES:
            results.extend(rule(parsed, path))
        return results
449
+
450
+
451
# Class handle used by the engine. NOTE(review): despite the old "singleton"
# comment, this binds the class itself, not an instance — the engine is
# expected to instantiate it.
DockerfileRulesChecker = _DockerfileCheckerImpl