harness-scorecard 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. harness_scorecard-1.0.0/LICENSE +21 -0
  2. harness_scorecard-1.0.0/PKG-INFO +106 -0
  3. harness_scorecard-1.0.0/README.md +83 -0
  4. harness_scorecard-1.0.0/pyproject.toml +97 -0
  5. harness_scorecard-1.0.0/src/harness_scorecard/__init__.py +5 -0
  6. harness_scorecard-1.0.0/src/harness_scorecard/checks/__init__.py +36 -0
  7. harness_scorecard-1.0.0/src/harness_scorecard/checks/base.py +149 -0
  8. harness_scorecard-1.0.0/src/harness_scorecard/checks/destructive_git.py +148 -0
  9. harness_scorecard-1.0.0/src/harness_scorecard/checks/egress.py +71 -0
  10. harness_scorecard-1.0.0/src/harness_scorecard/checks/observability.py +65 -0
  11. harness_scorecard-1.0.0/src/harness_scorecard/checks/provenance.py +52 -0
  12. harness_scorecard-1.0.0/src/harness_scorecard/checks/recovery.py +64 -0
  13. harness_scorecard-1.0.0/src/harness_scorecard/checks/secret_protection.py +146 -0
  14. harness_scorecard-1.0.0/src/harness_scorecard/checks/self_protection.py +129 -0
  15. harness_scorecard-1.0.0/src/harness_scorecard/checks/subagent_isolation.py +86 -0
  16. harness_scorecard-1.0.0/src/harness_scorecard/checks/tool_surface.py +99 -0
  17. harness_scorecard-1.0.0/src/harness_scorecard/checks/verification.py +52 -0
  18. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/__init__.py +39 -0
  19. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/destructive_git.py +107 -0
  20. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/egress.py +94 -0
  21. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/observability.py +59 -0
  22. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/provenance.py +70 -0
  23. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/recovery.py +53 -0
  24. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/secret_protection.py +100 -0
  25. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/self_protection.py +116 -0
  26. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/subagent_isolation.py +70 -0
  27. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/tool_surface.py +74 -0
  28. harness_scorecard-1.0.0/src/harness_scorecard/checks_codex/verification.py +67 -0
  29. harness_scorecard-1.0.0/src/harness_scorecard/cli.py +113 -0
  30. harness_scorecard-1.0.0/src/harness_scorecard/discovery.py +177 -0
  31. harness_scorecard-1.0.0/src/harness_scorecard/discovery_codex.py +251 -0
  32. harness_scorecard-1.0.0/src/harness_scorecard/dispatch.py +60 -0
  33. harness_scorecard-1.0.0/src/harness_scorecard/htmlreport.py +152 -0
  34. harness_scorecard-1.0.0/src/harness_scorecard/models.py +137 -0
  35. harness_scorecard-1.0.0/src/harness_scorecard/parsing.py +134 -0
  36. harness_scorecard-1.0.0/src/harness_scorecard/py.typed +0 -0
  37. harness_scorecard-1.0.0/src/harness_scorecard/redaction.py +52 -0
  38. harness_scorecard-1.0.0/src/harness_scorecard/report.py +114 -0
  39. harness_scorecard-1.0.0/src/harness_scorecard/sarif.py +160 -0
  40. harness_scorecard-1.0.0/src/harness_scorecard/scoring.py +99 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Saagar Patel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,106 @@
1
+ Metadata-Version: 2.4
2
+ Name: harness-scorecard
3
+ Version: 1.0.0
4
+ Summary: A read-only linter and A-F maturity grader for coding-agent harnesses (Claude Code, Codex).
5
+ Keywords: claude-code,codex,coding-agent,harness,linter,sarif,security,static-analysis,supply-chain
6
+ Author: Saagar Patel
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Security
16
+ Classifier: Topic :: Software Development :: Quality Assurance
17
+ Classifier: Typing :: Typed
18
+ Requires-Python: >=3.12
19
+ Project-URL: Homepage, https://github.com/saagpatel/harness-scorecard
20
+ Project-URL: Repository, https://github.com/saagpatel/harness-scorecard
21
+ Project-URL: Issues, https://github.com/saagpatel/harness-scorecard/issues
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Harness Scorecard
25
+
26
+ A read-only linter and **A–F maturity grader for coding-agent harnesses**. Point it at a
27
+ Claude Code or Codex setup — Claude Code's hooks, `permissions`, `rules/*.md`, agents, and
28
+ `CLAUDE.md`, or Codex's `config.toml` (sandbox, approval policy, trust levels), `hooks.json`,
29
+ and `AGENTS.md` — and it returns a graded scorecard: the overall maturity grade, the specific
30
+ gaps, and the guards that are missing, each with rationale. The harness type is auto-detected.
31
+
32
+ "Harness engineering" became a named discipline in 2026 and everyone is assembling harnesses
33
+ with no way to tell if theirs is any good. The rubric is the product: every check traces to a
34
+ **documented red-team failure mode**, not generic advice.
35
+
36
+ ## What makes the grade real
37
+
38
+ Most config "linters" credit a harness for declaring a rule. This one models the *effective*
39
+ enforcement floor. The headline example:
40
+
41
+ > `autoMode.hard_deny` is **inert** when `permissions.defaultMode == "bypassPermissions"`.
42
+
43
+ A naive scorer reads a rich `hard_deny` block and awards an A. Harness Scorecard reads the
44
+ mode, discounts the inert block, and grades against what actually fires — `permissions.deny`
45
+ globs plus the PreToolUse hooks. See [`docs/rubric.md`](docs/rubric.md) for the full model,
46
+ including **capability gates** that cap the grade when a critical hole is present (you can't
47
+ score an A with readable credentials, no matter how many cheap checks pass).
48
+
49
+ ## Usage
50
+
51
+ ```bash
52
+ # Grade a harness directory (e.g. your ~/.claude)
53
+ harness-scorecard scan ~/.claude
54
+
55
+ # JSON for tooling, plus a self-contained HTML scorecard
56
+ harness-scorecard scan ~/.claude --format json --html scorecard.html
57
+
58
+ # SARIF 2.1.0 for CI / GitHub code scanning, failing the run below grade C
59
+ harness-scorecard scan ~/.claude --sarif harness.sarif --min-grade C
60
+ ```
61
+
62
+ `--min-grade {A,B,C,D,F}` sets the bar (default `B`). Exit codes: `0` meets the bar ·
63
+ `1` below the bar · `2` no harness found.
64
+
65
+ ## GitHub Action
66
+
67
+ Grade your harness in CI and upload the findings to code scanning:
68
+
69
+ ```yaml
70
+ - uses: saagpatel/harness-scorecard@v1
71
+ with:
72
+ path: .claude
73
+ min-grade: B
74
+ ```
75
+
76
+ The action writes SARIF and uploads it (requires `security-events: write`) **even when the grade
77
+ fails the build**, so findings always reach code scanning. A complete workflow — permissions,
78
+ weekly scheduling, SARIF upload — is in [`examples/github-workflow.yml`](examples/github-workflow.yml).
79
+
80
+ ## Guarantees
81
+
82
+ - **Read-only.** It never writes to the harness it audits.
83
+ - **Privacy-preserving.** All output redacts secrets, tokens, emails, and absolute home
84
+ paths. Nothing leaves the machine.
85
+ - **Dependency-free runtime.** The scorer ships stdlib-only — a tool that grades
86
+ supply-chain hygiene should carry the smallest surface itself.
87
+
88
+ ## Scope (v1)
89
+
90
+ Implements **all ten rubric dimensions** end-to-end for **both Claude Code and Codex**: secret
91
+ protection, egress/exfiltration control, tool-surface & inbound-injection defense,
92
+ destructive-action & git safety, harness self-protection & integrity, verification gates,
93
+ subagent isolation & governance, recovery/rollback safety, memory/provenance hygiene, and
94
+ observability/audit trail (the critical gated trio is D1/D4/D5). Each harness has its own
95
+ adapter and check suite over the shared scoring engine; the bypass-aware effective floor maps
96
+ to Codex's `sandbox_mode = "danger-full-access"` + `approval_policy = "never"` just as it does
97
+ to Claude Code's `bypassPermissions`. The rubric is versioned and emitted in every report.
98
+
99
+ ## Development
100
+
101
+ ```bash
102
+ uv sync --frozen # install dev tooling from the lockfile
103
+ uv run --no-sync python -m unittest discover -s tests # tests (stdlib runner, zero extra deps)
104
+ uv run --no-sync ruff check src/ tests/ # lint
105
+ uv run --no-sync ty check src/ # type check
106
+ ```
@@ -0,0 +1,83 @@
1
+ # Harness Scorecard
2
+
3
+ A read-only linter and **A–F maturity grader for coding-agent harnesses**. Point it at a
4
+ Claude Code or Codex setup — Claude Code's hooks, `permissions`, `rules/*.md`, agents, and
5
+ `CLAUDE.md`, or Codex's `config.toml` (sandbox, approval policy, trust levels), `hooks.json`,
6
+ and `AGENTS.md` — and it returns a graded scorecard: the overall maturity grade, the specific
7
+ gaps, and the guards that are missing, each with rationale. The harness type is auto-detected.
8
+
9
+ "Harness engineering" became a named discipline in 2026 and everyone is assembling harnesses
10
+ with no way to tell if theirs is any good. The rubric is the product: every check traces to a
11
+ **documented red-team failure mode**, not generic advice.
12
+
13
+ ## What makes the grade real
14
+
15
+ Most config "linters" credit a harness for declaring a rule. This one models the *effective*
16
+ enforcement floor. The headline example:
17
+
18
+ > `autoMode.hard_deny` is **inert** when `permissions.defaultMode == "bypassPermissions"`.
19
+
20
+ A naive scorer reads a rich `hard_deny` block and awards an A. Harness Scorecard reads the
21
+ mode, discounts the inert block, and grades against what actually fires — `permissions.deny`
22
+ globs plus the PreToolUse hooks. See [`docs/rubric.md`](docs/rubric.md) for the full model,
23
+ including **capability gates** that cap the grade when a critical hole is present (you can't
24
+ score an A with readable credentials, no matter how many cheap checks pass).
25
+
26
+ ## Usage
27
+
28
+ ```bash
29
+ # Grade a harness directory (e.g. your ~/.claude)
30
+ harness-scorecard scan ~/.claude
31
+
32
+ # JSON for tooling, plus a self-contained HTML scorecard
33
+ harness-scorecard scan ~/.claude --format json --html scorecard.html
34
+
35
+ # SARIF 2.1.0 for CI / GitHub code scanning, failing the run below grade C
36
+ harness-scorecard scan ~/.claude --sarif harness.sarif --min-grade C
37
+ ```
38
+
39
+ `--min-grade {A,B,C,D,F}` sets the bar (default `B`). Exit codes: `0` meets the bar ·
40
+ `1` below the bar · `2` no harness found.
41
+
42
+ ## GitHub Action
43
+
44
+ Grade your harness in CI and upload the findings to code scanning:
45
+
46
+ ```yaml
47
+ - uses: saagpatel/harness-scorecard@v1
48
+ with:
49
+ path: .claude
50
+ min-grade: B
51
+ ```
52
+
53
+ The action writes SARIF and uploads it (requires `security-events: write`) **even when the grade
54
+ fails the build**, so findings always reach code scanning. A complete workflow — permissions,
55
+ weekly scheduling, SARIF upload — is in [`examples/github-workflow.yml`](examples/github-workflow.yml).
56
+
57
+ ## Guarantees
58
+
59
+ - **Read-only.** It never writes to the harness it audits.
60
+ - **Privacy-preserving.** All output redacts secrets, tokens, emails, and absolute home
61
+ paths. Nothing leaves the machine.
62
+ - **Dependency-free runtime.** The scorer ships stdlib-only — a tool that grades
63
+ supply-chain hygiene should carry the smallest surface itself.
64
+
65
+ ## Scope (v1)
66
+
67
+ Implements **all ten rubric dimensions** end-to-end for **both Claude Code and Codex**: secret
68
+ protection, egress/exfiltration control, tool-surface & inbound-injection defense,
69
+ destructive-action & git safety, harness self-protection & integrity, verification gates,
70
+ subagent isolation & governance, recovery/rollback safety, memory/provenance hygiene, and
71
+ observability/audit trail (the critical gated trio is D1/D4/D5). Each harness has its own
72
+ adapter and check suite over the shared scoring engine; the bypass-aware effective floor maps
73
+ to Codex's `sandbox_mode = "danger-full-access"` + `approval_policy = "never"` just as it does
74
+ to Claude Code's `bypassPermissions`. The rubric is versioned and emitted in every report.
75
+
76
+ ## Development
77
+
78
+ ```bash
79
+ uv sync --frozen # install dev tooling from the lockfile
80
+ uv run --no-sync python -m unittest discover -s tests # tests (stdlib runner, zero extra deps)
81
+ uv run --no-sync ruff check src/ tests/ # lint
82
+ uv run --no-sync ty check src/ # type check
83
+ ```
@@ -0,0 +1,97 @@
1
+ [project]
2
+ name = "harness-scorecard"
3
+ version = "1.0.0"
4
+ description = "A read-only linter and A-F maturity grader for coding-agent harnesses (Claude Code, Codex)."
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ license-files = ["LICENSE"]
8
+ authors = [{ name = "Saagar Patel" }]
9
+ requires-python = ">=3.12"
10
+ keywords = [
11
+ "claude-code",
12
+ "codex",
13
+ "coding-agent",
14
+ "harness",
15
+ "linter",
16
+ "sarif",
17
+ "security",
18
+ "static-analysis",
19
+ "supply-chain",
20
+ ]
21
+ classifiers = [
22
+ "Development Status :: 4 - Beta",
23
+ "Environment :: Console",
24
+ "Intended Audience :: Developers",
25
+ "Operating System :: OS Independent",
26
+ "Programming Language :: Python :: 3",
27
+ "Programming Language :: Python :: 3.12",
28
+ "Topic :: Security",
29
+ "Topic :: Software Development :: Quality Assurance",
30
+ "Typing :: Typed",
31
+ ]
32
+ # Runtime is intentionally dependency-free: stdlib only.
33
+ # A scorer that grades supply-chain hygiene should carry the smallest
34
+ # possible dependency surface itself. Dev tooling lives in [dependency-groups].
35
+ dependencies = []
36
+
37
+ [project.urls]
38
+ Homepage = "https://github.com/saagpatel/harness-scorecard"
39
+ Repository = "https://github.com/saagpatel/harness-scorecard"
40
+ Issues = "https://github.com/saagpatel/harness-scorecard/issues"
41
+
42
+ [project.scripts]
43
+ harness-scorecard = "harness_scorecard.cli:main"
44
+
45
+ [build-system]
46
+ requires = ["uv_build>=0.11.13,<0.12.0"]
47
+ build-backend = "uv_build"
48
+
49
+ # Dev tooling is declared here but installed via an approval-gated `uv add`.
50
+ # Tests run on the stdlib (`unittest`) until pytest is approved; the suite is
51
+ # written as unittest.TestCase classes so pytest discovers it unchanged.
52
+ [dependency-groups]
53
+ dev = [{ include-group = "lint" }, { include-group = "test" }]
54
+ lint = ["ruff", "ty"]
55
+ test = ["pytest", "pytest-cov"]
56
+
57
+ [tool.ruff]
58
+ line-length = 100
59
+ target-version = "py312"
60
+
61
+ [tool.ruff.lint]
62
+ select = ["ALL"]
63
+ ignore = [
64
+ "D", # docstring style — handled case-by-case
65
+ "COM812", # trailing comma — conflicts with the formatter
66
+ "ISC001", # implicit string concat — conflicts with the formatter
67
+ "ANN401", # Any is legitimate at the raw-JSON parsing boundary
68
+ "PLR0913", # keyword-only config params (effective_block) read clearly
69
+ "TC001", # avoid TYPE_CHECKING-block ceremony in small modules
70
+ "TC002",
71
+ "TC003",
72
+ ]
73
+
74
+ [tool.ruff.lint.per-file-ignores]
75
+ # Tests use unittest.TestCase deliberately (stdlib-runnable, zero extra deps).
76
+ "tests/**" = [
77
+ "S101", # asserts are the point of tests
78
+ "PLR2004", # magic values in assertions are fine
79
+ "ANN", # test signatures need no annotations
80
+ "SLF001", # tests may touch private members
81
+ "PT009", # unittest-style assertEqual is intentional here
82
+ "PT027", # unittest-style assertRaises is intentional here
83
+ "INP001", # tests/ is intentionally not an importable package
84
+ "N802", # setUp / tearDown are the unittest API
85
+ "S108", # fixture configs use placeholder /tmp roots that are never written
86
+ ]
87
+ "src/harness_scorecard/cli.py" = ["T201"] # print is the CLI's output channel
88
+
89
+ [tool.pytest.ini_options]
90
+ testpaths = ["tests"]
91
+ addopts = ["--import-mode=importlib"]
92
+
93
+ [tool.ty.environment]
94
+ python-version = "3.12"
95
+
96
+ [tool.ty.terminal]
97
+ error-on-warning = true
@@ -0,0 +1,5 @@
1
+ """Harness Scorecard: a read-only A-F maturity grader for coding-agent harnesses."""
2
+
3
+ from harness_scorecard.models import RUBRIC_VERSION
4
+
5
+ __all__ = ["RUBRIC_VERSION"]
@@ -0,0 +1,36 @@
1
+ """Check registry: assembles every dimension's checks into one ordered list."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from harness_scorecard.checks import (
6
+ destructive_git,
7
+ egress,
8
+ observability,
9
+ provenance,
10
+ recovery,
11
+ secret_protection,
12
+ self_protection,
13
+ subagent_isolation,
14
+ tool_surface,
15
+ verification,
16
+ )
17
+ from harness_scorecard.checks.base import DIMENSIONS, Check, Dimension
18
+
19
+ # Order = dimension order in the rubric. New dimension modules append here as they land.
20
ALL_CHECKS: list[Check] = [
    check
    # One module per rubric dimension, listed in rubric order; each exposes a CHECKS list.
    for module in (
        secret_protection,
        egress,
        tool_surface,
        destructive_git,
        self_protection,
        verification,
        subagent_isolation,
        recovery,
        provenance,
        observability,
    )
    for check in module.CHECKS
]

# Dimension ids that own at least one registered check, de-duplicated while keeping
# first-seen order (dict keys preserve insertion order).
IMPLEMENTED_DIMENSION_IDS: list[str] = [*dict.fromkeys(check.dimension for check in ALL_CHECKS)]

__all__ = ["ALL_CHECKS", "DIMENSIONS", "IMPLEMENTED_DIMENSION_IDS", "Check", "Dimension"]
@@ -0,0 +1,149 @@
1
+ """Check abstraction, the dimension catalog, and the effective-enforcement helper.
2
+
3
+ A :class:`Check` pairs immutable rubric metadata (id, weight, gate status) with an
4
+ ``evaluate`` function that inspects a :class:`HarnessConfig` and returns a status. The
5
+ effective-floor helper (:func:`effective_block`) centralizes the bypass-aware rule so every
6
+ destructive-action check enforces it identically.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Callable, Iterable, Sequence
12
+ from dataclasses import dataclass, field
13
+
14
+ from harness_scorecard.discovery import HarnessConfig
15
+ from harness_scorecard.models import (
16
+ CheckResult,
17
+ Detectability,
18
+ Grade,
19
+ Severity,
20
+ Status,
21
+ )
22
+
23
+
24
+ @dataclass(frozen=True, slots=True)
25
+ class Dimension:
26
+ id: str
27
+ name: str
28
+ weight: int
29
+
30
+
31
+ # The full rubric catalog (all ten dimensions). Scoring runs over whichever dimensions
32
+ # have registered checks; the rest are reported as specced-but-pending.
33
# Keyed by dimension id; insertion order follows the rubric (D1 through D10).
DIMENSIONS: dict[str, Dimension] = {
    dimension.id: dimension
    for dimension in (
        Dimension("D1", "Secret protection & credential isolation", 5),
        Dimension("D2", "Egress / exfiltration control", 4),
        Dimension("D3", "Tool-surface & inbound-injection defense", 4),
        Dimension("D4", "Destructive-action & git safety", 5),
        Dimension("D5", "Harness self-protection & integrity", 5),
        Dimension("D6", "Verification gates", 3),
        Dimension("D7", "Subagent isolation & governance", 3),
        Dimension("D8", "Recovery / rollback safety", 2),
        Dimension("D9", "Memory / provenance hygiene", 2),
        Dimension("D10", "Observability / audit trail", 2),
    )
}
45
+
46
+
47
+ @dataclass(slots=True)
48
+ class CheckOutcome:
49
+ """The mutable result of evaluating a check: status + human-readable rationale."""
50
+
51
+ status: Status
52
+ message: str
53
+ evidence: list[str] = field(default_factory=list)
54
+
55
+
56
def passed(message: str, evidence: Iterable[str] = ()) -> CheckOutcome:
    """Build a PASS outcome; ``evidence`` is copied into a new list."""
    return CheckOutcome(status=Status.PASS, message=message, evidence=list(evidence))
58
+
59
+
60
def partial(message: str, evidence: Iterable[str] = ()) -> CheckOutcome:
    """Build a PARTIAL outcome; ``evidence`` is copied into a new list."""
    return CheckOutcome(status=Status.PARTIAL, message=message, evidence=list(evidence))
62
+
63
+
64
def failed(message: str, evidence: Iterable[str] = ()) -> CheckOutcome:
    """Build a FAIL outcome; ``evidence`` is copied into a new list."""
    return CheckOutcome(status=Status.FAIL, message=message, evidence=list(evidence))
66
+
67
+
68
def not_applicable(message: str, evidence: Iterable[str] = ()) -> CheckOutcome:
    """Build a NOT_APPLICABLE outcome for a check that does not apply to this harness.

    Such a check is excluded from the dimension denominator. ``evidence`` is copied into a
    new list.
    """
    return CheckOutcome(
        status=Status.NOT_APPLICABLE,
        message=message,
        evidence=list(evidence),
    )
71
+
72
+
73
+ @dataclass(frozen=True, slots=True)
74
+ class Check[ConfigT]:
75
+ """A single rubric check: metadata plus an evaluation function.
76
+
77
+ Generic over the harness config it inspects (``HarnessConfig`` for Claude Code,
78
+ ``CodexConfig`` for Codex) so the rubric metadata, ``run()``, and the dimension catalog
79
+ are shared across adapters while each check only sees the config shape it understands.
80
+ """
81
+
82
+ id: str
83
+ dimension: str
84
+ title: str
85
+ weight: int
86
+ evaluate: Callable[[ConfigT], CheckOutcome]
87
+ severity: Severity = Severity.MEDIUM
88
+ detectability: Detectability = Detectability.STATIC
89
+ is_gate: bool = False
90
+ gate_cap: Grade | None = None
91
+ remediation: str = ""
92
+
93
+ def run(self, config: ConfigT) -> CheckResult:
94
+ outcome = self.evaluate(config)
95
+ return CheckResult(
96
+ id=self.id,
97
+ dimension=self.dimension,
98
+ title=self.title,
99
+ status=outcome.status,
100
+ weight=self.weight,
101
+ message=outcome.message,
102
+ severity=self.severity,
103
+ detectability=self.detectability,
104
+ is_gate=self.is_gate,
105
+ gate_cap=self.gate_cap,
106
+ remediation=self.remediation,
107
+ evidence=outcome.evidence,
108
+ )
109
+
110
+
111
+ @dataclass(slots=True)
112
+ class EffectiveFloor:
113
+ """Whether a protection is present in the *effective* enforcement floor (rubric §3)."""
114
+
115
+ blocked: bool
116
+ sources: list[str]
117
+
118
+
119
def hard_deny_covers(config: HarnessConfig, token_groups: Sequence[Sequence[str]]) -> bool:
    """Report whether an effective ``hard_deny`` rule contains every token of some group.

    Each inner sequence of ``token_groups`` is an AND-group: a rule matches it only when
    every token is a substring of that rule. The groups themselves are OR-ed together.

    Matching is case-insensitive on both sides. Empty groups are ignored, because
    ``all(())`` is vacuously true and would otherwise credit *any* rule as a guard.

    Returns ``False`` without inspecting any rule when ``config.hard_deny_effective`` is
    falsy, and also when no non-empty group is supplied.
    """
    if not config.hard_deny_effective:
        return False
    # Lower-case the tokens too (rules are lower-cased below) and drop empty groups.
    groups = [[token.lower() for token in group] for group in token_groups if group]
    if not groups:
        return False
    rules = [rule.lower() for rule in config.hard_deny]
    return any(all(token in rule for token in group) for group in groups for rule in rules)
125
+
126
+
127
def effective_block(
    config: HarnessConfig,
    *,
    hooks: Sequence[str] = (),
    deny_needles: Sequence[str] = (),
    hard_deny_tokens: Sequence[Sequence[str]] = (),
    event: str = "PreToolUse",
    matcher: str | None = "Bash",
) -> EffectiveFloor:
    """Decide whether the effective enforcement floor blocks an action.

    A guard is counted when any of the following holds:

    * ``config.has_hook(event, name, matcher)`` is true for one of ``hooks`` (recorded as
      ``hook:<name>``, once per matching name);
    * ``config.deny_matches`` is true for at least one of ``deny_needles`` (recorded as
      ``permissions.deny``);
    * ``hard_deny_covers`` accepts ``hard_deny_tokens`` (recorded as ``hard_deny``); a
      ``hard_deny`` rule under bypass mode contributes nothing.

    The returned floor is ``blocked`` exactly when at least one source was recorded.
    """
    guard_sources: list[str] = []
    for hook_name in hooks:
        if config.has_hook(event, hook_name, matcher):
            guard_sources.append(f"hook:{hook_name}")

    if any(map(config.deny_matches, deny_needles)):
        guard_sources.append("permissions.deny")

    if hard_deny_covers(config, hard_deny_tokens):
        guard_sources.append("hard_deny")

    return EffectiveFloor(blocked=len(guard_sources) > 0, sources=guard_sources)
@@ -0,0 +1,148 @@
1
+ """D4 - Destructive-action & git safety.
2
+
3
+ Every block check here resolves against the *effective* enforcement floor, so a guard that
4
+ exists only in an inert ``hard_deny`` block under bypass mode scores as absent.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from harness_scorecard.checks.base import (
10
+ Check,
11
+ CheckOutcome,
12
+ effective_block,
13
+ failed,
14
+ partial,
15
+ passed,
16
+ )
17
+ from harness_scorecard.discovery import HarnessConfig
18
+ from harness_scorecard.models import Detectability, Grade, Severity
19
+
20
+
21
def _bypass_note(config: HarnessConfig) -> list[str]:
    """Return a one-item evidence note when ``config.is_bypass`` is truthy, else ``[]``."""
    note = "defaultMode=bypassPermissions: autoMode.hard_deny is INERT"
    return [note] if config.is_bypass else []
25
+
26
+
27
def _check_push_to_protected_branch(config: HarnessConfig) -> CheckOutcome:
    """Pass when the effective floor blocks pushing to main/master, otherwise fail.

    Accepted guards: one of the named git hooks, a matching ``permissions.deny`` needle,
    or a ``hard_deny`` rule mentioning push together with main or master.
    """
    floor = effective_block(
        config,
        hooks=("git-safety", "git-guard", "protect-branch"),
        deny_needles=("push origin main", "push origin master", "git push"),
        hard_deny_tokens=(("push", "main"), ("push", "master")),
    )
    if not floor.blocked:
        # Put the bypass note first so an inert hard_deny block is called out up front.
        missing_evidence = [*_bypass_note(config), "no git-safety hook or deny entry found"]
        return failed(
            "Push to main/master is not blocked by any effective guard.",
            evidence=missing_evidence,
        )
    return passed(
        "Push to a protected branch is blocked by the effective floor.",
        evidence=floor.sources,
    )
43
+
44
+
45
def _check_catastrophic_deletion(config: HarnessConfig) -> CheckOutcome:
    """Pass when the effective floor guards against catastrophic ``rm -rf``, otherwise fail."""
    floor = effective_block(
        config,
        hooks=("block-dangerous-cmds", "defer-destructive", "dangerous"),
        deny_needles=("rm -rf /", "rm -rf ~", "rm -rf /*"),
        hard_deny_tokens=(("rm -rf",),),
    )
    if not floor.blocked:
        return failed(
            "No effective guard against catastrophic rm -rf deletion.",
            evidence=_bypass_note(config),
        )
    return passed(
        "Catastrophic deletion is blocked by the effective floor.",
        evidence=floor.sources,
    )
58
+
59
+
60
def _check_destructive_db(config: HarnessConfig) -> CheckOutcome:
    """Pass when a DB guard hook or an effective ``hard_deny`` DB rule exists, otherwise fail.

    No ``permissions.deny`` needles are consulted for this check.
    """
    floor = effective_block(
        config,
        hooks=("db-guard", "database-guard"),
        deny_needles=(),
        hard_deny_tokens=(("destructive", "db"), ("database",), ("db", "host")),
    )
    if not floor.blocked:
        return failed(
            "No effective guard against destructive DB operations on non-local hosts.",
            evidence=_bypass_note(config),
        )
    return passed("Destructive DB operations are guarded.", evidence=floor.sources)
73
+
74
+
75
def _check_dependency_install_gate(config: HarnessConfig) -> CheckOutcome:
    """Pass when a PreToolUse Bash ``confirm-token`` or ``lockfile-freeze`` hook is registered."""
    gate_hooks = ("confirm-token", "lockfile-freeze")
    gated = any(config.has_hook("PreToolUse", hook, matcher="Bash") for hook in gate_hooks)
    if not gated:
        return failed("No gate on dependency installs; unvetted packages can be pulled in.")
    return passed("Dependency installs require a confirm-token / lockfile freeze.")
81
+
82
+
83
def _check_force_push_policy(config: HarnessConfig) -> CheckOutcome:
    """Grade the force-push / history-rewrite policy.

    * PASS    - a PreToolUse Bash ``git-safety`` hook is registered.
    * PARTIAL - no such hook, but an entry of ``config.rule_files`` contains "force" or
      "git-safety" (case-insensitive), i.e. the policy is advisory only.
    * FAIL    - neither of the above.
    """
    if config.has_hook("PreToolUse", "git-safety", matcher="Bash"):
        return passed("A git-safety hook covers force-push / history-rewrite.")

    documented = False
    for rule in config.rule_files:
        lowered = rule.lower()
        if "force" in lowered or "git-safety" in lowered:
            documented = True
            break

    if not documented:
        return failed("No force-push / history-rewrite guard or documented policy.")
    return partial(
        "Force-push policy is documented in rules/ but not enforced by a hook.",
        evidence=["advisory only"],
    )
92
+
93
+
94
# D4 registry, in rubric order. Only HS-D4-01 is a gate; its cap is grade C.
CHECKS: list[Check] = [
    # HS-D4-01: protected-branch push (critical, gated).
    Check(
        id="HS-D4-01",
        dimension="D4",
        title="Push to protected branch effectively blocked",
        severity=Severity.CRITICAL,
        weight=5,
        is_gate=True,
        gate_cap=Grade.C,
        evaluate=_check_push_to_protected_branch,
        remediation=(
            "Block push to main/master via a PreToolUse Bash hook "
            "or a deny entry (not hard_deny alone under bypass)."
        ),
    ),
    # HS-D4-02: catastrophic rm -rf.
    Check(
        id="HS-D4-02",
        dimension="D4",
        title="Catastrophic deletion blocked",
        severity=Severity.HIGH,
        weight=4,
        evaluate=_check_catastrophic_deletion,
        remediation="Add a dangerous-command hook and deny rm -rf at shallow depth.",
    ),
    # HS-D4-03: destructive database operations.
    Check(
        id="HS-D4-03",
        dimension="D4",
        title="Destructive DB ops on non-local hosts blocked",
        severity=Severity.HIGH,
        weight=4,
        evaluate=_check_destructive_db,
        remediation=(
            "Add a PreToolUse Bash db-guard hook "
            "that blocks destructive ops on non-local hosts."
        ),
    ),
    # HS-D4-04: dependency installs.
    Check(
        id="HS-D4-04",
        dimension="D4",
        title="Dependency-install / lockfile gate",
        severity=Severity.MEDIUM,
        weight=3,
        evaluate=_check_dependency_install_gate,
        remediation="Require a confirm-token for *-add/install, or add a lockfile-freeze guard.",
    ),
    # HS-D4-05: force-push policy (only partially detectable).
    Check(
        id="HS-D4-05",
        dimension="D4",
        title="Force-push / history-rewrite policy",
        severity=Severity.MEDIUM,
        detectability=Detectability.PARTIAL,
        weight=3,
        evaluate=_check_force_push_policy,
        remediation="Enforce a no-force-push policy via the git-safety hook, not docs alone.",
    ),
]