difftriage 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- difftriage-0.1.0/.github/workflows/release.yml +32 -0
- difftriage-0.1.0/.gitignore +7 -0
- difftriage-0.1.0/PKG-INFO +223 -0
- difftriage-0.1.0/README.md +212 -0
- difftriage-0.1.0/pyproject.toml +22 -0
- difftriage-0.1.0/src/difftriage/__init__.py +4 -0
- difftriage-0.1.0/src/difftriage/cli.py +103 -0
- difftriage-0.1.0/src/difftriage/config.py +70 -0
- difftriage-0.1.0/src/difftriage/models.py +33 -0
- difftriage-0.1.0/src/difftriage/scoring.py +324 -0
- difftriage-0.1.0/tests/test_scoring.py +242 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
name: release
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
tags: ["v*"]
|
|
5
|
+
jobs:
|
|
6
|
+
build:
|
|
7
|
+
runs-on: ubuntu-latest
|
|
8
|
+
steps:
|
|
9
|
+
- uses: actions/checkout@v4
|
|
10
|
+
- uses: actions/setup-python@v5
|
|
11
|
+
with:
|
|
12
|
+
python-version: "3.11"
|
|
13
|
+
- run: |
|
|
14
|
+
python -m pip install --upgrade pip
|
|
15
|
+
pip install build
|
|
16
|
+
python -m build
|
|
17
|
+
- uses: actions/upload-artifact@v4
|
|
18
|
+
with:
|
|
19
|
+
name: dist
|
|
20
|
+
path: dist/
|
|
21
|
+
publish:
|
|
22
|
+
needs: build
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
environment: pypi
|
|
25
|
+
permissions:
|
|
26
|
+
id-token: write
|
|
27
|
+
steps:
|
|
28
|
+
- uses: actions/download-artifact@v4
|
|
29
|
+
with:
|
|
30
|
+
name: dist
|
|
31
|
+
path: dist/
|
|
32
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: difftriage
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: PR risk gate for AI-generated code: diff scoring, test impact checks, and merge policy enforcement.
|
|
5
|
+
Author: DiffTriage Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: pyyaml>=6.0
|
|
9
|
+
Requires-Dist: typer>=0.12.0
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# DiffTriage - Pro PR Risk Gating for AI-Generated Code
|
|
13
|
+
|
|
14
|
+
**difftriage** is an open-source Python library for production-grade pull request risk analysis.
|
|
15
|
+
|
|
16
|
+
It scores change risk across blast radius, behavior change, test impact, schema/security sensitivity, AI uncertainty, and observability gaps. It then enforces configurable merge policy decisions: `pass`, `review_required`, `block`, and `override_required_review`.
|
|
17
|
+
|
|
18
|
+
[](https://pypi.org/project/difftriage/)
|
|
19
|
+
[](https://pypi.org/project/difftriage/)
|
|
20
|
+
[](https://opensource.org/licenses/MIT)
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Why DiffTriage
|
|
25
|
+
|
|
26
|
+
Teams shipping more AI-assisted code often face:
|
|
27
|
+
- more regressions in core paths,
|
|
28
|
+
- slower reviews,
|
|
29
|
+
- weak signal-to-noise in large diffs.
|
|
30
|
+
|
|
31
|
+
DiffTriage provides a deterministic, explainable risk gate with rule-level scoring so teams can move fast without blind merges.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Key Features
|
|
36
|
+
|
|
37
|
+
- Multi-factor risk model (9 weighted dimensions)
|
|
38
|
+
- Explainable scoring with per-rule contributions
|
|
39
|
+
- Policy decisions: `pass` / `review_required` / `block` / `override_required_review`
|
|
40
|
+
- Confidence bands near threshold boundaries
|
|
41
|
+
- False-positive controls for docs/tests-only changes
|
|
42
|
+
- Override governance with audit-friendly policy flags
|
|
43
|
+
- CI-friendly CLI with multiple output modes
|
|
44
|
+
- GitHub + PyPI release workflows (trusted publishing ready)
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install difftriage
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Requires Python 3.10+.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
difftriage score \
|
|
62
|
+
--files-changed 12 \
|
|
63
|
+
--lines-added 420 \
|
|
64
|
+
--lines-deleted 130 \
|
|
65
|
+
--changed-path src/auth/service.py \
|
|
66
|
+
--changed-path db/migrations/001.sql \
|
|
67
|
+
--dependencies-changed \
|
|
68
|
+
--output text
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Example result:
|
|
72
|
+
- risk score + level
|
|
73
|
+
- decision + confidence
|
|
74
|
+
- top risk drivers
|
|
75
|
+
- rule scores
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## CLI Usage
|
|
80
|
+
|
|
81
|
+
Output formats:
|
|
82
|
+
- `--output text` (default)
|
|
83
|
+
- `--output json`
|
|
84
|
+
- `--output markdown`
|
|
85
|
+
|
|
86
|
+
Exit semantics:
|
|
87
|
+
- `--exit-mode threshold`:
|
|
88
|
+
- `0` pass
|
|
89
|
+
- `2` fail
|
|
90
|
+
- `--exit-mode decision`:
|
|
91
|
+
- `0` pass
|
|
92
|
+
- `3` review required
|
|
93
|
+
- `2` block
|
|
94
|
+
|
|
95
|
+
Examples:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# JSON for automation
|
|
99
|
+
difftriage score --files-changed 8 --lines-added 140 --lines-deleted 35 --changed-path src/payments.py --output json
|
|
100
|
+
|
|
101
|
+
# Markdown for PR comments
|
|
102
|
+
difftriage score --files-changed 8 --lines-added 140 --lines-deleted 35 --changed-path src/payments.py --output markdown --exit-mode decision
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Risk Model
|
|
108
|
+
|
|
109
|
+
Default weighted dimensions:
|
|
110
|
+
|
|
111
|
+
- `blast_radius` (0.20)
|
|
112
|
+
- `behavior_change` (0.18)
|
|
113
|
+
- `test_impact` (0.16)
|
|
114
|
+
- `complexity` (0.12)
|
|
115
|
+
- `dependency_config` (0.10)
|
|
116
|
+
- `data_schema` (0.08)
|
|
117
|
+
- `security_privacy` (0.08)
|
|
118
|
+
- `ai_uncertainty` (0.05)
|
|
119
|
+
- `observability_gap` (0.03)
|
|
120
|
+
|
|
121
|
+
Scoring levels:
|
|
122
|
+
- `0-24`: low
|
|
123
|
+
- `25-49`: medium
|
|
124
|
+
- `50-74`: high
|
|
125
|
+
- `75-100`: critical
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Configuration
|
|
130
|
+
|
|
131
|
+
Create `.difftriage.yml` in repo root:
|
|
132
|
+
|
|
133
|
+
```yaml
|
|
134
|
+
threshold: 50
|
|
135
|
+
review_threshold: 50
|
|
136
|
+
block_threshold: 75
|
|
137
|
+
enable_confidence_bands: true
|
|
138
|
+
|
|
139
|
+
weights:
|
|
140
|
+
blast_radius: 0.20
|
|
141
|
+
behavior_change: 0.18
|
|
142
|
+
test_impact: 0.16
|
|
143
|
+
complexity: 0.12
|
|
144
|
+
dependency_config: 0.10
|
|
145
|
+
data_schema: 0.08
|
|
146
|
+
security_privacy: 0.08
|
|
147
|
+
ai_uncertainty: 0.05
|
|
148
|
+
observability_gap: 0.03
|
|
149
|
+
|
|
150
|
+
rules:
|
|
151
|
+
risky_path_patterns: ["auth", "billing", "migrations", ".github/workflows", "infra", "config", "permissions", "token"]
|
|
152
|
+
schema_patterns: ["migrations", "schema", "models", "ddl"]
|
|
153
|
+
security_patterns: ["auth", "token", "permission", "secret", "privacy"]
|
|
154
|
+
observability_patterns: ["log", "metrics", "trace"]
|
|
155
|
+
safe_path_patterns: ["docs/", ".md", "tests/"]
|
|
156
|
+
|
|
157
|
+
policy:
|
|
158
|
+
docs_tests_only_max_score: 18
|
|
159
|
+
safe_change_max_score: 30
|
|
160
|
+
security_block_score: 70
|
|
161
|
+
schema_block_score: 78
|
|
162
|
+
schema_review_score: 45
|
|
163
|
+
dependency_review_score: 55
|
|
164
|
+
missing_tests_review_score: 45
|
|
165
|
+
|
|
166
|
+
override:
|
|
167
|
+
require_reason_min_length: 12
|
|
168
|
+
allow_score_override: true
|
|
169
|
+
allow_schema_override: true
|
|
170
|
+
allow_dependency_override: true
|
|
171
|
+
allow_security_override: true
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## CI Integration
|
|
177
|
+
|
|
178
|
+
DiffTriage is designed for CI gates:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
difftriage score ... --output json --exit-mode decision
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Recommended policy:
|
|
185
|
+
- treat `review_required` as protected-branch reviewer gate,
|
|
186
|
+
- treat `block` as merge stop,
|
|
187
|
+
- allow audited overrides only for approved emergency paths.
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Release and Publishing
|
|
192
|
+
|
|
193
|
+
This repo includes:
|
|
194
|
+
- `.github/workflows/ci.yml` for tests + build checks
|
|
195
|
+
- `.github/workflows/release.yml` for tag-based PyPI publishing
|
|
196
|
+
|
|
197
|
+
Publish flow:
|
|
198
|
+
1. Push tag `vX.Y.Z`
|
|
199
|
+
2. Build artifacts
|
|
200
|
+
3. Publish with `pypa/gh-action-pypi-publish` via GitHub OIDC
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Local Validation
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
pip install -e . pytest build twine
|
|
208
|
+
pytest -q
|
|
209
|
+
python -m build
|
|
210
|
+
twine check dist/*
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Contributing
|
|
216
|
+
|
|
217
|
+
Issues and pull requests are welcome.
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## License
|
|
222
|
+
|
|
223
|
+
MIT - see `LICENSE`.
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# DiffTriage - Pro PR Risk Gating for AI-Generated Code
|
|
2
|
+
|
|
3
|
+
**difftriage** is an open-source Python library for production-grade pull request risk analysis.
|
|
4
|
+
|
|
5
|
+
It scores change risk across blast radius, behavior change, test impact, schema/security sensitivity, AI uncertainty, and observability gaps. It then enforces configurable merge policy decisions: `pass`, `review_required`, `block`, and `override_required_review`.
|
|
6
|
+
|
|
7
|
+
[](https://pypi.org/project/difftriage/)
|
|
8
|
+
[](https://pypi.org/project/difftriage/)
|
|
9
|
+
[](https://opensource.org/licenses/MIT)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Why DiffTriage
|
|
14
|
+
|
|
15
|
+
Teams shipping more AI-assisted code often face:
|
|
16
|
+
- more regressions in core paths,
|
|
17
|
+
- slower reviews,
|
|
18
|
+
- weak signal-to-noise in large diffs.
|
|
19
|
+
|
|
20
|
+
DiffTriage provides a deterministic, explainable risk gate with rule-level scoring so teams can move fast without blind merges.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Key Features
|
|
25
|
+
|
|
26
|
+
- Multi-factor risk model (9 weighted dimensions)
|
|
27
|
+
- Explainable scoring with per-rule contributions
|
|
28
|
+
- Policy decisions: `pass` / `review_required` / `block` / `override_required_review`
|
|
29
|
+
- Confidence bands near threshold boundaries
|
|
30
|
+
- False-positive controls for docs/tests-only changes
|
|
31
|
+
- Override governance with audit-friendly policy flags
|
|
32
|
+
- CI-friendly CLI with multiple output modes
|
|
33
|
+
- GitHub + PyPI release workflows (trusted publishing ready)
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install difftriage
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Requires Python 3.10+.
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
difftriage score \
|
|
51
|
+
--files-changed 12 \
|
|
52
|
+
--lines-added 420 \
|
|
53
|
+
--lines-deleted 130 \
|
|
54
|
+
--changed-path src/auth/service.py \
|
|
55
|
+
--changed-path db/migrations/001.sql \
|
|
56
|
+
--dependencies-changed \
|
|
57
|
+
--output text
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Example result:
|
|
61
|
+
- risk score + level
|
|
62
|
+
- decision + confidence
|
|
63
|
+
- top risk drivers
|
|
64
|
+
- rule scores
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## CLI Usage
|
|
69
|
+
|
|
70
|
+
Output formats:
|
|
71
|
+
- `--output text` (default)
|
|
72
|
+
- `--output json`
|
|
73
|
+
- `--output markdown`
|
|
74
|
+
|
|
75
|
+
Exit semantics:
|
|
76
|
+
- `--exit-mode threshold`:
|
|
77
|
+
- `0` pass
|
|
78
|
+
- `2` fail
|
|
79
|
+
- `--exit-mode decision`:
|
|
80
|
+
- `0` pass
|
|
81
|
+
- `3` review required
|
|
82
|
+
- `2` block
|
|
83
|
+
|
|
84
|
+
Examples:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# JSON for automation
|
|
88
|
+
difftriage score --files-changed 8 --lines-added 140 --lines-deleted 35 --changed-path src/payments.py --output json
|
|
89
|
+
|
|
90
|
+
# Markdown for PR comments
|
|
91
|
+
difftriage score --files-changed 8 --lines-added 140 --lines-deleted 35 --changed-path src/payments.py --output markdown --exit-mode decision
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Risk Model
|
|
97
|
+
|
|
98
|
+
Default weighted dimensions:
|
|
99
|
+
|
|
100
|
+
- `blast_radius` (0.20)
|
|
101
|
+
- `behavior_change` (0.18)
|
|
102
|
+
- `test_impact` (0.16)
|
|
103
|
+
- `complexity` (0.12)
|
|
104
|
+
- `dependency_config` (0.10)
|
|
105
|
+
- `data_schema` (0.08)
|
|
106
|
+
- `security_privacy` (0.08)
|
|
107
|
+
- `ai_uncertainty` (0.05)
|
|
108
|
+
- `observability_gap` (0.03)
|
|
109
|
+
|
|
110
|
+
Scoring levels:
|
|
111
|
+
- `0-24`: low
|
|
112
|
+
- `25-49`: medium
|
|
113
|
+
- `50-74`: high
|
|
114
|
+
- `75-100`: critical
|
|
115
|
+
|
|
116
|
+
---
|
|
117
|
+
|
|
118
|
+
## Configuration
|
|
119
|
+
|
|
120
|
+
Create `.difftriage.yml` in repo root:
|
|
121
|
+
|
|
122
|
+
```yaml
|
|
123
|
+
threshold: 50
|
|
124
|
+
review_threshold: 50
|
|
125
|
+
block_threshold: 75
|
|
126
|
+
enable_confidence_bands: true
|
|
127
|
+
|
|
128
|
+
weights:
|
|
129
|
+
blast_radius: 0.20
|
|
130
|
+
behavior_change: 0.18
|
|
131
|
+
test_impact: 0.16
|
|
132
|
+
complexity: 0.12
|
|
133
|
+
dependency_config: 0.10
|
|
134
|
+
data_schema: 0.08
|
|
135
|
+
security_privacy: 0.08
|
|
136
|
+
ai_uncertainty: 0.05
|
|
137
|
+
observability_gap: 0.03
|
|
138
|
+
|
|
139
|
+
rules:
|
|
140
|
+
risky_path_patterns: ["auth", "billing", "migrations", ".github/workflows", "infra", "config", "permissions", "token"]
|
|
141
|
+
schema_patterns: ["migrations", "schema", "models", "ddl"]
|
|
142
|
+
security_patterns: ["auth", "token", "permission", "secret", "privacy"]
|
|
143
|
+
observability_patterns: ["log", "metrics", "trace"]
|
|
144
|
+
safe_path_patterns: ["docs/", ".md", "tests/"]
|
|
145
|
+
|
|
146
|
+
policy:
|
|
147
|
+
docs_tests_only_max_score: 18
|
|
148
|
+
safe_change_max_score: 30
|
|
149
|
+
security_block_score: 70
|
|
150
|
+
schema_block_score: 78
|
|
151
|
+
schema_review_score: 45
|
|
152
|
+
dependency_review_score: 55
|
|
153
|
+
missing_tests_review_score: 45
|
|
154
|
+
|
|
155
|
+
override:
|
|
156
|
+
require_reason_min_length: 12
|
|
157
|
+
allow_score_override: true
|
|
158
|
+
allow_schema_override: true
|
|
159
|
+
allow_dependency_override: true
|
|
160
|
+
allow_security_override: true
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## CI Integration
|
|
166
|
+
|
|
167
|
+
DiffTriage is designed for CI gates:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
difftriage score ... --output json --exit-mode decision
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Recommended policy:
|
|
174
|
+
- treat `review_required` as protected-branch reviewer gate,
|
|
175
|
+
- treat `block` as merge stop,
|
|
176
|
+
- allow audited overrides only for approved emergency paths.
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Release and Publishing
|
|
181
|
+
|
|
182
|
+
This repo includes:
|
|
183
|
+
- `.github/workflows/ci.yml` for tests + build checks
|
|
184
|
+
- `.github/workflows/release.yml` for tag-based PyPI publishing
|
|
185
|
+
|
|
186
|
+
Publish flow:
|
|
187
|
+
1. Push tag `vX.Y.Z`
|
|
188
|
+
2. Build artifacts
|
|
189
|
+
3. Publish with `pypa/gh-action-pypi-publish` via GitHub OIDC
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## Local Validation
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
pip install -e . pytest build twine
|
|
197
|
+
pytest -q
|
|
198
|
+
python -m build
|
|
199
|
+
twine check dist/*
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Contributing
|
|
205
|
+
|
|
206
|
+
Issues and pull requests are welcome.
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## License
|
|
211
|
+
|
|
212
|
+
MIT - see `LICENSE`.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.26.0"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "difftriage"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "PR risk gate for AI-generated code: diff scoring, test impact checks, and merge policy enforcement."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "DiffTriage Contributors" }]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"pyyaml>=6.0",
|
|
15
|
+
"typer>=0.12.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.scripts]
|
|
19
|
+
difftriage = "difftriage.cli:app"
|
|
20
|
+
|
|
21
|
+
[tool.pytest.ini_options]
|
|
22
|
+
testpaths = ["tests"]
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from dataclasses import asdict
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from difftriage.models import RiskInput
|
|
8
|
+
from difftriage.scoring import score_pr
|
|
9
|
+
|
|
10
|
+
# Typer application; no_args_is_help makes a bare `difftriage` invocation
# print usage instead of erroring.
app = typer.Typer(no_args_is_help=True)

# Closed sets of accepted CLI choice values (see README "CLI Usage").
OutputFormat = Literal["text", "json", "markdown"]
ExitMode = Literal["threshold", "decision"]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _render_text(report: object) -> str:
|
|
17
|
+
lines = [
|
|
18
|
+
(
|
|
19
|
+
f"risk_score={report.score} level={report.level} threshold={report.threshold} "
|
|
20
|
+
f"passed={report.passed} decision={report.decision} confidence={report.confidence}"
|
|
21
|
+
),
|
|
22
|
+
"top_drivers:",
|
|
23
|
+
]
|
|
24
|
+
for driver in report.drivers:
|
|
25
|
+
lines.append(f"- {driver}")
|
|
26
|
+
lines.append("rule_scores:")
|
|
27
|
+
for rule, val in sorted(report.rule_scores.items(), key=lambda item: item[1], reverse=True):
|
|
28
|
+
lines.append(f"- {rule}: {val}")
|
|
29
|
+
return "\n".join(lines)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _render_markdown(report: object) -> str:
|
|
33
|
+
lines = [
|
|
34
|
+
"| Metric | Value |",
|
|
35
|
+
"|---|---|",
|
|
36
|
+
f"| Risk Score | {report.score} |",
|
|
37
|
+
f"| Risk Level | {report.level} |",
|
|
38
|
+
f"| Threshold | {report.threshold} |",
|
|
39
|
+
f"| Passed | {report.passed} |",
|
|
40
|
+
f"| Decision | {report.decision} |",
|
|
41
|
+
f"| Confidence | {report.confidence} |",
|
|
42
|
+
"",
|
|
43
|
+
"### Top Drivers",
|
|
44
|
+
]
|
|
45
|
+
for driver in report.drivers:
|
|
46
|
+
lines.append(f"- {driver}")
|
|
47
|
+
lines.extend(["", "### Rule Scores", "", "| Rule | Score |", "|---|---:|"])
|
|
48
|
+
for rule, val in sorted(report.rule_scores.items(), key=lambda item: item[1], reverse=True):
|
|
49
|
+
lines.append(f"| `{rule}` | {val} |")
|
|
50
|
+
return "\n".join(lines)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _exit_code(report: object, exit_mode: "ExitMode") -> int:
    """Map a report to a process exit code under the chosen exit-mode contract.

    threshold mode: 0 = passed, 2 = failed.
    decision mode:  0 = pass, 3 = needs review (incl. override review), 2 = block.
    """
    if exit_mode == "threshold":
        return 0 if report.passed else 2
    decision = report.decision
    if decision == "pass":
        return 0
    needs_review = decision in ("review_required", "override_required_review")
    return 3 if needs_review else 2
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@app.command()
def score(
    # Raw diff statistics (required options).
    files_changed: int = typer.Option(...),
    lines_added: int = typer.Option(...),
    lines_deleted: int = typer.Option(...),
    # Repeatable option: one repo-relative changed path per occurrence.
    changed_path: list[str] = typer.Option([]),
    tests_changed: bool = typer.Option(False),
    dependencies_changed: bool = typer.Option(False),
    ai_uncertainty_hits: int = typer.Option(0),
    # Config file path; scoring falls back to built-in defaults if missing.
    config: str = typer.Option(".difftriage.yml"),
    # When provided, overrides the configured pass/fail threshold.
    threshold: float | None = typer.Option(None),
    output: OutputFormat = typer.Option("text"),
    exit_mode: ExitMode = typer.Option("threshold"),
) -> None:
    """Score a PR's risk, print a report, and exit with a CI-friendly code.

    NOTE(review): OutputFormat/ExitMode are typing.Literal aliases; Typer only
    gained Literal-typed option support in later releases — confirm the pinned
    typer>=0.12.0 actually accepts these, or switch to Enum types.
    """
    report = score_pr(
        RiskInput(
            files_changed=files_changed,
            lines_added=lines_added,
            lines_deleted=lines_deleted,
            changed_paths=changed_path,
            tests_changed=tests_changed,
            dependencies_changed=dependencies_changed,
            ai_uncertainty_hits=ai_uncertainty_hits,
        ),
        config_path=config,
        threshold_override=threshold,
    )

    # Render in the requested format; plain text is the default.
    if output == "json":
        typer.echo(json.dumps(asdict(report), indent=2))
    elif output == "markdown":
        typer.echo(_render_markdown(report))
    else:
        typer.echo(_render_text(report))

    # Always exit through typer.Exit so the documented exit-code contract
    # holds when used as a CI gate.
    raise typer.Exit(code=_exit_code(report, exit_mode))
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
if __name__ == "__main__":
    # Allow running this module directly (python -m difftriage.cli) in
    # addition to the installed `difftriage` console script.
    app()
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from copy import deepcopy
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
# Baseline configuration. A user's .difftriage.yml is deep-merged on top of
# this in load_config, so any individual key may be overridden piecemeal.
DEFAULT_CONFIG: dict[str, Any] = {
    # Overall 0-100 pass/fail threshold.
    "threshold": 50,
    # Decision thresholds (see README: review_required / block bands).
    "block_threshold": 75,
    "review_threshold": 50,
    # When True, scores near a threshold are reported with lower confidence.
    "enable_confidence_bands": True,
    # Relative weight of each scoring dimension; values are intended to sum
    # to 1.0 (they do here: 0.20+0.18+0.16+0.12+0.10+0.08+0.08+0.05+0.03).
    "weights": {
        "blast_radius": 0.20,
        "behavior_change": 0.18,
        "test_impact": 0.16,
        "complexity": 0.12,
        "dependency_config": 0.10,
        "data_schema": 0.08,
        "security_privacy": 0.08,
        "ai_uncertainty": 0.05,
        "observability_gap": 0.03,
    },
    # Case-insensitive substring patterns matched against normalized
    # (forward-slash, lowercase) changed paths — see scoring._path_profile.
    "rules": {
        "risky_path_patterns": ["auth", "billing", "migrations", ".github/workflows", "infra", "config", "permissions", "token"],
        "schema_patterns": ["migrations", "schema", "models", "ddl"],
        "security_patterns": ["auth", "token", "permission", "secret", "privacy"],
        "observability_patterns": ["log", "metrics", "trace"],
        "safe_path_patterns": ["docs/", ".md", "tests/"],
        "docs_path_patterns": ["docs/", ".md", ".rst", ".txt", "readme", "changelog", "license"],
        "test_path_patterns": ["tests/", "test_", "_test.", ".spec.", ".test."],
    },
    # Score cut-offs consumed by the policy/decision layer.
    # NOTE(review): exact decision semantics live in scoring.score_pr — not
    # fully visible here; names suggest per-category review/block gates.
    "policy": {
        "docs_tests_only_max_score": 18,
        "docs_tests_only_review_threshold": 70,
        "safe_change_max_score": 30,
        "security_block_score": 70,
        "schema_block_score": 78,
        "schema_review_score": 45,
        "dependency_review_score": 55,
        "missing_tests_review_score": 45,
    },
    # Governance for manual overrides of blocking decisions
    # (see scoring._override_satisfies / _has_valid_override).
    "override": {
        "require_reason_min_length": 12,
        "allow_score_override": True,
        "allow_schema_override": True,
        "allow_dependency_override": True,
        "allow_security_override": True,
    },
}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _deep_merge(base: dict[str, Any], updates: dict[str, Any]) -> dict[str, Any]:
|
|
53
|
+
merged = deepcopy(base)
|
|
54
|
+
for key, value in updates.items():
|
|
55
|
+
if isinstance(value, dict) and isinstance(merged.get(key), dict):
|
|
56
|
+
merged[key] = _deep_merge(merged[key], value)
|
|
57
|
+
else:
|
|
58
|
+
merged[key] = value
|
|
59
|
+
return merged
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def load_config(path: str | None) -> dict[str, Any]:
    """Load a YAML config file and deep-merge it over DEFAULT_CONFIG.

    Returns a deep copy of DEFAULT_CONFIG when *path* is falsy, the file
    does not exist, or the YAML document is not a mapping. Malformed YAML
    still raises yaml.YAMLError so broken configs are not silently ignored.
    """
    if not path:
        return deepcopy(DEFAULT_CONFIG)
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except FileNotFoundError:
        return deepcopy(DEFAULT_CONFIG)
    if not isinstance(data, dict):
        # Robustness fix: a top-level YAML scalar or list previously reached
        # _deep_merge and crashed with AttributeError on .items(); treat any
        # non-mapping document as "no overrides".
        return deepcopy(DEFAULT_CONFIG)
    return _deep_merge(DEFAULT_CONFIG, data)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class RiskInput:
    """Raw facts about a pull request, used as input to score_pr."""

    # Diff size metrics.
    files_changed: int
    lines_added: int
    lines_deleted: int
    # Repo-relative paths touched by the PR; matched against rule patterns.
    changed_paths: list[str] = field(default_factory=list)
    # True when the PR itself modifies test files.
    tests_changed: bool = False
    # True when dependency/config manifests changed (drives the
    # dependency_config rule score).
    dependencies_changed: bool = False
    # Count of AI-uncertainty signals in the diff; how they are detected is
    # up to the caller — TODO confirm the intended signal source.
    ai_uncertainty_hits: int = 0
    # Fraction of the diff believed to be AI-generated, expected 0.0-1.0.
    ai_generated_ratio: float = 0.0
    # Manual-override request: approval flag plus free-text justification
    # (validated against override.require_reason_min_length).
    override_approved: bool = False
    override_reason: str = ""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class RiskReport:
    """Result of scoring one PR: overall score, decision, and explainability data."""

    # Overall 0-100 risk score and the pass/fail threshold it was judged against.
    score: float
    threshold: float
    passed: bool
    # Severity bucket: "low" / "medium" / "high" / "critical".
    level: str
    # Human-readable summaries of the top contributing risk factors.
    drivers: list[str]
    # Policy outcome: "pass", "review_required", "block", or
    # "override_required_review".
    decision: str
    # "high", or "medium" when the score sits near a threshold boundary.
    confidence: str
    # Raw per-rule scores keyed by rule name.
    rule_scores: dict[str, float]
    # Decision thresholds in effect for this evaluation.
    review_threshold: float = 50.0
    block_threshold: float = 75.0
    # Per-rule detail records: score, weight, contribution, rationale, evidence.
    rule_breakdown: list[dict[str, Any]] = field(default_factory=list)
    # Presumably audit-trail flags raised by the policy layer — populated in
    # scoring.score_pr; verify against that function.
    policy_flags: list[str] = field(default_factory=list)
    # Presumably the evaluated state of override gates — verify against
    # scoring.score_pr.
    override_gates: dict[str, Any] = field(default_factory=dict)
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from difftriage.config import load_config
|
|
6
|
+
from difftriage.models import RiskInput, RiskReport
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _clamp(value: float, low: float = 0.0, high: float = 100.0) -> float:
|
|
10
|
+
return max(low, min(high, value))
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _risk_level(score: float) -> str:
|
|
14
|
+
if score >= 75:
|
|
15
|
+
return "critical"
|
|
16
|
+
if score >= 50:
|
|
17
|
+
return "high"
|
|
18
|
+
if score >= 25:
|
|
19
|
+
return "medium"
|
|
20
|
+
return "low"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _confidence(score: float, review_threshold: float, block_threshold: float) -> str:
|
|
24
|
+
# Scores near enforcement edges are inherently less certain.
|
|
25
|
+
band = 5.0
|
|
26
|
+
if abs(score - review_threshold) <= band or abs(score - block_threshold) <= band:
|
|
27
|
+
return "medium"
|
|
28
|
+
return "high"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _matches(path: str, patterns: list[str]) -> bool:
|
|
32
|
+
return any(pattern.lower() in path for pattern in patterns)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _path_profile(paths: list[str], rules: dict[str, Any], files_changed: int) -> dict[str, Any]:
    """Classify changed paths against the configured rule patterns.

    Paths are normalized (forward slashes, lowercase) before case-insensitive
    substring matching. Returns the counts, matched-path lists, and derived
    flags consumed by the individual scoring rules.
    """

    def hit_list(candidates: list[str], patterns: list[str]) -> list[str]:
        # Same contract as _matches: case-insensitive substring test.
        return [c for c in candidates if any(pat.lower() in c for pat in patterns)]

    lowered = [p.replace("\\", "/").lower() for p in paths]
    # Never zero: used as a ratio denominator below.
    file_count = max(files_changed, len(lowered), 1)

    docs = hit_list(lowered, rules["docs_path_patterns"])
    tests = hit_list(lowered, rules["test_path_patterns"])
    doc_or_test = set(docs) | set(tests)
    # "safe" subsumes docs and tests in addition to explicit safe patterns.
    safe = [
        p for p in lowered
        if p in doc_or_test or any(pat.lower() in p for pat in rules["safe_path_patterns"])
    ]
    risky = hit_list(lowered, rules["risky_path_patterns"])
    schema = hit_list(lowered, rules["schema_patterns"])
    security = hit_list(lowered, rules["security_patterns"])
    observability = hit_list(lowered, rules["observability_patterns"])

    # True when every distinct changed path landed in a docs/tests/safe bucket
    # (safe already contains docs and tests, so the union equals set(safe)).
    docs_or_tests_only = bool(lowered) and len(set(docs) | set(tests) | set(safe)) == len(set(lowered))

    return {
        "paths": lowered,
        "file_count": file_count,
        "docs_count": len(set(docs)),
        "tests_count": len(set(tests)),
        "safe_count": len(set(safe)),
        "non_safe_count": max(0, file_count - len(set(safe))),
        "risky": risky,
        "schema": schema,
        "security": security,
        "observability": observability,
        "docs_or_tests_only": docs_or_tests_only,
        "risky_ratio": len(set(risky)) / file_count,
    }
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _breakdown_item(
|
|
69
|
+
name: str,
|
|
70
|
+
score: float,
|
|
71
|
+
weight: float,
|
|
72
|
+
rationale: str,
|
|
73
|
+
evidence: list[str] | None = None,
|
|
74
|
+
) -> dict[str, Any]:
|
|
75
|
+
return {
|
|
76
|
+
"rule": name,
|
|
77
|
+
"score": round(score, 2),
|
|
78
|
+
"weight": weight,
|
|
79
|
+
"contribution": round(score * weight, 2),
|
|
80
|
+
"rationale": rationale,
|
|
81
|
+
"evidence": evidence or [],
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _has_valid_override(risk_input: "RiskInput", override_config: dict[str, Any]) -> bool:
    """An override is valid only when approved AND justified by a reason of
    at least the configured minimum length (whitespace-stripped)."""
    if not risk_input.override_approved:
        return False
    minimum = int(override_config.get("require_reason_min_length", 12))
    return len(risk_input.override_reason.strip()) >= minimum
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _override_satisfies(reason: str, risk_input: RiskInput, override_config: dict[str, Any]) -> bool:
    """Check whether an approved override covers the given policy gate.

    *reason* names the gate being overridden: "schema", "security", "score",
    or "dependency". Unknown reasons are never satisfied, and nothing is
    satisfied without override_approved.
    """
    if not risk_input.override_approved:
        return False
    if reason == "schema":
        # NOTE(review): unlike the security/score/dependency branches below,
        # the schema gate does NOT also require _has_valid_override (i.e. a
        # minimum-length justification) — confirm whether schema overrides
        # are intentionally exempt from the reason requirement.
        return bool(override_config.get("allow_schema_override", True))
    if reason == "security":
        return bool(override_config.get("allow_security_override", True)) and _has_valid_override(risk_input, override_config)
    if reason == "score":
        return bool(override_config.get("allow_score_override", True)) and _has_valid_override(risk_input, override_config)
    if reason == "dependency":
        return bool(override_config.get("allow_dependency_override", True)) and _has_valid_override(risk_input, override_config)
    return False
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def score_pr(risk_input: RiskInput, config_path: str | None = None, threshold_override: float | None = None) -> RiskReport:
    """Score a pull request and decide pass / review_required / block.

    Pipeline: load config, profile the changed paths, compute nine clamped
    rule scores, take the weighted sum (with low-false-positive caps for
    docs/tests-only and mostly-safe changes), apply policy gates, and
    resolve approved overrides that may downgrade a block.

    Args:
        risk_input: Declared facts about the PR (paths, churn, tests, deps,
            AI signals, override request).
        config_path: Optional config file path forwarded to load_config().
        threshold_override: Replaces only the pass threshold used for the
            final ``passed`` flag; it does not change the review/block
            thresholds that drive ``decision``.

    Returns:
        RiskReport with rounded score, decision, per-rule breakdown,
        policy flags, and override-gate details.
    """
    config = load_config(config_path)
    weights = config["weights"]
    rules = config["rules"]
    policy = config["policy"]
    override_config = config["override"]

    # Path profile: safe/risky/schema/security/observability path hits.
    profile = _path_profile(risk_input.changed_paths, rules, risk_input.files_changed)
    file_count = profile["file_count"]
    non_safe_files = profile["non_safe_count"]
    churn = risk_input.lines_added + risk_input.lines_deleted
    docs_tests_only = profile["docs_or_tests_only"]

    # Docs/tests-only changes get heavily discounted size factors to avoid
    # false positives from large but harmless documentation churn.
    code_file_factor = 0.15 if docs_tests_only else 1.0
    churn_factor = 0.12 if docs_tests_only else 1.0

    # Core size/shape rules, each clamped (presumably to [0, 100] — see _clamp).
    blast_radius = _clamp((non_safe_files * 7) + ((churn * churn_factor) / 28))
    behavior_change = _clamp((non_safe_files * 6) + ((risk_input.lines_added * churn_factor) / 32))
    complexity = _clamp((file_count * 3.5 * code_file_factor) + ((churn * churn_factor) / 24))

    # De-duplicated path hits per sensitive category.
    schema_hits = len(set(profile["schema"]))
    security_hits = len(set(profile["security"]))
    observability_hits = len(set(profile["observability"]))
    risky_ratio = profile["risky_ratio"]

    # Test impact only applies to implementation changes that ship no tests.
    missing_tests = not risk_input.tests_changed and not docs_tests_only
    test_impact = 0.0 if not missing_tests else _clamp(32 + (complexity * 0.45) + (risky_ratio * 22))
    dependency_config = 65.0 if risk_input.dependencies_changed else 0.0
    data_schema = _clamp(schema_hits * 32)
    security_privacy = _clamp(security_hits * 28)
    ai_uncertainty = _clamp((risk_input.ai_uncertainty_hits * 12) + (risk_input.ai_generated_ratio * 25))
    # Observability gap fires only when risky paths have no observability
    # coverage at all, with a small bump for wide non-safe changes.
    observability_gap = 0.0 if observability_hits > 0 or docs_tests_only else _clamp((risky_ratio * 35) + (8 if non_safe_files > 4 else 0))

    rule_scores = {
        "blast_radius": round(blast_radius, 2),
        "behavior_change": round(behavior_change, 2),
        "test_impact": round(test_impact, 2),
        "complexity": round(complexity, 2),
        "dependency_config": round(dependency_config, 2),
        "data_schema": round(data_schema, 2),
        "security_privacy": round(security_privacy, 2),
        "ai_uncertainty": round(ai_uncertainty, 2),
        "observability_gap": round(observability_gap, 2),
    }

    # Weighted total over the configured rule weights.
    raw_score = sum(rule_scores[name] * weights[name] for name in weights)
    score_cap = None
    policy_flags: list[str] = []
    if docs_tests_only:
        score_cap = float(policy["docs_tests_only_max_score"])
        policy_flags.append("docs_tests_only_low_false_positive_cap")
    elif profile["safe_count"] and profile["safe_count"] >= file_count * 0.75:
        # 75%+ of files on safe paths: cap the score to limit false positives.
        score_cap = float(policy["safe_change_max_score"])
        policy_flags.append("mostly_safe_paths_score_cap")

    score = min(raw_score, score_cap) if score_cap is not None else raw_score

    # threshold_override only affects the final pass/fail flag, not gating.
    threshold = threshold_override if threshold_override is not None else float(config["threshold"])
    review_threshold = float(config["review_threshold"])
    block_threshold = float(config["block_threshold"])

    # Per-rule breakdown, each with a human-readable rationale (and up to
    # five evidence paths for the path-based gates).
    rule_breakdown = [
        _breakdown_item(
            "blast_radius",
            blast_radius,
            weights["blast_radius"],
            f"{non_safe_files} non-safe file(s), {churn} total changed line(s)",
        ),
        _breakdown_item(
            "behavior_change",
            behavior_change,
            weights["behavior_change"],
            f"{risk_input.lines_added} added line(s) across implementation-sensitive paths",
        ),
        _breakdown_item(
            "test_impact",
            test_impact,
            weights["test_impact"],
            "Tests changed or docs/tests-only change" if not missing_tests else "Implementation change without test changes",
        ),
        _breakdown_item(
            "complexity",
            complexity,
            weights["complexity"],
            f"{file_count} file(s), churn factor adjusted for docs/tests-only safety",
        ),
        _breakdown_item(
            "dependency_config",
            dependency_config,
            weights["dependency_config"],
            "Dependency/config change declared" if risk_input.dependencies_changed else "No dependency change declared",
        ),
        _breakdown_item(
            "data_schema",
            data_schema,
            weights["data_schema"],
            f"{schema_hits} schema-sensitive path hit(s)",
            profile["schema"][:5],
        ),
        _breakdown_item(
            "security_privacy",
            security_privacy,
            weights["security_privacy"],
            f"{security_hits} security/privacy path hit(s)",
            profile["security"][:5],
        ),
        _breakdown_item(
            "ai_uncertainty",
            ai_uncertainty,
            weights["ai_uncertainty"],
            f"{risk_input.ai_uncertainty_hits} uncertainty hit(s), generated ratio {risk_input.ai_generated_ratio}",
        ),
        _breakdown_item(
            "observability_gap",
            observability_gap,
            weights["observability_gap"],
            "Risky code path has observability coverage" if observability_gap == 0 else "Risky code path without observability signal",
        ),
    ]

    if score_cap is not None and raw_score > score:
        policy_flags.append(f"score_capped_from_{round(raw_score, 2)}_to_{round(score, 2)}")

    # Policy gates. hard_blocks force "block" unless overridden;
    # review_reasons force "review_required"; overrideable_reasons records
    # which block reasons the config allows an override to waive.
    hard_blocks: list[str] = []
    review_reasons: list[str] = []
    overrideable_reasons: list[str] = []

    if score >= block_threshold:
        hard_blocks.append("score_at_or_above_block_threshold")
        overrideable_reasons.append("score")
    elif score >= review_threshold:
        review_reasons.append("score_at_or_above_review_threshold")

    if security_privacy >= float(policy["security_block_score"]):
        hard_blocks.append("security_privacy_gate")
        # NOTE: security override defaults to False here (most conservative),
        # unlike the True defaults used for score/schema/dependency.
        if bool(override_config.get("allow_security_override", False)):
            overrideable_reasons.append("security")
    if data_schema >= float(policy["schema_block_score"]):
        hard_blocks.append("schema_gate")
        if bool(override_config.get("allow_schema_override", True)):
            overrideable_reasons.append("schema")
    elif data_schema >= float(policy["schema_review_score"]):
        review_reasons.append("schema_review_gate")
    if dependency_config >= float(policy["dependency_review_score"]):
        review_reasons.append("dependency_or_config_review_gate")
    if test_impact >= float(policy["missing_tests_review_score"]):
        review_reasons.append("missing_tests_review_gate")

    # Partition hard blocks into: not overrideable at all, and overrideable
    # but not satisfied by the provided override (wrong/absent reason).
    valid_override = _has_valid_override(risk_input, override_config)
    block_reason_map = {
        "score_at_or_above_block_threshold": "score",
        "schema_gate": "schema",
        "security_privacy_gate": "security",
    }
    non_overrideable_blocks = [
        reason
        for reason in hard_blocks
        if block_reason_map.get(reason) not in overrideable_reasons
    ]
    unsatisfied_override_blocks = [
        reason
        for reason in hard_blocks
        if reason not in non_overrideable_blocks and not _override_satisfies(block_reason_map[reason], risk_input, override_config)
    ]

    # Decision ladder: fully-satisfied overrides downgrade a block — to a
    # plain pass only in the schema-gate-only low-score case, otherwise to
    # "override_required_review". Any unsatisfied block still blocks.
    if hard_blocks and not non_overrideable_blocks and not unsatisfied_override_blocks:
        if hard_blocks == ["schema_gate"] and not review_reasons and score < review_threshold:
            decision = "pass"
            policy_flags.append("valid_schema_override_downgraded_block_to_pass")
        else:
            decision = "override_required_review"
            policy_flags.append("valid_override_downgraded_block_to_review")
    elif hard_blocks:
        decision = "block"
    elif review_reasons:
        decision = "review_required"
    else:
        decision = "pass"

    # Surface every triggered gate in policy_flags for auditability.
    if hard_blocks:
        policy_flags.extend(hard_blocks)
    if review_reasons:
        policy_flags.extend(review_reasons)
    if risk_input.override_approved and unsatisfied_override_blocks:
        policy_flags.append("override_missing_required_reason")

    level = _risk_level(score)
    conf = _confidence(score, review_threshold, block_threshold) if config.get("enable_confidence_bands", True) else "high"

    # Top 4 weighted contributors (with non-zero scores) become the drivers.
    drivers = []
    for item in sorted(rule_breakdown, key=lambda x: x["contribution"], reverse=True)[:4]:
        if item["score"] > 0:
            drivers.append(f"{item['rule']}: {item['score']} ({item['rationale']})")
    if not drivers:
        drivers.append("No major risk driver detected")

    # "passed" requires both a pass decision and a score under the (possibly
    # overridden) pass threshold.
    passed = decision == "pass" and score < threshold

    return RiskReport(
        score=round(score, 2),
        threshold=threshold,
        passed=passed,
        level=level,
        drivers=drivers,
        decision=decision,
        confidence=conf,
        rule_scores=rule_scores,
        review_threshold=review_threshold,
        block_threshold=block_threshold,
        rule_breakdown=rule_breakdown,
        policy_flags=policy_flags,
        override_gates={
            "override_requested": risk_input.override_approved,
            "override_reason_provided": bool(risk_input.override_reason.strip()),
            "override_valid": valid_override,
            "overrideable_reasons": sorted(set(overrideable_reasons)),
            "non_overrideable_blocks": non_overrideable_blocks,
            "unsatisfied_override_blocks": unsatisfied_override_blocks,
        },
    )
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
from difftriage.models import RiskInput
|
|
2
|
+
from difftriage.scoring import score_pr
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def test_high_risk_without_tests_blocks() -> None:
    """A large, test-less change touching auth/billing/migrations/CI blocks."""
    risky_change = RiskInput(
        files_changed=20,
        lines_added=800,
        lines_deleted=200,
        changed_paths=[
            "src/auth/service.py",
            "src/billing/charge.py",
            "db/migrations/001.sql",
            ".github/workflows/ci.yml",
        ],
        tests_changed=False,
        dependencies_changed=True,
        ai_uncertainty_hits=2,
        ai_generated_ratio=0.7,
    )
    result = score_pr(risky_change, threshold_override=50)
    assert result.decision == "block"
    assert result.passed is False
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_low_risk_with_tests_passes() -> None:
    """A tiny utility change that ships with its tests passes outright."""
    small_change = RiskInput(
        files_changed=2,
        lines_added=20,
        lines_deleted=4,
        changed_paths=["src/utils/format.py", "tests/test_format.py"],
        tests_changed=True,
    )
    result = score_pr(small_change, threshold_override=50)
    assert result.decision == "pass"
    assert result.passed is True
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_docs_only_reduces_false_positive() -> None:
    """Heavy churn confined to documentation must not read as high risk."""
    docs_change = RiskInput(
        files_changed=3,
        lines_added=120,
        lines_deleted=30,
        changed_paths=["docs/guide.md", "README.md", "docs/usage.md"],
        tests_changed=False,
    )
    result = score_pr(docs_change)
    assert result.level in {"low", "medium"}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_override_changes_block_to_review_required() -> None:
    """An approved override with a reason softens an otherwise gated change."""
    change_kwargs = dict(
        files_changed=12,
        lines_added=400,
        lines_deleted=120,
        changed_paths=["src/auth/token.py", "db/schema.sql"],
        tests_changed=False,
    )
    gated = score_pr(RiskInput(**change_kwargs))
    assert gated.decision in {"block", "review_required"}

    overridden = score_pr(
        RiskInput(
            **change_kwargs,
            override_approved=True,
            override_reason="approved by code owners",
        )
    )
    assert overridden.decision in {"override_required_review", "review_required", "pass"}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_decision_review_required_at_exact_review_threshold_and_confidence_band() -> None:
    """A score clamped to exactly the default review threshold (50) triggers review.

    Huge single-file churn saturates the clamped core rules deterministically;
    tests_changed=True keeps test_impact at 0 so the total lands on 50.0.
    """
    result = score_pr(
        RiskInput(
            files_changed=1,
            lines_added=4000,
            lines_deleted=0,
            changed_paths=["src/core/worker.py"],
            tests_changed=True,
        )
    )
    assert result.score == 50.0
    assert result.decision == "review_required"
    assert result.passed is False
    # Scores within +/- 5 of the review threshold fall in the "medium" band.
    assert result.confidence == "medium"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_threshold_override_only_affects_passed_not_decision() -> None:
    """Tightening only the pass threshold flips ``passed`` but not ``decision``."""
    worker_change = dict(
        files_changed=1,
        lines_added=2500,
        lines_deleted=0,
        changed_paths=["src/core/worker.py"],
        tests_changed=True,
    )

    baseline = score_pr(RiskInput(**worker_change))
    assert baseline.decision == "pass"
    assert baseline.passed is True
    assert baseline.score > 40.0

    # Same change, stricter pass threshold: identical score and decision,
    # but the PR no longer passes.
    tightened = score_pr(RiskInput(**worker_change), threshold_override=40)
    assert tightened.decision == "pass"
    assert tightened.score == baseline.score
    assert tightened.threshold == 40
    assert tightened.passed is False
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def test_security_privacy_hard_block_and_override_required_review() -> None:
    """Four auth-path hits trip the security gate; a valid override downgrades it.

    The security_privacy rule scales with distinct security-path hits, so
    four auth files push it past the hard-block score.
    """
    auth_change = dict(
        files_changed=4,
        lines_added=0,
        lines_deleted=0,
        changed_paths=[
            "src/auth/a.py",
            "src/auth/b.py",
            "src/auth/c.py",
            "src/auth/d.py",
        ],
        tests_changed=True,
    )

    blocked = score_pr(RiskInput(**auth_change))
    assert blocked.rule_scores["security_privacy"] >= 70
    assert blocked.decision == "block"
    assert blocked.passed is False

    # Security overrides still require human review, never a clean pass.
    overridden = score_pr(
        RiskInput(
            **auth_change,
            override_approved=True,
            override_reason="Emergency fix; follow-up review required",
        )
    )
    assert overridden.rule_scores["security_privacy"] >= 70
    assert overridden.decision == "override_required_review"
    assert overridden.passed is False
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def test_schema_hard_block_can_be_avoided_with_override_approval() -> None:
    """A schema-only block downgrades all the way to pass under a valid override.

    Nothing else triggers review and the score stays below the review
    threshold, so the schema gate is the sole obstacle to merging.
    """
    migration_change = dict(
        files_changed=3,
        lines_added=0,
        lines_deleted=0,
        changed_paths=["migrations/001.sql", "migrations/002.sql", "migrations/003.sql"],
        tests_changed=True,
    )

    unapproved = score_pr(RiskInput(**migration_change))
    assert unapproved.rule_scores["data_schema"] >= 70
    assert unapproved.decision == "block"
    assert unapproved.passed is False

    approved = score_pr(
        RiskInput(
            **migration_change,
            override_approved=True,
            override_reason="Approved for emergency rollout",
        )
    )
    assert approved.rule_scores["data_schema"] >= 70
    assert approved.decision == "pass"
    assert approved.passed is True
    assert "valid_schema_override_downgraded_block_to_pass" in approved.policy_flags
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def test_confidence_bands_can_be_disabled_via_config_injection(monkeypatch) -> None:
    """With confidence bands disabled, confidence is pinned to "high".

    load_config() does not currently merge enable_confidence_bands from a
    file, so this branch is covered by patching the loader instead.
    """
    import difftriage.scoring as scoring

    original_loader = scoring.load_config

    def config_without_bands(_path):
        merged = original_loader(None)
        merged["enable_confidence_bands"] = False
        return merged

    monkeypatch.setattr(scoring, "load_config", config_without_bands)

    result = scoring.score_pr(
        RiskInput(
            files_changed=1,
            lines_added=4000,
            lines_deleted=0,
            changed_paths=["src/core/worker.py"],
            tests_changed=True,
        )
    )
    assert result.score == 50.0
    assert result.confidence == "high"
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def test_safe_paths_reduce_false_positive_risk() -> None:
    """Identical churn over safe (docs) paths scores lower than over core code."""
    common = dict(
        files_changed=3,
        lines_added=300,
        lines_deleted=0,
        tests_changed=False,
    )

    core_change = score_pr(
        RiskInput(changed_paths=["src/core/a.py", "src/core/b.py", "src/core/c.py"], **common)
    )
    # Mixed-case paths exercise the case-insensitive safe-path match.
    docs_change = score_pr(
        RiskInput(changed_paths=["DOCS/a.MD", "docs/b.md", "docs/c.md"], **common)
    )

    assert docs_change.rule_scores["blast_radius"] < core_change.rule_scores["blast_radius"]
    assert docs_change.rule_scores["behavior_change"] < core_change.rule_scores["behavior_change"]
    assert docs_change.score < core_change.score
|