covopt 0.1.0a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- covopt-0.1.0a0/.github/workflows/check.yml +41 -0
- covopt-0.1.0a0/.github/workflows/publish.yml +32 -0
- covopt-0.1.0a0/.gitignore +2 -0
- covopt-0.1.0a0/PKG-INFO +257 -0
- covopt-0.1.0a0/README.md +239 -0
- covopt-0.1.0a0/covopt/__init__.py +0 -0
- covopt-0.1.0a0/covopt/__main__.py +35 -0
- covopt-0.1.0a0/covopt/algorithms.py +63 -0
- covopt-0.1.0a0/covopt/cli/__init__.py +0 -0
- covopt-0.1.0a0/covopt/cli/select.py +102 -0
- covopt-0.1.0a0/covopt/models.py +90 -0
- covopt-0.1.0a0/covopt/utils.py +333 -0
- covopt-0.1.0a0/pyproject.toml +84 -0
- covopt-0.1.0a0/tests/conftest.py +199 -0
- covopt-0.1.0a0/tests/test_models.py +42 -0
- covopt-0.1.0a0/tests/test_utils.py +218 -0
- covopt-0.1.0a0/uv.lock +320 -0
- covopt-0.1.0a0/vulture/allowlist.py +0 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: "Check"
|
|
2
|
+
|
|
3
|
+
on: push
|
|
4
|
+
|
|
5
|
+
jobs:
|
|
6
|
+
checks:
|
|
7
|
+
runs-on: ubuntu-22.04
|
|
8
|
+
steps:
|
|
9
|
+
- name: Checkout
|
|
10
|
+
uses: actions/checkout@v6
|
|
11
|
+
- name: Install uv
|
|
12
|
+
uses: astral-sh/setup-uv@v7
|
|
13
|
+
- name: Install node
|
|
14
|
+
uses: actions/setup-node@v3
|
|
15
|
+
with:
|
|
16
|
+
node-version: 24
|
|
17
|
+
- name: build-uv-lock
|
|
18
|
+
run: uv lock --locked
|
|
19
|
+
- name: format-ruff
|
|
20
|
+
run: uv run --extra dev ruff format . --check
|
|
21
|
+
- name: format-ruff-imports
|
|
22
|
+
run: uv run --extra dev ruff check . --select I
|
|
23
|
+
- name: format-prettier
|
|
24
|
+
run: npx prettier --check `git ls-files *.md *.json *.yml *.yaml`
|
|
25
|
+
- name: lint-ruff
|
|
26
|
+
run: uv run --extra dev ruff check .
|
|
27
|
+
- name: lint-ty
|
|
28
|
+
run: |
|
|
29
|
+
uv run --extra dev ty check covopt/ \
|
|
30
|
+
--ignore unresolved-attribute \
|
|
31
|
+
--ignore invalid-argument-type
|
|
32
|
+
- name: lint-vulture
|
|
33
|
+
run: uv run --extra dev vulture covopt/ tests/ vulture/allowlist.py
|
|
34
|
+
- name: test-pytest
|
|
35
|
+
run: |
|
|
36
|
+
uv run --extra dev pytest tests/ \
|
|
37
|
+
--cov=covopt \
|
|
38
|
+
--cov-report=term-missing
|
|
39
|
+
uv run --extra dev coverage report --fail-under=50
|
|
40
|
+
- name: build-uv-build
|
|
41
|
+
run: uv build
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
name: "Publish"
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
# Publish on any tag starting with a `v`, e.g., v0.1.0
|
|
7
|
+
- v*
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
run:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
environment:
|
|
13
|
+
name: pypi
|
|
14
|
+
permissions:
|
|
15
|
+
id-token: write
|
|
16
|
+
contents: read
|
|
17
|
+
steps:
|
|
18
|
+
- name: Checkout
|
|
19
|
+
uses: actions/checkout@v6
|
|
20
|
+
- name: Install uv
|
|
21
|
+
uses: astral-sh/setup-uv@v7
|
|
22
|
+
- name: Install Python 3.13
|
|
23
|
+
run: uv python install 3.13
|
|
24
|
+
- name: Build
|
|
25
|
+
run: uv build
|
|
26
|
+
# Check that basic features work and that we didn't forget to include crucial files
|
|
27
|
+
- name: Smoke test (wheel)
|
|
28
|
+
run: uv run --isolated --no-project --with dist/*.whl --with pytest pytest tests
|
|
29
|
+
- name: Smoke test (source distribution)
|
|
30
|
+
run: uv run --isolated --no-project --with dist/*.tar.gz --with pytest pytest tests
|
|
31
|
+
- name: Publish
|
|
32
|
+
run: uv publish
|
covopt-0.1.0a0/PKG-INFO
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: covopt
|
|
3
|
+
Version: 0.1.0a0
|
|
4
|
+
Summary: Budgeted local test selection
|
|
5
|
+
Author: Olle Lindgren
|
|
6
|
+
Requires-Python: >=3.14
|
|
7
|
+
Requires-Dist: diskcache>=5.6.3
|
|
8
|
+
Requires-Dist: numpy>=2.4.4
|
|
9
|
+
Requires-Dist: pygit2>=1.14
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: coverage>=7.6; extra == 'dev'
|
|
12
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest>=8.3; extra == 'dev'
|
|
14
|
+
Requires-Dist: ruff>=0.15.8; extra == 'dev'
|
|
15
|
+
Requires-Dist: ty>=0.0.26; extra == 'dev'
|
|
16
|
+
Requires-Dist: vulture; extra == 'dev'
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# covopt
|
|
20
|
+
|
|
21
|
+
`covopt` is a small command-line tool for **budgeted local test selection**.
|
|
22
|
+
|
|
23
|
+
It reads a diff from `stdin`, loads a precomputed model describing which tests are informative for which parts of the repository, and emits a subset of tests that maximizes expected signal under a runtime budget.
|
|
24
|
+
|
|
25
|
+
The intended workflow is:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
git diff | covopt select -t 10 | xargs pytest
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
This is **not** a full test runner and does **not** integrate with `pytest` directly. Its only job is to:
|
|
32
|
+
|
|
33
|
+
1. parse changed code from a diff,
|
|
34
|
+
2. map those changes into an internal repository-space representation,
|
|
35
|
+
3. score candidate tests by expected value,
|
|
36
|
+
4. penalize redundant tests,
|
|
37
|
+
5. choose a subset that fits a wall-clock budget,
|
|
38
|
+
6. print selected test IDs to `stdout`, one per line.
|
|
39
|
+
|
|
40
|
+
## Motivation
|
|
41
|
+
|
|
42
|
+
Large test suites often contain many tests that are highly correlated. For local iteration, running all tests is too expensive, while running only “affected” tests can still waste time on near-duplicates.
|
|
43
|
+
|
|
44
|
+
`covopt` treats test selection as a **budgeted optimization problem**:
|
|
45
|
+
|
|
46
|
+
- **Return**: expected defect-detection signal in changed areas
|
|
47
|
+
- **Cost**: estimated runtime of a test
|
|
48
|
+
- **Risk**: residual untested change exposure
|
|
49
|
+
- **Correlation**: redundancy between tests
|
|
50
|
+
|
|
51
|
+
The goal is to select a small, diverse, high-value subset of tests for fast feedback, while leaving full validation to CI.
|
|
52
|
+
|
|
53
|
+
## Core idea
|
|
54
|
+
|
|
55
|
+
The repository is modeled as a feature space over code regions such as files, modules, classes, or functions.
|
|
56
|
+
|
|
57
|
+
Each test has:
|
|
58
|
+
|
|
59
|
+
- a sparse **signal vector** over that space,
|
|
60
|
+
- an estimated runtime cost,
|
|
61
|
+
- optional metadata such as historical failures or flakiness.
|
|
62
|
+
|
|
63
|
+
A diff is converted into a weighted **change vector** over the same space.
|
|
64
|
+
|
|
65
|
+
Selection then solves:
|
|
66
|
+
|
|
67
|
+
- maximize coverage of changed regions,
|
|
68
|
+
- prefer tests with high marginal gain per second,
|
|
69
|
+
- avoid picking multiple tests with nearly identical signal,
|
|
70
|
+
- stop when the budget is exhausted.
|
|
71
|
+
|
|
72
|
+
In practice this is implemented as a **budgeted greedy optimizer** over a diminishing-returns objective.
|
|
73
|
+
|
|
74
|
+
## Non-goals
|
|
75
|
+
|
|
76
|
+
For the first version, this project does **not** aim to provide:
|
|
77
|
+
|
|
78
|
+
- direct `pytest` plugin support,
|
|
79
|
+
- dynamic collection of coverage data,
|
|
80
|
+
- mutation testing,
|
|
81
|
+
- distributed execution,
|
|
82
|
+
- CI orchestration,
|
|
83
|
+
- perfect safety guarantees.
|
|
84
|
+
|
|
85
|
+
This is a **local feedback accelerator**, not a replacement for the full test suite.
|
|
86
|
+
|
|
87
|
+
## CLI
|
|
88
|
+
|
|
89
|
+
### Basic usage
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
git diff | covopt select -t 10
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Prints selected test node IDs to `stdout`.
|
|
96
|
+
|
|
97
|
+
### Example
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
git diff HEAD~1 | covopt select -t 10 | xargs pytest
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Arguments
|
|
104
|
+
|
|
105
|
+
```text
|
|
106
|
+
usage: covopt select [-h] -t SECONDS [-n N] [--verbose]
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
#### Optional
|
|
110
|
+
|
|
111
|
+
- `-n N`
|
|
112
|
+
Restrict optimization to the top `N` candidate tests after initial scoring.
|
|
113
|
+
|
|
114
|
+
- `--verbose`
|
|
115
|
+
Emit diagnostics to `stderr`.
|
|
116
|
+
|
|
117
|
+
## Input / output contract
|
|
118
|
+
|
|
119
|
+
### Input
|
|
120
|
+
|
|
121
|
+
`stdin` must contain a unified diff, for example from:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
git diff
|
|
125
|
+
git diff HEAD~1
|
|
126
|
+
git show <commit>
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Output
|
|
130
|
+
|
|
131
|
+
By default, the tool prints one test per line:
|
|
132
|
+
|
|
133
|
+
```text
|
|
134
|
+
tests/unit/foo/test_parser.py::test_basic_parse
|
|
135
|
+
tests/unit/bar/test_config.py::test_defaults
|
|
136
|
+
tests/integration/api/test_health.py::test_healthcheck
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
This makes it easy to pipe into `xargs pytest`.
|
|
140
|
+
|
|
141
|
+
With `--verbose`, the tool emits structured output including estimated total runtime and selection scores to stderr. Errors are always written to stderr.
|
|
142
|
+
|
|
143
|
+
## Model format
|
|
144
|
+
|
|
145
|
+
The selector consumes a model containing:
|
|
146
|
+
|
|
147
|
+
- repository feature definitions,
|
|
148
|
+
- per-test sparse signal vectors,
|
|
149
|
+
- pairwise or cluster-level redundancy information,
|
|
150
|
+
- test runtime estimates,
|
|
151
|
+
- optional weights and calibration parameters.
|
|
152
|
+
|
|
153
|
+
A minimal conceptual schema looks like this:
|
|
154
|
+
|
|
155
|
+
```json
|
|
156
|
+
{
|
|
157
|
+
"features": [
|
|
158
|
+
"pkg.module_a",
|
|
159
|
+
"pkg.module_a.fn_x",
|
|
160
|
+
"pkg.module_b",
|
|
161
|
+
"pkg.module_b.ClassY.method_z"
|
|
162
|
+
],
|
|
163
|
+
"tests": {
|
|
164
|
+
"tests/unit/test_a.py::test_one": {
|
|
165
|
+
"cost": 0.8,
|
|
166
|
+
"signal": {
|
|
167
|
+
"pkg.module_a": 0.9,
|
|
168
|
+
"pkg.module_a.fn_x": 1.0
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
The model is stored in a `.covopt` file that is generated when invoking the tool.
|
|
176
|
+
|
|
177
|
+
The exact on-disk format is an implementation detail and may evolve.
|
|
178
|
+
|
|
179
|
+
## Selection algorithm
|
|
180
|
+
|
|
181
|
+
At a high level:
|
|
182
|
+
|
|
183
|
+
1. Parse the diff from `stdin`
|
|
184
|
+
2. Map changed files / hunks / symbols to repository features
|
|
185
|
+
3. Build a weighted change vector
|
|
186
|
+
4. Score candidate tests by overlap with the change vector
|
|
187
|
+
5. Iteratively pick the test with the best **marginal gain / cost**
|
|
188
|
+
6. Apply diminishing returns to already-covered regions
|
|
189
|
+
7. Penalize redundant tests using similarity
|
|
190
|
+
8. Stop when the next test would exceed the budget
|
|
191
|
+
|
|
192
|
+
A typical utility function is:
|
|
193
|
+
|
|
194
|
+
```text
|
|
195
|
+
utility(S) = sum over features j of w_j * f(sum over tests i in S of A_ij)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Where:
|
|
199
|
+
|
|
200
|
+
- `w_j` is the change weight for feature `j`
|
|
201
|
+
- `A_ij` is the signal of test `i` on feature `j`
|
|
202
|
+
- `f(...)` is a saturating function so duplicate tests add less value
|
|
203
|
+
|
|
204
|
+
This makes the selector naturally prefer diverse tests over repeated variants of the same test shape.
|
|
205
|
+
|
|
206
|
+
## Project structure
|
|
207
|
+
|
|
208
|
+
```text
|
|
209
|
+
covopt/
|
|
210
|
+
__main__.py # CLI entrypoint
|
|
211
|
+
cli.py # argv parsing and I/O orchestration
|
|
212
|
+
diff_parser.py # unified diff parsing
|
|
213
|
+
feature_space.py # repo feature mapping
|
|
214
|
+
scoring.py # initial candidate scoring
|
|
215
|
+
optimize.py # budgeted greedy selection
|
|
216
|
+
output.py # stdout / json formatting
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Exit behavior
|
|
220
|
+
|
|
221
|
+
- `0`: successful selection
|
|
222
|
+
- non-zero: invalid arguments, malformed diff, or internal failure
|
|
223
|
+
|
|
224
|
+
Diagnostics should go to `stderr`. Selected tests should go to `stdout` only.
|
|
225
|
+
|
|
226
|
+
## Design principles
|
|
227
|
+
|
|
228
|
+
- **Fast startup**: suitable for local shell pipelines
|
|
229
|
+
- **Deterministic**: same diff => same selection
|
|
230
|
+
- **Composable**: works well with Unix pipes
|
|
231
|
+
- **Model-driven**: selection logic is decoupled from model construction
|
|
232
|
+
- **Conservative stdout**: only emit test IDs unless JSON mode is requested
|
|
233
|
+
|
|
234
|
+
## Example workflow
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
git diff | covopt select -t 10 | xargs pytest
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
Full CI still runs the complete suite:
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
pytest
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## Future work
|
|
247
|
+
|
|
248
|
+
- richer repository feature extraction
|
|
249
|
+
- learned test-value calibration from historical failures
|
|
250
|
+
- improved redundancy modeling
|
|
251
|
+
- coverage/model builders
|
|
252
|
+
- optional `pytest` integration
|
|
253
|
+
- support for multiple optimization strategies
|
|
254
|
+
|
|
255
|
+
## Status
|
|
256
|
+
|
|
257
|
+
Early-stage experimental project focused on the core algorithm and CLI. The first milestone is a reliable selector that can consume a diff, apply a time budget, and emit a useful non-redundant subset of tests for local development.
|
covopt-0.1.0a0/README.md
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
# covopt
|
|
2
|
+
|
|
3
|
+
`covopt` is a small command-line tool for **budgeted local test selection**.
|
|
4
|
+
|
|
5
|
+
It reads a diff from `stdin`, loads a precomputed model describing which tests are informative for which parts of the repository, and emits a subset of tests that maximizes expected signal under a runtime budget.
|
|
6
|
+
|
|
7
|
+
The intended workflow is:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
git diff | covopt select -t 10 | xargs pytest
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
This is **not** a full test runner and does **not** integrate with `pytest` directly. Its only job is to:
|
|
14
|
+
|
|
15
|
+
1. parse changed code from a diff,
|
|
16
|
+
2. map those changes into an internal repository-space representation,
|
|
17
|
+
3. score candidate tests by expected value,
|
|
18
|
+
4. penalize redundant tests,
|
|
19
|
+
5. choose a subset that fits a wall-clock budget,
|
|
20
|
+
6. print selected test IDs to `stdout`, one per line.
|
|
21
|
+
|
|
22
|
+
## Motivation
|
|
23
|
+
|
|
24
|
+
Large test suites often contain many tests that are highly correlated. For local iteration, running all tests is too expensive, while running only “affected” tests can still waste time on near-duplicates.
|
|
25
|
+
|
|
26
|
+
`covopt` treats test selection as a **budgeted optimization problem**:
|
|
27
|
+
|
|
28
|
+
- **Return**: expected defect-detection signal in changed areas
|
|
29
|
+
- **Cost**: estimated runtime of a test
|
|
30
|
+
- **Risk**: residual untested change exposure
|
|
31
|
+
- **Correlation**: redundancy between tests
|
|
32
|
+
|
|
33
|
+
The goal is to select a small, diverse, high-value subset of tests for fast feedback, while leaving full validation to CI.
|
|
34
|
+
|
|
35
|
+
## Core idea
|
|
36
|
+
|
|
37
|
+
The repository is modeled as a feature space over code regions such as files, modules, classes, or functions.
|
|
38
|
+
|
|
39
|
+
Each test has:
|
|
40
|
+
|
|
41
|
+
- a sparse **signal vector** over that space,
|
|
42
|
+
- an estimated runtime cost,
|
|
43
|
+
- optional metadata such as historical failures or flakiness.
|
|
44
|
+
|
|
45
|
+
A diff is converted into a weighted **change vector** over the same space.
|
|
46
|
+
|
|
47
|
+
Selection then solves:
|
|
48
|
+
|
|
49
|
+
- maximize coverage of changed regions,
|
|
50
|
+
- prefer tests with high marginal gain per second,
|
|
51
|
+
- avoid picking multiple tests with nearly identical signal,
|
|
52
|
+
- stop when the budget is exhausted.
|
|
53
|
+
|
|
54
|
+
In practice this is implemented as a **budgeted greedy optimizer** over a diminishing-returns objective.
|
|
55
|
+
|
|
56
|
+
## Non-goals
|
|
57
|
+
|
|
58
|
+
For the first version, this project does **not** aim to provide:
|
|
59
|
+
|
|
60
|
+
- direct `pytest` plugin support,
|
|
61
|
+
- dynamic collection of coverage data,
|
|
62
|
+
- mutation testing,
|
|
63
|
+
- distributed execution,
|
|
64
|
+
- CI orchestration,
|
|
65
|
+
- perfect safety guarantees.
|
|
66
|
+
|
|
67
|
+
This is a **local feedback accelerator**, not a replacement for the full test suite.
|
|
68
|
+
|
|
69
|
+
## CLI
|
|
70
|
+
|
|
71
|
+
### Basic usage
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
git diff | covopt select -t 10
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Prints selected test node IDs to `stdout`.
|
|
78
|
+
|
|
79
|
+
### Example
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
git diff HEAD~1 | covopt select -t 10 | xargs pytest
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Arguments
|
|
86
|
+
|
|
87
|
+
```text
|
|
88
|
+
usage: covopt select [-h] -t SECONDS [-n N] [--verbose]
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
#### Optional
|
|
92
|
+
|
|
93
|
+
- `-n N`
|
|
94
|
+
Restrict optimization to the top `N` candidate tests after initial scoring.
|
|
95
|
+
|
|
96
|
+
- `--verbose`
|
|
97
|
+
Emit diagnostics to `stderr`.
|
|
98
|
+
|
|
99
|
+
## Input / output contract
|
|
100
|
+
|
|
101
|
+
### Input
|
|
102
|
+
|
|
103
|
+
`stdin` must contain a unified diff, for example from:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
git diff
|
|
107
|
+
git diff HEAD~1
|
|
108
|
+
git show <commit>
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Output
|
|
112
|
+
|
|
113
|
+
By default, the tool prints one test per line:
|
|
114
|
+
|
|
115
|
+
```text
|
|
116
|
+
tests/unit/foo/test_parser.py::test_basic_parse
|
|
117
|
+
tests/unit/bar/test_config.py::test_defaults
|
|
118
|
+
tests/integration/api/test_health.py::test_healthcheck
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
This makes it easy to pipe into `xargs pytest`.
|
|
122
|
+
|
|
123
|
+
With `--verbose`, the tool emits structured output including estimated total runtime and selection scores to stderr. Errors are always written to stderr.
|
|
124
|
+
|
|
125
|
+
## Model format
|
|
126
|
+
|
|
127
|
+
The selector consumes a model containing:
|
|
128
|
+
|
|
129
|
+
- repository feature definitions,
|
|
130
|
+
- per-test sparse signal vectors,
|
|
131
|
+
- pairwise or cluster-level redundancy information,
|
|
132
|
+
- test runtime estimates,
|
|
133
|
+
- optional weights and calibration parameters.
|
|
134
|
+
|
|
135
|
+
A minimal conceptual schema looks like this:
|
|
136
|
+
|
|
137
|
+
```json
|
|
138
|
+
{
|
|
139
|
+
"features": [
|
|
140
|
+
"pkg.module_a",
|
|
141
|
+
"pkg.module_a.fn_x",
|
|
142
|
+
"pkg.module_b",
|
|
143
|
+
"pkg.module_b.ClassY.method_z"
|
|
144
|
+
],
|
|
145
|
+
"tests": {
|
|
146
|
+
"tests/unit/test_a.py::test_one": {
|
|
147
|
+
"cost": 0.8,
|
|
148
|
+
"signal": {
|
|
149
|
+
"pkg.module_a": 0.9,
|
|
150
|
+
"pkg.module_a.fn_x": 1.0
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
The model is stored in a `.covopt` file that is generated when invoking the tool.
|
|
158
|
+
|
|
159
|
+
The exact on-disk format is an implementation detail and may evolve.
|
|
160
|
+
|
|
161
|
+
## Selection algorithm
|
|
162
|
+
|
|
163
|
+
At a high level:
|
|
164
|
+
|
|
165
|
+
1. Parse the diff from `stdin`
|
|
166
|
+
2. Map changed files / hunks / symbols to repository features
|
|
167
|
+
3. Build a weighted change vector
|
|
168
|
+
4. Score candidate tests by overlap with the change vector
|
|
169
|
+
5. Iteratively pick the test with the best **marginal gain / cost**
|
|
170
|
+
6. Apply diminishing returns to already-covered regions
|
|
171
|
+
7. Penalize redundant tests using similarity
|
|
172
|
+
8. Stop when the next test would exceed the budget
|
|
173
|
+
|
|
174
|
+
A typical utility function is:
|
|
175
|
+
|
|
176
|
+
```text
|
|
177
|
+
utility(S) = sum over features j of w_j * f(sum over tests i in S of A_ij)
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Where:
|
|
181
|
+
|
|
182
|
+
- `w_j` is the change weight for feature `j`
|
|
183
|
+
- `A_ij` is the signal of test `i` on feature `j`
|
|
184
|
+
- `f(...)` is a saturating function so duplicate tests add less value
|
|
185
|
+
|
|
186
|
+
This makes the selector naturally prefer diverse tests over repeated variants of the same test shape.
|
|
187
|
+
|
|
188
|
+
## Project structure
|
|
189
|
+
|
|
190
|
+
```text
|
|
191
|
+
covopt/
|
|
192
|
+
__main__.py # CLI entrypoint
|
|
193
|
+
cli.py # argv parsing and I/O orchestration
|
|
194
|
+
diff_parser.py # unified diff parsing
|
|
195
|
+
feature_space.py # repo feature mapping
|
|
196
|
+
scoring.py # initial candidate scoring
|
|
197
|
+
optimize.py # budgeted greedy selection
|
|
198
|
+
output.py # stdout / json formatting
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## Exit behavior
|
|
202
|
+
|
|
203
|
+
- `0`: successful selection
|
|
204
|
+
- non-zero: invalid arguments, malformed diff, or internal failure
|
|
205
|
+
|
|
206
|
+
Diagnostics should go to `stderr`. Selected tests should go to `stdout` only.
|
|
207
|
+
|
|
208
|
+
## Design principles
|
|
209
|
+
|
|
210
|
+
- **Fast startup**: suitable for local shell pipelines
|
|
211
|
+
- **Deterministic**: same diff => same selection
|
|
212
|
+
- **Composable**: works well with Unix pipes
|
|
213
|
+
- **Model-driven**: selection logic is decoupled from model construction
|
|
214
|
+
- **Conservative stdout**: only emit test IDs unless JSON mode is requested
|
|
215
|
+
|
|
216
|
+
## Example workflow
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
git diff | covopt select -t 10 | xargs pytest
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Full CI still runs the complete suite:
|
|
223
|
+
|
|
224
|
+
```bash
|
|
225
|
+
pytest
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## Future work
|
|
229
|
+
|
|
230
|
+
- richer repository feature extraction
|
|
231
|
+
- learned test-value calibration from historical failures
|
|
232
|
+
- improved redundancy modeling
|
|
233
|
+
- coverage/model builders
|
|
234
|
+
- optional `pytest` integration
|
|
235
|
+
- support for multiple optimization strategies
|
|
236
|
+
|
|
237
|
+
## Status
|
|
238
|
+
|
|
239
|
+
Early-stage experimental project focused on the core algorithm and CLI. The first milestone is a reliable selector that can consume a diff, apply a time budget, and emit a useful non-redundant subset of tests for local development.
|
|
File without changes
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# PYTHON_ARGCOMPLETE_OK
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import covopt.cli.select
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from collections.abc import Sequence
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main(argv: Sequence[str] = ()) -> int:
    """Parse command-line arguments and dispatch to the requested subcommand.

    Args:
        argv: Arguments to parse, excluding the program name. When empty,
            ``sys.argv[1:]`` is used instead.

    Returns:
        The exit code of the executed subcommand, or ``1`` when the parsed
        command is not recognized.
    """
    cli_args = argv if argv else sys.argv[1:]

    root_parser = argparse.ArgumentParser(
        description="Optimize test selection based on code changes.",
        prog="covopt",
    )
    command_parsers = root_parser.add_subparsers(dest="command", required=True)
    covopt.cli.select.add_parser(command_parsers)

    namespace = root_parser.parse_args(cli_args)

    if namespace.command == "select":
        # Negative values of ``n`` are clamped to zero before being handed on.
        context = covopt.cli.select.Context(
            n=max(0, namespace.n),
            verbose=namespace.verbose,
        )
        return covopt.cli.select.main(context)

    # Fallback for any command that has a parser but no dispatch branch here.
    print(f"Invalid args: {cli_args}", file=sys.stderr)
    return 1
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from collections.abc import Collection, Mapping
|
|
6
|
+
|
|
7
|
+
import covopt.models
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def select_tests(
    function_edit_strengths: Mapping[covopt.models.FunctionDefinition, float],
    test_dependency_strengths: Mapping[
        covopt.models.FunctionDefinition, Mapping[covopt.models.FunctionDefinition, float]
    ],
    *,
    n: int,
    verbose: bool,
) -> Collection[covopt.models.FunctionDefinition]:
    """Greedily select up to ``n`` tests that best cover the edited functions.

    A test's score is the sum, over the functions it depends on, of its
    dependency strength multiplied by the function's remaining edit strength.
    Each round picks the highest-scoring remaining test and then halves the
    remaining edit strength of every function that test covers, so tests that
    only re-cover the same functions become less attractive.

    Ties are broken by the iteration order of ``test_dependency_strengths``,
    which makes the selection reproducible for a given input.

    Args:
        function_edit_strengths: Edit strength of each changed function.
            Functions that are absent count as unchanged (strength 0).
        test_dependency_strengths: For each test, the strength of its
            dependency on each function.
        n: Maximum number of tests to select; values <= 0 select nothing.
        verbose: When true, write selection diagnostics to ``stderr``.

    Returns:
        A frozenset of the selected tests. Neither input mapping is mutated.
    """
    # How much of a function's edit strength is left after a selected test
    # has covered it.
    covered_multiplier = 0.5

    # Work on a private copy so the caller's mapping is never mutated.
    remaining_edit_strengths = dict(function_edit_strengths)

    def score(test: covopt.models.FunctionDefinition) -> float:
        return sum(
            strength * remaining_edit_strengths.get(func, 0)
            for func, strength in test_dependency_strengths[test].items()
        )

    def discount_covered_functions(test: covopt.models.FunctionDefinition) -> None:
        # Only functions the test actually exercises (non-zero strength) and
        # that were edited have any remaining strength to discount.
        for func, strength in test_dependency_strengths[test].items():
            if strength and func in remaining_edit_strengths:
                remaining_edit_strengths[func] *= covered_multiplier

    # A dict keeps candidates in input order (unlike a set), so ``max`` breaks
    # ties deterministically in favor of the earliest candidate.
    candidates = dict.fromkeys(test_dependency_strengths)
    selected_tests: list[covopt.models.FunctionDefinition] = []

    while candidates and len(selected_tests) < n:
        # Score every candidate exactly once per round.
        scores = {test: score(test) for test in candidates}
        best_test = max(scores, key=scores.__getitem__)
        test_score = scores[best_test]

        if test_score == 0:
            if verbose:
                print("No more tests with non-zero score, stopping selection.", file=sys.stderr)
            break

        selected_tests.append(best_test)
        del candidates[best_test]

        if verbose:
            print(
                f"Selected {best_test.name} (score={test_score:.4f})",
                file=sys.stderr,
            )

        discount_covered_functions(best_test)

    return frozenset(selected_tests)
|
|
File without changes
|