covopt 0.1.0a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
+ name: "Publish"
2
+
3
+ on: push
4
+
5
+ jobs:
6
+ checks:
7
+ runs-on: ubuntu-22.04
8
+ steps:
9
+ - name: Checkout
10
+ uses: actions/checkout@v6
11
+ - name: Install uv
12
+ uses: astral-sh/setup-uv@v7
13
+ - name: Install node
14
+ uses: actions/setup-node@v3
15
+ with:
16
+ node-version: 24
17
+ - name: build-uv-lock
18
+ run: uv lock --locked
19
+ - name: format-ruff
20
+ run: uv run --extra dev ruff format . --check
21
+ - name: format-ruff-imports
22
+ run: uv run --extra dev ruff check . --select I
23
+ - name: format-prettier
24
+ run: npx prettier --check `git ls-files *.md *.json *.yml *.yaml`
25
+ - name: lint-ruff
26
+ run: uv run --extra dev ruff check .
27
+ - name: lint-ty
28
+ run: |
29
+ uv run --extra dev ty check covopt/ \
30
+ --ignore unresolved-attribute \
31
+ --ignore invalid-argument-type
32
+ - name: lint-vulture
33
+ run: uv run --extra dev vulture covopt/ tests/ vulture/allowlist.py
34
+ - name: test-pytest
35
+ run: |
36
+ uv run --extra dev pytest tests/ \
37
+ --cov=covopt \
38
+ --cov-report=term-missing
39
+ uv run --extra dev coverage report --fail-under=50
40
+ - name: build-uv-build
41
+ run: uv build
@@ -0,0 +1,32 @@
1
+ name: "Publish"
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ # Publish on any tag starting with a `v`, e.g., v0.1.0
7
+ - v*
8
+
9
+ jobs:
10
+ run:
11
+ runs-on: ubuntu-latest
12
+ environment:
13
+ name: pypi
14
+ permissions:
15
+ id-token: write
16
+ contents: read
17
+ steps:
18
+ - name: Checkout
19
+ uses: actions/checkout@v6
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v7
22
+ - name: Install Python 3.13
23
+ run: uv python install 3.13
24
+ - name: Build
25
+ run: uv build
26
+ # Check that basic features work and that we didn't forget to include crucial files
27
+ - name: Smoke test (wheel)
28
+ run: uv run --isolated --no-project --with dist/*.whl --with pytest pytest tests
29
+ - name: Smoke test (source distribution)
30
+ run: uv run --isolated --no-project --with dist/*.tar.gz --with pytest pytest tests
31
+ - name: Publish
32
+ run: uv publish
@@ -0,0 +1,2 @@
1
+ covopt.egg-info
2
+ build
@@ -0,0 +1,257 @@
1
+ Metadata-Version: 2.4
2
+ Name: covopt
3
+ Version: 0.1.0a0
4
+ Summary: Budgeted local test selection
5
+ Author: Olle Lindgren
6
+ Requires-Python: >=3.14
7
+ Requires-Dist: diskcache>=5.6.3
8
+ Requires-Dist: numpy>=2.4.4
9
+ Requires-Dist: pygit2>=1.14
10
+ Provides-Extra: dev
11
+ Requires-Dist: coverage>=7.6; extra == 'dev'
12
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
13
+ Requires-Dist: pytest>=8.3; extra == 'dev'
14
+ Requires-Dist: ruff>=0.15.8; extra == 'dev'
15
+ Requires-Dist: ty>=0.0.26; extra == 'dev'
16
+ Requires-Dist: vulture; extra == 'dev'
17
+ Description-Content-Type: text/markdown
18
+
19
+ # covopt
20
+
21
+ `covopt` is a small command-line tool for **budgeted local test selection**.
22
+
23
+ It reads a diff from `stdin`, loads a precomputed model describing which tests are informative for which parts of the repository, and emits a subset of tests that maximizes expected signal under a runtime budget.
24
+
25
+ The intended workflow is:
26
+
27
+ ```bash
28
+ git diff | covopt select -t 10 | xargs pytest
29
+ ```
30
+
31
+ This is **not** a full test runner and does **not** integrate with `pytest` directly. Its only job is to:
32
+
33
+ 1. parse changed code from a diff,
34
+ 2. map those changes into an internal repository-space representation,
35
+ 3. score candidate tests by expected value,
36
+ 4. penalize redundant tests,
37
+ 5. choose a subset that fits a wall-clock budget,
38
+ 6. print selected test IDs to `stdout`, one per line.
39
+
40
+ ## Motivation
41
+
42
+ Large test suites often contain many tests that are highly correlated. For local iteration, running all tests is too expensive, while running only “affected” tests can still waste time on near-duplicates.
43
+
44
+ `covopt` treats test selection as a **budgeted optimization problem**:
45
+
46
+ - **Return**: expected defect-detection signal in changed areas
47
+ - **Cost**: estimated runtime of a test
48
+ - **Risk**: residual untested change exposure
49
+ - **Correlation**: redundancy between tests
50
+
51
+ The goal is to select a small, diverse, high-value subset of tests for fast feedback, while leaving full validation to CI.
52
+
53
+ ## Core idea
54
+
55
+ The repository is modeled as a feature space over code regions such as files, modules, classes, or functions.
56
+
57
+ Each test has:
58
+
59
+ - a sparse **signal vector** over that space,
60
+ - an estimated runtime cost,
61
+ - optional metadata such as historical failures or flakiness.
62
+
63
+ A diff is converted into a weighted **change vector** over the same space.
64
+
65
+ Selection then solves:
66
+
67
+ - maximize coverage of changed regions,
68
+ - prefer tests with high marginal gain per second,
69
+ - avoid picking multiple tests with nearly identical signal,
70
+ - stop when the budget is exhausted.
71
+
72
+ In practice this is implemented as a **budgeted greedy optimizer** over a diminishing-returns objective.
73
+
74
+ ## Non-goals
75
+
76
+ For the first version, this project does **not** aim to provide:
77
+
78
+ - direct `pytest` plugin support,
79
+ - dynamic collection of coverage data,
80
+ - mutation testing,
81
+ - distributed execution,
82
+ - CI orchestration,
83
+ - perfect safety guarantees.
84
+
85
+ This is a **local feedback accelerator**, not a replacement for the full test suite.
86
+
87
+ ## CLI
88
+
89
+ ### Basic usage
90
+
91
+ ```bash
92
+ git diff | covopt select -t 10
93
+ ```
94
+
95
+ Prints selected test node IDs to `stdout`.
96
+
97
+ ### Example
98
+
99
+ ```bash
100
+ git diff HEAD~1 | covopt select -t 10 | xargs pytest
101
+ ```
102
+
103
+ ### Arguments
104
+
105
+ ```text
106
+ usage: covopt select [-h] -t SECONDS [-n N] [--verbose]
107
+ ```
108
+
109
+ #### Optional
110
+
111
+ - `-n N`
112
+ Restrict optimization to the top `N` candidate tests after initial scoring.
113
+
114
+ - `--verbose`
115
+ Emit diagnostics to `stderr`.
116
+
117
+ ## Input / output contract
118
+
119
+ ### Input
120
+
121
+ `stdin` must contain a unified diff, for example from:
122
+
123
+ ```bash
124
+ git diff
125
+ git diff HEAD~1
126
+ git show <commit>
127
+ ```
128
+
129
+ ### Output
130
+
131
+ By default, the tool prints one test per line:
132
+
133
+ ```text
134
+ tests/unit/foo/test_parser.py::test_basic_parse
135
+ tests/unit/bar/test_config.py::test_defaults
136
+ tests/integration/api/test_health.py::test_healthcheck
137
+ ```
138
+
139
+ This makes it easy to pipe into `xargs pytest`.
140
+
141
+ With `--verbose`, the tool emits structured output including estimated total runtime and selection scores to stderr. Errors are always written to stderr.
142
+
143
+ ## Model format
144
+
145
+ The selector consumes a model containing:
146
+
147
+ - repository feature definitions,
148
+ - per-test sparse signal vectors,
149
+ - pairwise or cluster-level redundancy information,
150
+ - test runtime estimates,
151
+ - optional weights and calibration parameters.
152
+
153
+ A minimal conceptual schema looks like this:
154
+
155
+ ```json
156
+ {
157
+ "features": [
158
+ "pkg.module_a",
159
+ "pkg.module_a.fn_x",
160
+ "pkg.module_b",
161
+ "pkg.module_b.ClassY.method_z"
162
+ ],
163
+ "tests": {
164
+ "tests/unit/test_a.py::test_one": {
165
+ "cost": 0.8,
166
+ "signal": {
167
+ "pkg.module_a": 0.9,
168
+ "pkg.module_a.fn_x": 1.0
169
+ }
170
+ }
171
+ }
172
+ }
173
+ ```
174
+
175
+ The model is stored in a `.covopt` file that is generated when invoking the tool.
176
+
177
+ The exact on-disk format is an implementation detail and may evolve.
178
+
179
+ ## Selection algorithm
180
+
181
+ At a high level:
182
+
183
+ 1. Parse the diff from `stdin`
184
+ 2. Map changed files / hunks / symbols to repository features
185
+ 3. Build a weighted change vector
186
+ 4. Score candidate tests by overlap with the change vector
187
+ 5. Iteratively pick the test with the best **marginal gain / cost**
188
+ 6. Apply diminishing returns to already-covered regions
189
+ 7. Penalize redundant tests using similarity
190
+ 8. Stop when the next test would exceed the budget
191
+
192
+ A typical utility function is:
193
+
194
+ ```text
195
+ utility(S) = sum over features j of w_j * f(sum over tests i in S of A_ij)
196
+ ```
197
+
198
+ Where:
199
+
200
+ - `w_j` is the change weight for feature `j`
201
+ - `A_ij` is the signal of test `i` on feature `j`
202
+ - `f(...)` is a saturating function so duplicate tests add less value
203
+
204
+ This makes the selector naturally prefer diverse tests over repeated variants of the same test shape.
205
+
206
+ ## Project structure
207
+
208
+ ```text
209
+ covopt/
210
+ __main__.py # CLI entrypoint
211
+ cli.py # argv parsing and I/O orchestration
212
+ diff_parser.py # unified diff parsing
213
+ feature_space.py # repo feature mapping
214
+ scoring.py # initial candidate scoring
215
+ optimize.py # budgeted greedy selection
216
+ output.py # stdout / json formatting
217
+ ```
218
+
219
+ ## Exit behavior
220
+
221
+ - `0`: successful selection
222
+ - non-zero: invalid arguments, malformed diff, or internal failure
223
+
224
+ Diagnostics should go to `stderr`. Selected tests should go to `stdout` only.
225
+
226
+ ## Design principles
227
+
228
+ - **Fast startup**: suitable for local shell pipelines
229
+ - **Deterministic**: same diff => same selection
230
+ - **Composable**: works well with Unix pipes
231
+ - **Model-driven**: selection logic is decoupled from model construction
232
+ - **Conservative stdout**: only emit test IDs unless JSON mode is requested
233
+
234
+ ## Example workflow
235
+
236
+ ```bash
237
+ git diff | covopt select -t 10 | xargs pytest
238
+ ```
239
+
240
+ Full CI still runs the complete suite:
241
+
242
+ ```bash
243
+ pytest
244
+ ```
245
+
246
+ ## Future work
247
+
248
+ - richer repository feature extraction
249
+ - learned test-value calibration from historical failures
250
+ - improved redundancy modeling
251
+ - coverage/model builders
252
+ - optional `pytest` integration
253
+ - support for multiple optimization strategies
254
+
255
+ ## Status
256
+
257
+ Early-stage experimental project focused on the core algorithm and CLI. The first milestone is a reliable selector that can consume a diff, apply a time budget, and emit a useful non-redundant subset of tests for local development.
@@ -0,0 +1,239 @@
1
+ # covopt
2
+
3
+ `covopt` is a small command-line tool for **budgeted local test selection**.
4
+
5
+ It reads a diff from `stdin`, loads a precomputed model describing which tests are informative for which parts of the repository, and emits a subset of tests that maximizes expected signal under a runtime budget.
6
+
7
+ The intended workflow is:
8
+
9
+ ```bash
10
+ git diff | covopt select -t 10 | xargs pytest
11
+ ```
12
+
13
+ This is **not** a full test runner and does **not** integrate with `pytest` directly. Its only job is to:
14
+
15
+ 1. parse changed code from a diff,
16
+ 2. map those changes into an internal repository-space representation,
17
+ 3. score candidate tests by expected value,
18
+ 4. penalize redundant tests,
19
+ 5. choose a subset that fits a wall-clock budget,
20
+ 6. print selected test IDs to `stdout`, one per line.
21
+
22
+ ## Motivation
23
+
24
+ Large test suites often contain many tests that are highly correlated. For local iteration, running all tests is too expensive, while running only “affected” tests can still waste time on near-duplicates.
25
+
26
+ `covopt` treats test selection as a **budgeted optimization problem**:
27
+
28
+ - **Return**: expected defect-detection signal in changed areas
29
+ - **Cost**: estimated runtime of a test
30
+ - **Risk**: residual untested change exposure
31
+ - **Correlation**: redundancy between tests
32
+
33
+ The goal is to select a small, diverse, high-value subset of tests for fast feedback, while leaving full validation to CI.
34
+
35
+ ## Core idea
36
+
37
+ The repository is modeled as a feature space over code regions such as files, modules, classes, or functions.
38
+
39
+ Each test has:
40
+
41
+ - a sparse **signal vector** over that space,
42
+ - an estimated runtime cost,
43
+ - optional metadata such as historical failures or flakiness.
44
+
45
+ A diff is converted into a weighted **change vector** over the same space.
46
+
47
+ Selection then solves:
48
+
49
+ - maximize coverage of changed regions,
50
+ - prefer tests with high marginal gain per second,
51
+ - avoid picking multiple tests with nearly identical signal,
52
+ - stop when the budget is exhausted.
53
+
54
+ In practice this is implemented as a **budgeted greedy optimizer** over a diminishing-returns objective.
55
+
56
+ ## Non-goals
57
+
58
+ For the first version, this project does **not** aim to provide:
59
+
60
+ - direct `pytest` plugin support,
61
+ - dynamic collection of coverage data,
62
+ - mutation testing,
63
+ - distributed execution,
64
+ - CI orchestration,
65
+ - perfect safety guarantees.
66
+
67
+ This is a **local feedback accelerator**, not a replacement for the full test suite.
68
+
69
+ ## CLI
70
+
71
+ ### Basic usage
72
+
73
+ ```bash
74
+ git diff | covopt select -t 10
75
+ ```
76
+
77
+ Prints selected test node IDs to `stdout`.
78
+
79
+ ### Example
80
+
81
+ ```bash
82
+ git diff HEAD~1 | covopt select -t 10 | xargs pytest
83
+ ```
84
+
85
+ ### Arguments
86
+
87
+ ```text
88
+ usage: covopt select [-h] -t SECONDS [-n N] [--verbose]
89
+ ```
90
+
91
+ #### Optional
92
+
93
+ - `-n N`
94
+ Restrict optimization to the top `N` candidate tests after initial scoring.
95
+
96
+ - `--verbose`
97
+ Emit diagnostics to `stderr`.
98
+
99
+ ## Input / output contract
100
+
101
+ ### Input
102
+
103
+ `stdin` must contain a unified diff, for example from:
104
+
105
+ ```bash
106
+ git diff
107
+ git diff HEAD~1
108
+ git show <commit>
109
+ ```
110
+
111
+ ### Output
112
+
113
+ By default, the tool prints one test per line:
114
+
115
+ ```text
116
+ tests/unit/foo/test_parser.py::test_basic_parse
117
+ tests/unit/bar/test_config.py::test_defaults
118
+ tests/integration/api/test_health.py::test_healthcheck
119
+ ```
120
+
121
+ This makes it easy to pipe into `xargs pytest`.
122
+
123
+ With `--verbose`, the tool emits structured output including estimated total runtime and selection scores to stderr. Errors are always written to stderr.
124
+
125
+ ## Model format
126
+
127
+ The selector consumes a model containing:
128
+
129
+ - repository feature definitions,
130
+ - per-test sparse signal vectors,
131
+ - pairwise or cluster-level redundancy information,
132
+ - test runtime estimates,
133
+ - optional weights and calibration parameters.
134
+
135
+ A minimal conceptual schema looks like this:
136
+
137
+ ```json
138
+ {
139
+ "features": [
140
+ "pkg.module_a",
141
+ "pkg.module_a.fn_x",
142
+ "pkg.module_b",
143
+ "pkg.module_b.ClassY.method_z"
144
+ ],
145
+ "tests": {
146
+ "tests/unit/test_a.py::test_one": {
147
+ "cost": 0.8,
148
+ "signal": {
149
+ "pkg.module_a": 0.9,
150
+ "pkg.module_a.fn_x": 1.0
151
+ }
152
+ }
153
+ }
154
+ }
155
+ ```
156
+
157
+ The model is stored in a `.covopt` file that is generated when invoking the tool.
158
+
159
+ The exact on-disk format is an implementation detail and may evolve.
160
+
161
+ ## Selection algorithm
162
+
163
+ At a high level:
164
+
165
+ 1. Parse the diff from `stdin`
166
+ 2. Map changed files / hunks / symbols to repository features
167
+ 3. Build a weighted change vector
168
+ 4. Score candidate tests by overlap with the change vector
169
+ 5. Iteratively pick the test with the best **marginal gain / cost**
170
+ 6. Apply diminishing returns to already-covered regions
171
+ 7. Penalize redundant tests using similarity
172
+ 8. Stop when the next test would exceed the budget
173
+
174
+ A typical utility function is:
175
+
176
+ ```text
177
+ utility(S) = sum over features j of w_j * f(sum over tests i in S of A_ij)
178
+ ```
179
+
180
+ Where:
181
+
182
+ - `w_j` is the change weight for feature `j`
183
+ - `A_ij` is the signal of test `i` on feature `j`
184
+ - `f(...)` is a saturating function so duplicate tests add less value
185
+
186
+ This makes the selector naturally prefer diverse tests over repeated variants of the same test shape.
187
+
188
+ ## Project structure
189
+
190
+ ```text
191
+ covopt/
192
+ __main__.py # CLI entrypoint
193
+ cli.py # argv parsing and I/O orchestration
194
+ diff_parser.py # unified diff parsing
195
+ feature_space.py # repo feature mapping
196
+ scoring.py # initial candidate scoring
197
+ optimize.py # budgeted greedy selection
198
+ output.py # stdout / json formatting
199
+ ```
200
+
201
+ ## Exit behavior
202
+
203
+ - `0`: successful selection
204
+ - non-zero: invalid arguments, malformed diff, or internal failure
205
+
206
+ Diagnostics should go to `stderr`. Selected tests should go to `stdout` only.
207
+
208
+ ## Design principles
209
+
210
+ - **Fast startup**: suitable for local shell pipelines
211
+ - **Deterministic**: same diff => same selection
212
+ - **Composable**: works well with Unix pipes
213
+ - **Model-driven**: selection logic is decoupled from model construction
214
+ - **Conservative stdout**: only emit test IDs unless JSON mode is requested
215
+
216
+ ## Example workflow
217
+
218
+ ```bash
219
+ git diff | covopt select -t 10 | xargs pytest
220
+ ```
221
+
222
+ Full CI still runs the complete suite:
223
+
224
+ ```bash
225
+ pytest
226
+ ```
227
+
228
+ ## Future work
229
+
230
+ - richer repository feature extraction
231
+ - learned test-value calibration from historical failures
232
+ - improved redundancy modeling
233
+ - coverage/model builders
234
+ - optional `pytest` integration
235
+ - support for multiple optimization strategies
236
+
237
+ ## Status
238
+
239
+ Early-stage experimental project focused on the core algorithm and CLI. The first milestone is a reliable selector that can consume a diff, apply a time budget, and emit a useful non-redundant subset of tests for local development.
File without changes
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env python3
2
+ # PYTHON_ARGCOMPLETE_OK
3
+ import argparse
4
+ import sys
5
+ from typing import TYPE_CHECKING
6
+
7
+ import covopt.cli.select
8
+
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Sequence
11
+
12
+
13
def main(argv: Sequence[str] = ()) -> int:
    """Parse the command line and dispatch to the requested subcommand.

    Args:
        argv: Command-line arguments without the program name. When empty
            (the default), ``sys.argv[1:]`` is used instead.

    Returns:
        The value returned by ``covopt.cli.select.main`` for the ``select``
        command, or ``1`` when the parsed command is not recognised.
    """
    arguments = argv if argv else sys.argv[1:]

    parser = argparse.ArgumentParser(
        prog="covopt",
        description="Optimize test selection based on code changes.",
    )
    # Each subcommand module registers its own sub-parser.
    commands = parser.add_subparsers(dest="command", required=True)
    covopt.cli.select.add_parser(commands)

    namespace = parser.parse_args(arguments)

    if namespace.command == "select":
        # Negative values of ``n`` are clamped to zero.
        # NOTE(review): assumes ``-n`` always parses to an int (i.e. has an
        # int default); ``max(0, None)`` would raise -- confirm in add_parser.
        context = covopt.cli.select.Context(
            n=max(0, namespace.n),
            verbose=namespace.verbose,
        )
        return covopt.cli.select.main(context)

    # Fallback for a command that has a sub-parser but no dispatch branch.
    print(f"Invalid args: {arguments}", file=sys.stderr)
    return 1
32
+
33
+
34
+ if __name__ == "__main__": # pragma: no cover
35
+ sys.exit(main(sys.argv[1:]))
@@ -0,0 +1,63 @@
1
+ import sys
2
+ from typing import TYPE_CHECKING
3
+
4
+ if TYPE_CHECKING:
5
+ from collections.abc import Collection, Mapping
6
+
7
+ import covopt.models
8
+
9
+
10
def select_tests(
    function_edit_strengths: Mapping[covopt.models.FunctionDefinition, float],
    test_dependency_strengths: Mapping[
        covopt.models.FunctionDefinition, Mapping[covopt.models.FunctionDefinition, float]
    ],
    *,
    n: int,
    verbose: bool,
) -> Collection[covopt.models.FunctionDefinition]:
    """Greedily select up to ``n`` tests that best cover the edited functions.

    A test's score is the sum, over the functions it depends on, of
    ``dependency strength * residual edit strength``. Each round picks the
    highest-scoring remaining test; ties go to the test that appears first in
    ``test_dependency_strengths`` so the result does not depend on hash order.
    After a pick, the residual edit strength of every function that test
    covers is halved, so later tests exercising the same functions are worth
    less than tests reaching still-uncovered edits.

    Neither input mapping is mutated.

    Args:
        function_edit_strengths: Weight of the edit for each edited function.
            Functions missing from this mapping contribute nothing.
        test_dependency_strengths: For each candidate test, how strongly it
            depends on each function.
        n: Maximum number of tests to select. Values ``<= 0`` select nothing.
        verbose: When true, write selection diagnostics to ``stderr``. The
            keys of ``test_dependency_strengths`` must then expose ``.name``.

    Returns:
        A frozenset of the selected tests. Selection stops early once the best
        remaining score is zero.
    """
    # Multiplier applied to the residual weight of a function each time a
    # selected test covers it (diminishing returns on already-covered edits).
    redundancy_discount = 0.5

    # Work on a private copy so the caller's mapping is left untouched.
    residual_edit_strengths = dict(function_edit_strengths)

    def score(test: covopt.models.FunctionDefinition) -> float:
        return sum(
            (
                strength * residual_edit_strengths.get(func, 0)
                for func, strength in test_dependency_strengths[test].items()
            ),
            0.0,
        )

    # A dict (rather than a set) keeps candidates in input order, which makes
    # tie-breaking in ``max`` reproducible between runs.
    remaining_tests = dict.fromkeys(test_dependency_strengths)
    selected_tests: set[covopt.models.FunctionDefinition] = set()

    while remaining_tests and len(selected_tests) < n:
        # Score every candidate once per round; ``max`` returns the first
        # maximal key, i.e. the earliest test in input order.
        scores = {test: score(test) for test in remaining_tests}
        best_test = max(scores, key=scores.__getitem__)
        test_score = scores[best_test]

        if test_score == 0:
            if verbose:
                print("No more tests with non-zero score, stopping selection.", file=sys.stderr)
            break

        selected_tests.add(best_test)
        del remaining_tests[best_test]

        if verbose:
            print(
                f"Selected {best_test.name} (score={test_score:.4f})",
                file=sys.stderr,
            )

        # Discount the edits this test already covers. Scaling the residual
        # edit weight affects every remaining test that touches the same
        # function, unlike scaling the already-removed test's own strengths.
        for func, strength in test_dependency_strengths[best_test].items():
            if strength > 0 and func in residual_edit_strengths:
                residual_edit_strengths[func] *= redundancy_discount

    return frozenset(selected_tests)
File without changes