mentistest-coverage 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mentistest_coverage-0.2.2.dist-info/METADATA +310 -0
- mentistest_coverage-0.2.2.dist-info/RECORD +88 -0
- mentistest_coverage-0.2.2.dist-info/WHEEL +5 -0
- mentistest_coverage-0.2.2.dist-info/entry_points.txt +2 -0
- mentistest_coverage-0.2.2.dist-info/licenses/LICENSE +21 -0
- mentistest_coverage-0.2.2.dist-info/top_level.txt +1 -0
- test_coverage_tool/__init__.py +17 -0
- test_coverage_tool/_logging.py +34 -0
- test_coverage_tool/_version.py +4 -0
- test_coverage_tool/ai/__init__.py +8 -0
- test_coverage_tool/ai/_scrub.py +83 -0
- test_coverage_tool/ai/analyzer.py +455 -0
- test_coverage_tool/ai/budget.py +177 -0
- test_coverage_tool/ai/config.py +93 -0
- test_coverage_tool/api/__init__.py +7 -0
- test_coverage_tool/api/_logging_middleware.py +218 -0
- test_coverage_tool/api/_metrics.py +131 -0
- test_coverage_tool/api/_security.py +284 -0
- test_coverage_tool/api/models.py +104 -0
- test_coverage_tool/api/routes.py +661 -0
- test_coverage_tool/benchmarks/__init__.py +7 -0
- test_coverage_tool/benchmarks/_templates/compare.html.jinja +219 -0
- test_coverage_tool/benchmarks/_templates/dashboard.html.jinja +977 -0
- test_coverage_tool/benchmarks/dashboard.py +706 -0
- test_coverage_tool/benchmarks/store.py +520 -0
- test_coverage_tool/cli/__init__.py +7 -0
- test_coverage_tool/cli/main.py +706 -0
- test_coverage_tool/core/__init__.py +14 -0
- test_coverage_tool/core/analyzer.py +1194 -0
- test_coverage_tool/core/call_graph.py +240 -0
- test_coverage_tool/core/classifier.py +982 -0
- test_coverage_tool/core/classifier_facts.py +676 -0
- test_coverage_tool/core/classifier_validation.py +400 -0
- test_coverage_tool/core/contract_coverage.py +101 -0
- test_coverage_tool/core/diff_coverage.py +226 -0
- test_coverage_tool/core/endpoint_attribution.py +303 -0
- test_coverage_tool/core/endpoint_correlation.py +364 -0
- test_coverage_tool/core/endpoint_extractors/__init__.py +56 -0
- test_coverage_tool/core/endpoint_extractors/class_views.py +469 -0
- test_coverage_tool/core/endpoint_extractors/cross_check.py +205 -0
- test_coverage_tool/core/endpoint_extractors/inbound.py +413 -0
- test_coverage_tool/core/endpoint_extractors/javascript.py +335 -0
- test_coverage_tool/core/endpoint_extractors/mock_libs.py +170 -0
- test_coverage_tool/core/endpoint_extractors/outbound.py +440 -0
- test_coverage_tool/core/endpoint_extractors/overrides.py +228 -0
- test_coverage_tool/core/endpoint_extractors/wrappers.py +162 -0
- test_coverage_tool/core/fixture_resolver.py +287 -0
- test_coverage_tool/core/git.py +196 -0
- test_coverage_tool/core/imports.py +101 -0
- test_coverage_tool/core/javascript_classifier.py +273 -0
- test_coverage_tool/core/marker_config.py +251 -0
- test_coverage_tool/core/metrics.py +332 -0
- test_coverage_tool/core/reclassify.py +503 -0
- test_coverage_tool/core/regression.py +184 -0
- test_coverage_tool/core/symbol_table.py +348 -0
- test_coverage_tool/core/test_type.py +52 -0
- test_coverage_tool/core/timing.py +223 -0
- test_coverage_tool/core/types.py +1038 -0
- test_coverage_tool/history/__init__.py +5 -0
- test_coverage_tool/history/backfill.py +369 -0
- test_coverage_tool/history/comparison.py +297 -0
- test_coverage_tool/history/comparison_dashboard.py +353 -0
- test_coverage_tool/history/dashboard.py +794 -0
- test_coverage_tool/history/store.py +588 -0
- test_coverage_tool/parsers/__init__.py +9 -0
- test_coverage_tool/parsers/_constants.py +13 -0
- test_coverage_tool/parsers/code_parser.py +201 -0
- test_coverage_tool/parsers/coverage_contexts.py +172 -0
- test_coverage_tool/parsers/coverage_parser.py +180 -0
- test_coverage_tool/parsers/cypress_parser.py +95 -0
- test_coverage_tool/parsers/http_traffic_parser.py +143 -0
- test_coverage_tool/parsers/istanbul_parser.py +172 -0
- test_coverage_tool/parsers/mutation_parser.py +209 -0
- test_coverage_tool/parsers/pact_parser.py +227 -0
- test_coverage_tool/parsers/spec_parser.py +118 -0
- test_coverage_tool/parsers/test_parser.py +335 -0
- test_coverage_tool/portfolio/__init__.py +18 -0
- test_coverage_tool/portfolio/_templates/portfolio.html.jinja +130 -0
- test_coverage_tool/portfolio/dashboard.py +102 -0
- test_coverage_tool/portfolio/rollup.py +323 -0
- test_coverage_tool/portfolio/tokens.py +130 -0
- test_coverage_tool/py.typed +0 -0
- test_coverage_tool/reports/__init__.py +8 -0
- test_coverage_tool/reports/_format.py +160 -0
- test_coverage_tool/reports/_templates/report.html.jinja +1498 -0
- test_coverage_tool/reports/html_generator.py +653 -0
- test_coverage_tool/reports/json_generator.py +528 -0
- test_coverage_tool/reports/pdf_generator.py +442 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mentistest-coverage
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Analyze test strategy across SaaS products with coverage metrics
|
|
5
|
+
Author-email: Harry Trott <harrydtrott@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Harry Trott
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/HarryDouglas/test_coverage_tool
|
|
29
|
+
Project-URL: Repository, https://github.com/HarryDouglas/test_coverage_tool.git
|
|
30
|
+
Project-URL: Bug Tracker, https://github.com/HarryDouglas/test_coverage_tool/issues
|
|
31
|
+
Keywords: test-coverage,coverage-metrics,saas
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: Natural Language :: English
|
|
35
|
+
Classifier: Operating System :: OS Independent
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Requires-Python: >=3.10
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
License-File: LICENSE
|
|
42
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
43
|
+
Requires-Dist: fastapi<1,>=0.104
|
|
44
|
+
Requires-Dist: uvicorn<1,>=0.24
|
|
45
|
+
Requires-Dist: pyyaml<7,>=6.0
|
|
46
|
+
Requires-Dist: jsonschema<5,>=4.0
|
|
47
|
+
Requires-Dist: jinja2<4,>=3.1
|
|
48
|
+
Requires-Dist: typer<1,>=0.9
|
|
49
|
+
Requires-Dist: anthropic<1,>=0.40
|
|
50
|
+
Requires-Dist: httpx<1,>=0.25
|
|
51
|
+
Requires-Dist: slowapi<1,>=0.1.9
|
|
52
|
+
Requires-Dist: tomli<3,>=2.0; python_version < "3.11"
|
|
53
|
+
Provides-Extra: test
|
|
54
|
+
Requires-Dist: pytest~=9.0; extra == "test"
|
|
55
|
+
Requires-Dist: pytest-cov>=4.1; extra == "test"
|
|
56
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "test"
|
|
57
|
+
Requires-Dist: allure-pytest~=2.15; extra == "test"
|
|
58
|
+
Requires-Dist: packaging>=21.0; extra == "test"
|
|
59
|
+
Requires-Dist: hypothesis<7,>=6.0; extra == "test"
|
|
60
|
+
Provides-Extra: coverage
|
|
61
|
+
Requires-Dist: coverage>=5.0; extra == "coverage"
|
|
62
|
+
Provides-Extra: types
|
|
63
|
+
Requires-Dist: types-PyYAML>=6.0; extra == "types"
|
|
64
|
+
Provides-Extra: code-checks
|
|
65
|
+
Requires-Dist: black==25.1.0; extra == "code-checks"
|
|
66
|
+
Requires-Dist: mypy==1.16.1; extra == "code-checks"
|
|
67
|
+
Requires-Dist: pydocstyle==6.3.0; extra == "code-checks"
|
|
68
|
+
Requires-Dist: pylint==3.3.7; extra == "code-checks"
|
|
69
|
+
Requires-Dist: ruff>=0.1.0; extra == "code-checks"
|
|
70
|
+
Provides-Extra: docs
|
|
71
|
+
Requires-Dist: sphinx~=8.1; extra == "docs"
|
|
72
|
+
Requires-Dist: sphinx-breeze-theme; extra == "docs"
|
|
73
|
+
Requires-Dist: sphinxcontrib-mermaid; extra == "docs"
|
|
74
|
+
Requires-Dist: sphinx-autodoc-typehints; extra == "docs"
|
|
75
|
+
Requires-Dist: sphinx-book-theme; extra == "docs"
|
|
76
|
+
Requires-Dist: myst-parser; extra == "docs"
|
|
77
|
+
Provides-Extra: dev
|
|
78
|
+
Requires-Dist: invoke~=2.2; extra == "dev"
|
|
79
|
+
Requires-Dist: changelog-cli; extra == "dev"
|
|
80
|
+
Requires-Dist: cruft; extra == "dev"
|
|
81
|
+
Requires-Dist: ipython; extra == "dev"
|
|
82
|
+
Requires-Dist: pre-commit~=4.0; extra == "dev"
|
|
83
|
+
Requires-Dist: setuptools; extra == "dev"
|
|
84
|
+
Requires-Dist: build; extra == "dev"
|
|
85
|
+
Provides-Extra: logging-json
|
|
86
|
+
Requires-Dist: python-json-logger<4,>=2.0; extra == "logging-json"
|
|
87
|
+
Provides-Extra: metrics
|
|
88
|
+
Requires-Dist: prometheus-client<1,>=0.20; extra == "metrics"
|
|
89
|
+
Provides-Extra: all
|
|
90
|
+
Requires-Dist: invoke~=2.2; extra == "all"
|
|
91
|
+
Requires-Dist: setuptools; extra == "all"
|
|
92
|
+
Requires-Dist: build; extra == "all"
|
|
93
|
+
Dynamic: license-file
|
|
94
|
+
|
|
95
|
+
# mentistest-coverage
|
|
96
|
+
|
|
97
|
+
Strategic test-suite analyser. Looks at a Python project's source tree, test
|
|
98
|
+
tree, JUnit XML and (optionally) an OpenAPI spec, and produces a *test-strategy*
|
|
99
|
+
report — not just a coverage percentage.
|
|
100
|
+
|
|
101
|
+
What it does that `coverage.py` doesn't:
|
|
102
|
+
|
|
103
|
+
- Classifies every test as **unit / integration / contract / e2e** using markers,
|
|
104
|
+
imports, fixtures and directory layout.
|
|
105
|
+
- Weights coverage gaps by **cyclomatic complexity and call-graph depth** so the
|
|
106
|
+
20 functions that matter rank above the 200 that don't.
|
|
107
|
+
- Correlates tests against the **OpenAPI spec** to flag uncovered endpoints.
|
|
108
|
+
- Tracks results over time in a local SQLite **history store** with full git/CI
|
|
109
|
+
provenance, and renders a **trend dashboard** per project.
|
|
110
|
+
- Detects **regressions** between snapshots and can fail CI when a previously-
|
|
111
|
+
covered symbol loses coverage.
|
|
112
|
+
- Ships an opt-in **AI second opinion** (Claude Haiku → Sonnet escalation, gated
|
|
113
|
+
to skip healthy projects — typical cost <£0.01/run).
|
|
114
|
+
- Exports HTML (interactive), JSON (machine-readable) and PDF (board-ready) reports.
|
|
115
|
+
- Ships a first-class **GitHub Action** that posts a coverage-delta PR comment
|
|
116
|
+
and uploads artifacts.
|
|
117
|
+
|
|
118
|
+
Local-first. The analyser runs against filesystem paths; source code is never
|
|
119
|
+
uploaded anywhere. The optional AI call sends a structured summary (file paths,
|
|
120
|
+
coverage numbers, test names) — never the source itself.
|
|
121
|
+
|
|
122
|
+
## Install
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
pip install mentistest-coverage
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
…or from a clone:
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
pip install -e .
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Verify:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
mentistest --help
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Every PyPI release is built via OIDC trusted publishing and signed
|
|
141
|
+
with Sigstore — see [RELEASING.md](RELEASING.md) for the full chain
|
|
142
|
+
and verification recipe.
|
|
143
|
+
|
|
144
|
+
## Quick start
|
|
145
|
+
|
|
146
|
+
A complete analysis with all the features wired in:
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
mentistest analyze my-service \
|
|
150
|
+
--source ./src --tests ./tests \
|
|
151
|
+
--junit ./reports/junit.xml \
|
|
152
|
+
--api-spec ./openapi.yaml \
|
|
153
|
+
--output-json ./report.json \
|
|
154
|
+
--output-html ./report.html \
|
|
155
|
+
--output-pdf ./report.pdf \
|
|
156
|
+
--track --compare-baseline --fail-on-regression \
|
|
157
|
+
--detect-git \
|
|
158
|
+
--ai
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
What that does:
|
|
162
|
+
|
|
163
|
+
| Flag | Effect |
|
|
164
|
+
| --------------------------- | ------ |
|
|
165
|
+
| `--source / -s` | Source directory (repeatable) |
|
|
166
|
+
| `--tests / -t` | Test directory (repeatable) |
|
|
167
|
+
| `--junit` | JUnit XML report path (repeatable) |
|
|
168
|
+
| `--api-spec` | OpenAPI / Swagger spec — drives endpoint-coverage analysis |
|
|
169
|
+
| `--coverage-xml` | Real coverage.xml from coverage.py (preferred over the fallback) |
|
|
170
|
+
| `--output-json / -j` | JSON report path |
|
|
171
|
+
| `--output-html / -H` | Interactive HTML dashboard path |
|
|
172
|
+
| `--output-pdf / -P` | Board-ready PDF report path |
|
|
173
|
+
| `--track / --no-track` | Save the run as a snapshot in the history DB |
|
|
174
|
+
| `--db-path` | Override `~/.test-coverage/history.db` |
|
|
175
|
+
| `--compare-baseline` | Diff this run against the most recent snapshot |
|
|
176
|
+
| `--regression-threshold N` | Allowable coverage drop in pp (default 0) |
|
|
177
|
+
| `--fail-on-regression` | `exit 1` if a regression is detected |
|
|
178
|
+
| `--detect-git / --no-detect-git` | Capture commit, branch, CI run ID, provider |
|
|
179
|
+
| `--ai` | Run the optional AI commentary (requires `ANTHROPIC_API_KEY`) |
|
|
180
|
+
| `--test-type` | Default type label when no signal fires (`unit`/`integration`/`contract`/`e2e`) |
|
|
181
|
+
| `--verbose / -v` | Debug logging |
|
|
182
|
+
|
|
183
|
+
## Server mode
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
mentistest serve --host 0.0.0.0 --port 8000
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Because the `/analyze` endpoint reads arbitrary filesystem paths, `serve`
|
|
190
|
+
**refuses to bind a non-loopback host** (anything other than `127.0.0.1`/
|
|
191
|
+
`::1`/`localhost`) unless `TEST_COVERAGE_API_KEY` or
|
|
192
|
+
`TEST_COVERAGE_ALLOWED_PATHS` is configured. Pass `--insecure` to override
|
|
193
|
+
on a trusted private network. The published Docker image binds `0.0.0.0` and
|
|
194
|
+
sets `TEST_COVERAGE_REQUIRE_SECURITY=1`, so the container likewise won't
|
|
195
|
+
start unprotected.
|
|
196
|
+
|
|
197
|
+
Then:
|
|
198
|
+
|
|
199
|
+
- `http://localhost:8000/docs` — interactive OpenAPI docs for the REST API
|
|
200
|
+
- `http://localhost:8000/benchmarks` — multi-project benchmark dashboard
|
|
201
|
+
- `http://localhost:8000/trends/<project>` — single-project trend dashboard
|
|
202
|
+
- `http://localhost:8000/report/html?project_name=<name>` — last-rendered report
|
|
203
|
+
- `http://localhost:8000/report/pdf?project_name=<name>` — last-rendered PDF
|
|
204
|
+
- `http://localhost:8000/history/<project>` — JSON list of snapshots
|
|
205
|
+
- `http://localhost:8000/regression/<project>` — JSON regression diff
|
|
206
|
+
|
|
207
|
+
## GitHub Action
|
|
208
|
+
|
|
209
|
+
```yaml
|
|
210
|
+
- uses: HarryDouglas/test_coverage_tool@v1
|
|
211
|
+
with:
|
|
212
|
+
source_dirs: src
|
|
213
|
+
test_dirs: tests
|
|
214
|
+
junit_xml_paths: reports/junit.xml
|
|
215
|
+
track_history: 'true'
|
|
216
|
+
fail_on_regression: 'true'
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Posts a PR comment with the coverage delta, uploads JSON + HTML reports as
|
|
220
|
+
workflow artifacts, and fails the check if a symbol loses coverage. See
|
|
221
|
+
`action.yml` for all inputs.
|
|
222
|
+
|
|
223
|
+
**Action vs PyPI wheel:** the action is its own distribution channel —
|
|
224
|
+
referenced by git tag, not installed from PyPI. Under the hood it
|
|
225
|
+
installs the wheel from PyPI and runs the CLI, so the two move
|
|
226
|
+
together but version independently. The action's `@v1` tag pins a
|
|
227
|
+
major; minor / patch wheel updates roll through without a workflow
|
|
228
|
+
change.
|
|
229
|
+
|
|
230
|
+
## Security
|
|
231
|
+
|
|
232
|
+
If you're deploying the HTTP API beyond `localhost`, read
|
|
233
|
+
[SECURITY.md](SECURITY.md). Two env vars (`TEST_COVERAGE_API_KEY` and
|
|
234
|
+
`TEST_COVERAGE_ALLOWED_PATHS`) enable bearer-token auth and
|
|
235
|
+
filesystem path-traversal protection. Both are off by default so local,
|
|
236
|
+
loopback-only use is unaffected — but the tool **fails closed** the moment
|
|
237
|
+
it would be network-reachable: `serve` rejects a non-loopback `--host`
|
|
238
|
+
(unless `--insecure`), and `create_app` refuses to start when
|
|
239
|
+
`TEST_COVERAGE_REQUIRE_SECURITY=1` (set in the Docker image) is configured
|
|
240
|
+
without a protection layer.
|
|
241
|
+
|
|
242
|
+
To report a vulnerability, email **harrydtrott@gmail.com** — don't
|
|
243
|
+
open a public issue.
|
|
244
|
+
|
|
245
|
+
## Public benchmark suite
|
|
246
|
+
|
|
247
|
+
The tool ships with a 16+ repo OSS benchmark suite (flask, fastapi, pydantic,
|
|
248
|
+
httpx, celery, mlflow, attrs, rich, schemathesis, dbt-core, alembic, …) used
|
|
249
|
+
both as a smoke test and as a demo gallery for the consulting site.
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
make benchmark # clone, test, analyse, render all projects
|
|
253
|
+
make benchmark-serve # serve the rendered dashboard at localhost:8003/benchmarks
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
Outputs land in `reports/benchmarks/<project>/` (HTML + JSON) and are
|
|
257
|
+
git-ignored. Re-run after any classifier change to verify the public pyramid
|
|
258
|
+
still looks sensible.
|
|
259
|
+
|
|
260
|
+
## Library use
|
|
261
|
+
|
|
262
|
+
```python
|
|
263
|
+
from pathlib import Path
|
|
264
|
+
from test_coverage_tool.core.analyzer import AnalysisEngine
|
|
265
|
+
from test_coverage_tool.core.git import detect_git_metadata
|
|
266
|
+
from test_coverage_tool.reports.html_generator import HTMLReportGenerator
|
|
267
|
+
from test_coverage_tool.reports.json_generator import JSONReportGenerator
|
|
268
|
+
from test_coverage_tool.history.store import HistoryStore
|
|
269
|
+
|
|
270
|
+
engine = AnalysisEngine()
|
|
271
|
+
analysis = engine.analyze(
|
|
272
|
+
project_name = "my-service",
|
|
273
|
+
source_dirs = [Path("src")],
|
|
274
|
+
test_dirs = [Path("tests")],
|
|
275
|
+
junit_xml_paths = [Path("reports/junit.xml")],
|
|
276
|
+
api_spec_path = Path("openapi.yaml"),
|
|
277
|
+
git_metadata = detect_git_metadata(),
|
|
278
|
+
)
|
|
279
|
+
JSONReportGenerator().write(analysis, Path("report.json"))
|
|
280
|
+
HTMLReportGenerator().write(analysis, Path("report.html"))
|
|
281
|
+
|
|
282
|
+
# Persist a snapshot (analysis_to_dict is not among the imports above; import it first)
|
|
283
|
+
HistoryStore(Path("history.db")).save("my-service", analysis_to_dict(analysis))
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
## Configuration
|
|
287
|
+
|
|
288
|
+
Environment variables:
|
|
289
|
+
|
|
290
|
+
| Variable | Purpose |
|
|
291
|
+
| -------------------------- | ------- |
|
|
292
|
+
| `ANTHROPIC_API_KEY` | Enables `--ai` (otherwise the flag is a no-op) |
|
|
293
|
+
| `TEST_COVERAGE_DB_PATH` | Override the default `~/.test-coverage/history.db` location |
|
|
294
|
+
| `MENTIS_BENCH_AI` | Set to `1` to opt-in to AI commentary on `make benchmark` |
|
|
295
|
+
|
|
296
|
+
## Development
|
|
297
|
+
|
|
298
|
+
```bash
|
|
299
|
+
make install # install dev deps via uv
|
|
300
|
+
make test # pytest --cov on unittests/
|
|
301
|
+
make lint # ruff + mypy --strict + pylint
|
|
302
|
+
make benchmark # rebuild the public benchmark dashboard
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
The repo uses ruff, mypy (`--strict`), pylint and pytest. The quality bar:
|
|
306
|
+
zero ruff/mypy/pylint issues, and `pytest -x -q` must pass before merge.
|
|
307
|
+
|
|
308
|
+
## License
|
|
309
|
+
|
|
310
|
+
MIT — see `LICENSE`.
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
mentistest_coverage-0.2.2.dist-info/licenses/LICENSE,sha256=ue2igMlCJVgRuvDVcCeAeFtyfFN6FH8u8qFRxPAyP_k,1068
|
|
2
|
+
test_coverage_tool/__init__.py,sha256=-m4RMW_s4COWKRM7VkdGITkyPUnh7QEJfcF1Bicq32I,404
|
|
3
|
+
test_coverage_tool/_logging.py,sha256=zuhZk2j41IJxme7x40IklKojvIa8qL-ld_FIFHDWQLQ,960
|
|
4
|
+
test_coverage_tool/_version.py,sha256=ywc2091SFrYn1uCe8vq8F2ODfUKv17MyplDPnGr97SI,65
|
|
5
|
+
test_coverage_tool/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
test_coverage_tool/ai/__init__.py,sha256=dFKFg3AewYtbW49shxSrKAhTAzYtMgV1YJuhWyz0zqg,227
|
|
7
|
+
test_coverage_tool/ai/_scrub.py,sha256=fHFOAxMt5RYXGcEd3UVnbyUfYaXeiQH77DMXOEQQhjk,3119
|
|
8
|
+
test_coverage_tool/ai/analyzer.py,sha256=sNzkxJlJglGxec1X4mwprNmJUA1Hx2NRyf1rVreMFY0,16916
|
|
9
|
+
test_coverage_tool/ai/budget.py,sha256=ecUNL93LB0rg5nF_4mrw6Un5e_47BClwKqOzel7TzIs,6035
|
|
10
|
+
test_coverage_tool/ai/config.py,sha256=wP1ZNk-6krGZni-2thg-f1_HCq8zjZllgkYkpXfTRkM,3651
|
|
11
|
+
test_coverage_tool/api/__init__.py,sha256=Eu4mDp_Z1rLmCEfuoNFXXqDT0oCDFq_cZNH34A30GbQ,171
|
|
12
|
+
test_coverage_tool/api/_logging_middleware.py,sha256=Hlgs1lGV5uQrmL18Pf13MiB2OThSX1Xdsfj-BDFULqM,8102
|
|
13
|
+
test_coverage_tool/api/_metrics.py,sha256=I26ZyIeghzyqi6OilHZTFJoRFI-GrGhF8DecbgMhGPE,4268
|
|
14
|
+
test_coverage_tool/api/_security.py,sha256=E8ILaZ22Fr3Z13cU5-m7UJLLYSlkaiMj9TtW5b17cOM,10704
|
|
15
|
+
test_coverage_tool/api/models.py,sha256=4oY0z5fUqFHVgfwDMnnyfCF856mtOXvPKf5N4eElfHw,4085
|
|
16
|
+
test_coverage_tool/api/routes.py,sha256=5Vg5pYakkGRTvq2nn_6NbiivCIAEMB2_FKYaFthaZEs,25438
|
|
17
|
+
test_coverage_tool/benchmarks/__init__.py,sha256=yWL1hQpDSK-b3o6CLW61Tb34npMuB2k7Yqzg1wpZpOY,214
|
|
18
|
+
test_coverage_tool/benchmarks/dashboard.py,sha256=wakP7fdzmWi1Aoh4uimzY4gE_XoGcgvbrK8p30XIQfA,25113
|
|
19
|
+
test_coverage_tool/benchmarks/store.py,sha256=a8wp_Wv6mo44BEgFw9Lzq-NmoYO7jLDMkWWxvI6kL14,22992
|
|
20
|
+
test_coverage_tool/benchmarks/_templates/compare.html.jinja,sha256=MW_kiKczVdO7D-kan1qwQtHW9tYbZQBz9Ir6ZzKo6HA,8689
|
|
21
|
+
test_coverage_tool/benchmarks/_templates/dashboard.html.jinja,sha256=ybZ7Mm-HRBtS67ahiPbVu-AsDvFkPFHXcYsKcStMGNU,43424
|
|
22
|
+
test_coverage_tool/cli/__init__.py,sha256=trSKqZZ_WggkWo0g60hP5PGd1W5e1DYp4Ivy_jeaAtk,157
|
|
23
|
+
test_coverage_tool/cli/main.py,sha256=yH2NjI8N2hy9GOeFJCiEDxRl9-ReHr4KXDD_8Kl4wgE,25250
|
|
24
|
+
test_coverage_tool/core/__init__.py,sha256=rC0xs9hmW-s5bFFFrsUUgCuvsl6vB1WP2_xUOIcWUSM,460
|
|
25
|
+
test_coverage_tool/core/analyzer.py,sha256=p7uVXyMTyTLiOh5nTM6_53IQuo8x6zXDh2qmPdjAGLg,47053
|
|
26
|
+
test_coverage_tool/core/call_graph.py,sha256=6XHqvaVCehqXDQnYmdhZyX3TyHlF7v2IzcEAc28brF0,7641
|
|
27
|
+
test_coverage_tool/core/classifier.py,sha256=13rrpHQjeodkH83CIFLvYgVuJyTuKK_6FU_qt555aPA,39267
|
|
28
|
+
test_coverage_tool/core/classifier_facts.py,sha256=ZRE7MuOHjehHeCgZ0ZP6epKMF9EV6CO03HXy8__97P4,25572
|
|
29
|
+
test_coverage_tool/core/classifier_validation.py,sha256=q1MNGp7JYyXGCFc8rJFh4r9T8EydDJ2KiuCd62IXmgw,13661
|
|
30
|
+
test_coverage_tool/core/contract_coverage.py,sha256=nc2evASI_TpAsqp6GiBKZbnF042cXiEFiBsXf5zaPyI,3473
|
|
31
|
+
test_coverage_tool/core/diff_coverage.py,sha256=JuVNG6hqvAUMg9TQadjQ7FznXNEos-g6re9Q47c8FkA,8032
|
|
32
|
+
test_coverage_tool/core/endpoint_attribution.py,sha256=n_nxHpC-7nXib5x_qco_ykzGVgVfez0xsTPzb1Gqv20,10516
|
|
33
|
+
test_coverage_tool/core/endpoint_correlation.py,sha256=XBtAt60ltzoxryP1b2CWHCwaZgtCce_x6vJx3eqTWJM,12530
|
|
34
|
+
test_coverage_tool/core/fixture_resolver.py,sha256=XE6wfSJTbBrAB-bOQiehmJe6EEUS57qzgnUJo-dvbdQ,10185
|
|
35
|
+
test_coverage_tool/core/git.py,sha256=vFi2P889Cy90Q25KLzjDHvLYunE2G-uhIT92ta_kjj8,6314
|
|
36
|
+
test_coverage_tool/core/imports.py,sha256=8y0gvxGCdy7wwO02VfbxxLj-mHgKLXa5nCLe5BEkaes,3144
|
|
37
|
+
test_coverage_tool/core/javascript_classifier.py,sha256=ShAlAFKvzcaHs9qmMDp9iMI3ngtvpVgAU6U9NvN1mpo,9236
|
|
38
|
+
test_coverage_tool/core/marker_config.py,sha256=pBxvsIPqbFS2Fs3xR1ShHXmcc84GIA14gQZxIy9J3ck,8349
|
|
39
|
+
test_coverage_tool/core/metrics.py,sha256=3j7Od-PGXQD_2MWc63I36Phld6gMP-mV2HjmbvDSDQo,12587
|
|
40
|
+
test_coverage_tool/core/reclassify.py,sha256=2iYPusMYQNLOMApI5K5qQtjD9cmsqfHBewp8_sqmRnU,19703
|
|
41
|
+
test_coverage_tool/core/regression.py,sha256=bZ6wrUJvwEOHMEnurOVNe31tu9nXUg8dmk6EkHNSawg,6680
|
|
42
|
+
test_coverage_tool/core/symbol_table.py,sha256=jOBguPK_2G_IGN2TZJj0rSuhABlPYzZYK5cmM8ixWtc,12645
|
|
43
|
+
test_coverage_tool/core/test_type.py,sha256=3mgyUiTy3MpDO5X6M5khf_AGLpXMQBOdDuZw4gHGOzc,1769
|
|
44
|
+
test_coverage_tool/core/timing.py,sha256=SylM6fEALJRJgZ5B0AlXMXuZZgsfcBxJeLbGynLQ7vY,7761
|
|
45
|
+
test_coverage_tool/core/types.py,sha256=zKhdFDRPPWDOk_yWjjs_4Dwdo-Abfrdgdv_gGTS7HGg,43185
|
|
46
|
+
test_coverage_tool/core/endpoint_extractors/__init__.py,sha256=DzMFWEF3r5egLNvkjqXyqkHFl670OLtn83kilMA74Vw,1698
|
|
47
|
+
test_coverage_tool/core/endpoint_extractors/class_views.py,sha256=0DLltKePljdu4VpXwgEf8m2dfMXrveXtF7Sv6gkdZOg,16329
|
|
48
|
+
test_coverage_tool/core/endpoint_extractors/cross_check.py,sha256=YaQboLENT0vjtehbDXePg_NZbpcYPSg0ZLVv5qGrbFE,7431
|
|
49
|
+
test_coverage_tool/core/endpoint_extractors/inbound.py,sha256=WLtd7e4K_aMwEO3PdJFg0zPldLK27BZuGrv_wt1VytE,13932
|
|
50
|
+
test_coverage_tool/core/endpoint_extractors/javascript.py,sha256=rbXbQgPzddQq6M7ZhfvNengQ4f9Ay0SF1Jbj36I50Yk,12603
|
|
51
|
+
test_coverage_tool/core/endpoint_extractors/mock_libs.py,sha256=-AttDobCk3MXqHfSwWpF-quuPK0oOGWB3cUIuWfTK94,6018
|
|
52
|
+
test_coverage_tool/core/endpoint_extractors/outbound.py,sha256=XHoEUVQqSNzUwrfb8Nm7dQ2EdaeDdtQDvJ1Uobve4_o,15600
|
|
53
|
+
test_coverage_tool/core/endpoint_extractors/overrides.py,sha256=8fGy6qb3UmveeNe0kc_oQrEnVHPHBRR9t0I6N1vxtII,7520
|
|
54
|
+
test_coverage_tool/core/endpoint_extractors/wrappers.py,sha256=cTk0RtaFKSveYRRjggRblG29_jQEG24HA3uYAFNc-HE,5854
|
|
55
|
+
test_coverage_tool/history/__init__.py,sha256=57IVjW0AD4KGSYg-SQf6PXEh9_UkMTcdgHXMFBsXfA0,145
|
|
56
|
+
test_coverage_tool/history/backfill.py,sha256=-QjCgnk5vfHn6P8CKcvro-BZmiZue7XeXhiFvKTLjAI,12256
|
|
57
|
+
test_coverage_tool/history/comparison.py,sha256=N7iaoN3eDTkihBPfmTdstvim5ABEHmkXbzLCLEqMnOo,10301
|
|
58
|
+
test_coverage_tool/history/comparison_dashboard.py,sha256=WOchsyj6Fvl-U0965Jjj0FuDqPY2oIkoKkx9q-Ourg4,11656
|
|
59
|
+
test_coverage_tool/history/dashboard.py,sha256=cUHbRUUznD77Ykg0TElfJHG5IY6wn7rw4U2S9y4PgWY,28775
|
|
60
|
+
test_coverage_tool/history/store.py,sha256=XrWNU8leGuLj4jcU8yLz10j-3VNhJ9uB39qIThzbGRU,21791
|
|
61
|
+
test_coverage_tool/parsers/__init__.py,sha256=u1vbWuvsax5bMYtkb9MZm892ESO7oAgmSAyByNok7gs,377
|
|
62
|
+
test_coverage_tool/parsers/_constants.py,sha256=v7dYrJdbS3izEc4-Wu4fxzagBlsrA6lk5nJQ2WskPxQ,281
|
|
63
|
+
test_coverage_tool/parsers/code_parser.py,sha256=w-kpUZfG1XxxKHsTyNJLC3ngYYwFwAaWdyciyI-JgOY,6844
|
|
64
|
+
test_coverage_tool/parsers/coverage_contexts.py,sha256=hY3fPIy0DCAsZcWARRxQZXBYtJS24ccVnv7jzlNfaIo,5981
|
|
65
|
+
test_coverage_tool/parsers/coverage_parser.py,sha256=FRvmIHpO9wE4It0pVA6QYdYYBrYn-BCthaxUixQxBiE,6086
|
|
66
|
+
test_coverage_tool/parsers/cypress_parser.py,sha256=RCwI_9nIVRKD-Fzq-VLrUb6VjuKq-9AhvTbG47XkRZ0,3640
|
|
67
|
+
test_coverage_tool/parsers/http_traffic_parser.py,sha256=m2hVMBaDprIXTUEEYksDOddq9mkPdo8YTWjfeAsPnUI,4481
|
|
68
|
+
test_coverage_tool/parsers/istanbul_parser.py,sha256=1qRikgW9tziLWWj_unusQd3_r9Qe74prBkuN5nWIi0U,5663
|
|
69
|
+
test_coverage_tool/parsers/mutation_parser.py,sha256=FUmzjowqNDUemhSjVL0oSIhwCz48TDRYnyapi3jqYJ0,7677
|
|
70
|
+
test_coverage_tool/parsers/pact_parser.py,sha256=kB9XJHzZTyK-bOVvQxZsoFe54-3cSIeTl8I919giAMQ,7760
|
|
71
|
+
test_coverage_tool/parsers/spec_parser.py,sha256=QY3QEhRko-vo7-JIUYQp-83JzwyRXG3Uz97LI0HVL8Y,3656
|
|
72
|
+
test_coverage_tool/parsers/test_parser.py,sha256=1JqlzS2z1PbKQRHd0fhIdSXVPhfICWGXnsS7wGST9gA,12136
|
|
73
|
+
test_coverage_tool/portfolio/__init__.py,sha256=eUOGFqYIXYCgyBD9rS7tNdy0aQs8zui7tUy41yILwG0,541
|
|
74
|
+
test_coverage_tool/portfolio/dashboard.py,sha256=gBqPLOSsfeSLTKgDyXOtfQlUjM5ka9fbkOlmeo6iwCM,3251
|
|
75
|
+
test_coverage_tool/portfolio/rollup.py,sha256=MtaxHkgxBqDIxi9u15HbzVwrBRUHRzyM--fVTKpg6Cg,11329
|
|
76
|
+
test_coverage_tool/portfolio/tokens.py,sha256=-4H8inW-qtfHdtwjYeWwKSLDK3lzEU_2wEjZOSEx-lE,4234
|
|
77
|
+
test_coverage_tool/portfolio/_templates/portfolio.html.jinja,sha256=95TFeI4RmStVCMrzYU4n8qIM2w0hF6QqCQsmZT72A7E,4672
|
|
78
|
+
test_coverage_tool/reports/__init__.py,sha256=4ou_gwaDnqraG_b8-dG3hd5K-Qhb1dJQeteDwNszeR4,283
|
|
79
|
+
test_coverage_tool/reports/_format.py,sha256=Ap3FVSh2FaHfNTXXYuJYJtTHrcTSIopqvxKLBty8XFs,5183
|
|
80
|
+
test_coverage_tool/reports/html_generator.py,sha256=sCJm-QKLtHNexmUr721Stup-zHcZh30wTQsWtPb96Eo,25371
|
|
81
|
+
test_coverage_tool/reports/json_generator.py,sha256=rS0yTa6ZEcSzzQSB0fZzh6XpyomMRs-kjNR9dTCVgVc,20601
|
|
82
|
+
test_coverage_tool/reports/pdf_generator.py,sha256=mowbnD6GkxO26hGlVxJhI8pv73fUT3OFSo4sjqaYTa4,12422
|
|
83
|
+
test_coverage_tool/reports/_templates/report.html.jinja,sha256=E7c1g16cwwlGsVShSZKuij0T61DSaj0BvI4Q9-4xYxg,65976
|
|
84
|
+
mentistest_coverage-0.2.2.dist-info/METADATA,sha256=h__Wvxd9GUL78DKGkLR6I_U-j4n9E89t17LcT5FjNLw,12523
|
|
85
|
+
mentistest_coverage-0.2.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
86
|
+
mentistest_coverage-0.2.2.dist-info/entry_points.txt,sha256=009ZtUtesWqVrgcaNbv44YFDvkbqvd_x59puJLdMcmI,64
|
|
87
|
+
mentistest_coverage-0.2.2.dist-info/top_level.txt,sha256=q5Bmd1bSzavT5kyJ8VG7k8IvagaQKecL7U0LQaX4EdU,19
|
|
88
|
+
mentistest_coverage-0.2.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Harry Trott
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
test_coverage_tool
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""test_coverage_tool.
|
|
2
|
+
|
|
3
|
+
Analyze test strategy across SaaS products with coverage metrics.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging as _std_lib_logging # To avoid confusion with the package's logging.
|
|
9
|
+
|
|
10
|
+
from . import _logging as logging
|
|
11
|
+
from ._version import __version__
|
|
12
|
+
|
|
13
|
+
# Package logger: a NullHandler is attached and the threshold is WARNING, so
# importing the package adds no output handler of its own.
logger = _std_lib_logging.getLogger(__name__)
logger.addHandler(_std_lib_logging.NullHandler())
logger.setLevel(_std_lib_logging.WARNING)
|
|
16
|
+
|
|
17
|
+
__all__: list[str] = []
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Console logger helper function."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
logger: logging.Logger = logging.getLogger("test_coverage_tool")


def add_stream_handler(
    level: int = logging.INFO,
) -> logging.StreamHandler[Any]:
    """Attach a formatted ``StreamHandler`` to the package logger.

    Convenience helper for debugging sessions and small console scripts
    that want the package's log records printed without setting up
    logging themselves.

    Args:
        level: Threshold applied to the new handler (use the ``logging``
            module constants). The package logger's own level is lowered
            to this value only when it is currently numerically higher.

    Returns:
        The handler that was attached to the package logger.
    """
    log_format = "%(asctime)s: %(levelname)8s - %(name)s - %(message)s"
    stream_handler: logging.StreamHandler[Any] = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter(log_format))
    logger.addHandler(stream_handler)
    # Only ever lower the logger's threshold; never raise it.
    if logger.level > level:
        logger.setLevel(level)
    stream_handler.setLevel(level)
    logger.debug("Added a stderr logging handler to logger: %s", logger.name)
    return stream_handler
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Identifier scrubbing for AI prompts (PRODUCTION_REVIEW.md S4).
|
|
2
|
+
|
|
3
|
+
The default prompt avoids source code but still carries test names,
|
|
4
|
+
file paths, and project names — all of which can leak business
|
|
5
|
+
context (``test_payment_card_validation_for_org_42_user_alice``,
|
|
6
|
+
``customers/acme-corp/...``). When ``AIConfig.scrub_identifiers`` is
|
|
7
|
+
True we run every variable string through :func:`scrub` before it
|
|
8
|
+
hits the model.
|
|
9
|
+
|
|
10
|
+
The scrub is intentionally conservative: it replaces well-shaped PII
|
|
11
|
+
patterns (email-like, UUID-like, multi-word proper nouns inside path
|
|
12
|
+
segments) with stable placeholders. Test-type words and version
|
|
13
|
+
strings pass through untouched because the model needs them to make
|
|
14
|
+
sensible recommendations.
|
|
15
|
+
|
|
16
|
+
Stability matters: ``Acme-Corp`` always maps to one placeholder (e.g. ``<PII1>``) within a
|
|
17
|
+
single :class:`Scrubber` instance so the model can still spot
|
|
18
|
+
"the same customer appears in three tests" patterns without seeing
|
|
19
|
+
the real name.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import re
|
|
25
|
+
|
|
26
|
+
# Email-shaped token: local part, "@", dotted domain, alphabetic TLD (2+).
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")

# Canonical 8-4-4-4-12 hexadecimal UUID, either letter case.
_UUID_RE = re.compile(
    r"\b[0-9a-fA-F]{8}"
    r"-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}"
    r"-[0-9a-fA-F]{12}\b"
)

# Proper-noun-shaped segment: two or more alphabetic chunks joined by "_" or
# "-". Deliberately narrow heuristic with two accepted shapes:
#   * a capitalised first chunk (3+ letters) followed by chunks of 2+ letters;
#   * an all-lower-case first chunk of 6+ letters followed by lower-case
#     chunks of 3+ letters.
_PROPER_NOUN_RE = re.compile(
    r"\b(?:"
    r"[A-Z][a-z]{2,}(?:[_-][A-Za-z]{2,})+"
    r"|[a-z]{6,}(?:[_-][a-z]{3,}){1,}"
    r")\b"
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class Scrubber:
    """Replace PII-shaped tokens with placeholders that stay stable.

    Each instance remembers which placeholder it handed out for every
    original token, so a token that shows up repeatedly is rewritten to
    the same placeholder each time. That keeps the "this value recurs"
    signal in the prompt while hiding the value itself.

    Attributes:
        prefix: Text placed before the running number inside each
            placeholder. Defaults to ``"PII"``.
    """

    def __init__(self, prefix: str = "PII") -> None:
        """Create a scrubber that has not seen any token yet.

        Args:
            prefix: Placeholder prefix; with the default, placeholders
                look like ``<PII1>``.
        """
        self.prefix = prefix
        # Original token -> placeholder already issued for it.
        self._tokens: dict[str, str] = {}

    def scrub(self, text: str) -> str:
        """Return *text* with detected PII swapped for placeholders.

        The patterns are applied one after another: emails first, then
        UUIDs, then proper-noun-shaped segments.

        Args:
            text: Raw string that is about to be placed in a prompt.

        Returns:
            The text with every email / UUID / proper-noun-shaped match
            replaced by ``<PII1>``, ``<PII2>``, ... numbered in the order
            the tokens were first encountered by this instance.
        """
        scrubbed = text
        for pattern in (_EMAIL_RE, _UUID_RE, _PROPER_NOUN_RE):
            scrubbed = pattern.sub(self._replace, scrubbed)
        return scrubbed

    def _replace(self, match: re.Match[str]) -> str:
        """Look up, or issue on first sight, the placeholder for *match*."""
        found = match.group(0)
        placeholder = self._tokens.get(found)
        if placeholder is None:
            # Number placeholders by how many distinct tokens came before.
            placeholder = f"<{self.prefix}{len(self._tokens) + 1}>"
            self._tokens[found] = placeholder
        return placeholder
|