flakeradar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +309 -0
- package/bin/flakeradar.js +11 -0
- package/dist/analysis/classify.d.ts +10 -0
- package/dist/analysis/classify.d.ts.map +1 -0
- package/dist/analysis/classify.js +163 -0
- package/dist/analysis/classify.js.map +1 -0
- package/dist/analysis/flakiness.d.ts +11 -0
- package/dist/analysis/flakiness.d.ts.map +1 -0
- package/dist/analysis/flakiness.js +177 -0
- package/dist/analysis/flakiness.js.map +1 -0
- package/dist/cli.d.ts +5 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +77 -0
- package/dist/cli.js.map +1 -0
- package/dist/color.d.ts +19 -0
- package/dist/color.d.ts.map +1 -0
- package/dist/color.js +36 -0
- package/dist/color.js.map +1 -0
- package/dist/commands/analyze.d.ts +4 -0
- package/dist/commands/analyze.d.ts.map +1 -0
- package/dist/commands/analyze.js +106 -0
- package/dist/commands/analyze.js.map +1 -0
- package/dist/commands/run.d.ts +4 -0
- package/dist/commands/run.d.ts.map +1 -0
- package/dist/commands/run.js +116 -0
- package/dist/commands/run.js.map +1 -0
- package/dist/commands/shared.d.ts +16 -0
- package/dist/commands/shared.d.ts.map +1 -0
- package/dist/commands/shared.js +44 -0
- package/dist/commands/shared.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -0
- package/dist/junit/parser.d.ts +11 -0
- package/dist/junit/parser.d.ts.map +1 -0
- package/dist/junit/parser.js +98 -0
- package/dist/junit/parser.js.map +1 -0
- package/dist/junit/xml.d.ts +27 -0
- package/dist/junit/xml.d.ts.map +1 -0
- package/dist/junit/xml.js +238 -0
- package/dist/junit/xml.js.map +1 -0
- package/dist/report/json.d.ts +7 -0
- package/dist/report/json.d.ts.map +1 -0
- package/dist/report/json.js +38 -0
- package/dist/report/json.js.map +1 -0
- package/dist/report/markdown.d.ts +7 -0
- package/dist/report/markdown.d.ts.map +1 -0
- package/dist/report/markdown.js +75 -0
- package/dist/report/markdown.js.map +1 -0
- package/dist/report/terminal.d.ts +4 -0
- package/dist/report/terminal.d.ts.map +1 -0
- package/dist/report/terminal.js +101 -0
- package/dist/report/terminal.js.map +1 -0
- package/dist/runner/runner.d.ts +45 -0
- package/dist/runner/runner.d.ts.map +1 -0
- package/dist/runner/runner.js +125 -0
- package/dist/runner/runner.js.map +1 -0
- package/dist/types.d.ts +87 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/util/args.d.ts +23 -0
- package/dist/util/args.d.ts.map +1 -0
- package/dist/util/args.js +83 -0
- package/dist/util/args.js.map +1 -0
- package/dist/util/glob.d.ts +11 -0
- package/dist/util/glob.d.ts.map +1 -0
- package/dist/util/glob.js +104 -0
- package/dist/util/glob.js.map +1 -0
- package/package.json +62 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 flakeradar contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# 🛰️ flakeradar
|
|
4
|
+
|
|
5
|
+
**Find, rank, and diagnose flaky tests in any language — straight from JUnit XML.**
|
|
6
|
+
|
|
7
|
+
[npm package](https://www.npmjs.com/package/flakeradar)
|
|
8
|
+
[CI status](https://github.com/shravnn/flakeradar/actions/workflows/ci.yml)
|
|
9
|
+
[MIT License](./LICENSE)
|
|
10
|
+
[Node.js](https://nodejs.org)
|
|
11
|
+
[package.json](./package.json)
|
|
12
|
+
|
|
13
|
+
Run your suite N times (or point it at your CI history) and flakeradar tells you
|
|
14
|
+
**which tests are flaky, how often they fail, and _why_.**
|
|
15
|
+
|
|
16
|
+
<img src="docs/demo.svg" alt="flakeradar ranking flaky tests with likely root causes" width="820">
|
|
17
|
+
|
|
18
|
+
</div>
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Why
|
|
23
|
+
|
|
24
|
+
Flaky tests are the most corrosive thing in a CI pipeline: they erode trust in
|
|
25
|
+
the suite, waste hours on re-runs, and bury real bugs in the noise. Most tools
|
|
26
|
+
just tell you a test *sometimes* fails. flakeradar goes further — it **ranks**
|
|
27
|
+
flakes by how disruptive they are and **guesses the root cause**, turning "ugh,
|
|
28
|
+
it failed again" into "here's the flaky test, here's how often, here's why."
|
|
29
|
+
|
|
30
|
+
- 🌐 **Language-agnostic** — anything that emits JUnit XML: pytest, Jest, Vitest, Go, Gradle/Maven, RSpec, PHPUnit, Mocha…
|
|
31
|
+
- 🧠 **Root-cause hints** — timing/async, resource, order-dependency, concurrency, external-dependency, or randomness
|
|
32
|
+
- 📊 **Ranked, not just listed** — worst offenders first
|
|
33
|
+
- 🐛 **Separates flaky from broken** — always-failing tests are real bugs, reported apart from the noise
|
|
34
|
+
- 📎 **Shareable Markdown report** — paste straight into a GitHub issue or PR
|
|
35
|
+
- 🚦 **CI-ready** — `--fail-on-flaky` gates merges; JSON output for dashboards
|
|
36
|
+
- 📦 **Zero runtime dependencies** — one small package, nothing transitive
|
|
37
|
+
|
|
38
|
+
## Contents
|
|
39
|
+
|
|
40
|
+
- [Quick start](#quick-start)
|
|
41
|
+
- [Two ways to use it](#two-ways-to-use-it)
|
|
42
|
+
- [Framework recipes](#framework-recipes)
|
|
43
|
+
- [Use it in CI](#use-it-in-ci)
|
|
44
|
+
- [How it works](#how-it-works)
|
|
45
|
+
- [Output & exit codes](#output-formats)
|
|
46
|
+
- [Options](#options)
|
|
47
|
+
- [Use as a library](#use-as-a-library)
|
|
48
|
+
- [FAQ](#faq)
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Quick start
|
|
53
|
+
|
|
54
|
+
No install needed:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Analyze existing CI reports (each XML file = one run):
|
|
58
|
+
npx flakeradar analyze "test-reports/**/*.xml"
|
|
59
|
+
|
|
60
|
+
# Or run a suite repeatedly and watch for flakiness:
|
|
61
|
+
npx flakeradar run -n 20 -- pytest --junitxml={out}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Or install it:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
npm install -g flakeradar
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Try it right now on the bundled example reports:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
npx flakeradar analyze "examples/*.xml"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
```text
|
|
77
|
+
flakeradar — flakiness report
|
|
78
|
+
|
|
79
|
+
Runs analyzed: 5 Green runs: 0% Crashed: 0
|
|
80
|
+
Unique tests: 5 Flaky: 3 Always-failing: 1
|
|
81
|
+
|
|
82
|
+
Flaky tests (ranked, worst first):
|
|
83
|
+
# Test Fails Rate Flips Likely cause Conf
|
|
84
|
+
1 checkout.CheckoutTest::test_places_order 2/5 40% 4 timing/async 95%
|
|
85
|
+
2 db.UserRepoTest::test_create_user 2/5 40% 3 order/state leak 95%
|
|
86
|
+
3 api.PaymentTest::test_charge_card 1/5 20% 2 resource/network 95%
|
|
87
|
+
|
|
88
|
+
Top offender: checkout.CheckoutTest::test_places_order
|
|
89
|
+
likely: timing/async (95% confident)
|
|
90
|
+
→ Replace fixed sleeps with polling/awaits and raise timeouts.
|
|
91
|
+
sample failures:
|
|
92
|
+
• TimeoutError: timed out after 5000ms waiting for order confirmation
|
|
93
|
+
|
|
94
|
+
Always-failing (broken every run — likely a real bug, not flakiness):
|
|
95
|
+
✗ math.TaxTest::test_tax_rate (5/5 failed)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Two ways to use it
|
|
101
|
+
|
|
102
|
+
### 1. `run` — reproduce flakiness locally
|
|
103
|
+
|
|
104
|
+
Run a test command many times back-to-back. Put the `{out}` token where your
|
|
105
|
+
runner writes its JUnit XML; flakeradar swaps in a fresh path each run.
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
flakeradar run -n 25 -- pytest --junitxml={out}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
If your runner writes to a fixed path (or many files), use `--report` instead:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
flakeradar run -n 15 -r "build/test-results/test/*.xml" -- ./gradlew test
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Stop as soon as a flake shows up:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
flakeradar run -n 50 --stop-on-first-flaky -- gotestsum --junitfile={out} ./...
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### 2. `analyze` — mine your CI history
|
|
124
|
+
|
|
125
|
+
Already have per-run JUnit reports archived from CI? Point flakeradar at them.
|
|
126
|
+
Each file is treated as one run by default.
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
flakeradar analyze "ci-artifacts/**/junit.xml"
|
|
130
|
+
|
|
131
|
+
# One run per directory instead of per file:
|
|
132
|
+
flakeradar analyze --group-by dir "ci-artifacts/*/"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Framework recipes
|
|
138
|
+
|
|
139
|
+
| Framework | Command |
|
|
140
|
+
|-----------|---------|
|
|
141
|
+
| **pytest** | `flakeradar run -n 20 -- pytest --junitxml={out}` |
|
|
142
|
+
| **Jest** | `flakeradar run -n 20 -r junit.xml -- npx jest --reporters=default --reporters=jest-junit` |
|
|
143
|
+
| **Vitest** | `flakeradar run -n 20 -r junit.xml -- npx vitest run --reporter=junit --outputFile=junit.xml` |
|
|
144
|
+
| **Go** | `flakeradar run -n 20 -- sh -c "gotestsum --junitfile={out} ./..."` |
|
|
145
|
+
| **Gradle** | `flakeradar run -n 15 -r "build/test-results/test/*.xml" -- ./gradlew test` |
|
|
146
|
+
| **Maven** | `flakeradar run -n 15 -r "target/surefire-reports/*.xml" -- mvn -q test` |
|
|
147
|
+
| **RSpec** | `flakeradar run -n 20 -- bundle exec rspec --format RspecJunitFormatter --out {out}` |
|
|
148
|
+
| **PHPUnit** | `flakeradar run -n 20 -- phpunit --log-junit {out}` |
|
|
149
|
+
|
|
150
|
+
> Tip: for runners that only write to a fixed file, pass that path to `--report`
|
|
151
|
+
> (`-r`). For runners that accept an output path, use the `{out}` token.
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Use it in CI
|
|
156
|
+
|
|
157
|
+
Detect flakiness that slips past a single run, and comment a report on the PR.
|
|
158
|
+
|
|
159
|
+
```yaml
|
|
160
|
+
# .github/workflows/flaky.yml
|
|
161
|
+
name: flaky-test-check
|
|
162
|
+
on: [pull_request]
|
|
163
|
+
|
|
164
|
+
jobs:
|
|
165
|
+
flaky:
|
|
166
|
+
runs-on: ubuntu-latest
|
|
167
|
+
steps:
|
|
168
|
+
- uses: actions/checkout@v4
|
|
169
|
+
- uses: actions/setup-node@v4
|
|
170
|
+
with: { node-version: 20 }
|
|
171
|
+
|
|
172
|
+
# ... set up your language + install deps ...
|
|
173
|
+
|
|
174
|
+
- name: Hunt for flaky tests
|
|
175
|
+
run: npx flakeradar run -n 15 --markdown -- pytest --junitxml={out} > flaky.md
|
|
176
|
+
|
|
177
|
+
- name: Comment report on the PR
|
|
178
|
+
if: always()
|
|
179
|
+
uses: marocchino/sticky-pull-request-comment@v2
|
|
180
|
+
with:
|
|
181
|
+
path: flaky.md
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Or gate on your existing archived reports:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
flakeradar analyze --fail-on-flaky "artifacts/**/*.xml"
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## How it works
|
|
193
|
+
|
|
194
|
+
**Flakiness detection.** A test is **flaky** when it both passes and fails across
|
|
195
|
+
the runs analyzed. A test that fails *every* run isn't flaky — it's **broken**, and
|
|
196
|
+
flakeradar reports those separately so you don't confuse a real bug with noise.
|
|
197
|
+
|
|
198
|
+
Each flaky test gets:
|
|
199
|
+
|
|
200
|
+
- **Fails / Rate** — how many runs failed, and the failure rate.
|
|
201
|
+
- **Flips** — how many times it switched between pass and fail across the run
|
|
202
|
+
sequence. High flips = highly nondeterministic.
|
|
203
|
+
- **Score** — `failures + flips`, used to rank the worst offenders first.
|
|
204
|
+
|
|
205
|
+
**Root-cause classification.** flakeradar scans each test's failure output and
|
|
206
|
+
matches it against tuned signatures to guess a category:
|
|
207
|
+
|
|
208
|
+
| Category | Typical signals | Suggested fix |
|
|
209
|
+
|----------|-----------------|---------------|
|
|
210
|
+
| `timing/async` | `TimeoutError`, `deadline exceeded`, `waited for` | Poll/await instead of fixed sleeps; raise timeouts |
|
|
211
|
+
| `concurrency/race` | `data race`, `deadlock`, `ConcurrentModification` | Synchronize shared state; remove shared mutation |
|
|
212
|
+
| `resource/network` | `EADDRINUSE`, `ECONNREFUSED`, `too many open files` | Isolate ports/temp files per test |
|
|
213
|
+
| `order/state leak` | `duplicate key`, `already exists`, `unique constraint` | Reset fixtures/DB between tests; randomize order |
|
|
214
|
+
| `external dep` | `503`, `429 rate limit`, `bad gateway` | Mock the dependency or add tolerant retries |
|
|
215
|
+
| `randomness/time` | `random`, `uuid`, `Date.now`, `timezone` | Seed RNG; freeze the clock in tests |
|
|
216
|
+
|
|
217
|
+
The classifier is a heuristic — a strong hint about where to look, not a verdict.
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## Output formats
|
|
222
|
+
|
|
223
|
+
| Flag | Output |
|
|
224
|
+
|------|--------|
|
|
225
|
+
| _(default)_ | Colored terminal report |
|
|
226
|
+
| `--markdown` | GitHub-flavored Markdown (tables + collapsible failure samples) |
|
|
227
|
+
| `--json` | Stable `flakeradar/v1` JSON for dashboards and scripts |
|
|
228
|
+
|
|
229
|
+
### Exit codes
|
|
230
|
+
|
|
231
|
+
| Code | Meaning |
|
|
232
|
+
|------|---------|
|
|
233
|
+
| `0` | Success (no flaky tests, or `--fail-on-flaky` not set) |
|
|
234
|
+
| `1` | Usage error, or no reports/test data found |
|
|
235
|
+
| `2` | Flaky or always-failing tests detected **and** `--fail-on-flaky` was set |
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
## Options
|
|
240
|
+
|
|
241
|
+
### `flakeradar run [options] -- <test command>`
|
|
242
|
+
|
|
243
|
+
| Option | Description |
|
|
244
|
+
|--------|-------------|
|
|
245
|
+
| `-n, --runs <N>` | Number of times to run the suite (default `10`) |
|
|
246
|
+
| `-r, --report <glob>` | Where the command writes its JUnit XML each run |
|
|
247
|
+
| `-o, --output <dir>` | Directory for `{out}` report files (default `.flakeradar/runs`) |
|
|
248
|
+
| `--stop-on-first-flaky` | Stop as soon as any flaky test is detected |
|
|
249
|
+
| `--keep-going` | Keep running even if a run crashes (default: stop) |
|
|
250
|
+
| `--fail-on-flaky` | Exit `2` if any flaky/always-failing test is found |
|
|
251
|
+
| `--json` / `--markdown` | Choose output format |
|
|
252
|
+
| `--no-color` | Disable ANSI colors |
|
|
253
|
+
|
|
254
|
+
### `flakeradar analyze [options] <glob...>`
|
|
255
|
+
|
|
256
|
+
| Option | Description |
|
|
257
|
+
|--------|-------------|
|
|
258
|
+
| `--group-by <file\|dir>` | Treat each file (default) or each directory as one run |
|
|
259
|
+
| `--fail-on-flaky` | Exit `2` if any flaky/always-failing test is found |
|
|
260
|
+
| `--json` / `--markdown` | Choose output format |
|
|
261
|
+
| `--no-color` | Disable ANSI colors |
|
|
262
|
+
|
|
263
|
+
---
|
|
264
|
+
|
|
265
|
+
## Use as a library
|
|
266
|
+
|
|
267
|
+
The analysis engine is exported too, for custom integrations:
|
|
268
|
+
|
|
269
|
+
```ts
|
|
270
|
+
import { parseJUnitXml, analyze, runFromResults } from "flakeradar";
|
|
271
|
+
|
|
272
|
+
const runs = reportStrings.map((xml, i) => runFromResults(`run ${i + 1}`, parseJUnitXml(xml)));
|
|
273
|
+
const report = analyze(runs);
|
|
274
|
+
|
|
275
|
+
console.log(report.summary.flakyCount, "flaky tests");
|
|
276
|
+
for (const t of report.flaky) {
|
|
277
|
+
console.log(t.id, t.failureRate, t.cause?.category);
|
|
278
|
+
}
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
---
|
|
282
|
+
|
|
283
|
+
## FAQ
|
|
284
|
+
|
|
285
|
+
**How many runs do I need?** At least 2 to detect any flakiness. For confidence,
|
|
286
|
+
10–30 runs is a good range — the rarer the flake, the more runs you need to catch it.
|
|
287
|
+
|
|
288
|
+
**Does it need my CI or a service?** No. It's a local CLI. Nothing is uploaded.
|
|
289
|
+
|
|
290
|
+
**My runner exits non-zero on failure — is that a problem?** No. flakeradar reads
|
|
291
|
+
the JUnit report regardless of exit code. A run only counts as *crashed* if it
|
|
292
|
+
produces no report at all.
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## Contributing
|
|
297
|
+
|
|
298
|
+
Issues and PRs welcome.
|
|
299
|
+
|
|
300
|
+
```bash
|
|
301
|
+
npm install
|
|
302
|
+
npm test # run the test suite (vitest)
|
|
303
|
+
npm run build # compile TypeScript to dist/
|
|
304
|
+
node bin/flakeradar.js analyze "examples/*.xml"
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## License
|
|
308
|
+
|
|
309
|
+
[MIT](./LICENSE) © [shravnn](https://github.com/shravnn)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#!/usr/bin/env node
import { main } from "../dist/cli.js";

// Store the code the CLI resolved with as the process exit code.
const applyExitCode = (code) => {
    process.exitCode = code;
};

// Print a rejected run (stack if present, else message, else the stringified
// value) to stderr and mark the process as failed with exit code 1.
const reportFailure = (err) => {
    const text = err instanceof Error ? err.stack ?? err.message : String(err);
    console.error(text);
    process.exitCode = 1;
};

// Forward the user-supplied arguments (everything after `node <script>`).
main(process.argv.slice(2)).then(applyExitCode).catch(reportFailure);
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { CauseCategory, CauseGuess, FailureSample } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Classify the likely root cause of a flaky test from its failure samples.
|
|
4
|
+
* Returns the best-scoring category with a rough confidence, or `unknown`
|
|
5
|
+
* when no signal is found.
|
|
6
|
+
*/
|
|
7
|
+
export declare function classifyCause(samples: FailureSample[]): CauseGuess;
|
|
8
|
+
/** Short human label for a cause category (used in tables). */
|
|
9
|
+
export declare function causeLabel(category: CauseCategory): string;
|
|
10
|
+
//# sourceMappingURL=classify.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"classify.d.ts","sourceRoot":"","sources":["../../src/analysis/classify.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAsG5E;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,aAAa,EAAE,GAAG,UAAU,CAkDlE;AAED,+DAA+D;AAC/D,wBAAgB,UAAU,CAAC,QAAQ,EAAE,aAAa,GAAG,MAAM,CAiB1D"}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/** Pair a signature regex with the score it contributes on a match (default 1). */
const weighted = (re, weight = 1) => ({ re, weight });

/** Assemble one category entry from its name, hint text and `[regex, weight]` rows. */
const defineCategory = (category, hint, rows) => ({
    category,
    hint,
    patterns: rows.map(([re, weight]) => weighted(re, weight)),
});

/**
 * Root-cause signature table, listed strongest-signal-first.
 *
 * Every entry carries a category id, a human-readable hint, and a list of
 * weighted regexes. Failure text that matches a regex votes for that
 * category with the regex's weight; the highest total wins. The patterns are
 * tuned against real failure output from pytest, Jest, JUnit, RSpec and Go.
 */
const CATEGORIES = [
    defineCategory("timing", "Timing/async: a wait, sleep, or deadline is racing the code. Replace fixed sleeps with polling/awaits and raise timeouts.", [
        [/\btimed?\s?out\b/i, 3],
        [/\btimeout(error|exception)?\b/i, 3],
        [/context deadline exceeded/i, 3],
        [/deadline exceeded/i, 2],
        [/\bwait(ed|ing)? for\b/i, 2],
        [/element .*not (visible|found|clickable)/i, 2],
        [/\bflaky\b/i, 1],
        [/eventually/i, 1],
        [/retry|retries exhausted/i, 1],
    ]),
    defineCategory("concurrency", "Concurrency/race: shared mutable state or a race condition. Look for goroutines/threads, locks, and unsynchronised access.", [
        [/\bdata race\b/i, 3],
        [/race condition/i, 3],
        [/\bdeadlock\b/i, 3],
        [/concurrent (map|modification)/i, 3],
        [/ConcurrentModification/i, 3],
        [/\bgoroutine\b/i, 1],
        [/\bmutex\b|\block (held|contention)\b/i, 2],
        [/thread|threadpool/i, 1],
    ]),
    defineCategory("resource", "Resource/network: a port, socket, DB, or file handle is contended or unavailable. Isolate ports/temp files per test.", [
        [/address already in use/i, 3],
        [/EADDRINUSE/i, 3],
        [/ECONNREFUSED|connection refused/i, 3],
        [/EADDRNOTAVAIL|ENOTFOUND|no such host/i, 2],
        [/port \d+ (is )?(already )?(in use|unavailable)/i, 3],
        [/too many open files|EMFILE/i, 3],
        [/broken pipe|EPIPE|ECONNRESET/i, 2],
        [/socket|bind|listen/i, 1],
        [/disk|ENOSPC|no space left/i, 2],
    ]),
    defineCategory("external", "External dependency: a downstream service, API, or rate limit is flaky. Mock the dependency or add tolerant retries.", [
        [/\b5\d\d\b.*(error|status|response)/i, 2],
        [/\b(429|rate.?limit(ed|ing)?)\b/i, 3],
        [/\b503\b|service unavailable/i, 3],
        [/\b502\b|bad gateway/i, 2],
        [/upstream|downstream service/i, 2],
        [/gateway timeout/i, 2],
        [/\bDNS\b/i, 1],
    ]),
    defineCategory("order-dependency", "Order/state leakage: the test depends on state left by another test. Reset fixtures/DB between tests and randomise order.", [
        [/already exists|duplicate key|unique constraint/i, 3],
        [/leftover|not cleaned up|dirty state/i, 2],
        [/expected .* to be empty/i, 2],
        [/no such (row|record|entity)|not found/i, 1],
        [/global (state|variable)/i, 2],
        [/cache (hit|stale|invalidation)/i, 1],
        [/singleton/i, 1],
    ]),
    defineCategory("randomness", "Randomness/time: nondeterministic inputs (random, UUIDs, current date/time). Seed RNG and freeze the clock in tests.", [
        [/\brandom\b|\bseed\b/i, 2],
        [/\bUUID\b/i, 1],
        [/current (date|time)|now\(\)|Date\.now|time\.Now/i, 2],
        [/timezone|utc|locale/i, 1],
        [/floating point|precision|rounding/i, 1],
        [/ordering of (keys|map|set)/i, 2],
        [/non.?deterministic/i, 2],
    ]),
];
|
|
92
|
+
/**
 * Guess why a flaky test fails by scoring its failure text against the
 * weighted signatures in `CATEGORIES`.
 *
 * @param samples Failure samples; each sample's `message` and `details`
 *   (when non-empty) are scanned as separate texts.
 * @returns `{ category, confidence, hint }`. The category is the one with the
 *   highest summed weight (the earliest table entry wins a tie). It is
 *   `unknown` with confidence 0 when there is no text at all, and `unknown`
 *   with confidence 0.1 when text exists but nothing matches.
 */
export function classifyCause(samples) {
    // Gather every non-empty message/details string, message first per sample.
    const texts = [];
    for (const sample of samples) {
        for (const text of [sample.message ?? "", sample.details ?? ""]) {
            if (text.length > 0) {
                texts.push(text);
            }
        }
    }
    if (texts.length === 0) {
        return {
            category: "unknown",
            confidence: 0,
            hint: "No failure output was captured, so the cause can't be inferred. Enable verbose test output to help.",
        };
    }

    // Sum matched weights per category; categories that score nothing are omitted.
    const tally = new Map();
    for (const { category, patterns } of CATEGORIES) {
        const points = texts.reduce((sum, text) => sum + patterns.reduce((acc, { re, weight }) => (re.test(text) ? acc + weight : acc), 0), 0);
        if (points > 0) {
            tally.set(category, points);
        }
    }
    if (tally.size === 0) {
        return {
            category: "unknown",
            confidence: 0.1,
            hint: "Failure text didn't match known flakiness signatures. Inspect the assertion and recent changes manually.",
        };
    }

    // Pick the top scorer; the strict comparison keeps the first category on ties.
    let winner = "unknown";
    let winnerPoints = 0;
    let allPoints = 0;
    tally.forEach((points, category) => {
        allPoints += points;
        if (points > winnerPoints) {
            winner = category;
            winnerPoints = points;
        }
    });
    const winnerDef = CATEGORIES.find((entry) => entry.category === winner);

    // Confidence mixes the winner's share of all points (dominance) with how
    // much raw evidence it has (saturating at 4 points), capped at 0.95 and
    // rounded to two decimals.
    const dominance = winnerPoints / allPoints;
    const saturation = Math.min(1, winnerPoints / 4);
    const capped = Math.min(0.95, 0.35 + 0.4 * dominance + 0.25 * saturation);
    const confidence = Math.round(capped * 100) / 100;
    return { category: winner, confidence, hint: winnerDef.hint };
}
|
|
144
|
+
/**
 * Short human label for a cause category (used in tables).
 *
 * @param category One of the known category ids (`timing`, `concurrency`,
 *   `resource`, `external`, `order-dependency`, `randomness`, `unknown`).
 * @returns The display label for the category. An unrecognised value is
 *   returned as its own string form rather than `undefined`, so callers
 *   always get a string.
 */
export function causeLabel(category) {
    switch (category) {
        case "timing":
            return "timing/async";
        case "concurrency":
            return "concurrency/race";
        case "resource":
            return "resource/network";
        case "external":
            return "external dep";
        case "order-dependency":
            return "order/state leak";
        case "randomness":
            return "randomness/time";
        case "unknown":
            return "unknown";
        default:
            // Untyped JavaScript callers (or a category added later) can pass a
            // value outside the known set; show it verbatim instead of undefined.
            return String(category);
    }
}
|
|
163
|
+
//# sourceMappingURL=classify.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"classify.js","sourceRoot":"","sources":["../../src/analysis/classify.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,GAAG,CAAC,EAAU,EAAE,MAAM,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;AAEvD;;;;GAIG;AACH,MAAM,UAAU,GAAkB;IAChC;QACE,QAAQ,EAAE,QAAQ;QAClB,IAAI,EAAE,2HAA2H;QACjI,QAAQ,EAAE;YACR,CAAC,CAAC,mBAAmB,EAAE,CAAC,CAAC;YACzB,CAAC,CAAC,gCAAgC,EAAE,CAAC,CAAC;YACtC,CAAC,CAAC,4BAA4B,EAAE,CAAC,CAAC;YAClC,CAAC,CAAC,oBAAoB,EAAE,CAAC,CAAC;YAC1B,CAAC,CAAC,wBAAwB,EAAE,CAAC,CAAC;YAC9B,CAAC,CAAC,0CAA0C,EAAE,CAAC,CAAC;YAChD,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC;YAClB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC;YACnB,CAAC,CAAC,0BAA0B,EAAE,CAAC,CAAC;SACjC;KACF;IACD;QACE,QAAQ,EAAE,aAAa;QACvB,IAAI,EAAE,4HAA4H;QAClI,QAAQ,EAAE;YACR,CAAC,CAAC,gBAAgB,EAAE,CAAC,CAAC;YACtB,CAAC,CAAC,iBAAiB,EAAE,CAAC,CAAC;YACvB,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;YACrB,CAAC,CAAC,gCAAgC,EAAE,CAAC,CAAC;YACtC,CAAC,CAAC,yBAAyB,EAAE,CAAC,CAAC;YAC/B,CAAC,CAAC,gBAAgB,EAAE,CAAC,CAAC;YACtB,CAAC,CAAC,uCAAuC,EAAE,CAAC,CAAC;YAC7C,CAAC,CAAC,oBAAoB,EAAE,CAAC,CAAC;SAC3B;KACF;IACD;QACE,QAAQ,EAAE,UAAU;QACpB,IAAI,EAAE,sHAAsH;QAC5H,QAAQ,EAAE;YACR,CAAC,CAAC,yBAAyB,EAAE,CAAC,CAAC;YAC/B,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC;YACnB,CAAC,CAAC,kCAAkC,EAAE,CAAC,CAAC;YACxC,CAAC,CAAC,uCAAuC,EAAE,CAAC,CAAC;YAC7C,CAAC,CAAC,iDAAiD,EAAE,CAAC,CAAC;YACvD,CAAC,CAAC,6BAA6B,EAAE,CAAC,CAAC;YACnC,CAAC,CAAC,+BAA+B,EAAE,CAAC,CAAC;YACrC,CAAC,CAAC,qBAAqB,EAAE,CAAC,CAAC;YAC3B,CAAC,CAAC,4BAA4B,EAAE,CAAC,CAAC;SACnC;KACF;IACD;QACE,QAAQ,EAAE,UAAU;QACpB,IAAI,EAAE,sHAAsH;QAC5H,QAAQ,EAAE;YACR,CAAC,CAAC,qCAAqC,EAAE,CAAC,CAAC;YAC3C,CAAC,CAAC,iCAAiC,EAAE,CAAC,CAAC;YACvC,CAAC,CAAC,8BAA8B,EAAE,CAAC,CAAC;YACpC,CAAC,CAAC,sBAAsB,EAAE,CAAC,CAAC;YAC5B,CAAC,CAAC,8BAA8B,EAAE,CAAC,CAAC;YACpC,CAAC,CAAC,kBAAkB,EAAE,CAAC,CAAC;YACxB,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;SACjB;KACF;IACD;QACE,QAAQ,EAAE,kBAAkB;QAC5B,IAAI,EAAE,2HAA2H;QACjI,QAAQ,EAAE;YACR,CAAC,CAAC,iDAAiD,EAAE,CAAC,CAAC;YACvD,CAAC,CAAC,sCAAsC,EAAE,CAAC,CAAC;YAC5C,CAAC,CAAC,0BAA0B,EAAE,CAAC,CAAC;YAChC,CAAC,CAAC,wCAAwC
,EAAE,CAAC,CAAC;YAC9C,CAAC,CAAC,0BAA0B,EAAE,CAAC,CAAC;YAChC,CAAC,CAAC,iCAAiC,EAAE,CAAC,CAAC;YACvC,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC;SACnB;KACF;IACD;QACE,QAAQ,EAAE,YAAY;QACtB,IAAI,EAAE,sHAAsH;QAC5H,QAAQ,EAAE;YACR,CAAC,CAAC,sBAAsB,EAAE,CAAC,CAAC;YAC5B,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YACjB,CAAC,CAAC,kDAAkD,EAAE,CAAC,CAAC;YACxD,CAAC,CAAC,sBAAsB,EAAE,CAAC,CAAC;YAC5B,CAAC,CAAC,oCAAoC,EAAE,CAAC,CAAC;YAC1C,CAAC,CAAC,6BAA6B,EAAE,CAAC,CAAC;YACnC,CAAC,CAAC,qBAAqB,EAAE,CAAC,CAAC;SAC5B;KACF;CACF,CAAC;AAEF;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,OAAwB;IACpD,MAAM,SAAS,GAAG,OAAO;SACtB,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;SAClD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE/B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO;YACL,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,CAAC;YACb,IAAI,EAAE,qGAAqG;SAC5G,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,EAAyB,CAAC;IAChD,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,KAAK,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;gBAC1C,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC;oBAAE,KAAK,IAAI,MAAM,CAAC;YACpC,CAAC;QACH,CAAC;QACD,IAAI,KAAK,GAAG,CAAC;YAAE,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IACjD,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,GAAG;YACf,IAAI,EAAE,0GAA0G;SACjH,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,GAAkB,SAAS,CAAC;IACpC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QAClC,KAAK,IAAI,KAAK,CAAC;QACf,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;YACtB,IAAI,GAAG,GAAG,CAAC;YACX,SAAS,GAAG,KAAK,CAAC;QACpB,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,IAAI,CAAE,CAAC;IACzD,4EAA4E;IAC5E,MAAM,SAAS,GAAG,SAAS,GAAG,KAAK,CAAC;IACpC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC
,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,GAAG,GAAG,SAAS,GAAG,IAAI,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IAEtG,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;AACxD,CAAC;AAED,+DAA+D;AAC/D,MAAM,UAAU,UAAU,CAAC,QAAuB;IAChD,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,QAAQ;YACX,OAAO,cAAc,CAAC;QACxB,KAAK,aAAa;YAChB,OAAO,kBAAkB,CAAC;QAC5B,KAAK,UAAU;YACb,OAAO,kBAAkB,CAAC;QAC5B,KAAK,UAAU;YACb,OAAO,cAAc,CAAC;QACxB,KAAK,kBAAkB;YACrB,OAAO,kBAAkB,CAAC;QAC5B,KAAK,YAAY;YACf,OAAO,iBAAiB,CAAC;QAC3B,KAAK,SAAS;YACZ,OAAO,SAAS,CAAC;IACrB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { AnalysisReport, Run, TestResult, TestVerdict } from "../types.js";
|
|
2
|
+
/** Compare flaky verdicts: worst (most worth investigating) first. */
|
|
3
|
+
export declare function compareFlaky(a: TestVerdict, b: TestVerdict): number;
|
|
4
|
+
/**
|
|
5
|
+
* Analyze a sequence of runs and produce a ranked flakiness report.
|
|
6
|
+
* Run order is significant — flips are computed along it.
|
|
7
|
+
*/
|
|
8
|
+
export declare function analyze(runs: Run[]): AnalysisReport;
|
|
9
|
+
/** Convenience: build a Run from already-parsed results. */
|
|
10
|
+
export declare function runFromResults(label: string, results: TestResult[]): Run;
|
|
11
|
+
//# sourceMappingURL=flakiness.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"flakiness.d.ts","sourceRoot":"","sources":["../../src/analysis/flakiness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,cAAc,EAEd,GAAG,EAEH,UAAU,EAEV,WAAW,EACZ,MAAM,aAAa,CAAC;AA4JrB,sEAAsE;AACtE,wBAAgB,YAAY,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,WAAW,GAAG,MAAM,CAKnE;AAED;;;GAGG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,cAAc,CAsBnD;AAED,4DAA4D;AAC5D,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,GAAG,CAExE"}
|