flakeradar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +309 -0
  3. package/bin/flakeradar.js +11 -0
  4. package/dist/analysis/classify.d.ts +10 -0
  5. package/dist/analysis/classify.d.ts.map +1 -0
  6. package/dist/analysis/classify.js +163 -0
  7. package/dist/analysis/classify.js.map +1 -0
  8. package/dist/analysis/flakiness.d.ts +11 -0
  9. package/dist/analysis/flakiness.d.ts.map +1 -0
  10. package/dist/analysis/flakiness.js +177 -0
  11. package/dist/analysis/flakiness.js.map +1 -0
  12. package/dist/cli.d.ts +5 -0
  13. package/dist/cli.d.ts.map +1 -0
  14. package/dist/cli.js +77 -0
  15. package/dist/cli.js.map +1 -0
  16. package/dist/color.d.ts +19 -0
  17. package/dist/color.d.ts.map +1 -0
  18. package/dist/color.js +36 -0
  19. package/dist/color.js.map +1 -0
  20. package/dist/commands/analyze.d.ts +4 -0
  21. package/dist/commands/analyze.d.ts.map +1 -0
  22. package/dist/commands/analyze.js +106 -0
  23. package/dist/commands/analyze.js.map +1 -0
  24. package/dist/commands/run.d.ts +4 -0
  25. package/dist/commands/run.d.ts.map +1 -0
  26. package/dist/commands/run.js +116 -0
  27. package/dist/commands/run.js.map +1 -0
  28. package/dist/commands/shared.d.ts +16 -0
  29. package/dist/commands/shared.d.ts.map +1 -0
  30. package/dist/commands/shared.js +44 -0
  31. package/dist/commands/shared.js.map +1 -0
  32. package/dist/index.d.ts +14 -0
  33. package/dist/index.d.ts.map +1 -0
  34. package/dist/index.js +13 -0
  35. package/dist/index.js.map +1 -0
  36. package/dist/junit/parser.d.ts +11 -0
  37. package/dist/junit/parser.d.ts.map +1 -0
  38. package/dist/junit/parser.js +98 -0
  39. package/dist/junit/parser.js.map +1 -0
  40. package/dist/junit/xml.d.ts +27 -0
  41. package/dist/junit/xml.d.ts.map +1 -0
  42. package/dist/junit/xml.js +238 -0
  43. package/dist/junit/xml.js.map +1 -0
  44. package/dist/report/json.d.ts +7 -0
  45. package/dist/report/json.d.ts.map +1 -0
  46. package/dist/report/json.js +38 -0
  47. package/dist/report/json.js.map +1 -0
  48. package/dist/report/markdown.d.ts +7 -0
  49. package/dist/report/markdown.d.ts.map +1 -0
  50. package/dist/report/markdown.js +75 -0
  51. package/dist/report/markdown.js.map +1 -0
  52. package/dist/report/terminal.d.ts +4 -0
  53. package/dist/report/terminal.d.ts.map +1 -0
  54. package/dist/report/terminal.js +101 -0
  55. package/dist/report/terminal.js.map +1 -0
  56. package/dist/runner/runner.d.ts +45 -0
  57. package/dist/runner/runner.d.ts.map +1 -0
  58. package/dist/runner/runner.js +125 -0
  59. package/dist/runner/runner.js.map +1 -0
  60. package/dist/types.d.ts +87 -0
  61. package/dist/types.d.ts.map +1 -0
  62. package/dist/types.js +2 -0
  63. package/dist/types.js.map +1 -0
  64. package/dist/util/args.d.ts +23 -0
  65. package/dist/util/args.d.ts.map +1 -0
  66. package/dist/util/args.js +83 -0
  67. package/dist/util/args.js.map +1 -0
  68. package/dist/util/glob.d.ts +11 -0
  69. package/dist/util/glob.d.ts.map +1 -0
  70. package/dist/util/glob.js +104 -0
  71. package/dist/util/glob.js.map +1 -0
  72. package/package.json +62 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 flakeradar contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,309 @@
1
+ <div align="center">
2
+
3
+ # 🛰️ flakeradar
4
+
5
+ **Find, rank, and diagnose flaky tests in any language — straight from JUnit XML.**
6
+
7
+ [![npm version](https://img.shields.io/npm/v/flakeradar?color=cb3837&logo=npm)](https://www.npmjs.com/package/flakeradar)
8
+ [![CI](https://github.com/shravnn/flakeradar/actions/workflows/ci.yml/badge.svg)](https://github.com/shravnn/flakeradar/actions/workflows/ci.yml)
9
+ [![license](https://img.shields.io/npm/l/flakeradar?color=blue)](./LICENSE)
10
+ [![node](https://img.shields.io/node/v/flakeradar)](https://nodejs.org)
11
+ [![zero dependencies](https://img.shields.io/badge/runtime%20deps-0-brightgreen)](./package.json)
12
+
13
+ Run your suite N times (or point it at your CI history) and flakeradar tells you
14
+ **which tests are flaky, how often they fail, and _why_.**
15
+
16
+ <img src="docs/demo.svg" alt="flakeradar ranking flaky tests with likely root causes" width="820">
17
+
18
+ </div>
19
+
20
+ ---
21
+
22
+ ## Why
23
+
24
+ Flaky tests are the most corrosive thing in a CI pipeline: they erode trust in
25
+ the suite, waste hours on re-runs, and bury real bugs in the noise. Most tools
26
+ just tell you a test *sometimes* fails. flakeradar goes further — it **ranks**
27
+ flakes by how disruptive they are and **guesses the root cause**, turning "ugh,
28
+ it failed again" into "here's the flaky test, here's how often, here's why."
29
+
30
+ - 🌐 **Language-agnostic** — anything that emits JUnit XML: pytest, Jest, Vitest, Go, Gradle/Maven, RSpec, PHPUnit, Mocha…
31
+ - 🧠 **Root-cause hints** — timing/async, resource, order-dependency, concurrency, external-dependency, or randomness
32
+ - 📊 **Ranked, not just listed** — worst offenders first
33
+ - 🐛 **Separates flaky from broken** — always-failing tests are real bugs, reported apart from the noise
34
+ - 📎 **Shareable Markdown report** — paste straight into a GitHub issue or PR
35
+ - 🚦 **CI-ready** — `--fail-on-flaky` gates merges; JSON output for dashboards
36
+ - 📦 **Zero runtime dependencies** — one small package, nothing transitive
37
+
38
+ ## Contents
39
+
40
+ - [Quick start](#quick-start)
41
+ - [Two ways to use it](#two-ways-to-use-it)
42
+ - [Framework recipes](#framework-recipes)
43
+ - [Use it in CI](#use-it-in-ci)
44
+ - [How it works](#how-it-works)
45
+ - [Output formats & exit codes](#output-formats)
46
+ - [Options](#options)
47
+ - [Use as a library](#use-as-a-library)
48
+ - [FAQ](#faq)
49
+
50
+ ---
51
+
52
+ ## Quick start
53
+
54
+ No install needed:
55
+
56
+ ```bash
57
+ # Analyze existing CI reports (each XML file = one run):
58
+ npx flakeradar analyze "test-reports/**/*.xml"
59
+
60
+ # Or run a suite repeatedly and watch for flakiness:
61
+ npx flakeradar run -n 20 -- pytest --junitxml={out}
62
+ ```
63
+
64
+ Or install it:
65
+
66
+ ```bash
67
+ npm install -g flakeradar
68
+ ```
69
+
70
+ Try it right now on the bundled example reports:
71
+
72
+ ```bash
73
+ npx flakeradar analyze "examples/*.xml"
74
+ ```
75
+
76
+ ```text
77
+ flakeradar — flakiness report
78
+
79
+ Runs analyzed: 5 Green runs: 0% Crashed: 0
80
+ Unique tests: 5 Flaky: 3 Always-failing: 1
81
+
82
+ Flaky tests (ranked, worst first):
83
+ # Test Fails Rate Flips Likely cause Conf
84
+ 1 checkout.CheckoutTest::test_places_order 2/5 40% 4 timing/async 95%
85
+ 2 db.UserRepoTest::test_create_user 2/5 40% 3 order/state leak 95%
86
+ 3 api.PaymentTest::test_charge_card 1/5 20% 2 resource/network 95%
87
+
88
+ Top offender: checkout.CheckoutTest::test_places_order
89
+ likely: timing/async (95% confident)
90
+ → Replace fixed sleeps with polling/awaits and raise timeouts.
91
+ sample failures:
92
+ • TimeoutError: timed out after 5000ms waiting for order confirmation
93
+
94
+ Always-failing (broken every run — likely a real bug, not flakiness):
95
+ ✗ math.TaxTest::test_tax_rate (5/5 failed)
96
+ ```
97
+
98
+ ---
99
+
100
+ ## Two ways to use it
101
+
102
+ ### 1. `run` — reproduce flakiness locally
103
+
104
+ Run a test command many times back-to-back. Put the `{out}` token where your
105
+ runner writes its JUnit XML; flakeradar swaps in a fresh path each run.
106
+
107
+ ```bash
108
+ flakeradar run -n 25 -- pytest --junitxml={out}
109
+ ```
110
+
111
+ If your runner writes to a fixed path (or many files), use `--report` instead:
112
+
113
+ ```bash
114
+ flakeradar run -n 15 -r "build/test-results/test/*.xml" -- ./gradlew test
115
+ ```
116
+
117
+ Stop as soon as a flake shows up:
118
+
119
+ ```bash
120
+ flakeradar run -n 50 --stop-on-first-flaky -- gotestsum --junitfile={out} ./...
121
+ ```
122
+
123
+ ### 2. `analyze` — mine your CI history
124
+
125
+ Already have per-run JUnit reports archived from CI? Point flakeradar at them.
126
+ Each file is treated as one run by default.
127
+
128
+ ```bash
129
+ flakeradar analyze "ci-artifacts/**/junit.xml"
130
+
131
+ # One run per directory instead of per file:
132
+ flakeradar analyze --group-by dir "ci-artifacts/*/"
133
+ ```
134
+
135
+ ---
136
+
137
+ ## Framework recipes
138
+
139
+ | Framework | Command |
140
+ |-----------|---------|
141
+ | **pytest** | `flakeradar run -n 20 -- pytest --junitxml={out}` |
142
+ | **Jest** | `flakeradar run -n 20 -r junit.xml -- npx jest --reporters=default --reporters=jest-junit` |
143
+ | **Vitest** | `flakeradar run -n 20 -r junit.xml -- npx vitest run --reporter=junit --outputFile=junit.xml` |
144
+ | **Go** | `flakeradar run -n 20 -- sh -c "gotestsum --junitfile={out} ./..."` |
145
+ | **Gradle** | `flakeradar run -n 15 -r "build/test-results/test/*.xml" -- ./gradlew test` |
146
+ | **Maven** | `flakeradar run -n 15 -r "target/surefire-reports/*.xml" -- mvn -q test` |
147
+ | **RSpec** | `flakeradar run -n 20 -- bundle exec rspec --format RspecJunitFormatter --out {out}` |
148
+ | **PHPUnit** | `flakeradar run -n 20 -- phpunit --log-junit {out}` |
149
+
150
+ > Tip: for runners that only write to a fixed file, pass that path to `--report`
151
+ > (`-r`). For runners that accept an output path, use the `{out}` token.
152
+
153
+ ---
154
+
155
+ ## Use it in CI
156
+
157
+ Detect flakiness that slips past a single run, and comment a report on the PR.
158
+
159
+ ```yaml
160
+ # .github/workflows/flaky.yml
161
+ name: flaky-test-check
162
+ on: [pull_request]
163
+
164
+ jobs:
165
+ flaky:
166
+ runs-on: ubuntu-latest
167
+ steps:
168
+ - uses: actions/checkout@v4
169
+ - uses: actions/setup-node@v4
170
+ with: { node-version: 20 }
171
+
172
+ # ... set up your language + install deps ...
173
+
174
+ - name: Hunt for flaky tests
175
+ run: npx flakeradar run -n 15 --markdown -- pytest --junitxml={out} > flaky.md
176
+
177
+ - name: Comment report on the PR
178
+ if: always()
179
+ uses: marocchino/sticky-pull-request-comment@v2
180
+ with:
181
+ path: flaky.md
182
+ ```
183
+
184
+ Or gate on your existing archived reports:
185
+
186
+ ```bash
187
+ flakeradar analyze --fail-on-flaky "artifacts/**/*.xml"
188
+ ```
189
+
190
+ ---
191
+
192
+ ## How it works
193
+
194
+ **Flakiness detection.** A test is **flaky** when it both passes and fails across
195
+ the runs analyzed. A test that fails *every* run isn't flaky — it's **broken**, and
196
+ flakeradar reports those separately so you don't confuse a real bug with noise.
197
+
198
+ Each flaky test gets:
199
+
200
+ - **Fails / Rate** — how many runs failed, and the failure rate.
201
+ - **Flips** — how many times it switched between pass and fail across the run
202
+ sequence. High flips = highly nondeterministic.
203
+ - **Score** — `failures + flips`, used to rank the worst offenders first.
204
+
205
+ **Root-cause classification.** flakeradar scans each test's failure output and
206
+ matches it against tuned signatures to guess a category:
207
+
208
+ | Category | Typical signals | Suggested fix |
209
+ |----------|-----------------|---------------|
210
+ | `timing/async` | `TimeoutError`, `deadline exceeded`, `waited for` | Poll/await instead of fixed sleeps; raise timeouts |
211
+ | `concurrency/race` | `data race`, `deadlock`, `ConcurrentModification` | Synchronize shared state; remove shared mutation |
212
+ | `resource/network` | `EADDRINUSE`, `ECONNREFUSED`, `too many open files` | Isolate ports/temp files per test |
213
+ | `order/state leak` | `duplicate key`, `already exists`, `unique constraint` | Reset fixtures/DB between tests; randomize order |
214
+ | `external dep` | `503`, `429 rate limit`, `bad gateway` | Mock the dependency or add tolerant retries |
215
+ | `randomness/time` | `random`, `uuid`, `Date.now`, `timezone` | Seed RNG; freeze the clock in tests |
216
+
217
+ The classifier is a heuristic — a strong hint about where to look, not a verdict.
218
+
219
+ ---
220
+
221
+ ## Output formats
222
+
223
+ | Flag | Output |
224
+ |------|--------|
225
+ | _(default)_ | Colored terminal report |
226
+ | `--markdown` | GitHub-flavored Markdown (tables + collapsible failure samples) |
227
+ | `--json` | Stable `flakeradar/v1` JSON for dashboards and scripts |
228
+
229
+ ### Exit codes
230
+
231
+ | Code | Meaning |
232
+ |------|---------|
233
+ | `0` | Success (no flaky or always-failing tests found, or `--fail-on-flaky` not set) |
234
+ | `1` | Usage error, or no reports/test data found |
235
+ | `2` | Flaky or always-failing tests detected **and** `--fail-on-flaky` was set |
236
+
237
+ ---
238
+
239
+ ## Options
240
+
241
+ ### `flakeradar run [options] -- <test command>`
242
+
243
+ | Option | Description |
244
+ |--------|-------------|
245
+ | `-n, --runs <N>` | Number of times to run the suite (default `10`) |
246
+ | `-r, --report <glob>` | Where the command writes its JUnit XML each run |
247
+ | `-o, --output <dir>` | Directory for `{out}` report files (default `.flakeradar/runs`) |
248
+ | `--stop-on-first-flaky` | Stop as soon as any flaky test is detected |
249
+ | `--keep-going` | Keep running even if a run crashes (default: stop) |
250
+ | `--fail-on-flaky` | Exit `2` if any flaky/always-failing test is found |
251
+ | `--json` / `--markdown` | Choose output format |
252
+ | `--no-color` | Disable ANSI colors |
253
+
254
+ ### `flakeradar analyze [options] <glob...>`
255
+
256
+ | Option | Description |
257
+ |--------|-------------|
258
+ | `--group-by <file\|dir>` | Treat each file (default) or each directory as one run |
259
+ | `--fail-on-flaky` | Exit `2` if any flaky/always-failing test is found |
260
+ | `--json` / `--markdown` | Choose output format |
261
+ | `--no-color` | Disable ANSI colors |
262
+
263
+ ---
264
+
265
+ ## Use as a library
266
+
267
+ The analysis engine is exported too, for custom integrations:
268
+
269
+ ```ts
270
+ import { parseJUnitXml, analyze, runFromResults } from "flakeradar";
271
+
272
+ const runs = reportStrings.map((xml, i) => runFromResults(`run ${i + 1}`, parseJUnitXml(xml)));
273
+ const report = analyze(runs);
274
+
275
+ console.log(report.summary.flakyCount, "flaky tests");
276
+ for (const t of report.flaky) {
277
+ console.log(t.id, t.failureRate, t.cause?.category);
278
+ }
279
+ ```
280
+
281
+ ---
282
+
283
+ ## FAQ
284
+
285
+ **How many runs do I need?** At least 2 to detect any flakiness. For confidence,
286
+ 10–30 runs is a good range — the rarer the flake, the more runs you need to catch it.
287
+
288
+ **Does it need my CI or a service?** No. It's a local CLI. Nothing is uploaded.
289
+
290
+ **My runner exits non-zero on failure — is that a problem?** No. flakeradar reads
291
+ the JUnit report regardless of exit code. A run only counts as *crashed* if it
292
+ produces no report at all.
293
+
294
+ ---
295
+
296
+ ## Contributing
297
+
298
+ Issues and PRs welcome.
299
+
300
+ ```bash
301
+ npm install
302
+ npm test # run the test suite (vitest)
303
+ npm run build # compile TypeScript to dist/
304
+ node bin/flakeradar.js analyze "examples/*.xml"
305
+ ```
306
+
307
+ ## License
308
+
309
+ [MIT](./LICENSE) © [shravnn](https://github.com/shravnn)
package/bin/flakeradar.js ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env node
2
+ import { main } from "../dist/cli.js";
3
+
4
+ main(process.argv.slice(2))
5
+ .then((code) => {
6
+ process.exitCode = code;
7
+ })
8
+ .catch((err) => {
9
+ console.error(err instanceof Error ? err.stack ?? err.message : String(err));
10
+ process.exitCode = 1;
11
+ });
package/dist/analysis/classify.d.ts ADDED
@@ -0,0 +1,10 @@
1
+ import type { CauseCategory, CauseGuess, FailureSample } from "../types.js";
2
+ /**
3
+ * Classify the likely root cause of a flaky test from its failure samples.
4
+ * Returns the best-scoring category with a rough confidence, or `unknown`
5
+ * when no signal is found.
6
+ */
7
+ export declare function classifyCause(samples: FailureSample[]): CauseGuess;
8
+ /** Short human label for a cause category (used in tables). */
9
+ export declare function causeLabel(category: CauseCategory): string;
10
+ //# sourceMappingURL=classify.d.ts.map
package/dist/analysis/classify.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"classify.d.ts","sourceRoot":"","sources":["../../src/analysis/classify.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAsG5E;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,OAAO,EAAE,aAAa,EAAE,GAAG,UAAU,CAkDlE;AAED,+DAA+D;AAC/D,wBAAgB,UAAU,CAAC,QAAQ,EAAE,aAAa,GAAG,MAAM,CAiB1D"}
package/dist/analysis/classify.js ADDED
@@ -0,0 +1,163 @@
1
+ const p = (re, weight = 1) => ({ re, weight });
2
+ /**
3
+ * Ordered strongest-signal-first. Each failure sample votes for categories;
4
+ * the category with the highest total weight wins. These patterns are tuned
5
+ * against real failure output from pytest, Jest, JUnit, RSpec and Go.
6
+ */
7
+ const CATEGORIES = [
8
+ {
9
+ category: "timing",
10
+ hint: "Timing/async: a wait, sleep, or deadline is racing the code. Replace fixed sleeps with polling/awaits and raise timeouts.",
11
+ patterns: [
12
+ p(/\btimed?\s?out\b/i, 3),
13
+ p(/\btimeout(error|exception)?\b/i, 3),
14
+ p(/context deadline exceeded/i, 3),
15
+ p(/deadline exceeded/i, 2),
16
+ p(/\bwait(ed|ing)? for\b/i, 2),
17
+ p(/element .*not (visible|found|clickable)/i, 2),
18
+ p(/\bflaky\b/i, 1),
19
+ p(/eventually/i, 1),
20
+ p(/retry|retries exhausted/i, 1),
21
+ ],
22
+ },
23
+ {
24
+ category: "concurrency",
25
+ hint: "Concurrency/race: shared mutable state or a race condition. Look for goroutines/threads, locks, and unsynchronised access.",
26
+ patterns: [
27
+ p(/\bdata race\b/i, 3),
28
+ p(/race condition/i, 3),
29
+ p(/\bdeadlock\b/i, 3),
30
+ p(/concurrent (map|modification)/i, 3),
31
+ p(/ConcurrentModification/i, 3),
32
+ p(/\bgoroutine\b/i, 1),
33
+ p(/\bmutex\b|\block (held|contention)\b/i, 2),
34
+ p(/thread|threadpool/i, 1),
35
+ ],
36
+ },
37
+ {
38
+ category: "resource",
39
+ hint: "Resource/network: a port, socket, DB, or file handle is contended or unavailable. Isolate ports/temp files per test.",
40
+ patterns: [
41
+ p(/address already in use/i, 3),
42
+ p(/EADDRINUSE/i, 3),
43
+ p(/ECONNREFUSED|connection refused/i, 3),
44
+ p(/EADDRNOTAVAIL|ENOTFOUND|no such host/i, 2),
45
+ p(/port \d+ (is )?(already )?(in use|unavailable)/i, 3),
46
+ p(/too many open files|EMFILE/i, 3),
47
+ p(/broken pipe|EPIPE|ECONNRESET/i, 2),
48
+ p(/socket|bind|listen/i, 1),
49
+ p(/disk|ENOSPC|no space left/i, 2),
50
+ ],
51
+ },
52
+ {
53
+ category: "external",
54
+ hint: "External dependency: a downstream service, API, or rate limit is flaky. Mock the dependency or add tolerant retries.",
55
+ patterns: [
56
+ p(/\b5\d\d\b.*(error|status|response)/i, 2),
57
+ p(/\b(429|rate.?limit(ed|ing)?)\b/i, 3),
58
+ p(/\b503\b|service unavailable/i, 3),
59
+ p(/\b502\b|bad gateway/i, 2),
60
+ p(/upstream|downstream service/i, 2),
61
+ p(/gateway timeout/i, 2),
62
+ p(/\bDNS\b/i, 1),
63
+ ],
64
+ },
65
+ {
66
+ category: "order-dependency",
67
+ hint: "Order/state leakage: the test depends on state left by another test. Reset fixtures/DB between tests and randomise order.",
68
+ patterns: [
69
+ p(/already exists|duplicate key|unique constraint/i, 3),
70
+ p(/leftover|not cleaned up|dirty state/i, 2),
71
+ p(/expected .* to be empty/i, 2),
72
+ p(/no such (row|record|entity)|not found/i, 1),
73
+ p(/global (state|variable)/i, 2),
74
+ p(/cache (hit|stale|invalidation)/i, 1),
75
+ p(/singleton/i, 1),
76
+ ],
77
+ },
78
+ {
79
+ category: "randomness",
80
+ hint: "Randomness/time: nondeterministic inputs (random, UUIDs, current date/time). Seed RNG and freeze the clock in tests.",
81
+ patterns: [
82
+ p(/\brandom\b|\bseed\b/i, 2),
83
+ p(/\bUUID\b/i, 1),
84
+ p(/current (date|time)|now\(\)|Date\.now|time\.Now/i, 2),
85
+ p(/timezone|utc|locale/i, 1),
86
+ p(/floating point|precision|rounding/i, 1),
87
+ p(/ordering of (keys|map|set)/i, 2),
88
+ p(/non.?deterministic/i, 2),
89
+ ],
90
+ },
91
+ ];
92
+ /**
93
+ * Classify the likely root cause of a flaky test from its failure samples.
94
+ * Returns the best-scoring category with a rough confidence, or `unknown`
95
+ * when no signal is found.
96
+ */
97
+ export function classifyCause(samples) {
98
+ const haystacks = samples
99
+ .flatMap((s) => [s.message ?? "", s.details ?? ""])
100
+ .filter((s) => s.length > 0);
101
+ if (haystacks.length === 0) {
102
+ return {
103
+ category: "unknown",
104
+ confidence: 0,
105
+ hint: "No failure output was captured, so the cause can't be inferred. Enable verbose test output to help.",
106
+ };
107
+ }
108
+ const scores = new Map();
109
+ for (const def of CATEGORIES) {
110
+ let score = 0;
111
+ for (const hay of haystacks) {
112
+ for (const { re, weight } of def.patterns) {
113
+ if (re.test(hay))
114
+ score += weight;
115
+ }
116
+ }
117
+ if (score > 0)
118
+ scores.set(def.category, score);
119
+ }
120
+ if (scores.size === 0) {
121
+ return {
122
+ category: "unknown",
123
+ confidence: 0.1,
124
+ hint: "Failure text didn't match known flakiness signatures. Inspect the assertion and recent changes manually.",
125
+ };
126
+ }
127
+ let best = "unknown";
128
+ let bestScore = 0;
129
+ let total = 0;
130
+ for (const [cat, score] of scores) {
131
+ total += score;
132
+ if (score > bestScore) {
133
+ best = cat;
134
+ bestScore = score;
135
+ }
136
+ }
137
+ const def = CATEGORIES.find((c) => c.category === best);
138
+ // Confidence blends absolute evidence with dominance over other categories.
139
+ const dominance = bestScore / total;
140
+ const saturation = Math.min(1, bestScore / 4);
141
+ const confidence = Math.round(Math.min(0.95, 0.35 + 0.4 * dominance + 0.25 * saturation) * 100) / 100;
142
+ return { category: best, confidence, hint: def.hint };
143
+ }
144
+ /** Short human label for a cause category (used in tables). */
145
+ export function causeLabel(category) {
146
+ switch (category) {
147
+ case "timing":
148
+ return "timing/async";
149
+ case "concurrency":
150
+ return "concurrency/race";
151
+ case "resource":
152
+ return "resource/network";
153
+ case "external":
154
+ return "external dep";
155
+ case "order-dependency":
156
+ return "order/state leak";
157
+ case "randomness":
158
+ return "randomness/time";
159
+ case "unknown":
160
+ return "unknown";
161
+ }
162
+ }
163
+ //# sourceMappingURL=classify.js.map
package/dist/analysis/classify.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"classify.js","sourceRoot":"","sources":["../../src/analysis/classify.ts"],"names":[],"mappings":"AASA,MAAM,CAAC,GAAG,CAAC,EAAU,EAAE,MAAM,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;AAEvD;;;;GAIG;AACH,MAAM,UAAU,GAAkB;IAChC;QACE,QAAQ,EAAE,QAAQ;QAClB,IAAI,EAAE,2HAA2H;QACjI,QAAQ,EAAE;YACR,CAAC,CAAC,mBAAmB,EAAE,CAAC,CAAC;YACzB,CAAC,CAAC,gCAAgC,EAAE,CAAC,CAAC;YACtC,CAAC,CAAC,4BAA4B,EAAE,CAAC,CAAC;YAClC,CAAC,CAAC,oBAAoB,EAAE,CAAC,CAAC;YAC1B,CAAC,CAAC,wBAAwB,EAAE,CAAC,CAAC;YAC9B,CAAC,CAAC,0CAA0C,EAAE,CAAC,CAAC;YAChD,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC;YAClB,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC;YACnB,CAAC,CAAC,0BAA0B,EAAE,CAAC,CAAC;SACjC;KACF;IACD;QACE,QAAQ,EAAE,aAAa;QACvB,IAAI,EAAE,4HAA4H;QAClI,QAAQ,EAAE;YACR,CAAC,CAAC,gBAAgB,EAAE,CAAC,CAAC;YACtB,CAAC,CAAC,iBAAiB,EAAE,CAAC,CAAC;YACvB,CAAC,CAAC,eAAe,EAAE,CAAC,CAAC;YACrB,CAAC,CAAC,gCAAgC,EAAE,CAAC,CAAC;YACtC,CAAC,CAAC,yBAAyB,EAAE,CAAC,CAAC;YAC/B,CAAC,CAAC,gBAAgB,EAAE,CAAC,CAAC;YACtB,CAAC,CAAC,uCAAuC,EAAE,CAAC,CAAC;YAC7C,CAAC,CAAC,oBAAoB,EAAE,CAAC,CAAC;SAC3B;KACF;IACD;QACE,QAAQ,EAAE,UAAU;QACpB,IAAI,EAAE,sHAAsH;QAC5H,QAAQ,EAAE;YACR,CAAC,CAAC,yBAAyB,EAAE,CAAC,CAAC;YAC/B,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC;YACnB,CAAC,CAAC,kCAAkC,EAAE,CAAC,CAAC;YACxC,CAAC,CAAC,uCAAuC,EAAE,CAAC,CAAC;YAC7C,CAAC,CAAC,iDAAiD,EAAE,CAAC,CAAC;YACvD,CAAC,CAAC,6BAA6B,EAAE,CAAC,CAAC;YACnC,CAAC,CAAC,+BAA+B,EAAE,CAAC,CAAC;YACrC,CAAC,CAAC,qBAAqB,EAAE,CAAC,CAAC;YAC3B,CAAC,CAAC,4BAA4B,EAAE,CAAC,CAAC;SACnC;KACF;IACD;QACE,QAAQ,EAAE,UAAU;QACpB,IAAI,EAAE,sHAAsH;QAC5H,QAAQ,EAAE;YACR,CAAC,CAAC,qCAAqC,EAAE,CAAC,CAAC;YAC3C,CAAC,CAAC,iCAAiC,EAAE,CAAC,CAAC;YACvC,CAAC,CAAC,8BAA8B,EAAE,CAAC,CAAC;YACpC,CAAC,CAAC,sBAAsB,EAAE,CAAC,CAAC;YAC5B,CAAC,CAAC,8BAA8B,EAAE,CAAC,CAAC;YACpC,CAAC,CAAC,kBAAkB,EAAE,CAAC,CAAC;YACxB,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;SACjB;KACF;IACD;QACE,QAAQ,EAAE,kBAAkB;QAC5B,IAAI,EAAE,2HAA2H;QACjI,QAAQ,EAAE;YACR,CAAC,CAAC,iDAAiD,EAAE,CAAC,CAAC;YACvD,CAAC,CAAC,sCAAsC,EAAE,CAAC,CAAC;YAC5C,CAAC,CAAC,0BAA0B,EAAE,CAAC,CAAC;YAChC,CAAC,CAAC,wCAA
wC,EAAE,CAAC,CAAC;YAC9C,CAAC,CAAC,0BAA0B,EAAE,CAAC,CAAC;YAChC,CAAC,CAAC,iCAAiC,EAAE,CAAC,CAAC;YACvC,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC;SACnB;KACF;IACD;QACE,QAAQ,EAAE,YAAY;QACtB,IAAI,EAAE,sHAAsH;QAC5H,QAAQ,EAAE;YACR,CAAC,CAAC,sBAAsB,EAAE,CAAC,CAAC;YAC5B,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YACjB,CAAC,CAAC,kDAAkD,EAAE,CAAC,CAAC;YACxD,CAAC,CAAC,sBAAsB,EAAE,CAAC,CAAC;YAC5B,CAAC,CAAC,oCAAoC,EAAE,CAAC,CAAC;YAC1C,CAAC,CAAC,6BAA6B,EAAE,CAAC,CAAC;YACnC,CAAC,CAAC,qBAAqB,EAAE,CAAC,CAAC;SAC5B;KACF;CACF,CAAC;AAEF;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,OAAwB;IACpD,MAAM,SAAS,GAAG,OAAO;SACtB,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;SAClD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE/B,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO;YACL,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,CAAC;YACb,IAAI,EAAE,qGAAqG;SAC5G,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,EAAyB,CAAC;IAChD,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;YAC5B,KAAK,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;gBAC1C,IAAI,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC;oBAAE,KAAK,IAAI,MAAM,CAAC;YACpC,CAAC;QACH,CAAC;QACD,IAAI,KAAK,GAAG,CAAC;YAAE,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IACjD,CAAC;IAED,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,QAAQ,EAAE,SAAS;YACnB,UAAU,EAAE,GAAG;YACf,IAAI,EAAE,0GAA0G;SACjH,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,GAAkB,SAAS,CAAC;IACpC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QAClC,KAAK,IAAI,KAAK,CAAC;QACf,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;YACtB,IAAI,GAAG,GAAG,CAAC;YACX,SAAS,GAAG,KAAK,CAAC;QACpB,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,IAAI,CAAE,CAAC;IACzD,4EAA4E;IAC5E,MAAM,SAAS,GAAG,SAAS,GAAG,KAAK,CAAC;IACpC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CA
AC,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,GAAG,GAAG,SAAS,GAAG,IAAI,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;IAEtG,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;AACxD,CAAC;AAED,+DAA+D;AAC/D,MAAM,UAAU,UAAU,CAAC,QAAuB;IAChD,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,QAAQ;YACX,OAAO,cAAc,CAAC;QACxB,KAAK,aAAa;YAChB,OAAO,kBAAkB,CAAC;QAC5B,KAAK,UAAU;YACb,OAAO,kBAAkB,CAAC;QAC5B,KAAK,UAAU;YACb,OAAO,cAAc,CAAC;QACxB,KAAK,kBAAkB;YACrB,OAAO,kBAAkB,CAAC;QAC5B,KAAK,YAAY;YACf,OAAO,iBAAiB,CAAC;QAC3B,KAAK,SAAS;YACZ,OAAO,SAAS,CAAC;IACrB,CAAC;AACH,CAAC"}
package/dist/analysis/flakiness.d.ts ADDED
@@ -0,0 +1,11 @@
1
+ import type { AnalysisReport, Run, TestResult, TestVerdict } from "../types.js";
2
+ /** Compare flaky verdicts: worst (most worth investigating) first. */
3
+ export declare function compareFlaky(a: TestVerdict, b: TestVerdict): number;
4
+ /**
5
+ * Analyze a sequence of runs and produce a ranked flakiness report.
6
+ * Run order is significant — flips are computed along it.
7
+ */
8
+ export declare function analyze(runs: Run[]): AnalysisReport;
9
+ /** Convenience: build a Run from already-parsed results. */
10
+ export declare function runFromResults(label: string, results: TestResult[]): Run;
11
+ //# sourceMappingURL=flakiness.d.ts.map
package/dist/analysis/flakiness.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"flakiness.d.ts","sourceRoot":"","sources":["../../src/analysis/flakiness.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,cAAc,EAEd,GAAG,EAEH,UAAU,EAEV,WAAW,EACZ,MAAM,aAAa,CAAC;AA4JrB,sEAAsE;AACtE,wBAAgB,YAAY,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,WAAW,GAAG,MAAM,CAKnE;AAED;;;GAGG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,cAAc,CAsBnD;AAED,4DAA4D;AAC5D,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,GAAG,CAExE"}