flakiness 0.277.0 → 0.279.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cli/cli.js +14 -8
- package/package.json +4 -4
- package/types/tsconfig.tsbuildinfo +1 -1
package/lib/cli/cli.js
CHANGED
|
@@ -1034,7 +1034,7 @@ import path7 from "path";
|
|
|
1034
1034
|
// ../package.json
|
|
1035
1035
|
var package_default = {
|
|
1036
1036
|
name: "@flakiness/monorepo",
|
|
1037
|
-
version: "0.277.0",
|
|
1037
|
+
version: "0.279.0",
|
|
1038
1038
|
type: "module",
|
|
1039
1039
|
private: true,
|
|
1040
1040
|
scripts: {
|
|
@@ -2076,6 +2076,8 @@ async function cmdListTests(options) {
|
|
|
2076
2076
|
direction: options.sortDir
|
|
2077
2077
|
},
|
|
2078
2078
|
fql: options.fql,
|
|
2079
|
+
// Absent => the server falls back to the project's default history type.
|
|
2080
|
+
historyConfig: options.history ? { type: options.history } : void 0,
|
|
2079
2081
|
timelineSplit: buildTimelineSplit(options)
|
|
2080
2082
|
}).then((response) => response.page);
|
|
2081
2083
|
let scope = "default branch";
|
|
@@ -2102,7 +2104,8 @@ async function cmdListTests(options) {
|
|
|
2102
2104
|
const status = outcomeToStatus(testStats.outcome);
|
|
2103
2105
|
const duration = humanReadableMs(testStats.durationMs);
|
|
2104
2106
|
const trend = formatDurationTrend(testStats.durationChangeMs, testStats.durationMs);
|
|
2105
|
-
const flipRate =
|
|
2107
|
+
const flipRate = formatRate(testStats.flipRate);
|
|
2108
|
+
const failRate = formatRate(testStats.failRate);
|
|
2106
2109
|
const env = formatEnv(testStats.env);
|
|
2107
2110
|
const metadata = formatEnvMetadata(testStats.env);
|
|
2108
2111
|
lines.push(`### ${index + 1}. ${fullName}`);
|
|
@@ -2113,6 +2116,7 @@ async function cmdListTests(options) {
|
|
|
2113
2116
|
lines.push(`- Status: ${asInlineCode(status)}`);
|
|
2114
2117
|
lines.push(`- Duration: ${asInlineCode(duration)} (${asInlineCode(trend)})`);
|
|
2115
2118
|
lines.push(`- Flip Rate: ${asInlineCode(flipRate)}`);
|
|
2119
|
+
lines.push(`- Fail Rate: ${asInlineCode(failRate)}`);
|
|
2116
2120
|
lines.push(`- Error: ${error ? asInlineCode(error) : "None"}`);
|
|
2117
2121
|
lines.push("");
|
|
2118
2122
|
}
|
|
@@ -2166,10 +2170,10 @@ function formatDurationTrend(changeMs, currentMs) {
|
|
|
2166
2170
|
const sign = changeMs >= 0 ? "+" : "-";
|
|
2167
2171
|
return `${sign}${humanReadableMs(Math.abs(changeMs))} (${scale})`;
|
|
2168
2172
|
}
|
|
2169
|
-
function
|
|
2170
|
-
if (
|
|
2173
|
+
function formatRate(rate) {
|
|
2174
|
+
if (rate === void 0 || !Number.isFinite(rate))
|
|
2171
2175
|
return "n/a";
|
|
2172
|
-
return `${(
|
|
2176
|
+
return `${(rate * 100).toFixed(2)}%`;
|
|
2173
2177
|
}
|
|
2174
2178
|
function asInlineCode(text) {
|
|
2175
2179
|
const normalized = text.replace(/\s+/g, " ").trim();
|
|
@@ -2192,7 +2196,7 @@ var BUNDLED_SKILLS = [
|
|
|
2192
2196
|
"files": [
|
|
2193
2197
|
{
|
|
2194
2198
|
"path": "SKILL.md",
|
|
2195
|
-
"content": "---\nname: flakiness-investigation\ndescription: >-\n TRIGGER when: CI fails, tests fail, PR checks fail, user asks about test results, regressions, flakiness, or test health.\n Always use this INSTEAD OF `gh` CLI for investigating test failures \u2014 it distinguishes regressions (caused by the PR) from pre-existing failures and flakes.\n DO NOT TRIGGER when: the user is asking about non-test CI issues (e.g. build failures, deployment problems).\n---\n\n# Flakiness Investigation\n\nUse the `flakiness` CLI to query and analyze test data from Flakiness.io.\n\n## Prerequisites\n\n- The `flakiness` CLI must be available in PATH (installed via `npm install -g flakiness` or `npx flakiness`).\n- A project must be specified: `--project <org/project>` or set `FLAKINESS_PROJECT` env var.\n- Authentication: run `flakiness auth login` first, or set `FLAKINESS_ACCESS_TOKEN`.\n\n## Core command\n\n```bash\nflakiness list tests --project <org/project> [--pr <number>] [--branch <name>] [--fql <query>] [--sort <axis>] [--sort-dir asc|desc] [--page <n>] [--page-size <n>] [--env-name <value>] [--env-path <value>] [--env-category <value>] [--env-os <value>] [--env-arch <value>] [--env-metadata <key=value>]\n```\n\n### Scope options\n\n| Flag | Effect |\n|------|--------|\n| *(none)* | Default branch \u2014 tests from the day of its head commit (in the project's timezone) |\n| `--pr <number>` | Pull request \u2014 tests from the merge-commit of the PR branch into the target branch |\n| `--branch <name>` | Named branch \u2014 tests from the day of its head commit (in the project's timezone) |\n\n`--pr` and `--branch` are mutually exclusive.\n\n### Environment filter options\n\nFilter results by environment. All flags are repeatable. 
Multiple values for the same flag are OR'd; different flags are AND'd.\n\n| Flag | Filters by | Example |\n|------|-----------|---------|\n| `--env-name <value>` | Environment name | `--env-name chromium` |\n| `--env-path <value>` | Config file path | `--env-path playwright.config.ts` |\n| `--env-category <value>` | Test category | `--env-category playwright` |\n| `--env-os <value>` | OS name + version | `--env-os \"Ubuntu 22.04\"` |\n| `--env-arch <value>` | CPU architecture | `--env-arch x86_64` |\n| `--env-metadata <key=value>` | User-supplied metadata | `--env-metadata browser=chromium` |\n\nThe output for each test shows environment info in the same `key=value` format:\n- **Env** line: system-collected data with keys matching the `--env-*` flag suffixes (`category=`, `name=`, `path=`, `os=`, `arch=`)\n- **Env Metadata** line (if any): user-supplied key=value pairs\n\nThis makes it easy to copy a value from the output and use it as a filter flag.\n\n### Status semantics with `--pr`\n\nWhen querying a PR, the status values have specific meaning:\n\n| Status | Meaning |\n|--------|---------|\n| `regressed` | Test was passing on the target branch but fails in this PR \u2014 **caused by the PR** |\n| `failed` | Test also fails on the target branch \u2014 **pre-existing failure, not caused by the PR** |\n| `flaked` | Test failed but passed on retry \u2014 flaky, not a real failure |\n| `passed` | Test passes |\n| `skipped` | Test was skipped |\n\n### Sort axes\n\n- `outcome` \u2014 sort by test outcome severity (default)\n- `flip_rate` \u2014 sort by flip rate (how often a test flips between pass/fail)\n- `fail_rate` \u2014 sort by fail rate (share of commits where the test's terminal status was failing)\n- `duration` \u2014 sort by test duration\n- `duration_trend` \u2014 sort by duration trend\n- `name` \u2014 sort alphabetically\n\n### Common queries\n\n| Goal | FQL + flags |\n|------|-------------|\n| Flaky tests | `--fql 'flip>0%' --sort flip_rate --sort-dir 
desc` |\n| Very flaky (>50%) | `--fql 'flip>50%' --sort flip_rate --sort-dir desc` |\n| Tests that have failed (terminal failure) | `--fql 'fail>0%' --sort fail_rate --sort-dir desc` |\n| Tests that fail most often (>50%) | `--fql 'fail>50%' --sort fail_rate --sort-dir desc` |\n| Failed tests | `--fql 's:failed' --sort outcome --sort-dir desc` |\n| Regressions | `--fql 's:regressed'` |\n| All broken tests | `--fql 'status:(failed, regressed)'` |\n| Slow tests | `--fql 'd>5s' --sort duration --sort-dir desc` |\n| Tests matching error | `--fql '$timeout'` |\n| Tests in file | `--fql 'f:login.spec.ts'` |\n| Tests on Linux | `--env-os \"Ubuntu 22.04\"` |\n| Tests for chromium project | `--env-name chromium` |\n| Tests with specific metadata | `--env-metadata browser=firefox` |\n| Linux or macOS failures | `--env-os \"Ubuntu 22.04\" --env-os \"Darwin 24.0\" --fql 's:failed'` |\n\n## FQL (Filter Query Language)\n\nFull reference: [references/fql.md](references/fql.md)\n\nKey rules:\n- Multiple tokens combine with AND: `s:failed f:e2e` means \"failed AND in e2e files\"\n- Prefix with `-` to exclude: `-#smoke` excludes smoke-tagged tests\n- Same filter type uses OR: `status:(failed, regressed)` means \"failed OR regressed\"\n- Quote values with spaces: `f:'tests/e2e checkout'`\n\n### Filter types\n\n| Filter | Syntax | Example |\n|--------|--------|---------|\n| Text search | `<text>` | `login` |\n| Status | `s:<status>` | `s:failed`, `status:(failed, regressed)` |\n| File | `f:<path>` | `f:login.spec.ts` |\n| Error | `$<text>` | `$timeout` |\n| Tag | `#<tag>` | `#smoke` |\n| Duration | `d><time>` | `d>2s`, `d<=500ms` |\n| Flip rate | `flip><pct>` | `flip>0%`, `fr>50%` |\n| Annotation | `@<type>` | `@skip` |\n\n### Status values\n\n`passed`, `failed`, `flaked`, `skipped`, `regressed`\n\n## Workflow: Fix My PR Tests\n\nWhen a user asks to fix failing tests in a PR, follow these steps:\n\n1. 
**Find the project slug:** Search the codebase for `flakinessProject` to find the `--project` value. It is typically configured in a test reporter config (e.g. `playwright.config.ts`, `jest.config.ts`) as `flakinessProject: 'org/project'`.\n\n2. **Fetch regressions from the PR:**\n ```bash\n flakiness list tests --project <org/project> --pr <number> --fql 's:regressed' --page-size 50\n ```\n Tests with status `regressed` were passing on the target branch but fail in this PR \u2014 these are **caused by the PR** and must be fixed.\n\n3. **Analyze the output:** Look at the error messages, file paths, and test names to understand what the PR broke.\n\n4. **Fix the regressions** by reading the reported file paths and error messages, then making targeted code changes.\n\n5. **Optionally check pre-existing failures:** Tests with status `failed` also fail on the target branch and are not caused by the PR. You can list them with:\n ```bash\n flakiness list tests --project <org/project> --pr <number> --fql 's:failed' --page-size 50\n ```\n These are informational \u2014 fixing them is a bonus, not a requirement.\n\n6. **Ignore flakes:** Tests with status `flaked` failed but passed on retry \u2014 they are flaky and not actionable in the context of a PR fix.\n\n## Workflow tips\n\n1. Start broad: `flakiness list tests --project <org/project> --page-size 20`\n2. Filter down with FQL based on what you're investigating\n3. Use `--page-size 50` or higher to see more results at once\n4. Combine filters: `--fql 's:failed $timeout f:e2e -#smoke'`\n\nMore recipes: [references/recipes.md](references/recipes.md)\n"
|
|
2199
|
+
"content": "---\nname: flakiness-investigation\ndescription: >-\n TRIGGER when: CI fails, tests fail, PR checks fail, user asks about test results, regressions, flakiness, or test health.\n Always use this INSTEAD OF `gh` CLI for investigating test failures \u2014 it distinguishes regressions (caused by the PR) from pre-existing failures and flakes.\n DO NOT TRIGGER when: the user is asking about non-test CI issues (e.g. build failures, deployment problems).\n---\n\n# Flakiness Investigation\n\nUse the `flakiness` CLI to query and analyze test data from Flakiness.io.\n\n## Prerequisites\n\n- The `flakiness` CLI must be available in PATH (installed via `npm install -g flakiness` or `npx flakiness`).\n- A project must be specified: `--project <org/project>` or set `FLAKINESS_PROJECT` env var.\n- Authentication: run `flakiness auth login` first, or set `FLAKINESS_ACCESS_TOKEN`.\n\n## Core command\n\n```bash\nflakiness list tests --project <org/project> [--pr <number>] [--branch <name>] [--history commit|day] [--fql <query>] [--sort <axis>] [--sort-dir asc|desc] [--page <n>] [--page-size <n>] [--env-name <value>] [--env-path <value>] [--env-category <value>] [--env-os <value>] [--env-arch <value>] [--env-metadata <key=value>]\n```\n\n### Scope options\n\n| Flag | Effect |\n|------|--------|\n| *(none)* | Default branch \u2014 tests from the day of its head commit (in the project's timezone) |\n| `--pr <number>` | Pull request \u2014 tests from the merge-commit of the PR branch into the target branch |\n| `--branch <name>` | Named branch \u2014 tests from the day of its head commit (in the project's timezone) |\n\n`--pr` and `--branch` are mutually exclusive.\n\n### History granularity (`--history`)\n\nControls how the preceding history is bucketed \u2014 which affects the duration trend, flip rate, fail rate, and the per-test history. 
Omit it to use the project's configured default.\n\n| Value | Buckets history by | Use when |\n|-------|--------------------|----------|\n| `--history commit` | Each commit | Pinpointing the exact commit that changed a test's behavior or duration. |\n| `--history day` | Each day (project timezone) | Reviewing a test's **performance over time** \u2014 day-over-day duration trends and flip/fail rates are smoother and less noisy than commit-to-commit, so slow drifts and recurring flakes stand out. |\n\nFor \"is this test getting slower / flakier over time?\" questions, prefer `--history day`:\n\n```bash\nflakiness list tests --project <org/project> --history day --sort duration_trend --sort-dir desc\nflakiness list tests --project <org/project> --history day --fql 'flip>0%' --sort flip_rate --sort-dir desc\n```\n\n### Environment filter options\n\nFilter results by environment. All flags are repeatable. Multiple values for the same flag are OR'd; different flags are AND'd.\n\n| Flag | Filters by | Example |\n|------|-----------|---------|\n| `--env-name <value>` | Environment name | `--env-name chromium` |\n| `--env-path <value>` | Config file path | `--env-path playwright.config.ts` |\n| `--env-category <value>` | Test category | `--env-category playwright` |\n| `--env-os <value>` | OS name + version | `--env-os \"Ubuntu 22.04\"` |\n| `--env-arch <value>` | CPU architecture | `--env-arch x86_64` |\n| `--env-metadata <key=value>` | User-supplied metadata | `--env-metadata browser=chromium` |\n\nThe output for each test shows environment info in the same `key=value` format:\n- **Env** line: system-collected data with keys matching the `--env-*` flag suffixes (`category=`, `name=`, `path=`, `os=`, `arch=`)\n- **Env Metadata** line (if any): user-supplied key=value pairs\n\nThis makes it easy to copy a value from the output and use it as a filter flag.\n\n### Status semantics with `--pr`\n\nWhen querying a PR, the status values have specific meaning:\n\n| Status | 
Meaning |\n|--------|---------|\n| `regressed` | Test was passing on the target branch but fails in this PR \u2014 **caused by the PR** |\n| `failed` | Test also fails on the target branch \u2014 **pre-existing failure, not caused by the PR** |\n| `flaked` | Test failed but passed on retry \u2014 flaky, not a real failure |\n| `passed` | Test passes |\n| `skipped` | Test was skipped |\n\n### Sort axes\n\n- `outcome` \u2014 sort by test outcome severity (default)\n- `flip_rate` \u2014 sort by flip rate (how often a test flips between pass/fail)\n- `fail_rate` \u2014 sort by fail rate (share of commits where the test's terminal status was failing)\n- `duration` \u2014 sort by test duration\n- `duration_trend` \u2014 sort by duration trend\n- `name` \u2014 sort alphabetically\n\n### Common queries\n\n| Goal | FQL + flags |\n|------|-------------|\n| Flaky tests | `--fql 'flip>0%' --sort flip_rate --sort-dir desc` |\n| Very flaky (>50%) | `--fql 'flip>50%' --sort flip_rate --sort-dir desc` |\n| Tests that have failed (terminal failure) | `--fql 'fail>0%' --sort fail_rate --sort-dir desc` |\n| Tests that fail most often (>50%) | `--fql 'fail>50%' --sort fail_rate --sort-dir desc` |\n| Failed tests | `--fql 's:failed' --sort outcome --sort-dir desc` |\n| Regressions | `--fql 's:regressed'` |\n| All broken tests | `--fql 'status:(failed, regressed)'` |\n| Slow tests | `--fql 'd>5s' --sort duration --sort-dir desc` |\n| Tests matching error | `--fql '$timeout'` |\n| Tests in file | `--fql 'f:login.spec.ts'` |\n| Tests on Linux | `--env-os \"Ubuntu 22.04\"` |\n| Tests for chromium project | `--env-name chromium` |\n| Tests with specific metadata | `--env-metadata browser=firefox` |\n| Linux or macOS failures | `--env-os \"Ubuntu 22.04\" --env-os \"Darwin 24.0\" --fql 's:failed'` |\n\n## FQL (Filter Query Language)\n\nFull reference: [references/fql.md](references/fql.md)\n\nKey rules:\n- Multiple tokens combine with AND: `s:failed f:e2e` means \"failed AND in e2e files\"\n- 
Prefix with `-` to exclude: `-#smoke` excludes smoke-tagged tests\n- Same filter type uses OR: `status:(failed, regressed)` means \"failed OR regressed\"\n- Quote values with spaces: `f:'tests/e2e checkout'`\n\n### Filter types\n\n| Filter | Syntax | Example |\n|--------|--------|---------|\n| Text search | `<text>` | `login` |\n| Status | `s:<status>` | `s:failed`, `status:(failed, regressed)` |\n| File | `f:<path>` | `f:login.spec.ts` |\n| Error | `$<text>` | `$timeout` |\n| Tag | `#<tag>` | `#smoke` |\n| Duration | `d><time>` | `d>2s`, `d<=500ms` |\n| Flip rate | `flip><pct>` | `flip>0%`, `fr>50%` |\n| Annotation | `@<type>` | `@skip` |\n\n### Status values\n\n`passed`, `failed`, `flaked`, `skipped`, `regressed`\n\n## Workflow: Fix My PR Tests\n\nWhen a user asks to fix failing tests in a PR, follow these steps:\n\n1. **Find the project slug:** Search the codebase for `flakinessProject` to find the `--project` value. It is typically configured in a test reporter config (e.g. `playwright.config.ts`, `jest.config.ts`) as `flakinessProject: 'org/project'`.\n\n2. **Fetch regressions from the PR:**\n ```bash\n flakiness list tests --project <org/project> --pr <number> --fql 's:regressed' --page-size 50\n ```\n Tests with status `regressed` were passing on the target branch but fail in this PR \u2014 these are **caused by the PR** and must be fixed.\n\n3. **Analyze the output:** Look at the error messages, file paths, and test names to understand what the PR broke.\n\n4. **Fix the regressions** by reading the reported file paths and error messages, then making targeted code changes.\n\n5. **Optionally check pre-existing failures:** Tests with status `failed` also fail on the target branch and are not caused by the PR. You can list them with:\n ```bash\n flakiness list tests --project <org/project> --pr <number> --fql 's:failed' --page-size 50\n ```\n These are informational \u2014 fixing them is a bonus, not a requirement.\n\n6. 
**Ignore flakes:** Tests with status `flaked` failed but passed on retry \u2014 they are flaky and not actionable in the context of a PR fix.\n\n## Workflow tips\n\n1. Start broad: `flakiness list tests --project <org/project> --page-size 20`\n2. Filter down with FQL based on what you're investigating\n3. Use `--page-size 50` or higher to see more results at once\n4. Combine filters: `--fql 's:failed $timeout f:e2e -#smoke'`\n\nMore recipes: [references/recipes.md](references/recipes.md)\n"
|
|
2196
2200
|
},
|
|
2197
2201
|
{
|
|
2198
2202
|
"path": "references/fql.md",
|
|
@@ -2200,7 +2204,7 @@ var BUNDLED_SKILLS = [
|
|
|
2200
2204
|
},
|
|
2201
2205
|
{
|
|
2202
2206
|
"path": "references/recipes.md",
|
|
2203
|
-
"content": "# Investigation Recipes\n\nReplace `myorg/myproject` with the target project slug, or set `FLAKINESS_PROJECT`.\n\n## Query a pull request\n\nShows tests from the merge-commit of the PR branch into the target branch.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42\n```\n\n## Find PR regressions (caused by the PR)\n\nTests marked `regressed` were passing on the target branch but fail in this PR.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 's:regressed'\n```\n\n## Find pre-existing failures in a PR\n\nTests marked `failed` also fail on the target branch \u2014 not caused by the PR.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 's:failed'\n```\n\n## Find all broken tests in a PR\n\nIncludes both PR-caused regressions and pre-existing failures.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 'status:(failed, regressed)' --page-size 50\n```\n\n## Query a specific branch\n\nShows tests from the day of the branch's head commit, in the project's timezone.\n\n```bash\nflakiness list tests --project myorg/myproject --branch feature/login\n```\n\n## Find all flaky tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'flip>0%' --sort flip_rate --sort-dir desc\n```\n\n## Find the most flaky tests (>50% flip rate)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'flip>50%' --sort flip_rate --sort-dir desc\n```\n\n## Find tests that have ever failed (terminal failure, retries exhausted)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'fail>0%' --sort fail_rate --sort-dir desc\n```\n\n## Find tests that fail more than half the time\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'fail>50%' --sort fail_rate --sort-dir desc\n```\n\n## Show failed tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:failed' --sort outcome --sort-dir desc\n```\n\n## Show regressions\n\n```bash\nflakiness 
list tests --project myorg/myproject --fql 's:regressed'\n```\n\n## Show all currently broken tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'status:(failed, regressed)' --sort outcome --sort-dir desc\n```\n\n## Show flaked tests (passed on retry)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:flaked' --sort flip_rate --sort-dir desc\n```\n\n## Find slow tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'd>5s' --sort duration --sort-dir desc\n```\n\n## Find tests by error text\n\n```bash\nflakiness list tests --project myorg/myproject --fql '$timeout'\nflakiness list tests --project myorg/myproject --fql '$\"network error\"'\n```\n\n## Combine filters\n\nFailed tests in e2e files, excluding smoke-tagged tests:\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:failed f:e2e -#smoke'\n```\n\n## Narrow to a specific file\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'f:login.spec.ts'\n```\n\n## Find tests with a specific annotation\n\n```bash\nflakiness list tests --project myorg/myproject --fql '@skip'\nflakiness list tests --project myorg/myproject --fql '@fixme'\n```\n\n## Filter by environment\n\n### Show tests for a specific OS\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\"\n```\n\n### Show tests for a specific environment name\n\n```bash\nflakiness list tests --project myorg/myproject --env-name chromium\n```\n\n### Show tests for a specific architecture\n\n```bash\nflakiness list tests --project myorg/myproject --env-arch arm64\n```\n\n### Show tests for multiple OSes (OR)\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --env-os \"Darwin 24.0\"\n```\n\n### Combine env filter with FQL\n\nFailed tests on Linux only:\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --fql 's:failed'\n```\n\n### Filter by user-supplied metadata\n\n```bash\nflakiness list 
tests --project myorg/myproject --env-metadata browser=firefox\n```\n\n### Combine multiple env filters (AND)\n\nTests on Linux with arm64 architecture:\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --env-arch arm64\n```\n"
|
|
2207
|
+
"content": "# Investigation Recipes\n\nReplace `myorg/myproject` with the target project slug, or set `FLAKINESS_PROJECT`.\n\n## Query a pull request\n\nShows tests from the merge-commit of the PR branch into the target branch.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42\n```\n\n## Find PR regressions (caused by the PR)\n\nTests marked `regressed` were passing on the target branch but fail in this PR.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 's:regressed'\n```\n\n## Find pre-existing failures in a PR\n\nTests marked `failed` also fail on the target branch \u2014 not caused by the PR.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 's:failed'\n```\n\n## Find all broken tests in a PR\n\nIncludes both PR-caused regressions and pre-existing failures.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 'status:(failed, regressed)' --page-size 50\n```\n\n## Query a specific branch\n\nShows tests from the day of the branch's head commit, in the project's timezone.\n\n```bash\nflakiness list tests --project myorg/myproject --branch feature/login\n```\n\n## Find all flaky tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'flip>0%' --sort flip_rate --sort-dir desc\n```\n\n## Find the most flaky tests (>50% flip rate)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'flip>50%' --sort flip_rate --sort-dir desc\n```\n\n## Find tests that have ever failed (terminal failure, retries exhausted)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'fail>0%' --sort fail_rate --sort-dir desc\n```\n\n## Find tests that fail more than half the time\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'fail>50%' --sort fail_rate --sort-dir desc\n```\n\n## Show failed tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:failed' --sort outcome --sort-dir desc\n```\n\n## Show regressions\n\n```bash\nflakiness 
list tests --project myorg/myproject --fql 's:regressed'\n```\n\n## Show all currently broken tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'status:(failed, regressed)' --sort outcome --sort-dir desc\n```\n\n## Show flaked tests (passed on retry)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:flaked' --sort flip_rate --sort-dir desc\n```\n\n## Find slow tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'd>5s' --sort duration --sort-dir desc\n```\n\n## Find tests by error text\n\n```bash\nflakiness list tests --project myorg/myproject --fql '$timeout'\nflakiness list tests --project myorg/myproject --fql '$\"network error\"'\n```\n\n## Combine filters\n\nFailed tests in e2e files, excluding smoke-tagged tests:\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:failed f:e2e -#smoke'\n```\n\n## Narrow to a specific file\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'f:login.spec.ts'\n```\n\n## Find tests with a specific annotation\n\n```bash\nflakiness list tests --project myorg/myproject --fql '@skip'\nflakiness list tests --project myorg/myproject --fql '@fixme'\n```\n\n## Filter by environment\n\n### Show tests for a specific OS\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\"\n```\n\n### Show tests for a specific environment name\n\n```bash\nflakiness list tests --project myorg/myproject --env-name chromium\n```\n\n### Show tests for a specific architecture\n\n```bash\nflakiness list tests --project myorg/myproject --env-arch arm64\n```\n\n### Show tests for multiple OSes (OR)\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --env-os \"Darwin 24.0\"\n```\n\n### Combine env filter with FQL\n\nFailed tests on Linux only:\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --fql 's:failed'\n```\n\n### Filter by user-supplied metadata\n\n```bash\nflakiness list 
tests --project myorg/myproject --env-metadata browser=firefox\n```\n\n### Combine multiple env filters (AND)\n\nTests on Linux with arm64 architecture:\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --env-arch arm64\n```\n\n## Review performance over time (day history)\n\nBucket history by day instead of by commit so duration trends and flip/fail rates read as a smooth day-over-day time series.\n\n### Tests getting slower over time\n\n```bash\nflakiness list tests --project myorg/myproject --history day --sort duration_trend --sort-dir desc\n```\n\n### Recurring flakes, day over day\n\n```bash\nflakiness list tests --project myorg/myproject --history day --fql 'flip>0%' --sort flip_rate --sort-dir desc\n```\n\n### Force per-commit history (override a project default of `day`)\n\n```bash\nflakiness list tests --project myorg/myproject --history commit\n```\n"
|
|
2204
2208
|
}
|
|
2205
2209
|
]
|
|
2206
2210
|
}
|
|
@@ -2333,6 +2337,7 @@ var optTestsPR = new Option("--pr <number>", "Show tests from a specific pull re
|
|
|
2333
2337
|
return parsed;
|
|
2334
2338
|
}).conflicts("branch");
|
|
2335
2339
|
var optTestsBranch = new Option("--branch <name>", "Show tests from a specific branch").conflicts("pr");
|
|
2340
|
+
var optTestsHistory = new Option("--history <granularity>", "History granularity for trends, flip/fail rates, and the history column: per `commit` or per `day` (defaults to the project setting)").choices(["commit", "day"]);
|
|
2336
2341
|
function collectValues(value, prev) {
|
|
2337
2342
|
return [...prev ?? [], value];
|
|
2338
2343
|
}
|
|
@@ -2346,7 +2351,7 @@ var optTestsEnvMetadata = new Option("--env-metadata <key=value>", "Filter by us
|
|
|
2346
2351
|
throw new Error(`Invalid env-metadata format '${value}'; expected key=value`);
|
|
2347
2352
|
return [...prev ?? [], value];
|
|
2348
2353
|
});
|
|
2349
|
-
list.command("tests").description("Query tests data. Defaults to the default branch (day of head commit in project timezone). Use --pr for PR merge-commit tests, or --branch for a named branch.").addOption(optTestsPage).addOption(optTestsPageSize).addOption(optTestsSort).addOption(optTestsSortDir).addOption(optTestsFQL).addOption(optTestsPR).addOption(optTestsBranch).addOption(optTestsEnvName).addOption(optTestsEnvPath).addOption(optTestsEnvCategory).addOption(optTestsEnvOs).addOption(optTestsEnvArch).addOption(optTestsEnvMetadata).addOption(mustFlakinessProject).addOption(optEndpoint).addOption(optAccessToken).action(async (options) => runCommand(async () => {
|
|
2354
|
+
list.command("tests").description("Query tests data. Defaults to the default branch (day of head commit in project timezone). Use --pr for PR merge-commit tests, or --branch for a named branch.").addOption(optTestsPage).addOption(optTestsPageSize).addOption(optTestsSort).addOption(optTestsSortDir).addOption(optTestsFQL).addOption(optTestsPR).addOption(optTestsBranch).addOption(optTestsHistory).addOption(optTestsEnvName).addOption(optTestsEnvPath).addOption(optTestsEnvCategory).addOption(optTestsEnvOs).addOption(optTestsEnvArch).addOption(optTestsEnvMetadata).addOption(mustFlakinessProject).addOption(optEndpoint).addOption(optAccessToken).action(async (options) => runCommand(async () => {
|
|
2350
2355
|
await cmdListTests({
|
|
2351
2356
|
page: options.page,
|
|
2352
2357
|
pageSize: options.pageSize,
|
|
@@ -2355,6 +2360,7 @@ list.command("tests").description("Query tests data. Defaults to the default bra
|
|
|
2355
2360
|
fql: options.fql,
|
|
2356
2361
|
pr: options.pr,
|
|
2357
2362
|
branch: options.branch,
|
|
2363
|
+
history: options.history,
|
|
2358
2364
|
envName: options.envName,
|
|
2359
2365
|
envPath: options.envPath,
|
|
2360
2366
|
envCategory: options.envCategory,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "flakiness",
|
|
3
|
-
"version": "0.277.0",
|
|
3
|
+
"version": "0.279.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"bin": {
|
|
6
6
|
"flakiness": "./lib/cli/cli.js"
|
|
@@ -19,12 +19,12 @@
|
|
|
19
19
|
"author": "Degu Labs, Inc",
|
|
20
20
|
"license": "MIT",
|
|
21
21
|
"devDependencies": {
|
|
22
|
-
"@playwright/test": "^1.
|
|
22
|
+
"@playwright/test": "^1.61.0",
|
|
23
23
|
"@types/debug": "^4.1.13",
|
|
24
24
|
"@types/express": "^4.17.25",
|
|
25
25
|
"gray-matter": "^4.0.3",
|
|
26
|
-
"@flakiness/server": "0.
|
|
27
|
-
"@flakiness/shared": "0.
|
|
26
|
+
"@flakiness/server": "0.279.0",
|
|
27
|
+
"@flakiness/shared": "0.279.0"
|
|
28
28
|
},
|
|
29
29
|
"dependencies": {
|
|
30
30
|
"@flakiness/flakiness-report": "^0.34.0",
|