flakiness 0.218.0 → 0.220.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cli/cli.js CHANGED
@@ -1028,7 +1028,7 @@ import path7 from "path";
1028
1028
  // ../package.json
1029
1029
  var package_default = {
1030
1030
  name: "@flakiness/monorepo",
1031
- version: "0.218.0",
1031
+ version: "0.220.0",
1032
1032
  type: "module",
1033
1033
  private: true,
1034
1034
  scripts: {
@@ -1519,6 +1519,35 @@ import assert from "assert";
1519
1519
  import fs2 from "fs";
1520
1520
  import mime from "mime";
1521
1521
  import path2 from "path";
1522
+ import { Temporal } from "temporal-polyfill";
1523
// Lazily-built cache: short time zone name as rendered by the "en-US" locale
// (e.g. "PST", "GMT+1") -> IANA time zone identifier. Stays undefined until
// the first successful call to tzAbbreviationToIANA().
var gTZAbbreviationToIANATimezone;

/**
 * Resolves a short time zone name to an IANA time zone identifier.
 *
 * On the first call, every zone reported by `Intl.supportedValuesOf("timeZone")`
 * is formatted at two probe instants (mid-June and mid-January 2026) and the
 * resulting short names are recorded. Presumably the two probes exist to capture
 * both the daylight-saving and the standard-time name of each zone.
 *
 * When several zones share the same short name, the zone visited last wins.
 *
 * @param {string} tz Short time zone name, e.g. "PST".
 * @returns {string|undefined} An IANA zone identifier, or undefined when the
 *   name was not produced by any supported zone.
 */
function tzAbbreviationToIANA(tz) {
  if (!gTZAbbreviationToIANATimezone) {
    // Build into a local map and publish it only once complete, so that an
    // exception thrown half-way through never leaves a partial cache behind.
    const abbreviations = /* @__PURE__ */ new Map();
    const probes = [/* @__PURE__ */ new Date("2026-06-15T12:00:00Z"), /* @__PURE__ */ new Date("2026-01-15T12:00:00Z")];
    for (const zone of Intl.supportedValuesOf("timeZone")) {
      // The formatter depends only on the zone, so create it once per zone
      // instead of once per (zone, probe) pair.
      const formatter = new Intl.DateTimeFormat("en-US", { timeZone: zone, timeZoneName: "short" });
      for (const date of probes) {
        const abbr = formatter.formatToParts(date).find((part) => part.type === "timeZoneName")?.value;
        if (abbr)
          abbreviations.set(abbr, zone);
      }
    }
    gTZAbbreviationToIANATimezone = abbreviations;
  }
  return gTZAbbreviationToIANATimezone.get(tz);
}
1539
/**
 * Converts a timestamp string into epoch milliseconds.
 *
 * The string is first handed to the native `Date` parser. If that fails, the
 * string is expected to have the form "<local date-time> <zone abbreviation>"
 * (two whitespace-separated tokens); the abbreviation is resolved to an IANA
 * zone via tzAbbreviationToIANA() and the local date-time is interpreted in
 * that zone.
 *
 * Note that the offset applied is whatever the resolved zone's rules give at
 * that local date-time, not an offset tied to the abbreviation itself.
 *
 * @param {string} timestamp Timestamp text.
 * @returns {number} Milliseconds since the Unix epoch.
 * @throws {Error} "failed to parse timestamp: ..." when neither strategy works.
 */
function parseTimestamp(timestamp) {
  const native = new Date(timestamp).getTime();
  if (!Number.isNaN(native))
    return native;
  // Trim first so surrounding whitespace does not produce empty tokens and
  // defeat the two-token check below.
  const parts = timestamp.trim().split(/\s+/);
  const iana = parts.length === 2 ? tzAbbreviationToIANA(parts[1]) : void 0;
  if (iana) {
    try {
      return Temporal.PlainDateTime.from(parts[0]).toZonedDateTime(iana).epochMilliseconds;
    } catch (error) {
      // Surface a malformed date-time portion with the same message as every
      // other parse failure, keeping the underlying error as the cause.
      throw new Error(`failed to parse timestamp: ${timestamp}`, { cause: error });
    }
  }
  throw new Error(`failed to parse timestamp: ${timestamp}`);
}
1522
1551
  function getProperties(element) {
1523
1552
  const propertiesNodes = element.children.filter((node) => node instanceof XmlElement).filter((node) => node.name === "properties");
1524
1553
  if (!propertiesNodes.length)
@@ -1583,7 +1612,7 @@ async function traverseJUnitReport(context, node) {
1583
1612
  return;
1584
1613
  let { currentEnv, currentEnvIndex, currentSuite, report, currentTimeMs } = context;
1585
1614
  if (element.attributes["timestamp"])
1586
- currentTimeMs = new Date(element.attributes["timestamp"]).getTime();
1615
+ currentTimeMs = parseTimestamp(element.attributes["timestamp"]);
1587
1616
  if (element.name === "testsuite") {
1588
1617
  const file = element.attributes["file"];
1589
1618
  const line = parseInt(element.attributes["line"], 10);
@@ -1607,12 +1636,12 @@ async function traverseJUnitReport(context, node) {
1607
1636
  report.suites.push(newSuite);
1608
1637
  }
1609
1638
  currentSuite = newSuite;
1610
- const userSuppliedData = getProperties(element);
1611
- if (userSuppliedData.length) {
1639
+ const metadata = getProperties(element);
1640
+ if (metadata.length) {
1612
1641
  currentEnv = structuredClone(currentEnv);
1613
- currentEnv.userSuppliedData ??= {};
1614
- for (const [key, value] of userSuppliedData)
1615
- currentEnv.userSuppliedData[key] = value;
1642
+ currentEnv.metadata ??= {};
1643
+ for (const [key, value] of metadata)
1644
+ currentEnv.metadata[key] = value;
1616
1645
  currentEnvIndex = report.environments.push(currentEnv) - 1;
1617
1646
  }
1618
1647
  } else if (element.name === "testcase") {
@@ -1964,6 +1993,41 @@ function durationTrendScale(currentMs, baseMs) {
1964
1993
  }
1965
1994
 
1966
1995
  // src/cli/cmd-list-tests.ts
1996
/**
 * Builds the `timelineSplit` request value from the environment filter options.
 *
 * System filters are emitted in a fixed order (name, path, category, osname,
 * osarch) for every option that has at least one value. `envMetadata` entries
 * are "key=value" strings; values for the same key are grouped together in
 * first-seen key order. Only the first "=" separates key from value, so the
 * value itself may contain "=".
 *
 * @param {object} filters Options object; reads `envName`, `envPath`,
 *   `envCategory`, `envOs`, `envArch` and `envMetadata` (each an optional
 *   array of strings).
 * @returns {object|undefined} The split descriptor, or undefined when no
 *   filter was supplied.
 * @throws {Error} When an `envMetadata` entry contains no "=".
 */
function buildTimelineSplit(filters) {
  const systemSources = [
    ["name", filters.envName],
    ["path", filters.envPath],
    ["category", filters.envCategory],
    ["osname", filters.envOs],
    ["osarch", filters.envArch]
  ];
  const systemFilters = [];
  for (const [name, values] of systemSources) {
    if (values?.length)
      systemFilters.push({ name, values });
  }
  const userFilters = /* @__PURE__ */ new Map();
  for (const entry of filters.envMetadata ?? []) {
    const eqIndex = entry.indexOf("=");
    // Without this guard indexOf's -1 would make slice(0, -1) drop the last
    // character of the key and turn the whole entry into the value.
    if (eqIndex === -1)
      throw new Error(`Invalid env-metadata format '${entry}'; expected key=value`);
    const key = entry.slice(0, eqIndex);
    const value = entry.slice(eqIndex + 1);
    let values = userFilters.get(key);
    if (!values) {
      values = [];
      userFilters.set(key, values);
    }
    values.push(value);
  }
  if (!systemFilters.length && !userFilters.size)
    return void 0;
  return {
    splitByDefault: true,
    filters: {
      system: systemFilters.length ? systemFilters : void 0,
      user: userFilters.size ? Array.from(userFilters, ([name, values]) => ({ name, values })) : void 0
    },
    inverse: {}
  };
}
1967
2031
  async function cmdListTests(options) {
1968
2032
  const [orgSlug, projectSlug] = options.flakinessProject.split("/");
1969
2033
  if (!orgSlug || !projectSlug)
@@ -1992,7 +2056,8 @@ async function cmdListTests(options) {
1992
2056
  direction: options.sortDir
1993
2057
  },
1994
2058
  historyBuckets: 10,
1995
- fql: options.fql
2059
+ fql: options.fql,
2060
+ timelineSplit: buildTimelineSplit(options)
1996
2061
  });
1997
2062
  let scope = "default branch";
1998
2063
  if (options.pr !== void 0)
@@ -2020,9 +2085,12 @@ async function cmdListTests(options) {
2020
2085
  const trend = formatDurationTrend(testStats.durationChangeMs, testStats.durationMs);
2021
2086
  const flipRate = formatFlipRate(testStats.flipRate);
2022
2087
  const env = formatEnv(testStats.env);
2088
+ const metadata = formatEnvMetadata(testStats.env);
2023
2089
  lines.push(`### ${index + 1}. ${fullName}`);
2024
2090
  lines.push(`- Location: ${asInlineCode(location)}`);
2025
2091
  lines.push(`- Env: ${asInlineCode(env)}`);
2092
+ if (metadata)
2093
+ lines.push(`- Env Metadata: ${asInlineCode(metadata)}`);
2026
2094
  lines.push(`- Status: ${asInlineCode(status)}`);
2027
2095
  lines.push(`- Duration: ${asInlineCode(duration)} (${asInlineCode(trend)})`);
2028
2096
  lines.push(`- Flip Rate: ${asInlineCode(flipRate)}`);
@@ -2053,18 +2121,25 @@ function outcomeToStatus(outcome) {
2053
2121
  }
2054
2122
  function formatEnv(env) {
2055
2123
  const parts = [];
2056
- const { systemData } = env;
2057
- if (systemData.osName)
2058
- parts.push(`os=${systemData.osName}`);
2059
- if (systemData.osVersion)
2060
- parts.push(`osVersion=${systemData.osVersion}`);
2061
- if (systemData.osArch)
2062
- parts.push(`arch=${systemData.osArch}`);
2063
- if (env.userSuppliedData) {
2064
- for (const [key, value] of Object.entries(env.userSuppliedData))
2065
- parts.push(`${key}=${value}`);
2066
- }
2067
- return parts.join(", ") || env.name || "unknown";
2124
+ if (env.category)
2125
+ parts.push(`category=${env.category}`);
2126
+ if (env.name)
2127
+ parts.push(`name=${env.name}`);
2128
+ if (env.configPath)
2129
+ parts.push(`path=${env.configPath}`);
2130
+ if (env.systemData.osName) {
2131
+ const os4 = [env.systemData.osName, env.systemData.osVersion].filter(Boolean).join(" ");
2132
+ parts.push(`os=${os4}`);
2133
+ }
2134
+ if (env.systemData.osArch)
2135
+ parts.push(`arch=${env.systemData.osArch}`);
2136
+ return parts.join(", ") || "unknown";
2137
+ }
2138
/**
 * Renders the user-supplied data of an environment as "key=value, key=value".
 *
 * @param {object} env Environment object; only `env.userSuppliedData` is read.
 * @returns {string|undefined} The joined pairs, or undefined when the
 *   environment has no user-supplied data or it has no entries.
 */
function formatEnvMetadata(env) {
  // NOTE(review): other code in this bundle stores the same kind of data
  // under `metadata`; confirm `userSuppliedData` is the right field here.
  const { userSuppliedData } = env;
  if (!userSuppliedData)
    return void 0;
  const pairs = [];
  for (const [key, value] of Object.entries(userSuppliedData))
    pairs.push(`${key}=${value}`);
  return pairs.length > 0 ? pairs.join(", ") : void 0;
}
2069
2144
  function formatDurationTrend(changeMs, currentMs) {
2070
2145
  const baseMs = currentMs - changeMs;
@@ -2087,17 +2162,18 @@ function asInlineCode(text) {
2087
2162
  import { execSync } from "child_process";
2088
2163
  import chalk3 from "chalk";
2089
2164
  import fs5 from "fs";
2165
+ import os3 from "os";
2090
2166
  import path5 from "path";
2091
2167
 
2092
2168
  // src/generated/bundledSkillsData.ts
2093
2169
  var BUNDLED_SKILLS = [
2094
2170
  {
2095
2171
  "name": "flakiness-investigation",
2096
- "description": "Use when querying Flakiness.io test data, writing FQL filters, finding flaky or failed tests, investigating regressions, analyzing test health, or fixing PR test failures with the flakiness CLI.",
2172
+ "description": "TRIGGER when: CI fails, tests fail, PR checks fail, user asks about test results, regressions, flakiness, or test health. Always use this INSTEAD OF `gh` CLI for investigating test failures \u2014 it distinguishes regressions (caused by the PR) from pre-existing failures and flakes. DO NOT TRIGGER when: the user is asking about non-test CI issues (e.g. build failures, deployment problems).",
2097
2173
  "files": [
2098
2174
  {
2099
2175
  "path": "SKILL.md",
2100
- "content": "---\nname: flakiness-investigation\ndescription: Use when querying Flakiness.io test data, writing FQL filters, finding flaky or failed tests, investigating regressions, analyzing test health, or fixing PR test failures with the flakiness CLI.\n---\n\n# Flakiness Investigation\n\nUse the `flakiness` CLI to query and analyze test data from Flakiness.io.\n\n## Prerequisites\n\n- The `flakiness` CLI must be available in PATH (installed via `npm install -g flakiness` or `npx flakiness`).\n- A project must be specified: `--project <org/project>` or set `FLAKINESS_PROJECT` env var.\n- Authentication: run `flakiness auth login` first, or set `FLAKINESS_ACCESS_TOKEN`.\n\n## Core command\n\n```bash\nflakiness list tests --project <org/project> [--pr <number>] [--branch <name>] [--fql <query>] [--sort <axis>] [--sort-dir asc|desc] [--page <n>] [--page-size <n>]\n```\n\n### Scope options\n\n| Flag | Effect |\n|------|--------|\n| *(none)* | Default branch \u2014 tests from the day of its head commit (in the project's timezone) |\n| `--pr <number>` | Pull request \u2014 tests from the merge-commit of the PR branch into the target branch |\n| `--branch <name>` | Named branch \u2014 tests from the day of its head commit (in the project's timezone) |\n\n`--pr` and `--branch` are mutually exclusive.\n\n### Status semantics with `--pr`\n\nWhen querying a PR, the status values have specific meaning:\n\n| Status | Meaning |\n|--------|---------|\n| `regressed` | Test was passing on the target branch but fails in this PR \u2014 **caused by the PR** |\n| `failed` | Test also fails on the target branch \u2014 **pre-existing failure, not caused by the PR** |\n| `flaked` | Test failed but passed on retry \u2014 flaky, not a real failure |\n| `passed` | Test passes |\n| `skipped` | Test was skipped |\n\n### Sort axes\n\n- `outcome` \u2014 sort by test outcome severity (default)\n- `flip_rate` \u2014 sort by flip rate (how often a test flips between pass/fail)\n- `duration` 
\u2014 sort by test duration\n- `duration_trend` \u2014 sort by duration trend\n- `name` \u2014 sort alphabetically\n\n### Common queries\n\n| Goal | FQL + flags |\n|------|-------------|\n| Flaky tests | `--fql 'flip>0%' --sort flip_rate --sort-dir desc` |\n| Very flaky (>50%) | `--fql 'flip>50%' --sort flip_rate --sort-dir desc` |\n| Failed tests | `--fql 's:failed' --sort outcome --sort-dir desc` |\n| Regressions | `--fql 's:regressed'` |\n| All broken tests | `--fql 'status:(failed, regressed)'` |\n| Slow tests | `--fql 'd>5s' --sort duration --sort-dir desc` |\n| Tests matching error | `--fql '$timeout'` |\n| Tests in file | `--fql 'f:login.spec.ts'` |\n\n## FQL (Filter Query Language)\n\nFull reference: [references/fql.md](references/fql.md)\n\nKey rules:\n- Multiple tokens combine with AND: `s:failed f:e2e` means \"failed AND in e2e files\"\n- Prefix with `-` to exclude: `-#smoke` excludes smoke-tagged tests\n- Same filter type uses OR: `status:(failed, regressed)` means \"failed OR regressed\"\n- Quote values with spaces: `f:'tests/e2e checkout'`\n\n### Filter types\n\n| Filter | Syntax | Example |\n|--------|--------|---------|\n| Text search | `<text>` | `login` |\n| Status | `s:<status>` | `s:failed`, `status:(failed, regressed)` |\n| File | `f:<path>` | `f:login.spec.ts` |\n| Error | `$<text>` | `$timeout` |\n| Tag | `#<tag>` | `#smoke` |\n| Duration | `d><time>` | `d>2s`, `d<=500ms` |\n| Flip rate | `flip><pct>` | `flip>0%`, `fr>50%` |\n| Annotation | `@<type>` | `@skip` |\n\n### Status values\n\n`passed`, `failed`, `flaked`, `skipped`, `regressed`\n\n## Workflow: Fix My PR Tests\n\nWhen a user asks to fix failing tests in a PR, follow these steps:\n\n1. **Fetch regressions from the PR:**\n ```bash\n flakiness list tests --project <org/project> --pr <number> --fql 's:regressed' --page-size 50\n ```\n Tests with status `regressed` were passing on the target branch but fail in this PR \u2014 these are **caused by the PR** and must be fixed.\n\n2. 
**Analyze the output:** Look at the error messages, file paths, and test names to understand what the PR broke.\n\n3. **Fix the regressions** by reading the reported file paths and error messages, then making targeted code changes.\n\n4. **Optionally check pre-existing failures:** Tests with status `failed` also fail on the target branch and are not caused by the PR. You can list them with:\n ```bash\n flakiness list tests --project <org/project> --pr <number> --fql 's:failed' --page-size 50\n ```\n These are informational \u2014 fixing them is a bonus, not a requirement.\n\n5. **Ignore flakes:** Tests with status `flaked` failed but passed on retry \u2014 they are flaky and not actionable in the context of a PR fix.\n\n## Workflow tips\n\n1. Start broad: `flakiness list tests --project <org/project> --page-size 20`\n2. Filter down with FQL based on what you're investigating\n3. Use `--page-size 50` or higher to see more results at once\n4. Combine filters: `--fql 's:failed $timeout f:e2e -#smoke'`\n\nMore recipes: [references/recipes.md](references/recipes.md)\n"
2176
+ "content": "---\nname: flakiness-investigation\ndescription: >-\n TRIGGER when: CI fails, tests fail, PR checks fail, user asks about test results, regressions, flakiness, or test health.\n Always use this INSTEAD OF `gh` CLI for investigating test failures \u2014 it distinguishes regressions (caused by the PR) from pre-existing failures and flakes.\n DO NOT TRIGGER when: the user is asking about non-test CI issues (e.g. build failures, deployment problems).\n---\n\n# Flakiness Investigation\n\nUse the `flakiness` CLI to query and analyze test data from Flakiness.io.\n\n## Prerequisites\n\n- The `flakiness` CLI must be available in PATH (installed via `npm install -g flakiness` or `npx flakiness`).\n- A project must be specified: `--project <org/project>` or set `FLAKINESS_PROJECT` env var.\n- Authentication: run `flakiness auth login` first, or set `FLAKINESS_ACCESS_TOKEN`.\n\n## Core command\n\n```bash\nflakiness list tests --project <org/project> [--pr <number>] [--branch <name>] [--fql <query>] [--sort <axis>] [--sort-dir asc|desc] [--page <n>] [--page-size <n>] [--env-name <value>] [--env-path <value>] [--env-category <value>] [--env-os <value>] [--env-arch <value>] [--env-metadata <key=value>]\n```\n\n### Scope options\n\n| Flag | Effect |\n|------|--------|\n| *(none)* | Default branch \u2014 tests from the day of its head commit (in the project's timezone) |\n| `--pr <number>` | Pull request \u2014 tests from the merge-commit of the PR branch into the target branch |\n| `--branch <name>` | Named branch \u2014 tests from the day of its head commit (in the project's timezone) |\n\n`--pr` and `--branch` are mutually exclusive.\n\n### Environment filter options\n\nFilter results by environment. All flags are repeatable. 
Multiple values for the same flag are OR'd; different flags are AND'd.\n\n| Flag | Filters by | Example |\n|------|-----------|---------|\n| `--env-name <value>` | Environment name | `--env-name chromium` |\n| `--env-path <value>` | Config file path | `--env-path playwright.config.ts` |\n| `--env-category <value>` | Test category | `--env-category playwright` |\n| `--env-os <value>` | OS name + version | `--env-os \"Ubuntu 22.04\"` |\n| `--env-arch <value>` | CPU architecture | `--env-arch x86_64` |\n| `--env-metadata <key=value>` | User-supplied metadata | `--env-metadata browser=chromium` |\n\nThe output for each test shows environment info in the same `key=value` format:\n- **Env** line: system-collected data with keys matching the `--env-*` flag suffixes (`category=`, `name=`, `path=`, `os=`, `arch=`)\n- **Env Metadata** line (if any): user-supplied key=value pairs\n\nThis makes it easy to copy a value from the output and use it as a filter flag.\n\n### Status semantics with `--pr`\n\nWhen querying a PR, the status values have specific meaning:\n\n| Status | Meaning |\n|--------|---------|\n| `regressed` | Test was passing on the target branch but fails in this PR \u2014 **caused by the PR** |\n| `failed` | Test also fails on the target branch \u2014 **pre-existing failure, not caused by the PR** |\n| `flaked` | Test failed but passed on retry \u2014 flaky, not a real failure |\n| `passed` | Test passes |\n| `skipped` | Test was skipped |\n\n### Sort axes\n\n- `outcome` \u2014 sort by test outcome severity (default)\n- `flip_rate` \u2014 sort by flip rate (how often a test flips between pass/fail)\n- `duration` \u2014 sort by test duration\n- `duration_trend` \u2014 sort by duration trend\n- `name` \u2014 sort alphabetically\n\n### Common queries\n\n| Goal | FQL + flags |\n|------|-------------|\n| Flaky tests | `--fql 'flip>0%' --sort flip_rate --sort-dir desc` |\n| Very flaky (>50%) | `--fql 'flip>50%' --sort flip_rate --sort-dir desc` |\n| Failed tests | 
`--fql 's:failed' --sort outcome --sort-dir desc` |\n| Regressions | `--fql 's:regressed'` |\n| All broken tests | `--fql 'status:(failed, regressed)'` |\n| Slow tests | `--fql 'd>5s' --sort duration --sort-dir desc` |\n| Tests matching error | `--fql '$timeout'` |\n| Tests in file | `--fql 'f:login.spec.ts'` |\n| Tests on Linux | `--env-os \"Ubuntu 22.04\"` |\n| Tests for chromium project | `--env-name chromium` |\n| Tests with specific metadata | `--env-metadata browser=firefox` |\n| Linux or macOS failures | `--env-os \"Ubuntu 22.04\" --env-os \"Darwin 24.0\" --fql 's:failed'` |\n\n## FQL (Filter Query Language)\n\nFull reference: [references/fql.md](references/fql.md)\n\nKey rules:\n- Multiple tokens combine with AND: `s:failed f:e2e` means \"failed AND in e2e files\"\n- Prefix with `-` to exclude: `-#smoke` excludes smoke-tagged tests\n- Same filter type uses OR: `status:(failed, regressed)` means \"failed OR regressed\"\n- Quote values with spaces: `f:'tests/e2e checkout'`\n\n### Filter types\n\n| Filter | Syntax | Example |\n|--------|--------|---------|\n| Text search | `<text>` | `login` |\n| Status | `s:<status>` | `s:failed`, `status:(failed, regressed)` |\n| File | `f:<path>` | `f:login.spec.ts` |\n| Error | `$<text>` | `$timeout` |\n| Tag | `#<tag>` | `#smoke` |\n| Duration | `d><time>` | `d>2s`, `d<=500ms` |\n| Flip rate | `flip><pct>` | `flip>0%`, `fr>50%` |\n| Annotation | `@<type>` | `@skip` |\n\n### Status values\n\n`passed`, `failed`, `flaked`, `skipped`, `regressed`\n\n## Workflow: Fix My PR Tests\n\nWhen a user asks to fix failing tests in a PR, follow these steps:\n\n1. **Find the project slug:** Search the codebase for `flakinessProject` to find the `--project` value. It is typically configured in a test reporter config (e.g. `playwright.config.ts`, `jest.config.ts`) as `flakinessProject: 'org/project'`.\n\n2. 
**Fetch regressions from the PR:**\n ```bash\n flakiness list tests --project <org/project> --pr <number> --fql 's:regressed' --page-size 50\n ```\n Tests with status `regressed` were passing on the target branch but fail in this PR \u2014 these are **caused by the PR** and must be fixed.\n\n3. **Analyze the output:** Look at the error messages, file paths, and test names to understand what the PR broke.\n\n4. **Fix the regressions** by reading the reported file paths and error messages, then making targeted code changes.\n\n5. **Optionally check pre-existing failures:** Tests with status `failed` also fail on the target branch and are not caused by the PR. You can list them with:\n ```bash\n flakiness list tests --project <org/project> --pr <number> --fql 's:failed' --page-size 50\n ```\n These are informational \u2014 fixing them is a bonus, not a requirement.\n\n6. **Ignore flakes:** Tests with status `flaked` failed but passed on retry \u2014 they are flaky and not actionable in the context of a PR fix.\n\n## Workflow tips\n\n1. Start broad: `flakiness list tests --project <org/project> --page-size 20`\n2. Filter down with FQL based on what you're investigating\n3. Use `--page-size 50` or higher to see more results at once\n4. Combine filters: `--fql 's:failed $timeout f:e2e -#smoke'`\n\nMore recipes: [references/recipes.md](references/recipes.md)\n"
2101
2177
  },
2102
2178
  {
2103
2179
  "path": "references/fql.md",
@@ -2105,7 +2181,7 @@ var BUNDLED_SKILLS = [
2105
2181
  },
2106
2182
  {
2107
2183
  "path": "references/recipes.md",
2108
- "content": "# Investigation Recipes\n\nReplace `myorg/myproject` with the target project slug, or set `FLAKINESS_PROJECT`.\n\n## Query a pull request\n\nShows tests from the merge-commit of the PR branch into the target branch.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42\n```\n\n## Find PR regressions (caused by the PR)\n\nTests marked `regressed` were passing on the target branch but fail in this PR.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 's:regressed'\n```\n\n## Find pre-existing failures in a PR\n\nTests marked `failed` also fail on the target branch \u2014 not caused by the PR.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 's:failed'\n```\n\n## Find all broken tests in a PR\n\nIncludes both PR-caused regressions and pre-existing failures.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 'status:(failed, regressed)' --page-size 50\n```\n\n## Query a specific branch\n\nShows tests from the day of the branch's head commit, in the project's timezone.\n\n```bash\nflakiness list tests --project myorg/myproject --branch feature/login\n```\n\n## Find all flaky tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'flip>0%' --sort flip_rate --sort-dir desc\n```\n\n## Find the most flaky tests (>50% flip rate)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'flip>50%' --sort flip_rate --sort-dir desc\n```\n\n## Show failed tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:failed' --sort outcome --sort-dir desc\n```\n\n## Show regressions\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:regressed'\n```\n\n## Show all currently broken tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'status:(failed, regressed)' --sort outcome --sort-dir desc\n```\n\n## Show flaked tests (passed on retry)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:flaked' 
--sort flip_rate --sort-dir desc\n```\n\n## Find slow tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'd>5s' --sort duration --sort-dir desc\n```\n\n## Find tests by error text\n\n```bash\nflakiness list tests --project myorg/myproject --fql '$timeout'\nflakiness list tests --project myorg/myproject --fql '$\"network error\"'\n```\n\n## Combine filters\n\nFailed tests in e2e files, excluding smoke-tagged tests:\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:failed f:e2e -#smoke'\n```\n\n## Narrow to a specific file\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'f:login.spec.ts'\n```\n\n## Find tests with a specific annotation\n\n```bash\nflakiness list tests --project myorg/myproject --fql '@skip'\nflakiness list tests --project myorg/myproject --fql '@fixme'\n```\n"
2184
+ "content": "# Investigation Recipes\n\nReplace `myorg/myproject` with the target project slug, or set `FLAKINESS_PROJECT`.\n\n## Query a pull request\n\nShows tests from the merge-commit of the PR branch into the target branch.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42\n```\n\n## Find PR regressions (caused by the PR)\n\nTests marked `regressed` were passing on the target branch but fail in this PR.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 's:regressed'\n```\n\n## Find pre-existing failures in a PR\n\nTests marked `failed` also fail on the target branch \u2014 not caused by the PR.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 's:failed'\n```\n\n## Find all broken tests in a PR\n\nIncludes both PR-caused regressions and pre-existing failures.\n\n```bash\nflakiness list tests --project myorg/myproject --pr 42 --fql 'status:(failed, regressed)' --page-size 50\n```\n\n## Query a specific branch\n\nShows tests from the day of the branch's head commit, in the project's timezone.\n\n```bash\nflakiness list tests --project myorg/myproject --branch feature/login\n```\n\n## Find all flaky tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'flip>0%' --sort flip_rate --sort-dir desc\n```\n\n## Find the most flaky tests (>50% flip rate)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'flip>50%' --sort flip_rate --sort-dir desc\n```\n\n## Show failed tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:failed' --sort outcome --sort-dir desc\n```\n\n## Show regressions\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:regressed'\n```\n\n## Show all currently broken tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'status:(failed, regressed)' --sort outcome --sort-dir desc\n```\n\n## Show flaked tests (passed on retry)\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:flaked' 
--sort flip_rate --sort-dir desc\n```\n\n## Find slow tests\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'd>5s' --sort duration --sort-dir desc\n```\n\n## Find tests by error text\n\n```bash\nflakiness list tests --project myorg/myproject --fql '$timeout'\nflakiness list tests --project myorg/myproject --fql '$\"network error\"'\n```\n\n## Combine filters\n\nFailed tests in e2e files, excluding smoke-tagged tests:\n\n```bash\nflakiness list tests --project myorg/myproject --fql 's:failed f:e2e -#smoke'\n```\n\n## Narrow to a specific file\n\n```bash\nflakiness list tests --project myorg/myproject --fql 'f:login.spec.ts'\n```\n\n## Find tests with a specific annotation\n\n```bash\nflakiness list tests --project myorg/myproject --fql '@skip'\nflakiness list tests --project myorg/myproject --fql '@fixme'\n```\n\n## Filter by environment\n\n### Show tests for a specific OS\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\"\n```\n\n### Show tests for a specific environment name\n\n```bash\nflakiness list tests --project myorg/myproject --env-name chromium\n```\n\n### Show tests for a specific architecture\n\n```bash\nflakiness list tests --project myorg/myproject --env-arch arm64\n```\n\n### Show tests for multiple OSes (OR)\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --env-os \"Darwin 24.0\"\n```\n\n### Combine env filter with FQL\n\nFailed tests on Linux only:\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --fql 's:failed'\n```\n\n### Filter by user-supplied metadata\n\n```bash\nflakiness list tests --project myorg/myproject --env-metadata browser=firefox\n```\n\n### Combine multiple env filters (AND)\n\nTests on Linux with arm64 architecture:\n\n```bash\nflakiness list tests --project myorg/myproject --env-os \"Ubuntu 22.04\" --env-arch arm64\n```\n"
2109
2185
  }
2110
2186
  ]
2111
2187
  }
@@ -2121,8 +2197,9 @@ function projectRoot() {
2121
2197
  }
2122
2198
  }
2123
2199
  async function cmdSkillsInstall(options) {
2200
+ const root = options.project ? projectRoot() : os3.homedir();
2124
2201
  for (const skill of BUNDLED_SKILLS) {
2125
- const dest = path5.join(projectRoot(), `.${options.agent}`, "skills", skill.name);
2202
+ const dest = path5.join(root, `.${options.agent}`, "skills", skill.name);
2126
2203
  await fs5.promises.rm(dest, { recursive: true, force: true });
2127
2204
  for (const file of skill.files) {
2128
2205
  const filePath = path5.join(dest, file.path);
@@ -2235,7 +2312,20 @@ var optTestsPR = new Option("--pr <number>", "Show tests from a specific pull re
2235
2312
  return parsed;
2236
2313
  }).conflicts("branch");
2237
2314
  var optTestsBranch = new Option("--branch <name>", "Show tests from a specific branch").conflicts("pr");
2238
- list.command("tests").description("Query tests data. Defaults to the default branch (day of head commit in project timezone). Use --pr for PR merge-commit tests, or --branch for a named branch.").addOption(optTestsPage).addOption(optTestsPageSize).addOption(optTestsSort).addOption(optTestsSortDir).addOption(optTestsFQL).addOption(optTestsPR).addOption(optTestsBranch).addOption(mustFlakinessProject).addOption(optEndpoint).addOption(optAccessToken).action(async (options) => runCommand(async () => {
2315
/**
 * Option-argument accumulator for repeatable flags: returns a new array made
 * of the previously collected values followed by the new one.
 *
 * @param {string} value The value of the current occurrence of the flag.
 * @param {string[]|undefined} prev Values collected so far, if any.
 * @returns {string[]} A fresh array; `prev` is not modified.
 */
function collectValues(value, prev) {
  const collected = prev ?? [];
  return [...collected, value];
}
2318
// Repeatable environment filter options. Each occurrence of a flag appends its
// value to an array through collectValues.
var optTestsEnvName = new Option("--env-name <value>", "Filter by environment name (repeatable)").argParser(collectValues);
var optTestsEnvPath = new Option("--env-path <value>", "Filter by config path (repeatable)").argParser(collectValues);
var optTestsEnvCategory = new Option("--env-category <value>", "Filter by category (repeatable)").argParser(collectValues);
var optTestsEnvOs = new Option("--env-os <value>", "Filter by OS name + version (repeatable)").argParser(collectValues);
var optTestsEnvArch = new Option("--env-arch <value>", "Filter by CPU architecture (repeatable)").argParser(collectValues);
// Same accumulation as the options above, but each value must contain "=".
var optTestsEnvMetadata = new Option("--env-metadata <key=value>", "Filter by user-supplied environment metadata (repeatable)").argParser((value, prev) => {
  if (value.includes("="))
    return collectValues(value, prev);
  throw new Error(`Invalid env-metadata format '${value}'; expected key=value`);
});
2328
+ list.command("tests").description("Query tests data. Defaults to the default branch (day of head commit in project timezone). Use --pr for PR merge-commit tests, or --branch for a named branch.").addOption(optTestsPage).addOption(optTestsPageSize).addOption(optTestsSort).addOption(optTestsSortDir).addOption(optTestsFQL).addOption(optTestsPR).addOption(optTestsBranch).addOption(optTestsEnvName).addOption(optTestsEnvPath).addOption(optTestsEnvCategory).addOption(optTestsEnvOs).addOption(optTestsEnvArch).addOption(optTestsEnvMetadata).addOption(mustFlakinessProject).addOption(optEndpoint).addOption(optAccessToken).action(async (options) => runCommand(async () => {
2239
2329
  await cmdListTests({
2240
2330
  page: options.page,
2241
2331
  pageSize: options.pageSize,
@@ -2244,6 +2334,12 @@ list.command("tests").description("Query tests data. Defaults to the default bra
2244
2334
  fql: options.fql,
2245
2335
  pr: options.pr,
2246
2336
  branch: options.branch,
2337
+ envName: options.envName,
2338
+ envPath: options.envPath,
2339
+ envCategory: options.envCategory,
2340
+ envOs: options.envOs,
2341
+ envArch: options.envArch,
2342
+ envMetadata: options.envMetadata,
2247
2343
  endpoint: options.endpoint,
2248
2344
  accessToken: options.accessToken,
2249
2345
  flakinessProject: options.project
@@ -2261,8 +2357,8 @@ auth.command("whoami").description("Show current logged in user information").ac
2261
2357
  }));
2262
2358
  var skills = program.command("skills").description("Manage agent skills");
2263
2359
  var optAgent = new Option("--agent <agent>", "Target agent").choices(AGENTS).makeOptionMandatory();
2264
- skills.command("install").description("Install bundled skills into the project").addOption(optAgent).action(async (options) => runCommand(async () => {
2265
- await cmdSkillsInstall({ agent: options.agent });
2360
+ skills.command("install").description("Install bundled skills into the user home directory (use --project for project-local install)").addOption(optAgent).option("--project", "Install into the project directory instead of the user home directory").action(async (options) => runCommand(async () => {
2361
+ await cmdSkillsInstall({ agent: options.agent, project: options.project });
2266
2362
  }));
2267
2363
  program.command("access").description("Check access to a Flakiness.io project").addOption(mustFlakinessProject).addOption(optAccessToken).addOption(optEndpoint).option("--json", "Output result as JSON").option("-q, --quiet", "Suppress output, only set exit code").action(async (options) => runCommand(async () => {
2268
2364
  await cmdAccess({
package/lib/junit.js CHANGED
@@ -6,6 +6,35 @@ import assert from "assert";
6
6
  import fs from "fs";
7
7
  import mime from "mime";
8
8
  import path from "path";
9
+ import { Temporal } from "temporal-polyfill";
10
// Lazily-built cache: short time-zone abbreviation (as rendered by the "en-US"
// locale, e.g. "PST") -> IANA time-zone identifier. Stays undefined until the
// first successful call to tzAbbreviationToIANA().
var gTZAbbreviationToIANATimezone;

/**
 * Resolves a time-zone abbreviation to an IANA time-zone identifier.
 *
 * The lookup table is built on first use by formatting two probe dates (one in
 * June, one in January) in every zone reported by
 * `Intl.supportedValuesOf("timeZone")` and recording the short zone name that
 * the "en-US" locale prints for each. Only abbreviations produced that way are
 * known; when several zones share an abbreviation, the last zone yielded by
 * the runtime wins.
 *
 * @param {string} tz - Abbreviation to look up (case-sensitive), e.g. "PDT".
 * @returns {string | undefined} An IANA identifier, or undefined when the
 *   abbreviation is unknown.
 */
function tzAbbreviationToIANA(tz) {
  if (!gTZAbbreviationToIANATimezone) {
    // Build into a local map and publish it only once complete, so a failure
    // part-way through cannot leave a half-filled cache behind.
    const abbreviations = new Map();
    const probes = [new Date("2026-06-15T12:00:00Z"), new Date("2026-01-15T12:00:00Z")];
    for (const zone of Intl.supportedValuesOf("timeZone")) {
      // One formatter per zone; it is reused for both probe dates.
      const formatter = new Intl.DateTimeFormat("en-US", { timeZone: zone, timeZoneName: "short" });
      for (const date of probes) {
        const abbr = formatter.formatToParts(date).find((part) => part.type === "timeZoneName")?.value;
        if (abbr)
          abbreviations.set(abbr, zone);
      }
    }
    // NOTE(review): some engines appear to omit "UTC" from
    // Intl.supportedValuesOf("timeZone"); seed it so "UTC" always resolves.
    if (!abbreviations.has("UTC"))
      abbreviations.set("UTC", "UTC");
    gTZAbbreviationToIANATimezone = abbreviations;
  }
  return gTZAbbreviationToIANATimezone.get(tz);
}
26
/**
 * Converts a timestamp string into milliseconds since the Unix epoch.
 *
 * Anything the native `Date` parser accepts is returned as-is. Otherwise the
 * string is treated as a local date-time followed by a time-zone abbreviation
 * ("2026-01-15T12:00:00 AKST" or "2026-01-15 12:00:00 AKST"): the abbreviation
 * is resolved through tzAbbreviationToIANA() and the date-time is interpreted
 * in that zone via Temporal.
 *
 * @param {string} timestamp - Raw timestamp text.
 * @returns {number} Epoch milliseconds.
 * @throws {Error} "failed to parse timestamp: ..." when no strategy succeeds;
 *   a Temporal parsing failure is attached as `cause`.
 */
function parseTimestamp(timestamp) {
  const native = new Date(timestamp).getTime();
  if (!Number.isNaN(native))
    return native;
  // Trim first so stray leading/trailing whitespace does not create empty tokens.
  const tokens = timestamp.trim().split(/\s+/);
  // Two tokens: "<date>T<time> <TZ>"; three tokens: "<date> <time> <TZ>".
  const hasZoneToken = tokens.length === 2 || tokens.length === 3;
  const iana = hasZoneToken ? tzAbbreviationToIANA(tokens.at(-1)) : void 0;
  if (iana) {
    try {
      const d = Temporal.PlainDateTime.from(tokens.slice(0, -1).join("T"));
      return d.toZonedDateTime(iana).epochMilliseconds;
    } catch (cause) {
      // Report every failure with the same message, keeping the root cause.
      throw new Error(`failed to parse timestamp: ${timestamp}`, { cause });
    }
  }
  throw new Error(`failed to parse timestamp: ${timestamp}`);
}
9
38
  function getProperties(element) {
10
39
  const propertiesNodes = element.children.filter((node) => node instanceof XmlElement).filter((node) => node.name === "properties");
11
40
  if (!propertiesNodes.length)
@@ -70,7 +99,7 @@ async function traverseJUnitReport(context, node) {
70
99
  return;
71
100
  let { currentEnv, currentEnvIndex, currentSuite, report, currentTimeMs } = context;
72
101
  if (element.attributes["timestamp"])
73
- currentTimeMs = new Date(element.attributes["timestamp"]).getTime();
102
+ currentTimeMs = parseTimestamp(element.attributes["timestamp"]);
74
103
  if (element.name === "testsuite") {
75
104
  const file = element.attributes["file"];
76
105
  const line = parseInt(element.attributes["line"], 10);
@@ -94,12 +123,12 @@ async function traverseJUnitReport(context, node) {
94
123
  report.suites.push(newSuite);
95
124
  }
96
125
  currentSuite = newSuite;
97
- const userSuppliedData = getProperties(element);
98
- if (userSuppliedData.length) {
126
+ const metadata = getProperties(element);
127
+ if (metadata.length) {
99
128
  currentEnv = structuredClone(currentEnv);
100
- currentEnv.userSuppliedData ??= {};
101
- for (const [key, value] of userSuppliedData)
102
- currentEnv.userSuppliedData[key] = value;
129
+ currentEnv.metadata ??= {};
130
+ for (const [key, value] of metadata)
131
+ currentEnv.metadata[key] = value;
103
132
  currentEnvIndex = report.environments.push(currentEnv) - 1;
104
133
  }
105
134
  } else if (element.name === "testcase") {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "flakiness",
3
- "version": "0.218.0",
3
+ "version": "0.220.0",
4
4
  "private": false,
5
5
  "bin": {
6
6
  "flakiness": "./lib/cli/cli.js"
@@ -22,8 +22,9 @@
22
22
  "@playwright/test": "^1.57.0",
23
23
  "@types/debug": "^4.1.12",
24
24
  "@types/express": "^4.17.20",
25
- "@flakiness/shared": "0.218.0",
26
- "@flakiness/server": "0.218.0"
25
+ "gray-matter": "^4.0.3",
26
+ "@flakiness/shared": "0.220.0",
27
+ "@flakiness/server": "0.220.0"
27
28
  },
28
29
  "dependencies": {
29
30
  "@flakiness/flakiness-report": "^0.28.0",
@@ -34,9 +35,11 @@
34
35
  "debug": "^4.4.3",
35
36
  "mime": "^4.1.0",
36
37
  "open": "^10.2.0",
37
- "ora": "^8.2.0"
38
+ "ora": "^8.2.0",
39
+ "temporal-polyfill": "^0.3.0"
38
40
  },
39
41
  "scripts": {
42
+ "test": "pnpm playwright test",
40
43
  "build:all": "pnpm build:win && pnpm build:linux && pnpm build:mac && pnpm build:alpine && pnpm build:mac_intel",
41
44
  "build:win": "bun build ./lib/cli/cli.js --compile --minify --target=bun-windows-x64 --outfile dist/flakiness-win-x64.exe",
42
45
  "build:linux": "bun build ./lib/cli/cli.js --compile --minify --target=bun-linux-x64 --outfile dist/flakiness-linux-x64",