ci-triage 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +98 -5
- package/action.yml +21 -1
- package/dist/action.js +78 -63
- package/dist/classifier.js +70 -0
- package/dist/flake-store.js +106 -1
- package/dist/index.js +347 -11
- package/dist/llm-analyzer.js +22 -3
- package/dist/multi.js +116 -0
- package/dist/parser.js +117 -0
- package/dist/parsers/deploy-pages-failure.js +33 -0
- package/dist/parsers/http-error.js +13 -0
- package/dist/parsers/index.js +9 -0
- package/dist/parsers/shell-failure.js +14 -0
- package/dist/parsers/types.js +1 -0
- package/dist/providers/index.js +10 -2
- package/dist/repo-context.js +97 -0
- package/dist/reporter.js +1 -0
- package/package.json +3 -2
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.2.0] - 2026-02-25
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Provider abstraction: GitHub Actions, GitLab CI, CircleCI with auto-detection
|
|
15
|
+
- SQLite flake store at `~/.ci-triage/flake.db` for persistent cross-session test history
|
|
16
|
+
- `ci-triage flakes owner/repo` to list known flaky tests with fail/pass stats
|
|
17
|
+
- `ci-triage db prune [--days 90]` to remove old runs from local DB
|
|
18
|
+
- LLM root-cause analysis via OpenAI Responses API (`gpt-4.1-mini` default), gated on `OPENAI_API_KEY` or `--llm`
|
|
19
|
+
- Graceful LLM fallback to heuristic analysis with explicit `fallback_reason` in output
|
|
20
|
+
- LLM token usage and estimated cost logging per run
|
|
21
|
+
- `--provider github|gitlab|circleci` flag for explicit provider selection
|
|
22
|
+
- `--llm` and `--llm-model` flags
|
|
23
|
+
- `--version` flag
|
|
24
|
+
- `analysis` block in JSON output schema (`mode`, `provider`, `model`, `root_cause`, `fix_suggestions`, `llm.usage`)
|
|
25
|
+
- `action.yml` inputs: `llm`, `openai-api-key`, `llm-model`, `provider`
|
|
26
|
+
|
|
27
|
+
### Changed
|
|
28
|
+
|
|
29
|
+
- Actionable error messages when `gh` CLI is missing or unauthenticated
|
|
30
|
+
- README fully rewritten for v0.2
|
|
31
|
+
|
|
32
|
+
## [0.1.0] - 2026-02-22
|
|
33
|
+
|
|
34
|
+
### Added
|
|
35
|
+
|
|
36
|
+
- Initial release
|
|
37
|
+
- GitHub Actions log parsing with 16 failure categories
|
|
38
|
+
- Flake detection (in-memory, single-run)
|
|
39
|
+
- JUnit XML support
|
|
40
|
+
- Structured JSON output schema
|
|
41
|
+
- MCP server with 4 tools (`triage_run`, `list_failures`, `is_flaky`, `suggest_fix`)
|
|
42
|
+
- GitHub Action (`action.yml`) with PR comment and artifact upload
|
package/README.md
CHANGED
|
@@ -2,27 +2,39 @@
|
|
|
2
2
|
|
|
3
3
|
Open-source CI failure triage for humans and agents. Parses CI logs, classifies failures, detects flaky tests, and outputs structured JSON — with an MCP server for coding agents.
|
|
4
4
|
|
|
5
|
+
**v0.2** adds LLM root-cause analysis, a persistent SQLite flake database, and multi-CI support (GitHub, GitLab, CircleCI).
|
|
6
|
+
|
|
5
7
|
## Why
|
|
6
8
|
|
|
7
9
|
When CI fails, everyone does the same dance: open the run, scroll logs, squint at errors. Coding agents have it worse — they can't scroll. **ci-triage** gives both humans and agents structured, queryable failure data.
|
|
8
10
|
|
|
9
11
|
## Features
|
|
10
12
|
|
|
11
|
-
- 🔍 **Smart log parsing** — extracts errors, file:line references, stack traces
|
|
13
|
+
- 🔍 **Smart log parsing** — extracts errors, file:line references, stack traces
|
|
12
14
|
- 🏷️ **Failure classification** — 16 categories with severity levels and confidence scores
|
|
13
|
-
- 🧪 **Flake detection** —
|
|
15
|
+
- 🧪 **Flake detection** — in-memory (single run) + SQLite persistent history across runs
|
|
16
|
+
- 🤖 **LLM root-cause analysis** — OpenAI Responses API, gated on `OPENAI_API_KEY`, graceful fallback
|
|
17
|
+
- 🌐 **Multi-CI** — GitHub Actions, GitLab CI, CircleCI, with auto-detection
|
|
14
18
|
- 📋 **JUnit XML support** — parses standard test result files
|
|
15
19
|
- 📊 **Structured JSON output** — agent-consumable schema with full failure context
|
|
16
|
-
-
|
|
17
|
-
-
|
|
20
|
+
- 🔧 **MCP server** — coding agents (Codex, Claude Code) can query failures programmatically
|
|
21
|
+
- ⚙️ **GitHub Action** — drop into any workflow with PR comments and artifacts
|
|
18
22
|
|
|
19
23
|
## Quick Start
|
|
20
24
|
|
|
21
25
|
### CLI
|
|
26
|
+
|
|
22
27
|
```bash
|
|
23
|
-
# Install
|
|
28
|
+
# Install globally
|
|
24
29
|
npm install -g ci-triage
|
|
25
30
|
|
|
31
|
+
# Or use without installing
|
|
32
|
+
npx ci-triage owner/repo
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
#### Basic usage
|
|
36
|
+
|
|
37
|
+
```bash
|
|
26
38
|
# Triage the most recent failed run
|
|
27
39
|
ci-triage owner/repo
|
|
28
40
|
|
|
@@ -36,7 +48,38 @@ ci-triage owner/repo --run 12345
|
|
|
36
48
|
ci-triage owner/repo --md triage.md
|
|
37
49
|
```
|
|
38
50
|
|
|
51
|
+
#### v0.2 flags
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# LLM root-cause analysis (requires OPENAI_API_KEY)
|
|
55
|
+
ci-triage owner/repo --llm
|
|
56
|
+
|
|
57
|
+
# Override LLM model (default: gpt-4.1-mini)
|
|
58
|
+
ci-triage owner/repo --llm --llm-model gpt-4o-mini
|
|
59
|
+
|
|
60
|
+
# Force a specific CI provider
|
|
61
|
+
ci-triage owner/repo --provider gitlab
|
|
62
|
+
ci-triage owner/repo --provider circleci
|
|
63
|
+
|
|
64
|
+
# Show flaky tests from persistent history
|
|
65
|
+
ci-triage flakes owner/repo
|
|
66
|
+
|
|
67
|
+
# Print version
|
|
68
|
+
ci-triage --version
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
#### Multi-CI provider setup
|
|
72
|
+
|
|
73
|
+
| Provider | Token env var | Auto-detected by |
|
|
74
|
+
|----------|--------------|-----------------|
|
|
75
|
+
| GitHub Actions | `gh auth login` (uses `gh` CLI) | default |
|
|
76
|
+
| GitLab CI | `GITLAB_TOKEN` | `.gitlab-ci.yml` in cwd |
|
|
77
|
+
| CircleCI | `CIRCLE_TOKEN` | `.circleci/` in cwd |
|
|
78
|
+
|
|
79
|
+
Auto-detection runs at startup — pass `--provider` to override.
|
|
80
|
+
|
|
39
81
|
### GitHub Action
|
|
82
|
+
|
|
40
83
|
```yaml
|
|
41
84
|
- uses: clankamode/ci-triage@v1
|
|
42
85
|
with:
|
|
@@ -68,6 +111,42 @@ claude mcp add ci-triage npx -y ci-triage --mcp
|
|
|
68
111
|
| `is_flaky` | Flake history for a specific test |
|
|
69
112
|
| `suggest_fix` | Fix suggestions for failures |
|
|
70
113
|
|
|
114
|
+
## SQLite Flake Database
|
|
115
|
+
|
|
116
|
+
ci-triage persists run outcomes to `~/.ci-triage/flake.db`. Over time, this builds a cross-session history of test pass/fail patterns, making flake detection much more accurate than single-run heuristics.
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# After a few triage runs, list known flaky tests
|
|
120
|
+
ci-triage flakes owner/repo
|
|
121
|
+
|
|
122
|
+
# Output:
|
|
123
|
+
# Test Name Fails Passes Ratio Last Seen
|
|
124
|
+
# ─────────────────────────────────────────────────────────────────────────────────────
|
|
125
|
+
# integration::auth::test_token_refresh 3 7 30.0% 2026-02-25
|
|
126
|
+
# e2e::dashboard::renders_on_slow_network 2 5 28.6% 2026-02-24
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## LLM Root-Cause Analysis
|
|
130
|
+
|
|
131
|
+
When `OPENAI_API_KEY` is set (or `--llm` is passed), ci-triage calls the OpenAI Responses API to produce a human-readable root cause and fix suggestions:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
OPENAI_API_KEY=sk-... ci-triage owner/repo --llm
|
|
135
|
+
|
|
136
|
+
# ── LLM Root-Cause Analysis ─────────────────────────────
|
|
137
|
+
# Model: gpt-4.1-mini (openai)
|
|
138
|
+
# Root Cause: The auth token refresh test fails intermittently due to a race
|
|
139
|
+
# condition in the mock clock setup.
|
|
140
|
+
# Fix Suggestions:
|
|
141
|
+
# • Use vi.useFakeTimers() with explicit tick advancement
|
|
142
|
+
# • Add a 50ms buffer after token expiry before asserting refresh
|
|
143
|
+
# • Consider extracting token timing into a testable utility
|
|
144
|
+
# Tokens: 1240 in / 187 out — est. $0.0008
|
|
145
|
+
# ─────────────────────────────────────────────────────────
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
If the LLM is unavailable, ci-triage silently falls back to heuristic suggestions. The JSON output includes `analysis.mode` so you can tell which kind of analysis produced the result.
|
|
149
|
+
|
|
71
150
|
## Output Schema
|
|
72
151
|
|
|
73
152
|
```json
|
|
@@ -100,6 +179,20 @@ claude mcp add ci-triage npx -y ci-triage --mcp
|
|
|
100
179
|
"flaky_count": 1,
|
|
101
180
|
"real_count": 0,
|
|
102
181
|
"root_cause": "Flaky assertion in Foo.validate"
|
|
182
|
+
},
|
|
183
|
+
"analysis": {
|
|
184
|
+
"mode": "llm",
|
|
185
|
+
"provider": "openai",
|
|
186
|
+
"model": "gpt-4.1-mini",
|
|
187
|
+
"root_cause": "Race condition in mock clock setup",
|
|
188
|
+
"fix_suggestions": ["Use vi.useFakeTimers() with explicit tick advancement"],
|
|
189
|
+
"llm": {
|
|
190
|
+
"usage": {
|
|
191
|
+
"input_tokens": 1240,
|
|
192
|
+
"output_tokens": 187,
|
|
193
|
+
"estimated_cost_usd": 0.0008
|
|
194
|
+
}
|
|
195
|
+
}
|
|
103
196
|
}
|
|
104
197
|
}
|
|
105
198
|
```
|
package/action.yml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
name: "CI Triage"
|
|
2
|
-
description: "Automated CI failure triage with flake detection"
|
|
2
|
+
description: "Automated CI failure triage with flake detection, LLM analysis, and multi-CI support"
|
|
3
3
|
inputs:
|
|
4
4
|
token:
|
|
5
5
|
description: "GitHub token for API access"
|
|
@@ -20,6 +20,22 @@ inputs:
|
|
|
20
20
|
description: "Number of historical runs to inspect"
|
|
21
21
|
required: false
|
|
22
22
|
default: "20"
|
|
23
|
+
llm:
|
|
24
|
+
description: "Enable LLM root-cause analysis (requires openai-api-key)"
|
|
25
|
+
required: false
|
|
26
|
+
default: "false"
|
|
27
|
+
openai-api-key:
|
|
28
|
+
description: "OpenAI API key for LLM analysis"
|
|
29
|
+
required: false
|
|
30
|
+
default: ""
|
|
31
|
+
llm-model:
|
|
32
|
+
description: "LLM model to use (default: gpt-4.1-mini)"
|
|
33
|
+
required: false
|
|
34
|
+
default: "gpt-4.1-mini"
|
|
35
|
+
provider:
|
|
36
|
+
description: "CI provider override: github | gitlab | circleci (default: auto-detect)"
|
|
37
|
+
required: false
|
|
38
|
+
default: ""
|
|
23
39
|
outputs:
|
|
24
40
|
report-json:
|
|
25
41
|
description: "Raw triage report JSON string"
|
|
@@ -57,6 +73,10 @@ runs:
|
|
|
57
73
|
CI_TRIAGE_COMMENT: ${{ inputs.comment }}
|
|
58
74
|
CI_TRIAGE_JSON_ARTIFACT: ${{ inputs.json-artifact }}
|
|
59
75
|
CI_TRIAGE_HISTORY_DEPTH: ${{ inputs.history-depth }}
|
|
76
|
+
CI_TRIAGE_LLM: ${{ inputs.llm }}
|
|
77
|
+
CI_TRIAGE_LLM_MODEL: ${{ inputs.llm-model }}
|
|
78
|
+
CI_TRIAGE_PROVIDER: ${{ inputs.provider }}
|
|
79
|
+
OPENAI_API_KEY: ${{ inputs.openai-api-key }}
|
|
60
80
|
run: node dist/action.js
|
|
61
81
|
|
|
62
82
|
- name: Upload triage JSON artifact
|
package/dist/action.js
CHANGED
|
@@ -1,58 +1,18 @@
|
|
|
1
|
-
import { execFileSync } from 'node:child_process';
|
|
2
1
|
import { appendFileSync } from 'node:fs';
|
|
2
|
+
import { classify } from './classifier.js';
|
|
3
3
|
import { formatComment } from './comment.js';
|
|
4
|
+
import { analyzeLlm } from './llm-analyzer.js';
|
|
5
|
+
import { parseFailures } from './parser.js';
|
|
6
|
+
import { getProvider } from './providers/index.js';
|
|
7
|
+
import { buildJsonReport } from './reporter.js';
|
|
8
|
+
import { persistRun } from './flake-store.js';
|
|
4
9
|
import { getPRForRun, postOrUpdateComment, uploadArtifact } from './github.js';
|
|
5
10
|
/**
 * Parse a boolean-like setting, typically read from `process.env`.
 *
 * @param {string | undefined | null} value - Raw setting value.
 * @param {boolean} defaultValue - Returned when `value` is missing or blank.
 * @returns {boolean} `true` only when the trimmed value equals "true"
 *   (case-insensitive); `defaultValue` when nothing usable was supplied.
 */
function parseBool(value, defaultValue) {
    if (value == null) {
        return defaultValue;
    }
    // Trim first: surrounding whitespace (for example a trailing newline from a
    // YAML block scalar) must not turn an intended "true" into false.
    const normalized = String(value).trim().toLowerCase();
    if (normalized === '') {
        return defaultValue;
    }
    return normalized === 'true';
}
|
|
10
|
-
function
|
|
11
|
-
if (job.failures)
|
|
12
|
-
return job.failures;
|
|
13
|
-
const out = [];
|
|
14
|
-
for (const step of job.steps ?? []) {
|
|
15
|
-
for (const failure of step.failures ?? []) {
|
|
16
|
-
out.push(failure);
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
return out;
|
|
20
|
-
}
|
|
21
|
-
function asReport(raw, repo, runId) {
|
|
22
|
-
if (raw && typeof raw === 'object' && 'summary' in raw && 'jobs' in raw && 'status' in raw) {
|
|
23
|
-
return raw;
|
|
24
|
-
}
|
|
25
|
-
const rows = Array.isArray(raw) ? raw : [];
|
|
26
|
-
const jobs = rows.map((row, idx) => ({
|
|
27
|
-
name: row.workflowName ?? `failed-job-${idx + 1}`,
|
|
28
|
-
status: 'failed',
|
|
29
|
-
failures: [
|
|
30
|
-
{
|
|
31
|
-
test_name: row.displayTitle ?? 'Unknown failure',
|
|
32
|
-
error: row.cause ?? 'Unknown cause',
|
|
33
|
-
suggested_fix: row.fix,
|
|
34
|
-
},
|
|
35
|
-
],
|
|
36
|
-
}));
|
|
37
|
-
const total = jobs.reduce((acc, job) => acc + flattenFailures(job).length, 0);
|
|
38
|
-
return {
|
|
39
|
-
version: '1.0',
|
|
40
|
-
repo,
|
|
41
|
-
run_id: runId,
|
|
42
|
-
timestamp: new Date().toISOString(),
|
|
43
|
-
status: total > 0 ? 'fail' : 'pass',
|
|
44
|
-
jobs,
|
|
45
|
-
summary: {
|
|
46
|
-
total_failures: total,
|
|
47
|
-
flaky_count: 0,
|
|
48
|
-
real_count: total,
|
|
49
|
-
categories: {},
|
|
50
|
-
root_cause: total > 0 ? 'See failed job sections for causes.' : 'No failures detected.',
|
|
51
|
-
action: total > 0 ? 'fix' : 'none',
|
|
52
|
-
},
|
|
53
|
-
};
|
|
54
|
-
}
|
|
55
|
-
function main() {
|
|
15
|
+
async function main() {
|
|
56
16
|
const token = process.env.INPUT_TOKEN;
|
|
57
17
|
const repo = process.env.GITHUB_REPOSITORY;
|
|
58
18
|
const runIdRaw = process.env.GITHUB_RUN_ID;
|
|
@@ -63,25 +23,76 @@ function main() {
|
|
|
63
23
|
throw new Error('Missing GITHUB_REPOSITORY');
|
|
64
24
|
if (!outputPath)
|
|
65
25
|
throw new Error('Missing GITHUB_OUTPUT');
|
|
66
|
-
const flakeDetect = parseBool(process.env.CI_TRIAGE_FLAKE_DETECT, true);
|
|
67
26
|
const shouldComment = parseBool(process.env.CI_TRIAGE_COMMENT, true);
|
|
68
27
|
const shouldUploadArtifact = parseBool(process.env.CI_TRIAGE_JSON_ARTIFACT, true);
|
|
69
|
-
const
|
|
70
|
-
const
|
|
71
|
-
const
|
|
72
|
-
|
|
73
|
-
|
|
28
|
+
const providerOverride = process.env.CI_TRIAGE_PROVIDER;
|
|
29
|
+
const llmEnabledByEnv = parseBool(process.env.CI_TRIAGE_LLM, false);
|
|
30
|
+
const llmModel = process.env.CI_TRIAGE_LLM_MODEL?.trim() || 'gpt-4.1-mini';
|
|
31
|
+
const runId = runIdRaw && /^\d+$/.test(runIdRaw) ? Number(runIdRaw) : undefined;
|
|
32
|
+
process.env.GH_TOKEN = token;
|
|
33
|
+
process.env.GITHUB_TOKEN = token;
|
|
34
|
+
const provider = getProvider(providerOverride);
|
|
35
|
+
const canHandle = await provider.canHandle(repo);
|
|
36
|
+
if (!canHandle) {
|
|
37
|
+
throw new Error(`Provider "${provider.name}" cannot handle repo "${repo}" in this environment.`);
|
|
74
38
|
}
|
|
75
|
-
const
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
39
|
+
const requestedRunId = runIdRaw ? (runId ? runId : runIdRaw) : undefined;
|
|
40
|
+
const run = await provider.resolveRun(repo, requestedRunId);
|
|
41
|
+
const [logBundle, metadata] = await Promise.all([
|
|
42
|
+
provider.fetchLogs({ provider: provider.name, repo, runId: run.id }),
|
|
43
|
+
provider.fetchMetadata({ provider: provider.name, repo, runId: run.id }),
|
|
44
|
+
]);
|
|
45
|
+
const rawLog = logBundle.combined;
|
|
46
|
+
const failures = parseFailures(rawLog);
|
|
47
|
+
const classified = failures.map((failure) => ({
|
|
48
|
+
...failure,
|
|
49
|
+
classification: classify(failure),
|
|
50
|
+
}));
|
|
51
|
+
const runInfo = {
|
|
52
|
+
databaseId: Number(run.id) || 0,
|
|
53
|
+
displayTitle: run.displayTitle,
|
|
54
|
+
workflowName: run.workflowName,
|
|
55
|
+
conclusion: run.conclusion,
|
|
56
|
+
url: run.url,
|
|
57
|
+
};
|
|
58
|
+
const report = buildJsonReport({
|
|
59
|
+
repo,
|
|
60
|
+
run: runInfo,
|
|
61
|
+
failures: classified,
|
|
62
|
+
metadata: {
|
|
63
|
+
headSha: metadata.headSha,
|
|
64
|
+
headBranch: metadata.headBranch,
|
|
65
|
+
event: metadata.event,
|
|
81
66
|
},
|
|
82
|
-
})
|
|
83
|
-
const
|
|
84
|
-
|
|
67
|
+
});
|
|
68
|
+
const llmEnabled = llmEnabledByEnv || !!process.env.OPENAI_API_KEY;
|
|
69
|
+
if (llmEnabled) {
|
|
70
|
+
const failureEntries = classified.map((f) => ({
|
|
71
|
+
type: f.classification?.type ?? 'unknown',
|
|
72
|
+
error: f.error,
|
|
73
|
+
stack: f.stack?.join('\n'),
|
|
74
|
+
category: f.classification?.category ?? 'unknown',
|
|
75
|
+
severity: f.classification?.severity ?? 'low',
|
|
76
|
+
suggested_fix: f.classification?.suggestedFix ?? '',
|
|
77
|
+
flaky: { is_flaky: false, confidence: 0, pass_rate_7d: 1, last_5_runs: [] },
|
|
78
|
+
}));
|
|
79
|
+
report.analysis = await analyzeLlm(failureEntries, rawLog, {
|
|
80
|
+
model: llmModel,
|
|
81
|
+
enabled: llmEnabled,
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
try {
|
|
85
|
+
const testMap = {};
|
|
86
|
+
for (const failure of classified) {
|
|
87
|
+
if (failure.stepName) {
|
|
88
|
+
testMap[failure.stepName] = 'fail';
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
persistRun(repo, String(run.id), new Date().toISOString(), testMap);
|
|
92
|
+
}
|
|
93
|
+
catch {
|
|
94
|
+
// non-fatal
|
|
95
|
+
}
|
|
85
96
|
const reportJson = JSON.stringify(report);
|
|
86
97
|
const failureCount = String(report.summary.total_failures);
|
|
87
98
|
const status = report.status === 'failed' ? 'fail' : report.status;
|
|
@@ -101,4 +112,8 @@ function main() {
|
|
|
101
112
|
appendFileSync(outputPath, `failure-count=${failureCount}\n`);
|
|
102
113
|
appendFileSync(outputPath, `artifact-path=${artifactPath}\n`);
|
|
103
114
|
}
|
|
104
|
-
main()
|
|
115
|
+
// Entry point: run the action and turn any rejection into a non-zero exit code.
main().catch((failure) => {
    // Print only the message for Error instances; stringify anything else that was thrown.
    console.error(failure instanceof Error ? failure.message : String(failure));
    process.exit(1);
});
|
package/dist/classifier.js
CHANGED
|
@@ -39,6 +39,47 @@ const RULES = [
|
|
|
39
39
|
suggestedFix: 'Inspect failing expectations and fix logic or stabilize flaky assertions.',
|
|
40
40
|
patterns: [/\b(?:assertionerror|assertion failed|expected .* but received|\bFAIL\b.*\.(?:test|spec)\.)\b/i],
|
|
41
41
|
},
|
|
42
|
+
{
|
|
43
|
+
category: 'http_error',
|
|
44
|
+
severity: 'high',
|
|
45
|
+
type: 'external_service',
|
|
46
|
+
cause: 'HTTP request failed with non-success status',
|
|
47
|
+
suggestedFix: 'Check endpoint/auth configuration and add retries/backoff for transient failures.',
|
|
48
|
+
patterns: [/\b(?:http\s*(?:403|404|5\d{2})|403 forbidden|404 not found)\b/i],
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
category: 'infra_config',
|
|
52
|
+
severity: 'high',
|
|
53
|
+
type: 'configuration_error',
|
|
54
|
+
cause: 'CI infrastructure or workflow configuration error',
|
|
55
|
+
suggestedFix: 'Fix workflow/config settings for Actions, Pages, or CodeQL before rerunning.',
|
|
56
|
+
patterns: [
|
|
57
|
+
/\b(?:configuration error.*(?:codeql|code[- ]scanning)|codeql.*configuration error)\b/i,
|
|
58
|
+
/\b(?:error:\s*get pages site failed|httperror:\s*not found.*pages?)\b/i,
|
|
59
|
+
],
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
category: 'file_conflict',
|
|
63
|
+
severity: 'medium',
|
|
64
|
+
type: 'build_error',
|
|
65
|
+
cause: 'Input/output path conflict',
|
|
66
|
+
suggestedFix: 'Use distinct input and output paths and avoid in-place writes for copied artifacts.',
|
|
67
|
+
patterns: [/\b(?:input file is output file|is the same file)\b/i],
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
category: 'script_error',
|
|
71
|
+
severity: 'high',
|
|
72
|
+
type: 'runtime_error',
|
|
73
|
+
cause: 'Shell script or command execution failure',
|
|
74
|
+
suggestedFix: 'Validate shell commands, dependencies, and script paths in the failing step.',
|
|
75
|
+
patterns: [
|
|
76
|
+
/^.+:\s*line\s+\d+:\s*.+$/im,
|
|
77
|
+
/\bcommand not found\b/i,
|
|
78
|
+
/\bError:\s*Process completed with exit code\s+[1-9]\d*\b/i,
|
|
79
|
+
/\b(?:bash|sh|zsh):\s*.+\b(?:permission denied|no such file or directory)\b/i,
|
|
80
|
+
/\bnpm ERR!\b/i,
|
|
81
|
+
],
|
|
82
|
+
},
|
|
42
83
|
{
|
|
43
84
|
category: 'docker_error',
|
|
44
85
|
severity: 'high',
|
|
@@ -131,6 +172,33 @@ function confidenceFromMatch(failure, patternHits) {
|
|
|
131
172
|
}
|
|
132
173
|
return Number(Math.min(0.99, score).toFixed(2));
|
|
133
174
|
}
|
|
175
|
+
/**
 * Map a failure category (plus the surrounding log text) to a machine-readable
 * fix action.
 *
 * @param {string} category - Classifier category, e.g. `http_error`, `timeout`.
 * @param {string | null | undefined} context - Log text used to refine the
 *   action for `http_error` and `infra_config`; a missing value is treated as
 *   empty text.
 * @returns {string} One of `enable-feature`, `rename-ref`, `add-env-var`,
 *   `fix-script`, `update-dependency`, `retry`, `delete-workflow`,
 *   `update-config`, or `unknown`.
 */
export function fixActionForCategory(category, context) {
    // A missing context must not throw: most categories do not need the log text at all.
    const haystack = String(context ?? '').toLowerCase();
    switch (category) {
        case 'http_error': {
            // Only 404-style failures have a known remediation.
            if (!/\b404\b|not found/.test(haystack)) {
                return 'unknown';
            }
            // A 404 that mentions a feature toggle points at a disabled feature
            // rather than a wrong reference.
            return /\bfeature\b|disabled|not enabled|enable/.test(haystack)
                ? 'enable-feature'
                : 'rename-ref';
        }
        case 'missing_env':
            return 'add-env-var';
        case 'file_conflict':
        case 'script_error':
            return 'fix-script';
        case 'dependency_security':
            return 'update-dependency';
        case 'timeout':
            return 'retry';
        case 'infra_config':
            return /\bworkflow\b.*\bnot found\b|\bdelete workflow\b|does not exist/.test(haystack)
                ? 'delete-workflow'
                : 'update-config';
        default:
            return 'unknown';
    }
}
|
|
134
202
|
export function classify(failure) {
|
|
135
203
|
const haystack = [failure.error, ...failure.stack, ...failure.rawLines].join('\n');
|
|
136
204
|
for (const rule of RULES) {
|
|
@@ -143,6 +211,7 @@ export function classify(failure) {
|
|
|
143
211
|
cause: rule.cause,
|
|
144
212
|
suggestedFix: rule.suggestedFix,
|
|
145
213
|
type: rule.type,
|
|
214
|
+
fixAction: fixActionForCategory(rule.category, haystack),
|
|
146
215
|
};
|
|
147
216
|
}
|
|
148
217
|
}
|
|
@@ -153,5 +222,6 @@ export function classify(failure) {
|
|
|
153
222
|
cause: 'Unknown (manual triage needed)',
|
|
154
223
|
suggestedFix: 'Open failed logs and inspect the first failing step in detail.',
|
|
155
224
|
type: 'unknown_failure',
|
|
225
|
+
fixAction: 'unknown',
|
|
156
226
|
};
|
|
157
227
|
}
|
package/dist/flake-store.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import Database from 'better-sqlite3';
|
|
2
|
-
import { mkdirSync } from 'node:fs';
|
|
2
|
+
import { mkdirSync, statSync } from 'node:fs';
|
|
3
3
|
import { homedir } from 'node:os';
|
|
4
4
|
import { join } from 'node:path';
|
|
5
5
|
let _db = null;
|
|
@@ -32,9 +32,21 @@ function bootstrap(db) {
|
|
|
32
32
|
FOREIGN KEY (run_id) REFERENCES runs(id)
|
|
33
33
|
);
|
|
34
34
|
|
|
35
|
+
CREATE TABLE IF NOT EXISTS remediations (
|
|
36
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
37
|
+
repo TEXT NOT NULL,
|
|
38
|
+
run_id TEXT NOT NULL,
|
|
39
|
+
fix_action TEXT NOT NULL,
|
|
40
|
+
description TEXT NOT NULL,
|
|
41
|
+
resolved_at TEXT NOT NULL,
|
|
42
|
+
resolved_commit TEXT,
|
|
43
|
+
verified_clean_run_id TEXT
|
|
44
|
+
);
|
|
45
|
+
|
|
35
46
|
CREATE INDEX IF NOT EXISTS idx_runs_repo ON runs(repo);
|
|
36
47
|
CREATE INDEX IF NOT EXISTS idx_test_results_run_id ON test_results(run_id);
|
|
37
48
|
CREATE INDEX IF NOT EXISTS idx_test_results_test_name ON test_results(test_name);
|
|
49
|
+
CREATE INDEX IF NOT EXISTS idx_remediations_repo ON remediations(repo);
|
|
38
50
|
`);
|
|
39
51
|
}
|
|
40
52
|
/** Persist a run and its test outcomes. */
|
|
@@ -97,6 +109,99 @@ export function isFlakySqlite(repo, testName) {
|
|
|
97
109
|
const is_flaky = row.fail_count > 0 && row.pass_count > 0;
|
|
98
110
|
return { is_flaky, fail_count: row.fail_count, pass_count: row.pass_count, flake_ratio };
|
|
99
111
|
}
|
|
112
|
+
/**
 * Remove runs older than `daysOld` days, together with their test results.
 *
 * Both deletes run inside one transaction so a failure cannot leave runs
 * without their test results (or the reverse).
 *
 * @param {number} [daysOld=90] - Age threshold in days; numeric strings are accepted.
 * @returns {number} Number of rows deleted from `runs`.
 * @throws {RangeError} When `daysOld` is not a finite number.
 */
export function pruneRuns(daysOld = 90) {
    const days = Number(daysOld);
    if (!Number.isFinite(days)) {
        // Fail with a clear message instead of the opaque "Invalid time value"
        // that Date#toISOString would raise further down.
        throw new RangeError(`daysOld must be a finite number of days, got: ${String(daysOld)}`);
    }
    const db = getDb();
    const cutoff = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString();
    const prune = db.transaction((before) => {
        // Delete test_results for old runs first (FK constraint)
        db.prepare(`
            DELETE FROM test_results
            WHERE run_id IN (SELECT id FROM runs WHERE created_at < ?)
        `).run(before);
        return db.prepare('DELETE FROM runs WHERE created_at < ?').run(before).changes;
    });
    return prune(cutoff);
}
|
|
124
|
+
/**
 * Collect summary statistics about the flake database.
 *
 * @param {string} [repo] - When given (and non-empty), counts are limited to
 *   this repo; otherwise they cover every repo in the database.
 * @returns {{run_count: number, test_result_count: number, flaky_test_count: number, db_path: string, db_size_bytes: number}}
 *   Row counts, the number of tests with at least one failing ('fail' or
 *   'error') and one passing result, and the database file path and size.
 */
export function getDbStats(repo) {
    const db = getDb();
    const path = dbPath();
    // Scoped queries take the repo as their single bind parameter; global ones take none.
    const bindings = repo ? [repo] : [];
    const countRows = (sql) => db.prepare(sql).get(...bindings)?.count ?? 0;

    const runCount = countRows(repo
        ? 'SELECT COUNT(*) AS count FROM runs WHERE repo = ?'
        : 'SELECT COUNT(*) AS count FROM runs');

    const testResultCount = countRows(repo
        ? `
            SELECT COUNT(*) AS count
            FROM test_results tr
            JOIN runs r ON tr.run_id = r.id
            WHERE r.repo = ?
        `
        : 'SELECT COUNT(*) AS count FROM test_results');

    // A test counts as flaky when it has both failing and passing results.
    // Without a repo filter, tests are grouped per (repo, test_name).
    const flakyTestCount = countRows(repo
        ? `
            SELECT COUNT(*) AS count
            FROM (
                SELECT
                    tr.test_name
                FROM test_results tr
                JOIN runs r ON tr.run_id = r.id
                WHERE r.repo = ?
                GROUP BY tr.test_name
                HAVING
                    SUM(CASE WHEN tr.status IN ('fail', 'error') THEN 1 ELSE 0 END) > 0
                    AND SUM(CASE WHEN tr.status = 'pass' THEN 1 ELSE 0 END) > 0
            ) t
        `
        : `
            SELECT COUNT(*) AS count
            FROM (
                SELECT
                    r.repo,
                    tr.test_name
                FROM test_results tr
                JOIN runs r ON tr.run_id = r.id
                GROUP BY r.repo, tr.test_name
                HAVING
                    SUM(CASE WHEN tr.status IN ('fail', 'error') THEN 1 ELSE 0 END) > 0
                    AND SUM(CASE WHEN tr.status = 'pass' THEN 1 ELSE 0 END) > 0
            ) t
        `);

    return {
        run_count: runCount,
        test_result_count: testResultCount,
        flaky_test_count: flakyTestCount,
        db_path: path,
        db_size_bytes: statSync(path).size,
    };
}
|
|
175
|
+
/**
 * Record a remediation for a run. Does nothing when a remediation already
 * exists for the same (repo, run_id) pair.
 *
 * @param {string} repo - Repository the run belongs to.
 * @param {string | number} runId - Run identifier; stored as a string.
 * @param {string} fixAction - Fix action that was applied.
 * @param {string} description - Description of the fix.
 * @param {string} [resolvedCommit] - Commit that resolved the failure; stored as NULL when omitted.
 */
export function markResolved(repo, runId, fixAction, description, resolvedCommit) {
    const db = getDb();
    const runKey = String(runId);
    const lookup = db.prepare('SELECT id FROM remediations WHERE repo = ? AND run_id = ?');
    if (!lookup.get(repo, runKey)) {
        const insert = db.prepare('INSERT INTO remediations (repo, run_id, fix_action, description, resolved_at, resolved_commit) VALUES (?, ?, ?, ?, ?, ?)');
        insert.run(repo, runKey, fixAction, description, new Date().toISOString(), resolvedCommit ?? null);
    }
}
|
|
183
|
+
/**
 * Attach a later clean run to the remediation(s) recorded for a run, marking
 * the fix as verified.
 *
 * @param {string} repo - Repository the remediation belongs to.
 * @param {string | number} runId - Run whose remediation is being verified.
 * @param {string | number} cleanRunId - Clean run that confirms the fix; stored as a string.
 */
export function markVerified(repo, runId, cleanRunId) {
    const db = getDb();
    const update = db.prepare('UPDATE remediations SET verified_clean_run_id = ? WHERE repo = ? AND run_id = ?');
    update.run(String(cleanRunId), repo, String(runId));
}
|
|
189
|
+
/**
 * List the remediations recorded for a repo, newest first (descending id).
 *
 * @param {string} repo - Repository to look up.
 * @returns {Array<object>} Remediation rows where NULL `resolved_commit` and
 *   `verified_clean_run_id` become `undefined`, plus an `is_verified` flag.
 */
export function getRemediations(repo) {
    const db = getDb();
    const query = db.prepare(`
        SELECT id, repo, run_id, fix_action, description, resolved_at, resolved_commit, verified_clean_run_id
        FROM remediations
        WHERE repo = ?
        ORDER BY id DESC
    `);
    const remediations = [];
    for (const row of query.all(repo)) {
        // Copy the row, then overwrite in place so the column order is kept.
        const entry = { ...row };
        entry.resolved_commit = row.resolved_commit ?? undefined;
        entry.verified_clean_run_id = row.verified_clean_run_id ?? undefined;
        entry.is_verified = Boolean(row.verified_clean_run_id);
        remediations.push(entry);
    }
    return remediations;
}
|
|
100
205
|
/** Close the DB (useful in tests). */
|
|
101
206
|
export function closeDb() {
|
|
102
207
|
if (_db) {
|