@htekdev/actions-debugger 1.0.15 → 1.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/errors/caching-artifacts/cache-save-same-key-html-conflict.yml +109 -0
- package/errors/caching-artifacts/upload-artifact-v4-large-file-macos-hang.yml +111 -0
- package/errors/known-unsolved/no-automatic-job-retry.yml +92 -0
- package/errors/known-unsolved/workflow-50-rerun-limit.yml +110 -0
- package/errors/permissions-auth/check-run-status-modification-blocked.yml +134 -0
- package/errors/runner-environment/macos-13-deprecation-brownout.yml +93 -0
- package/errors/runner-environment/multi-runtime-nov2025-removal.yml +120 -0
- package/errors/runner-environment/ubuntu-2004-retirement-brownout.yml +107 -0
- package/errors/silent-failures/github-env-same-step-not-available.yml +72 -0
- package/errors/triggers/push-pull-request-duplicate-runs.yml +74 -0
- package/errors/triggers/workflow-dispatch-inputs-string-coercion.yml +89 -0
- package/errors/yaml-syntax/continue-on-error-env-context-rejected.yml +130 -0
- package/package.json +1 -1
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
id: caching-artifacts-020
|
|
2
|
+
title: "actions/cache save fails with HTML error when identical key already exists for same ref"
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: warning
|
|
5
|
+
tags:
|
|
6
|
+
- cache
|
|
7
|
+
- cache-save
|
|
8
|
+
- 409-conflict
|
|
9
|
+
- pr-ref
|
|
10
|
+
- retry-loop
|
|
11
|
+
- key-collision
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "Attempt \\d+ of \\d+ failed with error.*Unexpected token '<'"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "cache.*already exists.*ref|key.*already.*saved.*ref"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "Unexpected token '<'.*DOCTYPE|<!DOCTYPE.*cache"
|
|
18
|
+
flags: "im"
|
|
19
|
+
error_messages:
|
|
20
|
+
- "Attempt 1 of 5 failed with error: Unexpected token '<', \"<!DOCTYPE ...\" is not valid JSON"
|
|
21
|
+
- "Failed to save: Unexpected token '<'"
|
|
22
|
+
- "Attempt 5 of 5 failed with error: Unexpected token '<'"
|
|
23
|
+
root_cause: |
|
|
24
|
+
actions/cache v3 and v4 use the GitHub cache service to save cache entries
|
|
25
|
+
scoped to a key + ref combination. When a workflow run saves a cache entry
|
|
26
|
+
under a given key for refs/pull/N/merge, subsequent runs against the same
|
|
27
|
+
PR ref that produce the EXACT same cache key will attempt to save again.
|
|
28
|
+
|
|
29
|
+
The cache service rejects the duplicate write with an HTTP 409 response.
|
|
30
|
+
However, the response body is an HTML error page rather than a JSON error
|
|
31
|
+
object. The cache toolkit's retry logic does not recognize the HTML response
|
|
32
|
+
as a permanent, non-retryable conflict — it treats it as a transient network
|
|
33
|
+
error and retries up to 5 times with exponential backoff (~20-30 seconds
|
|
34
|
+
total wasted time before all attempts are exhausted).
|
|
35
|
+
|
|
36
|
+
The cache entry itself is not updated or overwritten (actions/cache v4 uses
|
|
37
|
+
immutable cache entries), but the 5-retry failure loop causes the post-job
|
|
38
|
+
cleanup step to report errors in the workflow log, adding noise and wasted
|
|
39
|
+
time to every successive commit pushed to a long-lived PR branch.
|
|
40
|
+
|
|
41
|
+
This is most visible in actions that use deterministic, content-addressed
|
|
42
|
+
cache keys (e.g., a Bun binary cache keyed on its checksum) where every
|
|
43
|
+
run produces the same key for the same PR.
|
|
44
|
+
fix: |
|
|
45
|
+
Add a dynamic component to the cache key that changes between runs so that
|
|
46
|
+
consecutive runs on the same PR ref never attempt to save over an existing
|
|
47
|
+
entry. Using ${{ github.run_id }} (optionally combined with ${{ github.run_attempt }}) as part of
|
|
48
|
+
the key ensures each run gets a unique save slot.
|
|
49
|
+
|
|
50
|
+
If you specifically want a stable cache that all runs on a PR share (only
|
|
51
|
+
save once), use restore-keys for lookup and add a save condition so the
|
|
52
|
+
save step only runs when a cache miss occurred:
|
|
53
|
+
|
|
54
|
+
if: steps.cache.outputs.cache-hit != 'true'
|
|
55
|
+
|
|
56
|
+
For large dependency caches, prefer restore-keys with a prefix approach so
|
|
57
|
+
partial hits are possible and saves only happen when the lockfile changes.
|
|
58
|
+
fix_code:
|
|
59
|
+
- language: yaml
|
|
60
|
+
label: "Use restore-keys + conditional save to avoid same-key conflicts"
|
|
61
|
+
code: |
|
|
62
|
+
jobs:
|
|
63
|
+
build:
|
|
64
|
+
runs-on: ubuntu-latest
|
|
65
|
+
steps:
|
|
66
|
+
- uses: actions/checkout@v4
|
|
67
|
+
|
|
68
|
+
- name: Cache Bun binary
|
|
69
|
+
id: bun-cache
|
|
70
|
+
uses: actions/cache/restore@v4
|
|
71
|
+
with:
|
|
72
|
+
path: ~/.bun/install/cache
|
|
73
|
+
# Lock file hash in key + restore-keys for partial fallback
|
|
74
|
+
key: ${{ runner.os }}-bun-${{ hashFiles('bun.lockb') }}
|
|
75
|
+
restore-keys: |
|
|
76
|
+
${{ runner.os }}-bun-
|
|
77
|
+
|
|
78
|
+
# Only save if there was a cache miss — avoids HTML 409 on same PR
|
|
79
|
+
- name: Save Bun cache (miss only)
|
|
80
|
+
if: steps.bun-cache.outputs.cache-hit != 'true'
|
|
81
|
+
uses: actions/cache/save@v4
|
|
82
|
+
with:
|
|
83
|
+
path: ~/.bun/install/cache
|
|
84
|
+
key: ${{ steps.bun-cache.outputs.cache-primary-key }}
|
|
85
|
+
- language: yaml
|
|
86
|
+
label: "Include run_id in cache key to guarantee a unique save slot per run"
|
|
87
|
+
code: |
|
|
88
|
+
- uses: actions/cache@v4
|
|
89
|
+
with:
|
|
90
|
+
path: .build/
|
|
91
|
+
# run_id makes every run's save unique — no 409 conflicts
|
|
92
|
+
key: ${{ runner.os }}-build-${{ hashFiles('**/*.go') }}-${{ github.run_id }}
|
|
93
|
+
restore-keys: |
|
|
94
|
+
${{ runner.os }}-build-${{ hashFiles('**/*.go') }}-
|
|
95
|
+
${{ runner.os }}-build-
|
|
96
|
+
prevention:
|
|
97
|
+
- "Use restore-keys for cache lookup and actions/cache/save with an if: cache-hit != 'true' guard to skip redundant saves"
|
|
98
|
+
- "Avoid fully deterministic cache keys that never change between runs on the same PR; add a per-run or per-lockfile component"
|
|
99
|
+
- "Treat 'Unexpected token <' in cache post-job as a 409 key-collision signal, not a network error"
|
|
100
|
+
- "Use actions/cache v4's separate save/restore actions for fine-grained control over when saves actually occur"
|
|
101
|
+
docs:
|
|
102
|
+
- url: "https://github.com/actions/cache"
|
|
103
|
+
label: "actions/cache — GitHub Actions cache action documentation"
|
|
104
|
+
- url: "https://github.com/actions/cache/blob/main/tips-and-workarounds.md"
|
|
105
|
+
label: "actions/cache — Tips and workarounds (immutability, restore-keys)"
|
|
106
|
+
- url: "https://github.com/anthropics/claude-code-action/issues/1252"
|
|
107
|
+
label: "anthropics/claude-code-action#1252 — Cache save HTML 409 on same PR ref"
|
|
108
|
+
- url: "https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache"
|
|
109
|
+
label: "GitHub Docs — Cache restrictions and immutability"
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
id: caching-artifacts-021
|
|
2
|
+
title: "upload-artifact v4 Silently Hangs on Large Files (500MB+) on macOS Runners"
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- upload-artifact
|
|
7
|
+
- macos
|
|
8
|
+
- large-file
|
|
9
|
+
- hang
|
|
10
|
+
- timeout
|
|
11
|
+
- silent-failure
|
|
12
|
+
- v4
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: "Uploaded bytes \\d+"
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: "upload-artifact.*stall|stall.*upload-artifact"
|
|
17
|
+
flags: "i"
|
|
18
|
+
- regex: "The operation was cancelled.*upload|upload.*operation was cancelled"
|
|
19
|
+
flags: "i"
|
|
20
|
+
- regex: "Error: The process.*took too long.*upload-artifact"
|
|
21
|
+
flags: "i"
|
|
22
|
+
error_messages:
|
|
23
|
+
- "Uploaded bytes 8388608"
|
|
24
|
+
- "The runner has received a shutdown signal. This can happen when the runner service is stopped, or a manually started runner is canceled."
|
|
25
|
+
root_cause: |
|
|
26
|
+
`actions/upload-artifact@v4` intermittently stalls during upload on macOS GitHub-hosted
|
|
27
|
+
runners (macos-13-xl-arm64, macos-14-xlarge, macos-15) when artifact size is approximately
|
|
28
|
+
500 MB or larger. The stall manifests as the upload progress halting after logging "Uploaded
|
|
29
|
+
bytes XXXXXXXXX" with no further output — the job then exceeds its timeout and is cancelled
|
|
30
|
+
by GitHub without an explicit error message.
|
|
31
|
+
|
|
32
|
+
This behavior was reported and tracked in actions/upload-artifact#527. The hang appears
|
|
33
|
+
intermittent (roughly 30–50% failure rate for affected workflows), which makes it difficult
|
|
34
|
+
to diagnose in standard CI logs — the workflow shows as "cancelled" rather than "failed",
|
|
35
|
+
masking the root cause.
|
|
36
|
+
|
|
37
|
+
Contributing factors observed in community reports:
|
|
38
|
+
- Large uncompressed artifacts (binaries, build artifacts, test reports with raw data)
|
|
39
|
+
- macOS ARM64 hosted runners appear more susceptible than Linux or Windows runners
|
|
40
|
+
- Compression level settings do not consistently prevent the hang
|
|
41
|
+
|
|
42
|
+
This is a silent failure because:
|
|
43
|
+
1. The upload simply stops with no error log entry
|
|
44
|
+
2. The job shows as "cancelled" (not "failed") in the GitHub UI
|
|
45
|
+
3. Downstream artifact-download steps fail with "no artifact found" — the real cause is upstream
|
|
46
|
+
fix: |
|
|
47
|
+
Use one or more of these mitigations while the issue is tracked by the actions team:
|
|
48
|
+
|
|
49
|
+
1. **Split large artifacts**: Break large upload paths into multiple smaller upload steps, each
|
|
50
|
+
under ~200MB. This reduces the risk of hitting the hang threshold.
|
|
51
|
+
|
|
52
|
+
2. **Add an explicit timeout**: Set `timeout-minutes` on the upload step to detect hangs faster
|
|
53
|
+
and fail with a clear error rather than waiting for the job-level timeout.
|
|
54
|
+
|
|
55
|
+
3. **Retry the upload**: Wrap the upload step in a retry loop or use a community action like
|
|
56
|
+
`nick-fields/retry` to automatically re-attempt on failure.
|
|
57
|
+
|
|
58
|
+
4. **Use direct storage for very large artifacts**: For artifacts over 1GB, upload directly to
|
|
59
|
+
S3, Azure Blob Storage, or GCS using provider CLI tools. Use upload-artifact only for test
|
|
60
|
+
reports and smaller build outputs.
|
|
61
|
+
|
|
62
|
+
5. **Switch to Linux runners**: If macOS-specific features are not required for the upload
|
|
63
|
+
phase, run artifact collection on an ubuntu-latest runner where the hang does not occur.
|
|
64
|
+
fix_code:
|
|
65
|
+
- language: yaml
|
|
66
|
+
label: "Add a timeout to the upload step so hangs fail fast"
|
|
67
|
+
code: |
|
|
68
|
+
- name: Upload large artifact
|
|
69
|
+
uses: actions/upload-artifact@v4
|
|
70
|
+
timeout-minutes: 10 # fail fast instead of waiting for job timeout
|
|
71
|
+
with:
|
|
72
|
+
name: release-binaries
|
|
73
|
+
path: dist/
|
|
74
|
+
compression-level: 6
|
|
75
|
+
retention-days: 7
|
|
76
|
+
- language: yaml
|
|
77
|
+
label: "Split large artifact into parts to reduce hang risk"
|
|
78
|
+
code: |
|
|
79
|
+
- name: Upload binaries (part 1)
|
|
80
|
+
uses: actions/upload-artifact@v4
|
|
81
|
+
with:
|
|
82
|
+
name: binaries-part1
|
|
83
|
+
path: dist/platform-a/
|
|
84
|
+
|
|
85
|
+
- name: Upload binaries (part 2)
|
|
86
|
+
uses: actions/upload-artifact@v4
|
|
87
|
+
with:
|
|
88
|
+
name: binaries-part2
|
|
89
|
+
path: dist/platform-b/
|
|
90
|
+
- language: yaml
|
|
91
|
+
label: "Upload to S3 for very large artifacts (bypass upload-artifact)"
|
|
92
|
+
code: |
|
|
93
|
+
- name: Upload large artifact to S3
|
|
94
|
+
env:
|
|
95
|
+
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
|
96
|
+
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
|
97
|
+
run: |
|
|
98
|
+
aws s3 cp dist/release.tar.gz s3://my-bucket/artifacts/${{ github.sha }}/release.tar.gz
|
|
99
|
+
echo "Uploaded to s3://my-bucket/artifacts/${{ github.sha }}/release.tar.gz"
|
|
100
|
+
prevention:
|
|
101
|
+
- "Keep individual artifact uploads under 200MB per upload step on macOS runners to avoid the hang threshold."
|
|
102
|
+
- "Always set `timeout-minutes` on upload steps for large files so the CI job fails fast with a clear error instead of silently timing out."
|
|
103
|
+
- "Monitor for `cancelled` status on jobs that use upload-artifact on macOS — these may be silently failing uploads."
|
|
104
|
+
- "For release artifacts exceeding 1GB, use cloud storage (S3, Azure Blob, GCS) directly rather than upload-artifact."
|
|
105
|
+
docs:
|
|
106
|
+
- url: "https://github.com/actions/upload-artifact/issues/527"
|
|
107
|
+
label: "actions/upload-artifact#527 — macOS large-file upload hang report and discussion"
|
|
108
|
+
- url: "https://github.com/actions/upload-artifact"
|
|
109
|
+
label: "actions/upload-artifact — official repository and documentation"
|
|
110
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/storing-workflow-data-as-artifacts"
|
|
111
|
+
label: "GitHub Docs — storing workflow data as artifacts"
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
id: known-unsolved-019
|
|
2
|
+
title: "No built-in automatic retry for individual failed jobs — only full workflow re-run or manual step"
|
|
3
|
+
category: known-unsolved
|
|
4
|
+
severity: limitation
|
|
5
|
+
tags:
|
|
6
|
+
- retry
|
|
7
|
+
- flaky
|
|
8
|
+
- job
|
|
9
|
+
- re-run
|
|
10
|
+
- known-limitation
|
|
11
|
+
- matrix
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "retry|re-run|rerun|flaky"
|
|
14
|
+
flags: "i"
|
|
15
|
+
error_messages:
|
|
16
|
+
- "There is no native retry: N option for jobs in GitHub Actions"
|
|
17
|
+
root_cause: |
|
|
18
|
+
GitHub Actions provides no built-in retry: count field at the job level. When a
|
|
19
|
+
job fails due to a flaky test, a transient network error, or an intermittent
|
|
20
|
+
service dependency, the only native options are: (1) manually re-run the failed
|
|
21
|
+
job via the UI or API, (2) re-run the entire workflow, or (3) add shell-level
|
|
22
|
+
retry loops inside run steps. There is no declarative way to say "retry this job
|
|
23
|
+
up to N times automatically before marking it as failed." This is a long-standing
|
|
24
|
+
platform limitation tracked in multiple GitHub Community discussions. The
|
|
25
|
+
job-level timeout-minutes and step-level timeout-minutes exist, but neither
|
|
26
|
+
provides automatic retry semantics. For matrix jobs, a single flaky matrix entry
|
|
27
|
+
failing causes the whole matrix (or the workflow, with the default fail-fast: true) to
|
|
28
|
+
fail with no automatic per-matrix-slot retry.
|
|
29
|
+
fix: |
|
|
30
|
+
Workarounds depend on the scope of flakiness. For step-level flakiness, use a
|
|
31
|
+
shell retry loop or the nick-fields/retry third-party action. For job-level
|
|
32
|
+
flakiness in CI, consider using the GitHub REST API with a calling orchestrator
|
|
33
|
+
workflow that re-dispatches on failure. For matrix flakiness, capture failed
|
|
34
|
+
matrix entries and dispatch a targeted follow-up workflow run. Manual re-run via
|
|
35
|
+
gh run rerun --failed is the simplest workaround for human-in-the-loop flows.
|
|
36
|
+
fix_code:
|
|
37
|
+
- language: yaml
|
|
38
|
+
label: "Workaround — shell-level retry loop inside a run step"
|
|
39
|
+
code: |
|
|
40
|
+
steps:
|
|
41
|
+
- name: Flaky network step with retry
|
|
42
|
+
run: |
|
|
43
|
+
for i in 1 2 3; do
|
|
44
|
+
curl -sf https://api.example.com/deploy && break || [ "$i" -lt 3 ] || exit 1
|
|
45
|
+
echo "Attempt $i failed, retrying in 10s..."
|
|
46
|
+
sleep 10
|
|
47
|
+
done
|
|
48
|
+
- language: yaml
|
|
49
|
+
label: "Workaround — nick-fields/retry action for step-level retry"
|
|
50
|
+
code: |
|
|
51
|
+
steps:
|
|
52
|
+
- name: Retry flaky step
|
|
53
|
+
uses: nick-fields/retry@v3
|
|
54
|
+
with:
|
|
55
|
+
timeout_minutes: 10
|
|
56
|
+
max_attempts: 3
|
|
57
|
+
command: npm test
|
|
58
|
+
- language: yaml
|
|
59
|
+
label: "Workaround — re-run only failed jobs via gh CLI after workflow completes"
|
|
60
|
+
code: |
|
|
61
|
+
# In a post-pipeline script or calling workflow:
|
|
62
|
+
# gh run rerun <RUN_ID> --failed re-runs only the jobs that failed
|
|
63
|
+
# Automate this with an on.workflow_run trigger checking conclusion == 'failure'
|
|
64
|
+
on:
|
|
65
|
+
workflow_run:
|
|
66
|
+
workflows: ["CI"]
|
|
67
|
+
types: [completed]
|
|
68
|
+
|
|
69
|
+
jobs:
|
|
70
|
+
auto-retry:
|
|
71
|
+
if: github.event.workflow_run.conclusion == 'failure' && github.event.workflow_run.run_attempt == 1
|
|
72
|
+
runs-on: ubuntu-latest
|
|
73
|
+
steps:
|
|
74
|
+
- name: Re-run failed jobs once
|
|
75
|
+
env:
|
|
76
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
77
|
+
run: |
|
|
78
|
+
gh run rerun ${{ github.event.workflow_run.id }} --failed --repo ${{ github.repository }}
|
|
79
|
+
prevention:
|
|
80
|
+
- "Design tests and deployment steps to be idempotent so manual re-runs are safe"
|
|
81
|
+
- "Use step-level retry wrappers (shell loops or nick-fields/retry) for known-flaky external calls"
|
|
82
|
+
- "Separate flaky integration tests into their own workflow so re-runs are scoped and cheaper"
|
|
83
|
+
- "Track flakiness metrics; persistent flakiness signals a real bug, not just a retry need"
|
|
84
|
+
docs:
|
|
85
|
+
- url: "https://github.com/orgs/community/discussions/26186"
|
|
86
|
+
label: "GitHub Community — native job retry support request"
|
|
87
|
+
- url: "https://docs.github.com/en/actions/managing-workflow-runs-and-deployments/managing-workflow-runs/re-running-workflows-and-jobs"
|
|
88
|
+
label: "Re-running workflows and jobs — GitHub Actions"
|
|
89
|
+
- url: "https://github.com/nick-fields/retry"
|
|
90
|
+
label: "nick-fields/retry — step-level retry action (third-party workaround)"
|
|
91
|
+
- url: "https://cli.github.com/manual/gh_run_rerun"
|
|
92
|
+
label: "gh run rerun — GitHub CLI reference"
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
id: known-unsolved-018
|
|
2
|
+
title: "Workflow rerun limit of 50 exceeded — subsequent reruns fail permanently"
|
|
3
|
+
category: known-unsolved
|
|
4
|
+
severity: limitation
|
|
5
|
+
tags:
|
|
6
|
+
- rerun
|
|
7
|
+
- retry
|
|
8
|
+
- automation
|
|
9
|
+
- limits
|
|
10
|
+
- platform-limit
|
|
11
|
+
patterns:
|
|
12
|
+
- regex: "exceeded.*maximum.*reruns|maximum.*reruns.*exceeded"
|
|
13
|
+
flags: "i"
|
|
14
|
+
- regex: "rerun limit.*50|50.*rerun limit"
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: "This workflow run was cancelled because it exceeded"
|
|
17
|
+
flags: "i"
|
|
18
|
+
error_messages:
|
|
19
|
+
- "This workflow run exceeded the limit of 50 reruns"
|
|
20
|
+
- "exceeded the maximum number of reruns (50)"
|
|
21
|
+
- "Cannot rerun this workflow: rerun limit reached"
|
|
22
|
+
root_cause: |
|
|
23
|
+
Since April 10, 2026, GitHub Actions enforces a hard limit of 50 reruns
|
|
24
|
+
per individual workflow run (counting both full reruns and re-runs of
|
|
25
|
+
individual job subsets). Once a workflow run reaches 50 attempts, any
|
|
26
|
+
further rerun request — whether triggered manually, via the GitHub CLI,
|
|
27
|
+
the REST API, or an automated rerun action — will fail with an error
|
|
28
|
+
annotation on the check suite.
|
|
29
|
+
|
|
30
|
+
The limit was introduced to stop automation patterns that use infinite-retry
|
|
31
|
+
loops (e.g., rerunning every failed job automatically, retrying flaky
|
|
32
|
+
infrastructure hundreds of times) from adding excessive load to GitHub's
|
|
33
|
+
infrastructure. The count is tied to the original workflow run ID — creating
|
|
34
|
+
a new workflow run by re-pushing or re-dispatching does NOT inherit the
|
|
35
|
+
count.
|
|
36
|
+
|
|
37
|
+
This limitation is most frequently encountered by:
|
|
38
|
+
- Flaky test environments that rely on automated rerun tooling
|
|
39
|
+
(k1LoW/rerun-action, lewagon/retry-workflow-action, etc.)
|
|
40
|
+
- Long-running deployment pipelines with retry-until-success patterns
|
|
41
|
+
- Teams using merge queues or merge trains that auto-requeue failed checks
|
|
42
|
+
- Internal automation that retries status checks repeatedly against a PR
|
|
43
|
+
|
|
44
|
+
There is no way to exceed this limit — it is enforced server-side with no
|
|
45
|
+
configuration option. The only resolution is to trigger a fresh workflow run.
|
|
46
|
+
fix: |
|
|
47
|
+
There is no way to raise or bypass the 50-rerun limit. Mitigation strategies:
|
|
48
|
+
|
|
49
|
+
1. Fix the underlying flakiness so retries are rare (<50 per workflow run).
|
|
50
|
+
2. Trigger a fresh workflow run rather than retrying the same run ID:
|
|
51
|
+
- Re-push to the branch or rebase (creates new push event → new run)
|
|
52
|
+
- For pull_request: close and reopen the PR (creates new run ID)
|
|
53
|
+
- For workflow_dispatch: dispatch again (creates a completely new run)
|
|
54
|
+
3. Redesign retry logic to use a separate `workflow_run` triggered workflow
|
|
55
|
+
that dispatches a NEW workflow instead of rerunning the same one.
|
|
56
|
+
4. For flaky infra: fix at the infra layer (idempotent step design, retry
|
|
57
|
+
at the shell/script level within a single attempt) rather than rerunning
|
|
58
|
+
the entire workflow.
|
|
59
|
+
fix_code:
|
|
60
|
+
- language: yaml
|
|
61
|
+
label: "Use shell-level retry for flaky steps instead of workflow-level rerun"
|
|
62
|
+
code: |
|
|
63
|
+
jobs:
|
|
64
|
+
deploy:
|
|
65
|
+
runs-on: ubuntu-latest
|
|
66
|
+
steps:
|
|
67
|
+
- name: Flaky deploy step with shell retry
|
|
68
|
+
run: |
|
|
69
|
+
# Retry up to 5 times with 30s backoff — no workflow rerun needed
|
|
70
|
+
for i in 1 2 3 4 5; do
|
|
71
|
+
echo "Attempt $i..."
|
|
72
|
+
./scripts/deploy.sh && break || {
|
|
73
|
+
if [ $i -lt 5 ]; then
|
|
74
|
+
echo "Deploy failed, retrying in 30s..."
|
|
75
|
+
sleep 30
|
|
76
|
+
else
|
|
77
|
+
echo "All $i attempts failed"
|
|
78
|
+
exit 1
|
|
79
|
+
fi
|
|
80
|
+
}
|
|
81
|
+
done
|
|
82
|
+
- language: yaml
|
|
83
|
+
label: "Dispatch a new workflow run instead of rerunning the same run"
|
|
84
|
+
code: |
|
|
85
|
+
# If you need automated rerun logic, trigger a fresh dispatch
|
|
86
|
+
# rather than using gh run rerun (which increments the rerun counter)
|
|
87
|
+
jobs:
|
|
88
|
+
retry-via-dispatch:
|
|
89
|
+
runs-on: ubuntu-latest
|
|
90
|
+
if: failure()
|
|
91
|
+
steps:
|
|
92
|
+
- name: Trigger fresh run via workflow_dispatch
|
|
93
|
+
run: |
|
|
94
|
+
gh workflow run ci.yml \
|
|
95
|
+
--ref ${{ github.ref }} \
|
|
96
|
+
--field triggered_by=auto-retry
|
|
97
|
+
env:
|
|
98
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
99
|
+
prevention:
|
|
100
|
+
- "Treat workflow reruns as a debugging tool, not a reliability mechanism — fix flakiness at the source"
|
|
101
|
+
- "Use shell-level retry loops (for loop with sleep) for individual steps that are inherently flaky"
|
|
102
|
+
- "Use workflow_dispatch to start a fresh run rather than rerunning the same run ID when automation needs to retry"
|
|
103
|
+
- "Monitor workflows that regularly exceed 5 reruns — they signal a deeper reliability problem that needs fixing"
|
|
104
|
+
docs:
|
|
105
|
+
- url: "https://github.blog/changelog/2026-04-10-actions-workflows-are-limited-to-50-reruns/"
|
|
106
|
+
label: "GitHub Changelog — Actions workflows are limited to 50 reruns (Apr 2026)"
|
|
107
|
+
- url: "https://docs.github.com/en/actions/managing-workflow-runs/re-running-workflows-and-jobs"
|
|
108
|
+
label: "GitHub Docs — Re-running workflows and jobs"
|
|
109
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflow_dispatch"
|
|
110
|
+
label: "GitHub Docs — workflow_dispatch event"
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
id: permissions-auth-018
|
|
2
|
+
title: "GITHUB_TOKEN cannot modify check run status or conclusion after March 2025"
|
|
3
|
+
category: permissions-auth
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- check-runs
|
|
7
|
+
- github-token
|
|
8
|
+
- checks-api
|
|
9
|
+
- permissions
|
|
10
|
+
- breaking-change
|
|
11
|
+
patterns:
|
|
12
|
+
- regex: "Check run status and conclusions can only be updated internally by GitHub Actions"
|
|
13
|
+
flags: "i"
|
|
14
|
+
- regex: "check_run.*status.*conclusion.*updated internally"
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: "Changes to check run status modification"
|
|
17
|
+
flags: "i"
|
|
18
|
+
error_messages:
|
|
19
|
+
- "Error: Check run status and conclusions can only be updated internally by GitHub Actions. Please see https://github.blog/changelog/2025-02-12-notice-of-upcoming-deprecations-and-breaking-changes-for-github-actions/#changes-to-check-run-status-modification"
|
|
20
|
+
- "Check run status and conclusions can only be updated internally by GitHub Actions"
|
|
21
|
+
root_cause: |
|
|
22
|
+
GitHub deprecated external modification of check run status and conclusion
|
|
23
|
+
fields on March 31, 2025. Prior to this change, workflows and third-party
|
|
24
|
+
actions (e.g., LouisBrunner/checks-action, custom Checks API calls) could
|
|
25
|
+
use the repository's GITHUB_TOKEN to call the REST API and update the status
|
|
26
|
+
(queued, in_progress, completed) or conclusion (success, failure, etc.) of
|
|
27
|
+
an existing check run that was originally created by GitHub Actions.
|
|
28
|
+
|
|
29
|
+
GitHub blocked this pattern to prevent confusion and race conditions that
|
|
30
|
+
could arise when external code overwrote the status of a check run that
|
|
31
|
+
GitHub Actions was still managing. Starting March 31, 2025, any attempt
|
|
32
|
+
to PATCH an existing Actions-created check run's status or conclusion via
|
|
33
|
+
GITHUB_TOKEN returns an error.
|
|
34
|
+
|
|
35
|
+
This affects:
|
|
36
|
+
- Actions that wrap the GitHub Checks API to create multi-step annotations
|
|
37
|
+
(e.g., LouisBrunner/checks-action)
|
|
38
|
+
- Custom workflows that read a check_run ID from a prior step and patch it
|
|
39
|
+
- External tooling that patches check run state using the built-in token
|
|
40
|
+
- Any pattern where one step creates a check run and a later step or job
|
|
41
|
+
tries to set it to "completed"
|
|
42
|
+
|
|
43
|
+
Check runs that were NOT created by GitHub Actions (created via a GitHub App
|
|
44
|
+
installation token) are not affected and can still be updated by their creator.
|
|
45
|
+
fix: |
|
|
46
|
+
Workflows that need to update check run status or conclusion must create
|
|
47
|
+
those check runs with a non-GITHUB_TOKEN credential so that the same
|
|
48
|
+
credential can also update them later.
|
|
49
|
+
|
|
50
|
+
Option 1 (GitHub App): Use actions/create-github-app-token to generate an
|
|
51
|
+
installation token and create check runs with that token. The same GitHub App
|
|
52
|
+
token can then update the check runs it created.
|
|
53
|
+
|
|
54
|
+
Option 2 (Remove update steps): If the check run is only being updated to
|
|
55
|
+
mark it "completed" at the end of a job, let GitHub Actions manage the
|
|
56
|
+
conclusion automatically. Remove the final PATCH step.
|
|
57
|
+
|
|
58
|
+
Option 3 (Migrate to annotations): Replace custom check run status tracking
|
|
59
|
+
with GitHub Actions step annotations (::notice::, ::warning::, ::error::).
|
|
60
|
+
These use the workflow's built-in UI and require no Checks API calls.
|
|
61
|
+
fix_code:
|
|
62
|
+
- language: yaml
|
|
63
|
+
label: "Create and update check runs using a GitHub App installation token"
|
|
64
|
+
code: |
|
|
65
|
+
jobs:
|
|
66
|
+
annotate:
|
|
67
|
+
runs-on: ubuntu-latest
|
|
68
|
+
permissions:
|
|
69
|
+
# id-token:write needed if using OIDC-based App token
|
|
70
|
+
contents: read
|
|
71
|
+
steps:
|
|
72
|
+
- name: Generate GitHub App token
|
|
73
|
+
id: app-token
|
|
74
|
+
uses: actions/create-github-app-token@v2
|
|
75
|
+
with:
|
|
76
|
+
app-id: ${{ vars.CHECKS_APP_ID }}
|
|
77
|
+
private-key: ${{ secrets.CHECKS_APP_PRIVATE_KEY }}
|
|
78
|
+
|
|
79
|
+
- name: Create check run
|
|
80
|
+
id: create-check
|
|
81
|
+
uses: actions/github-script@v7
|
|
82
|
+
with:
|
|
83
|
+
github-token: ${{ steps.app-token.outputs.token }}
|
|
84
|
+
script: |
|
|
85
|
+
const { data } = await github.rest.checks.create({
|
|
86
|
+
owner: context.repo.owner,
|
|
87
|
+
repo: context.repo.repo,
|
|
88
|
+
name: 'My Custom Check',
|
|
89
|
+
head_sha: context.sha,
|
|
90
|
+
status: 'in_progress',
|
|
91
|
+
});
|
|
92
|
+
return data.id;
|
|
93
|
+
|
|
94
|
+
- name: Do work...
|
|
95
|
+
run: echo "running checks..."
|
|
96
|
+
|
|
97
|
+
- name: Update check run to completed
|
|
98
|
+
uses: actions/github-script@v7
|
|
99
|
+
with:
|
|
100
|
+
# Use the App token — NOT github.token — to update the check
|
|
101
|
+
github-token: ${{ steps.app-token.outputs.token }}
|
|
102
|
+
script: |
|
|
103
|
+
await github.rest.checks.update({
|
|
104
|
+
owner: context.repo.owner,
|
|
105
|
+
repo: context.repo.repo,
|
|
106
|
+
check_run_id: ${{ steps.create-check.outputs.result }},
|
|
107
|
+
status: 'completed',
|
|
108
|
+
conclusion: 'success',
|
|
109
|
+
});
|
|
110
|
+
- language: yaml
|
|
111
|
+
label: "Use built-in step annotations instead of custom check runs"
|
|
112
|
+
code: |
|
|
113
|
+
- name: Emit warning annotation
|
|
114
|
+
run: echo "::warning file=src/main.js,line=42::Deprecated API usage detected"
|
|
115
|
+
|
|
116
|
+
- name: Emit error annotation (does not fail the step on its own)
|
|
117
|
+
run: echo "::error title=Lint Failed::3 lint violations found in src/"
|
|
118
|
+
|
|
119
|
+
# These annotations appear in the workflow summary and PR checks UI
|
|
120
|
+
# without any Checks API calls or external tokens
|
|
121
|
+
prevention:
|
|
122
|
+
- "Never use GITHUB_TOKEN to PATCH the status or conclusion of an Actions-created check run"
|
|
123
|
+
- "Create custom check runs using a GitHub App installation token so the same credential owns them end-to-end"
|
|
124
|
+
- "Prefer built-in step annotations (::warning::, ::error::) over custom Checks API calls for simple annotation needs"
|
|
125
|
+
- "Audit third-party actions that wrap the Checks API (e.g., LouisBrunner/checks-action) for compatibility with the March 2025 change"
|
|
126
|
+
docs:
|
|
127
|
+
- url: "https://github.blog/changelog/2025-02-12-notice-of-upcoming-deprecations-and-breaking-changes-for-github-actions/#changes-to-check-run-status-modification"
|
|
128
|
+
label: "GitHub Changelog — Changes to check run status modification (Feb 2025)"
|
|
129
|
+
- url: "https://docs.github.com/en/rest/checks/runs"
|
|
130
|
+
label: "GitHub REST API — Check Runs"
|
|
131
|
+
- url: "https://github.com/LouisBrunner/checks-action/issues/369"
|
|
132
|
+
label: "LouisBrunner/checks-action#369 — Error after March 2025 enforcement"
|
|
133
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-a-warning-message"
|
|
134
|
+
label: "GitHub Docs — Workflow commands for annotations"
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
id: runner-environment-050
|
|
2
|
+
title: "macOS 13 Runner Deprecated and Removed — Jobs Fail During Brownout Windows"
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- macos-13
|
|
7
|
+
- runner-deprecation
|
|
8
|
+
- brownout
|
|
9
|
+
- migration
|
|
10
|
+
- macos-14
|
|
11
|
+
- runner-images
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "The label 'macos-13' is not present on any runner"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "No runner matching the specified labels was found.*macos-13"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "macos-13.*deprecated|deprecated.*macos-13"
|
|
18
|
+
flags: "i"
|
|
19
|
+
- regex: "Request 'macos-13'.*could not be satisfied"
|
|
20
|
+
flags: "i"
|
|
21
|
+
error_messages:
|
|
22
|
+
- "The label 'macos-13' is not present on any runner"
|
|
23
|
+
- "No runner matching the specified labels was found: macos-13"
|
|
24
|
+
root_cause: |
|
|
25
|
+
GitHub deprecated and eventually removed the `macos-13` and `macos-13-xlarge` runner labels
|
|
26
|
+
in 2025 (announced in runner-images#13046). GitHub applied brownout windows before full removal:
|
|
27
|
+
during these windows the macOS 13 label is temporarily unavailable and any job requesting it
|
|
28
|
+
either hangs waiting for a runner or immediately fails with a "label not present" error.
|
|
29
|
+
|
|
30
|
+
After the final retirement date, the label is gone entirely. The retirement schedule followed
|
|
31
|
+
the same brownout → retirement pattern used for Ubuntu 20.04 and Windows 2019.
|
|
32
|
+
|
|
33
|
+
Common workflows affected:
|
|
34
|
+
- iOS/macOS app CI that pinned to `macos-13` for Xcode 15 compatibility
|
|
35
|
+
- Workflows that avoided `macos-14` (Apple Silicon) due to architecture differences
|
|
36
|
+
- Repos that never updated after initially picking `macos-13` at release time
|
|
37
|
+
fix: |
|
|
38
|
+
Migrate `runs-on: macos-13` to a supported macOS label. Recommended choices:
|
|
39
|
+
|
|
40
|
+
- `macos-latest` — automatically follows GitHub's current default (tracks major version bumps)
|
|
41
|
+
- `macos-15` — macOS 15 Sequoia, ARM64 (Apple Silicon), Xcode 16+
|
|
42
|
+
- `macos-14` — macOS 14 Sonoma, ARM64 (Apple Silicon), well-supported
|
|
43
|
+
- `macos-15-intel` or `macos-14-large` — for workflows requiring x86-64 architecture
|
|
44
|
+
|
|
45
|
+
Note: macOS 14+ runners are ARM64 by default. If your build toolchain requires x86-64, use
|
|
46
|
+
an explicitly-labeled Intel variant. Test Homebrew packages, build scripts, and any binary
|
|
47
|
+
tools on the new architecture before fully migrating.
|
|
48
|
+
fix_code:
|
|
49
|
+
- language: yaml
|
|
50
|
+
label: "Migrate from macos-13 to macos-15 (ARM64)"
|
|
51
|
+
code: |
|
|
52
|
+
jobs:
|
|
53
|
+
build:
|
|
54
|
+
# Before: runs-on: macos-13
|
|
55
|
+
runs-on: macos-15 # macOS 15 Sequoia, ARM64, Xcode 16+
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/checkout@v4
|
|
58
|
+
- name: Build and test
|
|
59
|
+
run: xcodebuild test -scheme MyApp -destination 'platform=iOS Simulator,name=iPhone 16'
|
|
60
|
+
- language: yaml
|
|
61
|
+
label: "Use macos-latest for automatic version tracking"
|
|
62
|
+
code: |
|
|
63
|
+
jobs:
|
|
64
|
+
build:
|
|
65
|
+
runs-on: macos-latest # tracks GitHub's current recommended version
|
|
66
|
+
steps:
|
|
67
|
+
- uses: actions/checkout@v4
|
|
68
|
+
- name: Build
|
|
69
|
+
run: swift build
|
|
70
|
+
- language: yaml
|
|
71
|
+
label: "Matrix testing across macOS versions"
|
|
72
|
+
code: |
|
|
73
|
+
jobs:
|
|
74
|
+
test:
|
|
75
|
+
strategy:
|
|
76
|
+
matrix:
|
|
77
|
+
os: [macos-14, macos-15]
|
|
78
|
+
runs-on: ${{ matrix.os }}
|
|
79
|
+
steps:
|
|
80
|
+
- uses: actions/checkout@v4
|
|
81
|
+
- run: swift test
|
|
82
|
+
prevention:
|
|
83
|
+
- "Subscribe to runner-images announcements to learn about deprecation timelines before brownout windows start."
|
|
84
|
+
- "Use `macos-latest` when your workflow does not require a specific OS version — it automatically follows GitHub's supported default."
|
|
85
|
+
- "Test on the new image in a feature branch before the official retirement date to catch Xcode, SDK, or toolchain differences."
|
|
86
|
+
- "For x86-64-specific toolchains, check whether an Intel variant label is available before the migration deadline."
|
|
87
|
+
docs:
|
|
88
|
+
- url: "https://github.com/actions/runner-images/issues/13046"
|
|
89
|
+
label: "runner-images#13046 — macOS 13 deprecation and brownout schedule"
|
|
90
|
+
- url: "https://github.com/actions/runner-images/releases"
|
|
91
|
+
label: "runner-images releases — current supported macOS image versions"
|
|
92
|
+
- url: "https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners"
|
|
93
|
+
label: "GitHub Docs — supported GitHub-hosted runner labels"
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
id: runner-environment-051
|
|
2
|
+
title: "Node.js 18, Ruby 3.1, Android NDK 26, GCC 9/10 Removed from Runner Images (Nov 2025)"
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- nodejs-18
|
|
7
|
+
- ruby-31
|
|
8
|
+
- android-ndk
|
|
9
|
+
- gcc
|
|
10
|
+
- tool-removal
|
|
11
|
+
- ubuntu
|
|
12
|
+
- runner-images
|
|
13
|
+
- eol
|
|
14
|
+
patterns:
|
|
15
|
+
- regex: "node: command not found|node: not found"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "ruby: command not found|ruby: not found"
|
|
18
|
+
flags: "i"
|
|
19
|
+
- regex: "ndk-build: command not found|NDK.*not found"
|
|
20
|
+
flags: "i"
|
|
21
|
+
- regex: "gcc-9: command not found|gcc-10: command not found|g\\+\\+-9: command not found"
|
|
22
|
+
flags: "i"
|
|
23
|
+
- regex: "Could not find the requested version of Node.*18"
|
|
24
|
+
flags: "i"
|
|
25
|
+
error_messages:
|
|
26
|
+
- "node: command not found"
|
|
27
|
+
- "ruby: command not found"
|
|
28
|
+
- "ndk-build: command not found"
|
|
29
|
+
- "gcc-9: command not found"
|
|
30
|
+
- "gcc-10: command not found"
|
|
31
|
+
- "Could not find the requested version of Node 18"
|
|
32
|
+
root_cause: |
|
|
33
|
+
In November 2025, GitHub removed several EOL (end-of-life) tool versions from the preinstalled
|
|
34
|
+
toolcache of Ubuntu and Windows runner images as part of their scheduled tool maintenance policy
|
|
35
|
+
(announced in runner-images#12898). The removals included:
|
|
36
|
+
|
|
37
|
+
- **Node.js 18** — reached EOL April 2025; removed from toolcache November 2025
|
|
38
|
+
- **Ruby 3.1** — reached EOL March 2025; removed from toolcache November 2025
|
|
39
|
+
- **Android NDK 26** — superseded by NDK 27/28; removed from images November 2025
|
|
40
|
+
- **GCC 9 and GCC 10** — superseded by GCC 11+; removed from Ubuntu images November 2025
|
|
41
|
+
|
|
42
|
+
Workflows that assumed these runtimes were preinstalled either fail with `command not found`
|
|
43
|
+
(if the tool was used without an explicit setup step) or fail during the setup action phase
|
|
44
|
+
(if a setup action is used but the requested version is no longer in the toolcache and
|
|
45
|
+
compilation from source fails in the time limit).
|
|
46
|
+
|
|
47
|
+
This is a particularly common failure pattern for:
|
|
48
|
+
- Legacy Android NDK builds using exact NDK version pinning
|
|
49
|
+
- Older Ruby gems/Fastlane pipelines that required Ruby 3.1 specifically
|
|
50
|
+
- C/C++ projects that specified `gcc-9` or `gcc-10` explicitly in their Makefiles or CMake configs
|
|
51
|
+
- Any workflow that ran `node --version` expecting v18.x without a prior setup-node step
|
|
52
|
+
fix: |
|
|
53
|
+
Explicitly install the required runtime version in the workflow using the appropriate setup
|
|
54
|
+
action, or migrate to a supported version that remains preinstalled on the runner.
|
|
55
|
+
|
|
56
|
+
**Node.js 18 → Node.js 22 (LTS):**
|
|
57
|
+
Upgrade to Node.js 22 (currently LTS), or at minimum Node.js 20 if closer compatibility with v18
|
|
58
|
+
is required. Use `actions/setup-node` for explicit installation.
|
|
59
|
+
|
|
60
|
+
**Ruby 3.1 → Ruby 3.3+:**
|
|
61
|
+
Use `ruby/setup-ruby` to explicitly pin any Ruby version. Ruby 3.3 and later are actively maintained stable releases.
|
|
62
|
+
|
|
63
|
+
**Android NDK 26 → NDK 27/28:**
|
|
64
|
+
Update `ndk-version` in `actions/setup-android` or manually install the required NDK version
|
|
65
|
+
via the Android SDK manager.
|
|
66
|
+
|
|
67
|
+
**GCC 9/10 → GCC 12+:**
|
|
68
|
+
Use `apt-get install gcc-12 g++-12` or update your Makefile/CMake to target a supported GCC
|
|
69
|
+
version (GCC 11, 12, 13 remain available on Ubuntu 22.04 runners).
|
|
70
|
+
fix_code:
|
|
71
|
+
- language: yaml
|
|
72
|
+
label: "Explicitly install Node.js 18 (or upgrade to 22)"
|
|
73
|
+
code: |
|
|
74
|
+
jobs:
|
|
75
|
+
build:
|
|
76
|
+
runs-on: ubuntu-latest
|
|
77
|
+
steps:
|
|
78
|
+
- uses: actions/checkout@v4
|
|
79
|
+
- uses: actions/setup-node@v4
|
|
80
|
+
with:
|
|
81
|
+
node-version: '22' # or '18' to pin if still needed
|
|
82
|
+
cache: 'npm'
|
|
83
|
+
- run: npm ci && npm test
|
|
84
|
+
- language: yaml
|
|
85
|
+
label: "Install a specific Ruby version via setup-ruby"
|
|
86
|
+
code: |
|
|
87
|
+
jobs:
|
|
88
|
+
test:
|
|
89
|
+
runs-on: ubuntu-latest
|
|
90
|
+
steps:
|
|
91
|
+
- uses: actions/checkout@v4
|
|
92
|
+
- uses: ruby/setup-ruby@v1
|
|
93
|
+
with:
|
|
94
|
+
ruby-version: '3.3' # pin explicitly; never rely on preinstalled ruby
|
|
95
|
+
bundler-cache: true
|
|
96
|
+
- run: bundle exec rspec
|
|
97
|
+
- language: yaml
|
|
98
|
+
label: "Install GCC 9 explicitly on Ubuntu (for legacy C++ code)"
|
|
99
|
+
code: |
|
|
100
|
+
jobs:
|
|
101
|
+
build:
|
|
102
|
+
runs-on: ubuntu-22.04 # Ubuntu 22.04 still has apt packages for gcc-9
|
|
103
|
+
steps:
|
|
104
|
+
- uses: actions/checkout@v4
|
|
105
|
+
- run: sudo apt-get update && sudo apt-get install -y gcc-9 g++-9
|
|
106
|
+
- run: make CC=gcc-9 CXX=g++-9
|
|
107
|
+
prevention:
|
|
108
|
+
- "Never assume a specific tool version is preinstalled. Always add an explicit setup step (`setup-node`, `setup-ruby`, `setup-android`) with a pinned version."
|
|
109
|
+
- "Subscribe to runner-images announcements and act before the removal deadline — GitHub typically gives 3-6 months notice."
|
|
110
|
+
- "Use the [runner-images software lists](https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2204-Readme.md) to verify which versions are available before assuming they exist."
|
|
111
|
+
- "When a CI tool version is EOL, plan the upgrade at EOL time — not when GitHub removes it from the runner."
|
|
112
|
+
docs:
|
|
113
|
+
- url: "https://github.com/actions/runner-images/issues/12898"
|
|
114
|
+
label: "runner-images#12898 — Nov 2025 tool removals announcement (Node 18, Ruby 3.1, NDK 26, GCC 9/10)"
|
|
115
|
+
- url: "https://github.com/actions/setup-node"
|
|
116
|
+
label: "actions/setup-node — explicitly install any Node.js version"
|
|
117
|
+
- url: "https://github.com/ruby/setup-ruby"
|
|
118
|
+
label: "ruby/setup-ruby — install any Ruby version"
|
|
119
|
+
- url: "https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2204-Readme.md"
|
|
120
|
+
label: "runner-images — Ubuntu 22.04 software list (verify what is preinstalled)"
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
id: runner-environment-049
|
|
2
|
+
title: "ubuntu-20.04 runner retired — jobs fail during brownouts or after full removal"
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- ubuntu
|
|
7
|
+
- ubuntu-20-04
|
|
8
|
+
- runner-retirement
|
|
9
|
+
- image-removal
|
|
10
|
+
- brownout
|
|
11
|
+
- eol
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "scheduled Ubuntu 20\\.04 retirement|Ubuntu 20\\.04.*retirement"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "ubuntu-20\\.04.*removed|ubuntu-20\\.04.*unavailable"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "ubuntu-20\\.04.*runner will be removed"
|
|
18
|
+
flags: "i"
|
|
19
|
+
- regex: "No hosted runner.*ubuntu-20|ubuntu-20\\.04.*no.*runner"
|
|
20
|
+
flags: "i"
|
|
21
|
+
error_messages:
|
|
22
|
+
- "This is a scheduled Ubuntu 20.04 retirement. Ubuntu 20.04 LTS runner will be removed on 2025-04-15. For more details, see https://github.com/actions/runner-images/issues/11101"
|
|
23
|
+
- "ubuntu-20.04 is no longer available. Please use ubuntu-22.04 or ubuntu-24.04 instead."
|
|
24
|
+
- "No hosted runner was found that matches the requested labels: ubuntu-20.04"
|
|
25
|
+
root_cause: |
|
|
26
|
+
Ubuntu 20.04 LTS (Focal Fossa) reached end of standard support on May 31,
|
|
27
|
+
2025. Ahead of that date, GitHub retired the ubuntu-20.04 GitHub-hosted runner
|
|
28
|
+
image on April 15, 2025, after a series of intentional brownout windows
|
|
29
|
+
that began in March 2025 (March 4, 11, 18, 25 and April 1, 8).
|
|
30
|
+
|
|
31
|
+
During each brownout window (13:00–21:00 UTC), any job using runs-on:
|
|
32
|
+
ubuntu-20.04 was deliberately failed with an explicit retirement message.
|
|
33
|
+
After April 15, 2025, the image was permanently removed and no longer
|
|
34
|
+
available as a runner label on GitHub.com.
|
|
35
|
+
|
|
36
|
+
Workflows that hardcode ubuntu-20.04 in their runs-on: field now fail
|
|
37
|
+
immediately with "No hosted runner was found that matches the requested
|
|
38
|
+
labels: ubuntu-20.04". This also affects:
|
|
39
|
+
- Actions pinned inside third-party repos that ship their own workflow
|
|
40
|
+
files using ubuntu-20.04
|
|
41
|
+
- Reusable workflow templates or starter workflows referencing ubuntu-20.04
|
|
42
|
+
- Matrix strategies that include ubuntu-20.04 as one of several OS targets
|
|
43
|
+
- Composite actions or action.yml files in public actions that list
|
|
44
|
+
ubuntu-20.04 as a required environment
|
|
45
|
+
|
|
46
|
+
Self-hosted runners with the ubuntu-20.04 label are NOT affected — they
|
|
47
|
+
only match the name locally and keep running as long as the self-hosted
|
|
48
|
+
runner agent is online.
|
|
49
|
+
fix: |
|
|
50
|
+
Replace ubuntu-20.04 with ubuntu-22.04 or ubuntu-24.04 (ubuntu-latest).
|
|
51
|
+
|
|
52
|
+
For most workflows, ubuntu-22.04 is the most compatible drop-in replacement:
|
|
53
|
+
it ships glibc 2.35, which still runs binaries built against Ubuntu 20.04's glibc 2.31. However,
|
|
54
|
+
several packages and tools differ between 20.04 and 22.04:
|
|
55
|
+
- Python 3.8 and 3.9 are no longer pre-installed on ubuntu-22.04+
|
|
56
|
+
- libssl1.1 is absent; libssl3 is the current version
|
|
57
|
+
- Some apt package versions differ
|
|
58
|
+
|
|
59
|
+
For ubuntu-24.04:
|
|
60
|
+
- Python 3.12 is the default (3.8/3.9/3.10 absent)
|
|
61
|
+
- libssl3 only
|
|
62
|
+
- Node 18 removed from toolcache
|
|
63
|
+
|
|
64
|
+
Run your workflow with ubuntu-22.04 first. Address any package-version
|
|
65
|
+
failures before migrating to ubuntu-24.04.
|
|
66
|
+
fix_code:
|
|
67
|
+
- language: yaml
|
|
68
|
+
label: "Upgrade hardcoded ubuntu-20.04 references to ubuntu-22.04"
|
|
69
|
+
code: |
|
|
70
|
+
jobs:
|
|
71
|
+
build:
|
|
72
|
+
# Replace deprecated image with supported LTS
|
|
73
|
+
runs-on: ubuntu-22.04 # was: ubuntu-20.04
|
|
74
|
+
steps:
|
|
75
|
+
- uses: actions/checkout@v4
|
|
76
|
+
- name: Install dependencies
|
|
77
|
+
run: sudo apt-get install -y libssl-dev # libssl3 on 22.04, libssl1.1 on 20.04
|
|
78
|
+
- run: make build
|
|
79
|
+
- language: yaml
|
|
80
|
+
label: "Migrate matrix strategy that included ubuntu-20.04"
|
|
81
|
+
code: |
|
|
82
|
+
jobs:
|
|
83
|
+
test:
|
|
84
|
+
strategy:
|
|
85
|
+
matrix:
|
|
86
|
+
os:
|
|
87
|
+
- ubuntu-22.04 # was ubuntu-20.04
|
|
88
|
+
- ubuntu-24.04 # was ubuntu-latest (if you want explicit LTS pin)
|
|
89
|
+
- windows-latest
|
|
90
|
+
- macos-latest
|
|
91
|
+
runs-on: ${{ matrix.os }}
|
|
92
|
+
steps:
|
|
93
|
+
- uses: actions/checkout@v4
|
|
94
|
+
- run: make test
|
|
95
|
+
prevention:
|
|
96
|
+
- "Use ubuntu-latest instead of pinning specific Ubuntu versions to avoid hard failures on retirement"
|
|
97
|
+
- "If you need a specific Ubuntu LTS, pin to ubuntu-22.04 or ubuntu-24.04 — never a version past its support window"
|
|
98
|
+
- "Subscribe to runner-images retirement notifications on GitHub (watch actions/runner-images for Issues)"
|
|
99
|
+
- "Add a monthly workflow audit job that fails if any runs-on: references a known-retired image label"
|
|
100
|
+
- "Check third-party action.yml files in your dependency tree for hardcoded ubuntu-20.04 references"
|
|
101
|
+
docs:
|
|
102
|
+
- url: "https://github.com/actions/runner-images/issues/11101"
|
|
103
|
+
label: "runner-images#11101 — Ubuntu 20.04 retirement announcement and timeline"
|
|
104
|
+
- url: "https://github.blog/changelog/2025-02-12-notice-of-upcoming-deprecations-and-breaking-changes-for-github-actions/"
|
|
105
|
+
label: "GitHub Changelog — Ubuntu 20.04 brownout schedule (Feb 2025)"
|
|
106
|
+
- url: "https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources"
|
|
107
|
+
label: "GitHub Docs — Supported GitHub-hosted runner images"
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
id: silent-failures-024
|
|
2
|
+
title: "GITHUB_ENV variable written in a step is not available within the same step"
|
|
3
|
+
category: silent-failures
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- GITHUB_ENV
|
|
7
|
+
- environment-variables
|
|
8
|
+
- step-ordering
|
|
9
|
+
- run
|
|
10
|
+
- same-step
|
|
11
|
+
patterns:
|
|
12
|
+
- regex: "GITHUB_ENV[^\\n]*\\n[^\\n]*\\$\\{?[A-Z_][A-Z0-9_]*\\}?"
|
|
13
|
+
flags: "ms"
|
|
14
|
+
- regex: "echo\\s+[\"']?[A-Z_][A-Z0-9_]*=[^\"'\\n]+[\"']?\\s*>>\\s*\\$GITHUB_ENV"
|
|
15
|
+
flags: "i"
|
|
16
|
+
error_messages:
|
|
17
|
+
- "echo MY_VAR=value >> $GITHUB_ENV"
|
|
18
|
+
root_cause: |
|
|
19
|
+
Writes to $GITHUB_ENV (appending NAME=value lines to the runner's environment file)
|
|
20
|
+
take effect for all SUBSEQUENT steps in the same job, but NOT for the current
|
|
21
|
+
step's run block. The Actions runner reads the environment file once at the start
|
|
22
|
+
of each step, before the run block executes. Any $GITHUB_ENV writes made during
|
|
23
|
+
that run block are not re-read until the next step begins. Consequently, if a run
|
|
24
|
+
block writes `echo "BUILD_ID=abc" >> $GITHUB_ENV` and then immediately references
|
|
25
|
+
`$BUILD_ID` later in the same run block, the variable is empty. No error or warning
|
|
26
|
+
is emitted — the reference silently evaluates to an empty string. The identical
|
|
27
|
+
delayed-effect behavior applies to $GITHUB_PATH: path entries added in one step
|
|
28
|
+
are only visible in the PATH of subsequent steps.
|
|
29
|
+
fix: |
|
|
30
|
+
Split the write and the read into separate steps. If you need the value within the
|
|
31
|
+
same shell invocation, use a native shell variable assignment (VAR=value) for
|
|
32
|
+
immediate access and still write to $GITHUB_ENV if the value is needed by later
|
|
33
|
+
steps. Do not rely on $GITHUB_ENV for intra-step communication.
|
|
34
|
+
fix_code:
|
|
35
|
+
- language: yaml
|
|
36
|
+
label: "Wrong — reference GITHUB_ENV variable in the same step that writes it"
|
|
37
|
+
code: |
|
|
38
|
+
steps:
|
|
39
|
+
- name: Set and use variable (BROKEN — BUILD_ID is empty)
|
|
40
|
+
run: |
|
|
41
|
+
echo "BUILD_ID=abc123" >> $GITHUB_ENV
|
|
42
|
+
echo "Build ID is: $BUILD_ID" # Empty — GITHUB_ENV not re-read mid-step
|
|
43
|
+
- language: yaml
|
|
44
|
+
label: "Correct — read the exported variable in the following step"
|
|
45
|
+
code: |
|
|
46
|
+
steps:
|
|
47
|
+
- name: Export variable
|
|
48
|
+
run: echo "BUILD_ID=abc123" >> $GITHUB_ENV
|
|
49
|
+
|
|
50
|
+
- name: Use exported variable
|
|
51
|
+
run: echo "Build ID is: $BUILD_ID" # Available here — next step reads GITHUB_ENV
|
|
52
|
+
- language: yaml
|
|
53
|
+
label: "Correct — use a shell variable for same-step access, also export for later steps"
|
|
54
|
+
code: |
|
|
55
|
+
steps:
|
|
56
|
+
- name: Compute and export build ID
|
|
57
|
+
run: |
|
|
58
|
+
BUILD_ID=$(git rev-parse --short HEAD)
|
|
59
|
+
echo "BUILD_ID=${BUILD_ID}" >> $GITHUB_ENV # Export for later steps
|
|
60
|
+
echo "Build ID is: ${BUILD_ID}" # Shell var — available immediately
|
|
61
|
+
prevention:
|
|
62
|
+
- "Never reference a variable by its GITHUB_ENV name ($VAR_NAME) in the same run block that writes it"
|
|
63
|
+
- "Use native shell variables (VAR=value; echo $VAR) for same-step access; only use GITHUB_ENV for cross-step sharing"
|
|
64
|
+
- "The same rule applies to $GITHUB_PATH — path additions take effect in the next step, not the current one"
|
|
65
|
+
- "If a step relies on a GITHUB_ENV variable set by a prior step in the same job, ensure step ordering is correct"
|
|
66
|
+
docs:
|
|
67
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#passing-a-value-between-steps"
|
|
68
|
+
label: "Passing values between steps using GITHUB_ENV — GitHub Actions"
|
|
69
|
+
- url: "https://github.com/orgs/community/discussions/26672"
|
|
70
|
+
label: "GitHub Community — GITHUB_ENV variable not available in same step it is written"
|
|
71
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-environment-variable"
|
|
72
|
+
label: "Setting an environment variable via workflow commands — GitHub Actions"
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
id: triggers-017
|
|
2
|
+
title: "push and pull_request events both fire for same-repo PR branches causing duplicate CI runs"
|
|
3
|
+
category: triggers
|
|
4
|
+
severity: warning
|
|
5
|
+
tags:
|
|
6
|
+
- push
|
|
7
|
+
- pull_request
|
|
8
|
+
- duplicate-runs
|
|
9
|
+
- concurrency
|
|
10
|
+
- same-repo
|
|
11
|
+
- branch
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "on:\\s*\\n\\s+(push|pull_request):"
|
|
14
|
+
flags: "ms"
|
|
15
|
+
error_messages:
|
|
16
|
+
- "Duplicate workflow runs triggered for the same commit SHA"
|
|
17
|
+
- "Two simultaneous CI runs on the same branch"
|
|
18
|
+
root_cause: |
|
|
19
|
+
When a developer pushes to a branch that has an open pull request within the same
|
|
20
|
+
repository (not a fork), both the push event and the pull_request event (types:
|
|
21
|
+
[synchronize]) fire independently for the same commit SHA. This creates two
|
|
22
|
+
separate workflow runs executing identical CI checks simultaneously. For fork PRs
|
|
23
|
+
only the pull_request event fires (the fork push doesn't trigger the base repo),
|
|
24
|
+
so duplication only affects same-repository branches. The redundant run wastes
|
|
25
|
+
billed minutes, creates confusing duplicate status check entries on the PR, and
|
|
26
|
+
can cause race conditions in deployment or release workflows where two runs race
|
|
27
|
+
to update the same environment.
|
|
28
|
+
fix: |
|
|
29
|
+
For workflows intended purely as PR checks, use only on.pull_request and remove
|
|
30
|
+
the push trigger. If the same workflow needs to run both on PRs and on direct
|
|
31
|
+
pushes to main/trunk (post-merge), filter with branches: so they don't overlap.
|
|
32
|
+
Adding a concurrency group keyed on the branch name (github.head_ref || github.ref_name) automatically
|
|
33
|
+
cancels the slower duplicate run when both triggers must remain.
|
|
34
|
+
fix_code:
|
|
35
|
+
- language: yaml
|
|
36
|
+
label: "Option 1 — separate triggers so they do not overlap"
|
|
37
|
+
code: |
|
|
38
|
+
on:
|
|
39
|
+
# PR CI: only pull_request covers commits pushed to the PR branch
|
|
40
|
+
pull_request:
|
|
41
|
+
branches: [main]
|
|
42
|
+
# Post-merge CI: only the push to main after merging
|
|
43
|
+
push:
|
|
44
|
+
branches: [main]
|
|
45
|
+
# This way a push to a feature branch triggers pull_request ONLY,
|
|
46
|
+
# and a merge to main triggers push ONLY — no duplicates.
|
|
47
|
+
- language: yaml
|
|
48
|
+
label: "Option 2 — concurrency group to cancel the slower duplicate"
|
|
49
|
+
code: |
|
|
50
|
+
on:
|
|
51
|
+
push:
|
|
52
|
+
branches-ignore: [main]
|
|
53
|
+
pull_request:
|
|
54
|
+
branches: [main]
|
|
55
|
+
|
|
56
|
+
concurrency:
|
|
57
|
+
group: ci-${{ github.head_ref || github.ref_name }}
|
|
58
|
+
cancel-in-progress: true
|
|
59
|
+
# When both push and pull_request fire for the same branch, the second
|
|
60
|
+
# run cancels the first, leaving only one active run per branch.
|
|
61
|
+
prevention:
|
|
62
|
+
- "Audit every workflow with both on.push and on.pull_request — verify the branch filters don't overlap for same-repo contributors"
|
|
63
|
+
- "For PR validation CI, prefer on.pull_request only; add a separate on.push.branches: [main] for post-merge checks"
|
|
64
|
+
- "Always add a concurrency group when both push and pull_request triggers are needed to prevent redundant runs"
|
|
65
|
+
- "Check your Actions billing dashboard for unexpectedly doubled minute usage as a signal for duplicate trigger overlap"
|
|
66
|
+
docs:
|
|
67
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#pull_request"
|
|
68
|
+
label: "pull_request event — GitHub Actions documentation"
|
|
69
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#push"
|
|
70
|
+
label: "push event — GitHub Actions documentation"
|
|
71
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/control-the-concurrency-of-workflows-and-jobs"
|
|
72
|
+
label: "Controlling concurrency of workflows and jobs — GitHub Actions"
|
|
73
|
+
- url: "https://github.com/orgs/community/discussions/26284"
|
|
74
|
+
label: "GitHub Community — avoiding duplicate workflow runs on push and pull_request"
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
id: triggers-016
|
|
2
|
+
title: "workflow_dispatch inputs are always string type regardless of declared type"
|
|
3
|
+
category: triggers
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- workflow_dispatch
|
|
7
|
+
- inputs
|
|
8
|
+
- type-coercion
|
|
9
|
+
- boolean
|
|
10
|
+
- number
|
|
11
|
+
- expression
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "inputs\\.[a-zA-Z_][a-zA-Z0-9_]*\\s*==\\s*(true|false)"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "inputs\\.[a-zA-Z_][a-zA-Z0-9_]*\\s*[><!]=?\\s*[0-9]"
|
|
16
|
+
flags: "i"
|
|
17
|
+
error_messages:
|
|
18
|
+
- "if: ${{ inputs.enable_debug == true }}"
|
|
19
|
+
- "if: ${{ inputs.retry_count > 3 }}"
|
|
20
|
+
- "if: ${{ inputs.deploy == false }}"
|
|
21
|
+
root_cause: |
|
|
22
|
+
All workflow_dispatch input values are delivered as strings at runtime, regardless
|
|
23
|
+
of the type: boolean or type: number declaration in the workflow YAML. The type
|
|
24
|
+
declaration controls the GitHub UI widget (checkbox vs text field) but does NOT
|
|
25
|
+
change the runtime data type. Comparing inputs.my_flag == true evaluates as
|
|
26
|
+
string('true') == boolean(true) which is always false. Similarly, arithmetic
|
|
27
|
+
comparisons like inputs.count > 3 perform lexicographic string comparison, not
|
|
28
|
+
numeric comparison. This is documented in GitHub Actions docs but commonly missed
|
|
29
|
+
by developers relying on the YAML type declaration to enforce runtime types.
|
|
30
|
+
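The same caveat applies to inputs passed via the GitHub REST API and GitHub CLI. Note: string-only delivery is guaranteed for github.event.inputs; the inputs context preserves boolean inputs as booleans, so check which context an expression reads.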
|
|
31
|
+
fix: |
|
|
32
|
+
Compare boolean inputs against the string 'true' or 'false'. For numeric inputs,
|
|
33
|
+
wrap with fromJSON() before arithmetic comparison. The expression
|
|
34
|
+
inputs.enable_debug == 'true' correctly evaluates when the checkbox is checked
|
|
35
|
+
in the GitHub UI or when the value 'true' is passed via the API.
|
|
36
|
+
fix_code:
|
|
37
|
+
- language: yaml
|
|
38
|
+
label: "Boolean input — compare against string literal 'true'"
|
|
39
|
+
code: |
|
|
40
|
+
on:
|
|
41
|
+
workflow_dispatch:
|
|
42
|
+
inputs:
|
|
43
|
+
enable_debug:
|
|
44
|
+
type: boolean
|
|
45
|
+
description: "Enable debug mode"
|
|
46
|
+
default: false
|
|
47
|
+
|
|
48
|
+
jobs:
|
|
49
|
+
build:
|
|
50
|
+
runs-on: ubuntu-latest
|
|
51
|
+
steps:
|
|
52
|
+
- name: Debug step
|
|
53
|
+
# WRONG: inputs.enable_debug == true (string vs boolean, always false)
|
|
54
|
+
# CORRECT: compare against string
|
|
55
|
+
if: github.event.inputs.enable_debug == 'true'
|
|
56
|
+
run: echo "Debug mode enabled"
|
|
57
|
+
- language: yaml
|
|
58
|
+
label: "Number input — use fromJSON() for numeric comparison"
|
|
59
|
+
code: |
|
|
60
|
+
on:
|
|
61
|
+
workflow_dispatch:
|
|
62
|
+
inputs:
|
|
63
|
+
retry_count:
|
|
64
|
+
type: number
|
|
65
|
+
description: "Number of retries"
|
|
66
|
+
default: 3
|
|
67
|
+
|
|
68
|
+
jobs:
|
|
69
|
+
deploy:
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
steps:
|
|
72
|
+
- name: High-retry warning
|
|
73
|
+
# WRONG: inputs.retry_count > 5 (string comparison, '6' > '5' is true but '10' > '5' is false lexicographically)
|
|
74
|
+
# CORRECT: cast to number first
|
|
75
|
+
if: fromJSON(inputs.retry_count) > 5
|
|
76
|
+
run: echo "Warning: high retry count configured"
|
|
77
|
+
prevention:
|
|
78
|
+
- "Always compare workflow_dispatch boolean inputs against the string 'true' or 'false', never native boolean literals"
|
|
79
|
+
- "Use fromJSON(inputs.my_number) before any arithmetic or numeric comparison on number inputs"
|
|
80
|
+
- "Add an early diagnostic step echoing ${{ toJSON(inputs) }} to inspect actual runtime types during development"
|
|
81
|
+
- "Document in workflow comments that all workflow_dispatch inputs are strings at runtime, regardless of declared type"
|
|
82
|
+
- "When passing inputs via API or gh CLI, always pass boolean values as the strings 'true' or 'false'"
|
|
83
|
+
docs:
|
|
84
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#providing-inputs"
|
|
85
|
+
label: "workflow_dispatch inputs documentation — GitHub Actions"
|
|
86
|
+
- url: "https://github.com/actions/runner/issues/1483"
|
|
87
|
+
label: "actions/runner#1483 — Boolean workflow_dispatch inputs treated as strings"
|
|
88
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#fromjson"
|
|
89
|
+
label: "fromJSON() expression function — GitHub Actions"
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
id: yaml-syntax-022
|
|
2
|
+
title: "env Context Rejected in continue-on-error and Other Job-Level Boolean Attributes"
|
|
3
|
+
category: yaml-syntax
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- continue-on-error
|
|
7
|
+
- env-context
|
|
8
|
+
- expression
|
|
9
|
+
- job-level
|
|
10
|
+
- unrecognized-named-value
|
|
11
|
+
- context-availability
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "Unrecognized named-value: 'env'"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "Unexpected value.*env\\."
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "continue-on-error.*env\\.|env\\..*continue-on-error"
|
|
18
|
+
flags: "i"
|
|
19
|
+
- regex: "The workflow is not valid.*Unrecognized named-value: 'env'"
|
|
20
|
+
flags: "i"
|
|
21
|
+
error_messages:
|
|
22
|
+
- "The workflow is not valid. .github/workflows/ci.yml (Line X, Col Y): Unrecognized named-value: 'env'. Located at position 1 within expression: env.MY_FLAG"
|
|
23
|
+
- "Unexpected value '${{ contains(env.MY_LIST, matrix.value) }}'"
|
|
24
|
+
root_cause: |
|
|
25
|
+
The `env` context is NOT available in all workflow attribute positions. Specifically,
|
|
26
|
+
job-level and step-level attributes that accept boolean or numeric values — such as
|
|
27
|
+
`continue-on-error`, `timeout-minutes` (value expressions), and similar fields — do not
|
|
28
|
+
support the `env` context in their expression evaluations.
|
|
29
|
+
|
|
30
|
+
This is a known GitHub Actions platform limitation documented in actions/runner#1492. The
|
|
31
|
+
error occurs because these attributes are evaluated at workflow parse/validation time (before
|
|
32
|
+
the job runs), when environment variables have not yet been set in the runner context.
|
|
33
|
+
|
|
34
|
+
Contexts available in job-level `continue-on-error` (`jobs.<job_id>.continue-on-error`):
|
|
35
|
+
- ✅ `github`, `needs`, `strategy`, `matrix`, `vars`, `inputs`
|
|
36
|
+
- ❌ `env`, `secrets`, `steps`, `jobs`
|
|
37
|
+
|
|
38
|
+
A common pattern that fails:
|
|
39
|
+
```yaml
|
|
40
|
+
steps:
|
|
41
|
+
- run: ./build.sh
|
|
42
|
+
continue-on-error: ${{ contains(env.SUPPORTED_VERSIONS, matrix.version) }}
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Even if `SUPPORTED_VERSIONS` is defined in the `env:` block at the top of the workflow,
|
|
46
|
+
the expression fails because `env` is not a supported context in that attribute.
|
|
47
|
+
fix: |
|
|
48
|
+
Move the boolean logic into a step output computed earlier in the job, then reference the
|
|
49
|
+
step output in `continue-on-error`. Alternatively, restructure the workflow to avoid needing
|
|
50
|
+
`env`-based expressions in boolean job/step attributes.
|
|
51
|
+
|
|
52
|
+
**Option 1**: Compute the flag in an earlier step and use `steps` context:
|
|
53
|
+
```yaml
|
|
54
|
+
- id: check
|
|
55
|
+
run: echo "should_skip=$([[ "$MY_FLAG" == "true" ]] && echo true || echo false)" >> $GITHUB_OUTPUT
|
|
56
|
+
- run: ./potentially-failing-task.sh
|
|
57
|
+
continue-on-error: ${{ steps.check.outputs.should_skip == 'true' }}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Option 2**: Use `matrix` context directly (supported in continue-on-error):
|
|
61
|
+
```yaml
|
|
62
|
+
strategy:
|
|
63
|
+
matrix:
|
|
64
|
+
experimental: [true, false]
|
|
65
|
+
steps:
|
|
66
|
+
- run: ./build.sh
|
|
67
|
+
continue-on-error: ${{ matrix.experimental }}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**Option 3**: Handle the failure inside the `run` script itself (for example, `./task.sh || true`) so that `continue-on-error` is not needed.
|
|
71
|
+
fix_code:
|
|
72
|
+
- language: yaml
|
|
73
|
+
label: "Use step output to control continue-on-error dynamically"
|
|
74
|
+
code: |
|
|
75
|
+
jobs:
|
|
76
|
+
test:
|
|
77
|
+
runs-on: ubuntu-latest
|
|
78
|
+
env:
|
|
79
|
+
EXPERIMENTAL_VERSIONS: "3.12 3.13"
|
|
80
|
+
steps:
|
|
81
|
+
- uses: actions/checkout@v4
|
|
82
|
+
|
|
83
|
+
# ❌ This FAILS: env context not available in continue-on-error
|
|
84
|
+
# - run: pytest
|
|
85
|
+
# continue-on-error: ${{ contains(env.EXPERIMENTAL_VERSIONS, matrix.python) }}
|
|
86
|
+
|
|
87
|
+
# ✅ Compute the flag first, then reference via steps context
|
|
88
|
+
- id: flags
|
|
89
|
+
run: |
|
|
90
|
+
if echo "$EXPERIMENTAL_VERSIONS" | grep -qw "${{ matrix.python }}"; then
|
|
91
|
+
echo "experimental=true" >> $GITHUB_OUTPUT
|
|
92
|
+
else
|
|
93
|
+
echo "experimental=false" >> $GITHUB_OUTPUT
|
|
94
|
+
fi
|
|
95
|
+
- run: pytest
|
|
96
|
+
continue-on-error: ${{ steps.flags.outputs.experimental == 'true' }}
|
|
97
|
+
- language: yaml
|
|
98
|
+
label: "Use matrix boolean directly (no env needed)"
|
|
99
|
+
code: |
|
|
100
|
+
jobs:
|
|
101
|
+
test:
|
|
102
|
+
strategy:
|
|
103
|
+
matrix:
|
|
104
|
+
include:
|
|
105
|
+
- python: "3.11"
|
|
106
|
+
experimental: false
|
|
107
|
+
- python: "3.12"
|
|
108
|
+
experimental: true
|
|
109
|
+
- python: "3.13"
|
|
110
|
+
experimental: true
|
|
111
|
+
runs-on: ubuntu-latest
|
|
112
|
+
steps:
|
|
113
|
+
- uses: actions/checkout@v4
|
|
114
|
+
- uses: actions/setup-python@v5
|
|
115
|
+
with:
|
|
116
|
+
python-version: ${{ matrix.python }}
|
|
117
|
+
- run: pytest
|
|
118
|
+
continue-on-error: ${{ matrix.experimental }} # ✅ matrix context IS supported
|
|
119
|
+
prevention:
|
|
120
|
+
- "Check the GitHub Actions context availability table before using any context in job-level attributes — not all contexts are available everywhere."
|
|
121
|
+
- "When you need dynamic boolean flags in `continue-on-error`, compute them in an earlier step and read via `steps.<id>.outputs`."
|
|
122
|
+
- "The `matrix` context is fully supported in `continue-on-error` — prefer encoding experimental flags in the matrix definition."
|
|
123
|
+
- "Run `actionlint` on your workflow to catch context-availability errors before pushing."
|
|
124
|
+
docs:
|
|
125
|
+
- url: "https://github.com/actions/runner/issues/1492"
|
|
126
|
+
label: "actions/runner#1492 — env context not available in continue-on-error (original report)"
|
|
127
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/accessing-contextual-information-about-workflow-runs#context-availability"
|
|
128
|
+
label: "GitHub Docs — context availability table (where each context can be used)"
|
|
129
|
+
- url: "https://github.com/rhysd/actionlint"
|
|
130
|
+
label: "actionlint — static analyzer that catches context-availability errors"
|
package/package.json
CHANGED