@htekdev/actions-debugger 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/errors/caching-artifacts/artifact-download-no-artifacts-found.yml +118 -0
- package/errors/concurrency-timing/job-stuck-waiting-for-runner.yml +105 -0
- package/errors/concurrency-timing/matrix-fail-fast-sibling-cancellation.yml +113 -0
- package/errors/concurrency-timing/timeout-minutes-job-killed.yml +107 -0
- package/errors/known-unsolved/github-step-summary-size-limit.yml +112 -0
- package/errors/known-unsolved/job-maximum-execution-time.yml +127 -0
- package/errors/runner-environment/macos-14-sonoma-eol.yml +89 -0
- package/errors/runner-environment/macos-latest-to-macos-26.yml +127 -0
- package/errors/runner-environment/powershell-74-to-76-upgrade.yml +112 -0
- package/errors/runner-environment/service-container-unhealthy.yml +126 -0
- package/errors/runner-environment/windows-latest-vs2026-migration.yml +131 -0
- package/errors/silent-failures/hashfiles-empty-string-cache-collision.yml +96 -0
- package/errors/triggers/environment-protection-rules-silent-block.yml +105 -0
- package/errors/yaml-syntax/env-context-unavailable-job-level.yml +109 -0
- package/package.json +1 -1
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
id: caching-artifacts-010
|
|
2
|
+
title: "Artifact Download Fails — Unable to Find Any Artifacts for Run"
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- artifact
|
|
7
|
+
- download
|
|
8
|
+
- run-id
|
|
9
|
+
- cross-workflow
|
|
10
|
+
- upload-artifact
|
|
11
|
+
- download-artifact
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "Unable to find any artifacts for the associated workflow"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "No artifacts found for run"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "Error: Unable to find any artifacts"
|
|
18
|
+
flags: "i"
|
|
19
|
+
error_messages:
|
|
20
|
+
- "Unable to find any artifacts for the associated workflow"
|
|
21
|
+
- "Error: Unable to find any artifacts for the associated workflow"
|
|
22
|
+
- "No artifacts were found with the provided name and filters."
|
|
23
|
+
root_cause: |
|
|
24
|
+
GitHub Actions artifacts are scoped to a specific workflow run. The
|
|
25
|
+
`actions/download-artifact@v4` action (and v3, which has no `run-id:` input) will fail with
|
|
26
|
+
"Unable to find any artifacts" when:
|
|
27
|
+
|
|
28
|
+
1. **Wrong or stale run-id** — The run ID is hard-coded or fetched from a
|
|
29
|
+
prior run that no longer has the expected artifact (artifacts expire after
|
|
30
|
+
the configured retention period, default 90 days).
|
|
31
|
+
|
|
32
|
+
2. **Upload step never ran** — The producing workflow job was cancelled, the
|
|
33
|
+
upload step was skipped due to a failed prior step, or the `if:` condition
|
|
34
|
+
on the upload step evaluated to false.
|
|
35
|
+
|
|
36
|
+
3. **Name mismatch** — The `name:` specified in `download-artifact` doesn't
|
|
37
|
+
exactly match the name used in `upload-artifact` (case-sensitive).
|
|
38
|
+
|
|
39
|
+
4. **Cross-workflow download without explicit run-id** — In v4, downloading
|
|
40
|
+
artifacts from a different workflow run requires an explicit `run-id:`.
|
|
41
|
+
Omitting it defaults to the current run, which may not have those artifacts.
|
|
42
|
+
|
|
43
|
+
Documented in GitHub Community discussion #109905.
|
|
44
|
+
fix: |
|
|
45
|
+
For cross-workflow artifact sharing:
|
|
46
|
+
1. Capture the producing workflow's run-id dynamically using the GitHub REST
|
|
47
|
+
API or by passing it as a workflow_dispatch input or repository_dispatch payload.
|
|
48
|
+
2. Verify the upload step ran successfully in the producer workflow before
|
|
49
|
+
downloading in a consumer workflow.
|
|
50
|
+
3. Ensure `name:` matches exactly (case-sensitive) between upload and download.
|
|
51
|
+
4. For same-workflow jobs, omit `run-id:` — download-artifact v4 defaults to
|
|
52
|
+
the current run automatically.
|
|
53
|
+
fix_code:
|
|
54
|
+
- language: yaml
|
|
55
|
+
label: "WRONG — hard-coded run-id that may be stale"
|
|
56
|
+
code: |
|
|
57
|
+
- uses: actions/download-artifact@v4
|
|
58
|
+
with:
|
|
59
|
+
name: build-output
|
|
60
|
+
run-id: 12345678 # ❌ hard-coded run ID — stale if re-run or wrong workflow
|
|
61
|
+
- language: yaml
|
|
62
|
+
label: "RIGHT — same-workflow, no run-id needed"
|
|
63
|
+
code: |
|
|
64
|
+
# Job A (producer)
|
|
65
|
+
jobs:
|
|
66
|
+
build:
|
|
67
|
+
runs-on: ubuntu-latest
|
|
68
|
+
steps:
|
|
69
|
+
- run: make build
|
|
70
|
+
- uses: actions/upload-artifact@v4
|
|
71
|
+
with:
|
|
72
|
+
name: build-output # ✅ exact name matters
|
|
73
|
+
path: dist/
|
|
74
|
+
|
|
75
|
+
test:
|
|
76
|
+
needs: build
|
|
77
|
+
runs-on: ubuntu-latest
|
|
78
|
+
steps:
|
|
79
|
+
- uses: actions/download-artifact@v4
|
|
80
|
+
with:
|
|
81
|
+
name: build-output # ✅ same name, no run-id needed for current run
|
|
82
|
+
- language: yaml
|
|
83
|
+
label: "RIGHT — cross-workflow download, run-id from API"
|
|
84
|
+
code: |
|
|
85
|
+
- name: Get latest successful run ID
|
|
86
|
+
id: get-run
|
|
87
|
+
env:
|
|
88
|
+
GH_TOKEN: ${{ github.token }}
|
|
89
|
+
run: |
|
|
90
|
+
RUN_ID=$(gh run list \
|
|
91
|
+
--workflow=build.yml \
|
|
92
|
+
--branch=main \
|
|
93
|
+
--status=success \
|
|
94
|
+
--limit=1 \
|
|
95
|
+
--json databaseId \
|
|
96
|
+
--jq '.[0].databaseId')
|
|
97
|
+
echo "run_id=$RUN_ID" >> $GITHUB_OUTPUT
|
|
98
|
+
|
|
99
|
+
- uses: actions/download-artifact@v4
|
|
100
|
+
with:
|
|
101
|
+
name: build-output
|
|
102
|
+
run-id: ${{ steps.get-run.outputs.run_id }} # ✅ dynamic run ID
|
|
103
|
+
github-token: ${{ github.token }}
|
|
104
|
+
prevention:
|
|
105
|
+
- "Never hard-code run-id values — always fetch them dynamically via the GitHub API or pass them as workflow inputs."
|
|
106
|
+
- "Always verify artifact upload completed successfully before writing a consuming workflow that depends on it."
|
|
107
|
+
- "Use exact case-matching artifact names — `build-output` and `Build-Output` are different artifacts."
|
|
108
|
+
- "For same-workflow artifact sharing between jobs, omit `run-id:` entirely — it defaults to the current run."
|
|
109
|
+
- "Check artifact retention settings — artifacts expire after 90 days by default (configurable from 1–90 days for public repositories and 1–400 days for private repositories)."
|
|
110
|
+
docs:
|
|
111
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/storing-and-sharing-data-from-a-workflow"
|
|
112
|
+
label: "Storing and sharing data from a workflow"
|
|
113
|
+
- url: "https://github.com/actions/download-artifact"
|
|
114
|
+
label: "actions/download-artifact — README and cross-run download docs"
|
|
115
|
+
- url: "https://github.com/orgs/community/discussions/109905"
|
|
116
|
+
label: "GitHub Community #109905 — Unable to find any artifacts troubleshooting"
|
|
117
|
+
- url: "https://stackoverflow.com/questions/78238187/unable-to-find-any-artifacts-for-the-associated-workflow-github-actions"
|
|
118
|
+
label: "Stack Overflow — Unable to find any artifacts for the associated workflow"
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
id: concurrency-timing-006
|
|
2
|
+
title: "Job Stuck: 'Waiting for a Runner to Pick Up This Job'"
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- runner
|
|
7
|
+
- runs-on
|
|
8
|
+
- self-hosted
|
|
9
|
+
- queued
|
|
10
|
+
- stuck
|
|
11
|
+
- deprecated-runner
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "Waiting for a runner to pick up this job"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "No runner matching the specified labels"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "Could not find any online and idle runners"
|
|
18
|
+
flags: "i"
|
|
19
|
+
error_messages:
|
|
20
|
+
- "Waiting for a runner to pick up this job."
|
|
21
|
+
- "No runner matching the specified labels was found: [your-label]"
|
|
22
|
+
- "Could not find any online and idle runners matching the required labels."
|
|
23
|
+
root_cause: |
|
|
24
|
+
A job remains stuck in the "queued" state — showing "Waiting for a runner to pick up
|
|
25
|
+
this job" — when GitHub Actions cannot find an available runner matching the `runs-on:`
|
|
26
|
+
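labels. The job stays queued until a matching runner comes online or the run is cancelled (GitHub cancels jobs queued for self-hosted runners after roughly 24 hours); `timeout-minutes` only limits execution time once the job has started.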
|
|
27
|
+
|
|
28
|
+
The most common causes:
|
|
29
|
+
|
|
30
|
+
1. **Deprecated or retired runner label** — GitHub periodically retires old runner images.
|
|
31
|
+
`ubuntu-18.04` was retired in April 2023. `ubuntu-20.04` was retired in April 2025.
|
|
32
|
+
Jobs using these labels get stuck because no GitHub-hosted runners serve the label.
|
|
33
|
+
|
|
34
|
+
2. **Typo in `runs-on:` label** — `ubuntu-latets`, `ubuntu_latest`, `UBuntu-latest` all
|
|
35
|
+
fail silently. GitHub-hosted label matching is case-sensitive for custom labels.
|
|
36
|
+
|
|
37
|
+
3. **Self-hosted runner offline or de-registered** — the runner was stopped, the service
|
|
38
|
+
was not restarted after a reboot, or the runner registration token expired. GitHub queues
|
|
39
|
+
the job and waits for a registered runner with matching labels to come online.
|
|
40
|
+
|
|
41
|
+
4. **Runner group restrictions** — organization admins restrict which repositories can use
|
|
42
|
+
which runner groups. A job referencing a group the repository is not authorized for will
|
|
43
|
+
queue indefinitely without an explicit permission error.
|
|
44
|
+
|
|
45
|
+
5. **All runners busy** — all matching runners are executing other jobs. The job correctly
|
|
46
|
+
queues but appears "stuck" during peak usage. It will eventually be picked up.
|
|
47
|
+
|
|
48
|
+
There is no notification when a job has been queued for an unusually long time — the only
|
|
49
|
+
signal is the job's wall-clock age and the static "Waiting for a runner" message.
|
|
50
|
+
fix: |
|
|
51
|
+
Verify the `runs-on:` label against the current list of supported GitHub-hosted runner
|
|
52
|
+
images. For self-hosted runners, check runner registration and service health.
|
|
53
|
+
fix_code:
|
|
54
|
+
- language: yaml
|
|
55
|
+
label: "Use current, non-deprecated GitHub-hosted runner labels"
|
|
56
|
+
code: |
|
|
57
|
+
jobs:
|
|
58
|
+
build:
|
|
59
|
+
# Use current supported labels only
|
|
60
|
+
runs-on: ubuntu-latest # OR ubuntu-22.04, ubuntu-24.04
|
|
61
|
+
# NOT: ubuntu-18.04 (retired), ubuntu-20.04 (retired)
|
|
62
|
+
|
|
63
|
+
build-windows:
|
|
64
|
+
runs-on: windows-latest # OR windows-2022, windows-2025
|
|
65
|
+
|
|
66
|
+
build-macos:
|
|
67
|
+
runs-on: macos-latest # OR macos-13, macos-14, macos-15
|
|
68
|
+
- language: yaml
|
|
69
|
+
label: "Self-hosted runner — verify registration and labels match exactly"
|
|
70
|
+
code: |
|
|
71
|
+
jobs:
|
|
72
|
+
deploy:
|
|
73
|
+
# Labels must exactly match what the runner was registered with
|
|
74
|
+
# Check: GitHub Settings → Actions → Runners → click runner → Labels
|
|
75
|
+
runs-on: [self-hosted, linux, production]
|
|
76
|
+
|
|
77
|
+
steps:
|
|
78
|
+
- name: Verify runner is the expected host
|
|
79
|
+
run: echo "Running on $RUNNER_NAME at $(hostname)"
|
|
80
|
+
- language: yaml
|
|
81
|
+
label: "Fallback: matrix across hosted and self-hosted runners"
|
|
82
|
+
code: |
|
|
83
|
+
jobs:
|
|
84
|
+
build:
|
|
85
|
+
strategy:
|
|
86
|
+
matrix:
|
|
87
|
+
runner: [ubuntu-latest, [self-hosted, linux]]
|
|
88
|
+
runs-on: ${{ matrix.runner }}
|
|
89
|
+
prevention:
|
|
90
|
+
- "Audit `runs-on:` labels in all workflows when GitHub announces runner image deprecations."
|
|
91
|
+
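- "Set a job-level `timeout-minutes` to bound jobs once they start, and monitor or cancel long-queued runs separately — `timeout-minutes` does not apply while a job is still waiting for a runner."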
|
|
92
|
+
- "For self-hosted runners, configure the runner service to auto-restart on reboot (e.g., `--service` install on Linux via `./svc.sh install`)."
|
|
93
|
+
- "Use GitHub's runner status page (Settings → Actions → Runners) to verify runners are Online before triggering long jobs."
|
|
94
|
+
- "Subscribe to GitHub Changelog and Actions deprecation notices to catch retiring runner labels early."
|
|
95
|
+
docs:
|
|
96
|
+
- url: "https://docs.github.com/en/actions/using-github-hosted-runners/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources"
|
|
97
|
+
label: "Supported GitHub-hosted runner labels"
|
|
98
|
+
- url: "https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/adding-self-hosted-runners"
|
|
99
|
+
label: "Adding self-hosted runners"
|
|
100
|
+
- url: "https://stackoverflow.com/questions/70959954/error-waiting-for-a-runner-to-pick-up-this-job-using-github-actions"
|
|
101
|
+
label: "Stack Overflow: Waiting for a runner to pick up this job"
|
|
102
|
+
- url: "https://github.com/actions/runner/issues/3609"
|
|
103
|
+
label: "actions/runner#3609 — Self-hosted runner stuck / deadlock"
|
|
104
|
+
- url: "https://github.com/orgs/community/discussions/147604"
|
|
105
|
+
label: "Community: Workflow stuck in queued state"
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
id: concurrency-timing-007
|
|
2
|
+
title: "Matrix Sibling Jobs Silently Cancelled by fail-fast Default"
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- matrix
|
|
7
|
+
- fail-fast
|
|
8
|
+
- cancellation
|
|
9
|
+
- silent-failure
|
|
10
|
+
- strategy
|
|
11
|
+
- job-cancelled
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "Some jobs were not run because a sibling job failed"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "Canceling since a higher priority waiting run was found"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "The workflow run was canceled\\."
|
|
18
|
+
flags: "i"
|
|
19
|
+
error_messages:
|
|
20
|
+
- "Some jobs were not run because a sibling job failed. To allow them to run anyway, add 'continue-on-error: true' to the matrix job."
|
|
21
|
+
- "Job was cancelled"
|
|
22
|
+
root_cause: |
|
|
23
|
+
GitHub Actions matrix strategy defaults to `fail-fast: true`. When ANY matrix leg fails,
|
|
24
|
+
GitHub immediately cancels all other in-progress and pending legs in the same matrix.
|
|
25
|
+
|
|
26
|
+
This default is rarely what developers want during debugging or CI investigation, and
|
|
27
|
+
produces a confusing failure pattern:
|
|
28
|
+
|
|
29
|
+
1. **Cancelled legs appear as "Cancelled" not "Failed"** — matrix siblings killed by
|
|
30
|
+
`fail-fast` show as CANCELLED in the UI (grey icon) rather than red failures. Developers
|
|
31
|
+
scanning the run summary see one red failure and many grey cancellations, and may not
|
|
32
|
+
realize those sibling legs had reached significant progress (e.g., partway through a
|
|
33
|
+
test suite on a different OS or Node version) before being killed.
|
|
34
|
+
|
|
35
|
+
2. **Root cause is obscured** — the only failing leg that matters for diagnosis is the one
|
|
36
|
+
that triggered `fail-fast`, but with multiple cancellations in the UI, it can be hard to
|
|
37
|
+
identify which leg failed first.
|
|
38
|
+
|
|
39
|
+
3. **`fail-fast` is inherited silently** — there is no warning annotation that says
|
|
40
|
+
"fail-fast is enabled and cancelled 5 sibling legs." The default is documented but
|
|
41
|
+
easy to forget when adding a new matrix.
|
|
42
|
+
|
|
43
|
+
4. **Re-running failed jobs doesn't re-run cancelled siblings** — "Re-run failed jobs"
|
|
44
|
+
only re-runs the legs that explicitly FAILED, not the ones that were cancelled by
|
|
45
|
+
fail-fast. Developers re-running failed jobs think they'll see results from all legs,
|
|
46
|
+
but cancelled siblings stay cancelled. Only "Re-run all jobs" restarts everything.
|
|
47
|
+
|
|
48
|
+
Example: a 3-OS matrix (ubuntu, windows, macos) where ubuntu fails. With fail-fast,
|
|
49
|
+
windows and macos are immediately cancelled. The developer sees one failure and two
|
|
50
|
+
cancellations, re-runs the failed ubuntu job, and never discovers that windows also
|
|
51
|
+
had an independent failing test.
|
|
52
|
+
fix: |
|
|
53
|
+
Set `fail-fast: false` explicitly on any matrix where you need full signal from all
|
|
54
|
+
legs — especially for cross-platform or multi-version compatibility matrices. Use
|
|
55
|
+
`fail-fast: true` intentionally only when running the full matrix after one failure is
|
|
56
|
+
wasteful (e.g., expensive build matrices during pre-merge CI).
|
|
57
|
+
fix_code:
|
|
58
|
+
- language: yaml
|
|
59
|
+
label: "Disable fail-fast to see all matrix leg results"
|
|
60
|
+
code: |
|
|
61
|
+
jobs:
|
|
62
|
+
test:
|
|
63
|
+
strategy:
|
|
64
|
+
fail-fast: false # All legs run regardless of siblings failing
|
|
65
|
+
matrix:
|
|
66
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
67
|
+
node: [18, 20, 22]
|
|
68
|
+
runs-on: ${{ matrix.os }}
|
|
69
|
+
steps:
|
|
70
|
+
- uses: actions/checkout@v4
|
|
71
|
+
- uses: actions/setup-node@v4
|
|
72
|
+
with:
|
|
73
|
+
node-version: ${{ matrix.node }}
|
|
74
|
+
- run: npm ci
|
|
75
|
+
- run: npm test
|
|
76
|
+
- language: yaml
|
|
77
|
+
label: "Use fail-fast: true only for expensive pre-merge CI"
|
|
78
|
+
code: |
|
|
79
|
+
jobs:
|
|
80
|
+
# Pre-merge: fail fast to conserve minutes — just need to know if it passes
|
|
81
|
+
lint-and-typecheck:
|
|
82
|
+
strategy:
|
|
83
|
+
fail-fast: true # OK: fast, cheap, fail early
|
|
84
|
+
matrix:
|
|
85
|
+
node: [20, 22]
|
|
86
|
+
runs-on: ubuntu-latest
|
|
87
|
+
steps:
|
|
88
|
+
- run: npm run lint && npm run typecheck
|
|
89
|
+
|
|
90
|
+
# Post-merge: always see all platform results
|
|
91
|
+
full-test-suite:
|
|
92
|
+
if: github.event_name == 'push'
|
|
93
|
+
strategy:
|
|
94
|
+
fail-fast: false # Need full signal on all platforms
|
|
95
|
+
matrix:
|
|
96
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
97
|
+
runs-on: ${{ matrix.os }}
|
|
98
|
+
steps:
|
|
99
|
+
- run: npm test
|
|
100
|
+
prevention:
|
|
101
|
+
- "Always set `fail-fast: false` explicitly on cross-platform or multi-version matrices where you need full compatibility signal."
|
|
102
|
+
- "After a matrix failure, use 'Re-run all jobs' (not 'Re-run failed jobs') to get results from previously-cancelled siblings."
|
|
103
|
+
- "Add a workflow summary step with `if: always()` to collect and consolidate test results across all matrix legs even when some are cancelled."
|
|
104
|
+
- "Be aware that cancelled legs (grey) are NOT the same as passed legs (green) — visually scan for both red and grey when investigating failures."
|
|
105
|
+
docs:
|
|
106
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#jobsjob_idstrategyfail-fast"
|
|
107
|
+
label: "Workflow syntax: jobs.<job_id>.strategy.fail-fast"
|
|
108
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#jobsjob_idstrategymatrix"
|
|
109
|
+
label: "Workflow syntax: jobs.<job_id>.strategy.matrix"
|
|
110
|
+
- url: "https://github.com/orgs/community/discussions/26822"
|
|
111
|
+
label: "Community: fail-fast cancels matrix siblings unexpectedly"
|
|
112
|
+
- url: "https://stackoverflow.com/questions/57850553/github-actions-check-steps-status"
|
|
113
|
+
label: "Stack Overflow: Matrix job cancellation behavior with fail-fast"
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
id: concurrency-timing-005
|
|
2
|
+
title: "Job Silently Cancelled When timeout-minutes Is Exceeded"
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- timeout
|
|
7
|
+
- timeout-minutes
|
|
8
|
+
- job-cancelled
|
|
9
|
+
- timing
|
|
10
|
+
- runner
|
|
11
|
+
patterns:
|
|
12
|
+
- regex: "##\\[error\\]The operation was cancelled\\."
|
|
13
|
+
flags: "i"
|
|
14
|
+
- regex: "The job '.*' was cancelled because it exceeded the maximum execution time"
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: "Error: The operation was canceled"
|
|
17
|
+
flags: "i"
|
|
18
|
+
- regex: "cancel is received"
|
|
19
|
+
flags: "i"
|
|
20
|
+
error_messages:
|
|
21
|
+
- "##[error]The operation was cancelled."
|
|
22
|
+
- "Error: The operation was canceled"
|
|
23
|
+
- "The runner has received a shutdown signal. This can happen when the runner service is stopped, or a manually started runner is canceled."
|
|
24
|
+
root_cause: |
|
|
25
|
+
When a job (or step) exceeds its configured `timeout-minutes`, GitHub Actions sends a
|
|
26
|
+
cancellation signal to the runner. The runner has 5 minutes to complete graceful shutdown,
|
|
27
|
+
after which it is forcibly terminated.
|
|
28
|
+
|
|
29
|
+
The failure mode has two layers of confusion:
|
|
30
|
+
|
|
31
|
+
1. **Status shows "Cancelled" not "Failed"** — a timed-out job is marked CANCELLED in the
|
|
32
|
+
UI. It does not appear as a red failure. Developers scanning the Actions tab may miss it
|
|
33
|
+
entirely, especially if another run succeeded after it.
|
|
34
|
+
|
|
35
|
+
2. **No step-level attribution** — the job log shows "The operation was cancelled" but does
|
|
36
|
+
not identify which specific step was still running or how far it had progressed. Long
|
|
37
|
+
builds, network-heavy steps, and interactive prompts are common culprits.
|
|
38
|
+
|
|
39
|
+
3. **Default timeout is 360 minutes (6 hours)** — if `timeout-minutes` is not explicitly
|
|
40
|
+
set, GitHub uses the platform default of 6 hours for GitHub-hosted runners. A job that
|
|
41
|
+
accidentally blocks (waiting for user input, infinite loop, hung network call) will silently
|
|
42
|
+
consume 6 hours of runner minutes before being cancelled with no diagnostic output.
|
|
43
|
+
|
|
44
|
+
4. **Step-level timeouts are independent** — `timeout-minutes` on a `steps[*]` entry cancels
|
|
45
|
+
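only that step, which is marked as failed (later steps are skipped unless they use an `if:` condition such as `failure()`/`always()` or the step sets `continue-on-error`). `timeout-minutes` on `jobs[*]` cancels the entire job.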
|
|
46
|
+
Mixing both is valid but must be understood deliberately.
|
|
47
|
+
fix: |
|
|
48
|
+
Always set explicit `timeout-minutes` at the job level to bound worst-case runner cost.
|
|
49
|
+
Tune based on your typical build time (e.g., 2-3× the median duration). Add step-level
|
|
50
|
+
timeouts on known slow steps (network downloads, test suites) to get better attribution.
|
|
51
|
+
|
|
52
|
+
To diagnose which step was running at cancellation: add a step near the end that dumps
|
|
53
|
+
elapsed time, or use `if: cancelled()` post-steps to capture diagnostics on timeout.
|
|
54
|
+
fix_code:
|
|
55
|
+
- language: yaml
|
|
56
|
+
label: "Explicit job-level timeout with diagnostic post-step"
|
|
57
|
+
code: |
|
|
58
|
+
jobs:
|
|
59
|
+
build:
|
|
60
|
+
runs-on: ubuntu-latest
|
|
61
|
+
timeout-minutes: 30 # Set explicitly — don't rely on 6h default
|
|
62
|
+
steps:
|
|
63
|
+
- uses: actions/checkout@v4
|
|
64
|
+
|
|
65
|
+
- name: Build
|
|
66
|
+
run: make build
|
|
67
|
+
|
|
68
|
+
- name: Tests
|
|
69
|
+
timeout-minutes: 15 # Step-level timeout for attribution
|
|
70
|
+
run: make test
|
|
71
|
+
|
|
72
|
+
# Runs only when the job is cancelled (including a job-level timeout) — records when cancellation happened
|
|
73
|
+
- name: Dump elapsed time on cancellation
|
|
74
|
+
if: cancelled()
|
|
75
|
+
run: echo "Job was cancelled at $(date -u). Check step durations above."
|
|
76
|
+
- language: yaml
|
|
77
|
+
label: "Identify which step timed out with job summary annotation"
|
|
78
|
+
code: |
|
|
79
|
+
steps:
|
|
80
|
+
- name: Long network operation
|
|
81
|
+
timeout-minutes: 10
|
|
82
|
+
run: |
|
|
83
|
+
# Use --max-time with curl to avoid relying solely on timeout-minutes
|
|
84
|
+
curl --max-time 300 https://example.com/large-asset -o output.bin
|
|
85
|
+
|
|
86
|
+
- name: Report timeout if cancelled
|
|
87
|
+
if: cancelled()
|
|
88
|
+
run: |
|
|
89
|
+
echo "## ⏱️ Job Timed Out" >> $GITHUB_STEP_SUMMARY
|
|
90
|
+
echo "The job was cancelled. Review step durations in the log." >> $GITHUB_STEP_SUMMARY
|
|
91
|
+
prevention:
|
|
92
|
+
- "Always set `timeout-minutes` at the job level — never rely on the 6-hour GitHub default."
|
|
93
|
+
- "Add step-level `timeout-minutes` on network-heavy or test steps so cancellation is attributed to a specific step."
|
|
94
|
+
- "Use `if: cancelled()` post-steps to write a job summary annotation explaining the timeout."
|
|
95
|
+
- "Run commands with their own timeout flags (e.g., `curl --max-time`, `pytest --timeout`) in addition to runner timeouts."
|
|
96
|
+
- "Monitor job duration trends — a job approaching its timeout limit is a signal to investigate performance."
|
|
97
|
+
docs:
|
|
98
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#jobsjob_idtimeout-minutes"
|
|
99
|
+
label: "Workflow syntax: jobs.<job_id>.timeout-minutes"
|
|
100
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#jobsjob_idstepstimeout-minutes"
|
|
101
|
+
label: "Workflow syntax: jobs.<job_id>.steps[*].timeout-minutes"
|
|
102
|
+
- url: "https://github.com/actions/runner/issues/1326"
|
|
103
|
+
label: "actions/runner#1326 — Steps hanging until timeout with no log output"
|
|
104
|
+
- url: "https://github.com/orgs/community/discussions/38004"
|
|
105
|
+
label: "Community: Job stops producing output and is later cancelled"
|
|
106
|
+
- url: "https://docs.github.com/en/actions/administering-github-actions/usage-limits-billing-and-administration#usage-limits"
|
|
107
|
+
label: "Usage limits: maximum job execution time"
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
id: known-unsolved-008
|
|
2
|
+
title: "GITHUB_STEP_SUMMARY Upload Aborted When Content Exceeds 1024k"
|
|
3
|
+
category: known-unsolved
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- step-summary
|
|
7
|
+
- GITHUB_STEP_SUMMARY
|
|
8
|
+
- size-limit
|
|
9
|
+
- job-summary
|
|
10
|
+
- markdown
|
|
11
|
+
- limitation
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "\\$GITHUB_STEP_SUMMARY upload aborted, supports content up to a size of 1024k, got \\d+k"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "upload aborted.*supports content up to a size of 1024k"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "Error: GITHUB_STEP_SUMMARY.*1024"
|
|
18
|
+
flags: "i"
|
|
19
|
+
error_messages:
|
|
20
|
+
- "$GITHUB_STEP_SUMMARY upload aborted, supports content up to a size of 1024k, got 1387k"
|
|
21
|
+
- "$GITHUB_STEP_SUMMARY upload aborted, supports content up to a size of 1024k, got 2048k"
|
|
22
|
+
root_cause: |
|
|
23
|
+
GitHub Actions imposes a hard 1 MiB (1024 KiB) size limit on the content written to
|
|
24
|
+
`$GITHUB_STEP_SUMMARY`. When a step writes more than this limit, the runner aborts
|
|
25
|
+
the summary upload and logs an error.
|
|
26
|
+
|
|
27
|
+
This is a **hard platform limit** — you cannot increase it; you can only reduce what you write. GitHub has not
|
|
28
|
+
announced plans to raise the limit.
|
|
29
|
+
|
|
30
|
+
Common triggers:
|
|
31
|
+
1. **Test reporters** — tools like `dorny/test-reporter`, `ctrf-io/github-actions-ctrf`,
|
|
32
|
+
or `EnricoMi/publish-unit-test-result-action` write per-test result tables. Large
|
|
33
|
+
test suites (thousands of test cases, especially with long failure messages) easily
|
|
34
|
+
exceed 1 MiB.
|
|
35
|
+
2. **Dependency review action** — `actions/dependency-review-action` writes full
|
|
36
|
+
dependency diff tables. Large projects with hundreds of transitive dependencies produce
|
|
37
|
+
summaries well above 1 MiB.
|
|
38
|
+
3. **Coverage reports** — HTML-style coverage tables written to `$GITHUB_STEP_SUMMARY`
|
|
39
|
+
with per-file rows can grow unboundedly on large monorepos.
|
|
40
|
+
4. **Log echo pipelines** — `cat large-file >> $GITHUB_STEP_SUMMARY` without size
|
|
41
|
+
checking is the most direct way to hit the limit.
|
|
42
|
+
|
|
43
|
+
The error aborts the summary upload but does **not** fail the step or job by default.
|
|
44
|
+
Depending on the action's error handling, the step may succeed (exit 0) even though the
|
|
45
|
+
summary was not written — making this a silent failure from a reporting perspective.
|
|
46
|
+
fix: |
|
|
47
|
+
Truncate or paginate summary content before writing it. Most test reporters provide
|
|
48
|
+
options to limit which results are written (e.g., only failures, not all passed tests).
|
|
49
|
+
For custom summary generation, check the size before writing and truncate with a note.
|
|
50
|
+
fix_code:
|
|
51
|
+
- language: yaml
|
|
52
|
+
label: "Truncate summary content with size check before writing"
|
|
53
|
+
code: |
|
|
54
|
+
- name: Generate test report
|
|
55
|
+
run: |
|
|
56
|
+
# Generate report to a temp file first
|
|
57
|
+
./scripts/generate-report.sh > /tmp/report.md
|
|
58
|
+
|
|
59
|
+
# Check size before writing to summary
|
|
60
|
+
SIZE_KB=$(du -k /tmp/report.md | cut -f1)
|
|
61
|
+
MAX_KB=800 # Leave headroom below 1024k limit
|
|
62
|
+
|
|
63
|
+
if [ "$SIZE_KB" -gt "$MAX_KB" ]; then
|
|
64
|
+
echo "⚠️ Full report too large (${SIZE_KB}k). Showing failures only." >> "$GITHUB_STEP_SUMMARY"
|
|
65
|
+
./scripts/generate-report.sh --failures-only >> "$GITHUB_STEP_SUMMARY"
|
|
66
|
+
else
|
|
67
|
+
cat /tmp/report.md >> "$GITHUB_STEP_SUMMARY"
|
|
68
|
+
fi
|
|
69
|
+
- language: yaml
|
|
70
|
+
label: "dorny/test-reporter — write summary totals only for large test suites"
|
|
71
|
+
code: |
|
|
72
|
+
- name: Test Report
|
|
73
|
+
uses: dorny/test-reporter@v1
|
|
74
|
+
if: always()
|
|
75
|
+
with:
|
|
76
|
+
name: Test Results
|
|
77
|
+
path: test-results/**/*.xml
|
|
78
|
+
reporter: jest-junit
|
|
79
|
+
# Limit output to avoid 1024k summary limit on large suites
|
|
80
|
+
only-summary: true # Write only totals, not per-test rows
|
|
81
|
+
fail-on-error: false
|
|
82
|
+
- language: yaml
|
|
83
|
+
label: "Upload full report as artifact instead of writing to summary"
|
|
84
|
+
code: |
|
|
85
|
+
- name: Generate full coverage report
|
|
86
|
+
run: ./scripts/coverage.sh > /tmp/coverage-full.md
|
|
87
|
+
|
|
88
|
+
- name: Write summary (truncated)
|
|
89
|
+
run: |
|
|
90
|
+
head -100 /tmp/coverage-full.md >> "$GITHUB_STEP_SUMMARY"
|
|
91
|
+
echo "" >> "$GITHUB_STEP_SUMMARY"
|
|
92
|
+
echo "_Full report available as workflow artifact._" >> "$GITHUB_STEP_SUMMARY"
|
|
93
|
+
|
|
94
|
+
- name: Upload full report as artifact
|
|
95
|
+
uses: actions/upload-artifact@v4
|
|
96
|
+
with:
|
|
97
|
+
name: coverage-report
|
|
98
|
+
path: /tmp/coverage-full.md
|
|
99
|
+
prevention:
|
|
100
|
+
- "Never pipe unbounded command output directly to `$GITHUB_STEP_SUMMARY` — always size-check or limit first."
|
|
101
|
+
- "Configure test reporter actions to write only failures (not all passing tests) when the test suite is large."
|
|
102
|
+
- "Upload large reports as workflow artifacts and link to them from a short summary, instead of embedding all content in the summary."
|
|
103
|
+
- "The undocumented historical limit of 65,535 characters cited in older docs/answers is no longer accurate — the current limit is 1024 KiB (1 MiB)."
|
|
104
|
+
docs:
|
|
105
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#adding-a-job-summary"
|
|
106
|
+
label: "Workflow commands: Adding a job summary"
|
|
107
|
+
- url: "https://github.com/actions/dependency-review-action/issues/786"
|
|
108
|
+
label: "dependency-review-action#786 — Job Summary Size Limitation aborts the job"
|
|
109
|
+
- url: "https://github.com/dorny/test-reporter/issues/379"
|
|
110
|
+
label: "dorny/test-reporter#379 — Is the step summary limit for 65535 characters still accurate?"
|
|
111
|
+
- url: "https://docs.github.com/en/actions/administering-github-actions/usage-limits-billing-and-administration#usage-limits"
|
|
112
|
+
label: "Usage limits — GitHub Actions"
|