@htekdev/actions-debugger 1.0.23 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/errors/caching-artifacts/artifact-minimum-retention-one-day.yml +153 -0
- package/errors/caching-artifacts/cache-api-propagation-delay-post-save.yml +128 -0
- package/errors/caching-artifacts/cache-backend-internal-error-skipped.yml +75 -0
- package/errors/caching-artifacts/cache-hit-step-id-case-sensitive-mismatch.yml +95 -0
- package/errors/caching-artifacts/cache-save-post-step-skipped-on-failure.yml +114 -0
- package/errors/concurrency-timing/deploy-pages-in-progress-deployment-wedged.yml +70 -0
- package/errors/concurrency-timing/deployment-review-timeout-expired.yml +88 -0
- package/errors/concurrency-timing/job-concurrency-scope-per-run-not-global.yml +81 -0
- package/errors/concurrency-timing/merge-queue-concurrency-cancel-blocks-all.yml +86 -0
- package/errors/concurrency-timing/reusable-workflow-github-workflow-context-cancel.yml +124 -0
- package/errors/concurrency-timing/runner-scale-set-jobs-never-start.yml +123 -0
- package/errors/concurrency-timing/runner-temp-dir-race-concurrent-workers.yml +90 -0
- package/errors/known-unsolved/artifact-download-url-unauthenticated-404.yml +98 -0
- package/errors/known-unsolved/checkout-v6-credentials-docker-run-manual.yml +105 -0
- package/errors/known-unsolved/concurrency-groups-repo-scoped-only.yml +138 -0
- package/errors/known-unsolved/matrix-256-job-limit.yml +142 -0
- package/errors/known-unsolved/merge-group-paths-filter-not-supported.yml +137 -0
- package/errors/known-unsolved/no-job-allow-failure.yml +73 -0
- package/errors/known-unsolved/schedule-cron-hours-long-queue-drift.yml +101 -0
- package/errors/permissions-auth/checkout-persist-credentials-token-write.yml +90 -0
- package/errors/permissions-auth/create-github-app-token-cross-job-token-revoked.yml +95 -0
- package/errors/permissions-auth/github-token-contents-write-missing-git-push.yml +117 -0
- package/errors/permissions-auth/org-actions-policy-blocks-unapproved-action.yml +106 -0
- package/errors/runner-environment/codeql-action-v2-deprecated.yml +110 -0
- package/errors/runner-environment/macos-26-openssl-3-system-library-breaking.yml +114 -0
- package/errors/runner-environment/macos-26-ruby-34-default-upgrade.yml +114 -0
- package/errors/runner-environment/macos-26-xcode-default-265-pin-required.yml +99 -0
- package/errors/runner-environment/macos-latest-label-switches-to-macos26.yml +127 -0
- package/errors/runner-environment/node20-removed-toolcache-default-node22.yml +104 -0
- package/errors/runner-environment/powershell-74-76-threadjob-module-rename.yml +124 -0
- package/errors/runner-environment/self-hosted-runner-not-found.yml +134 -0
- package/errors/runner-environment/self-hosted-runner-selinux-service-exec-failure.yml +116 -0
- package/errors/runner-environment/service-container-no-healthcheck.yml +158 -0
- package/errors/runner-environment/setup-node-v5-corepack-pnpm-not-found.yml +101 -0
- package/errors/runner-environment/setup-node-yarn-not-installed-self-hosted.yml +76 -0
- package/errors/runner-environment/setup-python-externally-managed-env-error.yml +95 -0
- package/errors/runner-environment/windows-2019-runner-retired-june2025.yml +118 -0
- package/errors/runner-environment/windows-2022-docker-daemon-not-started.yml +108 -0
- package/errors/silent-failures/cache-hit-output-string-not-boolean.yml +96 -0
- package/errors/silent-failures/checkout-lfs-pointer-not-content.yml +105 -0
- package/errors/silent-failures/reusable-workflow-output-skipped-contains-secret.yml +115 -0
- package/errors/silent-failures/setup-node-silent-download-exit-zero.yml +105 -0
- package/errors/silent-failures/setup-python-truncated-manifest-silent-exit.yml +111 -0
- package/errors/silent-failures/undefined-env-expression-empty-string-silent.yml +115 -0
- package/errors/silent-failures/windows-powershell-github-output-bash-syntax.yml +118 -0
- package/errors/triggers/fork-pr-first-time-contributor-approval-required.yml +142 -0
- package/errors/triggers/on-push-branches-glob-star-no-slash-match.yml +78 -0
- package/errors/triggers/pull-request-target-env-protection-default-branch-eval.yml +117 -0
- package/errors/triggers/required-status-check-renamed-never-passes.yml +87 -0
- package/errors/triggers/schedule-cron-self-hosted-runner-not-triggered.yml +107 -0
- package/errors/yaml-syntax/composite-action-run-shell-missing.yml +90 -0
- package/errors/yaml-syntax/composite-action-secrets-context-unavailable.yml +99 -0
- package/errors/yaml-syntax/github-script-octokit-renamed-to-github.yml +130 -0
- package/errors/yaml-syntax/labeler-v5-config-format-breaking.yml +67 -0
- package/errors/yaml-syntax/runs-on-expression-array-syntax-error.yml +121 -0
- package/errors/yaml-syntax/setup-go-matrix-version-float-coercion.yml +69 -0
- package/package.json +1 -1
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
id: caching-artifacts-024
|
|
2
|
+
title: "Artifact Minimum Retention Is 1 Day — Cannot Set retention-days to 0 for Immediate Cleanup"
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: limitation
|
|
5
|
+
tags:
|
|
6
|
+
- artifact
|
|
7
|
+
- retention
|
|
8
|
+
- storage
|
|
9
|
+
- quota
|
|
10
|
+
- known-limit
|
|
11
|
+
- upload-artifact
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "retention.days.*0|minimum.*retention.*1|retention.*must be between"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "artifact.*storage.*quota|storage.*quota.*exceeded"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "invalid.*retention|retention.*invalid"
|
|
18
|
+
flags: "i"
|
|
19
|
+
error_messages:
|
|
20
|
+
- "Invalid retention days: must be between 1 and 90 inclusive"
|
|
21
|
+
- "Artifact storage quota has been exceeded"
|
|
22
|
+
- "retention-days must be between 1 and 90"
|
|
23
|
+
- "Artifact storage quota has been hit"
|
|
24
|
+
root_cause: |
|
|
25
|
+
GitHub Actions enforces a minimum artifact retention period of 1 day. Setting
|
|
26
|
+
`retention-days: 0` in `actions/upload-artifact` causes a validation error. There is
|
|
27
|
+
no way to have artifacts automatically deleted at workflow completion — they persist for
|
|
28
|
+
at least 24 hours, consuming storage quota whether needed or not.
|
|
29
|
+
|
|
30
|
+
This limitation is tied to the backing storage system's minimum time-based retention
|
|
31
|
+
policy: "the minimum retention interval for a time-based retention policy is one day."
|
|
32
|
+
|
|
33
|
+
Practical consequences:
|
|
34
|
+
1. Workflows using artifacts for intra-workflow job-to-job data passing (build outputs,
|
|
35
|
+
test binaries, coverage reports) accumulate unwanted 1-day artifacts that count
|
|
36
|
+
against the repository's artifact storage quota.
|
|
37
|
+
2. Quota exhaustion can appear "sudden" because storage usage is recalculated every
|
|
38
|
+
6-12 hours — a burst of CI runs can drain the quota before the next recalculation.
|
|
39
|
+
3. The first signal of an impending quota problem is a failed upload: "Artifact storage
|
|
40
|
+
quota has been exceeded." There is no proactive warning in the UI.
|
|
41
|
+
4. Free/Pro plan accounts with small storage quotas (500 MB on Free, 1 GB on Pro) are
|
|
42
|
+
especially vulnerable when intermediate build artifacts accumulate across many runs.
|
|
43
|
+
5. Setting `retention-days: 1` (the minimum) still means 24 hours of quota usage per
|
|
44
|
+
artifact, which multiplies quickly in high-velocity repos with large CI matrices.
|
|
45
|
+
|
|
46
|
+
This is a known platform limitation with 301+ reactions on the tracking issue
|
|
47
|
+
(actions/upload-artifact#290, open since January 2022) and no committed resolution.
|
|
48
|
+
|
|
49
|
+
Source: actions/upload-artifact#290 (Allow retention-days: 0 for immediate cleanup)
|
|
50
|
+
Source: GitHub Docs — Artifact and log retention policies
|
|
51
|
+
fix: |
|
|
52
|
+
There is no way to set `retention-days: 0`. Use these strategies to manage storage:
|
|
53
|
+
|
|
54
|
+
Mitigation 1 — Set retention-days: 1 (minimum possible):
|
|
55
|
+
Limits accumulation to 24 hours instead of the 90-day default. Use for all
|
|
56
|
+
intermediate build artifacts not needed for post-run debugging.
|
|
57
|
+
|
|
58
|
+
Mitigation 2 — Delete artifacts after use via workflow_run trigger:
|
|
59
|
+
Trigger a cleanup workflow via `workflow_run` that uses the GitHub REST API to
|
|
60
|
+
delete all artifacts from the completed run immediately after it finishes.
|
|
61
|
+
|
|
62
|
+
Mitigation 3 — Use GitHub Cache for intra-workflow job-to-job data:
|
|
63
|
+
Cache entries are auto-evicted after 7 days without access and when the 10 GB
|
|
64
|
+
repo cache limit is approached. For transient build outputs, cache is better than
|
|
65
|
+
artifacts because it has no minimum retention window.
|
|
66
|
+
|
|
67
|
+
Mitigation 4 — Use job outputs for small values:
|
|
68
|
+
For small job-to-job values (build numbers, git SHAs, test counts), use
|
|
69
|
+
`$GITHUB_OUTPUT` + `needs.<job>.outputs` instead of uploading an artifact entirely.
|
|
70
|
+
|
|
71
|
+
Mitigation 5 — Set a repository-level default retention policy:
|
|
72
|
+
In Settings → Actions → General, set a shorter default retention period (e.g., 7 days)
|
|
73
|
+
to limit the blast radius for workflows that forget to set `retention-days`.
|
|
74
|
+
fix_code:
|
|
75
|
+
- language: yaml
|
|
76
|
+
label: "Set retention-days: 1 (minimum) for disposable intermediate artifacts"
|
|
77
|
+
code: |
|
|
78
|
+
- name: Upload test results
|
|
79
|
+
uses: actions/upload-artifact@v4
|
|
80
|
+
if: always() # Upload even on failure for debugging
|
|
81
|
+
with:
|
|
82
|
+
name: test-results-${{ matrix.os }}-${{ matrix.node }}
|
|
83
|
+
path: coverage/
|
|
84
|
+
retention-days: 1 # Minimum: deleted after ~24 hours
|
|
85
|
+
|
|
86
|
+
- language: yaml
|
|
87
|
+
label: "Post-run artifact cleanup via workflow_run trigger"
|
|
88
|
+
code: |
|
|
89
|
+
# .github/workflows/cleanup-artifacts.yml
|
|
90
|
+
name: Cleanup Run Artifacts
|
|
91
|
+
on:
|
|
92
|
+
workflow_run:
|
|
93
|
+
workflows: ["CI"]
|
|
94
|
+
types: [completed]
|
|
95
|
+
|
|
96
|
+
jobs:
|
|
97
|
+
cleanup:
|
|
98
|
+
runs-on: ubuntu-latest
|
|
99
|
+
steps:
|
|
100
|
+
- name: Delete all artifacts from completed run
|
|
101
|
+
run: |
|
|
102
|
+
ARTIFACT_IDS=$(gh api \
|
|
103
|
+
"/repos/${{ github.repository }}/actions/runs/${{ github.event.workflow_run.id }}/artifacts" \
|
|
104
|
+
--jq '.artifacts[].id')
|
|
105
|
+
for ID in $ARTIFACT_IDS; do
|
|
106
|
+
gh api --method DELETE \
|
|
107
|
+
"/repos/${{ github.repository }}/actions/artifacts/$ID"
|
|
108
|
+
echo "Deleted artifact $ID"
|
|
109
|
+
done
|
|
110
|
+
env:
|
|
111
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
112
|
+
|
|
113
|
+
- language: yaml
|
|
114
|
+
label: "Use cache instead of artifact for intra-workflow data (auto-evicted)"
|
|
115
|
+
code: |
|
|
116
|
+
jobs:
|
|
117
|
+
build:
|
|
118
|
+
runs-on: ubuntu-latest
|
|
119
|
+
steps:
|
|
120
|
+
- uses: actions/checkout@v4
|
|
121
|
+
- name: Build
|
|
122
|
+
run: npm run build
|
|
123
|
+
- name: Cache build output for downstream jobs
|
|
124
|
+
uses: actions/cache/save@v4
|
|
125
|
+
with:
|
|
126
|
+
path: dist/
|
|
127
|
+
key: build-${{ github.run_id }}-${{ github.run_attempt }}
|
|
128
|
+
# Cache auto-evicts after 7 days or when repo 10 GB limit is approached
|
|
129
|
+
|
|
130
|
+
deploy:
|
|
131
|
+
needs: build
|
|
132
|
+
runs-on: ubuntu-latest
|
|
133
|
+
steps:
|
|
134
|
+
- name: Restore build output
|
|
135
|
+
uses: actions/cache/restore@v4
|
|
136
|
+
with:
|
|
137
|
+
path: dist/
|
|
138
|
+
key: build-${{ github.run_id }}-${{ github.run_attempt }}
|
|
139
|
+
- name: Deploy
|
|
140
|
+
run: ./deploy.sh dist/
|
|
141
|
+
prevention:
|
|
142
|
+
- "Always set `retention-days: 1` for intermediate build artifacts not needed after workflow completion."
|
|
143
|
+
- "Monitor artifact storage in Settings → Billing → Actions — spikes indicate uncleaned intermediate artifacts."
|
|
144
|
+
- "Set a repository-level default retention period in Settings → Actions → General to limit the default."
|
|
145
|
+
- "Use GitHub Cache for intra-workflow job-to-job data when persistence after the run is not required."
|
|
146
|
+
- "For matrix workflows, multiply the artifact size by matrix dimension count to estimate storage impact per run."
|
|
147
|
+
docs:
|
|
148
|
+
- url: "https://github.com/actions/upload-artifact/issues/290"
|
|
149
|
+
label: "actions/upload-artifact#290: Allow retention-days: 0 (301 reactions, open since 2022)"
|
|
150
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/storing-workflow-data-as-artifacts#configuring-a-custom-artifact-retention-period"
|
|
151
|
+
label: "GitHub Docs: Configuring a custom artifact retention period"
|
|
152
|
+
- url: "https://docs.github.com/en/actions/administering-github-actions/usage-limits-billing-and-administration#artifact-and-log-retention-policy"
|
|
153
|
+
label: "GitHub Docs: Artifact and log retention policy"
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
id: caching-artifacts-023
|
|
2
|
+
title: "Cache Saved Successfully But Unavailable for Restore for Several Minutes (API Propagation Lag)"
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- actions/cache
|
|
7
|
+
- cache-miss
|
|
8
|
+
- propagation-delay
|
|
9
|
+
- api-lag
|
|
10
|
+
- restore
|
|
11
|
+
- gh-cache-list
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "Cache not found for input keys"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "Cache service responded with 404"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "Saved successfully"
|
|
18
|
+
flags: "i"
|
|
19
|
+
error_messages:
|
|
20
|
+
- "Cache not found for input keys"
|
|
21
|
+
- "Cache service responded with 404"
|
|
22
|
+
- "Received 0 of 0 (0.0 B), 0.0 MBs/sec"
|
|
23
|
+
root_cause: |
|
|
24
|
+
There can be a propagation delay between when the cache backend finishes
|
|
25
|
+
writing a cache entry (reporting "Saved successfully") and when that entry becomes
|
|
26
|
+
visible via the cache REST API or available for restore by subsequent jobs.
|
|
27
|
+
|
|
28
|
+
GitHub's cache service uses an eventually-consistent storage backend. After a cache
|
|
29
|
+
save completes, the cache metadata may not propagate to the query/restore API for
|
|
30
|
+
up to 4-5 minutes. During this window:
|
|
31
|
+
|
|
32
|
+
- `gh cache list` reports no cache for the key
|
|
33
|
+
- The Actions UI cache tab does not show the entry
|
|
34
|
+
- `actions/cache/restore@v5` reports a cache miss on the exact key
|
|
35
|
+
|
|
36
|
+
This is commonly hit when a workflow saves a cache in a "warm-up" job that is a
|
|
37
|
+
dependency for a fan-out matrix job. If the downstream jobs start before the cache
|
|
38
|
+
propagates, they all miss the cache and fall back to a cold install — often on a
|
|
39
|
+
schedule-triggered first-run-of-day workflow.
|
|
40
|
+
|
|
41
|
+
The issue was reported against `actions/cache` v4.2.2 with GitHub-hosted runners
|
|
42
|
+
and is not unique to any runner OS.
|
|
43
|
+
fix: |
|
|
44
|
+
Design workflows to tolerate cache misses — always include a fallback install step
|
|
45
|
+
that runs when the cache is cold. Do not architect workflows where a cache miss
|
|
46
|
+
causes a hard failure.
|
|
47
|
+
|
|
48
|
+
For workflows that must read a cache from a previous job, add a `restore-keys`
|
|
49
|
+
fallback so a partial match is used while the exact key propagates:
|
|
50
|
+
|
|
51
|
+
restore-keys: |
|
|
52
|
+
build-deps-${{ runner.os }}-
|
|
53
|
+
|
|
54
|
+
If you need the exact cache entry, add a sleep + retry loop in the calling job
|
|
55
|
+
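by repeating the `actions/cache/restore` step after a short `sleep` (`nick-fields/retry`, formerly `nick-invision/retry`, only retries shell commands and cannot wrap a `uses:` step).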
|
|
56
|
+
|
|
57
|
+
Alternatively, restructure the workflow so that cache-consuming jobs share the
|
|
58
|
+
same job context as the cache writer (using job outputs or artifacts) rather
|
|
59
|
+
than relying on cross-job cache availability within the same run.
|
|
60
|
+
fix_code:
|
|
61
|
+
- language: yaml
|
|
62
|
+
label: "Always include restore-keys fallback to tolerate propagation lag"
|
|
63
|
+
code: |
|
|
64
|
+
steps:
|
|
65
|
+
- uses: actions/cache/restore@v5
|
|
66
|
+
id: cache-restore
|
|
67
|
+
with:
|
|
68
|
+
path: |
|
|
69
|
+
~/.npm
|
|
70
|
+
node_modules
|
|
71
|
+
key: build-deps-${{ runner.os }}-${{ hashFiles('**/package-lock.json') }}
|
|
72
|
+
restore-keys: |  # partial match handles lag window
|
|
73
|
+
build-deps-${{ runner.os }}-
|
|
74
|
+
fail-on-cache-miss: false # never hard-fail on cache miss
|
|
75
|
+
|
|
76
|
+
- name: Install (runs on cache miss or during lag window)
|
|
77
|
+
if: steps.cache-restore.outputs.cache-hit != 'true'
|
|
78
|
+
run: npm ci
|
|
79
|
+
- language: yaml
|
|
80
|
+
label: "Warm-up job: save cache with explicit key confirmation before fan-out"
|
|
81
|
+
code: |
|
|
82
|
+
jobs:
|
|
83
|
+
warm-cache:
|
|
84
|
+
runs-on: ubuntu-latest
|
|
85
|
+
outputs:
|
|
86
|
+
cache-key: ${{ steps.cache-key.outputs.key }}
|
|
87
|
+
steps:
|
|
88
|
+
- uses: actions/checkout@v4
|
|
89
|
+
- id: cache-key
|
|
90
|
+
run: echo "key=build-deps-${{ runner.os }}-${{ hashFiles('**/package-lock.json') }}" >> "$GITHUB_OUTPUT"
|
|
91
|
+
- uses: actions/cache/restore@v5
|
|
92
|
+
id: cache
|
|
93
|
+
with:
|
|
94
|
+
path: node_modules
|
|
95
|
+
key: ${{ steps.cache-key.outputs.key }}
|
|
96
|
+
- if: steps.cache.outputs.cache-hit != 'true'
|
|
97
|
+
run: npm ci
|
|
98
|
+
- if: steps.cache.outputs.cache-hit != 'true'
|
|
99
|
+
uses: actions/cache/save@v5
|
|
100
|
+
with:
|
|
101
|
+
path: node_modules
|
|
102
|
+
key: ${{ steps.cache-key.outputs.key }}
|
|
103
|
+
|
|
104
|
+
# Fan-out jobs include restore-keys fallback; tolerate lag window
|
|
105
|
+
build:
|
|
106
|
+
needs: warm-cache
|
|
107
|
+
strategy:
|
|
108
|
+
matrix:
|
|
109
|
+
target: [lint, test, typecheck]
|
|
110
|
+
steps:
|
|
111
|
+
- uses: actions/cache/restore@v5
|
|
112
|
+
with:
|
|
113
|
+
path: node_modules
|
|
114
|
+
key: ${{ needs.warm-cache.outputs.cache-key }}
|
|
115
|
+
restore-keys: build-deps-${{ runner.os }}-
|
|
116
|
+
prevention:
|
|
117
|
+
- "Never hard-fail on cache miss (fail-on-cache-miss: false) — always include a fallback install."
|
|
118
|
+
- "Include restore-keys to allow partial matches during the propagation window."
|
|
119
|
+
- "Avoid architectures where downstream jobs strictly require a cache written in the same run's prior job."
|
|
120
|
+
- "Use job artifacts (upload-artifact/download-artifact) for dependencies that must be reliably shared within a run."
|
|
121
|
+
- "If using gh cache list to verify saves, wait at least 5 minutes before concluding a save failed."
|
|
122
|
+
docs:
|
|
123
|
+
- url: "https://github.com/actions/cache/issues/1710"
|
|
124
|
+
label: "actions/cache#1710: Cache indexing lag / API propagation delay (15 reactions)"
|
|
125
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows"
|
|
126
|
+
label: "GitHub Docs: Caching dependencies to speed up workflows"
|
|
127
|
+
- url: "https://github.com/actions/cache/tree/main/restore"
|
|
128
|
+
label: "actions/cache/restore: restore-only action"
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
id: "caching-artifacts-026"
|
|
2
|
+
title: "Cache backend internal error silently skips caching"
|
|
3
|
+
category: "caching-artifacts"
|
|
4
|
+
severity: "warning"
|
|
5
|
+
tags:
|
|
6
|
+
- "cache"
|
|
7
|
+
- "backend"
|
|
8
|
+
- "internal-error"
|
|
9
|
+
- "github-status"
|
|
10
|
+
- "intermittent"
|
|
11
|
+
- "skipped"
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "An internal error has occurred in cache backend"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "The runner was not able to contact the cache service\\. Caching will be skipped"
|
|
16
|
+
flags: "i"
|
|
17
|
+
error_messages:
|
|
18
|
+
- "Warning: An internal error has occurred in cache backend. Please check https://www.githubstatus.com/ for any ongoing issues in actions."
|
|
19
|
+
- "Warning: The runner was not able to contact the cache service. Caching will be skipped. If you are concerned about the results of this, you can contact GitHub support."
|
|
20
|
+
root_cause: |
|
|
21
|
+
GitHub's distributed cache backend occasionally returns internal errors when the runner
|
|
22
|
+
attempts to save or restore a cache entry. This is a transient platform-side failure
|
|
23
|
+
unrelated to your workflow configuration or cache key. The runner catches this error
|
|
24
|
+
and emits a warning, then continues without failing the step or the job.
|
|
25
|
+
|
|
26
|
+
As a result, the cache is silently not saved (or not restored), and downstream steps
|
|
27
|
+
that depend on the cached state will see cache misses or stale data — but the overall
|
|
28
|
+
job still succeeds. This makes the problem invisible unless you inspect logs carefully.
|
|
29
|
+
|
|
30
|
+
These errors typically coincide with GitHub Actions infrastructure incidents. Checking
|
|
31
|
+
https://www.githubstatus.com/ during an active incident will confirm whether the issue
|
|
32
|
+
is platform-wide. The problem is not reproducible locally and usually self-resolves
|
|
33
|
+
within minutes to hours.
|
|
34
|
+
fix: |
|
|
35
|
+
No workflow change can prevent this — it is a transient platform issue. Mitigations:
|
|
36
|
+
|
|
37
|
+
1. Check https://www.githubstatus.com/ when you see this error repeatedly.
|
|
38
|
+
2. Always verify that a cache restore actually produced the expected files rather than
|
|
39
|
+
relying solely on the cache-hit output (backend errors return empty cache-hit).
|
|
40
|
+
3. Make build steps tolerant of cold-cache starts: do not gate your pipeline on the
|
|
41
|
+
cache being present.
|
|
42
|
+
4. Use restore-keys as a fallback so a partial match reduces the blast radius.
|
|
43
|
+
5. For critical workflows, consider a self-hosted runner with a local file-system or
|
|
44
|
+
S3-backed cache to avoid the GitHub-hosted backend entirely.
|
|
45
|
+
fix_code:
|
|
46
|
+
- language: yaml
|
|
47
|
+
label: "Always reinstall when cache-hit is not exactly 'true'"
|
|
48
|
+
code: |
|
|
49
|
+
- name: Cache npm dependencies
|
|
50
|
+
id: npm-cache
|
|
51
|
+
uses: actions/cache@v4
|
|
52
|
+
with:
|
|
53
|
+
path: ~/.npm
|
|
54
|
+
key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }}
|
|
55
|
+
restore-keys: |
|
|
56
|
+
${{ runner.os }}-npm-
|
|
57
|
+
|
|
58
|
+
- name: Install dependencies
|
|
59
|
+
# cache-hit is empty string (not 'false') on backend errors — guard with != 'true'
|
|
60
|
+
if: steps.npm-cache.outputs.cache-hit != 'true'
|
|
61
|
+
run: npm ci
|
|
62
|
+
prevention:
|
|
63
|
+
- "Never assume the cache was saved — treat each cache entry as optional and ensure the build succeeds from scratch."
|
|
64
|
+
- "Use restore-keys to allow partial cache hits; a stale partial match is better than a full miss."
|
|
65
|
+
- "Add a post-restore verification step that checks for expected files rather than relying solely on cache-hit output."
|
|
66
|
+
- "Monitor https://www.githubstatus.com/ when you see repeated intermittent cache failures."
|
|
67
|
+
docs:
|
|
68
|
+
- url: "https://github.com/actions/cache/issues/1611"
|
|
69
|
+
label: "actions/cache#1611 — Internal error in cache backend (39 reactions)"
|
|
70
|
+
- url: "https://github.com/actions/cache/issues/1621"
|
|
71
|
+
label: "actions/cache#1621 — Random restore failure (231 reactions)"
|
|
72
|
+
- url: "https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows"
|
|
73
|
+
label: "GitHub Docs — Caching dependencies to speed up workflows"
|
|
74
|
+
- url: "https://www.githubstatus.com/"
|
|
75
|
+
label: "GitHub Status — active incident tracker"
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
id: caching-artifacts-022
|
|
2
|
+
title: "cache-hit Output Always Empty Due to Case-Sensitive Step ID Mismatch"
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- actions/cache
|
|
7
|
+
- cache-hit
|
|
8
|
+
- step-id
|
|
9
|
+
- case-sensitive
|
|
10
|
+
- conditional-install
|
|
11
|
+
patterns:
|
|
12
|
+
- regex: 'steps\.[a-zA-Z0-9_-]+\.outputs\.cache-hit'
|
|
13
|
+
flags: "i"
|
|
14
|
+
- regex: 'cache-hit\s*!=\s*''true'''
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: 'cache-hit\s*==\s*''true'''
|
|
17
|
+
flags: "i"
|
|
18
|
+
error_messages:
|
|
19
|
+
- "Cache not found for input keys"
|
|
20
|
+
- "Post Cache: Saved successfully"
|
|
21
|
+
root_cause: |
|
|
22
|
+
The `actions/cache` action sets a `cache-hit` output on the step that can be read
|
|
23
|
+
via `steps.<step-id>.outputs.cache-hit`. Step IDs in GitHub Actions are case-sensitive
|
|
24
|
+
and must exactly match between the `id:` field and the `steps.<id>.outputs.*` reference.
|
|
25
|
+
|
|
26
|
+
If the step ID in `id:` does not exactly match the ID in the `if:` condition — even
|
|
27
|
+
by a single character difference (hyphen vs underscore, camelCase vs kebab-case) —
|
|
28
|
+
the expression evaluates to an empty string, not `false`. No error or warning is emitted.
|
|
29
|
+
|
|
30
|
+
Common ID mismatches:
|
|
31
|
+
- `id: npm-cache` referenced as `steps.npmCache.outputs.cache-hit`
|
|
32
|
+
- `id: node_modules_cache` referenced as `steps.node-modules-cache.outputs.cache-hit`
|
|
33
|
+
- `id: Cache` referenced as `steps.cache.outputs.cache-hit` (capitalization)
|
|
34
|
+
|
|
35
|
+
The result is that the conditional install step (`if: steps.X.outputs.cache-hit !=
|
|
36
|
+
'true'`) ALWAYS runs (because empty string != 'true' is always true), wasting time
|
|
37
|
+
on every run even when the cache was a perfect hit.
|
|
38
|
+
fix: |
|
|
39
|
+
Verify that the step `id:` field and all `steps.<id>.outputs.cache-hit` references
|
|
40
|
+
are identical character-for-character. Use lowercase kebab-case IDs to minimize
|
|
41
|
+
typo risk.
|
|
42
|
+
|
|
43
|
+
To debug: add a step that prints the cache-hit value before the conditional:
|
|
44
|
+
- run: echo "cache-hit=${{ steps.my-cache.outputs.cache-hit }}"
|
|
45
|
+
|
|
46
|
+
If the value is empty (not 'true' or 'false'), the ID is mismatched.
|
|
47
|
+
fix_code:
|
|
48
|
+
- language: yaml
|
|
49
|
+
label: "Wrong: mismatched step IDs (cache-hit always empty)"
|
|
50
|
+
code: |
|
|
51
|
+
steps:
|
|
52
|
+
- name: Cache node_modules
|
|
53
|
+
uses: actions/cache@v4
|
|
54
|
+
id: npm-cache # ID: npm-cache (hyphen)
|
|
55
|
+
with:
|
|
56
|
+
path: node_modules
|
|
57
|
+
key: npm-${{ hashFiles('package-lock.json') }}
|
|
58
|
+
|
|
59
|
+
- name: Install
|
|
60
|
+
# BUG: references npmCache (camelCase) — evaluates to empty string
|
|
61
|
+
if: steps.npmCache.outputs.cache-hit != 'true'
|
|
62
|
+
run: npm ci
|
|
63
|
+
- language: yaml
|
|
64
|
+
label: "Correct: matching step IDs (cache-hit works as expected)"
|
|
65
|
+
code: |
|
|
66
|
+
steps:
|
|
67
|
+
- name: Cache node_modules
|
|
68
|
+
uses: actions/cache@v4
|
|
69
|
+
id: npm-cache # ID: npm-cache
|
|
70
|
+
with:
|
|
71
|
+
path: node_modules
|
|
72
|
+
key: npm-${{ runner.os }}-${{ hashFiles('**/package-lock.json') }}
|
|
73
|
+
restore-keys: |
|
|
74
|
+
npm-${{ runner.os }}-
|
|
75
|
+
|
|
76
|
+
- name: Debug cache hit
|
|
77
|
+
run: echo "cache-hit=${{ steps.npm-cache.outputs.cache-hit }}"
|
|
78
|
+
|
|
79
|
+
- name: Install
|
|
80
|
+
# CORRECT: matches id exactly — cache-hit is 'true' or 'false'
|
|
81
|
+
if: steps.npm-cache.outputs.cache-hit != 'true'
|
|
82
|
+
run: npm ci
|
|
83
|
+
prevention:
|
|
84
|
+
- "Always use identical lowercase kebab-case strings in id: and steps.<id>.outputs.cache-hit references."
|
|
85
|
+
- "Add a debug echo step to print the cache-hit value when troubleshooting unexpected installs."
|
|
86
|
+
- "Avoid actions/cache's deprecated save-always input; if the install command is fast on a warm cache, run it unconditionally and remove the conditional entirely."
|
|
87
|
+
- "Lint workflows with actionlint — it catches step ID reference mismatches at the expression level."
|
|
88
|
+
- "Consider using the setup-* actions (setup-node, setup-python) which manage caching internally."
|
|
89
|
+
docs:
|
|
90
|
+
- url: "https://github.com/actions/cache#outputs"
|
|
91
|
+
label: "actions/cache: Outputs (cache-hit)"
|
|
92
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/accessing-contextual-information-about-workflow-runs#steps-context"
|
|
93
|
+
label: "GitHub Docs: steps context"
|
|
94
|
+
- url: "https://github.com/rhysd/actionlint"
|
|
95
|
+
label: "actionlint: Static checker for GitHub Actions workflow files"
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
id: caching-artifacts-025
|
|
2
|
+
title: "Cache Post-Step Skipped on Job Failure — Dependencies Not Saved"
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- cache
|
|
7
|
+
- post-step
|
|
8
|
+
- job-failure
|
|
9
|
+
- save-always
|
|
10
|
+
- actions-cache
|
|
11
|
+
- dependency-caching
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "Post job cleanup|Skipping.*save.*post-if|Cache not saved.*failure"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "post-if.*success\\(\\)|cache.*post.*step.*skip"
|
|
16
|
+
flags: "i"
|
|
17
|
+
error_messages:
|
|
18
|
+
- "Post job cleanup."
|
|
19
|
+
- "Cache not saved due to an early exit"
|
|
20
|
+
- "##[debug]Skipping: no reason to save state"
|
|
21
|
+
root_cause: |
|
|
22
|
+
The actions/cache action hardcodes post-if: success() in its action.yml.
|
|
23
|
+
This means the post-step that actually saves the cache only runs when the
|
|
24
|
+
entire job succeeds. If any step after the cache restore step fails — a
|
|
25
|
+
build failure, a test failure, a linting error — the cache is silently
|
|
26
|
+
discarded and subsequent runs must re-download all dependencies from scratch.
|
|
27
|
+
|
|
28
|
+
This is particularly painful when an expensive dependency install succeeds
|
|
29
|
+
but a later step (npm run build, pytest, etc.) fails. The dependencies are
|
|
30
|
+
already installed and ready, but the cache save post-step is skipped because
|
|
31
|
+
the job status is no longer success, so `post-if: success()` evaluates to false.
|
|
32
|
+
|
|
33
|
+
Original issue opened January 2020 (actions/cache#92, 500+ reactions).
|
|
34
|
+
Partially addressed in v3 via actions/cache/save sub-action and in v4 via
|
|
35
|
+
the save-always: true input (deprecated in v4.1.0 because it does not work as intended).
|
|
36
|
+
|
|
37
|
+
The silent nature of the failure means developers often waste hours debugging
|
|
38
|
+
why subsequent runs are slow before realizing the cache was never saved.
|
|
39
|
+
fix: |
|
|
40
|
+
Three approaches depending on the cache action version in use:
|
|
41
|
+
|
|
42
|
+
Option 1 — actions/cache@v4 with save-always: true (simplest, but deprecated since v4.1.0 — prefer Option 2):
|
|
43
|
+
Add save-always: true to the cache step. Note that steps between restore
|
|
44
|
+
and save must use continue-on-error: true, or the save step must be placed
|
|
45
|
+
immediately after the install step (before build/test steps that might fail).
|
|
46
|
+
|
|
47
|
+
Option 2 — Explicit save step with if: always() (v3+):
|
|
48
|
+
Split into actions/cache/restore and actions/cache/save steps. Place the
|
|
49
|
+
save step immediately after the install step and mark it with if: always()
|
|
50
|
+
so it runs regardless of subsequent step outcomes.
|
|
51
|
+
|
|
52
|
+
Option 3 — Job restructuring:
|
|
53
|
+
Move dependency installation into a dedicated setup job. The build/test job
|
|
54
|
+
depends on setup succeeding. Cache is always saved because the setup job
|
|
55
|
+
only contains the install step.
|
|
56
|
+
fix_code:
|
|
57
|
+
- language: yaml
|
|
58
|
+
label: "actions/cache@v4 with save-always: true (deprecated since v4.1.0 — prefer the explicit save step)"
|
|
59
|
+
code: |
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/checkout@v4
|
|
62
|
+
- name: Restore cached dependencies
|
|
63
|
+
uses: actions/cache@v4
|
|
64
|
+
with:
|
|
65
|
+
path: ~/.npm
|
|
66
|
+
key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }}
|
|
67
|
+
restore-keys: |
|
|
68
|
+
${{ runner.os }}-npm-
|
|
69
|
+
save-always: true # Save cache even if a later step fails
|
|
70
|
+
- name: Install dependencies
|
|
71
|
+
run: npm ci
|
|
72
|
+
- name: Build
|
|
73
|
+
run: npm run build # Cache is saved even if this fails
|
|
74
|
+
- name: Test
|
|
75
|
+
run: npm test # Cache is saved even if this fails
|
|
76
|
+
- language: yaml
|
|
77
|
+
label: "Explicit save step with if: always() (works with v3+)"
|
|
78
|
+
code: |
|
|
79
|
+
steps:
|
|
80
|
+
- uses: actions/checkout@v4
|
|
81
|
+
- name: Restore cached dependencies
|
|
82
|
+
id: cache-restore
|
|
83
|
+
uses: actions/cache/restore@v4
|
|
84
|
+
with:
|
|
85
|
+
path: ~/.npm
|
|
86
|
+
key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }}
|
|
87
|
+
restore-keys: |
|
|
88
|
+
${{ runner.os }}-npm-
|
|
89
|
+
- name: Install dependencies
|
|
90
|
+
run: npm ci
|
|
91
|
+
- name: Save cache (always, even on build/test failure)
|
|
92
|
+
if: always()
|
|
93
|
+
uses: actions/cache/save@v4
|
|
94
|
+
with:
|
|
95
|
+
path: ~/.npm
|
|
96
|
+
key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }}
|
|
97
|
+
- name: Build
|
|
98
|
+
run: npm run build
|
|
99
|
+
- name: Test
|
|
100
|
+
run: npm test
|
|
101
|
+
prevention:
|
|
102
|
+
- "Do not rely on save-always: true (deprecated in actions/cache v4.1.0 because it does not work as intended); use separate restore/save steps for dependency caches that should persist regardless of build outcome."
|
|
103
|
+
- "Split cache restore/save steps and use if: always() on the save step for fine-grained control."
|
|
104
|
+
- "Place the cache save step immediately after the install step — before build/test steps that might fail."
|
|
105
|
+
- "Add a post-install verification step (e.g., node --version, pip list) to confirm install succeeded before building."
|
|
106
|
+
docs:
|
|
107
|
+
- url: "https://github.com/actions/cache/issues/92"
|
|
108
|
+
label: "actions/cache#92: Cache not saved on job failure (500+ reactions, opened 2020)"
|
|
109
|
+
- url: "https://github.com/actions/cache/tree/main/save#always-save-cache"
|
|
110
|
+
label: "actions/cache: always-save-cache documentation"
|
|
111
|
+
- url: "https://stackoverflow.com/questions/60491837/saving-cache-on-job-failure-in-github-actions"
|
|
112
|
+
label: "Stack Overflow: Saving cache on job failure in GitHub Actions (28 votes, 11K views)"
|
|
113
|
+
- url: "https://github.com/actions/cache?tab=readme-ov-file#v4"
|
|
114
|
+
label: "actions/cache v4 changelog: save-always parameter"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
id: concurrency-timing-020
|
|
2
|
+
title: "deploy-pages deployment wedged after workflow cancellation — in progress forever"
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- deploy-pages
|
|
7
|
+
- github-pages
|
|
8
|
+
- workflow-cancellation
|
|
9
|
+
- pages-deployment
|
|
10
|
+
- in-progress
|
|
11
|
+
- stuck
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "Deployment request failed for .+ due to in progress deployment"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "Please cancel .+ first or wait for it to complete"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "Deployment cancellation failed"
|
|
18
|
+
flags: "i"
|
|
19
|
+
error_messages:
|
|
20
|
+
- "Deployment request failed for 75152aed304ba9378fd527c226e9849ca39d5eda due to in progress deployment. Please cancel 4cb1cd27f0f1bceb7df2e4b7cb82785922dc978d first or wait for it to complete."
|
|
21
|
+
- "Error: Error: Request failed with status code 400"
|
|
22
|
+
- "Deployment cancellation failed Error: Request failed with status code 401"
|
|
23
|
+
- "Failed to create deployment (status: 400) with build version X. Responded with: Deployment request failed due to in progress deployment."
|
|
24
|
+
root_cause: |
|
|
25
|
+
When an Actions workflow running actions/deploy-pages is cancelled mid-execution,
|
|
26
|
+
the action's post-step cleanup attempts to call the Pages deployment cancellation API.
|
|
27
|
+
If this cancellation API call fails (401/404, rate limit, or network timeout),
|
|
28
|
+
the Pages deployment remains permanently stuck in the "in_progress" state.
|
|
29
|
+
All subsequent deploy-pages runs fail with a 400 error because the GitHub Pages API
|
|
30
|
+
enforces a single active deployment per repo — new deployments are rejected until
|
|
31
|
+
the stuck one is explicitly cancelled or replaced.
|
|
32
|
+
fix: |
|
|
33
|
+
To prevent the wedge: use a job-level concurrency group with cancel-in-progress: false
|
|
34
|
+
on your deploy job so newer runs cannot cancel a deployment that is already in progress.
|
|
35
|
+
|
|
36
|
+
To recover from a stuck deployment: use the GitHub REST API to cancel or delete the
|
|
37
|
+
stuck Pages deployment. The error message shows only a 40-character hash, so list the github-pages deployments to find the stuck one's numeric ID, mark it inactive, then
|
|
38
|
+
DELETE it via the deployments API.
|
|
39
|
+
fix_code:
|
|
40
|
+
- language: yaml
|
|
41
|
+
label: "Prevent mid-deploy cancellation (recommended)"
|
|
42
|
+
code: |
|
|
43
|
+
jobs:
|
|
44
|
+
deploy:
|
|
45
|
+
concurrency:
|
|
46
|
+
group: pages-deploy
|
|
47
|
+
cancel-in-progress: false # never cancel a running Pages deployment
|
|
48
|
+
runs-on: ubuntu-latest
|
|
49
|
+
steps:
|
|
50
|
+
- uses: actions/deploy-pages@v4
|
|
51
|
+
- language: yaml
|
|
52
|
+
label: "Recover: cancel stuck deployment via GitHub API"
|
|
53
|
+
code: |
|
|
54
|
+
# List github-pages deployments to find the stuck one's numeric ID, mark it inactive, then delete it:
|
|
55
|
+
# gh api repos/OWNER/REPO/deployments --jq '[.[] | select(.environment == "github-pages")]'
|
|
56
|
+
# gh api repos/OWNER/REPO/deployments/DEPLOYMENT_ID/statuses \
|
|
57
|
+
# -X POST -f state=inactive
|
|
58
|
+
# gh api repos/OWNER/REPO/deployments/DEPLOYMENT_ID -X DELETE
|
|
59
|
+
prevention:
|
|
60
|
+
- "Set cancel-in-progress: false in the concurrency block of every deploy job so newer runs cannot cancel an in-flight deployment"
|
|
61
|
+
- "Avoid cancelling workflows while deploy-pages step is actively running"
|
|
62
|
+
- "Set up a separate deploy workflow triggered by workflow_run to decouple build from deploy"
|
|
63
|
+
- "Monitor the GitHub Pages deployment environment in your repo Settings for stuck states"
|
|
64
|
+
docs:
|
|
65
|
+
- url: "https://github.com/actions/deploy-pages/issues/22"
|
|
66
|
+
label: "actions/deploy-pages#22: Workflow cancellation left pages deploy wedged"
|
|
67
|
+
- url: "https://docs.github.com/en/rest/deployments/deployments"
|
|
68
|
+
label: "GitHub REST API: Deployments (for manual cleanup)"
|
|
69
|
+
- url: "https://docs.github.com/en/actions/using-jobs/using-concurrency"
|
|
70
|
+
label: "GitHub Actions: Using concurrency"
|