@htekdev/actions-debugger 1.0.124 → 1.0.126
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/errors/caching-artifacts/caching-artifacts-073.yml +100 -0
- package/errors/caching-artifacts/caching-artifacts-074.yml +117 -0
- package/errors/concurrency-timing/concurrency-timing-059.yml +146 -0
- package/errors/concurrency-timing/concurrency-timing-060.yml +144 -0
- package/errors/known-unsolved/known-unsolved-071.yml +122 -0
- package/errors/known-unsolved/known-unsolved-072.yml +143 -0
- package/errors/known-unsolved/known-unsolved-073.yml +172 -0
- package/errors/permissions-auth/permissions-auth-071.yml +144 -0
- package/errors/permissions-auth/permissions-auth-072.yml +112 -0
- package/errors/permissions-auth/permissions-auth-073.yml +127 -0
- package/errors/permissions-auth/permissions-auth-074.yml +106 -0
- package/errors/permissions-auth/permissions-auth-075.yml +137 -0
- package/errors/runner-environment/runner-environment-227.yml +106 -0
- package/errors/runner-environment/runner-environment-228.yml +117 -0
- package/errors/runner-environment/runner-environment-229.yml +119 -0
- package/errors/runner-environment/runner-environment-230.yml +129 -0
- package/errors/runner-environment/runner-environment-231.yml +90 -0
- package/errors/runner-environment/runner-environment-232.yml +131 -0
- package/errors/runner-environment/runner-environment-233.yml +90 -0
- package/errors/runner-environment/runner-environment-234.yml +114 -0
- package/errors/runner-environment/runner-environment-235.yml +151 -0
- package/errors/silent-failures/silent-failures-112.yml +97 -0
- package/errors/silent-failures/silent-failures-113.yml +110 -0
- package/errors/silent-failures/silent-failures-114.yml +116 -0
- package/errors/silent-failures/silent-failures-115.yml +130 -0
- package/errors/silent-failures/silent-failures-116.yml +117 -0
- package/errors/silent-failures/silent-failures-117.yml +137 -0
- package/errors/silent-failures/silent-failures-118.yml +156 -0
- package/errors/triggers/triggers-072.yml +150 -0
- package/errors/yaml-syntax/yaml-syntax-075.yml +128 -0
- package/errors/yaml-syntax/yaml-syntax-076.yml +107 -0
- package/package.json +1 -1
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
id: caching-artifacts-073
|
|
2
|
+
title: 'actions/upload-artifact and runner blob uploads stall or fail with Bad Request through HTTPS_PROXY — BlobClient missing proxy transport'
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- upload-artifact
|
|
7
|
+
- proxy
|
|
8
|
+
- https-proxy
|
|
9
|
+
- azure-blob
|
|
10
|
+
- self-hosted
|
|
11
|
+
- blob-client
|
|
12
|
+
- bad-request
|
|
13
|
+
- no-proxy
|
|
14
|
+
patterns:
|
|
15
|
+
- regex: 'Beginning upload of artifact content to blob storage'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
- regex: '^Error: Bad Request$'
|
|
18
|
+
flags: 'im'
|
|
19
|
+
- regex: 'CONNECT.*blob\.core\.windows\.net.*200.*ALLOWED'
|
|
20
|
+
flags: 'i'
|
|
21
|
+
- regex: 'latency=\d+\.\d+s.*stall|stall.*blob.*proxy'
|
|
22
|
+
flags: 'i'
|
|
23
|
+
error_messages:
|
|
24
|
+
- 'Beginning upload of artifact content to blob storage'
|
|
25
|
+
- 'Error: Bad Request'
|
|
26
|
+
- 'CONNECT productionresultssa*.blob.core.windows.net:443 → status=200 (ALLOWED)'
|
|
27
|
+
root_cause: |
|
|
28
|
+
When a self-hosted runner is behind an HTTPS forward proxy (HTTPS_PROXY / https_proxy env var),
|
|
29
|
+
artifact uploads (actions/upload-artifact) and runner-internal uploads (step summaries, job logs,
|
|
30
|
+
diagnostics) stall or fail with "Error: Bad Request".
|
|
31
|
+
|
|
32
|
+
Root cause — two layers, same problem:
|
|
33
|
+
|
|
34
|
+
1. @actions/artifact (TypeScript, used by upload-artifact@v4-v7) creates a BlobClient from
|
|
35
|
+
@azure/storage-blob with only the authenticated URL:
|
|
36
|
+
new BlobClient(authenticatedUploadURL)
|
|
37
|
+
No StoragePipelineOptions with proxy configuration are passed. The Azure SDK builds its own
|
|
38
|
+
HTTP pipeline without proxy transport, so even when HTTPS_PROXY is set in the environment, the
|
|
39
|
+
SDK does not correctly route the CONNECT tunnel.
|
|
40
|
+
|
|
41
|
+
2. The runner's .NET ResultsHttpClient (used for step summaries, logs, diagnostics) also creates a
|
|
42
|
+
BlobClient without proxy transport options (runner#4351).
|
|
43
|
+
|
|
44
|
+
Proxy logs show:
|
|
45
|
+
- CONNECT tunnel to *.blob.core.windows.net:443 succeeds (HTTP 200 ALLOWED).
|
|
46
|
+
- Only ~17 KB of the payload is transmitted.
|
|
47
|
+
- The connection stalls for ~75 seconds and returns a "Bad Request" response.
|
|
48
|
+
- curl / Python / .NET HttpClient all upload successfully to the same endpoint in <1 second.
|
|
49
|
+
|
|
50
|
+
The upload step logs show the artifact upload starting but no "Artifact successfully finalized" line,
|
|
51
|
+
followed immediately by "Error: Bad Request". The step fails with no further detail.
|
|
52
|
+
fix: |
|
|
53
|
+
Add the Azure Blob Storage hostname to NO_PROXY to bypass the HTTPS proxy for all blob traffic.
|
|
54
|
+
The upload URL already contains a time-limited SAS token for authentication, so bypassing the
|
|
55
|
+
proxy for this destination does not weaken security in most configurations.
|
|
56
|
+
|
|
57
|
+
Set NO_PROXY (and no_proxy, because some tools read only the lowercase variable) to include .blob.core.windows.net.
|
|
58
|
+
|
|
59
|
+
This can be set:
|
|
60
|
+
- In the runner's .env file (persists across all jobs on the runner)
|
|
61
|
+
- As job-level env: in the workflow (overrides only for that job)
|
|
62
|
+
fix_code:
|
|
63
|
+
- language: bash
|
|
64
|
+
label: 'Persist NO_PROXY in the runner service .env file'
|
|
65
|
+
code: |
|
|
66
|
+
# Append to the runner .env file (path varies by install location)
|
|
67
|
+
echo 'NO_PROXY=.blob.core.windows.net' >> /home/runner/actions-runner/.env
|
|
68
|
+
echo 'no_proxy=.blob.core.windows.net' >> /home/runner/actions-runner/.env
|
|
69
|
+
# Restart the runner service to pick up the change
|
|
70
|
+
sudo systemctl restart actions.runner.*.service
|
|
71
|
+
|
|
72
|
+
- language: yaml
|
|
73
|
+
label: 'Set NO_PROXY per workflow job to bypass proxy for artifact uploads'
|
|
74
|
+
code: |
|
|
75
|
+
jobs:
|
|
76
|
+
build:
|
|
77
|
+
runs-on: [self-hosted]
|
|
78
|
+
env:
|
|
79
|
+
# Bypass HTTPS proxy for Azure Blob Storage — prevents BlobClient stall
|
|
80
|
+
NO_PROXY: '.blob.core.windows.net'
|
|
81
|
+
no_proxy: '.blob.core.windows.net'
|
|
82
|
+
steps:
|
|
83
|
+
- uses: actions/checkout@v4
|
|
84
|
+
- run: ./build.sh
|
|
85
|
+
- uses: actions/upload-artifact@v6
|
|
86
|
+
with:
|
|
87
|
+
name: build-output
|
|
88
|
+
path: ./dist/
|
|
89
|
+
prevention:
|
|
90
|
+
- 'Always test artifact uploads when deploying self-hosted runners behind an HTTPS forward proxy before putting runners into production.'
|
|
91
|
+
- 'Set NO_PROXY=.blob.core.windows.net in the runner environment before rolling out proxy-configured runners.'
|
|
92
|
+
- 'Watch proxy logs for 75-second CONNECT stalls to *.blob.core.windows.net as the signature for this issue.'
|
|
93
|
+
- 'Runner-internal uploads (step summaries, job logs) are also affected — verify both artifact and job-summary visibility when testing.'
|
|
94
|
+
docs:
|
|
95
|
+
- url: 'https://github.com/actions/toolkit/issues/2377'
|
|
96
|
+
label: 'actions/toolkit#2377 — @actions/artifact BlobClient missing proxy transport (open)'
|
|
97
|
+
- url: 'https://github.com/actions/runner/issues/4351'
|
|
98
|
+
label: 'actions/runner#4351 — runner Azure Blob uploads stall through HTTPS proxy (open)'
|
|
99
|
+
- url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#communication-between-self-hosted-runners-and-github'
|
|
100
|
+
label: 'GitHub Docs — Self-hosted runner network communication requirements'
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
id: caching-artifacts-074
|
|
2
|
+
title: 'actions/cache restore silently treats 429 rate limit as cache miss — no retry, full rebuild forced'
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- cache
|
|
7
|
+
- rate-limit
|
|
8
|
+
- 429
|
|
9
|
+
- restore
|
|
10
|
+
- cache-miss
|
|
11
|
+
- rebuild
|
|
12
|
+
- no-retry
|
|
13
|
+
- matrix
|
|
14
|
+
patterns:
|
|
15
|
+
- regex: "You've hit a rate limit"
|
|
16
|
+
flags: 'i'
|
|
17
|
+
- regex: 'Failed to restore.*Rate limited.*429'
|
|
18
|
+
flags: 'i'
|
|
19
|
+
- regex: 'Failed to GetCacheEntryDownloadURL.*rate limit exceeded'
|
|
20
|
+
flags: 'i'
|
|
21
|
+
- regex: 'Too Many Requests.*rate limit exceeded'
|
|
22
|
+
flags: 'i'
|
|
23
|
+
error_messages:
|
|
24
|
+
- "Warning: You've hit a rate limit, your rate limit will reset in 18 seconds"
|
|
25
|
+
- "Warning: Failed to restore: Failed to GetCacheEntryDownloadURL: Rate limited: Failed request: (429) Too Many Requests: rate limit exceeded"
|
|
26
|
+
- "Cache not found for input keys: ..."
|
|
27
|
+
root_cause: |
|
|
28
|
+
When the GitHub Actions Cache Service rate-limits a restore request with HTTP 429, the cache
|
|
29
|
+
action (v4/v5) emits a warning and immediately falls back to "Cache not found" — treating the
|
|
30
|
+
rate limit as a permanent cache miss rather than a transient error worth retrying.
|
|
31
|
+
|
|
32
|
+
The cache action does not:
|
|
33
|
+
- Retry the restore after the rate-limit reset period (reported in the warning, typically 10-60s).
|
|
34
|
+
- Fail the step with a hard error so the developer is alerted to an infrastructure issue.
|
|
35
|
+
- Implement any exponential backoff on the restore path.
|
|
36
|
+
|
|
37
|
+
Downstream steps see only "Cache not found for input keys: ..." and proceed to rebuild
|
|
38
|
+
dependencies from scratch, as if the cache had never been saved. The real cause — a transient
|
|
39
|
+
429 from the cache service — is easily missed because it appears only as a "Warning:" line
|
|
40
|
+
mid-step, several lines before the final "Cache not found" output.
|
|
41
|
+
|
|
42
|
+
This is most disruptive in large parallel matrix workflows where many concurrent jobs all hit
|
|
43
|
+
the cache service simultaneously. The cache service rate-limits the burst, every affected job
|
|
44
|
+
sees a cache miss, and the entire matrix rebuilds from scratch. CI time can multiply by 5-10x
|
|
45
|
+
with no clear indication in the job summary that the rebuilds were avoidable.
|
|
46
|
+
fix: |
|
|
47
|
+
While the cache action does not yet implement automatic retry on 429, you can reduce the impact:
|
|
48
|
+
|
|
49
|
+
1. Limit max-parallel on matrix strategies to reduce simultaneous cache restore bursts.
|
|
50
|
+
|
|
51
|
+
2. Use restore-keys as a fallback: even if the exact-key restore is rate-limited, a prefix
|
|
52
|
+
match restore-keys request may succeed (different cache entry, different cache service shard).
|
|
53
|
+
|
|
54
|
+
3. Stagger cache-heavy workflows using concurrency groups or needs: dependencies so they don't
|
|
55
|
+
all restore caches at the same second.
|
|
56
|
+
|
|
57
|
+
4. Upgrade to the latest cache action — retry logic for 429 is a tracked improvement in
|
|
58
|
+
actions/cache#1758.
|
|
59
|
+
fix_code:
|
|
60
|
+
- language: yaml
|
|
61
|
+
label: 'Use restore-keys as a fallback to reduce full rebuilds on 429 rate limit'
|
|
62
|
+
code: |
|
|
63
|
+
- uses: actions/cache@v4
|
|
64
|
+
id: cache
|
|
65
|
+
with:
|
|
66
|
+
path: ~/.npm
|
|
67
|
+
key: ${{ runner.os }}-npm-${{ hashFiles('**/package-lock.json') }}
|
|
68
|
+
# Fallback: match any npm cache for this OS — may succeed even when exact key is rate-limited
|
|
69
|
+
restore-keys: |
|
|
70
|
+
${{ runner.os }}-npm-
|
|
71
|
+
|
|
72
|
+
- language: yaml
|
|
73
|
+
label: 'Limit matrix parallelism to reduce simultaneous cache restore bursts'
|
|
74
|
+
code: |
|
|
75
|
+
jobs:
|
|
76
|
+
build:
|
|
77
|
+
strategy:
|
|
78
|
+
matrix:
|
|
79
|
+
target: [linux-x64, linux-arm64, windows-x64, macos-x64, macos-arm64]
|
|
80
|
+
# Limit concurrent cache restores — burst of 2 is much less likely to
|
|
81
|
+
# trigger 429 than a burst of 5 hitting the cache service simultaneously.
|
|
82
|
+
max-parallel: 2
|
|
83
|
+
runs-on: ubuntu-latest
|
|
84
|
+
steps:
|
|
85
|
+
- uses: actions/cache@v4
|
|
86
|
+
with:
|
|
87
|
+
path: ~/.cache
|
|
88
|
+
key: ${{ matrix.target }}-deps-${{ hashFiles('**/Cargo.lock') }}
|
|
89
|
+
restore-keys: |
|
|
90
|
+
${{ matrix.target }}-deps-
|
|
91
|
+
|
|
92
|
+
- language: yaml
|
|
93
|
+
label: 'Stagger cache-restore-heavy jobs using concurrency groups'
|
|
94
|
+
code: |
|
|
95
|
+
jobs:
|
|
96
|
+
restore-cache:
|
|
97
|
+
concurrency:
|
|
98
|
+
group: cache-restore-${{ github.ref }}
|
|
99
|
+
cancel-in-progress: false # queue, not cancel
|
|
100
|
+
runs-on: ubuntu-latest
|
|
101
|
+
steps:
|
|
102
|
+
- uses: actions/cache@v4
|
|
103
|
+
with:
|
|
104
|
+
path: ~/.gradle/caches
|
|
105
|
+
key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
|
|
106
|
+
prevention:
|
|
107
|
+
- 'Never run more than ~8 simultaneous cache restore operations in the same repository — the cache service rate limit is per repository.'
|
|
108
|
+
- 'Always include restore-keys as a fallback so partial cache hits reduce rebuild cost when the exact key is rate-limited.'
|
|
109
|
+
- 'Watch for the "Warning: You''ve hit a rate limit" log line when investigating unexpectedly slow CI builds.'
|
|
110
|
+
- 'Treat "Cache not found" as potentially a transient 429, not necessarily a first-run or key-miss, especially in high-parallelism workflows.'
|
|
111
|
+
docs:
|
|
112
|
+
- url: 'https://github.com/actions/cache/issues/1758'
|
|
113
|
+
label: 'actions/cache#1758 — Handle rate limit with retry instead of silent cache miss (open)'
|
|
114
|
+
- url: 'https://github.com/actions/cache#inputs'
|
|
115
|
+
label: 'actions/cache — restore-keys documentation'
|
|
116
|
+
- url: 'https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows'
|
|
117
|
+
label: 'GitHub Docs — Caching dependencies to speed up workflows'
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
id: concurrency-timing-059
|
|
2
|
+
title: 'Skipped downstream job satisfies required status check — PR merges despite upstream job failure'
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- needs
|
|
7
|
+
- skipped
|
|
8
|
+
- required-status-check
|
|
9
|
+
- branch-protection
|
|
10
|
+
- dependency
|
|
11
|
+
- silent-merge
|
|
12
|
+
- job-conclusion
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'This job was skipped'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'Result: skipped'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
- regex: 'needs\.[a-zA-Z0-9_-]+\.result.*skipped'
|
|
19
|
+
flags: 'i'
|
|
20
|
+
error_messages:
|
|
21
|
+
- "This job was skipped."
|
|
22
|
+
- "Skipping this job because a previous job in the chain was skipped or failed."
|
|
23
|
+
root_cause: |
|
|
24
|
+
GitHub Actions marks a downstream job as `skipped` (not `failure`) when an upstream
|
|
25
|
+
`needs:` dependency fails or is cancelled and the downstream job has no explicit `if:`
|
|
26
|
+
condition to handle that state.
|
|
27
|
+
|
|
28
|
+
Since April 2023, GitHub's branch protection rules treat a `skipped` check conclusion
|
|
29
|
+
as **passing** — equivalent to `success` — to support the common "aggregator job"
|
|
30
|
+
pattern. This means:
|
|
31
|
+
|
|
32
|
+
1. `build` job fails.
|
|
33
|
+
2. `test-results` job (which `needs: [build]`) is marked `skipped`.
|
|
34
|
+
3. Branch protection rule requires `test-results` to pass.
|
|
35
|
+
4. GitHub sees conclusion = `skipped` → treats it as satisfied → merge is allowed.
|
|
36
|
+
|
|
37
|
+
The PR can be merged even though the `build` job failed. There is no warning or
|
|
38
|
+
error in the UI — the required check shows a green checkmark (or neutral status)
|
|
39
|
+
rather than a red blocking indicator.
|
|
40
|
+
|
|
41
|
+
This behavior is distinct from:
|
|
42
|
+
- `cancel-in-progress` cancelling a required check (conclusion = `cancelled`, which
|
|
43
|
+
blocks merging — a separate issue).
|
|
44
|
+
- Path-filter causing a workflow to never run (check stays "Expected" / pending).
|
|
45
|
+
- The general skipped-needs cascade (which documents that downstream jobs skip, but
|
|
46
|
+
not the branch protection bypass consequence).
|
|
47
|
+
fix: |
|
|
48
|
+
Add an explicit **catch-all aggregator job** that runs whenever any dependency failed
|
|
49
|
+
or was cancelled, and exits with a non-zero code:
|
|
50
|
+
|
|
51
|
+
```yaml
|
|
52
|
+
ci-gate:
|
|
53
|
+
if: ${{ always() && (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) }}
|
|
54
|
+
needs: [build, test, lint]
|
|
55
|
+
runs-on: ubuntu-latest
|
|
56
|
+
steps:
|
|
57
|
+
- name: Fail — one or more required jobs did not succeed
|
|
58
|
+
run: |
|
|
59
|
+
echo "Required jobs: ${{ toJSON(needs.*.result) }}"
|
|
60
|
+
exit 1
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Set `ci-gate` as the sole required status check in branch protection. This aggregator
|
|
64
|
+
job only runs (and fails) when something upstream fails. When all upstream jobs
|
|
65
|
+
succeed, `ci-gate` is `skipped` → satisfies the required check. When any upstream
|
|
66
|
+
fails, `ci-gate` runs and explicitly fails → blocks the merge.
|
|
67
|
+
|
|
68
|
+
Alternatively, use the pattern that always runs the aggregator:
|
|
69
|
+
|
|
70
|
+
```yaml
|
|
71
|
+
ci-gate:
|
|
72
|
+
if: always()
|
|
73
|
+
needs: [build, test, lint]
|
|
74
|
+
runs-on: ubuntu-latest
|
|
75
|
+
steps:
|
|
76
|
+
- name: Check all required jobs passed
|
|
77
|
+
run: |
|
|
78
|
+
results='${{ toJSON(needs.*.result) }}'
|
|
79
|
+
if echo "$results" | grep -qE '"failure"|"cancelled"'; then
|
|
80
|
+
echo "One or more jobs failed: $results"
|
|
81
|
+
exit 1
|
|
82
|
+
fi
|
|
83
|
+
echo "All required jobs passed."
|
|
84
|
+
```
|
|
85
|
+
fix_code:
|
|
86
|
+
- language: yaml
|
|
87
|
+
label: 'Broken — downstream skipped check silently satisfies branch protection'
|
|
88
|
+
code: |
|
|
89
|
+
jobs:
|
|
90
|
+
build:
|
|
91
|
+
runs-on: ubuntu-latest
|
|
92
|
+
steps:
|
|
93
|
+
- run: ./build.sh # Fails
|
|
94
|
+
|
|
95
|
+
test-results:
|
|
96
|
+
needs: [build] # Skipped when build fails
|
|
97
|
+
runs-on: ubuntu-latest
|
|
98
|
+
# ⚠ No if: condition — job is skipped when build fails
|
|
99
|
+
# ⚠ Branch protection requires test-results to "pass"
|
|
100
|
+
# ⚠ skipped = passing in GitHub's branch protection evaluation
|
|
101
|
+
steps:
|
|
102
|
+
- run: ./test.sh
|
|
103
|
+
|
|
104
|
+
- language: yaml
|
|
105
|
+
label: 'Fixed — explicit catch-all aggregator job that fails on upstream failure'
|
|
106
|
+
code: |
|
|
107
|
+
jobs:
|
|
108
|
+
build:
|
|
109
|
+
runs-on: ubuntu-latest
|
|
110
|
+
steps:
|
|
111
|
+
- run: ./build.sh
|
|
112
|
+
|
|
113
|
+
test:
|
|
114
|
+
runs-on: ubuntu-latest
|
|
115
|
+
steps:
|
|
116
|
+
- run: ./test.sh
|
|
117
|
+
|
|
118
|
+
ci-gate:
|
|
119
|
+
if: always()
|
|
120
|
+
needs: [build, test]
|
|
121
|
+
runs-on: ubuntu-latest
|
|
122
|
+
steps:
|
|
123
|
+
- name: All required jobs must succeed
|
|
124
|
+
run: |
|
|
125
|
+
results='${{ toJSON(needs.*.result) }}'
|
|
126
|
+
if echo "$results" | grep -qE '"failure"|"cancelled"'; then
|
|
127
|
+
echo "Pipeline failed: $results"
|
|
128
|
+
exit 1
|
|
129
|
+
fi
|
|
130
|
+
echo "All jobs passed: $results"
|
|
131
|
+
|
|
132
|
+
# In branch protection: require "ci-gate" — not "build" or "test" individually
|
|
133
|
+
|
|
134
|
+
prevention:
|
|
135
|
+
- 'Use a single aggregator job (`ci-gate`) as the required status check instead of individual job names.'
|
|
136
|
+
- 'Always include `if: always()` on the aggregator and explicitly check `needs.*.result` for failure/cancelled.'
|
|
137
|
+
- 'Do NOT rely on `needs:` skipping to propagate failures to branch protection — skipped is treated as passing.'
|
|
138
|
+
- 'After changing which jobs are required status checks, verify by letting a build fail and confirm the PR is blocked.'
|
|
139
|
+
- 'actionlint does not detect this misconfiguration — manual testing is required.'
|
|
140
|
+
docs:
|
|
141
|
+
- url: 'https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/troubleshooting-required-status-checks'
|
|
142
|
+
label: 'GitHub Docs: Troubleshooting required status checks'
|
|
143
|
+
- url: 'https://github.com/actions/runner/issues/2566'
|
|
144
|
+
label: 'actions/runner#2566: Skipped jobs satisfy required checks (many reactions)'
|
|
145
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-conditions-to-control-job-execution'
|
|
146
|
+
label: 'GitHub Docs: Using conditions to control job execution'
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
id: concurrency-timing-060
|
|
2
|
+
title: 'cancel-in-progress: false still silently drops older pending run when third concurrent dispatch arrives — GitHub enforces a 1-active + 1-pending hard limit per concurrency group'
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- concurrency
|
|
7
|
+
- cancel-in-progress
|
|
8
|
+
- pending
|
|
9
|
+
- silent-cancel
|
|
10
|
+
- dispatch
|
|
11
|
+
- queue
|
|
12
|
+
- lost-run
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'This run was cancelled'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'cancel-in-progress:\s*false'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
- regex: 'Run was cancelled'
|
|
19
|
+
flags: 'i'
|
|
20
|
+
error_messages:
|
|
21
|
+
- "This run was cancelled."
|
|
22
|
+
- "Run was cancelled."
|
|
23
|
+
root_cause: |
|
|
24
|
+
Setting `cancel-in-progress: false` in a concurrency group does NOT mean "queue all
|
|
25
|
+
runs indefinitely." It means "do not cancel the currently *in-progress* run."
|
|
26
|
+
|
|
27
|
+
GitHub Actions enforces a hard internal limit of **1 in-progress + 1 pending** run
|
|
28
|
+
per concurrency group (when using the default or `cancel-in-progress: false`).
|
|
29
|
+
When a third concurrent run arrives:
|
|
30
|
+
|
|
31
|
+
1. Run A is **in-progress** (held in slot 1).
|
|
32
|
+
2. Run B is **pending** (held in slot 2, waiting for Run A to finish).
|
|
33
|
+
3. Run C arrives → GitHub silently cancels Run B to free slot 2, then queues Run C.
|
|
34
|
+
|
|
35
|
+
The net effect: Run B is lost. Run C is now pending and will eventually execute.
|
|
36
|
+
With `cancel-in-progress: false`, *only Run A is protected* from cancellation — not
|
|
37
|
+
Run B. The arriving run always displaces the previously pending one.
|
|
38
|
+
|
|
39
|
+
This surprises developers who read `cancel-in-progress: false` as "let all runs
|
|
40
|
+
queue and execute in order." The actual semantics are: "don't kill the in-flight
|
|
41
|
+
run, but still replace any queued-but-not-yet-running run."
|
|
42
|
+
|
|
43
|
+
There is no log entry showing Run B was cancelled by Run C's arrival; the cancellation
|
|
44
|
+
shows as "This run was cancelled" with no attribution.
|
|
45
|
+
|
|
46
|
+
To allow more than 1 pending run, use `queue: max` (up to 100 pending slots) — but
|
|
47
|
+
note that `queue: max` and `cancel-in-progress: true` cannot be combined.
|
|
48
|
+
fix: |
|
|
49
|
+
Choose the concurrency strategy that matches your desired behavior:
|
|
50
|
+
|
|
51
|
+
**Option 1 — Allow up to 100 queued runs (strict ordering, no drops):**
|
|
52
|
+
Use `queue: max`. Every run is preserved and executes in arrival order. The 101st
|
|
53
|
+
concurrent run silently cancels the oldest pending run (see concurrency-timing-058
|
|
54
|
+
for the queue:max overflow edge case).
|
|
55
|
+
|
|
56
|
+
**Option 2 — Cancel stale runs, always run the latest (most common for CI):**
|
|
57
|
+
Use `cancel-in-progress: true`. Stale pending runs are cancelled; only the most
|
|
58
|
+
recent push/dispatch runs.
|
|
59
|
+
|
|
60
|
+
**Option 3 — Fine-grained concurrency keys to prevent grouping:**
|
|
61
|
+
Include `github.sha` or `github.run_id` in the group key so each run gets its own
|
|
62
|
+
isolated slot and nothing is ever cancelled or queued against another run.
|
|
63
|
+
|
|
64
|
+
**Option 4 — Accept the default behavior:**
|
|
65
|
+
Understand that `cancel-in-progress: false` means 1-active + 1-pending, and design
|
|
66
|
+
your pipeline around this. For example, if you only need the latest commit tested, use
|
|
67
|
+
`cancel-in-progress: true` so you always test the latest commit, or `queue: max`
|
|
68
|
+
if you need every commit tested in order.
|
|
69
|
+
fix_code:
|
|
70
|
+
- language: yaml
|
|
71
|
+
label: 'Broken — cancel-in-progress: false does NOT queue all runs; 3rd run drops 2nd'
|
|
72
|
+
code: |
|
|
73
|
+
on: push
|
|
74
|
+
|
|
75
|
+
concurrency:
|
|
76
|
+
group: deploy-${{ github.ref }}
|
|
77
|
+
cancel-in-progress: false # ❌ Only protects the in-progress run (slot 1)
|
|
78
|
+
# ❌ Slot 2 (pending) is still replaced when a 3rd run arrives
|
|
79
|
+
|
|
80
|
+
jobs:
|
|
81
|
+
deploy:
|
|
82
|
+
runs-on: ubuntu-latest
|
|
83
|
+
steps:
|
|
84
|
+
- run: ./deploy.sh
|
|
85
|
+
|
|
86
|
+
- language: yaml
|
|
87
|
+
label: 'Fixed — use queue: max to preserve all pending runs (up to 100)'
|
|
88
|
+
code: |
|
|
89
|
+
on: push
|
|
90
|
+
|
|
91
|
+
concurrency:
|
|
92
|
+
group: deploy-${{ github.ref }}
|
|
93
|
+
queue: max # ✅ Up to 100 pending runs preserved, executed in order
|
|
94
|
+
# ✅ No run is silently dropped until the 101st arrives
|
|
95
|
+
|
|
96
|
+
jobs:
|
|
97
|
+
deploy:
|
|
98
|
+
runs-on: ubuntu-latest
|
|
99
|
+
steps:
|
|
100
|
+
- run: ./deploy.sh
|
|
101
|
+
|
|
102
|
+
- language: yaml
|
|
103
|
+
label: 'Alternative — cancel stale runs, keep only the latest (typical CI pattern)'
|
|
104
|
+
code: |
|
|
105
|
+
on: push
|
|
106
|
+
|
|
107
|
+
concurrency:
|
|
108
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
|
109
|
+
cancel-in-progress: true # ✅ Latest commit always runs; stale runs cancelled
|
|
110
|
+
|
|
111
|
+
jobs:
|
|
112
|
+
ci:
|
|
113
|
+
runs-on: ubuntu-latest
|
|
114
|
+
steps:
|
|
115
|
+
- run: ./test.sh
|
|
116
|
+
|
|
117
|
+
- language: yaml
|
|
118
|
+
label: 'Alternative — unique key per run (no cancellation, no queueing)'
|
|
119
|
+
code: |
|
|
120
|
+
on: push
|
|
121
|
+
|
|
122
|
+
concurrency:
|
|
123
|
+
group: ${{ github.workflow }}-${{ github.run_id }} # ✅ Each run is isolated
|
|
124
|
+
cancel-in-progress: false
|
|
125
|
+
|
|
126
|
+
jobs:
|
|
127
|
+
ci:
|
|
128
|
+
runs-on: ubuntu-latest
|
|
129
|
+
steps:
|
|
130
|
+
- run: ./test.sh
|
|
131
|
+
|
|
132
|
+
prevention:
|
|
133
|
+
- 'Read `cancel-in-progress: false` as "protect the running job, not all pending jobs."'
|
|
134
|
+
- 'Use `queue: max` when you need every dispatched run to eventually execute.'
|
|
135
|
+
- '`queue: max` and `cancel-in-progress: true` cannot be combined — validation error.'
|
|
136
|
+
- 'Include `github.sha` or `github.run_id` in the group key to give each run its own isolated slot.'
|
|
137
|
+
- 'Monitor the Actions tab for unexplained cancellations after rapid pushes to confirm you are hitting the 1-pending limit.'
|
|
138
|
+
docs:
|
|
139
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/control-the-concurrency-of-workflows-and-jobs'
|
|
140
|
+
label: 'GitHub Docs: Control the concurrency of workflows and jobs'
|
|
141
|
+
- url: 'https://github.com/orgs/community/discussions/5435'
|
|
142
|
+
label: 'GitHub Community #5435: cancel-in-progress: false still cancels pending runs'
|
|
143
|
+
- url: 'https://docs.github.com/en/actions/reference/workflow-syntax-for-github-actions#concurrency'
|
|
144
|
+
label: 'GitHub Docs: Workflow syntax — concurrency'
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
id: known-unsolved-071
|
|
2
|
+
title: 'Actions cache is repository-scoped — cannot be shared across repositories in the same organization'
|
|
3
|
+
category: known-unsolved
|
|
4
|
+
severity: limitation
|
|
5
|
+
tags:
|
|
6
|
+
- cache
|
|
7
|
+
- cross-repo
|
|
8
|
+
- organization
|
|
9
|
+
- scope
|
|
10
|
+
- monorepo
|
|
11
|
+
- limitation
|
|
12
|
+
- cache-isolation
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'Cache not found for input keys.*(?:cross-repo|other.repo|shared.cache)'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'No cache found.*(?:cross-repo|other.repo|shared)'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
error_messages:
|
|
19
|
+
- 'Cache not found for input keys: ...'
|
|
20
|
+
- 'No cache found'
|
|
21
|
+
root_cause: |
|
|
22
|
+
The GitHub Actions cache service scopes all cache entries to the repository where they
|
|
23
|
+
were created. There is no mechanism to share a cache entry between two different
|
|
24
|
+
repositories, even within the same organization.
|
|
25
|
+
|
|
26
|
+
Cache access rules per the GitHub documentation:
|
|
27
|
+
- A workflow can restore caches created in the current branch, the default branch (usually `main`),
|
|
28
|
+
or (for pull requests) the base branch including base branches of forks.
|
|
29
|
+
- "Cross-branch" access is supported within the same repository.
|
|
30
|
+
- There is NO "cross-repository" access — a cache created in `org/repo-a` is
|
|
31
|
+
completely invisible to workflows running in `org/repo-b`.
|
|
32
|
+
|
|
33
|
+
This affects teams who:
|
|
34
|
+
- Manage related repositories that share build toolchains (e.g., a shared Go module cache
|
|
35
|
+
across a dozen microservices).
|
|
36
|
+
- Have split monorepos where a common library is built separately and cached.
|
|
37
|
+
- Want to cache a slow Docker layer in one repo and reuse it in a deployment repo.
|
|
38
|
+
|
|
39
|
+
The underlying reason is cache isolation as a security boundary: allowing cross-repo
|
|
40
|
+
cache access could leak build artifacts or credentials stored in the cache between
|
|
41
|
+
unrelated repositories.
|
|
42
|
+
|
|
43
|
+
There is no current GitHub Actions native solution for cross-repo cache sharing. The
|
|
44
|
+
GitHub roadmap has not publicly committed to this feature as of 2026.
|
|
45
|
+
fix: |
|
|
46
|
+
There is no built-in fix. Workarounds depend on your use case:
|
|
47
|
+
|
|
48
|
+
1. Publish shared artifacts to a package registry (GitHub Packages, npm, PyPI, Docker Hub).
|
|
49
|
+
Instead of caching, version-tag the shared artifact and consume it as a dependency.
|
|
50
|
+
This is the recommended approach for shared libraries and Docker base images.
|
|
51
|
+
|
|
52
|
+
2. Use a self-hosted runner with a shared filesystem. The runner's local disk or a
|
|
53
|
+
network share can act as a cross-repo cache. Use the `path:` input of actions/cache
|
|
54
|
+
pointing to a shared mount. The cache service itself stays repository-scoped, so hits and misses on the shared mount are managed manually via key files.
|
|
55
|
+
|
|
56
|
+
3. Use a third-party caching backend (S3, GCS, Azure Blob, Artifactory) for build
|
|
57
|
+
artifacts that must be shared. Upload/download via CLI in workflow steps.
|
|
58
|
+
|
|
59
|
+
4. Consolidate the related repositories into a single repository (monorepo).
|
|
60
|
+
All workflows within the same repo can share cache entries.
|
|
61
|
+
fix_code:
|
|
62
|
+
- language: yaml
|
|
63
|
+
label: 'Publish shared Docker base image to GHCR instead of caching across repos'
|
|
64
|
+
code: |
|
|
65
|
+
# repo-a: builds and publishes the shared base image
|
|
66
|
+
jobs:
|
|
67
|
+
publish-base:
|
|
68
|
+
runs-on: ubuntu-latest
|
|
69
|
+
permissions:
|
|
70
|
+
packages: write
|
|
71
|
+
steps:
|
|
72
|
+
- uses: actions/checkout@v4
|
|
73
|
+
- uses: docker/login-action@v3
|
|
74
|
+
with:
|
|
75
|
+
registry: ghcr.io
|
|
76
|
+
username: ${{ github.actor }}
|
|
77
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
78
|
+
- uses: docker/build-push-action@v6
|
|
79
|
+
with:
|
|
80
|
+
context: ./base-image
|
|
81
|
+
push: true
|
|
82
|
+
tags: ghcr.io/${{ github.repository_owner }}/shared-base:latest
|
|
83
|
+
|
|
84
|
+
# repo-b: pulls the published image instead of relying on cache
|
|
85
|
+
jobs:
|
|
86
|
+
build:
|
|
87
|
+
runs-on: ubuntu-latest
|
|
88
|
+
container:
|
|
89
|
+
image: ghcr.io/myorg/shared-base:latest
|
|
90
|
+
credentials:
|
|
91
|
+
username: ${{ github.actor }}
|
|
92
|
+
password: ${{ secrets.GITHUB_TOKEN }}
|
|
93
|
+
steps:
|
|
94
|
+
- uses: actions/checkout@v4
|
|
95
|
+
- run: ./build.sh
|
|
96
|
+
|
|
97
|
+
- language: yaml
|
|
98
|
+
label: 'Self-hosted runner shared-path cache as a cross-repo workaround'
|
|
99
|
+
code: |
|
|
100
|
+
# Both repo-a and repo-b workflows — same self-hosted runner, shared disk at /opt/shared-cache
|
|
101
|
+
jobs:
|
|
102
|
+
build:
|
|
103
|
+
runs-on: [self-hosted, linux, shared-cache]
|
|
104
|
+
steps:
|
|
105
|
+
- uses: actions/checkout@v4
|
|
106
|
+
- uses: actions/cache@v4
|
|
107
|
+
with:
|
|
108
|
+
path: /opt/shared-cache/gradle
|
|
109
|
+
# Key is independent of the repo — deliberately shared
|
|
110
|
+
key: gradle-${{ hashFiles('**/*.gradle*') }}
|
|
111
|
+
# NOTE: This bypasses GitHub's repo-scope isolation.
|
|
112
|
+
# Ensure the self-hosted runner pool is trusted and isolated.
|
|
113
|
+
- run: ./gradlew build
|
|
114
|
+
prevention:
|
|
115
|
+
- 'Design shared build artifacts as versioned dependencies (packages) from the start — avoids cross-repo cache needs entirely.'
|
|
116
|
+
- 'For Docker base images shared across repos, publish to a registry and reference by digest, not by mutable tags.'
|
|
117
|
+
- 'When adopting self-hosted runners for cross-repo cache sharing, audit what secrets and artifacts are accessible to all jobs that share the runner to avoid cross-contamination.'
|
|
118
|
+
docs:
|
|
119
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#restrictions-for-accessing-a-cache'
|
|
120
|
+
label: 'GitHub Docs — Cache access restrictions and scoping'
|
|
121
|
+
- url: 'https://github.com/actions/cache/blob/main/tips-and-workarounds.md'
|
|
122
|
+
label: 'actions/cache — Tips and workarounds (cross-branch, cross-OS, but not cross-repo)'
|