@htekdev/actions-debugger 1.0.39 → 1.0.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/errors/caching-artifacts/setup-node-npm-cache-monorepo-lockfile-not-found.yml +118 -0
- package/errors/concurrency-timing/cancel-in-progress-queued-run-status-never-posts.yml +111 -0
- package/errors/concurrency-timing/step-timeout-not-supported-job-holds-runner.yml +101 -0
- package/errors/concurrency-timing/workflow-dispatch-push-shared-concurrency-silent-cancel.yml +104 -0
- package/errors/permissions-auth/github-actor-vs-triggering-actor-rerun-bypass.yml +111 -0
- package/errors/silent-failures/upload-artifact-path-ignores-working-directory.yml +103 -0
- package/errors/triggers/required-status-check-paths-filter-pr-stuck.yml +119 -0
- package/errors/yaml-syntax/runs-on-label-and-matching-infinite-queue.yml +97 -0
- package/package.json +1 -1
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
id: caching-artifacts-032
|
|
2
|
+
title: 'setup-node cache: npm silently skips caching in monorepos — lockfile not at workspace root'
|
|
3
|
+
category: caching-artifacts
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- setup-node
|
|
7
|
+
- npm
|
|
8
|
+
- cache
|
|
9
|
+
- monorepo
|
|
10
|
+
- lockfile
|
|
11
|
+
- cache-dependency-path
|
|
12
|
+
- silent-failure
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: "Warning: No file found for: package-lock\\.json"
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'No file found for.*No cache will be (saved|restored)'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
- regex: 'cache-dependency-path.*not found|No lockfile found'
|
|
19
|
+
flags: 'i'
|
|
20
|
+
error_messages:
|
|
21
|
+
- "Warning: No file found for: package-lock.json"
|
|
22
|
+
- "Warning: No file found for: yarn.lock"
|
|
23
|
+
- "Warning: No file found for: pnpm-lock.yaml"
|
|
24
|
+
- "No file found for: package-lock.json. No cache will be saved."
|
|
25
|
+
root_cause: |
|
|
26
|
+
actions/setup-node with cache: 'npm' (or 'yarn' / 'pnpm') searches for the
|
|
27
|
+
package manager lockfile at the workspace root ($GITHUB_WORKSPACE) by default.
|
|
28
|
+
|
|
29
|
+
In monorepos where the lockfile lives in a subdirectory (apps/frontend/,
|
|
30
|
+
packages/api/, etc.), or when the workflow uses working-directory: to change
|
|
31
|
+
the build context, setup-node logs a warning and continues WITHOUT configuring
|
|
32
|
+
a cache. The step exits 0, making this a silent failure.
|
|
33
|
+
|
|
34
|
+
The result: npm ci (or yarn install / pnpm install) downloads all dependencies
|
|
35
|
+
from the network on every workflow run, as though no cache were configured.
|
|
36
|
+
Build times are identical to runs with no cache setting at all.
|
|
37
|
+
|
|
38
|
+
The warning message ("No file found for: package-lock.json") is easily missed
|
|
39
|
+
in long step logs and does not fail the step, so developers often go weeks
|
|
40
|
+
without noticing the cache was never active.
|
|
41
|
+
|
|
42
|
+
The same behavior applies when:
|
|
43
|
+
- yarn.lock is not at workspace root
|
|
44
|
+
- pnpm-lock.yaml is not at workspace root
|
|
45
|
+
- Multiple lockfiles exist across packages (only the first match is used
|
|
46
|
+
unless cache-dependency-path is explicitly set to a glob)
|
|
47
|
+
fix: |
|
|
48
|
+
Use the cache-dependency-path input to specify the path to the lockfile
|
|
49
|
+
relative to the workspace root. Supports glob patterns for monorepos.
|
|
50
|
+
|
|
51
|
+
Single package in subdirectory:
|
|
52
|
+
cache-dependency-path: 'frontend/package-lock.json'
|
|
53
|
+
|
|
54
|
+
Multiple lockfiles across a monorepo:
|
|
55
|
+
cache-dependency-path: '**/package-lock.json'
|
|
56
|
+
|
|
57
|
+
Using a glob creates a combined cache key from all matched lockfiles. Any
|
|
58
|
+
change to any package lockfile invalidates the shared cache — this is
|
|
59
|
+
correct behavior for a monorepo where cross-package installs are common.
|
|
60
|
+
fix_code:
|
|
61
|
+
- language: yaml
|
|
62
|
+
label: 'Single package in subdirectory — point cache-dependency-path at lockfile'
|
|
63
|
+
code: |
|
|
64
|
+
jobs:
|
|
65
|
+
build:
|
|
66
|
+
runs-on: ubuntu-latest
|
|
67
|
+
steps:
|
|
68
|
+
- uses: actions/checkout@v4
|
|
69
|
+
|
|
70
|
+
- uses: actions/setup-node@v4
|
|
71
|
+
with:
|
|
72
|
+
node-version: '22'
|
|
73
|
+
cache: 'npm'
|
|
74
|
+
# WRONG (omitted): setup-node looks at $GITHUB_WORKSPACE/package-lock.json
|
|
75
|
+
# and silently skips caching when not found there
|
|
76
|
+
|
|
77
|
+
# CORRECT: path relative to $GITHUB_WORKSPACE
|
|
78
|
+
cache-dependency-path: 'frontend/package-lock.json'
|
|
79
|
+
|
|
80
|
+
- name: Install dependencies
|
|
81
|
+
working-directory: frontend
|
|
82
|
+
run: npm ci
|
|
83
|
+
- language: yaml
|
|
84
|
+
label: 'Monorepo — cache all packages using glob pattern'
|
|
85
|
+
code: |
|
|
86
|
+
jobs:
|
|
87
|
+
build:
|
|
88
|
+
runs-on: ubuntu-latest
|
|
89
|
+
steps:
|
|
90
|
+
- uses: actions/checkout@v4
|
|
91
|
+
|
|
92
|
+
- uses: actions/setup-node@v4
|
|
93
|
+
with:
|
|
94
|
+
node-version: '22'
|
|
95
|
+
cache: 'npm'
|
|
96
|
+
# Glob matches all package-lock.json files anywhere in the repo
|
|
97
|
+
# Combined hash from all matched lockfiles forms the cache key
|
|
98
|
+
cache-dependency-path: '**/package-lock.json'
|
|
99
|
+
|
|
100
|
+
- name: Install root dependencies
|
|
101
|
+
run: npm ci
|
|
102
|
+
|
|
103
|
+
- name: Install frontend dependencies
|
|
104
|
+
working-directory: packages/frontend
|
|
105
|
+
run: npm ci
|
|
106
|
+
prevention:
|
|
107
|
+
- 'Always set cache-dependency-path when the lockfile is not in the repository root'
|
|
108
|
+
- 'Use **/package-lock.json glob in monorepos to cover all packages with a single cache configuration'
|
|
109
|
+
- 'Verify caching is active by checking setup-node logs for "Cache restored successfully" or "Cache saved"'
|
|
110
|
+
- 'Check for "No file found for: package-lock.json" warnings as early signal that caching is silently disabled'
|
|
111
|
+
- 'When using working-directory: on install steps, ensure cache-dependency-path is also adjusted to match'
|
|
112
|
+
docs:
|
|
113
|
+
- url: 'https://github.com/actions/setup-node#caching-global-packages-data'
|
|
114
|
+
label: 'actions/setup-node: Caching global packages data — cache-dependency-path input'
|
|
115
|
+
- url: 'https://github.com/actions/setup-node/issues/530'
|
|
116
|
+
label: 'actions/setup-node#530: cache: npm silently skips in monorepos without cache-dependency-path'
|
|
117
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows'
|
|
118
|
+
label: 'GitHub Docs: Caching dependencies to speed up workflows — lockfile path configuration'
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
id: concurrency-timing-027
|
|
2
|
+
title: 'Queued run cancelled by cancel-in-progress before any job starts — required status check never posts, PR permanently blocked'
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- concurrency
|
|
7
|
+
- cancel-in-progress
|
|
8
|
+
- required-status-check
|
|
9
|
+
- branch-protection
|
|
10
|
+
- pr-blocked
|
|
11
|
+
- status-check
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: 'This run was cancelled because another run in the same concurrency group'
|
|
14
|
+
flags: 'i'
|
|
15
|
+
- regex: 'Waiting for.*status.*reported|Expected.*Waiting'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
error_messages:
|
|
18
|
+
- "This run was cancelled because another run in the same concurrency group is in progress."
|
|
19
|
+
- "Waiting for status to be reported"
|
|
20
|
+
root_cause: |
|
|
21
|
+
When cancel-in-progress: true cancels a workflow run that was queued but had
|
|
22
|
+
not yet started any job, GitHub does NOT post a status (pending, cancelled, or
|
|
23
|
+
failure) to the commit SHA. The run silently disappears from the run list.
|
|
24
|
+
|
|
25
|
+
Branch protection rules that require a specific status check (e.g., "CI / test")
|
|
26
|
+
only observe statuses that were posted. A run cancelled before its first job
|
|
27
|
+
starts never posts any status — not even "pending".
|
|
28
|
+
|
|
29
|
+
Impact on PRs with high push frequency:
|
|
30
|
+
1. Developer opens PR, pushes commit A — run starts, posts "pending"
|
|
31
|
+
2. Developer pushes fix commit B — run for A is cancelled, run for B is queued
|
|
32
|
+
3. Developer pushes commit C before B's run starts — B's run cancelled (never
|
|
33
|
+
started), run for C queued
|
|
34
|
+
4. If the run for C starts, it posts statuses and the PR unblocks
|
|
35
|
+
5. But if C is also cancelled before starting, the commit has NO status
|
|
36
|
+
6. Branch protection shows the required check as "Expected" forever
|
|
37
|
+
7. PR cannot be merged — the Merge button stays disabled indefinitely
|
|
38
|
+
|
|
39
|
+
This condition is self-healing if a new commit is pushed (starting a fresh
|
|
40
|
+
run that won't be cancelled), but in rapid-push scenarios the window persists
|
|
41
|
+
for many minutes and developers mistakenly believe the CI is broken.
|
|
42
|
+
fix: |
|
|
43
|
+
Option 1 — Queue instead of cancel (safest):
|
|
44
|
+
concurrency:
|
|
45
|
+
group: "${{ github.workflow }}-${{ github.ref }}"
|
|
46
|
+
cancel-in-progress: false
|
|
47
|
+
|
|
48
|
+
Runs queue behind one another; every commit eventually gets a status posted.
|
|
49
|
+
|
|
50
|
+
Option 2 — Status anchor job:
|
|
51
|
+
Add a minimal first job that completes instantly. Its "queued" + "in_progress"
|
|
52
|
+
status is posted to the commit SHA immediately, ensuring GitHub registers the
|
|
53
|
+
run before any cancellation can remove the status.
|
|
54
|
+
|
|
55
|
+
Option 3 — GitHub Actions recommended pattern:
|
|
56
|
+
Use cancel-in-progress: true for the expensive test jobs only, and have a
|
|
57
|
+
separate fast-posting job that always runs (not subject to concurrency group).
|
|
58
|
+
fix_code:
|
|
59
|
+
- language: yaml
|
|
60
|
+
label: 'Queue runs instead of cancelling — every commit gets a status'
|
|
61
|
+
code: |
|
|
62
|
+
on:
|
|
63
|
+
pull_request:
|
|
64
|
+
|
|
65
|
+
concurrency:
|
|
66
|
+
# Queue instead of cancel — latest run waits, but always posts status
|
|
67
|
+
group: '${{ github.workflow }}-${{ github.ref }}'
|
|
68
|
+
cancel-in-progress: false
|
|
69
|
+
|
|
70
|
+
jobs:
|
|
71
|
+
test:
|
|
72
|
+
runs-on: ubuntu-latest
|
|
73
|
+
steps:
|
|
74
|
+
- uses: actions/checkout@v4
|
|
75
|
+
- run: npm test
|
|
76
|
+
- language: yaml
|
|
77
|
+
label: 'Status anchor job — posts status immediately before cancel window'
|
|
78
|
+
code: |
|
|
79
|
+
on:
|
|
80
|
+
pull_request:
|
|
81
|
+
|
|
82
|
+
concurrency:
|
|
83
|
+
group: '${{ github.workflow }}-${{ github.ref }}'
|
|
84
|
+
cancel-in-progress: true # OK — anchor ensures status is posted first
|
|
85
|
+
|
|
86
|
+
jobs:
|
|
87
|
+
# Minimal job: completes in seconds, ensuring a status is recorded on the SHA
|
|
88
|
+
anchor:
|
|
89
|
+
runs-on: ubuntu-latest
|
|
90
|
+
steps:
|
|
91
|
+
- run: echo "CI registered for ${{ github.sha }}"
|
|
92
|
+
|
|
93
|
+
# Expensive test job that is safe to cancel
|
|
94
|
+
test:
|
|
95
|
+
needs: anchor
|
|
96
|
+
runs-on: ubuntu-latest
|
|
97
|
+
steps:
|
|
98
|
+
- uses: actions/checkout@v4
|
|
99
|
+
- run: npm test
|
|
100
|
+
prevention:
|
|
101
|
+
- 'Avoid cancel-in-progress: true on workflows whose jobs are required status checks for branch protection'
|
|
102
|
+
- 'Use cancel-in-progress: false with queuing semantics when PR mergeability must be preserved on every commit'
|
|
103
|
+
- 'Add a lightweight anchor job as the first required job to ensure a status posts before any cancel window closes'
|
|
104
|
+
- 'Monitor PRs stuck with Expected required checks — check whether recent commits had all their runs cancelled before starting'
|
|
105
|
+
docs:
|
|
106
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/using-concurrency'
|
|
107
|
+
label: 'GitHub Actions: Using concurrency — cancel-in-progress behavior'
|
|
108
|
+
- url: 'https://github.com/orgs/community/discussions/21280'
|
|
109
|
+
label: 'GitHub Community: Required status check never posts when run cancelled before start (120+ reactions)'
|
|
110
|
+
- url: 'https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-protected-branches/about-protected-branches#require-status-checks-before-merging'
|
|
111
|
+
label: 'GitHub Docs: Required status checks — how commit statuses are evaluated for branch protection'
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
id: concurrency-timing-025
|
|
2
|
+
title: 'No step-level timeout — a hung step holds the runner slot for up to 6 hours'
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: limitation
|
|
5
|
+
tags:
|
|
6
|
+
- timeout
|
|
7
|
+
- hung-step
|
|
8
|
+
- runner-slot
|
|
9
|
+
- step
|
|
10
|
+
- limitation
|
|
11
|
+
- self-hosted-runner
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: 'The runner has received a shutdown signal'
|
|
14
|
+
flags: 'i'
|
|
15
|
+
- regex: 'Error: The process.*timed out after \d+ minutes'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
- regex: 'Canceling since the workflow was cancelled'
|
|
18
|
+
flags: 'i'
|
|
19
|
+
error_messages:
|
|
20
|
+
- "The runner has received a shutdown signal. This can happen when the runner service is stopped, or a manually started runner is canceled."
|
|
21
|
+
- "Error: The process '/usr/bin/bash' failed with exit code 124"
|
|
22
|
+
- "Canceling since the workflow was cancelled."
|
|
23
|
+
root_cause: |
|
|
24
|
+
GitHub Actions applies no per-step timeout by default. timeout-minutes can be set
|
|
25
|
+
on a job or on a single step (jobs.<job_id>.steps[*].timeout-minutes); when a
|
|
26
|
+
step sets none, a hung run: step (network call blocked, test suite deadlocked,
|
|
27
|
+
subprocess waiting on stdin) holds the entire job until the job-level timeout fires.
|
|
28
|
+
|
|
29
|
+
The default job timeout is 6 hours (360 minutes) for GitHub-hosted runners
|
|
30
|
+
and 35 days (unlimited in practice) for self-hosted runners. A single hung
|
|
31
|
+
step therefore silently consumes 6 hours of runner minutes and one complete
|
|
32
|
+
runner slot before GitHub kills the job.
|
|
33
|
+
|
|
34
|
+
Runner slot starvation is the secondary effect: while the hung job occupies a
|
|
35
|
+
runner, queued jobs wait. On self-hosted runners with limited capacity one
|
|
36
|
+
hung step can block an entire team's CI queue indefinitely.
|
|
37
|
+
|
|
38
|
+
This is a documented platform limitation tracked in actions/runner#1120
|
|
39
|
+
(220+ reactions, open since 2020) with no scheduled fix date.
|
|
40
|
+
fix: |
|
|
41
|
+
Apply one of these workarounds depending on operating system:
|
|
42
|
+
|
|
43
|
+
Linux/macOS: Wrap the command with the system timeout utility (seconds):
|
|
44
|
+
run: timeout 300 ./integration-test.sh
|
|
45
|
+
|
|
46
|
+
Cross-platform: Use curl/Invoke-RestMethod built-in timeout options for
|
|
47
|
+
network calls, and test-runner native timeouts for test suites.
|
|
48
|
+
|
|
49
|
+
Always set timeout-minutes explicitly on every job to establish a hard upper
|
|
50
|
+
bound regardless of which step hangs.
|
|
51
|
+
fix_code:
|
|
52
|
+
- language: yaml
|
|
53
|
+
label: 'Set explicit job timeout and use OS timeout for individual steps'
|
|
54
|
+
code: |
|
|
55
|
+
jobs:
|
|
56
|
+
build:
|
|
57
|
+
runs-on: ubuntu-latest
|
|
58
|
+
timeout-minutes: 30 # explicit ceiling — never rely on 6h default
|
|
59
|
+
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/checkout@v4
|
|
62
|
+
|
|
63
|
+
# Linux/macOS: system timeout (in seconds)
|
|
64
|
+
- name: Run integration tests
|
|
65
|
+
run: timeout 180 ./scripts/integration-test.sh
|
|
66
|
+
|
|
67
|
+
# Network call with built-in timeout
|
|
68
|
+
- name: Fetch external resource
|
|
69
|
+
run: |
|
|
70
|
+
curl --max-time 60 --retry 3 https://api.example.com/data -o data.json
|
|
71
|
+
- language: yaml
|
|
72
|
+
label: 'Self-hosted runner — conservative timeout prevents slot starvation'
|
|
73
|
+
code: |
|
|
74
|
+
jobs:
|
|
75
|
+
deploy:
|
|
76
|
+
runs-on: [self-hosted, production]
|
|
77
|
+
timeout-minutes: 45 # critical on self-hosted — hung jobs block all runners
|
|
78
|
+
|
|
79
|
+
steps:
|
|
80
|
+
- name: Deploy with timeout guard
|
|
81
|
+
shell: pwsh
|
|
82
|
+
run: |
|
|
83
|
+
$job = Start-Job { ./deploy.ps1 }
|
|
84
|
+
if (-not (Wait-Job $job -Timeout 120)) {
|
|
85
|
+
Stop-Job $job
|
|
86
|
+
throw "Deploy script timed out after 120s"
|
|
87
|
+
}
|
|
88
|
+
Receive-Job $job
|
|
89
|
+
prevention:
|
|
90
|
+
- 'Always set explicit timeout-minutes on every job — never rely on the 6-hour GitHub-hosted default'
|
|
91
|
+
- 'Use OS-level timeout utilities (timeout on Linux/macOS) for individual network calls and scripts'
|
|
92
|
+
- 'Configure test-runner-native timeouts for test suites — they are more granular than job timeouts'
|
|
93
|
+
- 'Monitor runner queue depth on self-hosted runners — sudden growth often signals a hung step holding a slot'
|
|
94
|
+
- 'Set timeout-minutes: 5 on jobs you know should complete quickly to catch regressions early'
|
|
95
|
+
docs:
|
|
96
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#jobsjob_idtimeout-minutes'
|
|
97
|
+
label: 'GitHub Actions: jobs.<id>.timeout-minutes — job-level timeout (steps accept their own jobs.<id>.steps[*].timeout-minutes)'
|
|
98
|
+
- url: 'https://github.com/actions/runner/issues/1120'
|
|
99
|
+
label: 'actions/runner#1120: Feature request — step-level timeout (220+ reactions, open since 2020)'
|
|
100
|
+
- url: 'https://docs.github.com/en/actions/administering-github-actions/usage-limits-billing-and-administration#usage-limits'
|
|
101
|
+
label: 'GitHub Actions usage limits — default job timeout values per runner type'
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
id: concurrency-timing-026
|
|
2
|
+
title: 'workflow_dispatch run silently cancelled when push to same branch shares the concurrency group'
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- concurrency
|
|
7
|
+
- workflow-dispatch
|
|
8
|
+
- cancel-in-progress
|
|
9
|
+
- push
|
|
10
|
+
- manual-trigger
|
|
11
|
+
- silent-cancel
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: 'This run was cancelled because another run in the same concurrency group'
|
|
14
|
+
flags: 'i'
|
|
15
|
+
- regex: 'Canceling run due to.*concurrency group'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
- regex: 'This run has been canceled'
|
|
18
|
+
flags: 'i'
|
|
19
|
+
error_messages:
|
|
20
|
+
- "This run was cancelled because another run in the same concurrency group is in progress."
|
|
21
|
+
- "Canceling run due to a newer request for the same concurrency group."
|
|
22
|
+
- "This run has been canceled."
|
|
23
|
+
root_cause: |
|
|
24
|
+
A common concurrency pattern groups runs by workflow name and ref:
|
|
25
|
+
|
|
26
|
+
concurrency:
|
|
27
|
+
group: "${{ github.workflow }}-${{ github.ref }}"
|
|
28
|
+
cancel-in-progress: true
|
|
29
|
+
|
|
30
|
+
workflow_dispatch and push events on the same branch share identical
|
|
31
|
+
github.workflow and github.ref values, so they are placed in the same
|
|
32
|
+
concurrency slot.
|
|
33
|
+
|
|
34
|
+
A developer manually triggers a workflow_dispatch run (e.g., to deploy to
|
|
35
|
+
staging, run a migration, or kick off a release). While it is running, any
|
|
36
|
+
push to that branch — including a small documentation fix or revert — creates
|
|
37
|
+
a new run in the same concurrency slot and immediately cancels the in-progress
|
|
38
|
+
manual dispatch with no notification.
|
|
39
|
+
|
|
40
|
+
The developer discovers this only when checking on the deployment minutes
|
|
41
|
+
later to find it was cancelled mid-run. The automatic push run that replaced
|
|
42
|
+
it may be completely irrelevant to the manual operation.
|
|
43
|
+
fix: |
|
|
44
|
+
Include github.event_name in the concurrency group key to give workflow_dispatch
|
|
45
|
+
and push events separate concurrency slots:
|
|
46
|
+
|
|
47
|
+
concurrency:
|
|
48
|
+
group: "${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}"
|
|
49
|
+
cancel-in-progress: true
|
|
50
|
+
|
|
51
|
+
workflow_dispatch runs now share a slot only with other workflow_dispatch runs
|
|
52
|
+
on the same branch, and push runs share a slot only with other push runs.
|
|
53
|
+
|
|
54
|
+
For deployment or migration workflows where even manual-vs-manual cancellation
|
|
55
|
+
is undesirable, use cancel-in-progress: false and let runs queue.
|
|
56
|
+
fix_code:
|
|
57
|
+
- language: yaml
|
|
58
|
+
label: 'Include event_name in concurrency group — isolates manual from automatic triggers'
|
|
59
|
+
code: |
|
|
60
|
+
on:
|
|
61
|
+
push:
|
|
62
|
+
branches: [main]
|
|
63
|
+
workflow_dispatch:
|
|
64
|
+
|
|
65
|
+
concurrency:
|
|
66
|
+
# event_name isolates push and workflow_dispatch into separate concurrency slots
|
|
67
|
+
group: '${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}'
|
|
68
|
+
cancel-in-progress: true
|
|
69
|
+
|
|
70
|
+
jobs:
|
|
71
|
+
deploy:
|
|
72
|
+
runs-on: ubuntu-latest
|
|
73
|
+
steps:
|
|
74
|
+
- uses: actions/checkout@v4
|
|
75
|
+
- run: ./deploy.sh
|
|
76
|
+
- language: yaml
|
|
77
|
+
label: 'Separate workflow file for manual operations — no cancel-in-progress'
|
|
78
|
+
code: |
|
|
79
|
+
# deploy-manual.yml — only triggered by workflow_dispatch
|
|
80
|
+
on:
|
|
81
|
+
workflow_dispatch:
|
|
82
|
+
inputs:
|
|
83
|
+
environment:
|
|
84
|
+
description: 'Target environment'
|
|
85
|
+
required: true
|
|
86
|
+
type: choice
|
|
87
|
+
options: [staging, production]
|
|
88
|
+
|
|
89
|
+
# No shared concurrency group with push workflows
|
|
90
|
+
concurrency:
|
|
91
|
+
group: 'manual-deploy-${{ github.event.inputs.environment }}'
|
|
92
|
+
# cancel-in-progress defaults to false — manual deploys queue, not cancel
|
|
93
|
+
prevention:
|
|
94
|
+
- 'Always include github.event_name in concurrency group keys for workflows triggered by both push and workflow_dispatch'
|
|
95
|
+
- 'Use separate workflow files for manual deployment operations to avoid shared concurrency with automated triggers'
|
|
96
|
+
- 'Add a summary step or Telegram/Slack notification in the cleanup phase to alert when a run is cancelled mid-execution'
|
|
97
|
+
- 'Audit all concurrency group patterns when adding workflow_dispatch to an existing automated workflow'
|
|
98
|
+
docs:
|
|
99
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/using-concurrency'
|
|
100
|
+
label: 'GitHub Actions: Using concurrency — group patterns and cancel-in-progress'
|
|
101
|
+
- url: 'https://github.com/orgs/community/discussions/5435'
|
|
102
|
+
label: 'GitHub Community: workflow_dispatch cancelled by push with same concurrency group'
|
|
103
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/accessing-contextual-information-about-workflow-runs#github-context'
|
|
104
|
+
label: 'GitHub context: github.event_name property'
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
id: permissions-auth-033
|
|
2
|
+
title: 'github.actor frozen at original trigger time — github.triggering_actor differs on re-run, breaking actor-based access checks'
|
|
3
|
+
category: permissions-auth
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- github.actor
|
|
7
|
+
- github.triggering_actor
|
|
8
|
+
- re-run
|
|
9
|
+
- access-control
|
|
10
|
+
- if-condition
|
|
11
|
+
- silent-failure
|
|
12
|
+
- permissions
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'github\.triggering_actor|triggering_actor'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'Skipping.*actor.*not.*allowed|actor.*not in.*allowlist'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
- regex: 'Access denied.*github\.actor'
|
|
19
|
+
flags: 'i'
|
|
20
|
+
error_messages:
|
|
21
|
+
- "Skipping deployment: actor 'original-bot' is not in the deployers allowlist"
|
|
22
|
+
- "Access denied: github.actor does not match the expected release account"
|
|
23
|
+
root_cause: |
|
|
24
|
+
GitHub Actions provides two separate context properties for workflow actor identity:
|
|
25
|
+
|
|
26
|
+
github.actor — the user or app that ORIGINALLY triggered the workflow run
|
|
27
|
+
github.triggering_actor — the user or app that triggered the CURRENT run attempt
|
|
28
|
+
(reflects re-runs and re-trigger operations)
|
|
29
|
+
|
|
30
|
+
These values are identical for the first run of a workflow. They diverge when
|
|
31
|
+
the workflow is re-triggered via the "Re-run jobs" button, "Re-run failed jobs",
|
|
32
|
+
or the REST API re-run endpoint. In that case, github.actor still holds the
|
|
33
|
+
original requester's login while github.triggering_actor holds the person or
|
|
34
|
+
app that clicked re-run.
|
|
35
|
+
|
|
36
|
+
Two classes of silent failure result:
|
|
37
|
+
|
|
38
|
+
1. Security bypass: A workflow guards a deployment with
|
|
39
|
+
if: github.actor == 'deploy-bot'. A human clicks "Re-run" — github.actor
|
|
40
|
+
still shows 'deploy-bot' (original requester), so the guard passes and the
|
|
41
|
+
human inadvertently triggers a privileged deployment step.
|
|
42
|
+
|
|
43
|
+
2. Unintended block: A workflow allows deployments only for a specific release
|
|
44
|
+
manager by checking github.actor. A second authorized team member re-runs
|
|
45
|
+
the workflow — github.actor shows the original requester (someone else),
|
|
46
|
+
so the guard incorrectly blocks the re-run.
|
|
47
|
+
|
|
48
|
+
In both cases there is no error message indicating the actor mismatch — the
|
|
49
|
+
guard silently passes or silently blocks based on stale identity data.
|
|
50
|
+
fix: |
|
|
51
|
+
For access control checks that should apply to WHO IS CURRENTLY RUNNING the
|
|
52
|
+
workflow (including re-runs), replace github.actor with github.triggering_actor.
|
|
53
|
+
|
|
54
|
+
For checks that must enforce the original requester (e.g., "only the PR author
|
|
55
|
+
can trigger this check"), use github.actor explicitly and document the intent.
|
|
56
|
+
|
|
57
|
+
For high-security deployment gates, use GitHub Environment protection rules
|
|
58
|
+
with required reviewers. Environment protection is enforced by GitHub platform
|
|
59
|
+
security rather than YAML if: conditions and cannot be bypassed by re-runs.
|
|
60
|
+
fix_code:
|
|
61
|
+
- language: yaml
|
|
62
|
+
label: 'Use triggering_actor for re-run-aware access control'
|
|
63
|
+
code: |
|
|
64
|
+
jobs:
|
|
65
|
+
deploy:
|
|
66
|
+
runs-on: ubuntu-latest
|
|
67
|
+
steps:
|
|
68
|
+
# WRONG: github.actor is frozen at original trigger time
|
|
69
|
+
# github.actor still shows 'deploy-bot' even when a human re-runs
|
|
70
|
+
# - name: Gate check
|
|
71
|
+
# if: github.actor != 'deploy-bot'
|
|
72
|
+
# run: exit 1
|
|
73
|
+
|
|
74
|
+
# CORRECT: github.triggering_actor reflects who actually ran this attempt
|
|
75
|
+
- name: Gate check
|
|
76
|
+
if: github.triggering_actor != 'deploy-bot'
|
|
77
|
+
run: |
|
|
78
|
+
echo "::error::Deploy must be triggered by deploy-bot, got ${{ github.triggering_actor }}"
|
|
79
|
+
exit 1
|
|
80
|
+
|
|
81
|
+
- name: Deploy
|
|
82
|
+
run: ./deploy.sh
|
|
83
|
+
- language: yaml
|
|
84
|
+
label: 'Use environment protection rules for production deployments (preferred)'
|
|
85
|
+
code: |
|
|
86
|
+
jobs:
|
|
87
|
+
deploy:
|
|
88
|
+
runs-on: ubuntu-latest
|
|
89
|
+
environment: production # required reviewers enforced at platform level
|
|
90
|
+
steps:
|
|
91
|
+
- name: Deploy
|
|
92
|
+
env:
|
|
93
|
+
ACTOR: ${{ github.actor }}
|
|
94
|
+
TRIGGERED_BY: ${{ github.triggering_actor }}
|
|
95
|
+
run: |
|
|
96
|
+
echo "Original trigger: $ACTOR"
|
|
97
|
+
echo "This run triggered by: $TRIGGERED_BY"
|
|
98
|
+
./deploy.sh
|
|
99
|
+
prevention:
|
|
100
|
+
- 'Use github.triggering_actor (not github.actor) when the intent is to check who triggered the current run attempt'
|
|
101
|
+
- 'Prefer GitHub Environment protection rules and required reviewers over actor-based if: conditions for deployment gates'
|
|
102
|
+
- 'Document in each workflow whether actor checks use github.actor (original) or github.triggering_actor (current)'
|
|
103
|
+
- 'Test actor-based gates by having a second authorized user re-run the workflow to verify the check fires as expected'
|
|
104
|
+
- 'Never use github.actor alone for security-sensitive production deployment gates — it is not re-run-aware'
|
|
105
|
+
docs:
|
|
106
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/accessing-contextual-information-about-workflow-runs#github-context'
|
|
107
|
+
label: 'github context — github.actor vs github.triggering_actor properties'
|
|
108
|
+
- url: 'https://github.blog/changelog/2022-08-09-github-actions-re-run-workflows-and-jobs/'
|
|
109
|
+
label: 'GitHub Changelog 2022-08-09: Re-run workflows and jobs — introducing triggering_actor'
|
|
110
|
+
- url: 'https://github.com/orgs/community/discussions/27154'
|
|
111
|
+
label: 'GitHub Community: github.actor vs github.triggering_actor for re-run access control'
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
id: silent-failures-044
|
|
2
|
+
title: 'upload-artifact path patterns resolved from workspace root — working-directory setting ignored'
|
|
3
|
+
category: silent-failures
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- upload-artifact
|
|
7
|
+
- working-directory
|
|
8
|
+
- path
|
|
9
|
+
- glob
|
|
10
|
+
- artifacts
|
|
11
|
+
- workspace
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: 'No files were found with the provided path'
|
|
14
|
+
flags: 'i'
|
|
15
|
+
- regex: 'Warning: No files were found with the provided path'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
- regex: 'artifact.*no files.*found|no files.*artifact'
|
|
18
|
+
flags: 'i'
|
|
19
|
+
error_messages:
|
|
20
|
+
- "Error: No files were found with the provided path: dist/build.zip. No artifacts will be uploaded."
|
|
21
|
+
- "Warning: No files were found with the provided path: *.tar.gz. No artifacts will be uploaded."
|
|
22
|
+
- "No files were found with the provided path: output/*.zip"
|
|
23
|
+
root_cause: |
|
|
24
|
+
The path: input in actions/upload-artifact (all versions) is always resolved
|
|
25
|
+
relative to $GITHUB_WORKSPACE (the repository root), regardless of any
|
|
26
|
+
working-directory: setting on the step or enclosing job.
|
|
27
|
+
|
|
28
|
+
Developers who set working-directory: dist at the job level or on the step
|
|
29
|
+
and then specify path: *.js or path: build.zip expect the glob to be evaluated
|
|
30
|
+
from within dist/. Instead, the action evaluates it from workspace root, finds
|
|
31
|
+
no matching files, and either warns and uploads nothing (silent-failure) or
|
|
32
|
+
fails the step entirely depending on if-no-files-found setting.
|
|
33
|
+
|
|
34
|
+
The working-directory: context is respected only by run: shell steps for
|
|
35
|
+
command execution. It is not propagated to uses: action inputs — actions
|
|
36
|
+
receive path inputs as raw strings and resolve them internally from GITHUB_WORKSPACE.
|
|
37
|
+
|
|
38
|
+
The same behavior applies to actions/download-artifact path: inputs and to
|
|
39
|
+
other actions that accept file path parameters (e.g., docker/build-push-action
|
|
40
|
+
context: and file: inputs also require workspace-relative paths).
|
|
41
|
+
fix: |
|
|
42
|
+
Prefix all path: patterns with the subdirectory path relative to GITHUB_WORKSPACE.
|
|
43
|
+
|
|
44
|
+
Instead of:
|
|
45
|
+
working-directory: dist
|
|
46
|
+
path: build.zip
|
|
47
|
+
|
|
48
|
+
Use:
|
|
49
|
+
path: dist/build.zip
|
|
50
|
+
|
|
51
|
+
Or to upload an entire directory tree:
|
|
52
|
+
path: dist/
|
|
53
|
+
|
|
54
|
+
Set if-no-files-found: error to convert the silent warning into a visible
|
|
55
|
+
failure so path mistakes are caught immediately rather than silently producing
|
|
56
|
+
empty or missing artifacts downstream.
|
|
57
|
+
fix_code:
|
|
58
|
+
- language: yaml
|
|
59
|
+
label: 'Use workspace-relative path instead of relying on working-directory'
|
|
60
|
+
code: |
|
|
61
|
+
jobs:
|
|
62
|
+
build:
|
|
63
|
+
runs-on: ubuntu-latest
|
|
64
|
+
steps:
|
|
65
|
+
- uses: actions/checkout@v4
|
|
66
|
+
|
|
67
|
+
- name: Build
|
|
68
|
+
working-directory: dist # only affects this run: step
|
|
69
|
+
run: npm run build
|
|
70
|
+
|
|
71
|
+
# WRONG: path: build.zip searches workspace root, not dist/
|
|
72
|
+
# - uses: actions/upload-artifact@v4
|
|
73
|
+
# with:
|
|
74
|
+
# name: release
|
|
75
|
+
# path: build.zip
|
|
76
|
+
|
|
77
|
+
# CORRECT: include the subdirectory in the path
|
|
78
|
+
- uses: actions/upload-artifact@v4
|
|
79
|
+
with:
|
|
80
|
+
name: release
|
|
81
|
+
path: dist/build.zip # relative to GITHUB_WORKSPACE
|
|
82
|
+
if-no-files-found: error
|
|
83
|
+
- language: yaml
|
|
84
|
+
label: 'Upload all files from a subdirectory and fail fast on missing files'
|
|
85
|
+
code: |
|
|
86
|
+
- uses: actions/upload-artifact@v4
|
|
87
|
+
with:
|
|
88
|
+
name: dist-files
|
|
89
|
+
path: dist/ # uploads entire dist/ directory tree
|
|
90
|
+
if-no-files-found: error # fail fast instead of silent warning
|
|
91
|
+
prevention:
|
|
92
|
+
- 'Always write path: patterns in upload-artifact relative to GITHUB_WORKSPACE (repository root), not working-directory'
|
|
93
|
+
- 'Set if-no-files-found: error on every upload-artifact step to catch path mistakes immediately'
|
|
94
|
+
- 'Verify artifact contents in the Actions UI after the first run to confirm the correct files were captured'
|
|
95
|
+
- 'Remember that working-directory: only affects run: shell steps — uses: action inputs are always workspace-relative'
|
|
96
|
+
- 'Use ${{ github.workspace }} to build absolute paths when the relative path is ambiguous'
|
|
97
|
+
docs:
|
|
98
|
+
- url: 'https://github.com/actions/upload-artifact#inputs'
|
|
99
|
+
label: 'actions/upload-artifact inputs — path field documentation'
|
|
100
|
+
- url: 'https://github.com/actions/upload-artifact/issues/232'
|
|
101
|
+
label: 'upload-artifact#232: path is resolved relative to workspace root, not working-directory (community report)'
|
|
102
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables'
|
|
103
|
+
label: 'GITHUB_WORKSPACE default environment variable — GitHub Docs'
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
id: triggers-031
|
|
2
|
+
title: 'Required status check never satisfied when workflow uses paths: filter — PR permanently blocked'
|
|
3
|
+
category: triggers
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- required-status-check
|
|
7
|
+
- paths-filter
|
|
8
|
+
- pull-request
|
|
9
|
+
- branch-protection
|
|
10
|
+
- PR-blocked
|
|
11
|
+
- monorepo
|
|
12
|
+
- status-check
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'Required status check.*is expected'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'Merging is blocked.*required status check'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
- regex: 'Waiting for status to be reported'
|
|
19
|
+
flags: 'i'
|
|
20
|
+
error_messages:
|
|
21
|
+
- "Required status check 'CI / build' is expected — 1 pending check"
|
|
22
|
+
- "Merging is blocked: 1 required status check has not completed"
|
|
23
|
+
- "Waiting for status to be reported"
|
|
24
|
+
root_cause: |
|
|
25
|
+
When a GitHub Actions workflow uses on: push: paths: or on: pull_request: paths:
|
|
26
|
+
filter AND that workflow's job name is configured as a required status check in
|
|
27
|
+
branch protection rules, a critical gap emerges.
|
|
28
|
+
|
|
29
|
+
If a pull request does NOT modify any files matching the paths: filter, the
|
|
30
|
+
workflow is skipped entirely — no workflow run is created and no check status
|
|
31
|
+
is posted to the commit. GitHub branch protection interprets a missing status
|
|
32
|
+
check as "pending" (not as "skipped" or "passed"), which permanently blocks the
|
|
33
|
+
PR from merging.
|
|
34
|
+
|
|
35
|
+
This is the most common misconfiguration in monorepo setups where teams add
|
|
36
|
+
per-service CI: a workflow for service-A runs only when src/service-a/** changes.
|
|
37
|
+
Adding this workflow's job as a required status check then breaks ALL other PRs
|
|
38
|
+
that don't touch service-A — they are blocked forever waiting for a check that
|
|
39
|
+
will never run.
|
|
40
|
+
|
|
41
|
+
The same issue occurs with paths-ignore: if ALL changed files match the ignore
|
|
42
|
+
patterns, the workflow is skipped and the required check is never posted.
|
|
43
|
+
|
|
44
|
+
Note: GitHub does not automatically treat a "skipped" workflow as passing a
|
|
45
|
+
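required status check. The workflow must be triggered so that a check is reported; within a triggered workflow, a job skipped by its if: condition reports success.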
|
|
46
|
+
fix: |
|
|
47
|
+
Remove the paths: filter from the on: trigger and instead use dorny/paths-filter
|
|
48
|
+
or tj-actions/changed-files inside the workflow to detect changed files. The
|
|
49
|
+
workflow always runs (posting a check status) but skips expensive build steps
|
|
50
|
+
when irrelevant files changed.
|
|
51
|
+
|
|
52
|
+
Add a final ci-complete or always-pass job that runs with if: always() and
|
|
53
|
+
depends on the conditional jobs. Configure this final job name as the required
|
|
54
|
+
status check. It posts a status for every PR (failing only when an upstream job failed), whether or not the
|
|
55
|
+
upstream jobs ran.
|
|
56
|
+
fix_code:
|
|
57
|
+
- language: yaml
|
|
58
|
+
label: 'Replace paths: filter with internal file detection — always post a check status'
|
|
59
|
+
code: |
|
|
60
|
+
name: Service A CI
|
|
61
|
+
|
|
62
|
+
on:
|
|
63
|
+
pull_request:
|
|
64
|
+
branches: [main]
|
|
65
|
+
# REMOVE the paths: filter that prevents check from running on non-matching PRs
|
|
66
|
+
# paths:
|
|
67
|
+
# - src/service-a/**
|
|
68
|
+
|
|
69
|
+
jobs:
|
|
70
|
+
detect-changes:
|
|
71
|
+
runs-on: ubuntu-latest
|
|
72
|
+
outputs:
|
|
73
|
+
service-a-changed: ${{ steps.filter.outputs.service-a }}
|
|
74
|
+
steps:
|
|
75
|
+
- uses: actions/checkout@v4
|
|
76
|
+
- uses: dorny/paths-filter@v3
|
|
77
|
+
id: filter
|
|
78
|
+
with:
|
|
79
|
+
filters: |
|
|
80
|
+
service-a:
|
|
81
|
+
- 'src/service-a/**'
|
|
82
|
+
|
|
83
|
+
build:
|
|
84
|
+
needs: detect-changes
|
|
85
|
+
if: needs.detect-changes.outputs.service-a-changed == 'true'
|
|
86
|
+
runs-on: ubuntu-latest
|
|
87
|
+
steps:
|
|
88
|
+
- uses: actions/checkout@v4
|
|
89
|
+
- run: make build-service-a
|
|
90
|
+
|
|
91
|
+
# Required status check name: "Service A CI / ci-complete"
|
|
92
|
+
# This job ALWAYS runs and posts a check — satisfies branch protection for all PRs
|
|
93
|
+
ci-complete:
|
|
94
|
+
needs: [detect-changes, build]
|
|
95
|
+
if: always()
|
|
96
|
+
runs-on: ubuntu-latest
|
|
97
|
+
steps:
|
|
98
|
+
- name: Evaluate build result
|
|
99
|
+
run: |
|
|
100
|
+
if [[ "${{ needs.detect-changes.result }}" != "success" || "${{ needs.build.result }}" == "failure" || "${{ needs.build.result }}" == "cancelled" ]]; then
|
|
101
|
+
echo "Build failed"
|
|
102
|
+
exit 1
|
|
103
|
+
fi
|
|
104
|
+
echo "CI complete (build skipped or passed)"
|
|
105
|
+
prevention:
|
|
106
|
+
- 'Never add a required status check on a workflow that has a paths: filter — the check will be missing for non-matching PRs'
|
|
107
|
+
- 'Use paths-filter action inside the workflow instead of on.pull_request.paths to keep the workflow always running'
|
|
108
|
+
- 'Add a ci-complete job with if: always() as the required status check — not the individual build job'
|
|
109
|
+
- 'Test branch protection + paths filter by opening a PR that changes only unrelated files before enabling required checks'
|
|
110
|
+
- 'Document in your monorepo contributing guide that required checks use the internal paths-filter pattern'
|
|
111
|
+
docs:
|
|
112
|
+
- url: 'https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-protected-branches/about-protected-branches#require-status-checks-before-merging'
|
|
113
|
+
label: 'About protected branches — require status checks before merging'
|
|
114
|
+
- url: 'https://github.com/orgs/community/discussions/20548'
|
|
115
|
+
label: 'GitHub Community: Required status check never satisfies when workflow has paths filter (30+ reactions)'
|
|
116
|
+
- url: 'https://github.com/dorny/paths-filter'
|
|
117
|
+
label: 'dorny/paths-filter — recommended replacement for workflow-level paths: filter'
|
|
118
|
+
- url: 'https://github.com/orgs/community/discussions/44490'
|
|
119
|
+
label: 'GitHub Community: paths filter skips required checks causing PR stuck (additional discussion)'
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
id: yaml-syntax-034
|
|
2
|
+
title: 'runs-on label array requires ALL labels to match (AND logic) — job queues indefinitely when no runner qualifies'
|
|
3
|
+
category: yaml-syntax
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- runs-on
|
|
7
|
+
- self-hosted
|
|
8
|
+
- labels
|
|
9
|
+
- runner-matching
|
|
10
|
+
- AND-logic
|
|
11
|
+
- job-queue
|
|
12
|
+
- infinite-wait
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'Waiting for a runner to pick up this job'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'Could not find any available self-hosted runner that matches the required labels'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
- regex: 'No hosted runner matching the labels.*was found'
|
|
19
|
+
flags: 'i'
|
|
20
|
+
error_messages:
|
|
21
|
+
- "Waiting for a runner to pick up this job"
|
|
22
|
+
- "Could not find any available self-hosted runner that matches the required labels: self-hosted, linux, arm64, fast-ssd"
|
|
23
|
+
- "No hosted runner matching the labels ['self-hosted', 'linux', 'gpu'] was found"
|
|
24
|
+
root_cause: |
|
|
25
|
+
When runs-on: specifies an array of labels, GitHub requires a runner to have
|
|
26
|
+
ALL of the listed labels simultaneously (AND logic, not OR). A runner registered
|
|
27
|
+
with labels [self-hosted, linux, x64] will NOT match
|
|
28
|
+
runs-on: [self-hosted, linux, arm64] because it is missing the arm64 label,
|
|
29
|
+
even though it has two of the three required labels.
|
|
30
|
+
|
|
31
|
+
Common causes of permanent queueing:
|
|
32
|
+
- Combining labels from different runner types in one runs-on: array, expecting
|
|
33
|
+
any runner matching any label to pick up the job
|
|
34
|
+
- A typo in one label (e.g., "arm64" vs "aarch64") making the full set unmatchable
|
|
35
|
+
- Adding a new label requirement without ensuring at least one runner carries it
|
|
36
|
+
- Using a label that was renamed on the runner registration but not updated in workflows
|
|
37
|
+
- Combining GitHub-hosted runner identifiers with self-hosted labels
|
|
38
|
+
(e.g., [ubuntu-latest, self-hosted]) — GitHub-hosted runners do not carry
|
|
39
|
+
the self-hosted label, so this combination never matches any runner
|
|
40
|
+
|
|
41
|
+
The job stays queued with "Waiting for a runner to pick up this job" and is not
|
|
42
|
+
bounded by job timeout-minutes, which only limits execution time once a runner starts the job;
|
|
43
|
+
GitHub cancels it only after its own queue-time limit (roughly 24 hours). The error message lists all required labels but does not
|
|
44
|
+
indicate which specific label is unmatched.
|
|
45
|
+
fix: |
|
|
46
|
+
Ensure at least one registered runner has EVERY label in the runs-on: array.
|
|
47
|
+
Check Settings > Actions > Runners to verify exact label sets on each runner.
|
|
48
|
+
|
|
49
|
+
For different runner types (e.g., x64 vs ARM), use separate jobs rather than
|
|
50
|
+
a single job with an impossible label combination. For OR semantics across
|
|
51
|
+
runner types, use a matrix strategy.
|
|
52
|
+
|
|
53
|
+
For GitHub-hosted runners, use a single label string (ubuntu-latest,
|
|
54
|
+
windows-latest, macos-latest) not an array.
|
|
55
|
+
fix_code:
|
|
56
|
+
- language: yaml
|
|
57
|
+
label: 'Use only labels that ALL exist on at least one registered runner'
|
|
58
|
+
code: |
|
|
59
|
+
jobs:
|
|
60
|
+
# WRONG: no single runner has both fast-ssd AND arm64 labels
|
|
61
|
+
# build:
|
|
62
|
+
# runs-on: [self-hosted, linux, arm64, fast-ssd]
|
|
63
|
+
|
|
64
|
+
# CORRECT: use only labels that exist together on one runner
|
|
65
|
+
build:
|
|
66
|
+
runs-on: [self-hosted, linux, arm64] # matches a runner with all three
|
|
67
|
+
|
|
68
|
+
# CORRECT for GitHub-hosted: single string label
|
|
69
|
+
test:
|
|
70
|
+
runs-on: ubuntu-latest
|
|
71
|
+
- language: yaml
|
|
72
|
+
label: 'Use matrix to target multiple runner types (OR semantics)'
|
|
73
|
+
code: |
|
|
74
|
+
jobs:
|
|
75
|
+
test:
|
|
76
|
+
strategy:
|
|
77
|
+
matrix:
|
|
78
|
+
runner:
|
|
79
|
+
- ubuntu-latest
|
|
80
|
+
- [self-hosted, linux, arm64]
|
|
81
|
+
runs-on: ${{ matrix.runner }}
|
|
82
|
+
steps:
|
|
83
|
+
- uses: actions/checkout@v4
|
|
84
|
+
- run: make test
|
|
85
|
+
prevention:
|
|
86
|
+
- 'Treat runs-on: arrays as requiring ALL labels on one runner simultaneously — never use them as OR matching'
|
|
87
|
+
- 'Keep self-hosted runner label sets minimal (2-3 labels) to reduce the risk of unmatched combinations'
|
|
88
|
+
- 'Audit all workflow runs-on: references when renaming or removing runner labels — old labels cause indefinite queueing'
|
|
89
|
+
- 'Verify exact label sets in GitHub Settings > Actions > Runners before writing runs-on: arrays'
|
|
90
|
+
- 'Set a job-level timeout-minutes to bound execution time, and alert on long-queued jobs separately — timeout-minutes does not cover time spent waiting for a runner'
|
|
91
|
+
docs:
|
|
92
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-where-your-workflow-runs/choosing-the-runner-for-a-job#choosing-self-hosted-runners'
|
|
93
|
+
label: 'Choosing self-hosted runners — label matching behavior (AND semantics)'
|
|
94
|
+
- url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/using-labels-with-self-hosted-runners'
|
|
95
|
+
label: 'Using labels with self-hosted runners'
|
|
96
|
+
- url: 'https://github.com/orgs/community/discussions/25033'
|
|
97
|
+
label: 'GitHub Community: Self-hosted runner label AND matching — job queues indefinitely'
|
package/package.json
CHANGED