@htekdev/actions-debugger 1.0.23 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/errors/caching-artifacts/artifact-minimum-retention-one-day.yml +153 -0
- package/errors/caching-artifacts/cache-api-propagation-delay-post-save.yml +128 -0
- package/errors/caching-artifacts/cache-backend-internal-error-skipped.yml +75 -0
- package/errors/caching-artifacts/cache-hit-step-id-case-sensitive-mismatch.yml +95 -0
- package/errors/caching-artifacts/cache-save-post-step-skipped-on-failure.yml +114 -0
- package/errors/concurrency-timing/deploy-pages-in-progress-deployment-wedged.yml +70 -0
- package/errors/concurrency-timing/deployment-review-timeout-expired.yml +88 -0
- package/errors/concurrency-timing/job-concurrency-scope-per-run-not-global.yml +81 -0
- package/errors/concurrency-timing/merge-queue-concurrency-cancel-blocks-all.yml +86 -0
- package/errors/concurrency-timing/reusable-workflow-github-workflow-context-cancel.yml +124 -0
- package/errors/concurrency-timing/runner-scale-set-jobs-never-start.yml +123 -0
- package/errors/concurrency-timing/runner-temp-dir-race-concurrent-workers.yml +90 -0
- package/errors/known-unsolved/artifact-download-url-unauthenticated-404.yml +98 -0
- package/errors/known-unsolved/checkout-v6-credentials-docker-run-manual.yml +105 -0
- package/errors/known-unsolved/concurrency-groups-repo-scoped-only.yml +138 -0
- package/errors/known-unsolved/matrix-256-job-limit.yml +142 -0
- package/errors/known-unsolved/merge-group-paths-filter-not-supported.yml +137 -0
- package/errors/known-unsolved/no-job-allow-failure.yml +73 -0
- package/errors/known-unsolved/schedule-cron-hours-long-queue-drift.yml +101 -0
- package/errors/permissions-auth/checkout-persist-credentials-token-write.yml +90 -0
- package/errors/permissions-auth/create-github-app-token-cross-job-token-revoked.yml +95 -0
- package/errors/permissions-auth/github-token-contents-write-missing-git-push.yml +117 -0
- package/errors/permissions-auth/org-actions-policy-blocks-unapproved-action.yml +106 -0
- package/errors/runner-environment/codeql-action-v2-deprecated.yml +110 -0
- package/errors/runner-environment/macos-26-openssl-3-system-library-breaking.yml +114 -0
- package/errors/runner-environment/macos-26-ruby-34-default-upgrade.yml +114 -0
- package/errors/runner-environment/macos-26-xcode-default-265-pin-required.yml +99 -0
- package/errors/runner-environment/macos-latest-label-switches-to-macos26.yml +127 -0
- package/errors/runner-environment/node20-removed-toolcache-default-node22.yml +104 -0
- package/errors/runner-environment/powershell-74-76-threadjob-module-rename.yml +124 -0
- package/errors/runner-environment/self-hosted-runner-not-found.yml +134 -0
- package/errors/runner-environment/self-hosted-runner-selinux-service-exec-failure.yml +116 -0
- package/errors/runner-environment/service-container-no-healthcheck.yml +158 -0
- package/errors/runner-environment/setup-node-v5-corepack-pnpm-not-found.yml +101 -0
- package/errors/runner-environment/setup-node-yarn-not-installed-self-hosted.yml +76 -0
- package/errors/runner-environment/setup-python-externally-managed-env-error.yml +95 -0
- package/errors/runner-environment/windows-2019-runner-retired-june2025.yml +118 -0
- package/errors/runner-environment/windows-2022-docker-daemon-not-started.yml +108 -0
- package/errors/silent-failures/cache-hit-output-string-not-boolean.yml +96 -0
- package/errors/silent-failures/checkout-lfs-pointer-not-content.yml +105 -0
- package/errors/silent-failures/reusable-workflow-output-skipped-contains-secret.yml +115 -0
- package/errors/silent-failures/setup-node-silent-download-exit-zero.yml +105 -0
- package/errors/silent-failures/setup-python-truncated-manifest-silent-exit.yml +111 -0
- package/errors/silent-failures/undefined-env-expression-empty-string-silent.yml +115 -0
- package/errors/silent-failures/windows-powershell-github-output-bash-syntax.yml +118 -0
- package/errors/triggers/fork-pr-first-time-contributor-approval-required.yml +142 -0
- package/errors/triggers/on-push-branches-glob-star-no-slash-match.yml +78 -0
- package/errors/triggers/pull-request-target-env-protection-default-branch-eval.yml +117 -0
- package/errors/triggers/required-status-check-renamed-never-passes.yml +87 -0
- package/errors/triggers/schedule-cron-self-hosted-runner-not-triggered.yml +107 -0
- package/errors/yaml-syntax/composite-action-run-shell-missing.yml +90 -0
- package/errors/yaml-syntax/composite-action-secrets-context-unavailable.yml +99 -0
- package/errors/yaml-syntax/github-script-octokit-renamed-to-github.yml +130 -0
- package/errors/yaml-syntax/labeler-v5-config-format-breaking.yml +67 -0
- package/errors/yaml-syntax/runs-on-expression-array-syntax-error.yml +121 -0
- package/errors/yaml-syntax/setup-go-matrix-version-float-coercion.yml +69 -0
- package/package.json +1 -1
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
id: runner-environment-063
|
|
2
|
+
title: "PowerShell 7.4 → 7.6 LTS Upgrade Breaks ThreadJob Module Name and .NET Runtime"
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: warning
|
|
5
|
+
tags:
|
|
6
|
+
- powershell
|
|
7
|
+
- powershell-76
|
|
8
|
+
- threadjob
|
|
9
|
+
- dotnet10
|
|
10
|
+
- runner-images
|
|
11
|
+
- breaking-change
|
|
12
|
+
- module
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: "The term 'ThreadJob\\\\Start-ThreadJob' is not recognized"
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: "CommandNotFoundException.*ThreadJob"
|
|
17
|
+
flags: "i"
|
|
18
|
+
- regex: "Cannot find module.*ThreadJob"
|
|
19
|
+
flags: "i"
|
|
20
|
+
- regex: "Join-Path.*cannot process argument transformation on parameter 'ChildPath'"
|
|
21
|
+
flags: "i"
|
|
22
|
+
- regex: "PowerShell.*7\\.4.*not found"
|
|
23
|
+
flags: "i"
|
|
24
|
+
error_messages:
|
|
25
|
+
- "The term 'ThreadJob\\Start-ThreadJob' is not recognized as a name of a cmdlet, function, script file, or executable program."
|
|
26
|
+
- "Cannot find module ThreadJob"
|
|
27
|
+
- "Join-Path: Cannot process argument transformation on parameter 'ChildPath'."
|
|
28
|
+
- "Get-Module: The specified module 'ThreadJob' was not found"
|
|
29
|
+
root_cause: |
|
|
30
|
+
GitHub Actions runner images are being updated from PowerShell 7.4.x to 7.6.x
|
|
31
|
+
(the latest LTS release, based on .NET 10) across all runner images between
|
|
32
|
+
June 8–15, 2026 (runner-images#14150). PowerShell 7.6 is a major.minor
|
|
33
|
+
version upgrade from 7.4 and contains several breaking changes:
|
|
34
|
+
|
|
35
|
+
1. **ThreadJob module renamed**: The `ThreadJob` module has been replaced by
|
|
36
|
+
`Microsoft.PowerShell.ThreadJob`. The `Start-ThreadJob` cmdlet itself is
|
|
37
|
+
unchanged, but any script using the module-qualified name
|
|
38
|
+
`ThreadJob\Start-ThreadJob` will throw a CommandNotFoundException because
|
|
39
|
+
the old module name no longer exists. Scripts that call `Start-ThreadJob`
|
|
40
|
+
without the module prefix continue to work.
|
|
41
|
+
|
|
42
|
+
2. **Join-Path -ChildPath is now string[]**: The `-ChildPath` parameter type
|
|
43
|
+
changed from `string` to `string[]`. In most cases this is backward-
|
|
44
|
+
compatible, but scripts with unusual argument binding patterns or that
|
|
45
|
+
pass a typed `[string]` variable explicitly may encounter parameter
|
|
46
|
+
transformation errors.
|
|
47
|
+
|
|
48
|
+
3. **.NET 10 runtime**: PowerShell 7.6 ships on .NET 10 (7.4 was on .NET 8).
|
|
49
|
+
Scripts that load .NET assemblies, use P/Invoke, or invoke .NET API
|
|
50
|
+
members that changed between .NET 8 and .NET 10 may break silently
|
|
51
|
+
or throw runtime exceptions.
|
|
52
|
+
|
|
53
|
+
4. **WildcardPattern.Escape behavior change**: `WildcardPattern.Escape` now
|
|
54
|
+
correctly escapes lone backticks. Scripts that relied on the old (incorrect)
|
|
55
|
+
no-escape behavior may produce different wildcard pattern results.
|
|
56
|
+
|
|
57
|
+
5. **Trailing space removed from event source name**: Scripts that match
|
|
58
|
+
exact event source names (e.g., for Windows Event Log) may fail to find
|
|
59
|
+
the source if they include a trailing space.
|
|
60
|
+
fix: |
|
|
61
|
+
1. Replace module-qualified ThreadJob references:
|
|
62
|
+
Find all occurrences of `ThreadJob\Start-ThreadJob` in your scripts and
|
|
63
|
+
update them to `Microsoft.PowerShell.ThreadJob\Start-ThreadJob` or simply
|
|
64
|
+
use the unqualified `Start-ThreadJob`.
|
|
65
|
+
|
|
66
|
+
2. Review Join-Path usage:
|
|
67
|
+
Scripts using `Join-Path` with `-ChildPath` should continue to work in
|
|
68
|
+
most cases. If you see parameter binding errors, check that you are not
|
|
69
|
+
passing a typed `[string]` variable in a context that is now ambiguous.
|
|
70
|
+
|
|
71
|
+
3. Test .NET assembly loading:
|
|
72
|
+
If your scripts load .NET assemblies via `Add-Type -Path` or
|
|
73
|
+
`[System.Reflection.Assembly]::LoadFrom()`, test them against .NET 10
|
|
74
|
+
to catch any API compatibility issues before the upgrade rolls out.
|
|
75
|
+
|
|
76
|
+
4. Pin PowerShell version (temporary workaround):
|
|
77
|
+
Until migration is complete, you can install a specific PowerShell version
|
|
78
|
+
via the MSI/package manager in a workflow step. This is a temporary
|
|
79
|
+
workaround and should not be the long-term solution.
|
|
80
|
+
fix_code:
|
|
81
|
+
- language: yaml
|
|
82
|
+
label: "Fix module-qualified ThreadJob reference"
|
|
83
|
+
code: |
|
|
84
|
+
# Before (PowerShell 7.4 — breaks on 7.6):
|
|
85
|
+
# $job = ThreadJob\Start-ThreadJob -ScriptBlock { ... }
|
|
86
|
+
#
|
|
87
|
+
# After (works on both 7.4 and 7.6):
|
|
88
|
+
- name: Run threaded job
|
|
89
|
+
shell: pwsh
|
|
90
|
+
run: |
|
|
91
|
+
# Option A: unqualified (works on all versions)
|
|
92
|
+
$job = Start-ThreadJob -ScriptBlock { Get-Process }
|
|
93
|
+
|
|
94
|
+
# Option B: fully qualified with new module name (7.6+)
|
|
95
|
+
$job = Microsoft.PowerShell.ThreadJob\Start-ThreadJob -ScriptBlock { Get-Process }
|
|
96
|
+
|
|
97
|
+
$result = $job | Wait-Job | Receive-Job
|
|
98
|
+
Write-Output $result
|
|
99
|
+
- language: yaml
|
|
100
|
+
label: "Verify PowerShell version in workflow to detect future upgrades early"
|
|
101
|
+
code: |
|
|
102
|
+
- name: Check PowerShell version
|
|
103
|
+
shell: pwsh
|
|
104
|
+
run: |
|
|
105
|
+
$version = $PSVersionTable.PSVersion
|
|
106
|
+
Write-Output "PowerShell version: $version"
|
|
107
|
+
if ($version.Major -lt 7 -or ($version.Major -eq 7 -and $version.Minor -lt 6)) {
|
|
108
|
+
Write-Warning "PowerShell < 7.6 detected — ensure ThreadJob references use unqualified names"
|
|
109
|
+
}
|
|
110
|
+
prevention:
|
|
111
|
+
- "Use unqualified cmdlet names (Start-ThreadJob, not ThreadJob\\Start-ThreadJob) to avoid module-name dependencies."
|
|
112
|
+
- "Add a PowerShell version check step at the start of complex pwsh workflows to detect unexpected upgrades early."
|
|
113
|
+
- "Test workflows against the next PowerShell LTS version in a matrix before the runner image upgrade lands."
|
|
114
|
+
- "Subscribe to GitHub notifications on actions/runner-images to receive Announcement issues for upcoming breaking changes."
|
|
115
|
+
- "Avoid loading .NET assemblies by absolute path in runner workflows — prefer NuGet package installation to ensure .NET runtime compatibility."
|
|
116
|
+
docs:
|
|
117
|
+
- url: "https://github.com/actions/runner-images/issues/14150"
|
|
118
|
+
label: "runner-images #14150: PowerShell will be updated from 7.4 to 7.6 LTS (Jun 8-15 2026)"
|
|
119
|
+
- url: "https://learn.microsoft.com/en-us/powershell/scripting/whats-new/what-s-new-in-powershell-76"
|
|
120
|
+
label: "PowerShell 7.6 Release Notes — breaking changes"
|
|
121
|
+
- url: "https://github.com/PowerShell/PowerShell/releases/tag/v7.6.0"
|
|
122
|
+
label: "PowerShell v7.6.0 GitHub release"
|
|
123
|
+
- url: "https://learn.microsoft.com/en-us/powershell/scripting/install/powershell-support-lifecycle"
|
|
124
|
+
label: "PowerShell support lifecycle"
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
id: runner-environment-067
|
|
2
|
+
title: "Self-Hosted Runner 'An error occurred: Runner not found'"
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- self-hosted
|
|
7
|
+
- runner
|
|
8
|
+
- runner-not-found
|
|
9
|
+
- jit
|
|
10
|
+
- registration
|
|
11
|
+
- ephemeral
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: "An error occurred: Runner not found"
|
|
14
|
+
flags: "i"
|
|
15
|
+
- regex: "Runner not found"
|
|
16
|
+
flags: "i"
|
|
17
|
+
- regex: "No runner matching the specified criteria was found"
|
|
18
|
+
flags: "i"
|
|
19
|
+
- regex: "Could not find any runner that matches the selector"
|
|
20
|
+
flags: "i"
|
|
21
|
+
error_messages:
|
|
22
|
+
- "An error occurred: Runner not found"
|
|
23
|
+
- "Error: An error occurred: Runner not found"
|
|
24
|
+
- "No runner matching the specified criteria was found"
|
|
25
|
+
root_cause: |
|
|
26
|
+
"An error occurred: Runner not found" is a vague error emitted by the
|
|
27
|
+
GitHub Actions broker when it cannot match or allocate a self-hosted runner
|
|
28
|
+
for a queued job. It has multiple distinct root causes:
|
|
29
|
+
|
|
30
|
+
1. **JIT (Just-in-Time) token expiry** (most common with ARC/Kubernetes runners):
|
|
31
|
+
Ephemeral runners provisioned via Just-in-Time tokens have a short registration
|
|
32
|
+
window. If the runner process does not connect within the token validity period
|
|
33
|
+
(~60 seconds), the broker discards the registration and emits "Runner not found"
|
|
34
|
+
when the job is dispatched. This particularly affects actions-runner-controller
|
|
35
|
+
(ARC) on Kubernetes when pod startup time exceeds the JIT window.
|
|
36
|
+
|
|
37
|
+
2. **Runner de-registered before job starts**:
|
|
38
|
+
Autoscaling controllers (ARC, custom scripts) that aggressively recycle idle
|
|
39
|
+
runners may de-register the runner in the brief window between job queue and
|
|
40
|
+
job dispatch. The broker finds no runner for the job's labels.
|
|
41
|
+
|
|
42
|
+
3. **Label mismatch**:
|
|
43
|
+
The workflow specifies `runs-on: [self-hosted, linux, x64]` but the registered
|
|
44
|
+
runner has different labels (e.g., just `[self-hosted, linux]`). The broker
|
|
45
|
+
treats this as "no matching runner found" and emits the same vague error.
|
|
46
|
+
|
|
47
|
+
4. **Runner registered at wrong scope**:
|
|
48
|
+
A runner registered at the organization level is not visible to a repository
|
|
49
|
+
not in that runner's group, or vice versa. Org runner group access policy
|
|
50
|
+
may restrict which repos can use the runner.
|
|
51
|
+
|
|
52
|
+
5. **Concurrent job stealing**:
|
|
53
|
+
In autoscaling pools where multiple runners share the same label set, a
|
|
54
|
+
job token issued to one runner is occasionally "stolen" by another — the
|
|
55
|
+
original runner's job token is invalid when it tries to start. Less common
|
|
56
|
+
but documented in high-concurrency pools (actions/runner#3857, 116 reactions).
|
|
57
|
+
|
|
58
|
+
The error is intentionally vague because it covers multiple broker-side failures
|
|
59
|
+
that are indistinguishable from the runner's perspective.
|
|
60
|
+
fix: |
|
|
61
|
+
Diagnose by checking the runner registration logs first:
|
|
62
|
+
|
|
63
|
+
1. **For JIT token expiry (ARC/Kubernetes)**:
|
|
64
|
+
Reduce pod startup time — use pre-pulled images, smaller base images, or
|
|
65
|
+
warm pools. Alternatively, switch to Long-Running runners (PAT/App-registered)
|
|
66
|
+
which don't have JIT token windows. Check actions-runner-controller v0.9+
|
|
67
|
+
which extended the JIT window.
|
|
68
|
+
|
|
69
|
+
2. **For de-registered runner race condition**:
|
|
70
|
+
Add a grace period to your autoscaler before de-registering idle runners —
|
|
71
|
+
at least 60 seconds after a job completes. Use the `--ephemeral` flag (the replacement for the deprecated `--once`) on ephemeral
|
|
72
|
+
runners so they only exit after completing one job, not before.
|
|
73
|
+
|
|
74
|
+
3. **For label mismatch**:
|
|
75
|
+
Run `gh api repos/{owner}/{repo}/actions/runners --jq '.runners[].labels[].name'` to inspect
|
|
76
|
+
registered runner labels. Compare against the `runs-on:` in your workflow.
|
|
77
|
+
Labels are case-sensitive and must be an exact subset match.
|
|
78
|
+
|
|
79
|
+
4. **For wrong scope (org vs repo)**:
|
|
80
|
+
Check Settings → Actions → Runners in both the repo and org. Confirm the
|
|
81
|
+
runner appears under the correct scope and the runner group allows access
|
|
82
|
+
to the repository.
|
|
83
|
+
|
|
84
|
+
5. **General debugging**:
|
|
85
|
+
Enable runner diagnostic logs by setting `ACTIONS_RUNNER_DEBUG: true` and
|
|
86
|
+
`ACTIONS_STEP_DEBUG: true` as repository secrets. This enables verbose
|
|
87
|
+
broker negotiation logs in the Actions runner output.
|
|
88
|
+
fix_code:
|
|
89
|
+
- language: yaml
|
|
90
|
+
label: "Enable runner diagnostic logs to capture broker negotiation details"
|
|
91
|
+
code: |
|
|
92
|
+
# Add these as repository secrets (Settings → Secrets → Actions):
|
|
93
|
+
# ACTIONS_RUNNER_DEBUG = true
|
|
94
|
+
# ACTIONS_STEP_DEBUG = true
|
|
95
|
+
|
|
96
|
+
# Then re-run the failing job. The runner logs will include:
|
|
97
|
+
# "Checking runner for labels: [self-hosted, linux, x64]"
|
|
98
|
+
# "Connected to GitHub Actions service"
|
|
99
|
+
# "Received job assignment: ..."
|
|
100
|
+
jobs:
|
|
101
|
+
build:
|
|
102
|
+
runs-on: [self-hosted, linux, x64]
|
|
103
|
+
steps:
|
|
104
|
+
- uses: actions/checkout@v4
|
|
105
|
+
- run: echo "Runner labels verified"
|
|
106
|
+
- language: yaml
|
|
107
|
+
label: "Verify registered runner labels via GitHub API"
|
|
108
|
+
code: |
|
|
109
|
+
# Check what labels your self-hosted runners actually have:
|
|
110
|
+
# gh api repos/{owner}/{repo}/actions/runners --jq '.runners[] | {name: .name, labels: [.labels[].name]}'
|
|
111
|
+
# Example output:
|
|
112
|
+
# {"name": "my-runner", "labels": ["self-hosted", "Linux", "X64"]}
|
|
113
|
+
# Note: Labels are case-sensitive — "Linux" != "linux"
|
|
114
|
+
|
|
115
|
+
# Workflow label must match runner label exactly:
|
|
116
|
+
jobs:
|
|
117
|
+
build:
|
|
118
|
+
# Use exact case matching the runner's registered labels:
|
|
119
|
+
runs-on: [self-hosted, Linux, X64]
|
|
120
|
+
prevention:
|
|
121
|
+
- "Use explicit, versioned runner labels instead of generic `self-hosted` to make label mismatches immediately visible in the runner registration."
|
|
122
|
+
- "For ephemeral/JIT runners on Kubernetes, use pre-pulled base images and resource requests that ensure fast pod startup to avoid JIT token expiry."
|
|
123
|
+
- "Add ACTIONS_RUNNER_DEBUG=true as a repository secret during initial setup to capture detailed registration logs before the runner goes into production."
|
|
124
|
+
- "Implement health monitoring on your self-hosted runner pool — alert when the runner count drops below the minimum needed for your queue depth."
|
|
125
|
+
- "Prefer Long-Running runners over JIT ephemeral runners for workflows with unpredictable startup patterns — JIT token windows are unforgiving on slow infrastructure."
|
|
126
|
+
docs:
|
|
127
|
+
- url: "https://github.com/actions/runner/issues/3857"
|
|
128
|
+
label: "actions/runner #3857: 'An error occurred: Runner not found' (116 reactions)"
|
|
129
|
+
- url: "https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/monitoring-and-troubleshooting-self-hosted-runners"
|
|
130
|
+
label: "GitHub Docs: Monitoring and troubleshooting self-hosted runners"
|
|
131
|
+
- url: "https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/using-labels-with-self-hosted-runners"
|
|
132
|
+
label: "GitHub Docs: Using labels with self-hosted runners"
|
|
133
|
+
- url: "https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions#hardening-for-self-hosted-runners"
|
|
134
|
+
label: "GitHub Docs: Security hardening for self-hosted runners"
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
id: runner-environment-068
|
|
2
|
+
title: "Self-Hosted Runner Service Fails on RHEL/CentOS — SELinux Blocks runsvc.sh with status=203/EXEC"
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- self-hosted
|
|
7
|
+
- selinux
|
|
8
|
+
- rhel
|
|
9
|
+
- centos
|
|
10
|
+
- oracle-linux
|
|
11
|
+
- service
|
|
12
|
+
- systemd
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: "status=203/EXEC|code=exited.*status=203"
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: "runsvc\\.sh.*failed|svc\\.sh.*start.*fail|runner.*service.*203"
|
|
17
|
+
flags: "i"
|
|
18
|
+
- regex: "avc.*denied.*runner|selinux.*denied.*exec.*runsvc"
|
|
19
|
+
flags: "i"
|
|
20
|
+
error_messages:
|
|
21
|
+
- "Active: failed (Result: exit-code)"
|
|
22
|
+
- "Main PID: XXXX (code=exited, status=203/EXEC)"
|
|
23
|
+
- "Failed to execute: Permission denied"
|
|
24
|
+
- "svc install succeeded but svc start exits immediately with status=203/EXEC"
|
|
25
|
+
root_cause: |
|
|
26
|
+
On SELinux-enforcing Linux distributions (RHEL, CentOS, Oracle Linux, AlmaLinux,
|
|
27
|
+
Rocky Linux), the GitHub Actions self-hosted runner service fails to start with
|
|
28
|
+
systemd exit code 203/EXEC when run via sudo ./svc.sh start.
|
|
29
|
+
|
|
30
|
+
The root cause is a SELinux security context mismatch:
|
|
31
|
+
1. GitHub's ./config.sh creates runsvc.sh and run.sh with the security context
|
|
32
|
+
of the installing user's SSH/interactive session (e.g., unconfined_u:object_r:user_home_t:s0).
|
|
33
|
+
2. When systemd launches the service, it executes the script in a more restricted
|
|
34
|
+
domain context. SELinux enforces that service scripts must have a context
|
|
35
|
+
allowing execution from the systemd context.
|
|
36
|
+
3. The user_home_t label applied during installation does not permit execution by
|
|
37
|
+
systemd, so the exec() syscall is denied before the process can start.
|
|
38
|
+
4. Running ./run.sh directly in an interactive shell works because interactive
|
|
39
|
+
shells run in a more permissive SELinux context that allows user_home_t exec.
|
|
40
|
+
|
|
41
|
+
Exit code 203/EXEC specifically means systemd failed during exec() — not during
|
|
42
|
+
runtime — confirming SELinux is the blocker rather than a configuration or
|
|
43
|
+
dependency issue.
|
|
44
|
+
fix: |
|
|
45
|
+
Apply the correct SELinux security context to the runner service scripts using
|
|
46
|
+
the chcon command. The usr_t type permits execution by systemd service contexts.
|
|
47
|
+
|
|
48
|
+
Step 1 — Fix runsvc.sh context (required):
|
|
49
|
+
cd /path/to/runner
|
|
50
|
+
sudo chcon system_u:object_r:usr_t:s0 runsvc.sh
|
|
51
|
+
|
|
52
|
+
Step 2 — Fix run.sh context (required if service starts but crashes immediately):
|
|
53
|
+
sudo chcon system_u:object_r:usr_t:s0 run.sh
|
|
54
|
+
|
|
55
|
+
Step 3 — Start the service:
|
|
56
|
+
sudo ./svc.sh start
|
|
57
|
+
sudo ./svc.sh status # Should show: Active: active (running)
|
|
58
|
+
|
|
59
|
+
For persistent context that survives runner upgrades and ./config.sh reinstalls,
|
|
60
|
+
use semanage fcontext to write the context rule permanently. This requires the
|
|
61
|
+
policycoreutils-python-utils package.
|
|
62
|
+
fix_code:
|
|
63
|
+
- language: yaml
|
|
64
|
+
label: "Fix SELinux context — run these commands on the host (not in workflow)"
|
|
65
|
+
code: |
|
|
66
|
+
# Navigate to the runner installation directory
|
|
67
|
+
cd /home/github-runner/actions-runner
|
|
68
|
+
|
|
69
|
+
# Step 1: Fix runsvc.sh SELinux context (required)
|
|
70
|
+
sudo chcon system_u:object_r:usr_t:s0 runsvc.sh
|
|
71
|
+
|
|
72
|
+
# Step 2: Fix run.sh as well (needed if service crashes immediately after start)
|
|
73
|
+
sudo chcon system_u:object_r:usr_t:s0 run.sh
|
|
74
|
+
|
|
75
|
+
# Step 3: Start the runner service
|
|
76
|
+
sudo ./svc.sh start
|
|
77
|
+
|
|
78
|
+
# Step 4: Verify the service is running
|
|
79
|
+
sudo ./svc.sh status
|
|
80
|
+
# Expected output: Active: active (running)
|
|
81
|
+
|
|
82
|
+
- language: yaml
|
|
83
|
+
label: "Persistent SELinux context via semanage (survives reinstalls)"
|
|
84
|
+
code: |
|
|
85
|
+
# Install policycoreutils-python-utils if semanage is not available
|
|
86
|
+
# sudo dnf install policycoreutils-python-utils
|
|
87
|
+
|
|
88
|
+
RUNNER_DIR="/home/github-runner/actions-runner"
|
|
89
|
+
|
|
90
|
+
# Add permanent file context rules
|
|
91
|
+
sudo semanage fcontext -a -t usr_t "${RUNNER_DIR}/runsvc.sh"
|
|
92
|
+
sudo semanage fcontext -a -t usr_t "${RUNNER_DIR}/run.sh"
|
|
93
|
+
|
|
94
|
+
# Apply rules to existing files
|
|
95
|
+
sudo restorecon -v "${RUNNER_DIR}/runsvc.sh"
|
|
96
|
+
sudo restorecon -v "${RUNNER_DIR}/run.sh"
|
|
97
|
+
|
|
98
|
+
# Verify contexts are set correctly
|
|
99
|
+
ls -lZ "${RUNNER_DIR}/runsvc.sh" "${RUNNER_DIR}/run.sh"
|
|
100
|
+
# Expected: system_u:object_r:usr_t:s0
|
|
101
|
+
|
|
102
|
+
# Start the runner service
|
|
103
|
+
sudo ./svc.sh start
|
|
104
|
+
prevention:
|
|
105
|
+
- "After ./config.sh on SELinux-enforcing systems, always run chcon on runsvc.sh before ./svc.sh start."
|
|
106
|
+
- "Use semanage fcontext for persistent context that survives runner upgrades and reinstalls."
|
|
107
|
+
- "Add chcon steps to your runner provisioning runbooks and Ansible/Terraform automation."
|
|
108
|
+
- "Verify SELinux denials with: sudo ausearch -m avc -ts recent | grep runner"
|
|
109
|
+
- "Consider using the RHEL runner container image which handles SELinux context automatically."
|
|
110
|
+
docs:
|
|
111
|
+
- url: "https://stackoverflow.com/questions/71818706/github-self-hosted-runner-fails-to-run-as-a-service-on-rh-linux"
|
|
112
|
+
label: "Stack Overflow: Self-hosted runner fails as service on RH Linux (17-vote answer)"
|
|
113
|
+
- url: "https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/configuring-the-self-hosted-runner-application-as-a-service"
|
|
114
|
+
label: "GitHub Docs: Configuring the self-hosted runner as a service"
|
|
115
|
+
- url: "https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/8/html/using_selinux/changing-selinux-contexts_using-selinux"
|
|
116
|
+
label: "Red Hat Docs: Changing SELinux file contexts with chcon"
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
id: runner-environment-066
|
|
2
|
+
title: "Service Container Unhealthy — No Healthcheck Causes Job to Run Before Container Is Ready"
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- service-container
|
|
7
|
+
- postgres
|
|
8
|
+
- mysql
|
|
9
|
+
- redis
|
|
10
|
+
- healthcheck
|
|
11
|
+
- docker
|
|
12
|
+
- connection-refused
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: "db service is unhealthy"
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: "(?:postgres|mysql|redis|service).*(?:unhealthy|not ready|failed to start)"
|
|
17
|
+
flags: "i"
|
|
18
|
+
- regex: "One or more containers failed to start"
|
|
19
|
+
flags: "i"
|
|
20
|
+
- regex: "Connection refused.*(?:5432|3306|6379)"
|
|
21
|
+
flags: "i"
|
|
22
|
+
- regex: "could not connect to server.*Connection refused"
|
|
23
|
+
flags: "i"
|
|
24
|
+
- regex: "ECONNREFUSED.*(?:5432|3306|6379)"
|
|
25
|
+
flags: "i"
|
|
26
|
+
error_messages:
|
|
27
|
+
- "Failed to initialize, db service is unhealthy."
|
|
28
|
+
- "One or more containers failed to start."
|
|
29
|
+
- "Error: connect ECONNREFUSED 127.0.0.1:5432"
|
|
30
|
+
- "could not connect to server: Connection refused"
|
|
31
|
+
- "Is the server running on that host and accepting TCP/IP connections on port 5432?"
|
|
32
|
+
- "ERROR 2002 (HY000): Can't connect to MySQL server on '127.0.0.1' (111)"
|
|
33
|
+
root_cause: |
|
|
34
|
+
GitHub Actions starts service containers (defined under `services:`) before
|
|
35
|
+
job steps begin, but it does NOT wait for the container's internal process to
|
|
36
|
+
be fully ready unless a healthcheck is configured. The runner only waits for
|
|
37
|
+
the container to be in a running state — not for the database or service
|
|
38
|
+
inside to accept connections.
|
|
39
|
+
|
|
40
|
+
Databases like PostgreSQL, MySQL, and Redis typically take several seconds
|
|
41
|
+
after the container starts before they are ready to accept connections. Without
|
|
42
|
+
a healthcheck configured via the `options:` key, GitHub Actions proceeds to
|
|
43
|
+
run job steps immediately while the service is still initializing. The first
|
|
44
|
+
step that attempts a database connection will fail with "Connection refused"
|
|
45
|
+
or "service is unhealthy."
|
|
46
|
+
|
|
47
|
+
Common misconfiguration patterns:
|
|
48
|
+
1. No `options:` block at all — the service starts but the job doesn't wait
|
|
49
|
+
for readiness. The runner polls the healthcheck status if one is present.
|
|
50
|
+
2. Wrong healthcheck command — using `pg_isready` without the correct user/host
|
|
51
|
+
flags, or using `mysqladmin ping` without the correct credentials.
|
|
52
|
+
3. Insufficient retries/timeouts — using `--health-retries 1` means a single
|
|
53
|
+
failed attempt marks the container unhealthy immediately.
|
|
54
|
+
4. Port not mapped — the `ports:` key is required when running steps directly
|
|
55
|
+
on the runner (accessing via 127.0.0.1), but is optional when steps run
|
|
56
|
+
inside a container.
|
|
57
|
+
|
|
58
|
+
Note: Even with a healthcheck, the DB container can transiently fail the
|
|
59
|
+
healthcheck during slow GH-hosted runner provisioning. Default values of
|
|
60
|
+
`--health-interval 10s --health-timeout 5s --health-retries 5` work for
|
|
61
|
+
most cases, but MySQL/MariaDB may need longer retries during first initialization.
|
|
62
|
+
fix: |
|
|
63
|
+
Add an `options:` block to your service definition with a proper Docker
|
|
64
|
+
healthcheck command. GitHub Actions reads the container's healthcheck status
|
|
65
|
+
and waits until the container reports "healthy" before starting job steps.
|
|
66
|
+
|
|
67
|
+
PostgreSQL:
|
|
68
|
+
Use `pg_isready` with `-h localhost` and the correct username.
|
|
69
|
+
|
|
70
|
+
MySQL/MariaDB:
|
|
71
|
+
Use `mysqladmin ping` with credentials matching the MYSQL_ROOT_PASSWORD.
|
|
72
|
+
|
|
73
|
+
Redis:
|
|
74
|
+
Use `redis-cli ping` — returns PONG when Redis is ready.
|
|
75
|
+
|
|
76
|
+
If the container is still failing, increase `--health-retries` and
|
|
77
|
+
`--health-start-period` to give the container more initialization time.
|
|
78
|
+
MySQL in particular needs a longer start period on its first run because it
|
|
79
|
+
initializes the data directory.
|
|
80
|
+
fix_code:
|
|
81
|
+
- language: yaml
|
|
82
|
+
label: "PostgreSQL service with proper healthcheck"
|
|
83
|
+
code: |
|
|
84
|
+
jobs:
|
|
85
|
+
test:
|
|
86
|
+
runs-on: ubuntu-latest
|
|
87
|
+
services:
|
|
88
|
+
postgres:
|
|
89
|
+
image: postgres:16
|
|
90
|
+
env:
|
|
91
|
+
POSTGRES_USER: testuser
|
|
92
|
+
POSTGRES_PASSWORD: testpass
|
|
93
|
+
POSTGRES_DB: testdb
|
|
94
|
+
ports:
|
|
95
|
+
- 5432:5432
|
|
96
|
+
options: >-
|
|
97
|
+
--health-cmd pg_isready
|
|
98
|
+
--health-interval 10s
|
|
99
|
+
--health-timeout 5s
|
|
100
|
+
--health-retries 5
|
|
101
|
+
steps:
|
|
102
|
+
- uses: actions/checkout@v4
|
|
103
|
+
- run: npm test
|
|
104
|
+
env:
|
|
105
|
+
DATABASE_URL: postgresql://testuser:testpass@localhost:5432/testdb
|
|
106
|
+
- language: yaml
|
|
107
|
+
label: "MySQL service with proper healthcheck"
|
|
108
|
+
code: |
|
|
109
|
+
jobs:
|
|
110
|
+
test:
|
|
111
|
+
runs-on: ubuntu-latest
|
|
112
|
+
services:
|
|
113
|
+
mysql:
|
|
114
|
+
image: mysql:8.0
|
|
115
|
+
env:
|
|
116
|
+
MYSQL_ROOT_PASSWORD: rootpass
|
|
117
|
+
MYSQL_DATABASE: testdb
|
|
118
|
+
ports:
|
|
119
|
+
- 3306:3306
|
|
120
|
+
options: >-
|
|
121
|
+
--health-cmd "mysqladmin ping -h 127.0.0.1 -u root -prootpass"
|
|
122
|
+
--health-interval 10s
|
|
123
|
+
--health-timeout 5s
|
|
124
|
+
--health-retries 10
|
|
125
|
+
--health-start-period 30s
|
|
126
|
+
- language: yaml
|
|
127
|
+
label: "Redis service with proper healthcheck"
|
|
128
|
+
code: |
|
|
129
|
+
jobs:
|
|
130
|
+
test:
|
|
131
|
+
runs-on: ubuntu-latest
|
|
132
|
+
services:
|
|
133
|
+
redis:
|
|
134
|
+
image: redis:7
|
|
135
|
+
ports:
|
|
136
|
+
- 6379:6379
|
|
137
|
+
options: >-
|
|
138
|
+
--health-cmd "redis-cli ping"
|
|
139
|
+
--health-interval 10s
|
|
140
|
+
--health-timeout 5s
|
|
141
|
+
--health-retries 5
|
|
142
|
+
prevention:
|
|
143
|
+
- "Always add an `options: --health-cmd ...` block to every service container — the runner will wait for 'healthy' status before running steps."
|
|
144
|
+
- "Use `--health-start-period 30s` for MySQL/MariaDB which initializes its data directory on the first run and takes longer than PostgreSQL or Redis."
|
|
145
|
+
- "Test your healthcheck command locally with `docker run --health-cmd ...` to verify it exits 0 when the service is ready."
|
|
146
|
+
- "Use `ports: - 5432:5432` when steps run directly on the runner host (ubuntu-latest); ports are optional when steps run in a container job."
|
|
147
|
+
- "Avoid hardcoded `sleep 30` workarounds — these waste time on fast machines and still fail on slow ones. Use healthchecks instead."
|
|
148
|
+
docs:
|
|
149
|
+
- url: "https://docs.github.com/en/actions/use-cases-and-examples/creating-postgresql-service-containers"
|
|
150
|
+
label: "GitHub Docs: Creating PostgreSQL service containers"
|
|
151
|
+
- url: "https://docs.github.com/en/actions/use-cases-and-examples/creating-redis-service-containers"
|
|
152
|
+
label: "GitHub Docs: Creating Redis service containers"
|
|
153
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/running-jobs-in-a-container"
|
|
154
|
+
label: "GitHub Docs: Running jobs in a container (services section)"
|
|
155
|
+
- url: "https://stackoverflow.com/questions/60618118/docker-postgres-image-failed-to-initialize-db-service-is-unhealthy"
|
|
156
|
+
label: "Stack Overflow #60618118: db service is unhealthy (25 votes, 8.8K views)"
|
|
157
|
+
- url: "https://github.com/orgs/community/discussions/27021"
|
|
158
|
+
label: "GitHub Community #27021: MySQL service never comes up healthy"
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
id: runner-environment-060
|
|
2
|
+
title: "setup-node@v5 Cannot Locate pnpm After Corepack Enable"
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- setup-node
|
|
7
|
+
- pnpm
|
|
8
|
+
- corepack
|
|
9
|
+
- v5
|
|
10
|
+
- package-manager
|
|
11
|
+
patterns:
|
|
12
|
+
- regex: "Unable to locate executable file: pnpm"
|
|
13
|
+
flags: "i"
|
|
14
|
+
- regex: "actions/setup-node@[a-z0-9]+ # v5"
|
|
15
|
+
flags: "i"
|
|
16
|
+
- regex: "package-manager-cache: true"
|
|
17
|
+
flags: "i"
|
|
18
|
+
error_messages:
|
|
19
|
+
- "Error: Unable to locate executable file: pnpm. Please verify either the file path exists or the file can be found within a directory specified by the PATH environment variable. Also check the file mode to verify the file is executable."
|
|
20
|
+
root_cause: |
|
|
21
|
+
`actions/setup-node@v5` changed how corepack and package manager shims are
|
|
22
|
+
initialized compared to v4. In v5, the action enables `package-manager-cache: true`
|
|
23
|
+
by default and reads the `packageManager` field from `package.json` to configure
|
|
24
|
+
corepack, but it no longer automatically activates corepack shims for the pnpm
|
|
25
|
+
binary in the runner's PATH.
|
|
26
|
+
|
|
27
|
+
In v4, setup-node would run `corepack enable` implicitly as part of its package
|
|
28
|
+
manager setup, making pnpm available immediately after the action. In v5 this
|
|
29
|
+
behavior changed: corepack is configured for caching purposes but pnpm shims are
|
|
30
|
+
not written into PATH, so any subsequent `corepack enable` or direct `pnpm`
|
|
31
|
+
invocation fails with "Unable to locate executable file: pnpm."
|
|
32
|
+
|
|
33
|
+
The issue is specific to v5 because the internal action scripts now use Node 24
|
|
34
|
+
and a refactored corepack integration path that treats corepack enablement as
|
|
35
|
+
separate from the shim installation step.
|
|
36
|
+
|
|
37
|
+
Downgrading to `actions/setup-node@v4` restores the previous behavior where pnpm
|
|
38
|
+
shims are automatically available after the action.
|
|
39
|
+
fix: |
|
|
40
|
+
Use `pnpm/action-setup` BEFORE `actions/setup-node` to install pnpm independently
|
|
41
|
+
of corepack shim management. This is the officially recommended pattern for pnpm
|
|
42
|
+
on GitHub-hosted runners and works correctly with both v4 and v5 of setup-node.
|
|
43
|
+
|
|
44
|
+
Alternatively, pin to `actions/setup-node@v4` until setup-node@v5 resolves the
|
|
45
|
+
corepack shim regression.
|
|
46
|
+
|
|
47
|
+
Do NOT rely on running `corepack enable` after setup-node@v5 to make pnpm available —
|
|
48
|
+
the shim is not written even after corepack is enabled in a subsequent step.
|
|
49
|
+
fix_code:
|
|
50
|
+
- language: yaml
|
|
51
|
+
label: "Wrong: setup-node@v5 + corepack enable (pnpm not found)"
|
|
52
|
+
code: |
|
|
53
|
+
steps:
|
|
54
|
+
- uses: actions/checkout@v4
|
|
55
|
+
- uses: actions/setup-node@v5 # v5 does not write pnpm shim
|
|
56
|
+
with:
|
|
57
|
+
node-version-file: package.json
|
|
58
|
+
- run: |
|
|
59
|
+
corepack enable # pnpm still not in PATH after this
|
|
60
|
+
corepack prepare --activate
|
|
61
|
+
- run: pnpm install # Error: Unable to locate executable file: pnpm
|
|
62
|
+
- language: yaml
|
|
63
|
+
label: "Correct: pnpm/action-setup before setup-node (works with v4 and v5)"
|
|
64
|
+
code: |
|
|
65
|
+
steps:
|
|
66
|
+
- uses: actions/checkout@v4
|
|
67
|
+
- uses: pnpm/action-setup@v4 # install pnpm FIRST, independent of corepack
|
|
68
|
+
with:
|
|
69
|
+
version: 10
|
|
70
|
+
- uses: actions/setup-node@v5
|
|
71
|
+
with:
|
|
72
|
+
node-version-file: package.json
|
|
73
|
+
cache: pnpm
|
|
74
|
+
- run: pnpm install --frozen-lockfile
|
|
75
|
+
- language: yaml
|
|
76
|
+
label: "Alternative: pin to setup-node@v4 (restores previous corepack behavior)"
|
|
77
|
+
code: |
|
|
78
|
+
steps:
|
|
79
|
+
- uses: actions/checkout@v4
|
|
80
|
+
- uses: actions/setup-node@v4 # v4 writes corepack shims automatically
|
|
81
|
+
with:
|
|
82
|
+
node-version-file: package.json
|
|
83
|
+
- run: |
|
|
84
|
+
corepack enable
|
|
85
|
+
corepack prepare --activate
|
|
86
|
+
- run: pnpm install --frozen-lockfile
|
|
87
|
+
prevention:
|
|
88
|
+
- "Use pnpm/action-setup before setup-node for reliable pnpm availability regardless of setup-node version."
|
|
89
|
+
- "Do not assume corepack shim behavior is identical between setup-node major versions."
|
|
90
|
+
- "Pin actions to full commit SHAs (e.g. `@a0853c24...`) to avoid unexpected behavior changes on major bumps."
|
|
91
|
+
- "Read setup-node release notes and CHANGELOG when upgrading from v4 to v5 — corepack integration changed."
|
|
92
|
+
- "Test pnpm availability explicitly in CI by adding a debug step such as `run: which pnpm && pnpm --version`."
|
|
93
|
+
docs:
|
|
94
|
+
- url: "https://github.com/actions/setup-node/issues/1357"
|
|
95
|
+
label: "actions/setup-node#1357: v5 fails immediately when using pnpm (31 reactions)"
|
|
96
|
+
- url: "https://github.com/pnpm/action-setup"
|
|
97
|
+
label: "pnpm/action-setup: Official pnpm GitHub Action"
|
|
98
|
+
- url: "https://nodejs.org/api/corepack.html"
|
|
99
|
+
label: "Node.js Corepack documentation"
|
|
100
|
+
- url: "https://github.com/actions/setup-node/releases"
|
|
101
|
+
label: "actions/setup-node releases and CHANGELOG"
|