@htekdev/actions-debugger 1.0.38 → 1.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ id: caching-artifacts-031
2
+ title: 'Cache failures from manually-overridden ACTIONS_CACHE_URL after legacy service decommission (April 2025)'
3
+ category: caching-artifacts
4
+ severity: error
5
+ tags:
6
+ - cache
7
+ - legacy-service
8
+ - ACTIONS_CACHE_URL
9
+ - service-migration
10
+ - brownout
11
+ - self-hosted
12
+ patterns:
13
+ - regex: 'ACTIONS_CACHE_URL.*403|ACTIONS_CACHE_URL.*503|ACTIONS_CACHE_URL.*Connection refused'
14
+ flags: 'i'
15
+ - regex: 'Cache service responded with (403|503|5\d\d)'
16
+ flags: 'i'
17
+ - regex: 'Failed to restore cache entry\. Exiting\.\.\.'
18
+ flags: 'i'
19
+ - regex: 'Unable to reserve cache with key.*ACTIONS_CACHE_URL'
20
+ flags: 'i'
21
+ error_messages:
22
+ - "Cache service responded with 503"
23
+ - "Cache service responded with 403"
24
+ - "Failed to restore cache entry. Exiting..."
25
+ - "Unable to reserve cache with key"
26
+ - "Error: ECONNREFUSED connecting to ACTIONS_CACHE_URL"
27
+ root_cause: |
28
+ GitHub migrated all customers to a new cache service backend in early 2025
29
+ and decommissioned the legacy service on April 15, 2025. Prior to decommission,
30
+ brownout windows were scheduled on April 1 and April 8 (each 4-8 hours).
31
+
32
+ Workflows that explicitly override ACTIONS_CACHE_URL, ACTIONS_RESULTS_URL, or
33
+ ACTIONS_RUNTIME_URL continue routing requests to the decommissioned endpoint,
34
+ causing 403 or 503 responses. The cache action reports "Failed to restore cache
35
+ entry" or "Unable to reserve cache" and either skips caching silently or fails
36
+ the step (depending on fail-on-cache-miss settings).
37
+
38
+ Common sources of stale overrides:
39
+ - Third-party self-hosted runner software (ARC add-ons, Buildkite agents, etc.)
40
+ that inject a corporate cache proxy URL into the runner environment
41
+ - Composite actions or reusable workflows with hardcoded env var overrides
42
+ copied from pre-migration documentation
43
+ - Container images with ENV ACTIONS_CACHE_URL set in their Dockerfile,
44
+ inherited by container jobs and overriding the runner-injected value
45
+ - Enterprise self-hosted runner configurations in on-premises GitHub instances
46
+ that still point to a proxy configured for the old cache service API
47
+
48
+ The runner agent always injects the correct new-service values at job startup.
49
+ Any explicit override — even one set a millisecond earlier — takes precedence
50
+ and silently routes to the wrong endpoint.
51
+ fix: |
52
+ Remove all explicit overrides of ACTIONS_CACHE_URL, ACTIONS_RESULTS_URL, and
53
+ ACTIONS_RUNTIME_URL from:
54
+ - Workflow env: blocks (top-level, job-level, and step-level)
55
+ - Composite action env blocks
56
+ - Reusable workflow env blocks
57
+ - Container image Dockerfiles (ENV directives)
58
+ - Self-hosted runner startup scripts and systemd unit files
59
+ - ARC runner controller configuration and any environment-injection middleware
60
+
61
+ Let the runner agent inject the correct values automatically at job startup.
62
+ These values are issued fresh for each job — they cannot be cached or pre-set
63
+ and must come from the runner agent.
64
+
65
+ For self-hosted setups using a corporate cache proxy, update the proxy
66
+ to forward requests to the new service endpoint format, or decommission the
67
+ proxy if it is no longer required.
68
+ fix_code:
69
+ - language: yaml
70
+ label: 'Remove ACTIONS_CACHE_URL override from workflow — let runner inject correct value'
71
+ code: |
72
+ # WRONG: overriding cache service URL causes failures after April 2025
73
+ # env:
74
+ # ACTIONS_CACHE_URL: https://my-corp-proxy.example.com/_apis/artifactcache/
75
+ # ACTIONS_RESULTS_URL: https://my-corp-proxy.example.com/_apis/
76
+ # ACTIONS_RUNTIME_URL: https://my-corp-proxy.example.com/
77
+
78
+ # CORRECT: remove all overrides, let the runner inject the values
79
+ jobs:
80
+ build:
81
+ runs-on: [self-hosted, linux]
82
+ steps:
83
+ - uses: actions/checkout@v4
84
+ - name: Cache dependencies
85
+ uses: actions/cache@v4
86
+ with:
87
+ path: ~/.npm
88
+ key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
89
+ - name: Install
90
+ run: npm ci
91
+ - language: yaml
92
+ label: 'Audit container images for hardcoded ACTIONS_CACHE_URL'
93
+ code: |
94
+ # In your Dockerfile — remove any hardcoded cache service URL
95
+ # FROM ubuntu:22.04
96
+ # ENV ACTIONS_CACHE_URL=https://... ← REMOVE: causes failures after April 2025
97
+
98
+ # In your workflow — do not set cache URL env vars in container definitions
99
+ jobs:
100
+ build:
101
+ runs-on: ubuntu-latest
102
+ container:
103
+ image: myimage:latest
104
+ # Do not add env overrides for ACTIONS_CACHE_URL here
105
+ steps:
106
+ - uses: actions/cache@v4
107
+ with:
108
+ path: ~/.cache/pip
109
+ key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
110
+ prevention:
111
+ - 'Never hardcode ACTIONS_CACHE_URL, ACTIONS_RESULTS_URL, or ACTIONS_RUNTIME_URL in workflows, container images, or runner configs'
112
+ - 'Audit third-party composite actions and runner middleware for injected cache URL overrides after any GitHub cache service migration'
113
+ - 'Subscribe to the GitHub Changelog (github.blog/changelog) to receive advance notice of cache service migrations and brownout schedules'
114
+ - 'Upgrade to actions/cache@v4 or later — the v4 client was updated to use the new service API and is the supported path forward'
115
+ - 'Add a pre-flight check step on self-hosted runners that logs the injected ACTIONS_CACHE_URL to detect unexpected overrides'
116
+ docs:
117
+ - url: 'https://github.blog/changelog/2025-03-20-notification-of-upcoming-breaking-changes-in-github-actions/'
118
+ label: 'GitHub Changelog: Upcoming breaking changes — legacy cache service decommission (March 2025)'
119
+ - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows'
120
+ label: 'Caching dependencies to speed up workflows — actions/cache@v4'
@@ -0,0 +1,103 @@
1
+ id: runner-environment-097
2
+ title: 'Container job with non-root Docker user fails with EACCES on workspace and command files'
3
+ category: runner-environment
4
+ severity: error
5
+ tags:
6
+ - container-jobs
7
+ - non-root
8
+ - rootless
9
+ - permissions
10
+ - EACCES
11
+ - docker
12
+ - workspace
13
+ patterns:
14
+ - regex: 'EACCES.*permission denied.*_runner_file_commands|permission denied.*_runner_file_commands'
15
+ flags: 'i'
16
+ - regex: 'permission denied.*GITHUB_ENV|permission denied.*GITHUB_OUTPUT|permission denied.*GITHUB_PATH|permission denied.*GITHUB_STEP_SUMMARY'
17
+ flags: 'i'
18
+ - regex: 'could not lock config file.*Permission denied'
19
+ flags: 'i'
20
+ - regex: 'EACCES.*permission denied.*__w|permission denied.*github/workspace'
21
+ flags: 'i'
22
+ error_messages:
23
+ - "Error: EACCES: permission denied, open '/home/runner/work/_temp/_runner_file_commands/set_env_'"
24
+ - "Error: EACCES: permission denied, open '/__w/_temp/_runner_file_commands/add_path_'"
25
+ - "error: could not lock config file /home/runner/work/repo/.git/config: Permission denied"
26
+ - "EACCES: permission denied, open '/github/workspace/_temp/_runner_file_commands/'"
27
+ root_cause: |
28
+ When a workflow uses a container job (jobs.<id>.container), the GitHub Actions
29
+ runner creates the workspace directories, .git directory, and all special command
30
+ files (GITHUB_ENV, GITHUB_OUTPUT, GITHUB_PATH, GITHUB_STEP_SUMMARY) as the runner's own
31
+ host user (UID 1001 on GitHub-hosted runners; whichever account runs the runner service on self-hosted machines) before starting the container.
32
+
33
+ If the container image runs as a different non-root user — via a USER directive in the
34
+ Dockerfile, docker run --user, or a Kubernetes securityContext.runAsUser setting —
35
+ that user has no write access to the runner-owned files and directories. Every step
36
+ that writes to GITHUB_ENV, GITHUB_OUTPUT, GITHUB_PATH, or GITHUB_STEP_SUMMARY
37
+ fails with EACCES: permission denied. The checkout step also fails because git
38
+ cannot write the lock file for the config.
39
+
40
+ The runner does not automatically adjust ownership, set ACLs, or apply mount
41
+ options to make the workspace writable by non-root container users. This is a
42
+ known long-standing platform limitation tracked in runner#2411 (30 reactions)
43
+ with no built-in fix as of 2026. The issue also affects ARC (actions-runner-controller)
44
+ Kubernetes runners when the pod securityContext sets a non-root runAsUser.
45
+ fix: |
46
+ Option 1 — Add `options: --user root` to the container definition.
47
+ This overrides the container USER and runs job steps as root, granting
48
+ full access to all runner-created files.
49
+
50
+ Option 2 — Set fsGroup in Kubernetes/ARC pod spec so mounted directories
51
+ are group-writable and the container's supplemental group matches.
52
+
53
+ Option 3 — Use a privileged init step (--user 0) to chown the workspace
54
+ to the container's non-root UID before executing actual build steps.
55
+
56
+ Option 4 — Build the container image with UID 1001 (the runner UID on
57
+ GitHub-hosted runners) instead of a custom non-root UID, so ownership
58
+ matches automatically.
59
+ fix_code:
60
+ - language: yaml
61
+ label: 'Force container to run as root with options: --user root'
62
+ code: |
63
+ jobs:
64
+ build:
65
+ runs-on: ubuntu-latest
66
+ container:
67
+ image: myapp:latest # Has a non-root USER in its Dockerfile
68
+ options: --user root # Override: run job steps as root
69
+ steps:
70
+ - uses: actions/checkout@v4
71
+ - name: Build
72
+ run: make build
73
+ - language: yaml
74
+ label: 'Set fsGroup in ARC AutoscalingRunnerSet to make workspace group-writable'
75
+ code: |
76
+ apiVersion: actions.github.com/v1alpha1
77
+ kind: AutoscalingRunnerSet # the actions.github.com group defines AutoscalingRunnerSet; RunnerSet exists only in the legacy actions.summerwind.dev group
78
+ metadata:
79
+ name: my-runners
80
+ spec:
81
+ template:
82
+ spec:
83
+ securityContext:
84
+ fsGroup: 1001 # Group ID applied to mounted volumes; matches runAsGroup below
85
+ containers:
86
+ - name: runner
87
+ image: ghcr.io/actions/actions-runner:latest
88
+ securityContext:
89
+ runAsUser: 1001
90
+ runAsGroup: 1001
91
+ prevention:
92
+ - 'Prefer running container jobs as root or as UID 1001 to match the runner workspace ownership'
93
+ - 'Test container images locally with docker run --user <uid> to catch permission issues before CI'
94
+ - 'Document the --user root workaround in workflow templates that use custom container images'
95
+ - 'Consider using a job-level container for tool availability only, keeping privileged steps on the host runner'
96
+ - 'When building custom runner images, set USER to UID 1001 to match GitHub-hosted runner conventions'
97
+ docs:
98
+ - url: 'https://github.com/actions/runner/issues/2411'
99
+ label: 'runner#2411: Runner does not set proper permissions for mounted folders in rootless container jobs (30 reactions)'
100
+ - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-where-your-workflow-runs/running-jobs-in-a-container'
101
+ label: 'Running jobs in a container — options field and container configuration'
102
+ - url: 'https://github.com/actions/runner/issues/3290'
103
+ label: 'runner#3290: Kubernetes container pods fail with EACCES when using a custom user'
@@ -0,0 +1,112 @@
1
+ id: runner-environment-096
2
+ title: 'Self-hosted runner runsvc.sh corrupted to 0 bytes after auto-update'
3
+ category: runner-environment
4
+ severity: error
5
+ tags:
6
+ - self-hosted
7
+ - auto-update
8
+ - runsvc
9
+ - systemd
10
+ - service-restart
11
+ patterns:
12
+ - regex: 'runsvc\.sh.*0 bytes|0 bytes.*runsvc\.sh'
13
+ flags: 'i'
14
+ - regex: 'code=exited.*status=203/EXEC|status=203/EXEC'
15
+ flags: 'i'
16
+ - regex: 'Failed to start GitHub Actions Runner'
17
+ flags: 'i'
18
+ - regex: 'bin/runsvc\.sh.*Syntax error.*end of file'
19
+ flags: 'i'
20
+ error_messages:
21
+ - "(code=exited, status=203/EXEC)"
22
+ - "Failed to start GitHub Actions Runner (svc.sh)"
23
+ - "bin/runsvc.sh: 1: Syntax error: Unexpected end of file"
24
+ - "bin.2.334.0/runsvc.sh: 0 bytes"
25
+ root_cause: |
26
+ When a self-hosted runner auto-updates from v2.328.0 to v2.334.0 (and some
27
+ adjacent version pairs), a race condition in the update file-extraction process
28
+ sometimes writes the new bin.{version}/runsvc.sh as 0 bytes instead of the
29
+ correct shell script content.
30
+
31
+ The currently-running listener operates from in-memory state and continues
32
+ accepting and completing jobs normally — so the runner appears healthy in
33
+ the GitHub Actions UI and the corruption is not immediately visible. The
34
+ bin/runsvc.sh symlink quietly points to the corrupted 0-byte file.
35
+
36
+ On the next service restart (host reboot, manual systemctl restart, OOM kill,
37
+ scheduled maintenance window), systemd attempts to execute the 0-byte runsvc.sh,
38
+ receives status=203/EXEC (exec format error because the file is empty), and the
39
+ runner fails to start entirely — dropping all pending and future jobs.
40
+
41
+ The failure is especially hard to diagnose because the runner appears fully
42
+ online right up to the restart event. Runners in pools that restart infrequently
43
+ may run corrupted for days before the symptom appears.
44
+ fix: |
45
+ Immediate recovery:
46
+ 1. Check if runsvc.sh is 0 bytes:
47
+ ls -lh /path/to/runner/bin/runsvc.sh
48
+ 2. If 0 bytes, stop the service and re-download the runner package at the
49
+ same version, or manually copy runsvc.sh from a known-good runner
50
+ installation of the same version.
51
+ 3. Restart the service and confirm it comes up cleanly before re-enabling
52
+ job acceptance.
53
+
54
+ Preventive measures:
55
+ - Disable auto-update (--no-auto-update flag during registration) and manage
56
+ runner version upgrades manually during maintenance windows.
57
+ - Add a startup health check (see fix_code) that verifies runsvc.sh is
58
+ non-zero before the service is considered ready.
59
+ - For ephemeral runners, use container restart policies that detect the
60
+ 0-byte condition and re-register a fresh runner instead of recycling
61
+ a corrupted one.
62
+ fix_code:
63
+ - language: yaml
64
+ label: 'Detect corrupted runsvc.sh in a pre-job health check step'
65
+ code: |
66
+ jobs:
67
+ preflight:
68
+ runs-on: [self-hosted, linux]
69
+ steps:
70
+ - name: Verify runner service script integrity
71
+ shell: bash
72
+ run: |
73
+ RUNNER_SVC="${RUNNER_ROOT:-$RUNNER_TEMP/../..}/bin/runsvc.sh" # $0 is the temporary step script, so derive the runner root from RUNNER_TEMP (<root>/_work/_temp in the default layout); set RUNNER_ROOT if the work folder was relocated
74
+ if [ ! -s "$RUNNER_SVC" ]; then
75
+ echo "::error::runsvc.sh is missing or 0 bytes at $RUNNER_SVC — runner update corrupted the service script. Re-register this runner."
76
+ exit 1
77
+ fi
78
+ echo "runsvc.sh OK ($(wc -c < "$RUNNER_SVC") bytes)"
79
+ - language: yaml
80
+ label: 'Pin runner version and disable auto-update via ARC AutoscalingRunnerSet spec'
81
+ code: |
82
+ # actions-runner-controller AutoscalingRunnerSet — pin version, disable auto-update
83
+ apiVersion: actions.github.com/v1alpha1
84
+ kind: AutoscalingRunnerSet # githubConfigUrl/minRunners/maxRunners are AutoscalingRunnerSet fields; RunnerSet is not a kind in this API group
85
+ metadata:
86
+ name: my-runners
87
+ spec:
88
+ githubConfigUrl: https://github.com/my-org/my-repo
89
+ githubConfigSecret: controller-manager
90
+ minRunners: 2
91
+ maxRunners: 10
92
+ template:
93
+ spec:
94
+ containers:
95
+ - name: runner
96
+ image: ghcr.io/actions/actions-runner:2.334.0 # pin version
97
+ env:
98
+ - name: DISABLE_RUNNER_UPDATE
99
+ value: '1'
100
+ prevention:
101
+ - 'Pin runner version with --no-auto-update and upgrade manually during maintenance windows'
102
+ - 'Monitor runner systemd units with alerting on failed states before jobs queue up'
103
+ - 'Use ephemeral runners (--ephemeral flag) so each job gets a freshly registered runner, avoiding accumulated update corruption'
104
+ - 'After any auto-update event, verify that bin/runsvc.sh is non-zero before accepting production jobs'
105
+ - 'Run a nightly canary workflow that exercises a restart-then-run cycle on self-hosted pools'
106
+ docs:
107
+ - url: 'https://github.com/actions/runner/issues/4421'
108
+ label: 'runner#4421: runsvc.sh sometimes 0 bytes after auto-update from 2.328.0 to 2.334.0 (May 2026)'
109
+ - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/autoscaling-with-self-hosted-runners'
110
+ label: 'Autoscaling with self-hosted runners — runner lifecycle management'
111
+ - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-version-updates'
112
+ label: 'Self-hosted runner version updates'
@@ -0,0 +1,123 @@
1
+ id: silent-failures-043
2
+ title: 'github.workspace and runner.workspace return host paths, not container paths, inside container jobs'
3
+ category: silent-failures
4
+ severity: silent-failure
5
+ tags:
6
+ - container-jobs
7
+ - github-context
8
+ - workspace
9
+ - path-mismatch
10
+ - expressions
11
+ - silent
12
+ patterns:
13
+ - regex: '\$\{\{\s*github\.workspace\s*\}\}.*container|\$\{\{\s*runner\.workspace\s*\}\}.*container'
14
+ flags: 'i'
15
+ - regex: '/home/runner/work/[^/]+/[^/]+.*no such file|not found.*home/runner/work'
16
+ flags: 'i'
17
+ error_messages: []
18
+ root_cause: |
19
+ Inside container jobs, `${{ github.workspace }}` and `${{ runner.workspace }}`
20
+ resolve to the HOST-side path (e.g., /home/runner/work/repo/repo), not the
21
+ path that the container actually sees at runtime (e.g., /__w/repo/repo for job
22
+ containers; Docker container actions see /github/workspace instead).
23
+
24
+ The container mounts the workspace at a different path than the host. Any
25
+ expression using these context values to construct file paths, Docker build
26
+ contexts, artifact paths, or shell script arguments will silently resolve to
27
+ the wrong location inside the container — either a non-existent path or an
28
+ unexpected host directory leaked into the container mount namespace.
29
+
30
+ By contrast, the GITHUB_WORKSPACE and RUNNER_WORKSPACE environment variables
31
+ ARE injected with the correct container-visible paths. Only the expression
32
+ context values (${{ github.workspace }}) are wrong. This inconsistency is the
33
+ core trap: developers expect context and env var to be equivalent, but inside
34
+ containers they diverge.
35
+
36
+ No error is raised — wrong paths are silently accepted by shells and tools,
37
+ causing subtly incorrect behavior: files not found, wrong directory used for
38
+ builds, artifacts uploaded from wrong paths, or test results pointing to
39
+ non-existent locations.
40
+
41
+ Tracked in runner#2058 (81 reactions, 13 confused reactions) as a known bug
42
+ with no built-in fix as of 2026.
43
+ fix: |
44
+ Use the GITHUB_WORKSPACE environment variable instead of the
45
+ ${{ github.workspace }} expression inside container job steps. The env var
46
+ is injected by the runner with the correct container-visible path.
47
+
48
+ Similarly, use RUNNER_WORKSPACE instead of ${{ runner.workspace }}.
49
+
50
+ For Docker build contexts and tool paths that require absolute paths, use
51
+ the container-visible path directly:
52
+ - Job containers (jobs.<id>.container): /__w/<repo-name>/<repo-name> (verify with pwd in first step)
53
+ - Docker container actions: /github/workspace
54
+
55
+ Alternatively, use relative paths (.) wherever possible to avoid the
56
+ host/container path discrepancy entirely.
57
+ fix_code:
58
+ - language: yaml
59
+ label: 'Use GITHUB_WORKSPACE env var instead of ${{ github.workspace }} inside container steps'
60
+ code: |
61
+ jobs:
62
+ build:
63
+ runs-on: ubuntu-latest
64
+ container:
65
+ image: myapp-builder:latest
66
+ steps:
67
+ - uses: actions/checkout@v4
68
+
69
+ # WRONG: returns host path, not the container-visible mount path
70
+ # - name: Build
71
+ # run: cd "${{ github.workspace }}" && make build
72
+
73
+ # CORRECT: GITHUB_WORKSPACE is set to the container-visible path
74
+ - name: Build
75
+ run: |
76
+ echo "Container workspace: $GITHUB_WORKSPACE"
77
+ cd "$GITHUB_WORKSPACE" && make build
78
+ - language: yaml
79
+ label: 'Use relative paths for Docker build context inside container jobs'
80
+ code: |
81
+ jobs:
82
+ docker-in-docker:
83
+ runs-on: ubuntu-latest
84
+ container:
85
+ image: docker:24
86
+ steps:
87
+ - uses: actions/checkout@v4
88
+ - name: Build image
89
+ run: |
90
+ # Use . (current dir) not ${{ github.workspace }} for build context
91
+ docker build -t myimage:latest .
92
+
93
+ # If absolute path needed, use the env var:
94
+ docker build -t myimage:latest "$GITHUB_WORKSPACE"
95
+ - language: yaml
96
+ label: 'Debug step to reveal the actual container workspace path'
97
+ code: |
98
+ jobs:
99
+ debug:
100
+ runs-on: ubuntu-latest
101
+ container:
102
+ image: ubuntu:22.04
103
+ steps:
104
+ - name: Show path discrepancy
105
+ run: |
106
+ echo "Host path via context (WRONG inside container):"
107
+ echo " github.workspace = ${{ github.workspace }}"
108
+ echo ""
109
+ echo "Container-visible path via env var (CORRECT):"
110
+ echo " GITHUB_WORKSPACE = $GITHUB_WORKSPACE"
111
+ echo ""
112
+ echo "Actual current directory: $(pwd)"
113
+ prevention:
114
+ - 'Never use ${{ github.workspace }} or ${{ runner.workspace }} expressions inside container job steps — they return host paths'
115
+ - 'Use the GITHUB_WORKSPACE and RUNNER_WORKSPACE environment variables for all path references inside containers'
116
+ - 'Use relative paths wherever possible in container steps to avoid the host/container path mismatch entirely'
117
+ - 'Add a debug step printing both the context value and the env var when authoring new container workflows'
118
+ - 'Test container-based workflows locally with nektos/act to surface path resolution issues before CI'
119
+ docs:
120
+ - url: 'https://github.com/actions/runner/issues/2058'
121
+ label: 'runner#2058: github.workspace and runner.workspace are incorrect inside container jobs (81 reactions)'
122
+ - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-where-your-workflow-runs/running-jobs-in-a-container'
123
+ label: 'Running jobs in a container — environment variables and context values'
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@htekdev/actions-debugger",
3
- "version": "1.0.38",
3
+ "version": "1.0.39",
4
4
  "description": "65+ real GitHub Actions errors, queryable by agents. CLI + MCP server + Copilot skills + error database.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",