@htekdev/actions-debugger 1.0.113 → 1.0.115

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/errors/caching-artifacts/cache-corrupt-on-cancel-during-restore-save-always.yml +136 -0
  2. package/errors/caching-artifacts/restore-keys-asterisk-literal-not-glob.yml +107 -0
  3. package/errors/concurrency-timing/concurrency-timing-053.yml +83 -0
  4. package/errors/concurrency-timing/pull-request-review-shared-concurrency-cancels-ci.yml +131 -0
  5. package/errors/known-unsolved/github-script-esm-not-supported.yml +111 -0
  6. package/errors/known-unsolved/job-outputs-string-only-no-array-object.yml +142 -0
  7. package/errors/known-unsolved/known-unsolved-062.yml +87 -0
  8. package/errors/known-unsolved/runner-rest-api-busy-false-broker-state-desync.yml +102 -0
  9. package/errors/permissions-auth/oidc-immutable-sub-claim-new-repo-trust-policy-mismatch.yml +122 -0
  10. package/errors/permissions-auth/permissions-auth-064.yml +122 -0
  11. package/errors/permissions-auth/permissions-auth-065.yml +97 -0
  12. package/errors/permissions-auth/permissions-auth-066.yml +129 -0
  13. package/errors/permissions-auth/upload-code-coverage-missing-code-quality-write-permission.yml +94 -0
  14. package/errors/runner-environment/arc-kubernetes-checkout-circular-json-container-hook.yml +101 -0
  15. package/errors/runner-environment/cache-restore-windows-runner-silent-crash.yml +130 -0
  16. package/errors/runner-environment/git-248-fetch-tags-shallow-clone-regression.yml +100 -0
  17. package/errors/runner-environment/javascript-actions-alpine-arm64-not-supported.yml +121 -0
  18. package/errors/runner-environment/runner-environment-188.yml +96 -0
  19. package/errors/runner-environment/runner-environment-191.yml +147 -0
  20. package/errors/runner-environment/runner-environment-192.yml +144 -0
  21. package/errors/runner-environment/runner-environment-193.yml +136 -0
  22. package/errors/runner-environment/runner-environment-194.yml +86 -0
  23. package/errors/runner-environment/runner-environment-199.yml +93 -0
  24. package/errors/runner-environment/setup-python-macos-self-hosted-symlink-permission-denied.yml +94 -0
  25. package/errors/runner-environment/setup-python-windows-self-hosted-no-admin-install-fails.yml +101 -0
  26. package/errors/silent-failures/checkout-v6-clean-false-deletes-workspace-on-repo-change.yml +119 -0
  27. package/errors/silent-failures/queue-max-silently-ignored-with-cancel-in-progress.yml +109 -0
  28. package/errors/silent-failures/silent-failures-102.yml +141 -0
  29. package/errors/silent-failures/silent-failures-104.yml +119 -0
  30. package/errors/triggers/triggers-069.yml +100 -0
  31. package/errors/yaml-syntax/continue-on-error-inputs-composite-action-unexpected-value.yml +110 -0
  32. package/errors/yaml-syntax/yaml-syntax-068.yml +137 -0
  33. package/errors/yaml-syntax/yaml-syntax-069.yml +118 -0
  34. package/package.json +1 -1
@@ -0,0 +1,97 @@
1
+ id: permissions-auth-065
2
+ title: "PAT Not SSO-Authorized for SAML SSO Organization — Checkout Returns 'Not Found' Despite Valid Token"
3
+ category: permissions-auth
4
+ severity: error
5
+ tags:
6
+ - pat
7
+ - sso
8
+ - saml
9
+ - enterprise
10
+ - checkout
11
+ - not-found
12
+ - authentication
13
+ patterns:
14
+ - regex: 'Not Found.*docs\.github\.com/rest/repos/repos'
15
+ flags: i
16
+ - regex: 'Retrieving the default branch name\s+Not Found'
17
+ flags: i
18
+ - regex: 'Waiting \d+ seconds before trying again\s+Retrieving the default branch name\s+Not Found'
19
+ flags: i
20
+ error_messages:
21
+ - "Not Found - https://docs.github.com/rest/repos/repos#get-a-repository"
22
+ - "Retrieving the default branch name"
23
+ - "Error: Not Found - https://docs.github.com/rest/repos/repos#get-a-repository"
24
+ root_cause: |
25
+ GitHub organizations that enforce SAML SSO require Personal Access Tokens (PATs) to be
26
+ explicitly authorized for that organization before they can access any organization resources
27
+ — regardless of the token's scopes. A PAT with `repo` (or even `admin:org`) scope is
28
+ insufficient on its own inside an SSO-enforced organization.
29
+
30
+ When an unauthorized PAT tries to access the repository metadata endpoint
31
+ (`GET /repos/{owner}/{repo}`), GitHub returns HTTP 404 "Not Found" instead of 401 or 403.
32
+ This is intentional — SSO-unauthorized access is treated as if the resource does not exist,
33
+ preventing information leakage. The result is a deeply confusing error because:
34
+ 1. The repository exists and the PAT owner has access (confirmed via browser with SSO session)
35
+ 2. The error says "Not Found" not "Unauthorized" or "SSO authorization required"
36
+ 3. The checkout action retries with a 19-second back-off before finally failing
37
+
38
+ This affects `actions/checkout` with `token:` set to a PAT, any `gh api` or `curl` call
39
+ using the PAT, and any third-party action that uses a PAT to access the organization.
40
+ fix: |
41
+ Authorize the PAT for the SAML SSO organization in GitHub Developer Settings:
42
+
43
+ 1. Go to GitHub.com → Your profile → Settings → Developer settings
44
+ 2. Select "Personal access tokens" → find the affected PAT
45
+ 3. Click "Configure SSO" next to the token
46
+ 4. Click "Authorize" next to the organization name
47
+ 5. Complete the SSO authorization flow
48
+
49
+ After authorizing, the PAT can access organization resources and the checkout succeeds.
50
+
51
+ Alternative: Use a GitHub App installation token instead of a PAT. App tokens are not
52
+ subject to the SSO authorization requirement — the app must be installed in the organization
53
+ (which requires an admin to approve), but once installed its tokens work without per-token
54
+ SSO authorization.
55
+ fix_code:
56
+ - language: yaml
57
+ label: "Checkout using a PAT that has been SSO-authorized for the org"
58
+ code: |
59
+ # First: authorize the PAT for the org in GitHub Developer Settings → Configure SSO
60
+ # Then use it in the workflow:
61
+ jobs:
62
+ build:
63
+ runs-on: ubuntu-latest
64
+ steps:
65
+ - uses: actions/checkout@v4
66
+ with:
67
+ repository: my-org/private-repo
68
+ token: ${{ secrets.GH_PAT }} # must be SSO-authorized for my-org
69
+ - language: yaml
70
+ label: "Use a GitHub App token to avoid SSO authorization requirement"
71
+ code: |
72
+ jobs:
73
+ build:
74
+ runs-on: ubuntu-latest
75
+ steps:
76
+ - uses: actions/create-github-app-token@v1
77
+ id: app-token
78
+ with:
79
+ app-id: ${{ vars.APP_ID }}
80
+ private-key: ${{ secrets.APP_PRIVATE_KEY }}
81
+ owner: my-org
82
+ - uses: actions/checkout@v4
83
+ with:
84
+ repository: my-org/private-repo
85
+ token: ${{ steps.app-token.outputs.token }}
86
+ prevention:
87
+ - "After creating a PAT for use in an SSO-enforced organization, always click 'Configure SSO' and authorize it for every relevant organization immediately"
88
+ - "Prefer GitHub App installation tokens over PATs for organization-scoped workflows — Apps do not require per-token SSO authorization"
89
+ - "Document the SSO authorization requirement in your team's GitHub Actions onboarding guide — it is not surfaced in the error message"
90
+ - "When troubleshooting a 'Not Found' error with a PAT that has correct scopes, check SSO authorization before assuming the repo path or scopes are wrong"
91
+ docs:
92
+ - url: "https://docs.github.com/en/enterprise-cloud@latest/authentication/authenticating-with-saml-single-sign-on/authorizing-a-personal-access-token-for-use-with-saml-single-sign-on"
93
+ label: "GitHub Docs: Authorizing a PAT for use with SAML SSO"
94
+ - url: "https://stackoverflow.com/questions/79874764/github-actions-checkout-fails-with-not-found-error-for-sso-protected-enterpris"
95
+ label: "Stack Overflow — GitHub Actions checkout fails with Not Found for SSO-protected enterprise repo (Jan 2026)"
96
+ - url: "https://docs.github.com/en/enterprise-cloud@latest/rest/authentication/authenticating-to-the-rest-api"
97
+ label: "GitHub Docs: Authenticating to the REST API (SSO requirements)"
@@ -0,0 +1,129 @@
1
+ id: permissions-auth-066
2
+ title: "AWS IAM OIDC Trust Policy Pins Reusable Workflow to Version Tag — Breaks on Called Action Major Version Upgrade"
3
+ category: permissions-auth
4
+ severity: error
5
+ tags:
6
+ - oidc
7
+ - aws
8
+ - iam
9
+ - reusable-workflow
10
+ - job_workflow_ref
11
+ - version-tag
12
+ - trust-policy
13
+ patterns:
14
+ - regex: 'Not authorized to perform sts:AssumeRoleWithWebIdentity'
15
+ flags: i
16
+ - regex: 'Could not assume role with OIDC'
17
+ flags: i
18
+ - regex: 'AccessDenied.*AssumeRoleWithWebIdentity'
19
+ flags: i
20
+ error_messages:
21
+ - "Could not assume role with OIDC: Not authorized to perform sts:AssumeRoleWithWebIdentity"
22
+ - "An error occurred (AccessDenied) when calling the AssumeRoleWithWebIdentity operation: Not authorized to perform sts:AssumeRoleWithWebIdentity"
23
+ root_cause: |
24
+ When a reusable workflow is called at a specific version tag (e.g.,
25
+ `uses: org/lib/.github/workflows/deploy.yml@v2`), GitHub's OIDC token embeds the called
26
+ workflow's ref in the `sub` claim as `job_workflow_ref`:
27
+
28
+ repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@refs/tags/v2.1.0
29
+
30
+ An AWS IAM trust policy that uses a `StringLike` condition pinned to a specific version
31
+ pattern such as `@refs/tags/v2*` accepts this token. However, when the caller upgrades
32
+ the called workflow from `@v2` to `@v3`, the OIDC token `sub` claim changes to:
33
+
34
+ repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@refs/tags/v3.0.0
35
+
36
+ The `@refs/tags/v2*` condition no longer matches `@refs/tags/v3.0.0`, so AWS STS rejects
37
+ the `AssumeRoleWithWebIdentity` call with AccessDenied. The error message is identical to
38
+ any other OIDC trust policy mismatch — nothing in the error output reveals that the version
39
+ tag in the `job_workflow_ref` claim is the cause.
40
+
41
+ This is distinct from the ref→job_workflow_ref format change (permissions-auth-044) which
42
+ occurs when a direct job is refactored into a reusable workflow. Here, the `job_workflow_ref`
43
+ format is already in use and working — it breaks silently only after a version upgrade.
44
+ fix: |
45
+ Update the AWS IAM trust policy `StringLike` condition to accept any version of the called
46
+ workflow, not a pinned version pattern. Use a wildcard that matches across versions.
47
+
48
+ Option A (wildcard on the version suffix — recommended):
49
+ Replace `@refs/tags/v2*` with `@*` or use a broader pattern that includes the full path
50
+ before the version, such as:
51
+ `"repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@*"`
52
+
53
+ Option B (OIDC subject claim customization):
54
+ Use GitHub's OIDC subject claim customization (repo Settings → Actions → General → OIDC
55
+ subject claims) to define a custom sub template that excludes the `job_workflow_ref` field
56
+ or removes the version portion. Then update the trust policy to match the customized sub.
57
+
58
+ Option C (accept both old and new versions):
59
+ Add both version patterns to the IAM trust policy during a transition window:
60
+ `StringLike: ["...deploy.yml@refs/tags/v2*", "...deploy.yml@refs/tags/v3*"]`
61
+ Remove the old pattern after all callers have upgraded.
62
+ fix_code:
63
+ - language: json
64
+ label: "AWS IAM trust policy — version-agnostic StringLike condition (recommended)"
65
+ code: |
66
+ {
67
+ "Version": "2012-10-17",
68
+ "Statement": [
69
+ {
70
+ "Effect": "Allow",
71
+ "Principal": {
72
+ "Federated": "arn:aws:iam::123456789012:oidc-provider/token.actions.githubusercontent.com"
73
+ },
74
+ "Action": "sts:AssumeRoleWithWebIdentity",
75
+ "Condition": {
76
+ "StringEquals": {
77
+ "token.actions.githubusercontent.com:aud": "sts.amazonaws.com"
78
+ },
79
+ "StringLike": {
80
+ "token.actions.githubusercontent.com:sub": "repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@*"
81
+ }
82
+ }
83
+ }
84
+ ]
85
+ }
86
+ - language: json
87
+ label: "Multi-version transition — accept both v2 and v3 during upgrade"
88
+ code: |
89
+ {
90
+ "Condition": {
91
+ "StringEquals": {
92
+ "token.actions.githubusercontent.com:aud": "sts.amazonaws.com"
93
+ },
94
+ "StringLike": {
95
+ "token.actions.githubusercontent.com:sub": [
96
+ "repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@refs/tags/v2*",
97
+ "repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@refs/tags/v3*"
98
+ ]
99
+ }
100
+ }
101
+ }
102
+ - language: yaml
103
+ label: "How to decode the actual sub claim from a failing run"
104
+ code: |
105
+ # Add this step before configure-aws-credentials to log the actual sub claim
106
+ - name: Debug OIDC subject claim
107
+ id: debug-oidc
108
+ run: |
109
+ token=$(curl -sS -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
110
+ "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=sts.amazonaws.com" | jq -r '.value')
111
+ echo "$token" | cut -d. -f2 | base64 -d 2>/dev/null | jq '.sub'
112
+ env:
113
+ ACTIONS_ID_TOKEN_REQUEST_TOKEN: ${{ env.ACTIONS_ID_TOKEN_REQUEST_TOKEN }}
114
+ ACTIONS_ID_TOKEN_REQUEST_URL: ${{ env.ACTIONS_ID_TOKEN_REQUEST_URL }}
115
+ prevention:
116
+ - "Use version-agnostic wildcards in IAM trust policy StringLike conditions (e.g., `@*` suffix) unless the policy intentionally restricts to a specific version"
117
+ - "Treat IAM OIDC trust policies as part of the release checklist — whenever a reusable workflow's major version is bumped, update the trust policy before merging callers"
118
+ - "Use GitHub OIDC subject claim customization to remove the version from the sub claim if version-agnostic trust is always preferred"
119
+ - "Document the expected sub claim format in the reusable workflow README alongside the required IAM trust policy pattern"
120
+ - "Run the sub-claim debug step in a dry-run workflow before updating the IAM trust policy to confirm the exact new sub value"
121
+ docs:
122
+ - url: "https://github.com/aws-actions/configure-aws-credentials/issues/1707"
123
+ label: "aws-actions/configure-aws-credentials#1707 — OIDC AssumeRole fails after action version upgrade"
124
+ - url: "https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/using-openid-connect-with-reusable-workflows"
125
+ label: "GitHub Docs: Using OIDC with reusable workflows"
126
+ - url: "https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/about-security-hardening-with-openid-connect#customizing-the-subject-claims-for-an-organization-or-repository"
127
+ label: "GitHub Docs: Customizing OIDC subject claims"
128
+ - url: "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_oidc.html"
129
+ label: "AWS Docs: IAM OIDC identity providers"
@@ -0,0 +1,94 @@
1
+ id: permissions-auth-068
2
+ title: 'upload-code-coverage action fails with 403 — missing code-quality:write permission'
3
+ category: permissions-auth
4
+ severity: error
5
+ tags:
6
+ - permissions
7
+ - code-quality
8
+ - upload-code-coverage
9
+ - github-token
10
+ - '403'
11
+ - fine-grained
12
+ patterns:
13
+ - regex: 'Resource not accessible by integration'
14
+ flags: 'i'
15
+ - regex: 'Upload failed.*403|HTTP 403.*code.coverage|code.coverage.*403'
16
+ flags: 'i'
17
+ - regex: 'code-quality.*write|code_quality.*write'
18
+ flags: 'i'
19
+ error_messages:
20
+ - '{"message":"Resource not accessible by integration","documentation_url":"https://docs.github.com/rest"}'
21
+ - 'Error: Upload failed: HTTP 403 Forbidden'
22
+ - 'HTTP Status: 403'
23
+ root_cause: |
24
+ GitHub's code coverage upload API (introduced May 2026 as part of Code Quality for
25
+ pull requests) requires the new fine-grained permission `code-quality:write` on the
26
+ calling token. The default GITHUB_TOKEN in GitHub Actions workflows has this permission
27
+ set to `none` unless explicitly granted.
28
+
29
+ When `actions/upload-code-coverage` calls the coverage upload API without this
30
+ permission, GitHub returns HTTP 403 "Resource not accessible by integration". Because
31
+ `code-quality:write` is a newly introduced permission class (not present in older
32
+ workflow permission schemas), developers familiar with the standard permissions
33
+ (contents, issues, pull-requests, etc.) don't know to add it.
34
+
35
+ This affects all workflows that do not specify `permissions:` at all (which defaults to
36
+ `read-all` — but `code-quality` is still `none` for new permissions), as well as
37
+ workflows that explicitly set `permissions: {}` or use a restrictive block.
38
+ fix: |
39
+ Add `code-quality: write` to the `permissions` block of the job that runs
40
+ `actions/upload-code-coverage`. This grants the GITHUB_TOKEN the required scope to
41
+ call the code coverage upload API.
42
+
43
+ Note: `code-quality:` is a job-level permission. It cannot be set as a global
44
+ `GITHUB_TOKEN` permission through repository settings — it must be declared in the
45
+ workflow YAML per-job.
46
+ fix_code:
47
+ - language: yaml
48
+ label: 'Add code-quality:write to the coverage upload job'
49
+ code: |
50
+ jobs:
51
+ test:
52
+ runs-on: ubuntu-latest
53
+ permissions:
54
+ contents: read
55
+ code-quality: write # Required for upload-code-coverage
56
+ steps:
57
+ - uses: actions/checkout@v4
58
+
59
+ - name: Run tests and generate coverage
60
+ run: pytest --cov=src --cov-report=xml
61
+
62
+ - name: Upload code coverage
63
+ uses: actions/upload-code-coverage@v1
64
+ with:
65
+ file: coverage.xml
66
+ language: Python
67
+ - language: yaml
68
+ label: 'Minimal permissions block if no others are needed'
69
+ code: |
70
+ jobs:
71
+ coverage:
72
+ runs-on: ubuntu-latest
73
+ permissions:
74
+ code-quality: write
75
+ steps:
76
+ - uses: actions/upload-code-coverage@v1
77
+ with:
78
+ file: cobertura.xml
79
+ language: Java
80
+ label: code-coverage/jacoco
81
+ prevention:
82
+ - "Whenever you add actions/upload-code-coverage to a workflow, immediately add `code-quality: write` to the job's permissions block."
83
+ - "Use a linter or policy-as-code tool (e.g., Poutine, StepSecurity) that validates required permissions against known action requirements."
84
+ - "If your org requires `permissions: {}` at the workflow level for security hardening, remember that `code-quality: write` must still be declared per-job."
85
+ - "Check the GitHub Changelog periodically — new actions introduce new permission classes that aren't reflected in older documentation or IDE auto-complete."
86
+ docs:
87
+ - url: 'https://github.blog/changelog/2026-05-26-code-coverage-in-pull-requests-is-now-in-public-preview/'
88
+ label: 'GitHub Changelog: Code coverage in pull requests (May 26, 2026)'
89
+ - url: 'https://github.com/actions/upload-code-coverage'
90
+ label: 'actions/upload-code-coverage repository'
91
+ - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/controlling-permissions-for-github_token'
92
+ label: 'Controlling permissions for GITHUB_TOKEN'
93
+ - url: 'https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/enabling-features-for-your-repository/managing-github-actions-settings-for-a-repository'
94
+ label: 'GitHub Actions permissions documentation'
@@ -0,0 +1,101 @@
1
+ id: runner-environment-189
2
+ title: 'ARC Kubernetes container hook fails with "Converting circular structure to JSON" during checkout'
3
+ category: runner-environment
4
+ severity: error
5
+ tags:
6
+ - arc
7
+ - kubernetes
8
+ - container-hooks
9
+ - checkout
10
+ - circular-json
11
+ - self-hosted
12
+ - custom-container
13
+ patterns:
14
+ - regex: 'Converting circular structure to JSON'
15
+ flags: 'i'
16
+ - regex: 'starting at object with constructor .ClientRequest.'
17
+ flags: 'i'
18
+ - regex: 'Executing the custom container implementation failed'
19
+ flags: 'i'
20
+ - regex: 'property .socket. -> object with constructor .TLSSocket.'
21
+ flags: 'i'
22
+ error_messages:
23
+ - "TypeError: Converting circular structure to JSON\n --> starting at object with constructor 'ClientRequest'\n | property 'socket' -> object with constructor 'TLSSocket'\n --- property '_httpMessage' closes the circle"
24
+ - "Error: Executing the custom container implementation failed. Please contact your self hosted runner administrator."
25
+ - "Error: Process completed with exit code 1."
26
+ root_cause: |
27
+ Actions Runner Controller (ARC) on Kubernetes uses Node.js container-hooks to manage
28
+ the lifecycle of job container pods. During container startup, ARC makes HTTP requests
29
+ to the Kubernetes API server to create or wait for pods. When a network request fails
30
+ mid-connection — due to Kubernetes API server latency, pod scheduling delays, or transient
31
+ network errors — the error handler in the container hook attempts to serialize the failing
32
+ Node.js http.ClientRequest object into JSON for logging or error reporting.
33
+
34
+ Node.js http.ClientRequest objects contain circular references:
35
+ ClientRequest → socket (TLSSocket) → _httpMessage → ClientRequest
36
+
37
+ JSON.stringify() cannot handle circular structures and throws:
38
+ "TypeError: Converting circular structure to JSON"
39
+
40
+ This error surfaces during the checkout step (or another early step) because the container
41
+ hook failure terminates the entire job. The error is not in actions/checkout itself — it is
42
+ in the ARC container-hooks layer executing underneath. The message
43
+ "Executing the custom container implementation failed" confirms the failure is in the
44
+ container hook, not the action code.
45
+
46
+ This is most commonly triggered by:
47
+ - Kubernetes API server under high load (slow pod scheduling acknowledgement)
48
+ - ARC container-hooks < v0.8.1 which lacks circular-reference guarding in error serialization
49
+ - Network timeouts between ARC runner pod and Kubernetes API during container create
50
+ - Combined ARC v2.332.0 + container-hooks v0.8.0 regression (see actions/runner#4302)
51
+ fix: |
52
+ Upgrade ARC and container-hooks to versions that guard against circular reference
53
+ serialization. The actions/actions-runner-controller v0.9.x chart pins container-hooks
54
+ v0.8.1+ which patches the circular JSON error.
55
+
56
+ If on ARC v2.332.0, also upgrade the runner image tag — that release combined a
57
+ base image change (Ubuntu 22.04 → 24.04) with a container-hooks version that
58
+ had this regression.
59
+
60
+ As a diagnostic step, check if the Kubernetes API server is under pressure — slow
61
+ pod acknowledgements cause the HTTP client to timeout in ways that expose this code path.
62
+ fix_code:
63
+ - language: yaml
64
+ label: 'Pin ARC to a release with container-hooks v0.8.1+ fix'
65
+ code: |
66
+ # In your ARC AutoscalingRunnerSet Helm values:
67
+ # Upgrade to controller + runner image that includes container-hooks >= v0.8.1
68
+ # Check releases at https://github.com/actions/actions-runner-controller/releases
69
+ template:
70
+ spec:
71
+ containers:
72
+ - name: runner
73
+ # Use a runner image tag that ships container-hooks >= v0.8.1
74
+ image: ghcr.io/actions/actions-runner:latest
75
+ - language: yaml
76
+ label: 'Increase pod startup timeout to reduce API server pressure race'
77
+ code: |
78
+ # helm values — increase pod startup grace period to reduce
79
+ # Kubernetes API timeout racing against container hook
80
+ template:
81
+ spec:
82
+ terminationGracePeriodSeconds: 3600
83
+ containers:
84
+ - name: runner
85
+ resources:
86
+ requests:
87
+ memory: '2Gi'
88
+ cpu: '500m'
89
+ prevention:
90
+ - 'Keep ARC runner controller and container-hooks up to date — circular JSON fixes land in patch releases'
91
+ - 'Monitor Kubernetes API server latency; high API server load increases the probability of this race condition'
92
+ - 'Pin to a specific ARC chart version in staging before rolling out to production runners'
93
+ - 'Check actions/runner#4302 for the specific v2.332.0 regression if pinned to that release'
94
+ - 'Set appropriate resource requests/limits so pod scheduling completes quickly and avoids API timeout races'
95
+ docs:
96
+ - url: 'https://github.com/actions/checkout/issues/2056'
97
+ label: 'actions/checkout#2056: TypeError: Converting circular structure to JSON'
98
+ - url: 'https://github.com/actions/runner/issues/4302'
99
+ label: 'actions/runner#4302: ARC v2.332.0 container hook UID / permission regression'
100
+ - url: 'https://github.com/actions/actions-runner-controller'
101
+ label: 'Actions Runner Controller (ARC) — GitHub repository'
@@ -0,0 +1,130 @@
1
+ id: runner-environment-196
2
+ title: 'actions/cache restore silently crashes Windows runner — job jumps to Post cleanup with no error'
3
+ category: runner-environment
4
+ severity: silent-failure
5
+ tags:
6
+ - cache
7
+ - windows
8
+ - crash
9
+ - silent-failure
10
+ - cargo
11
+ - large-cache
12
+ - post-cleanup
13
+ patterns:
14
+ - regex: 'Cache hit for:.*\n(?:.*\n){0,3}Post job cleanup'
15
+ flags: 'i'
16
+ - regex: 'Cache hit for:[\s\S]{0,200}Post job cleanup'
17
+ flags: 'i'
18
+ - regex: 'Cache up-to-date\.\s*\(node:\d+\) \[DEP0040\] DeprecationWarning.*punycode'
19
+ flags: 'i'
20
+ error_messages:
21
+ - 'Cache hit for: [key]'
22
+ - 'Post job cleanup.'
23
+ - 'Cache up-to-date.'
24
+ root_cause: |
25
+ On Windows GitHub-hosted runners, actions/cache@v5 can silently crash the Node.js
26
+ runner process during cache restore when extracting very large cache archives (multi-GB
27
+ caches, e.g. Rust/Cargo registry + cache, large Maven/Gradle dependency trees).
28
+
29
+ The failure manifests as the job jumping directly from "Cache hit for: [key]" to
30
+ "Post job cleanup." with no intervening restore log lines and no error message.
31
+ The step exits with code 0 (success), but the cache was never extracted. Subsequent
32
+ build steps fail with missing dependency errors (e.g. "error: no such file or directory:
33
+ ~/.cargo/registry") rather than a cache-related error, making the root cause opaque.
34
+
35
+ The log sequence for affected runs:
36
+ 1. "Cache hit for: [cache-key]" (restore begins)
37
+ 2. [no tar extraction log lines]
38
+ 3. "Post job cleanup." (job finishes or runner crashes)
39
+ 4. "Cache up-to-date."
40
+ 5. "(node:XXXX) [DEP0040] DeprecationWarning: The `punycode` module is deprecated"
41
+ 6. "Post job cleanup."
42
+
43
+ Root cause analysis: The Windows runner process (Runner.Worker.exe) terminates
44
+ abnormally during tar/zstd decompression of the cache archive. This appears to be a
45
+ memory-related crash (similar to the Windows heap corruption pattern in upload-artifact,
46
+ tracked in toolkit#2406) triggered by the high memory pressure of decompressing large
47
+ archives within the Node.js 20 heap on Windows runners as of May 2026.
48
+
49
+ The crash is non-deterministic (intermittent) — the same cache key may restore
50
+ successfully on retry. Affected cache sizes are typically 1 GB+ uncompressed.
51
+ Rust Cargo caches (registry/index + registry/cache + git/db) are the most commonly
52
+ reported trigger.
53
+
54
+ Source: actions/cache#1754 (May 2026, Windows runner, Cargo cache).
55
+ fix: |
56
+ Short-term workaround: Add `continue-on-error: true` to the cache restore step.
57
+ The job will proceed to the build step which will then reinstall dependencies from
58
+ scratch. The build takes longer but completes reliably.
59
+
60
+ Preferred workaround: Split the cache into smaller chunks. Rust/Cargo caches can be
61
+ split by caching registry/index, registry/cache, and git/db in separate cache steps
62
+ with different keys, keeping each archive under ~500 MB.
63
+
64
+ Alternative: Use sccache or a remote cache (e.g. Cloudflare R2 + sccache) instead of
65
+ actions/cache for Rust builds on Windows — this avoids large local archives entirely.
66
+
67
+ Long-term: Track actions/cache#1754 for an upstream fix. Adding
68
+ `ACTIONS_STEP_DEBUG: true` and `ACTIONS_RUNNER_DEBUG: true` as repository secrets may reveal the crash signal in
69
+ verbose runner logs.
70
+ fix_code:
71
+ - language: yaml
72
+ label: 'Short-term: continue-on-error to prevent job failure on crash'
73
+ code: |
74
+ - name: Restore Cargo cache
75
+ uses: actions/cache@v5
76
+ continue-on-error: true # Job proceeds even if cache restore crashes
77
+ with:
78
+ path: |
79
+ ~/.cargo/registry/index/
80
+ ~/.cargo/registry/cache/
81
+ ~/.cargo/git/db/
82
+ target/
83
+ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
84
+ restore-keys: ${{ runner.os }}-cargo-
85
+
86
+ - language: yaml
87
+ label: 'Split large Cargo cache into smaller chunks to avoid crash threshold'
88
+ code: |
89
+ - name: Restore Cargo registry index (small, fast)
90
+ uses: actions/cache@v5
91
+ with:
92
+ path: ~/.cargo/registry/index/
93
+ key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
94
+ restore-keys: ${{ runner.os }}-cargo-index-
95
+
96
+ - name: Restore Cargo registry cache (large packages)
97
+ uses: actions/cache@v5
98
+ continue-on-error: true
99
+ with:
100
+ path: ~/.cargo/registry/cache/
101
+ key: ${{ runner.os }}-cargo-cache-${{ hashFiles('**/Cargo.lock') }}
102
+ restore-keys: ${{ runner.os }}-cargo-cache-
103
+
104
+ - name: Restore Cargo git sources
105
+ uses: actions/cache@v5
106
+ continue-on-error: true
107
+ with:
108
+ path: ~/.cargo/git/db/
109
+ key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }}
110
+
111
+ - name: Restore build target dir
112
+ uses: actions/cache@v5
113
+ continue-on-error: true
114
+ with:
115
+ path: target/
116
+ key: ${{ runner.os }}-cargo-target-${{ hashFiles('**/Cargo.lock') }}
117
+
118
+ prevention:
119
+ - 'Keep individual cache archives under ~500 MB by splitting large dependency trees (Cargo, Maven, Gradle) into multiple cache steps'
120
+ - 'Add continue-on-error: true to cache restore steps on Windows runners as a safety net for intermittent crashes'
121
+ - 'Monitor workflow durations — a sudden increase in Windows build time (cache miss equivalent) with no cache-related error in logs is a symptom of this crash'
122
+ - 'For Rust/Cargo on Windows runners, consider sccache with a remote backend to avoid large local cache archives entirely'
123
+ - 'Enable ACTIONS_STEP_DEBUG=true (as repository secret) to capture runner-level crash signals when this failure is suspected'
124
+ docs:
125
+ - url: 'https://github.com/actions/cache/issues/1754'
126
+ label: 'actions/cache#1754 — Windows runner randomly dies during cache restore (May 2026)'
127
+ - url: 'https://github.com/actions/cache#tips-for-using-cache'
128
+ label: 'actions/cache — usage tips and cache size guidance'
129
+ - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows'
130
+ label: 'Caching dependencies — limits and best practices'
@@ -0,0 +1,100 @@
1
+ id: runner-environment-190
2
+ title: 'Git 2.48.0 silently stops fetching tags with fetch-tags: true on non-depth-1 shallow clones'
3
+ category: runner-environment
4
+ severity: silent-failure
5
+ tags:
6
+ - git-version
7
+ - fetch-tags
8
+ - shallow-clone
9
+ - fetch-depth
10
+ - ubuntu-24.04
11
+ - regression
12
+ - checkout
13
+ patterns:
14
+ - regex: 'git version 2\.48\.'
15
+ flags: 'i'
16
+ - regex: 'fetch-tags.*fetch-depth|fetch-depth.*fetch-tags'
17
+ flags: 'i'
18
+ error_messages:
19
+ - "# No error — tags are silently absent after checkout with fetch-tags: true and fetch-depth: N on git 2.48.0"
20
+ - "fatal: No names found, cannot describe anything."
21
+ - "fatal: not a tag 'HEAD'"
22
+ root_cause: |
23
+ Git 2.48.0 introduced a change in how `git fetch --depth=N` handles tag following for
24
+ direct refspec fetches. In git ≤ 2.47.x, when actions/checkout ran:
25
+
26
+ git fetch --depth=N origin +<sha>:refs/remotes/origin/<branch>
27
+
28
+ git would automatically follow tags reachable within the depth window — any tag pointing
29
+ to a commit within the fetched depth was included. This is known as automatic tag following.
30
+
31
+ Starting with git 2.48.0, automatic tag following is suppressed for direct refspec fetches
32
+ with `--depth`. Only the explicitly requested ref is fetched; no tags are included even if
33
+ they point to commits within the shallow clone window. The fetch log shows only the branch
34
+ ref fetched — no tag lines appear.
35
+
36
+ Result: `fetch-tags: true` combined with `fetch-depth: N` (where N > 1, such as 100, 383, 500)
37
+ silently returns no tags on runner images shipping git 2.48.0. The workflow log shows no error
38
+ and no warning — `git tag -l` returns empty. Downstream steps using git describe, semantic-release,
39
+ helm chart versioning, or any tool that reads git tags break with "No names found" or
40
+ "not a tag 'HEAD'" errors.
41
+
42
+ This regression first appeared when ubuntu-24.04 runner image updated from 20250105.1.0 to
43
+ 20250113.1.0 (which shipped git 2.48.0). The issue was resolved in runner image 20250117.1.0+
44
+ when git was updated to 2.48.1, which patched the regression. Self-hosted runners running
45
+ git 2.48.0 remain affected.
46
+
47
+ Note: This is distinct from the existing known silent failure where fetch-depth: 1 silently
48
+ fetches no tags regardless of git version. That is expected shallow-clone behavior. This
49
+ regression affects fetch-depth: N > 1 scenarios that previously worked.
50
+ fix: |
51
+ Use `fetch-depth: 0` when git tags are required. A full clone fetches all history and all
52
+ tags regardless of git version. This is the most reliable fix.
53
+
54
+ For large repositories where a full clone is too slow, add a separate `git fetch --tags` step
55
+ immediately after checkout to explicitly fetch all tag objects:
56
+
57
+ git fetch --tags --force
58
+
59
+ Self-hosted runners on git 2.48.0 should upgrade to git 2.48.1 or later which patches
60
+ the tag following regression.
61
+ fix_code:
62
+ - language: yaml
63
+ label: 'Use fetch-depth: 0 for reliable tag fetching (recommended)'
64
+ code: |
65
+ - name: Checkout with full history and all tags
66
+ uses: actions/checkout@v4
67
+ with:
68
+ # fetch-depth: 0 always fetches all commits and tags regardless of git version
69
+ fetch-depth: 0
70
+ - language: yaml
71
+ label: 'Add explicit git fetch --tags step after shallow checkout'
72
+ code: |
73
+ - name: Checkout (shallow)
74
+ uses: actions/checkout@v4
75
+ with:
76
+ fetch-depth: 100
77
+ # fetch-tags: true is unreliable on git 2.48.0 — use explicit fetch instead
78
+
79
+ - name: Fetch tags explicitly (git-version-safe)
80
+ run: git fetch --tags --force
81
+ - language: yaml
82
+ label: 'Check git version in CI for debugging'
83
+ code: |
84
+ - name: Debug git version and tags
85
+ run: |
86
+ git --version
87
+ git tag -l | head -20
88
+ git describe --tags --always || echo "No reachable tags"
89
+ prevention:
90
+ - 'Always use fetch-depth: 0 when git tags are required by downstream steps like git describe or semantic-release'
91
+ - 'Add a git tag -l debug step after checkout to verify tags are present before release tooling runs'
92
+ - 'For self-hosted runners, prefer git 2.48.1+ over 2.48.0 — the regression was patched in 2.48.1'
93
+ - 'Pin to fetch-depth: 0 in release workflows — the performance cost of a full clone is worth the reliability'
94
+ docs:
95
+ - url: 'https://github.com/actions/checkout/issues/2041'
96
+ label: 'actions/checkout#2041: Tags no longer fetch with Git v2.48.0'
97
+ - url: 'https://github.com/actions/checkout#usage'
98
+ label: 'actions/checkout — fetch-depth and fetch-tags input documentation'
99
+ - url: 'https://git-scm.com/docs/git-fetch#_description'
100
+ label: 'git fetch documentation — tag following with --depth'