@htekdev/actions-debugger 1.0.113 → 1.0.115
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/errors/caching-artifacts/cache-corrupt-on-cancel-during-restore-save-always.yml +136 -0
- package/errors/caching-artifacts/restore-keys-asterisk-literal-not-glob.yml +107 -0
- package/errors/concurrency-timing/concurrency-timing-053.yml +83 -0
- package/errors/concurrency-timing/pull-request-review-shared-concurrency-cancels-ci.yml +131 -0
- package/errors/known-unsolved/github-script-esm-not-supported.yml +111 -0
- package/errors/known-unsolved/job-outputs-string-only-no-array-object.yml +142 -0
- package/errors/known-unsolved/known-unsolved-062.yml +87 -0
- package/errors/known-unsolved/runner-rest-api-busy-false-broker-state-desync.yml +102 -0
- package/errors/permissions-auth/oidc-immutable-sub-claim-new-repo-trust-policy-mismatch.yml +122 -0
- package/errors/permissions-auth/permissions-auth-064.yml +122 -0
- package/errors/permissions-auth/permissions-auth-065.yml +97 -0
- package/errors/permissions-auth/permissions-auth-066.yml +129 -0
- package/errors/permissions-auth/upload-code-coverage-missing-code-quality-write-permission.yml +94 -0
- package/errors/runner-environment/arc-kubernetes-checkout-circular-json-container-hook.yml +101 -0
- package/errors/runner-environment/cache-restore-windows-runner-silent-crash.yml +130 -0
- package/errors/runner-environment/git-248-fetch-tags-shallow-clone-regression.yml +100 -0
- package/errors/runner-environment/javascript-actions-alpine-arm64-not-supported.yml +121 -0
- package/errors/runner-environment/runner-environment-188.yml +96 -0
- package/errors/runner-environment/runner-environment-191.yml +147 -0
- package/errors/runner-environment/runner-environment-192.yml +144 -0
- package/errors/runner-environment/runner-environment-193.yml +136 -0
- package/errors/runner-environment/runner-environment-194.yml +86 -0
- package/errors/runner-environment/runner-environment-199.yml +93 -0
- package/errors/runner-environment/setup-python-macos-self-hosted-symlink-permission-denied.yml +94 -0
- package/errors/runner-environment/setup-python-windows-self-hosted-no-admin-install-fails.yml +101 -0
- package/errors/silent-failures/checkout-v6-clean-false-deletes-workspace-on-repo-change.yml +119 -0
- package/errors/silent-failures/queue-max-silently-ignored-with-cancel-in-progress.yml +109 -0
- package/errors/silent-failures/silent-failures-102.yml +141 -0
- package/errors/silent-failures/silent-failures-104.yml +119 -0
- package/errors/triggers/triggers-069.yml +100 -0
- package/errors/yaml-syntax/continue-on-error-inputs-composite-action-unexpected-value.yml +110 -0
- package/errors/yaml-syntax/yaml-syntax-068.yml +137 -0
- package/errors/yaml-syntax/yaml-syntax-069.yml +118 -0
- package/package.json +1 -1
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
id: permissions-auth-065
|
|
2
|
+
title: "PAT Not SSO-Authorized for SAML SSO Organization — Checkout Returns 'Not Found' Despite Valid Token"
|
|
3
|
+
category: permissions-auth
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- pat
|
|
7
|
+
- sso
|
|
8
|
+
- saml
|
|
9
|
+
- enterprise
|
|
10
|
+
- checkout
|
|
11
|
+
- not-found
|
|
12
|
+
- authentication
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'Not Found.*docs\.github\.com/rest/repos/repos'
|
|
15
|
+
flags: i
|
|
16
|
+
- regex: 'Retrieving the default branch name\s+Not Found'
|
|
17
|
+
flags: i
|
|
18
|
+
- regex: 'Waiting \d+ seconds before trying again\s+Retrieving the default branch name\s+Not Found'
|
|
19
|
+
flags: i
|
|
20
|
+
error_messages:
|
|
21
|
+
- "Not Found - https://docs.github.com/rest/repos/repos#get-a-repository"
|
|
22
|
+
- "Retrieving the default branch name"
|
|
23
|
+
- "Error: Not Found - https://docs.github.com/rest/repos/repos#get-a-repository"
|
|
24
|
+
root_cause: |
|
|
25
|
+
GitHub organizations that enforce SAML SSO require Personal Access Tokens (PATs) to be
|
|
26
|
+
explicitly authorized for that organization before they can access any organization resources
|
|
27
|
+
— regardless of the token's scopes. A PAT with `repo` (or even `admin:org`) scope is
|
|
28
|
+
insufficient on its own inside an SSO-enforced organization.
|
|
29
|
+
|
|
30
|
+
When an unauthorized PAT tries to access the repository metadata endpoint
|
|
31
|
+
(`GET /repos/{owner}/{repo}`), GitHub returns HTTP 404 "Not Found" instead of 401 or 403.
|
|
32
|
+
This is intentional — SSO-unauthorized access is treated as if the resource does not exist,
|
|
33
|
+
preventing information leakage. The result is a deeply confusing error because:
|
|
34
|
+
1. The repository exists and the PAT owner has access (confirmed via browser with SSO session)
|
|
35
|
+
2. The error says "Not Found" not "Unauthorized" or "SSO authorization required"
|
|
36
|
+
3. The checkout action retries (waiting a randomized 10–20 seconds between attempts) before finally failing
|
|
37
|
+
|
|
38
|
+
This affects `actions/checkout` with `token:` set to a PAT, any `gh api` or `curl` call
|
|
39
|
+
using the PAT, and any third-party action that uses a PAT to access the organization.
|
|
40
|
+
fix: |
|
|
41
|
+
Authorize the PAT for the SAML SSO organization in GitHub Developer Settings:
|
|
42
|
+
|
|
43
|
+
1. Go to GitHub.com → Your profile → Settings → Developer settings
|
|
44
|
+
2. Select "Personal access tokens" → find the affected PAT
|
|
45
|
+
3. Click "Configure SSO" next to the token
|
|
46
|
+
4. Click "Authorize" next to the organization name
|
|
47
|
+
5. Complete the SSO authorization flow
|
|
48
|
+
|
|
49
|
+
After authorizing, the PAT can access organization resources and the checkout succeeds.
|
|
50
|
+
|
|
51
|
+
Alternative: Use a GitHub App installation token instead of a PAT. App tokens are not
|
|
52
|
+
subject to the SSO authorization requirement — the app must be installed in the organization
|
|
53
|
+
(which requires an admin to approve), but once installed its tokens work without per-token
|
|
54
|
+
SSO authorization.
|
|
55
|
+
fix_code:
|
|
56
|
+
- language: yaml
|
|
57
|
+
label: "Checkout using a PAT that has been SSO-authorized for the org"
|
|
58
|
+
code: |
|
|
59
|
+
# First: authorize the PAT for the org in GitHub Developer Settings → Configure SSO
|
|
60
|
+
# Then use it in the workflow:
|
|
61
|
+
jobs:
|
|
62
|
+
build:
|
|
63
|
+
runs-on: ubuntu-latest
|
|
64
|
+
steps:
|
|
65
|
+
- uses: actions/checkout@v4
|
|
66
|
+
with:
|
|
67
|
+
repository: my-org/private-repo
|
|
68
|
+
token: ${{ secrets.GH_PAT }} # must be SSO-authorized for my-org
|
|
69
|
+
- language: yaml
|
|
70
|
+
label: "Use a GitHub App token to avoid SSO authorization requirement"
|
|
71
|
+
code: |
|
|
72
|
+
jobs:
|
|
73
|
+
build:
|
|
74
|
+
runs-on: ubuntu-latest
|
|
75
|
+
steps:
|
|
76
|
+
- uses: actions/create-github-app-token@v1
|
|
77
|
+
id: app-token
|
|
78
|
+
with:
|
|
79
|
+
app-id: ${{ vars.APP_ID }}
|
|
80
|
+
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
|
81
|
+
owner: my-org
|
|
82
|
+
- uses: actions/checkout@v4
|
|
83
|
+
with:
|
|
84
|
+
repository: my-org/private-repo
|
|
85
|
+
token: ${{ steps.app-token.outputs.token }}
|
|
86
|
+
prevention:
|
|
87
|
+
- "After creating a PAT for use in an SSO-enforced organization, always click 'Configure SSO' and authorize it for every relevant organization immediately"
|
|
88
|
+
- "Prefer GitHub App installation tokens over PATs for organization-scoped workflows — Apps do not require per-token SSO authorization"
|
|
89
|
+
- "Document the SSO authorization requirement in your team's GitHub Actions onboarding guide — it is not surfaced in the error message"
|
|
90
|
+
- "When troubleshooting a 'Not Found' error with a PAT that has correct scopes, check SSO authorization before assuming the repo path or scopes are wrong"
|
|
91
|
+
docs:
|
|
92
|
+
- url: "https://docs.github.com/en/enterprise-cloud@latest/authentication/authenticating-with-saml-single-sign-on/authorizing-a-personal-access-token-for-use-with-saml-single-sign-on"
|
|
93
|
+
label: "GitHub Docs: Authorizing a PAT for use with SAML SSO"
|
|
94
|
+
- url: "https://stackoverflow.com/questions/79874764/github-actions-checkout-fails-with-not-found-error-for-sso-protected-enterpris"
|
|
95
|
+
label: "Stack Overflow — GitHub Actions checkout fails with Not Found for SSO-protected enterprise repo (Jan 2026)"
|
|
96
|
+
- url: "https://docs.github.com/en/enterprise-cloud@latest/rest/authentication/authenticating-to-the-rest-api"
|
|
97
|
+
label: "GitHub Docs: Authenticating to the REST API (SSO requirements)"
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
id: permissions-auth-066
|
|
2
|
+
title: "AWS IAM OIDC Trust Policy Pins Reusable Workflow to Version Tag — Breaks on Called Action Major Version Upgrade"
|
|
3
|
+
category: permissions-auth
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- oidc
|
|
7
|
+
- aws
|
|
8
|
+
- iam
|
|
9
|
+
- reusable-workflow
|
|
10
|
+
- job_workflow_ref
|
|
11
|
+
- version-tag
|
|
12
|
+
- trust-policy
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'Not authorized to perform sts:AssumeRoleWithWebIdentity'
|
|
15
|
+
flags: i
|
|
16
|
+
- regex: 'Could not assume role with OIDC'
|
|
17
|
+
flags: i
|
|
18
|
+
- regex: 'AccessDenied.*AssumeRoleWithWebIdentity'
|
|
19
|
+
flags: i
|
|
20
|
+
error_messages:
|
|
21
|
+
- "Could not assume role with OIDC: Not authorized to perform sts:AssumeRoleWithWebIdentity"
|
|
22
|
+
- "An error occurred (AccessDenied) when calling the AssumeRoleWithWebIdentity operation: Not authorized to perform sts:AssumeRoleWithWebIdentity"
|
|
23
|
+
root_cause: |
|
|
24
|
+
When a reusable workflow is called at a specific version tag (e.g.,
|
|
25
|
+
`uses: org/lib/.github/workflows/deploy.yml@v2`), GitHub's OIDC token embeds the called
|
|
26
|
+
workflow's ref in the `sub` claim as `job_workflow_ref` (when the subject claim template is customized to include that key):
|
|
27
|
+
|
|
28
|
+
repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@refs/tags/v2.1.0
|
|
29
|
+
|
|
30
|
+
An AWS IAM trust policy that uses a `StringLike` condition pinned to a specific version
|
|
31
|
+
pattern such as `@refs/tags/v2*` accepts this token. However, when the caller upgrades
|
|
32
|
+
the called action from `@v2` to `@v3`, the OIDC token `sub` claim changes to:
|
|
33
|
+
|
|
34
|
+
repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@refs/tags/v3.0.0
|
|
35
|
+
|
|
36
|
+
The `@refs/tags/v2*` condition no longer matches `@refs/tags/v3.0.0`, so AWS STS rejects
|
|
37
|
+
the `AssumeRoleWithWebIdentity` call with AccessDenied. The error message is identical to
|
|
38
|
+
any other OIDC trust policy mismatch — nothing in the error output reveals that the version
|
|
39
|
+
tag in the `job_workflow_ref` claim is the cause.
|
|
40
|
+
|
|
41
|
+
This is distinct from the ref→job_workflow_ref format change (permissions-auth-044) which
|
|
42
|
+
occurs when a direct job is refactored into a reusable workflow. Here, the `job_workflow_ref`
|
|
43
|
+
format is already in use and working — it breaks silently only after a version upgrade.
|
|
44
|
+
fix: |
|
|
45
|
+
Update the AWS IAM trust policy `StringLike` condition to accept any version of the called
|
|
46
|
+
workflow, not a pinned version pattern. Use a wildcard that matches across versions.
|
|
47
|
+
|
|
48
|
+
Option A (wildcard on the version suffix — recommended):
|
|
49
|
+
Replace `@refs/tags/v2*` with `@*` or use a broader pattern that includes the full path
|
|
50
|
+
before the version, such as:
|
|
51
|
+
`"repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@*"`
|
|
52
|
+
|
|
53
|
+
Option B (OIDC subject claim customization):
|
|
54
|
+
Use GitHub's OIDC subject claim customization (configured through the REST API:
|
|
55
|
+
`PUT /repos/{owner}/{repo}/actions/oidc/customization/sub`) to define a custom sub template that excludes the `job_workflow_ref` field
|
|
56
|
+
or removes the version portion. Then update the trust policy to match the customized sub.
|
|
57
|
+
|
|
58
|
+
Option C (accept both old and new versions):
|
|
59
|
+
Add both version patterns to the IAM trust policy during a transition window:
|
|
60
|
+
`StringLike: ["...deploy.yml@refs/tags/v2*", "...deploy.yml@refs/tags/v3*"]`
|
|
61
|
+
Remove the old pattern after all callers have upgraded.
|
|
62
|
+
fix_code:
|
|
63
|
+
- language: json
|
|
64
|
+
label: "AWS IAM trust policy — version-agnostic StringLike condition (recommended)"
|
|
65
|
+
code: |
|
|
66
|
+
{
|
|
67
|
+
"Version": "2012-10-17",
|
|
68
|
+
"Statement": [
|
|
69
|
+
{
|
|
70
|
+
"Effect": "Allow",
|
|
71
|
+
"Principal": {
|
|
72
|
+
"Federated": "arn:aws:iam::123456789012:oidc-provider/token.actions.githubusercontent.com"
|
|
73
|
+
},
|
|
74
|
+
"Action": "sts:AssumeRoleWithWebIdentity",
|
|
75
|
+
"Condition": {
|
|
76
|
+
"StringEquals": {
|
|
77
|
+
"token.actions.githubusercontent.com:aud": "sts.amazonaws.com"
|
|
78
|
+
},
|
|
79
|
+
"StringLike": {
|
|
80
|
+
"token.actions.githubusercontent.com:sub": "repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@*"
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
]
|
|
85
|
+
}
|
|
86
|
+
- language: json
|
|
87
|
+
label: "Multi-version transition — accept both v2 and v3 during upgrade"
|
|
88
|
+
code: |
|
|
89
|
+
{
|
|
90
|
+
"Condition": {
|
|
91
|
+
"StringEquals": {
|
|
92
|
+
"token.actions.githubusercontent.com:aud": "sts.amazonaws.com"
|
|
93
|
+
},
|
|
94
|
+
"StringLike": {
|
|
95
|
+
"token.actions.githubusercontent.com:sub": [
|
|
96
|
+
"repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@refs/tags/v2*",
|
|
97
|
+
"repo:ORG/CALLER:job_workflow_ref:org/lib/.github/workflows/deploy.yml@refs/tags/v3*"
|
|
98
|
+
]
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
- language: yaml
|
|
103
|
+
label: "How to decode the actual sub claim from a failing run"
|
|
104
|
+
code: |
|
|
105
|
+
# Add this step before configure-aws-credentials to log the actual sub claim
|
|
106
|
+
- name: Debug OIDC subject claim
|
|
107
|
+
id: debug-oidc
|
|
108
|
+
run: |
|
|
109
|
+
token=$(curl -sS -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
|
|
110
|
+
"${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=sts.amazonaws.com" | jq -r '.value')
|
|
111
|
+
echo "$token" | cut -d. -f2 | base64 -d 2>/dev/null | jq '.sub'
|
|
112
|
+
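# No `env:` mapping is needed here: the runner injects ACTIONS_ID_TOKEN_REQUEST_TOKEN and
|
|
113
|
+
# ACTIONS_ID_TOKEN_REQUEST_URL automatically when the job has `permissions: id-token: write`.
|
|
114
|
+
# Re-declaring them via `${{ env.* }}` would overwrite both with empty strings and break the curl call.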
|
|
115
|
+
prevention:
|
|
116
|
+
- "Use version-agnostic wildcards in IAM trust policy StringLike conditions (e.g., `@*` suffix) unless the policy intentionally restricts to a specific version"
|
|
117
|
+
- "Treat IAM OIDC trust policies as part of the release checklist — whenever a reusable workflow's major version is bumped, update the trust policy before merging callers"
|
|
118
|
+
- "Use GitHub OIDC subject claim customization to remove the version from the sub claim if version-agnostic trust is always preferred"
|
|
119
|
+
- "Document the expected sub claim format in the reusable workflow README alongside the required IAM trust policy pattern"
|
|
120
|
+
- "Run the sub-claim debug step in a dry-run workflow before updating the IAM trust policy to confirm the exact new sub value"
|
|
121
|
+
docs:
|
|
122
|
+
- url: "https://github.com/aws-actions/configure-aws-credentials/issues/1707"
|
|
123
|
+
label: "aws-actions/configure-aws-credentials#1707 — OIDC AssumeRole fails after action version upgrade"
|
|
124
|
+
- url: "https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/using-openid-connect-with-reusable-workflows"
|
|
125
|
+
label: "GitHub Docs: Using OIDC with reusable workflows"
|
|
126
|
+
- url: "https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/about-security-hardening-with-openid-connect#customizing-the-subject-claims-for-an-organization-or-repository"
|
|
127
|
+
label: "GitHub Docs: Customizing OIDC subject claims"
|
|
128
|
+
- url: "https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_providers_oidc.html"
|
|
129
|
+
label: "AWS Docs: IAM OIDC identity providers"
|
package/errors/permissions-auth/upload-code-coverage-missing-code-quality-write-permission.yml
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
id: permissions-auth-068
|
|
2
|
+
title: 'upload-code-coverage action fails with 403 — missing code-quality:write permission'
|
|
3
|
+
category: permissions-auth
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- permissions
|
|
7
|
+
- code-quality
|
|
8
|
+
- upload-code-coverage
|
|
9
|
+
- github-token
|
|
10
|
+
- '403'
|
|
11
|
+
- fine-grained
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: 'Resource not accessible by integration'
|
|
14
|
+
flags: 'i'
|
|
15
|
+
- regex: 'Upload failed.*403|HTTP 403.*code.coverage|code.coverage.*403'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
- regex: 'code-quality.*write|code_quality.*write'
|
|
18
|
+
flags: 'i'
|
|
19
|
+
error_messages:
|
|
20
|
+
- '{"message":"Resource not accessible by integration","documentation_url":"https://docs.github.com/rest"}'
|
|
21
|
+
- 'Error: Upload failed: HTTP 403 Forbidden'
|
|
22
|
+
- 'HTTP Status: 403'
|
|
23
|
+
root_cause: |
|
|
24
|
+
GitHub's code coverage upload API (introduced May 2026 as part of Code Quality for
|
|
25
|
+
pull requests) requires the new fine-grained permission `code-quality:write` on the
|
|
26
|
+
calling token. The default GITHUB_TOKEN in GitHub Actions workflows has this permission
|
|
27
|
+
set to `none` unless explicitly granted.
|
|
28
|
+
|
|
29
|
+
When `actions/upload-code-coverage` calls the coverage upload API without this
|
|
30
|
+
permission, GitHub returns HTTP 403 "Resource not accessible by integration". Because
|
|
31
|
+
`code-quality:write` is a newly introduced permission class (not present in older
|
|
32
|
+
workflow permission schemas), developers familiar with the standard permissions
|
|
33
|
+
(contents, issues, pull-requests, etc.) don't know to add it.
|
|
34
|
+
|
|
35
|
+
This affects all workflows that do not specify `permissions:` at all (which defaults to
|
|
36
|
+
`read-all` — but `code-quality` is still `none` for new permissions), as well as
|
|
37
|
+
workflows that explicitly set `permissions: {}` or use a restrictive block.
|
|
38
|
+
fix: |
|
|
39
|
+
Add `code-quality: write` to the `permissions` block of the job that runs
|
|
40
|
+
`actions/upload-code-coverage`. This grants the GITHUB_TOKEN the required scope to
|
|
41
|
+
call the code coverage upload API.
|
|
42
|
+
|
|
43
|
+
Note: `code-quality:` is a job-level permission. It cannot be set as a global
|
|
44
|
+
`GITHUB_TOKEN` permission through repository settings — it must be declared in the
|
|
45
|
+
workflow YAML per-job.
|
|
46
|
+
fix_code:
|
|
47
|
+
- language: yaml
|
|
48
|
+
label: 'Add code-quality:write to the coverage upload job'
|
|
49
|
+
code: |
|
|
50
|
+
jobs:
|
|
51
|
+
test:
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
permissions:
|
|
54
|
+
contents: read
|
|
55
|
+
code-quality: write # Required for upload-code-coverage
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/checkout@v4
|
|
58
|
+
|
|
59
|
+
- name: Run tests and generate coverage
|
|
60
|
+
run: pytest --cov=src --cov-report=xml
|
|
61
|
+
|
|
62
|
+
- name: Upload code coverage
|
|
63
|
+
uses: actions/upload-code-coverage@v1
|
|
64
|
+
with:
|
|
65
|
+
file: coverage.xml
|
|
66
|
+
language: Python
|
|
67
|
+
- language: yaml
|
|
68
|
+
label: 'Minimal permissions block if no others are needed'
|
|
69
|
+
code: |
|
|
70
|
+
jobs:
|
|
71
|
+
coverage:
|
|
72
|
+
runs-on: ubuntu-latest
|
|
73
|
+
permissions:
|
|
74
|
+
code-quality: write
|
|
75
|
+
steps:
|
|
76
|
+
- uses: actions/upload-code-coverage@v1
|
|
77
|
+
with:
|
|
78
|
+
file: cobertura.xml
|
|
79
|
+
language: Java
|
|
80
|
+
label: code-coverage/jacoco
|
|
81
|
+
prevention:
|
|
82
|
+
- "Whenever you add actions/upload-code-coverage to a workflow, immediately add `code-quality: write` to the job's permissions block."
|
|
83
|
+
- "Use a linter or policy-as-code tool (e.g., Poutine, StepSecurity) that validates required permissions against known action requirements."
|
|
84
|
+
- "If your org uses required permissions: {} at the workflow level for security hardening, remember that code-quality: write must still be declared per-job."
|
|
85
|
+
- "Check the GitHub Changelog periodically — new actions introduce new permission classes that aren't reflected in older documentation or IDE auto-complete."
|
|
86
|
+
docs:
|
|
87
|
+
- url: 'https://github.blog/changelog/2026-05-26-code-coverage-in-pull-requests-is-now-in-public-preview/'
|
|
88
|
+
label: 'GitHub Changelog: Code coverage in pull requests (May 26, 2026)'
|
|
89
|
+
- url: 'https://github.com/actions/upload-code-coverage'
|
|
90
|
+
label: 'actions/upload-code-coverage repository'
|
|
91
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/controlling-permissions-for-github_token'
|
|
92
|
+
label: 'Controlling permissions for GITHUB_TOKEN'
|
|
93
|
+
- url: 'https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/enabling-features-for-your-repository/managing-github-actions-settings-for-a-repository'
|
|
94
|
+
label: 'GitHub Actions permissions documentation'
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
id: runner-environment-189
|
|
2
|
+
title: 'ARC Kubernetes container hook fails with "Converting circular structure to JSON" during checkout'
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- arc
|
|
7
|
+
- kubernetes
|
|
8
|
+
- container-hooks
|
|
9
|
+
- checkout
|
|
10
|
+
- circular-json
|
|
11
|
+
- self-hosted
|
|
12
|
+
- custom-container
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'Converting circular structure to JSON'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'starting at object with constructor .ClientRequest.'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
- regex: 'Executing the custom container implementation failed'
|
|
19
|
+
flags: 'i'
|
|
20
|
+
- regex: 'property .socket. -> object with constructor .TLSSocket.'
|
|
21
|
+
flags: 'i'
|
|
22
|
+
error_messages:
|
|
23
|
+
- "TypeError: Converting circular structure to JSON\n --> starting at object with constructor 'ClientRequest'\n | property 'socket' -> object with constructor 'TLSSocket'\n --- property '_httpMessage' closes the circle"
|
|
24
|
+
- "Error: Executing the custom container implementation failed. Please contact your self hosted runner administrator."
|
|
25
|
+
- "Error: Process completed with exit code 1."
|
|
26
|
+
root_cause: |
|
|
27
|
+
Actions Runner Controller (ARC) on Kubernetes uses Node.js container-hooks to manage
|
|
28
|
+
the lifecycle of job container pods. During container startup, ARC makes HTTP requests
|
|
29
|
+
to the Kubernetes API server to create or wait for pods. When a network request fails
|
|
30
|
+
mid-connection — due to Kubernetes API server latency, pod scheduling delays, or transient
|
|
31
|
+
network errors — the error handler in the container hook attempts to serialize the failing
|
|
32
|
+
Node.js http.ClientRequest object into JSON for logging or error reporting.
|
|
33
|
+
|
|
34
|
+
Node.js http.ClientRequest objects contain circular references:
|
|
35
|
+
ClientRequest → socket (TLSSocket) → _httpMessage → ClientRequest
|
|
36
|
+
|
|
37
|
+
JSON.stringify() cannot handle circular structures and throws:
|
|
38
|
+
"TypeError: Converting circular structure to JSON"
|
|
39
|
+
|
|
40
|
+
This error surfaces during the checkout step (or another early step) because the container
|
|
41
|
+
hook failure terminates the entire job. The error is not in actions/checkout itself — it is
|
|
42
|
+
in the ARC container-hooks layer executing underneath. The message
|
|
43
|
+
"Executing the custom container implementation failed" confirms the failure is in the
|
|
44
|
+
container hook, not the action code.
|
|
45
|
+
|
|
46
|
+
This is most commonly triggered by:
|
|
47
|
+
- Kubernetes API server under high load (slow pod scheduling acknowledgement)
|
|
48
|
+
- ARC container-hooks < v0.8.1 which lacks circular-reference guarding in error serialization
|
|
49
|
+
- Network timeouts between ARC runner pod and Kubernetes API during container create
|
|
50
|
+
- Combined ARC v2.332.0 + container-hooks v0.8.0 regression (see actions/runner#4302)
|
|
51
|
+
fix: |
|
|
52
|
+
Upgrade ARC and container-hooks to versions that guard against circular reference
|
|
53
|
+
serialization. The actions/actions-runner-controller v0.9.x chart pins container-hooks
|
|
54
|
+
v0.8.1+ which patches the circular JSON error.
|
|
55
|
+
|
|
56
|
+
If on ARC v2.332.0, also upgrade the runner image tag — that release combined a
|
|
57
|
+
base image change (Ubuntu 22.04 → 24.04) with a container-hooks version that
|
|
58
|
+
had this regression.
|
|
59
|
+
|
|
60
|
+
As a diagnostic step, check if the Kubernetes API server is under pressure — slow
|
|
61
|
+
pod acknowledgements cause the HTTP client to timeout in ways that expose this code path.
|
|
62
|
+
fix_code:
|
|
63
|
+
- language: yaml
|
|
64
|
+
label: 'Pin ARC to a release with container-hooks v0.8.1+ fix'
|
|
65
|
+
code: |
|
|
66
|
+
# In your ARC AutoscalingRunnerSet Helm values:
|
|
67
|
+
# Upgrade to controller + runner image that includes container-hooks >= v0.8.1
|
|
68
|
+
# Check releases at https://github.com/actions/actions-runner-controller/releases
|
|
69
|
+
template:
|
|
70
|
+
spec:
|
|
71
|
+
containers:
|
|
72
|
+
- name: runner
|
|
73
|
+
# Use a runner image tag that ships container-hooks >= v0.8.1
|
|
74
|
+
image: ghcr.io/actions/actions-runner:latest
|
|
75
|
+
- language: yaml
|
|
76
|
+
label: 'Increase pod startup timeout to reduce API server pressure race'
|
|
77
|
+
code: |
|
|
78
|
+
# helm values — increase pod startup grace period to reduce
|
|
79
|
+
# Kubernetes API timeout racing against container hook
|
|
80
|
+
template:
|
|
81
|
+
spec:
|
|
82
|
+
terminationGracePeriodSeconds: 3600
|
|
83
|
+
containers:
|
|
84
|
+
- name: runner
|
|
85
|
+
resources:
|
|
86
|
+
requests:
|
|
87
|
+
memory: '2Gi'
|
|
88
|
+
cpu: '500m'
|
|
89
|
+
prevention:
|
|
90
|
+
- 'Keep ARC runner controller and container-hooks up to date — circular JSON fixes land in patch releases'
|
|
91
|
+
- 'Monitor Kubernetes API server latency; high API server load increases the probability of this race condition'
|
|
92
|
+
- 'Pin to a specific ARC chart version in staging before rolling out to production runners'
|
|
93
|
+
- 'Check actions/runner#4302 for the specific v2.332.0 regression if pinned to that release'
|
|
94
|
+
- 'Set appropriate resource requests/limits so pod scheduling completes quickly and avoids API timeout races'
|
|
95
|
+
docs:
|
|
96
|
+
- url: 'https://github.com/actions/checkout/issues/2056'
|
|
97
|
+
label: 'actions/checkout#2056: TypeError: Converting circular structure to JSON'
|
|
98
|
+
- url: 'https://github.com/actions/runner/issues/4302'
|
|
99
|
+
label: 'actions/runner#4302: ARC v2.332.0 container hook UID / permission regression'
|
|
100
|
+
- url: 'https://github.com/actions/actions-runner-controller'
|
|
101
|
+
label: 'Actions Runner Controller (ARC) — GitHub repository'
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
id: runner-environment-196
|
|
2
|
+
title: 'actions/cache restore silently crashes Windows runner — job jumps to Post cleanup with no error'
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- cache
|
|
7
|
+
- windows
|
|
8
|
+
- crash
|
|
9
|
+
- silent-failure
|
|
10
|
+
- cargo
|
|
11
|
+
- large-cache
|
|
12
|
+
- post-cleanup
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'Cache hit for:.*\n(?:.*\n){0,3}Post job cleanup'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'Cache hit for:[\s\S]{0,200}Post job cleanup'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
- regex: 'Cache up-to-date\.\s*\(node:\d+\) \[DEP0040\] DeprecationWarning.*punycode'
|
|
19
|
+
flags: 'i'
|
|
20
|
+
error_messages:
|
|
21
|
+
- 'Cache hit for: [key]'
|
|
22
|
+
- 'Post job cleanup.'
|
|
23
|
+
- 'Cache up-to-date.'
|
|
24
|
+
root_cause: |
|
|
25
|
+
On Windows GitHub-hosted runners, actions/cache@v5 can silently crash the Node.js
|
|
26
|
+
runner process during cache restore when extracting very large cache archives (multi-GB
|
|
27
|
+
caches, e.g. Rust/Cargo registry + cache, large Maven/Gradle dependency trees).
|
|
28
|
+
|
|
29
|
+
The failure manifests as the job jumping directly from "Cache hit for: [key]" to
|
|
30
|
+
"Post job cleanup." with no intervening restore log lines and no error message.
|
|
31
|
+
The step exits with code 0 (success), but the cache was never extracted. Subsequent
|
|
32
|
+
build steps fail with missing dependency errors (e.g. "error: no such file or directory:
|
|
33
|
+
~/.cargo/registry") rather than a cache-related error, making the root cause opaque.
|
|
34
|
+
|
|
35
|
+
The log sequence for affected runs:
|
|
36
|
+
1. "Cache hit for: [cache-key]" (restore begins)
|
|
37
|
+
2. [no tar extraction log lines]
|
|
38
|
+
3. "Post job cleanup." (job finishes or runner crashes)
|
|
39
|
+
4. "Cache up-to-date."
|
|
40
|
+
5. "(node:XXXX) [DEP0040] DeprecationWarning: The `punycode` module is deprecated"
|
|
41
|
+
6. "Post job cleanup."
|
|
42
|
+
|
|
43
|
+
Root cause analysis: The Windows runner process (Runner.Worker.exe) terminates
|
|
44
|
+
abnormally during tar/zstd decompression of the cache archive. This appears to be a
|
|
45
|
+
memory-related crash (similar to the Windows heap corruption pattern in upload-artifact,
|
|
46
|
+
tracked in toolkit#2406) triggered by the high memory pressure of decompressing large
|
|
47
|
+
archives within the Node.js 20 heap on Windows runners as of May 2026.
|
|
48
|
+
|
|
49
|
+
The crash is non-deterministic (intermittent) — the same cache key may restore
|
|
50
|
+
successfully on retry. Affected cache sizes are typically 1 GB+ uncompressed.
|
|
51
|
+
Rust Cargo caches (registry/index + registry/cache + git/db) are the most commonly
|
|
52
|
+
reported trigger.
|
|
53
|
+
|
|
54
|
+
Source: actions/cache#1754 (May 2026, Windows runner, Cargo cache).
|
|
55
|
+
fix: |
|
|
56
|
+
Short-term workaround: Add `continue-on-error: true` to the cache restore step.
|
|
57
|
+
The job will proceed to the build step which will then reinstall dependencies from
|
|
58
|
+
scratch. The build takes longer but completes reliably.
|
|
59
|
+
|
|
60
|
+
Preferred workaround: Split the cache into smaller chunks. Rust/Cargo caches can be
|
|
61
|
+
split by caching registry/index, registry/cache, and git/db in separate cache steps
|
|
62
|
+
with different keys, keeping each archive under ~500 MB.
|
|
63
|
+
|
|
64
|
+
Alternative: Use sccache or a remote cache (e.g. Cloudflare R2 + sccache) instead of
|
|
65
|
+
actions/cache for Rust builds on Windows — this avoids large local archives entirely.
|
|
66
|
+
|
|
67
|
+
Long-term: Track actions/cache#1754 for an upstream fix. Adding
|
|
68
|
+
`ACTIONS_RUNNER_DEBUG: true` (alongside `ACTIONS_STEP_DEBUG: true`) as a repository secret or variable may reveal the crash signal in
|
|
69
|
+
verbose runner logs.
|
|
70
|
+
fix_code:
|
|
71
|
+
- language: yaml
|
|
72
|
+
label: 'Short-term: continue-on-error to prevent job failure on crash'
|
|
73
|
+
code: |
|
|
74
|
+
- name: Restore Cargo cache
|
|
75
|
+
uses: actions/cache@v5
|
|
76
|
+
continue-on-error: true # Job proceeds even if cache restore crashes
|
|
77
|
+
with:
|
|
78
|
+
path: |
|
|
79
|
+
~/.cargo/registry/index/
|
|
80
|
+
~/.cargo/registry/cache/
|
|
81
|
+
~/.cargo/git/db/
|
|
82
|
+
target/
|
|
83
|
+
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
|
84
|
+
restore-keys: ${{ runner.os }}-cargo-
|
|
85
|
+
|
|
86
|
+
- language: yaml
|
|
87
|
+
label: 'Split large Cargo cache into smaller chunks to avoid crash threshold'
|
|
88
|
+
code: |
|
|
89
|
+
- name: Restore Cargo registry index (small, fast)
|
|
90
|
+
uses: actions/cache@v5
|
|
91
|
+
with:
|
|
92
|
+
path: ~/.cargo/registry/index/
|
|
93
|
+
key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
|
|
94
|
+
restore-keys: ${{ runner.os }}-cargo-index-
|
|
95
|
+
|
|
96
|
+
- name: Restore Cargo registry cache (large packages)
|
|
97
|
+
uses: actions/cache@v5
|
|
98
|
+
continue-on-error: true
|
|
99
|
+
with:
|
|
100
|
+
path: ~/.cargo/registry/cache/
|
|
101
|
+
key: ${{ runner.os }}-cargo-cache-${{ hashFiles('**/Cargo.lock') }}
|
|
102
|
+
restore-keys: ${{ runner.os }}-cargo-cache-
|
|
103
|
+
|
|
104
|
+
- name: Restore Cargo git sources
|
|
105
|
+
uses: actions/cache@v5
|
|
106
|
+
continue-on-error: true
|
|
107
|
+
with:
|
|
108
|
+
path: ~/.cargo/git/db/
|
|
109
|
+
key: ${{ runner.os }}-cargo-git-${{ hashFiles('**/Cargo.lock') }}
|
|
110
|
+
|
|
111
|
+
- name: Restore build target dir
|
|
112
|
+
uses: actions/cache@v5
|
|
113
|
+
continue-on-error: true
|
|
114
|
+
with:
|
|
115
|
+
path: target/
|
|
116
|
+
key: ${{ runner.os }}-cargo-target-${{ hashFiles('**/Cargo.lock') }}
|
|
117
|
+
|
|
118
|
+
prevention:
|
|
119
|
+
- 'Keep individual cache archives under ~500 MB by splitting large dependency trees (Cargo, Maven, Gradle) into multiple cache steps'
|
|
120
|
+
- 'Add continue-on-error: true to cache restore steps on Windows runners as a safety net for intermittent crashes'
|
|
121
|
+
- 'Monitor workflow durations — a sudden increase in Windows build time (cache miss equivalent) with no cache-related error in logs is a symptom of this crash'
|
|
122
|
+
- 'For Rust/Cargo on Windows runners, consider sccache with a remote backend to avoid large local cache archives entirely'
|
|
123
|
+
- 'Enable ACTIONS_RUNNER_DEBUG=true and ACTIONS_STEP_DEBUG=true (as repository secrets or variables) to capture runner-level crash signals when this failure is suspected'
|
|
124
|
+
docs:
|
|
125
|
+
- url: 'https://github.com/actions/cache/issues/1754'
|
|
126
|
+
label: 'actions/cache#1754 — Windows runner randomly dies during cache restore (May 2026)'
|
|
127
|
+
- url: 'https://github.com/actions/cache#tips-for-using-cache'
|
|
128
|
+
label: 'actions/cache — usage tips and cache size guidance'
|
|
129
|
+
- url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows'
|
|
130
|
+
label: 'Caching dependencies — limits and best practices'
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
id: runner-environment-190
|
|
2
|
+
title: 'Git 2.48.0 silently stops fetching tags with fetch-tags: true on non-depth-1 shallow clones'
|
|
3
|
+
category: runner-environment
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- git-version
|
|
7
|
+
- fetch-tags
|
|
8
|
+
- shallow-clone
|
|
9
|
+
- fetch-depth
|
|
10
|
+
- ubuntu-24.04
|
|
11
|
+
- regression
|
|
12
|
+
- checkout
|
|
13
|
+
patterns:
|
|
14
|
+
- regex: 'git version 2\.48\.'
|
|
15
|
+
flags: 'i'
|
|
16
|
+
- regex: 'fetch-tags.*fetch-depth|fetch-depth.*fetch-tags'
|
|
17
|
+
flags: 'i'
|
|
18
|
+
error_messages:
|
|
19
|
+
- "# No error — tags are silently absent after checkout with fetch-tags: true and fetch-depth: N on git 2.48.0"
|
|
20
|
+
- "fatal: No names found, cannot describe anything."
|
|
21
|
+
- "fatal: not a tag 'HEAD'"
|
|
22
|
+
root_cause: |
|
|
23
|
+
Git 2.48.0 introduced a change in how `git fetch --depth=N` handles tag following for
|
|
24
|
+
direct refspec fetches. In git ≤ 2.47.x, when actions/checkout ran:
|
|
25
|
+
|
|
26
|
+
git fetch --depth=N origin +<sha>:refs/remotes/origin/<branch>
|
|
27
|
+
|
|
28
|
+
git would automatically follow tags reachable within the depth window — any tag pointing
|
|
29
|
+
to a commit within the fetched depth was included. This is known as automatic tag following.
|
|
30
|
+
|
|
31
|
+
Starting with git 2.48.0, automatic tag following is suppressed for direct refspec fetches
|
|
32
|
+
with `--depth`. Only the explicitly requested ref is fetched; no tags are included even if
|
|
33
|
+
they point to commits within the shallow clone window. The fetch log shows only the branch
|
|
34
|
+
ref fetched — no tag lines appear.
|
|
35
|
+
|
|
36
|
+
Result: `fetch-tags: true` combined with `fetch-depth: N` (where N > 1, such as 100, 383, 500)
|
|
37
|
+
silently returns no tags on runner images shipping git 2.48.0. The workflow log shows no error
|
|
38
|
+
and no warning — `git tag -l` returns empty. Downstream steps using git describe, semantic-release,
|
|
39
|
+
helm chart versioning, or any tool that reads git tags break with "no names found" or
|
|
40
|
+
"not a tag 'HEAD'" errors.
|
|
41
|
+
|
|
42
|
+
This regression first appeared when ubuntu-24.04 runner image updated from 20250105.1.0 to
|
|
43
|
+
20250113.1.0 (which shipped git 2.48.0). The issue was resolved in runner image 20250117.1.0+
|
|
44
|
+
when git was updated to 2.48.1, which patched the regression. Self-hosted runners running
|
|
45
|
+
git 2.48.0 remain affected.
|
|
46
|
+
|
|
47
|
+
Note: This is distinct from the existing known silent failure where fetch-depth: 1 silently
|
|
48
|
+
fetches no tags regardless of git version. That is expected shallow-clone behavior. This
|
|
49
|
+
regression affects fetch-depth: N > 1 scenarios that previously worked.
|
|
50
|
+
fix: |
|
|
51
|
+
Use `fetch-depth: 0` when git tags are required. A full clone fetches all history and all
|
|
52
|
+
tags regardless of git version. This is the most reliable fix.
|
|
53
|
+
|
|
54
|
+
For large repositories where a full clone is too slow, add a separate fetch --tags step
|
|
55
|
+
immediately after checkout to explicitly fetch all tag objects:
|
|
56
|
+
|
|
57
|
+
git fetch --tags --force
|
|
58
|
+
|
|
59
|
+
Self-hosted runners on git 2.48.0 should upgrade to git 2.48.1 or later, which patches
|
|
60
|
+
the tag following regression.
|
|
61
|
+
fix_code:
|
|
62
|
+
- language: yaml
|
|
63
|
+
label: 'Use fetch-depth: 0 for reliable tag fetching (recommended)'
|
|
64
|
+
code: |
|
|
65
|
+
- name: Checkout with full history and all tags
|
|
66
|
+
uses: actions/checkout@v4
|
|
67
|
+
with:
|
|
68
|
+
# fetch-depth: 0 always fetches all commits and tags regardless of git version
|
|
69
|
+
fetch-depth: 0
|
|
70
|
+
- language: yaml
|
|
71
|
+
label: 'Add explicit git fetch --tags step after shallow checkout'
|
|
72
|
+
code: |
|
|
73
|
+
- name: Checkout (shallow)
|
|
74
|
+
uses: actions/checkout@v4
|
|
75
|
+
with:
|
|
76
|
+
fetch-depth: 100
|
|
77
|
+
# fetch-tags: true is unreliable on git 2.48.0 — use explicit fetch instead
|
|
78
|
+
|
|
79
|
+
- name: Fetch tags explicitly (git-version-safe)
|
|
80
|
+
run: git fetch --tags --force
|
|
81
|
+
- language: yaml
|
|
82
|
+
label: 'Check git version in CI for debugging'
|
|
83
|
+
code: |
|
|
84
|
+
- name: Debug git version and tags
|
|
85
|
+
run: |
|
|
86
|
+
git --version
|
|
87
|
+
git tag -l | head -20
|
|
88
|
+
git describe --tags --always || echo "No reachable tags"
|
|
89
|
+
prevention:
|
|
90
|
+
- 'Always use fetch-depth: 0 when git tags are required by downstream steps like git describe or semantic-release'
|
|
91
|
+
- 'Add a git tag -l debug step after checkout to verify tags are present before release tooling runs'
|
|
92
|
+
- 'For self-hosted runners, prefer git 2.48.1+ over 2.48.0 — the regression was patched in 2.48.1'
|
|
93
|
+
- 'Pin to fetch-depth: 0 in release workflows — the performance cost of a full clone is worth the reliability'
|
|
94
|
+
docs:
|
|
95
|
+
- url: 'https://github.com/actions/checkout/issues/2041'
|
|
96
|
+
label: 'actions/checkout#2041: Tags no longer fetch with Git v2.48.0'
|
|
97
|
+
- url: 'https://github.com/actions/checkout#usage'
|
|
98
|
+
label: 'actions/checkout — fetch-depth and fetch-tags input documentation'
|
|
99
|
+
- url: 'https://git-scm.com/docs/git-fetch#_description'
|
|
100
|
+
label: 'git fetch documentation — tag following with --depth'
|