npm - @htekdev/actions-debugger - Versions diffs - 1.0.113 → 1.0.115 - Mend

@htekdev/actions-debugger 1.0.113 → 1.0.115

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/errors/known-unsolved/job-outputs-string-only-no-array-object.yml ADDED Viewed

@@ -0,0 +1,142 @@
+id: known-unsolved-060
+title: 'Job outputs are strings only — arrays, objects, and booleans must be manually JSON-serialized'
+category: known-unsolved
+severity: limitation
+tags:
+  - job-outputs
+  - outputs
+  - string
+  - fromJSON
+  - array
+  - known-limitation
+  - GITHUB_OUTPUT
+patterns:
+  - regex: 'GITHUB_OUTPUT'
+    flags: 'i'
+  - regex: 'needs\.[a-z_-]+\.outputs\.'
+    flags: 'i'
+  - regex: 'fromJSON\s*\('
+    flags: 'i'
+error_messages:
+  - '(no runtime error — arrays or objects written to GITHUB_OUTPUT are silently converted to strings like "Array" or "[object Object]" if not JSON-serialized before writing)'
+root_cause: |
+  GitHub Actions job outputs use the GITHUB_OUTPUT file protocol which only supports
+  string key-value pairs. There is no native type system for job outputs — every
+  value passed through GITHUB_OUTPUT is stored and transmitted as a plain string,
+  regardless of the producing language or shell.
+  This becomes a problem in several common scenarios:
+  1. Passing arrays: A bash array or newline-separated list written to GITHUB_OUTPUT
+     becomes a space-separated string or a literal "[array]" — not a JSON array.
+     Downstream matrix.include: fromJSON() fails or produces a single-element matrix.
+  2. Passing objects: A JSON object written with the plain key=value syntax must be
+     a compact single-line string. Multi-line JSON written that way breaks the
+     key=value line format and corrupts the output, causing a parse error or
+     silently reading only the first line as the value. (Multi-line values are only
+     supported through the name<<DELIMITER heredoc syntax.)
+  3. Boolean comparisons: Outputs written as "true" or "false" are strings, not
+     booleans. Comparing ${{ needs.x.outputs.flag == true }} (boolean literal)
+     silently evaluates to false; only ${{ needs.x.outputs.flag == 'true' }} (string)
+     or ${{ fromJSON(needs.x.outputs.flag) }} works correctly.
+  This is a known platform limitation. The GitHub Actions team has acknowledged the
+  string-only constraint in multiple community discussions. There is no native typed
+  output support on the public roadmap as of mid-2026, and no timeline has been given
+  for adding array or object output types.
+fix: |
+  Serialize arrays and objects to compact single-line JSON before writing to
+  GITHUB_OUTPUT, then deserialize with fromJSON() in consuming jobs.
+  Key rule: use jq -c to produce compact (one-line) JSON — multi-line JSON
+  embedded in GITHUB_OUTPUT silently truncates at the first newline.
+  For booleans: write the literal string "true"/"false" and compare with == 'true'
+  in if: conditions, or wrap with fromJSON() to get a native boolean for contexts
+  that require one (e.g., matrix include).
+fix_code:
+  - language: yaml
+    label: 'Write and consume a JSON array as a job output'
+    code: |
+      jobs:
+        generate-matrix:
+          runs-on: ubuntu-latest
+          outputs:
+            targets: ${{ steps.set-matrix.outputs.targets }}
+          steps:
+            - id: set-matrix
+              run: |
+                # Use jq -c for compact single-line JSON — multi-line breaks GITHUB_OUTPUT
+                TARGETS=$(echo '[{"env":"staging"},{"env":"prod"}]' | jq -c .)
+                echo "targets=$TARGETS" >> "$GITHUB_OUTPUT"
+        deploy:
+          needs: generate-matrix
+          strategy:
+            matrix:
+              target: ${{ fromJSON(needs.generate-matrix.outputs.targets) }}
+          runs-on: ubuntu-latest
+          steps:
+            - run: echo "Deploying to ${{ matrix.target.env }}"
+  - language: yaml
+    label: 'Boolean output — write as string, compare correctly downstream'
+    code: |
+      jobs:
+        check-changes:
+          runs-on: ubuntu-latest
+          outputs:
+            has_changes: ${{ steps.diff.outputs.has_changes }}
+          steps:
+            - id: diff
+              run: |
+                if git diff --quiet HEAD~1; then
+                  echo "has_changes=false" >> "$GITHUB_OUTPUT"
+                else
+                  echo "has_changes=true" >> "$GITHUB_OUTPUT"
+                fi
+        build:
+          needs: check-changes
+          # Compare as string literal — NOT: == true (boolean comparison silently fails)
+          if: needs.check-changes.outputs.has_changes == 'true'
+          runs-on: ubuntu-latest
+          steps:
+            - run: make build
+  - language: yaml
+    label: 'Multi-line JSON — always compact with jq -c before writing'
+    code: |
+      jobs:
+        gather-config:
+          runs-on: ubuntu-latest
+          outputs:
+            config: ${{ steps.read-config.outputs.config }}
+          steps:
+            - uses: actions/checkout@v4
+            - id: read-config
+              run: |
+                # jq -c converts any JSON (even pretty-printed) to a single compact line
+                CONFIG=$(cat deploy-config.json | jq -c .)
+                echo "config=$CONFIG" >> "$GITHUB_OUTPUT"
+        deploy:
+          needs: gather-config
+          runs-on: ubuntu-latest
+          steps:
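+            - env:
+                # Pass the output through env rather than inline ${{ }} expansion:
+                # a single quote inside the JSON would otherwise break the shell string
+                CONFIG: ${{ needs.gather-config.outputs.config }}
+              run: |
+                echo "Region: $(echo "$CONFIG" | jq -r .region)"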
+prevention:
+  - 'Always pipe JSON values through jq -c before writing to GITHUB_OUTPUT — compact single-line format prevents silent value truncation at embedded newlines'
+  - 'Compare boolean-string outputs with == ''true'' (string) not == true (boolean) in if: conditions; or use fromJSON() to convert to a native boolean'
+  - 'Never write a raw bash array to GITHUB_OUTPUT — it expands to a space-separated string; always serialize through jq -c first'
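+  - 'Add a debug step in the consuming job that prints the received value (for example run: echo "$OUTPUTS" with env OUTPUTS: ${{ toJSON(needs.<job>.outputs) }}) to confirm the serialized form looks correct; cat "$GITHUB_OUTPUT" from a separate step does not show earlier outputs because each step gets a fresh output file'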
+docs:
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/passing-information-between-jobs'
+    label: 'GitHub Docs: Passing information between jobs'
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-output-parameter'
+    label: 'GitHub Docs: Setting an output parameter (GITHUB_OUTPUT)'
+  - url: 'https://github.com/orgs/community/discussions/17245'
+    label: 'GitHub Community: Job outputs only support strings — feature request for typed outputs'
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#fromjson'
+    label: 'GitHub Docs: fromJSON expression function'

package/errors/known-unsolved/known-unsolved-062.yml ADDED Viewed

@@ -0,0 +1,87 @@
+id: known-unsolved-062
+title: 'workflow_run chains are limited to three levels — workflows beyond the third workflow_run hop are silently never triggered'
+category: known-unsolved
+severity: limitation
+tags:
+  - workflow-run
+  - chaining
+  - pipeline
+  - limitation
+  - no-fix
+  - event-trigger
+patterns:
+  - regex: 'on:\s*\n\s+workflow_run:'
+    flags: 'i'
+error_messages: []
+root_cause: |
+  GitHub Actions caps workflow_run chaining at three levels. Starting from an initial
+  workflow A, the chain A → B → C → D runs, but any workflow_run-triggered workflow
+  beyond the third hop is never started.
+  From GitHub documentation: "You can't use workflow_run to chain together more than
+  three levels of workflows. For example, if you attempt to trigger five workflows
+  (named B to F) to run sequentially after an initial workflow A has run (that is:
+  A → B → C → D → E → F), workflows E and F will not be run."
+  This restriction prevents runaway trigger loops but also caps linear CI/CD pipelines
+  built from workflow_run chaining alone. A pipeline of the form
+  Build (push) → Test (workflow_run) → Deploy (workflow_run) → Notify (workflow_run)
+  sits exactly at the limit; adding one more workflow_run stage fails silently: the
+  extra workflow never appears in the Actions tab and no error is raised anywhere.
+  There is no runtime error, no annotation, and no warning. The on: workflow_run
+  trigger on the over-limit workflow is simply never evaluated.
+fix: |
+  Replace the workflow_run trigger that exceeds the three-level limit with an explicit
+  dispatch from the workflow immediately upstream of it (workflow B below; the
+  dispatched workflow is workflow C):
+  Option 1 — repository_dispatch: use the GitHub REST API from a job step in workflow
+  B to POST to /repos/{owner}/{repo}/dispatches with a custom event_type. Workflow C
+  listens on on: repository_dispatch with a matching types: filter.
+  Option 2 — workflow_dispatch: use gh workflow run from a step in workflow B to
+  directly trigger workflow C by filename. Requires a GitHub token with actions:write.
+  Option 3 — Consolidate: merge the second and third workflows into a single workflow
+  with job dependencies (needs:) eliminating the cross-workflow hop entirely.
+fix_code:
+  - language: yaml
+    label: 'Does NOT work: workflow_run cannot chain more than three levels deep'
+    code: |
+      # Downstream workflow — this trigger is never evaluated when the upstream
+      # workflow already sits at the third workflow_run level of the chain
+      on:
+        workflow_run:
+          workflows: ["B - Integration Tests"]
+          types: [completed]
+  - language: yaml
+    label: 'Fix: dispatch workflow C via repository_dispatch from workflow B'
+    code: |
+      # In workflow B (intermediate workflow, triggered by workflow_run):
+      jobs:
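+        dispatch-downstream:
+          runs-on: ubuntu-latest
+          # Creating a repository_dispatch event with GITHUB_TOKEN needs contents: write
+          permissions:
+            contents: write
+          if: ${{ github.event.workflow_run.conclusion == 'success' }}
+          steps:
+            - name: Trigger workflow C via repository_dispatch
+              run: |
+                # client_payload must be a JSON object: use key[subkey]=value so gh
+                # builds a nested object instead of sending a JSON-looking string
+                gh api repos/${{ github.repository }}/dispatches \
+                  --method POST \
+                  --raw-field event_type=run-deploy \
+                  --raw-field "client_payload[run_id]=${{ github.event.workflow_run.id }}"
+              env:
+                GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}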
+      # In workflow C:
+      on:
+        repository_dispatch:
+          types: [run-deploy]
+prevention:
+  - 'Design CI/CD pipelines assuming workflow_run allows at most three chained levels; use repository_dispatch or workflow_dispatch for anything deeper'
+  - 'Prefer consolidating multi-stage pipelines into a single workflow with job dependencies (needs:) when the stages always execute in sequence'
+  - 'When a downstream workflow never appears in the Actions tab after merging, count how many workflow_run hops precede it in the chain — anything beyond the third level is never triggered'
+docs:
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflow_run'
+    label: 'GitHub Docs: workflow_run event — three-level chaining limit'
+  - url: 'https://docs.github.com/en/rest/repos/repos#create-a-repository-dispatch-event'
+    label: 'GitHub REST API: Create a repository dispatch event'
+  - url: 'https://cli.github.com/manual/gh_workflow_run'
+    label: 'GitHub CLI: gh workflow run (alternative dispatch method)'

package/errors/known-unsolved/runner-rest-api-busy-false-broker-state-desync.yml ADDED Viewed

@@ -0,0 +1,102 @@
+id: known-unsolved-063
+title: 'REST API reports runner busy:false while broker shows runner actively executing a job'
+category: known-unsolved
+severity: silent-failure
+tags:
+  - self-hosted
+  - runner
+  - autoscaler
+  - rest-api
+  - broker
+  - state-desync
+  - v2-flow
+  - job-killed
+patterns:
+  - regex: 'busy.*false.*runner.*executing|runner.*busy.*false.*job'
+    flags: 'i'
+  - regex: '"busy"\s*:\s*false'
+    flags: 'i'
+error_messages:
+  - '"busy": false'
+  - 'GET /repos/{owner}/{repo}/actions/runners/{id} → {"busy": false}'
+root_cause: |
+  On non-ephemeral self-hosted runners using the V2 broker flow
+  (`broker.actions.githubusercontent.com`), a state desynchronization exists between
+  the broker service and the GitHub REST API:
+  - The broker correctly tracks runner state in real-time: after picking up Job B, the
+    runner reports `JobState: Busy` to the broker and renews its job lease every 60s.
+  - However, `GET /repos/{owner}/{repo}/actions/runners/{runner_id}` (the public REST
+    API) continues to return `"busy": false` during the early phase of job execution.
+    The REST API state may only update after the runner's next periodic sync, which
+    can lag 30–120 seconds behind the broker state.
+  Auto-scaling tools that rely on the REST API to identify idle runners (e.g.,
+  `github-aws-runners/terraform-aws-github-runner`, KEDA GitHub Actions scaler,
+  custom Lambda/CloudFunction scalers) interpret `busy: false` as "runner is idle and
+  safe to terminate." This causes the autoscaler to terminate an EC2/GCE/Azure instance
+  mid-job — killing the runner process with no Actions-level error and marking the job
+  as failed with a runner disconnection error.
+  From the affected job's perspective, the log ends mid-step with "The runner has
+  received a shutdown signal" or the job times out. There is no annotation indicating
+  the root cause was an autoscaler decision based on stale REST API data.
+  No GitHub-side fix is available as of June 2026. The REST API does not expose a
+  real-time busy status consistent with the broker. Open at actions/runner#4422.
+fix: |
+  There is no complete fix — this is a known state inconsistency in the GitHub platform.
+  Workarounds (choose one based on your autoscaling setup):
+  1. **Switch to ephemeral JIT runners (recommended)**: Use JIT tokens and terminate
+     runners after exactly one job. There is no window for autoscalers to misidentify
+     a running job as idle because the runner is registered and deregistered atomically.
+  2. **Add a grace period before termination**: When your autoscaler sees `busy: false`,
+     wait 2–3 minutes and re-poll before actually terminating. This covers the lag
+     between broker state and REST API state.
+  3. **Poll job status instead of runner status**: Use
+     `GET /repos/{owner}/{repo}/actions/runs?status=in_progress` and, for each run,
+     `GET /repos/{owner}/{repo}/actions/runs/{run_id}/jobs` (each job reports its
+     `runner_id`) before terminating any runner, rather than relying on per-runner
+     `busy` status.
+  4. **Use runner labels + job assignment**: If your autoscaler assigns specific runners
+     to specific jobs via labels, you can cross-reference queued/in-progress job
+     assignments against runner IDs before terminating.
+fix_code:
+  - language: yaml
+    label: 'Example: Switch to ephemeral JIT runners (removes the desync window entirely)'
+    code: |
+      # Use JIT runner registration in your autoscaler
+      # Each runner handles exactly one job — busy/idle desync cannot occur
+      # See: https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions#using-just-in-time-runners
+      # In your autoscaler provisioning logic:
+      #   POST /repos/{owner}/{repo}/actions/runners/generate-jit-config
+      #   → Use the returned jit_config to start an ephemeral runner
+      #   → Runner auto-deregisters after job completes — no stale REST state possible
+  - language: yaml
+    label: 'Example: Grace period before termination (Terraform-style pseudocode)'
+    code: |
+      # In your autoscaler Lambda/script, before terminating an instance:
+      # 1. GET /repos/{owner}/{repo}/actions/runners/{runner_id}
+      # 2. If busy == false, wait 2 minutes
+      # 3. Re-poll: GET /repos/{owner}/{repo}/actions/runners/{runner_id}
+      # 4. Only terminate if STILL busy == false after the grace period
+      # This covers the broker→REST lag window (~30-120s observed in practice)
+prevention:
+  - "Prefer ephemeral JIT runners for any workload where mid-job termination would be costly; the broker-REST desync window is zero for single-job-per-runner setups."
+  - "Never terminate a runner instance based solely on a single REST API `busy: false` reading — always double-check with a grace period or secondary signal."
+  - "Monitor for jobs that end with 'runner has received a shutdown signal' — this is a reliable indicator that a runner was terminated externally mid-job."
+  - "If using terraform-aws-github-runner or similar, check whether the tool version has built-in grace periods for the busy-state lag."
+docs:
+  - url: 'https://github.com/actions/runner/issues/4422'
+    label: 'actions/runner#4422 — /runners REST API reports busy:false for active runner'
+  - url: 'https://docs.github.com/en/rest/actions/self-hosted-runners'
+    label: 'REST API: Self-hosted runners'
+  - url: 'https://docs.github.com/en/actions/security-for-github-actions/security-guides/security-hardening-for-github-actions#using-just-in-time-runners'
+    label: 'Just-in-time runners documentation'
+  - url: 'https://github.com/github-aws-runners/terraform-aws-github-runner'
+    label: 'terraform-aws-github-runner (commonly affected autoscaler)'

package/errors/permissions-auth/oidc-immutable-sub-claim-new-repo-trust-policy-mismatch.yml ADDED Viewed

@@ -0,0 +1,122 @@
+id: permissions-auth-067
+title: "OIDC immutable sub claim format for new repos breaks cloud trust policies"
+category: permissions-auth
+severity: error
+tags:
+  - oidc
+  - sub-claim
+  - aws
+  - azure
+  - gcp
+  - immutable
+  - trust-policy
+  - new-repo
+patterns:
+  - regex: 'Not authorized to perform sts:AssumeRoleWithWebIdentity'
+    flags: 'i'
+  - regex: 'AccessDenied.*AssumeRoleWithWebIdentity'
+    flags: 'i'
+  - regex: 'WorkloadIdentityPool.*rejected|token validation failed'
+    flags: 'i'
+  - regex: 'Credentials could not be loaded.*Could not load credentials from any providers'
+    flags: 'i'
+error_messages:
+  - "Not authorized to perform sts:AssumeRoleWithWebIdentity"
+  - "Error: Credentials could not be loaded, please check your action inputs: Could not load credentials from any providers"
+  - "AuthorizationError: Token validation failed: subject claim does not match trust policy"
+  - "WorkloadIdentityPool: token was rejected: sub does not match condition"
+root_cause: |
+  Starting June 18, 2026, GitHub automatically applies a new **immutable subject
+  claim format** to all OIDC tokens issued for repositories created or renamed on
+  or after that date. The new format appends numeric IDs to the owner and repo
+  names:
+    Old format: repo:my-org/my-repo:ref:refs/heads/main
+    New format: repo:my-org-123456/my-repo-456789:ref:refs/heads/main
+  Cloud provider trust policies (AWS IAM `StringEquals`, GCP Workload Identity
+  Federation condition, Azure Federated Identity) that were copied from docs or
+  examples using only the human-readable `repo:OWNER/REPO:*` pattern will never
+  match the new immutable format. The claim is technically valid — GitHub issued it
+  correctly — but the trust policy simply rejects it, producing an
+  authorization error.
+  Existing repositories are NOT affected automatically; they must explicitly opt in
+  via the organization or repository OIDC settings. Only new repos created after
+  June 18, 2026 and repos renamed/transferred after that date receive the new
+  format unconditionally.
+  This is distinct from the repo-rename scenario (permissions-auth-019, which covers
+  mutable names changing) and the environment-block scenario (permissions-auth-054,
+  which covers the sub claim changing format when an `environment:` key is added).
+  This entry covers the baseline mismatch for freshly created repos.
+fix: |
+  Update your cloud provider trust policy to use the new immutable sub claim
+  format. Use the GitHub OIDC preview API to inspect the exact subject claim your
+  repository will produce before updating the policy:
+    GET /repos/{owner}/{repo}/actions/oidc/customization/sub
+  For AWS IAM, replace the StringEquals condition value with the new format
+  (including numeric IDs), or switch to StringLike with a wildcard:
+    repo:my-org-*/my-repo-*:ref:refs/heads/main
+  For GCP Workload Identity Federation, update the attribute condition string.
+  For Azure Federated Identity, update the subject field in the credential.
+  Alternatively, use GitHub's custom subject claim feature to define a subject
+  built from ID-based claims that do not change on rename (e.g., include
+  `repository_owner_id`, `repository_id`, and `ref`), and match on that.
+fix_code:
+  - language: yaml
+    label: "AWS — update IAM trust policy StringEquals to new immutable format"
+    code: |
+      # In your AWS IAM trust policy JSON (not YAML), update the condition:
+      # OLD — will fail for repos created after June 18, 2026:
+      #   "token.actions.githubusercontent.com:sub": "repo:my-org/my-repo:ref:refs/heads/main"
+      #
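+      # NEW — to accept both formats, list both exact values under StringEquals
+      # (AWS treats an array of condition values as OR):
+      #   "token.actions.githubusercontent.com:sub": [
+      #     "repo:my-org/my-repo:ref:refs/heads/main",
+      #     "repo:my-org-123456/my-repo-456789:ref:refs/heads/main"
+      #   ]
+      # Avoid broad StringLike patterns such as "repo:my-org*my-repo*:..." — "*" also
+      # matches "/" and ":", so other owners/repos sharing the prefix would be trusted.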
+      #
+      # Or use the exact immutable format shown in the OIDC preview API response.
+      #
+      # Alternatively, switch to a custom subject claim in GitHub OIDC settings
+      # that only includes fields you control.
+  - language: yaml
+    label: "GitHub Actions workflow is unchanged — the issue is in the cloud provider trust policy"
+    code: |
+      jobs:
+        deploy:
+          permissions:
+            id-token: write
+            contents: read
+          runs-on: ubuntu-latest
+          steps:
+            - uses: aws-actions/configure-aws-credentials@v4
+              with:
+                role-to-assume: arn:aws:iam::123456789012:role/my-role
+                aws-region: us-east-1
+            # If the above step fails with "Not authorized to perform
+            # sts:AssumeRoleWithWebIdentity", the trust policy sub claim
+            # condition does not match the new immutable format.
+            # Update the IAM trust policy — not this workflow file.
+prevention:
+  - "Match the sub claim exactly: list the old and new immutable values together
+    under StringEquals. If StringLike is unavoidable, keep the wildcard as narrow
+    as possible — in AWS, * also matches / and :, so broad patterns admit other repos."
+  - "After creating a new repo, call the GitHub OIDC preview API
+    (GET /repos/{owner}/{repo}/actions/oidc/customization/sub) to see the
+    exact sub claim format before configuring the cloud trust policy."
+  - "Consider using GitHub's custom subject claim feature to pin a simplified,
+    stable sub claim structure that does not change with naming format updates."
+  - "Existing repos can opt in to the new format in repository or organization
+    OIDC settings — test in a staging environment first to confirm trust policies
+    are updated before enabling in production."
+docs:
+  - url: "https://github.blog/changelog/2026-04-23-immutable-subject-claims-for-github-actions-oidc-tokens/"
+    label: "GitHub Changelog: Immutable subject claims for GitHub Actions OIDC tokens (April 23, 2026)"
+  - url: "https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/about-security-hardening-with-openid-connect"
+    label: "About security hardening with OpenID Connect"
+  - url: "https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/customizing-the-subject-claims-for-an-organization-or-repository"
+    label: "Customizing the subject claims for an organization or repository"

package/errors/permissions-auth/permissions-auth-064.yml ADDED Viewed

@@ -0,0 +1,122 @@
+id: permissions-auth-064
+title: 'GitHub OIDC Provider Intermittent 500 — Transient Token Request Failures'
+category: permissions-auth
+severity: error
+tags:
+  - oidc
+  - intermittent
+  - 500
+  - token-request
+  - transient
+  - retry
+patterns:
+  - regex: 'Request to OIDC provider failed with status 500'
+    flags: 'i'
+  - regex: 'Unable to get OIDC token.*500'
+    flags: 'i'
+  - regex: 'Failed to get ID token.*status.*500'
+    flags: 'i'
+error_messages:
+  - "Error: Unable to get OIDC token: Error: Request to OIDC provider failed with status 500"
+  - "Request to OIDC provider failed with status 500"
+  - "Error: Failed to get ID token: Request failed with status code 500"
+root_cause: |
+  GitHub's OIDC token endpoint (`https://token.actions.githubusercontent.com`) occasionally
+  returns HTTP **500 Internal Server Error** responses due to transient infrastructure issues
+  on GitHub's side. The failure is server-side and non-deterministic — workflows that ran
+  successfully hours ago may fail today, and re-running the identical job minutes later often
+  succeeds.
+  **This error is distinct from other OIDC failures:**
+  | Error | Cause | Fix |
+  |---|---|---|
+  | `Unable to get ACTIONS_ID_TOKEN_REQUEST_URL env variable` | Missing `id-token: write` permission | Add permission to workflow |
+  | `OIDC: Could not parse JWT` | Malformed sub-claim or trust config | Fix cloud provider OIDC trust config |
+  | `429 Too Many Requests` | Rate limiting (large parallel matrix) | Add delays, reduce parallelism |
+  | **`status 500`** | **Transient GitHub infrastructure failure** | **Retry the job** |
+  The 500 occurs **after** the permission and environment variable checks pass. The OIDC
+  endpoint is reachable but GitHub's token minting service returned an internal error.
+  Common patterns where 500s surface:
+  - Workflows that run OIDC auth in every push/PR commit (high frequency)
+  - Parallel matrix jobs that all request tokens simultaneously
+  - Periods of GitHub infrastructure maintenance or elevated load
+fix: |
+  **Option 1 — Re-run the failed job.** The 500 is stateless; simply triggering a re-run
+  from the GitHub Actions UI is usually sufficient for non-blocking situations.
+  **Option 2 — Add a retry wrapper** around the cloud authentication step using an action
+  like `nick-fields/retry` or `Wandalen/wretry.action`. Wrap only the OIDC-dependent step
+  so that a transient 500 triggers an automatic retry without failing the entire workflow.
+  **Option 3 — Shell retry loop** for custom OIDC token fetches. Use a loop that retries
+  the request up to 3 times with a short backoff before failing.
+  There is no client-side configuration that prevents the server 500 — retrying is the
+  only mitigation.
+fix_code:
+  - language: yaml
+    label: 'Retry wrapper with nick-fields/retry'
+    code: |
+      jobs:
+        deploy:
+          runs-on: ubuntu-latest
+          permissions:
+            id-token: write
+            contents: read
+          steps:
+            - uses: actions/checkout@v4
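+            # nick-fields/retry re-runs a shell command (it cannot wrap a `uses:` step),
+            # so the command below must itself be the OIDC-dependent call you want retried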
+            - name: Configure AWS credentials (with retry)
+              uses: nick-fields/retry@v3
+              with:
+                max_attempts: 3
+                retry_wait_seconds: 15
+                timeout_minutes: 5
+                command: >
+                  aws sts get-caller-identity  # Or any OIDC-gated step
+            # Direct usage example — if your action supports retries natively
+            - name: Authenticate to AWS
+              uses: aws-actions/configure-aws-credentials@v4
+              with:
+                role-to-assume: arn:aws:iam::123456789012:role/GitHubActionsRole
+                aws-region: us-east-1
+  - language: yaml
+    label: 'Shell retry loop for custom OIDC token fetch'
+    code: |
+      steps:
+        - name: Fetch OIDC token with retry
+          run: |
+            MAX_ATTEMPTS=3
+            for i in $(seq 1 $MAX_ATTEMPTS); do
+              echo "Attempt $i of $MAX_ATTEMPTS..."
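+              TOKEN=$(curl -sf \
+                -H "Authorization: bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
+                "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=sts.amazonaws.com" \
+                | jq -r '.value // empty') || TOKEN=""
+              # jq exits 0 on empty input, so a failed curl must be detected by
+              # checking for an empty token rather than the pipeline's exit status
+              if [ -n "$TOKEN" ]; then
+                echo "::add-mask::$TOKEN"
+                echo "OIDC_TOKEN=${TOKEN}" >> "$GITHUB_ENV"
+                echo "Token acquired."
+                exit 0
+              fi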
+              echo "Attempt $i failed."
+              [ $i -lt $MAX_ATTEMPTS ] && sleep $((10 * i))
+            done
+            echo "All $MAX_ATTEMPTS attempts failed."
+            exit 1
+prevention:
+  - 'Add retry logic to all OIDC-based cloud auth steps — a transient 500 should not block a deployment that can safely retry'
+  - 'Distinguish error types in runbooks: 500 = transient (retry), 403 = permission error (fix config), 429 = rate limited (add delay)'
+  - 'Monitor https://www.githubstatus.com/ when 500s appear in multiple unrelated workflows simultaneously — it often indicates a GitHub Actions incident'
+  - 'For critical pipelines, implement automatic re-run-on-failure so OIDC 500s recover without manual intervention (e.g., a workflow_run-triggered workflow that calls gh run rerun <run-id> --failed)'
+docs:
+  - url: 'https://docs.github.com/en/actions/security-for-github-actions/security-hardening-your-deployments/about-security-hardening-with-openid-connect'
+    label: 'About security hardening with OpenID Connect — GitHub Actions docs'
+  - url: 'https://www.githubstatus.com/'
+    label: 'GitHub Status — check for active GitHub Actions incidents'
+  - url: 'https://github.com/nick-fields/retry'
+    label: 'nick-fields/retry — retry wrapper action for GitHub Actions'