@htekdev/actions-debugger 1.0.14 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/db/search.js +3 -1
  2. package/dist/db/search.js.map +1 -1
  3. package/dist/tools/suggest-fix.d.ts.map +1 -1
  4. package/dist/tools/suggest-fix.js +5 -1
  5. package/dist/tools/suggest-fix.js.map +1 -1
  6. package/errors/caching-artifacts/cache-key-too-long.yml +93 -0
  7. package/errors/caching-artifacts/cache-path-not-exist-skipped.yml +152 -0
  8. package/errors/caching-artifacts/cache-save-same-key-html-conflict.yml +109 -0
  9. package/errors/caching-artifacts/docker-buildx-gha-cache-capacity.yml +107 -0
  10. package/errors/caching-artifacts/setup-ruby-bundler-ephemeral-workdir-cache-miss.yml +147 -0
  11. package/errors/caching-artifacts/upload-artifact-v3-retirement-blocked.yml +123 -0
  12. package/errors/caching-artifacts/upload-artifact-v4-large-file-macos-hang.yml +111 -0
  13. package/errors/concurrency-timing/always-cleanup-5min-forced-kill.yml +140 -0
  14. package/errors/concurrency-timing/concurrency-group-env-context-undefined.yml +99 -0
  15. package/errors/concurrency-timing/required-check-pending-path-filter-skip.yml +160 -0
  16. package/errors/concurrency-timing/wait-timer-cancel-in-progress-starvation.yml +125 -0
  17. package/errors/known-unsolved/composite-action-step-timeout-minutes-ignored.yml +146 -0
  18. package/errors/known-unsolved/reusable-workflow-no-composite-action-call.yml +116 -0
  19. package/errors/known-unsolved/schedule-trigger-default-branch-only.yml +113 -0
  20. package/errors/known-unsolved/secrets-not-allowed-in-if-conditions.yml +149 -0
  21. package/errors/known-unsolved/workflow-50-rerun-limit.yml +110 -0
  22. package/errors/permissions-auth/check-run-status-modification-blocked.yml +134 -0
  23. package/errors/permissions-auth/dependabot-pr-secrets-unavailable.yml +133 -0
  24. package/errors/permissions-auth/fine-grained-pat-deployment-write-required.yml +146 -0
  25. package/errors/permissions-auth/github-app-installation-token-new-format.yml +124 -0
  26. package/errors/permissions-auth/github-packages-read-requires-packages-permission.yml +128 -0
  27. package/errors/permissions-auth/oidc-id-token-write-permission-missing.yml +169 -0
  28. package/errors/permissions-auth/permissions-empty-block-removes-contents-read.yml +97 -0
  29. package/errors/permissions-auth/reusable-workflow-permissions-not-inherited.yml +114 -0
  30. package/errors/runner-environment/checkout-windows-ebusy-lock.yml +124 -0
  31. package/errors/runner-environment/deprecated-action-version-auto-rejected.yml +89 -0
  32. package/errors/runner-environment/github-hosted-runner-disk-space-full.yml +85 -0
  33. package/errors/runner-environment/github-path-same-step-not-found.yml +114 -0
  34. package/errors/runner-environment/github-script-v6-octokit-rest-actions-not-function.yml +87 -0
  35. package/errors/runner-environment/macos-13-deprecation-brownout.yml +93 -0
  36. package/errors/runner-environment/macos-15-mono-nuget-removed.yml +151 -0
  37. package/errors/runner-environment/macos-15-xcode-simulator-sdk-policy.yml +141 -0
  38. package/errors/runner-environment/multi-runtime-nov2025-removal.yml +120 -0
  39. package/errors/runner-environment/runner-oom-exit-code-137.yml +117 -0
  40. package/errors/runner-environment/setup-go-go123-telemetry-cache-failure.yml +92 -0
  41. package/errors/runner-environment/setup-java-distribution-required.yml +108 -0
  42. package/errors/runner-environment/ubuntu-2004-retirement-brownout.yml +107 -0
  43. package/errors/runner-environment/windows-latest-d-drive-removed.yml +104 -0
  44. package/errors/runner-environment/windows-vs2026-cuda-host-compiler-unsupported.yml +145 -0
  45. package/errors/silent-failures/event-commits-empty-on-workflow-dispatch.yml +110 -0
  46. package/errors/silent-failures/fetch-tags-depth-one-silent-no-op.yml +77 -0
  47. package/errors/silent-failures/github-env-multiline-value-truncated.yml +127 -0
  48. package/errors/silent-failures/github-sha-pr-merge-commit-not-head.yml +150 -0
  49. package/errors/silent-failures/job-output-masked-as-secret-empty.yml +147 -0
  50. package/errors/silent-failures/upload-artifact-permissions-stripped.yml +98 -0
  51. package/errors/triggers/pull-request-branches-filter-matches-base-not-head.yml +140 -0
  52. package/errors/triggers/push-event-fires-on-branch-delete.yml +129 -0
  53. package/errors/triggers/push-first-commit-before-sha-zeros.yml +160 -0
  54. package/errors/yaml-syntax/continue-on-error-env-context-rejected.yml +130 -0
  55. package/errors/yaml-syntax/fromjson-empty-string-crash.yml +99 -0
  56. package/errors/yaml-syntax/if-bang-negation-yaml-tag.yml +145 -0
  57. package/errors/yaml-syntax/local-action-path-always-top-level.yml +142 -0
  58. package/package.json +1 -1
@@ -0,0 +1,147 @@
1
+ id: caching-artifacts-018
2
+ title: "setup-ruby Bundler Cache Always Misses on Ephemeral Self-Hosted Runners Due to Workdir in Cache Key"
3
+ category: caching-artifacts
4
+ severity: silent-failure
5
+ tags:
6
+ - ruby
7
+ - setup-ruby
8
+ - bundler
9
+ - cache
10
+ - self-hosted
11
+ - ephemeral
12
+ - cache-miss
13
+ - cache-key
14
+ patterns:
15
+ - regex: "Cache not found for.*setup-ruby-bundler-cache.*wd-.*[0-9]{8,}"
16
+ flags: "i"
17
+ - regex: "setup-ruby-bundler-cache.*wd-\\/.*[a-f0-9]{8,}.*Gemfile\\.lock"
18
+ flags: "i"
19
+ - regex: "No cache found.*setup-ruby-bundler.*workdir.*ephemeral"
20
+ flags: "i"
21
+ - regex: "Cache miss.*bundler.*setup-ruby.*self-hosted"
22
+ flags: "i"
23
+ error_messages:
24
+ - "Cache not found for input keys: setup-ruby-bundler-cache-v6-ubuntu-22.04-x64-ruby-3.3.6-wd-/codebuild/output/src1813367680/src/actions-runner/_work/myapp/myapp-with--without--only--Gemfile.lock-3f96ad38..."
25
+ - "No cache found for key: setup-ruby-bundler-cache-v6-..."
26
+ root_cause: |
27
+ `ruby/setup-ruby` includes the absolute working directory path (`wd-<path>`) as a
28
+ component of the Bundler cache key. The full cache key format is:
29
+
30
+ setup-ruby-bundler-cache-v{VERSION}-{OS}-{ARCH}-ruby-{RUBY_VERSION}-wd-{WORKDIR}-...{GEMFILE_HASH}
31
+
32
+ On GitHub-hosted runners, the working directory is identical on every run
33
+ (`/home/runner/work/{repo}/{repo}` on Linux), so the cache key is stable across runs
34
+ and the cache is reused correctly.
35
+
36
+ On **ephemeral self-hosted runners** (e.g., AWS CodeBuild, Kubernetes runner pods with
37
+ unique workspace paths, or any runner that generates a unique working directory path
38
+ per run for isolation), the `wd-` component changes with each run. This makes every
39
+ cache lookup a miss — the Bundler gems are reinstalled from scratch on every run,
40
+ completely defeating the purpose of caching.
41
+
42
+ This affects:
43
+ - AWS CodeBuild with GitHub Actions runners (CodeBuild generates unique src paths per build)
44
+ - Kubernetes-based ephemeral runners where the pod workspace path includes a job ID
45
+ - Any custom runner setup that includes a timestamp or job ID in the workspace path
46
+
47
+ The issue is open (ruby/setup-ruby#904, April 2026) and has no upstream fix yet as of
48
+ mid-2026. The workdir was included in the cache key to allow multiple Ruby projects to
49
+ have separate caches within the same repository, but it breaks ephemeral runners as a
50
+ side effect.
51
+
52
+ Note: This is a **silent failure** — no error is thrown; the workflow succeeds but Bundler
53
+ installs all gems on every run, causing slow CI with no visible warning about cache
54
+ effectiveness.
55
+ fix: |
56
+ **Workaround 1 — Disable setup-ruby's built-in bundler cache, use actions/cache manually**:
57
+ Set `bundler-cache: false` in setup-ruby and manage the Bundler cache yourself with
58
+ `actions/cache`, using only the `BUNDLE_PATH` and `Gemfile.lock` hash as the key
59
+ (no workdir component). This is the most reliable fix for ephemeral runners.
60
+
61
+ **Workaround 2 — Normalize the working directory** (if your runner supports it):
62
+ Configure your runner to use a fixed, predictable working directory path instead of
63
+ a unique-per-job path. This makes the setup-ruby cache key stable.
64
+
65
+ **Workaround 3 — Cache the Ruby gems directory directly**:
66
+ Cache `~/.bundle` or the Bundler install path rather than the per-project `vendor/bundle`,
67
+ since the home directory path is typically stable even on ephemeral runners.
68
+ fix_code:
69
+ - language: yaml
70
+ label: "Disable setup-ruby cache and use actions/cache with stable key"
71
+ code: |
72
+ jobs:
73
+ test:
74
+ runs-on: self-hosted # ephemeral runner
75
+ steps:
76
+ - uses: actions/checkout@v4
77
+
78
+ - uses: ruby/setup-ruby@v1
79
+ with:
80
+ ruby-version: '3.3'
81
+ bundler-cache: false # Disable built-in cache (broken on ephemeral runners)
82
+
83
+ # Cache gems using a workdir-independent key
84
+ - uses: actions/cache@v4
85
+ id: bundle-cache
86
+ with:
87
+ path: ~/.bundle/cache
88
+ key: ${{ runner.os }}-bundle-${{ hashFiles('**/Gemfile.lock') }}
89
+ restore-keys: |
90
+ ${{ runner.os }}-bundle-
91
+
92
+ - name: Install gems
93
+ run: bundle install --path ~/.bundle/cache
94
+ if: steps.bundle-cache.outputs.cache-hit != 'true'
95
+
96
+ - name: Bundle check
97
+ run: bundle check || bundle install
98
+ - language: yaml
99
+ label: "Cache vendor/bundle with BUNDLE_PATH and stable key (no workdir)"
100
+ code: |
101
+ jobs:
102
+ test:
103
+ runs-on: self-hosted
104
+ steps:
105
+ - uses: actions/checkout@v4
106
+
107
+ - uses: ruby/setup-ruby@v1
108
+ with:
109
+ ruby-version: '3.3'
110
+ bundler-cache: false
111
+
112
+ - uses: actions/cache@v4
113
+ with:
114
+ path: vendor/bundle
115
+ key: ${{ runner.os }}-gems-${{ hashFiles('**/Gemfile.lock') }}
116
+ restore-keys: |
117
+ ${{ runner.os }}-gems-
118
+
119
+ - name: Install gems
120
+ run: |
121
+ bundle config path vendor/bundle
122
+ bundle install --jobs 4 --retry 3
123
+ - language: yaml
124
+ label: "GitHub-hosted runner — built-in cache works fine (no workaround needed)"
125
+ code: |
126
+ jobs:
127
+ test:
128
+ runs-on: ubuntu-latest # GitHub-hosted: stable workdir, cache works
129
+ steps:
130
+ - uses: actions/checkout@v4
131
+
132
+ - uses: ruby/setup-ruby@v1
133
+ with:
134
+ ruby-version: '3.3'
135
+ bundler-cache: true # Works correctly on GitHub-hosted runners
136
+ prevention:
137
+ - "Always use `bundler-cache: false` with `ruby/setup-ruby` on ephemeral self-hosted runners (CodeBuild, ephemeral Kubernetes runners) and manage caching manually."
138
+ - "Verify cache effectiveness by checking the `cache-hit` output and monitoring job duration across runs — a stable cache hit means no Bundler reinstall."
139
+ - "Use workdir-independent cache keys: `${{ runner.os }}-bundle-${{ hashFiles('**/Gemfile.lock') }}` instead of paths that include dynamic segments."
140
+ - "Track ruby/setup-ruby#904 for an upstream fix that makes the cache key workdir-independent by default."
141
+ docs:
142
+ - url: "https://github.com/ruby/setup-ruby/issues/904"
143
+ label: "ruby/setup-ruby#904: Bundler cache not working due to ephemeral workdir (open)"
144
+ - url: "https://github.com/ruby/setup-ruby#caching-bundle-install-automatically"
145
+ label: "setup-ruby README: Caching bundle install automatically"
146
+ - url: "https://github.com/actions/cache/blob/main/README.md"
147
+ label: "actions/cache README: manual caching approach"
@@ -0,0 +1,123 @@
1
+ id: caching-artifacts-016
2
+ title: "actions/upload-artifact v3 Automatically Blocked After January 2025 Retirement"
3
+ category: caching-artifacts
4
+ severity: error
5
+ tags:
6
+ - upload-artifact
7
+ - download-artifact
8
+ - v3
9
+ - deprecated
10
+ - retirement
11
+ - brownout
12
+ patterns:
13
+ - regex: "automatically failed.*deprecated.*version.*upload-artifact"
14
+ flags: "i"
15
+ - regex: "This request has been automatically failed because it uses a deprecated version"
16
+ flags: "i"
17
+ - regex: "upload-artifact.*v3.*deprecated"
18
+ flags: "i"
19
+ - regex: "download-artifact.*v3.*deprecated"
20
+ flags: "i"
21
+ error_messages:
22
+ - "This request has been automatically failed because it uses a deprecated version of actions/upload-artifact: v3"
23
+ - "This request has been automatically failed because it uses a deprecated version of actions/download-artifact: v3"
24
+ root_cause: |
25
+ GitHub retired **actions/upload-artifact@v3** and **actions/download-artifact@v3**
26
+ on January 30, 2025. After the retirement date, any workflow still calling v3 of
27
+ these actions receives an immediate hard failure at the step level — the action
28
+ does not run; instead, the runner returns:
29
+
30
+ "This request has been automatically failed because it uses a deprecated
31
+ version of actions/upload-artifact: v3"
32
+
33
+ **Timeline:**
34
+ - April 16, 2024 — GitHub announced v3 deprecation and scheduled retirement
35
+ - November 2024 → January 2025 — Brownout periods (random scheduled failures)
36
+ - January 30, 2025 — Full retirement: all v3 calls blocked unconditionally
37
+
38
+ **Why repos are still affected:**
39
+ - Many CI configurations were written before the deprecation announcement and
40
+ were never updated
41
+ - Reusable workflows called from other orgs/repos may reference v3 internally
42
+ - Third-party action marketplace actions that internally use v3 as a dependency
43
+ were broken until their own maintainers upgraded
44
+ - Workflows with infrequent trigger schedules (e.g., monthly releases) only hit
45
+ the brownout windows occasionally, masking the problem until full retirement
46
+
47
+ Source: GitHub Changelog 2024-04-16, community discussions/149325
48
+ fix: |
49
+ Upgrade both upload and download steps to v4 simultaneously. Do NOT mix v3 and v4
50
+ in the same workflow — they use different artifact backends and are not cross-compatible.
51
+
52
+ **Key v4 behavior changes to be aware of:**
53
+ - Artifact names must be unique per workflow run (v4 does NOT overwrite; throws 409)
54
+ - Hidden files (dotfiles) are excluded by default — set `include-hidden-files: true`
55
+ if you need them
56
+ - Cross-repo artifact access requires explicit permissions
57
+ - GHES instances older than 3.15 do not support v4 — pin to v3 only if on old GHES
58
+ (but old GHES has its own known issues)
59
+ fix_code:
60
+ - language: yaml
61
+ label: "Migrate upload and download to v4 (minimal change)"
62
+ code: |
63
+ jobs:
64
+ build:
65
+ runs-on: ubuntu-latest
66
+ steps:
67
+ - uses: actions/checkout@v4
68
+ - run: npm run build
69
+
70
+ # ❌ Retired — will auto-fail after Jan 30, 2025
71
+ # - uses: actions/upload-artifact@v3
72
+ # with:
73
+ # name: dist
74
+ # path: dist/
75
+
76
+ # ✅ Use v4
77
+ - uses: actions/upload-artifact@v4
78
+ with:
79
+ name: dist
80
+ path: dist/
81
+
82
+ deploy:
83
+ needs: build
84
+ runs-on: ubuntu-latest
85
+ steps:
86
+ # ✅ Download also on v4 — must match upload version
87
+ - uses: actions/download-artifact@v4
88
+ with:
89
+ name: dist
90
+ path: dist/
91
+ - language: yaml
92
+ label: "Handle v4 duplicate-name conflict if multiple jobs upload the same name"
93
+ code: |
94
+ jobs:
95
+ build:
96
+ strategy:
97
+ matrix:
98
+ target: [linux, windows, macos]
99
+ runs-on: ubuntu-latest
100
+ steps:
101
+ - run: echo "Build ${{ matrix.target }}" > output.txt
102
+
103
+ # v4: artifact names must be unique per run
104
+ - uses: actions/upload-artifact@v4
105
+ with:
106
+ # Append matrix value to keep names unique
107
+ name: output-${{ matrix.target }}
108
+ path: output.txt
109
+ prevention:
110
+ - "Run `grep -r 'upload-artifact@v3\\|download-artifact@v3' .github/` periodically to catch stale version pins."
111
+ - "Use Dependabot or Renovate to automatically open PRs when GitHub-maintained actions release new major versions."
112
+ - "Subscribe to the GitHub Changelog (https://github.blog/changelog/) for deprecation notices."
113
+ - "When upgrading to v4, test artifact names for uniqueness — v4 throws HTTP 409 when the same name is uploaded twice in one run."
114
+ - "Set `retention-days` explicitly on v4 artifacts; default retention changed between v3 and v4."
115
+ docs:
116
+ - url: "https://github.blog/changelog/2024-04-16-deprecation-notice-v3-of-the-artifact-actions"
117
+ label: "GitHub Changelog: Deprecation notice — v3 of the artifact actions"
118
+ - url: "https://github.com/orgs/community/discussions/149325"
119
+ label: "Community discussion — workflows failing after artifact v3 retirement"
120
+ - url: "https://github.com/actions/upload-artifact/blob/main/docs/MIGRATION.md"
121
+ label: "actions/upload-artifact — v3 to v4 migration guide"
122
+ - url: "https://docs.github.com/en/actions/using-workflows/storing-workflow-data-as-artifacts"
123
+ label: "GitHub Docs: Storing workflow data as artifacts"
@@ -0,0 +1,111 @@
1
+ id: caching-artifacts-021
2
+ title: "upload-artifact v4 Silently Hangs on Large Files (500MB+) on macOS Runners"
3
+ category: caching-artifacts
4
+ severity: silent-failure
5
+ tags:
6
+ - upload-artifact
7
+ - macos
8
+ - large-file
9
+ - hang
10
+ - timeout
11
+ - silent-failure
12
+ - v4
13
+ patterns:
14
+ - regex: "Uploaded bytes \\d+"
15
+ flags: "i"
16
+ - regex: "upload-artifact.*stall|stall.*upload-artifact"
17
+ flags: "i"
18
+ - regex: "The operation was cancelled.*upload|upload.*operation was cancelled"
19
+ flags: "i"
20
+ - regex: "Error: The process.*took too long.*upload-artifact"
21
+ flags: "i"
22
+ error_messages:
23
+ - "Uploaded bytes 8388608"
24
+ - "The runner has received a shutdown signal. This can happen when the runner service is stopped, or a manually started runner is canceled."
25
+ root_cause: |
26
+ `actions/upload-artifact@v4` intermittently stalls during upload on macOS GitHub-hosted
27
+ runners (macos-13-xl-arm64, macos-14-xlarge, macos-15) when artifact size is approximately
28
+ 500 MB or larger. The stall manifests as the upload progress halting after logging "Uploaded
29
+ bytes XXXXXXXXX" with no further output — the job then exceeds its timeout and is cancelled
30
+ by GitHub without an explicit error message.
31
+
32
+ This behavior was reported and tracked in actions/upload-artifact#527. The hang appears
33
+ intermittent (roughly 30–50% failure rate for affected workflows), which makes it difficult
34
+ to diagnose in standard CI logs — the workflow shows as "cancelled" rather than "failed",
35
+ masking the root cause.
36
+
37
+ Contributing factors observed in community reports:
38
+ - Large uncompressed artifacts (binaries, build artifacts, test reports with raw data)
39
+ - macOS ARM64 hosted runners appear more susceptible than Linux or Windows runners
40
+ - Compression level settings do not consistently prevent the hang
41
+
42
+ This is a silent failure because:
43
+ 1. The upload simply stops with no error log entry
44
+ 2. The job shows as "cancelled" (not "failed") in the GitHub UI
45
+ 3. Downstream artifact-download steps fail with "no artifact found" — the real cause is upstream
46
+ fix: |
47
+ Use one or more of these mitigations while the issue is tracked by the actions team:
48
+
49
+ 1. **Split large artifacts**: Break large upload paths into multiple smaller upload steps, each
50
+ under ~200MB. This reduces the risk of hitting the hang threshold.
51
+
52
+ 2. **Add an explicit timeout**: Set `timeout-minutes` on the upload step to detect hangs faster
53
+ and fail with a clear error rather than waiting for the job-level timeout.
54
+
55
+ 3. **Retry the upload**: Wrap the upload step in a retry loop or use a community action like
56
+ `nick-fields/retry` to automatically re-attempt on failure.
57
+
58
+ 4. **Use direct storage for very large artifacts**: For artifacts over 1GB, upload directly to
59
+ S3, Azure Blob Storage, or GCS using provider CLI tools. Use upload-artifact only for test
60
+ reports and smaller build outputs.
61
+
62
+ 5. **Switch to Linux runners**: If macOS-specific features are not required for the upload
63
+ phase, run artifact collection on an ubuntu-latest runner where the hang does not occur.
64
+ fix_code:
65
+ - language: yaml
66
+ label: "Add a step-level timeout to the upload step so hangs fail fast"
67
+ code: |
68
+ - name: Upload large artifact
69
+ uses: actions/upload-artifact@v4
70
+ timeout-minutes: 10 # fail fast instead of waiting for job timeout
71
+ with:
72
+ name: release-binaries
73
+ path: dist/
74
+ compression-level: 6
75
+ retention-days: 7
76
+ - language: yaml
77
+ label: "Split large artifact into parts to reduce hang risk"
78
+ code: |
79
+ - name: Upload binaries (part 1)
80
+ uses: actions/upload-artifact@v4
81
+ with:
82
+ name: binaries-part1
83
+ path: dist/platform-a/
84
+
85
+ - name: Upload binaries (part 2)
86
+ uses: actions/upload-artifact@v4
87
+ with:
88
+ name: binaries-part2
89
+ path: dist/platform-b/
90
+ - language: yaml
91
+ label: "Upload to S3 for very large artifacts (bypass upload-artifact)"
92
+ code: |
93
+ - name: Upload large artifact to S3
94
+ env:
95
+ AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
96
+ AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
97
+ run: |
98
+ aws s3 cp dist/release.tar.gz s3://my-bucket/artifacts/${{ github.sha }}/release.tar.gz
99
+ echo "Uploaded to s3://my-bucket/artifacts/${{ github.sha }}/release.tar.gz"
100
+ prevention:
101
+ - "Keep individual artifact uploads under 200MB per upload step on macOS runners to avoid the hang threshold."
102
+ - "Always set `timeout-minutes` on upload steps for large files so the CI job fails fast with a clear error instead of silently timing out."
103
+ - "Monitor for `cancelled` status on jobs that use upload-artifact on macOS — these may be silently failing uploads."
104
+ - "For release artifacts exceeding 1GB, use cloud storage (S3, Azure Blob, GCS) directly rather than upload-artifact."
105
+ docs:
106
+ - url: "https://github.com/actions/upload-artifact/issues/527"
107
+ label: "actions/upload-artifact#527 — macOS large-file upload hang report and discussion"
108
+ - url: "https://github.com/actions/upload-artifact"
109
+ label: "actions/upload-artifact — official repository and documentation"
110
+ - url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/storing-workflow-data-as-artifacts"
111
+ label: "GitHub Docs — storing workflow data as artifacts"
@@ -0,0 +1,140 @@
1
+ id: concurrency-timing-011
2
+ title: "always() Cleanup Jobs Forcibly Killed After 5-Minute Cancellation Timeout"
3
+ category: concurrency-timing
4
+ severity: warning
5
+ tags:
6
+ - always
7
+ - cancellation
8
+ - cleanup
9
+ - forced-termination
10
+ - notification
11
+ - timeout
12
+ - teardown
13
+ patterns:
14
+ - regex: "The runner has received a shutdown signal"
15
+ flags: "i"
16
+ - regex: "Job was cancelled"
17
+ flags: "i"
18
+ - regex: "The operation was canceled"
19
+ flags: "i"
20
+ error_messages:
21
+ - "The runner has received a shutdown signal. This can happen when the runner service is stopped, or a manually started runner is canceled."
22
+ - "Job was cancelled"
23
+ root_cause: |
24
+ When a workflow run is cancelled (manually or via `cancel-in-progress`), GitHub Actions
25
+ re-evaluates the `if:` condition for every currently running job. Jobs marked with
26
+ `if: always()` continue running — this is the intended mechanism for cleanup, notifications,
27
+ and teardown steps.
28
+
29
+ However, GitHub enforces a **5-minute hard termination window** after cancellation is
30
+ initiated. Once 5 minutes have elapsed since the cancellation signal, ALL remaining jobs
31
+ are forcibly killed by the server, regardless of their `if:` conditions — including jobs
32
+ explicitly marked `if: always()`.
33
+
34
+ This means:
35
+ - Cleanup jobs that take more than 5 minutes (Terraform destroy, test result uploads,
36
+ Slack notifications with retries, database teardown) will be killed mid-execution.
37
+ - The job may appear partially completed in the logs with no clear failure message —
38
+ it simply stops, often leaving infrastructure in a partial or inconsistent state.
39
+ - Developers are surprised that `always()` does not guarantee the job completes after
40
+ a workflow cancellation.
41
+
42
+ Common failure scenarios:
43
+ - Artifact upload in an `if: always()` post-job step when the upload is slow
44
+ - Terraform `destroy` as a cleanup job when a long-running deployment is cancelled
45
+ - Notification jobs that retry on transient failures and consume more time than expected
46
+ - Integration test teardown (database resets, container removal) that exceeds 5 minutes
47
+
48
+ Source: GitHub Docs — Canceling a workflow: "After the 5 minute cancellation timeout
49
+ period, the server will forcibly terminate all jobs that are still running."
50
+ fix: |
51
+ Design `always()` cleanup jobs to complete well within 5 minutes. Add a job-level
52
+ `timeout-minutes: 4` to any cleanup job that runs after cancellation so it fails
53
+ cleanly rather than being force-killed at an unpredictable point.
54
+
55
+ For teardown that cannot be shortened, trigger cleanup from a separate workflow using
56
+ the `workflow_run` trigger (`types: [completed]`) — it runs after the cancelled run fully settles and is
57
+ not subject to the 5-minute window.
58
+
59
+ Use the `cancelled()` expression to detect cancellation and take a fast code path.
60
+ fix_code:
61
+ - language: yaml
62
+ label: "Guard cleanup job with timeout-minutes to fail fast before forced kill"
63
+ code: |
64
+ jobs:
65
+ deploy:
66
+ runs-on: ubuntu-latest
67
+ timeout-minutes: 60
68
+ steps:
69
+ - uses: actions/checkout@v4
70
+ - run: ./deploy.sh
71
+
72
+ cleanup:
73
+ needs: deploy
74
+ if: always()
75
+ runs-on: ubuntu-latest
76
+ timeout-minutes: 4 # Stay under the 5-min forced-kill window
77
+ steps:
78
+ - name: Teardown infrastructure
79
+ run: ./teardown.sh
80
+ timeout-minutes: 3 # Per-step guard too
81
+
82
+ - language: yaml
83
+ label: "Use cancelled() to take a fast notification path on cancellation"
84
+ code: |
85
+ jobs:
86
+ build:
87
+ runs-on: ubuntu-latest
88
+ steps:
89
+ - run: ./slow-build.sh
90
+
91
+ notify:
92
+ needs: build
93
+ if: always()
94
+ runs-on: ubuntu-latest
95
+ steps:
96
+ - name: Quick notification (cancellation — must be fast)
97
+ if: cancelled()
98
+ run: |
99
+ curl -s -X POST "$SLACK_WEBHOOK" \
100
+ -H 'Content-type: application/json' \
101
+ -d '{"text":"⚠️ Workflow cancelled — cleanup may be incomplete"}'
102
+
103
+ - name: Full notification (success or failure path — has time)
104
+ if: "!cancelled()"
105
+ run: ./full-notify.sh "${{ needs.build.result }}"
106
+
107
+ - language: yaml
108
+ label: "Post-cancellation teardown via workflow_run — not subject to 5-min window"
109
+ code: |
110
+ # cleanup.yml — separate workflow triggered after any completion including cancellation
111
+ on:
112
+ workflow_run:
113
+ workflows: ["Deploy"]
114
+ types: [completed]
115
+
116
+ jobs:
117
+ teardown:
118
+ runs-on: ubuntu-latest
119
+ steps:
120
+ - uses: actions/checkout@v4
121
+
122
+ - name: Emergency cleanup when deploy was cancelled
123
+ if: github.event.workflow_run.conclusion == 'cancelled'
124
+ run: ./emergency-teardown.sh
125
+
126
+ - name: Normal cleanup on success or failure
127
+ if: github.event.workflow_run.conclusion != 'cancelled'
128
+ run: ./standard-teardown.sh
129
+ prevention:
130
+ - "Keep `if: always()` cleanup jobs under 4 minutes — add `timeout-minutes: 4` as a safety guard."
131
+ - "Use `if: cancelled()` to detect cancellation and take a fast code path rather than the full teardown path."
132
+ - "For cleanup that takes longer than 5 minutes, use a separate workflow triggered by `workflow_run` with `types: [completed]`, which runs outside the cancellation window."
133
+ - "Test cancellation behavior by manually cancelling a long-running workflow and verifying cleanup jobs complete before 5 minutes."
134
+ docs:
135
+ - url: "https://docs.github.com/en/actions/managing-workflow-runs-and-deployments/managing-workflow-runs/canceling-a-workflow"
136
+ label: "GitHub Docs: Canceling a workflow (5-minute forced termination)"
137
+ - url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#status-check-functions"
138
+ label: "Status check functions: always(), cancelled()"
139
+ - url: "https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflow_run"
140
+ label: "workflow_run event — trigger cleanup after completed workflows"
@@ -0,0 +1,99 @@
1
+ id: concurrency-timing-010
2
+ title: "env Context Unavailable in Concurrency Group Expression Collapses All Runs"
3
+ category: concurrency-timing
4
+ severity: silent-failure
5
+ tags:
6
+ - concurrency
7
+ - env-context
8
+ - expression
9
+ - silent-failure
10
+ - group-collision
11
+ patterns:
12
+ - regex: "Canceling since a higher priority waiting"
13
+ flags: "i"
14
+ - regex: "concurrency.*group.*\"\""
15
+ flags: "i"
16
+ error_messages:
17
+ - "Canceling since a higher priority waiting request for '' exists"
18
+ - "Canceling since a higher priority waiting run was found for ''"
19
+ root_cause: |
20
+ The `concurrency.group` expression is evaluated at workflow scheduling time, before
21
+ most runtime contexts are available. The `env` context is one of the contexts that
22
+ is NOT available when concurrency expressions are evaluated.
23
+
24
+ When you use `${{ env.MY_VAR }}` in a concurrency group key:
25
+ - The expression silently evaluates to an empty string `""`
26
+ - Every workflow run (across all branches, all events) shares the same group: `""`
27
+ - Runs from completely unrelated branches cancel each other unexpectedly
28
+ - The runner may emit "Canceling since a higher priority waiting request for '' exists"
29
+ with an empty group name — which is the giveaway
30
+
31
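+ Contexts available in workflow-level `concurrency.group`: `github`, `inputs`, `vars` (job-level `jobs.<job_id>.concurrency` additionally allows `needs`, `strategy`, `matrix`)
32
+ Contexts NOT available at either level: `env`, `steps`, `job`, `runner`, `secrets`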
33
+
34
+ This is a documented limitation but easy to miss because the expression evaluates
35
+ silently without error — it just returns empty string.
36
+
37
+ Sources: GitHub Community #26308, #45734, #69704
38
+ fix: |
39
+ Replace `env` context references in concurrency group expressions with supported
40
+ contexts. Use `github` (event properties, ref, workflow name), `inputs` (for
41
+ workflow_dispatch or workflow_call), or `vars` (repository/org variables).
42
+
43
+ For environment-specific group keys, use `github.event_name`, `github.ref_name`,
44
+ `github.workflow`, or pass an explicit input to workflow_dispatch.
45
+ fix_code:
46
+ - language: yaml
47
+ label: "Broken — env context evaluates to empty string in concurrency group"
48
+ code: |
49
+ # ❌ BROKEN: ${{ env.ENVIRONMENT }} returns "" at scheduling time
50
+ env:
51
+ ENVIRONMENT: production
52
+
53
+ concurrency:
54
+ group: deploy-${{ env.ENVIRONMENT }} # Always evaluates to "deploy-"
55
+ cancel-in-progress: false
56
+ - language: yaml
57
+ label: "Fixed — use github context or vars instead of env"
58
+ code: |
59
+ # ✅ FIXED: use github context properties (available at scheduling time)
60
+ concurrency:
61
+ group: deploy-${{ github.ref_name }}-${{ github.workflow }}
62
+ cancel-in-progress: false
63
+ - language: yaml
64
+ label: "Fixed — pass environment as workflow_dispatch input for dynamic group key"
65
+ code: |
66
+ # ✅ FIXED: expose the value as an input so it's available via `inputs` context
67
+ on:
68
+ workflow_dispatch:
69
+ inputs:
70
+ environment:
71
+ required: true
72
+ type: choice
73
+ options: [production, staging]
74
+
75
+ concurrency:
76
+ group: deploy-${{ inputs.environment }}
77
+ cancel-in-progress: false
78
+ - language: yaml
79
+ label: "Fixed — use repository variable (vars context is available)"
80
+ code: |
81
+ # ✅ FIXED: vars context is available in concurrency expressions
82
+ concurrency:
83
+ group: deploy-${{ vars.DEPLOY_ENV }}-${{ github.ref_name }}
84
+ cancel-in-progress: false
85
+ prevention:
86
+ - "Only use `github`, `inputs`, and `vars` contexts in workflow-level `concurrency.group` expressions (job-level concurrency may additionally use `needs`, `strategy`, and `matrix`)."
87
+ - "If you see runs from unrelated branches cancelling each other, inspect the concurrency group key for empty-string evaluation."
88
+ - "Test concurrency group expressions by adding a step that echoes the group key: `run: echo 'group=${{ github.workflow }}-${{ github.ref_name }}'`."
89
+ - "If concurrency cancellation messages show an empty group name `''`, the expression evaluated to an empty string."
90
+ - "Use `vars` (repository/org variables) rather than `env` when you need a configured value in the group key."
91
+ docs:
92
+ - url: "https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/using-concurrency"
93
+ label: "Using concurrency — supported expression contexts"
94
+ - url: "https://github.com/orgs/community/discussions/26308"
95
+ label: "GitHub Community #26308 — env context not available in concurrency"
96
+ - url: "https://github.com/orgs/community/discussions/69704"
97
+ label: "GitHub Community #69704 — concurrency group context limitations"
98
+ - url: "https://github.com/orgs/community/discussions/45734"
99
+ label: "GitHub Community #45734 — concurrency expression supported contexts"