@htekdev/actions-debugger 1.0.119 → 1.0.120
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
id: concurrency-timing-058
|
|
2
|
+
title: "concurrency queue:max 100-Slot Overflow Silently Cancels the Oldest Pending Run"
|
|
3
|
+
category: concurrency-timing
|
|
4
|
+
severity: silent-failure
|
|
5
|
+
tags:
|
|
6
|
+
- concurrency
|
|
7
|
+
- queue-max
|
|
8
|
+
- silent-cancel
|
|
9
|
+
- pending
|
|
10
|
+
- overflow
|
|
11
|
+
- deployment
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: 'queue.*max|queue:.*max'
|
|
14
|
+
flags: 'i'
|
|
15
|
+
- regex: 'This run was cancelled|run.*cancelled'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
error_messages:
|
|
18
|
+
- "This run was cancelled."
|
|
19
|
+
- "Run was cancelled."
|
|
20
|
+
root_cause: |
|
|
21
|
+
GitHub Actions concurrency queue:max allows up to 100 workflow runs or jobs
|
|
22
|
+
to wait in a single concurrency group instead of being cancelled immediately.
|
|
23
|
+
When the 100-slot queue is already full and a new run arrives, the oldest
|
|
24
|
+
pending run in the queue is silently cancelled to make room.
|
|
25
|
+
|
|
26
|
+
The cancelled run shows "This run was cancelled." in the UI with no
|
|
27
|
+
indication that the cancellation was caused by queue overflow. This message
|
|
28
|
+
is identical to normal concurrency cancel-in-progress cancellations, making
|
|
29
|
+
it impossible to distinguish overflow cancellation from intentional
|
|
30
|
+
cancellation without additional observability.
|
|
31
|
+
|
|
32
|
+
This problem surfaces when:
|
|
33
|
+
- A pipeline is slower than the push rate (runs queue faster than they drain)
|
|
34
|
+
- A burst of commits or tags fires many workflow runs simultaneously
|
|
35
|
+
- A monorepo path filter causes many unrelated commits to all queue in the
|
|
36
|
+
same deployment concurrency group
|
|
37
|
+
- A workflow is accidentally triggered on every push to any branch and all
|
|
38
|
+
share a single production-deploy concurrency group
|
|
39
|
+
|
|
40
|
+
Note: queue:max and cancel-in-progress:true cannot be combined (validation
|
|
41
|
+
error). queue:max assumes you want all runs to eventually execute in order.
|
|
42
|
+
fix: |
|
|
43
|
+
queue:max with 100 slots is generous for most pipelines. If you are hitting
|
|
44
|
+
the overflow limit, the queue depth is a symptom of a throughput mismatch:
|
|
45
|
+
|
|
46
|
+
1. Speed up the job so the queue drains faster than it fills. Profile and
|
|
47
|
+
optimize the slowest steps (build caching, parallelism inside the job).
|
|
48
|
+
|
|
49
|
+
2. Narrow the trigger to reduce unnecessary queuing:
|
|
50
|
+
- Use paths: filters so only relevant changes trigger the workflow
|
|
51
|
+
- Only trigger on specific branches (main, release/*) not all branches
|
|
52
|
+
- Use workflow_dispatch for on-demand deploys instead of automatic pushes
|
|
53
|
+
|
|
54
|
+
3. Split into multiple concurrency groups scoped per environment or service
|
|
55
|
+
component. Each group has its own 100-slot limit, distributing capacity
|
|
56
|
+
across workloads.
|
|
57
|
+
|
|
58
|
+
4. If strict ordering is not required and drops are acceptable, switch back
|
|
59
|
+
to queue:single (the default) with cancel-in-progress:false so only one
|
|
60
|
+
pending run is kept, avoiding the 100-slot limit entirely.
|
|
61
|
+
fix_code:
|
|
62
|
+
- language: yaml
|
|
63
|
+
label: "queue:max — silently cancels oldest pending run when >100 queued"
|
|
64
|
+
code: |
|
|
65
|
+
on:
|
|
66
|
+
push:
|
|
67
|
+
branches: ['**'] # Triggers on every push to every branch
|
|
68
|
+
|
|
69
|
+
concurrency:
|
|
70
|
+
group: production-deploy # All branches share ONE group — queue fills fast
|
|
71
|
+
queue: max # Up to 100 queued; 101st silently cancels the 1st
|
|
72
|
+
|
|
73
|
+
- language: yaml
|
|
74
|
+
label: "Fixed — scope concurrency group per environment + narrow push trigger"
|
|
75
|
+
code: |
|
|
76
|
+
on:
|
|
77
|
+
push:
|
|
78
|
+
branches:
|
|
79
|
+
- main
|
|
80
|
+
- 'release/**'
|
|
81
|
+
|
|
82
|
+
concurrency:
|
|
83
|
+
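# Each environment + branch pair gets its own 100-slot queue (push events carry no deployment payload, so the environment falls back to 'staging')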
|
|
84
|
+
group: deploy-${{ github.event.deployment.environment || 'staging' }}-${{ github.ref_name }}
|
|
85
|
+
queue: max
|
|
86
|
+
|
|
87
|
+
- language: yaml
|
|
88
|
+
label: "Fixed — add observability step to detect queue overflow"
|
|
89
|
+
code: |
|
|
90
|
+
jobs:
|
|
91
|
+
deploy:
|
|
92
|
+
runs-on: ubuntu-latest
|
|
93
|
+
concurrency:
|
|
94
|
+
group: production-deploy
|
|
95
|
+
queue: max
|
|
96
|
+
steps:
|
|
97
|
+
- name: Check concurrency queue depth
|
|
98
|
+
env:
|
|
99
|
+
GH_TOKEN: ${{ github.token }}
|
|
100
|
+
run: |
|
|
101
|
+
PENDING=$(gh api \
|
|
102
|
+
"repos/${{ github.repository }}/actions/runs?status=waiting&per_page=100" \
|
|
103
|
+
--jq '[.workflow_runs[] | select(.name == "${{ github.workflow }}")] | length')
|
|
104
|
+
echo "Runs currently waiting in concurrency queue: $PENDING"
|
|
105
|
+
if [ "${PENDING:-0}" -ge 90 ]; then
|
|
106
|
+
echo "::warning::Queue near capacity (${PENDING}/100). Oldest run may be dropped on next push."
|
|
107
|
+
fi
|
|
108
|
+
|
|
109
|
+
- name: Deploy
|
|
110
|
+
run: ./deploy.sh
|
|
111
|
+
prevention:
|
|
112
|
+
- "Monitor pipeline throughput: if runs consistently back up to 50+ in queue, the job is too slow for the push frequency."
|
|
113
|
+
- "Narrow workflow triggers with paths: and branches: filters to reduce unnecessary queuing."
|
|
114
|
+
- "Scope concurrency groups per environment or service to distribute the 100-slot limit across workloads."
|
|
115
|
+
- "Use queue:max only when strict ordering matters (deployments). For CI checks, cancel-in-progress:true is preferable."
|
|
116
|
+
- "The combination queue:max and cancel-in-progress:true is a workflow validation error — do not use both."
|
|
117
|
+
docs:
|
|
118
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-concurrency"
|
|
119
|
+
label: "GitHub Docs — Using concurrency (queue:max behavior and 100-slot limit)"
|
|
120
|
+
- url: "https://docs.github.com/en/actions/reference/limits"
|
|
121
|
+
label: "GitHub Actions Limits — Concurrency group queue: 100 runs per group"
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
id: known-unsolved-069
|
|
2
|
+
title: "Matrix Strategy Hard Limit: 256 Jobs Per Workflow Run"
|
|
3
|
+
category: known-unsolved
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- matrix
|
|
7
|
+
- strategy
|
|
8
|
+
- job-limit
|
|
9
|
+
- dynamic-matrix
|
|
10
|
+
- fromjson
|
|
11
|
+
- scalability
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: 'matrix.*generates.*too many|too many.*entries.*matrix'
|
|
14
|
+
flags: 'i'
|
|
15
|
+
- regex: 'limited to 256|exceeding.*allowed maximum.*256|256.*jobs.*per.*workflow'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
error_messages:
|
|
18
|
+
- "Matrix generates too many entries. Limited to 256."
|
|
19
|
+
root_cause: |
|
|
20
|
+
GitHub Actions enforces a hard limit of 256 jobs per job matrix in a workflow run, across
|
|
21
|
+
all matrix dimensions. This is calculated as the total Cartesian product of
|
|
22
|
+
all matrix axes after applying include/exclude entries.
|
|
23
|
+
|
|
24
|
+
Common triggers:
|
|
25
|
+
- Dynamic matrices from fromJSON output that grow over time as new targets
|
|
26
|
+
are added to a test matrix config file
|
|
27
|
+
- Multi-dimensional matrices (os x language x version) where the product
|
|
28
|
+
of all axis sizes exceeds 256
|
|
29
|
+
- Adding include: entries on top of an already-large base matrix, each
|
|
30
|
+
include: that cannot merge into an existing combination adds a job counted against the 256 limit
|
|
31
|
+
- Monorepo CI matrices that test all services x all supported runtimes
|
|
32
|
+
|
|
33
|
+
The error fires immediately when the workflow is queued and prevents any
|
|
34
|
+
jobs from running. GitHub reports the total generated count in the error
|
|
35
|
+
message: "Matrix generates N entries. Limited to 256."
|
|
36
|
+
|
|
37
|
+
This limit applies equally to GitHub-hosted and self-hosted runners and
|
|
38
|
+
cannot be increased by contacting support.
|
|
39
|
+
fix: |
|
|
40
|
+
The 256-job-per-run limit is hard and cannot be increased. Options:
|
|
41
|
+
|
|
42
|
+
1. Split across multiple workflow files, each handling a subset of targets.
|
|
43
|
+
Trigger them from a parent coordinator workflow or via separate triggers.
|
|
44
|
+
|
|
45
|
+
2. Reduce matrix dimensions: test only meaningful combinations using
|
|
46
|
+
explicit include: lists rather than full Cartesian products. Not every
|
|
47
|
+
OS x language x version triplet needs to be tested.
|
|
48
|
+
|
|
49
|
+
3. Chunk using workflow_dispatch fan-out: a generator job creates N chunks
|
|
50
|
+
of at most 256 items and dispatches sub-workflow runs per chunk via
|
|
51
|
+
repository_dispatch or workflow_dispatch API calls.
|
|
52
|
+
|
|
53
|
+
4. Use a single job with a shell loop for some dimensions: instead of
|
|
54
|
+
separate matrix jobs per version, loop over versions inside one job step.
|
|
55
|
+
Sacrifices parallelism but avoids the limit entirely.
|
|
56
|
+
fix_code:
|
|
57
|
+
- language: yaml
|
|
58
|
+
label: "Broken — Cartesian product silently grows past 256 as versions are added"
|
|
59
|
+
code: |
|
|
60
|
+
strategy:
|
|
61
|
+
matrix:
|
|
62
|
+
os: [ubuntu-22.04, ubuntu-24.04, windows-2022, macos-14, macos-15]
|
|
63
|
+
node: [18, 20, 22, 23]
|
|
64
|
+
python: ['3.9', '3.10', '3.11', '3.12', '3.13']
|
|
65
|
+
# 5 x 4 x 5 = 100 jobs today — fine, but adding node 24 pushes to 125,
|
|
66
|
+
# adding python 3.14 pushes to 150, and so on until the 256 limit is exceeded and the run fails
|
|
67
|
+
|
|
68
|
+
- language: yaml
|
|
69
|
+
label: "Fixed — explicit include list tests only meaningful combinations"
|
|
70
|
+
code: |
|
|
71
|
+
strategy:
|
|
72
|
+
matrix:
|
|
73
|
+
include:
|
|
74
|
+
# Test LTS node on primary OS with latest stable Python
|
|
75
|
+
- os: ubuntu-24.04
|
|
76
|
+
node: 20
|
|
77
|
+
python: '3.12'
|
|
78
|
+
- os: ubuntu-24.04
|
|
79
|
+
node: 22
|
|
80
|
+
python: '3.12'
|
|
81
|
+
# Windows only gets the most recent LTS
|
|
82
|
+
- os: windows-2022
|
|
83
|
+
node: 20
|
|
84
|
+
python: '3.11'
|
|
85
|
+
# macOS gets one combination
|
|
86
|
+
- os: macos-15
|
|
87
|
+
node: 20
|
|
88
|
+
python: '3.12'
|
|
89
|
+
# Total: 4 jobs instead of 100+ from full Cartesian product
|
|
90
|
+
|
|
91
|
+
- language: yaml
|
|
92
|
+
label: "Defensive — validate dynamic matrix size before passing to strategy"
|
|
93
|
+
code: |
|
|
94
|
+
jobs:
|
|
95
|
+
generate-matrix:
|
|
96
|
+
runs-on: ubuntu-latest
|
|
97
|
+
outputs:
|
|
98
|
+
matrix: ${{ steps.gen.outputs.matrix }}
|
|
99
|
+
steps:
|
|
100
|
+
- id: gen
|
|
101
|
+
run: |
|
|
102
|
+
MATRIX=$(jq -c . test-matrix.json)
|
|
103
|
+
COUNT=$(echo "$MATRIX" | jq '.include | length')
|
|
104
|
+
if [ "$COUNT" -gt 256 ]; then
|
|
105
|
+
echo "::error::Matrix has $COUNT entries (limit: 256). Split into multiple workflows."
|
|
106
|
+
exit 1
|
|
107
|
+
fi
|
|
108
|
+
echo "matrix=$MATRIX" >> "$GITHUB_OUTPUT"
|
|
109
|
+
|
|
110
|
+
test:
|
|
111
|
+
needs: generate-matrix
|
|
112
|
+
strategy:
|
|
113
|
+
matrix: ${{ fromJSON(needs.generate-matrix.outputs.matrix) }}
|
|
114
|
+
runs-on: ${{ matrix.os }}
|
|
115
|
+
steps:
|
|
116
|
+
- run: echo "Testing ${{ matrix.os }} / ${{ matrix.version }}"
|
|
117
|
+
prevention:
|
|
118
|
+
- "Before adding a new matrix axis, calculate the new total job count — it must stay at or below 256."
|
|
119
|
+
- "For dynamic matrices (fromJSON), validate the output list length in the generator job and fail fast if > 256."
|
|
120
|
+
- "Prefer explicit include: lists over full Cartesian products when not all dimension combinations are meaningful."
|
|
121
|
+
- "Monitor matrix job counts over time: a previously-safe matrix silently crosses 256 as new versions are added."
|
|
122
|
+
- "Document the current job count in a comment next to the matrix block so reviewers notice when PRs push it higher."
|
|
123
|
+
docs:
|
|
124
|
+
- url: "https://docs.github.com/en/actions/reference/limits"
|
|
125
|
+
label: "GitHub Actions Limits — Job Matrix: 256 jobs per workflow run"
|
|
126
|
+
- url: "https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/running-variations-of-jobs-in-a-workflow"
|
|
127
|
+
label: "GitHub Docs — Using a matrix for your jobs"
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
id: permissions-auth-070
|
|
2
|
+
title: "GITHUB_TOKEN packages:write Cannot Delete GitHub Container Registry Packages"
|
|
3
|
+
category: permissions-auth
|
|
4
|
+
severity: error
|
|
5
|
+
tags:
|
|
6
|
+
- github-packages
|
|
7
|
+
- ghcr
|
|
8
|
+
- container-registry
|
|
9
|
+
- packages-delete
|
|
10
|
+
- permissions
|
|
11
|
+
- pat
|
|
12
|
+
patterns:
|
|
13
|
+
- regex: 'delete.*package.*403|package.*delete.*forbidden'
|
|
14
|
+
flags: 'i'
|
|
15
|
+
- regex: 'Resource not accessible by integration'
|
|
16
|
+
flags: 'i'
|
|
17
|
+
error_messages:
|
|
18
|
+
- "Resource not accessible by integration"
|
|
19
|
+
- "HttpError: Resource not accessible by integration"
|
|
20
|
+
- "403 Forbidden"
|
|
21
|
+
root_cause: |
|
|
22
|
+
GitHub Container Registry (ghcr.io) and other granular-permission package
|
|
23
|
+
registries (npm, NuGet, RubyGems on GitHub Packages) use package-level access control
|
|
24
|
+
that is separate from repository permissions. The GITHUB_TOKEN workflow
|
|
25
|
+
permission block supports packages: write, which grants the ability to:
|
|
26
|
+
- Publish (push) new package versions
|
|
27
|
+
- Update package metadata and visibility
|
|
28
|
+
|
|
29
|
+
However, packages: write does NOT grant the ability to DELETE package
|
|
30
|
+
versions. Deletion of granular-permission packages requires the delete:packages
|
|
31
|
+
scope, which exists only on classic personal access tokens (PATs) — there is
|
|
32
|
+
no equivalent permission in the GITHUB_TOKEN fine-grained model.
|
|
33
|
+
|
|
34
|
+
As a result, workflows that use actions/delete-package-versions (or make
|
|
35
|
+
direct REST API calls to DELETE /user/packages/{type}/{name}/versions/{id}
|
|
36
|
+
or DELETE /orgs/{org}/packages/{type}/{package_name}/versions/{id}) always
|
|
37
|
+
fail with 403 "Resource not accessible by integration" even with:
|
|
38
|
+
permissions:
|
|
39
|
+
packages: write
|
|
40
|
+
|
|
41
|
+
Note: Repository-scoped packages (Apache Maven, legacy Gradle) that inherit
|
|
42
|
+
repository permissions may behave differently — deletion can work via the
|
|
43
|
+
repo admin role. Container Registry packages always use granular permissions.
|
|
44
|
+
fix: |
|
|
45
|
+
Replace GITHUB_TOKEN with a token that carries delete:packages authority.
|
|
46
|
+
|
|
47
|
+
Option 1 (recommended for orgs): GitHub App token
|
|
48
|
+
- Create a GitHub App with "Packages: Read & Write" and "Packages: Admin"
|
|
49
|
+
permissions
|
|
50
|
+
- Install the app on the org or repo
|
|
51
|
+
- Use actions/create-github-app-token to generate a token in the workflow
|
|
52
|
+
- Pass the token to the deletion step
|
|
53
|
+
|
|
54
|
+
Option 2 (simpler for personal/small-team repos): Classic PAT
|
|
55
|
+
- Generate a classic PAT (Settings > Developer settings > Personal access
|
|
56
|
+
tokens > Tokens (classic)) with the delete:packages scope
|
|
57
|
+
- Store as a repository or organization secret
|
|
58
|
+
- Reference the secret in the deletion step
|
|
59
|
+
|
|
60
|
+
Option 3 (repo-scoped packages only — Apache Maven, Gradle):
|
|
61
|
+
- Ensure the package uses repository-scoped permissions (not granular)
|
|
62
|
+
- Grant the workflow admin access to the repository
|
|
63
|
+
- This does NOT apply to ghcr.io container images, which always require
|
|
64
|
+
a PAT or App approach for deletion
|
|
65
|
+
fix_code:
|
|
66
|
+
- language: yaml
|
|
67
|
+
label: "Broken — GITHUB_TOKEN packages:write cannot delete container images"
|
|
68
|
+
code: |
|
|
69
|
+
permissions:
|
|
70
|
+
packages: write # Grants publish access but NOT delete access
|
|
71
|
+
|
|
72
|
+
jobs:
|
|
73
|
+
cleanup:
|
|
74
|
+
runs-on: ubuntu-latest
|
|
75
|
+
steps:
|
|
76
|
+
- name: Delete old versions
|
|
77
|
+
uses: actions/delete-package-versions@v5
|
|
78
|
+
with:
|
|
79
|
+
package-name: my-image
|
|
80
|
+
package-type: container
|
|
81
|
+
min-versions-to-keep: 5
|
|
82
|
+
token: ${{ secrets.GITHUB_TOKEN }} # Fails with 403 for container packages
|
|
83
|
+
|
|
84
|
+
- language: yaml
|
|
85
|
+
label: "Fixed — classic PAT with delete:packages scope"
|
|
86
|
+
code: |
|
|
87
|
+
permissions:
|
|
88
|
+
packages: write
|
|
89
|
+
|
|
90
|
+
jobs:
|
|
91
|
+
cleanup:
|
|
92
|
+
runs-on: ubuntu-latest
|
|
93
|
+
steps:
|
|
94
|
+
- name: Delete old versions
|
|
95
|
+
uses: actions/delete-package-versions@v5
|
|
96
|
+
with:
|
|
97
|
+
package-name: my-image
|
|
98
|
+
package-type: container
|
|
99
|
+
min-versions-to-keep: 5
|
|
100
|
+
# Classic PAT must have: write:packages + delete:packages scopes
|
|
101
|
+
token: ${{ secrets.PAT_DELETE_PACKAGES }}
|
|
102
|
+
|
|
103
|
+
- language: yaml
|
|
104
|
+
label: "Fixed — GitHub App token (recommended for organizations)"
|
|
105
|
+
code: |
|
|
106
|
+
jobs:
|
|
107
|
+
cleanup:
|
|
108
|
+
runs-on: ubuntu-latest
|
|
109
|
+
steps:
|
|
110
|
+
- name: Generate GitHub App token
|
|
111
|
+
id: app-token
|
|
112
|
+
uses: actions/create-github-app-token@v1
|
|
113
|
+
with:
|
|
114
|
+
app-id: ${{ vars.PACKAGE_CLEANUP_APP_ID }}
|
|
115
|
+
private-key: ${{ secrets.PACKAGE_CLEANUP_APP_KEY }}
|
|
116
|
+
|
|
117
|
+
- name: Delete old container image versions
|
|
118
|
+
uses: actions/delete-package-versions@v5
|
|
119
|
+
with:
|
|
120
|
+
package-name: my-image
|
|
121
|
+
package-type: container
|
|
122
|
+
min-versions-to-keep: 5
|
|
123
|
+
token: ${{ steps.app-token.outputs.token }}
|
|
124
|
+
prevention:
|
|
125
|
+
- "Never assume packages:write grants deletion rights — it only covers publish and metadata operations."
|
|
126
|
+
- "Provision a dedicated classic PAT or GitHub App with package admin access for all cleanup/retention workflows."
|
|
127
|
+
- "Document which secrets are needed for deletion workflows so maintainers do not accidentally replace them with GITHUB_TOKEN."
|
|
128
|
+
- "Audit all uses of actions/delete-package-versions in your org to confirm each workflow uses an appropriate token."
|
|
129
|
+
- "Prefer GitHub App tokens over classic PATs for org workflows — PAT credentials are tied to a specific user account."
|
|
130
|
+
docs:
|
|
131
|
+
- url: "https://docs.github.com/en/packages/learn-github-packages/about-permissions-for-github-packages"
|
|
132
|
+
label: "GitHub Docs — About permissions for GitHub Packages (granular vs repository-scoped)"
|
|
133
|
+
- url: "https://github.com/actions/delete-package-versions"
|
|
134
|
+
label: "actions/delete-package-versions — token requirements"
|
|
135
|
+
- url: "https://docs.github.com/en/rest/packages/packages"
|
|
136
|
+
label: "GitHub REST API — Packages DELETE endpoints"
|
package/package.json
CHANGED