npm - @htekdev/actions-debugger - Versions diffs - 1.0.28 → 1.0.29 - Mend

@htekdev/actions-debugger 1.0.28 → 1.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/errors/caching-artifacts/hashfiles-empty-result-cache-key-collision.yml ADDED Viewed

@@ -0,0 +1,78 @@
+id: 'caching-artifacts-028'
+title: 'hashFiles() returns empty string when no files match pattern, causing cache key collision across all runs'
+category: caching-artifacts
+severity: silent-failure
+tags:
+  - hashfiles
+  - cache-key
+  - empty-string
+  - collision
+  - lock-file
+  - monorepo
+patterns:
+  - regex: 'hashFiles\('
+    flags: 'i'
+error_messages:
+  - 'Cache hit for key:'
+  - 'hashFiles result is empty string'
+root_cause: |
+  When hashFiles('**/package-lock.json') finds no matching files, it returns an empty
+  string instead of failing. A cache key like:
+    ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
+  evaluates to:
+    Linux-node-
+  (no hash suffix). All workflow runs — regardless of their actual dependency state —
+  then share this single cache key. The first run to complete saves its node_modules
+  under this key; every subsequent run gets a stale cache hit with potentially outdated
+  or wrong dependencies.
+  The cache-hit output shows 'true' and the step succeeds with no warning. Developers
+  see unexpectedly fast runs (cache always hits) but may encounter subtle dependency
+  staleness bugs.
+  Behavior varies across versions: actions/toolkit prior to 1.9.0 threw an exception
+  on empty results; later versions silently return empty string, making the collapsing
+  key the default behavior for repositories without the expected lock file.
+fix: |
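+  Guard hashFiles() with a fallback value so the cache key is never incomplete when
+  no matching files exist. Using || github.sha or || github.run_id ensures each run
+  gets a unique key when no lock file is present, preventing stale cache collisions.
+  Note that a broad restore-keys prefix (for example, runner.os followed by -node-) can
+  still restore an older cache as a partial match; cache-hit is not 'true' in that case.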
+fix_code:
+  - language: yaml
+    label: 'Add fallback to hashFiles to prevent empty cache key'
+    code: |
+      - uses: actions/cache@v4
+        with:
+          path: ~/.npm
+          # Fallback to github.sha when no lock file exists — prevents key collision
+          key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') || github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-node-
+  - language: yaml
+    label: 'Verify cache is populated before relying on it'
+    code: |
+      - uses: actions/cache@v4
+        id: npm-cache
+        with:
+          path: ~/.npm
+          key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') || github.sha }}
+      - name: Confirm cache restored correctly
+        if: steps.npm-cache.outputs.cache-hit == 'true'
+        run: |
+          if [ ! -d ~/.npm ]; then
+            echo "Cache hit claimed but directory missing — likely empty-key collision"
+            exit 1
+          fi
+prevention:
+  - 'Always add || github.sha fallback after hashFiles() in cache keys'
+  - 'Use actions built-in caching (setup-node cache: npm) which handles missing lock files safely'
+  - 'In monorepos without a root-level lock file, construct keys from per-package hash patterns'
+  - 'Test cache behavior in branches or forks where lock files might not yet exist'
+docs:
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/evaluate-expressions-in-workflows-and-actions#hashfiles'
+    label: 'GitHub Docs: hashFiles() function'
+  - url: 'https://github.com/actions/cache/issues/1175'
+    label: 'actions/cache#1175: hashFiles empty result causes key collision'
+  - url: 'https://github.com/actions/toolkit/blob/main/packages/glob/README.md'
+    label: 'actions/toolkit: glob — hashFiles behavior on no match'

package/errors/caching-artifacts/restore-keys-cross-arch-cache-mismatch.yml ADDED Viewed

@@ -0,0 +1,80 @@
+id: 'caching-artifacts-027'
+title: 'restore-keys fallback matches cross-OS or cross-architecture cache, restoring incompatible binaries'
+category: caching-artifacts
+severity: silent-failure
+tags:
+  - cache
+  - restore-keys
+  - cross-platform
+  - architecture
+  - arm64
+  - runner-os
+patterns:
+  - regex: 'restore-keys:'
+    flags: 'i'
+error_messages:
+  - 'Cache restored from key'
+  - 'Exec format error'
+  - 'cannot execute binary file: Exec format error'
+root_cause: |
+  restore-keys performs prefix matching against ALL cached entries in the repository,
+  regardless of operating system or CPU architecture. When a restore-keys prefix is
+  shorter than the primary cache key and omits runner.os or runner.arch, a cache
+  saved on one platform can be silently restored on a different one.
+  Example: primary key Linux-x64-node-abc123 with restore-keys Linux- will match a
+  Linux ARM64 cache saved as Linux-arm64-node-xyz789, because Linux- is a prefix of
+  that key (a prefix such as Linux-node- would not match it). The ARM64 node_modules
+  contains native addon binaries (esbuild, sqlite3, etc.) compiled for ARM64; when
+  restored on an x64 runner, they fail at runtime with "Exec format error."
+  This became a widespread issue after GitHub introduced macOS ARM64 (M1/M2) runners
+  in 2023 and Linux ARM64 runners in 2024. Teams adding new runner architectures
+  to existing matrix builds often expose this silently.
+  The cache-hit output evaluates to 'true' even for cross-architecture restores,
+  providing no indication that the restored content may be incompatible.
+fix: |
+  Always include runner.os AND runner.arch in every level of restore-keys, mirroring
+  whatever isolation is present in the primary cache key. No restore-keys prefix should
+  ever be shorter than the architecture scope of the primary key.
+fix_code:
+  - language: yaml
+    label: 'Include runner.os and runner.arch in all restore-keys levels'
+    code: |
+      - uses: actions/cache@v4
+        with:
+          path: ~/.npm
+          # Primary key includes full OS and architecture isolation
+          key: ${{ runner.os }}-${{ runner.arch }}-node-${{ hashFiles('**/package-lock.json') }}
+          # Every fallback level maintains OS plus architecture isolation
+          restore-keys: |
+            ${{ runner.os }}-${{ runner.arch }}-node-
+            ${{ runner.os }}-${{ runner.arch }}-
+  - language: yaml
+    label: 'Matrix build with per-arch cache keys'
+    code: |
+      strategy:
+        matrix:
+          os: [ubuntu-latest, macos-latest, windows-latest]
+          arch: [x64, arm64]
+      steps:
+        - uses: actions/cache@v4
+          with:
+            path: |
+              ~/.cargo/registry
+              target/
+            key: ${{ matrix.os }}-${{ matrix.arch }}-rust-${{ hashFiles('**/Cargo.lock') }}
+            restore-keys: |
+              ${{ matrix.os }}-${{ matrix.arch }}-rust-
+prevention:
+  - 'Always include runner.os AND runner.arch in every level of restore-keys'
+  - 'Audit cache configurations when adding new runner OS or arch combinations to matrix builds'
+  - 'Add a verification step after cache restore to confirm a native binary executes correctly'
+  - 'When migrating from x64-only to multi-arch, update all restore-keys at the same time'
+docs:
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/caching-dependencies-to-speed-up-workflows#matching-a-cache-key'
+    label: 'GitHub Docs: Matching a cache key'
+  - url: 'https://github.com/actions/cache#inputs'
+    label: 'actions/cache README: restore-keys input'
+  - url: 'https://github.com/actions/cache/issues/1660'
+    label: 'actions/cache#1660: restore-keys cross-architecture match'

package/errors/concurrency-timing/cleanup-job-if-cancelled-skipped-by-concurrency.yml ADDED Viewed

@@ -0,0 +1,86 @@
+id: 'concurrency-timing-023'
+title: 'Cleanup jobs with if: cancelled() do not run when workflow is canceled by concurrency group'
+category: concurrency-timing
+severity: silent-failure
+tags:
+  - concurrency
+  - cancelled
+  - cleanup
+  - cancel-in-progress
+  - if-condition
+patterns:
+  - regex: 'cancel-in-progress:\s*true'
+    flags: 'i'
+  - regex: 'if:\s*cancelled\(\)'
+    flags: 'i'
+error_messages:
+  - 'This run has been cancelled.'
+  - 'Job cancelled by a newer workflow run'
+root_cause: |
+  When cancel-in-progress: true cancels a workflow run because a new run was queued in the same
+  concurrency group, GitHub cancels the entire workflow run at the infrastructure level before
+  individual job-level if: conditions are evaluated. As a result, jobs with if: cancelled() or
+  if: always() defined to run after a canceled parent job are typically canceled themselves
+  before they can be dispatched to a runner.
+  This is distinct from a job failing or being manually canceled: concurrency-group cancellation
+  is an external platform signal. A cleanup job that has already been dispatched when the
+  cancellation propagates may start briefly, but it is then killed mid-execution.
+fix: |
+  Use a separate workflow triggered by workflow_run with types: [completed] as the cleanup
+  trigger rather than relying on in-workflow if: cancelled() jobs. The workflow_run approach
+  fires reliably regardless of how the parent workflow ended.
+  If the in-workflow approach is required, use if: always() rather than if: cancelled() and
+  ensure the cleanup job starts quickly (lightweight first step) to reduce the window during
+  which the cancellation signal can reach it.
+fix_code:
+  - language: yaml
+    label: 'Reliable cleanup via separate workflow_run trigger'
+    code: |
+      # .github/workflows/cleanup.yml
+      on:
+        workflow_run:
+          workflows: ['CI']
+          types: [completed]
+      jobs:
+        cleanup:
+          runs-on: ubuntu-latest
+          if: >-
+            ${{ github.event.workflow_run.conclusion == 'cancelled' ||
+                github.event.workflow_run.conclusion == 'failure' }}
+          steps:
+            - name: Run cleanup
+              run: echo "Cleaning up after ${{ github.event.workflow_run.conclusion }}"
+  - language: yaml
+    label: 'Best-effort if:always() with fast first step'
+    code: |
+      jobs:
+        build:
+          runs-on: ubuntu-latest
+          steps:
+            - run: ./run-tests.sh
+        cleanup:
+          needs: build
+          if: always()
+          runs-on: ubuntu-latest
+          steps:
+            - name: Signal start immediately
+              run: echo "Cleanup starting"
+            - name: Do cleanup
+              run: ./cleanup.sh
+prevention:
+  - 'Do not rely solely on if: cancelled() for critical cleanup when cancel-in-progress: true is active'
+  - 'Use a separate workflow_run: completed trigger for guaranteed post-run cleanup logic'
+  - 'Use if: always() instead of if: cancelled() for broader coverage'
+  - 'Keep cleanup steps inside the main job where possible — step-level if: always() is more reliable than job-level when concurrency cancels the run'
+docs:
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-concurrency'
+    label: 'GitHub Docs: Using concurrency'
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflow_run'
+    label: 'GitHub Docs: workflow_run event'
+  - url: 'https://github.com/orgs/community/discussions/13655'
+    label: 'GitHub Community: cleanup jobs not running after concurrency cancellation'

package/errors/concurrency-timing/timeout-minutes-queue-wait-not-included.yml ADDED Viewed

@@ -0,0 +1,72 @@
+id: 'concurrency-timing-024'
+title: 'timeout-minutes applies to job execution only, not queue wait time — jobs can wait indefinitely'
+category: concurrency-timing
+severity: warning
+tags:
+  - timeout
+  - queue
+  - self-hosted
+  - runner
+  - wait-time
+patterns:
+  - regex: 'timeout-minutes:\s*\d+'
+    flags: 'i'
+error_messages:
+  - 'The job running on runner has exceeded the maximum execution time of'
+  - 'The operation was canceled.'
+root_cause: |
+  timeout-minutes only counts elapsed time from when a job actually begins executing on
+  a runner — not from when it enters the queue. A job waiting for an available runner
+  slot (including jobs waiting in a concurrency group queue) can sit pending for hours
+  or indefinitely without any timeout being applied.
+  This is particularly impactful with:
+  - Self-hosted runners under heavy load with limited runner capacity
+  - Concurrency groups with cancel-in-progress: false, where the pending run waits for the in-progress run to finish
+  - Repository-level runner quotas on GitHub-hosted runners during peak usage
+  Developers are often surprised that a job with timeout-minutes: 30 waited 4+ hours
+  before starting, then proceeded to run for its full 30-minute budget.
+fix: |
+  There is no native queue-timeout setting in GitHub Actions. Recommended workarounds:
+  1. Set cancel-in-progress: true in concurrency groups to drop stale queued jobs
+     when newer commits arrive, preventing queue accumulation.
+  2. Monitor queue depth using the GitHub REST API /repos/{owner}/{repo}/actions/runs
+     and set up external alerting for runs stuck in 'queued' status too long.
+  3. Ensure adequate self-hosted runner pool capacity relative to expected parallelism.
+  4. Use GitHub-hosted runners for time-sensitive jobs to avoid self-hosted queue depth issues.
+fix_code:
+  - language: yaml
+    label: 'Prevent queue accumulation with cancel-in-progress'
+    code: |
+      jobs:
+        build:
+          runs-on: self-hosted
+          timeout-minutes: 30  # Only counts execution time, NOT queue wait time
+          concurrency:
+            group: ${{ github.workflow }}-${{ github.ref }}
+            cancel-in-progress: true  # Drop stale queued jobs on new push
+          steps:
+            - uses: actions/checkout@v4
+            - run: ./build.sh
+  - language: yaml
+    label: 'External queue monitoring via API'
+    code: |
+      # Monitor for stuck queued runs via GitHub API
+      # GET /repos/{owner}/{repo}/actions/runs?status=queued
+      # Alert if any run has been queued for more than N minutes
+      # (implement in a separate monitoring workflow or external system)
+prevention:
+  - 'Do not assume timeout-minutes prevents jobs from waiting indefinitely in the runner queue'
+  - 'Use cancel-in-progress: true for CI workflows to prevent queue accumulation'
+  - 'Size self-hosted runner pools to handle expected peak concurrency'
+  - 'Monitor workflow run queue depth separately via the GitHub REST API'
+  - 'Document queue wait behavior in team CI runbooks so on-call engineers know what to expect'
+docs:
+  - url: 'https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#jobsjob_idtimeout-minutes'
+    label: 'GitHub Docs: timeout-minutes'
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-concurrency'
+    label: 'GitHub Docs: Using concurrency'
+  - url: 'https://docs.github.com/en/rest/actions/workflow-runs'
+    label: 'GitHub REST API: Workflow runs'

package/errors/triggers/workflow-run-skipped-when-trigger-workflow-filtered.yml ADDED Viewed

@@ -0,0 +1,88 @@
+id: 'triggers-026'
+title: 'workflow_run does not fire when triggering workflow is skipped by paths or branches filter'
+category: triggers
+severity: silent-failure
+tags:
+  - workflow-run
+  - skipped
+  - paths-filter
+  - branches-filter
+  - trigger-chain
+patterns:
+  - regex: 'workflow_run'
+    flags: 'i'
+  - regex: 'types:\s*\[.*completed.*\]'
+    flags: 'i'
+error_messages:
+  - 'This run was triggered by a workflow_run event but the parent workflow was not found'
+root_cause: |
+  When a workflow is skipped because its on.push.paths or on.push.branches filter does not
+  match the pushed commit, GitHub does not create a workflow run record and therefore does
+  not emit a workflow_run completion event. A downstream workflow that listens for
+  on.workflow_run.workflows: [UpstreamWorkflow] with types: [completed] silently never fires.
+  This breaks fan-out CI/CD architectures where a primary workflow is gated by path/branch
+  filters, and secondary workflows (deploy, notify, publish) depend on its completion.
+  When the paths filter causes the primary workflow to be skipped entirely, the downstream
+  chain is dropped with no error message.
+  The issue affects both on.push.paths and on.push.branches filters. It does not affect
+  workflows that run but exit early via an if: condition on a job — only skipped runs
+  (which never appear in the GitHub Actions run list) cause the downstream gap.
+fix: |
+  Replace trigger-level on.push.paths filtering with in-workflow job-level path detection
+  using an action like dorny/paths-filter. This ensures the upstream workflow always
+  creates a run (triggering the workflow_run event), while individual jobs are skipped
+  when paths do not match.
+fix_code:
+  - language: yaml
+    label: 'Replace trigger-level paths filter with in-workflow detection'
+    code: |
+      # upstream.yml
+      # BAD: on.push.paths silently skips the run — workflow_run downstream never fires
+      # on:
+      #   push:
+      #     paths: ['src/**']
+      # GOOD: Always run, detect paths inside the workflow
+      on: [push]
+      jobs:
+        detect-changes:
+          runs-on: ubuntu-latest
+          outputs:
+            src-changed: ${{ steps.filter.outputs.src }}
+          steps:
+            - uses: actions/checkout@v4
+            - uses: dorny/paths-filter@v3
+              id: filter
+              with:
+                filters: |
+                  src:
+                    - 'src/**'
+        build:
+          needs: detect-changes
+          if: ${{ needs.detect-changes.outputs.src-changed == 'true' }}
+          runs-on: ubuntu-latest
+          steps:
+            - uses: actions/checkout@v4
+            - run: ./build.sh
+      # downstream.yml — now reliably fires on every push (the workflows entry must match the upstream workflow's name)
+      # on:
+      #   workflow_run:
+      #     workflows: ['Upstream CI']
+      #     types: [completed]
+prevention:
+  - 'Do not combine on.push.paths/branches filters with workflow_run downstream dependencies'
+  - 'Use dorny/paths-filter or tj-actions/changed-files inside always-running workflows instead'
+  - 'Test the full trigger chain end-to-end by pushing commits that both match and do not match the filter'
+  - 'Document the skipped-runs gap in team CI docs for anyone building workflow_run chains'
+docs:
+  - url: 'https://docs.github.com/en/actions/writing-workflows/choosing-when-your-workflow-runs/events-that-trigger-workflows#workflow_run'
+    label: 'GitHub Docs: workflow_run event'
+  - url: 'https://github.com/dorny/paths-filter'
+    label: 'dorny/paths-filter — job-level path filtering'
+  - url: 'https://github.com/orgs/community/discussions/23710'
+    label: 'GitHub Community: workflow_run not triggered when upstream workflow is skipped'

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@htekdev/actions-debugger",
-  "version": "1.0.28",
+  "version": "1.0.29",
   "description": "65+ real GitHub Actions errors, queryable by agents. CLI + MCP server + Copilot skills + error database.",
   "type": "module",
   "main": "./dist/index.js",