npm - @htekdev/actions-debugger - Versions diffs - 1.0.29 → 1.0.30 - Mend

@htekdev/actions-debugger 1.0.29 → 1.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/errors/known-unsolved/input-unset-vs-empty-string.yml ADDED Viewed

@@ -0,0 +1,78 @@
+id: 'known-unsolved-030'
+title: 'core.getInput cannot distinguish unset input from explicitly-empty input'
+category: known-unsolved
+severity: limitation
+tags:
+  - toolkit
+  - core-getInput
+  - composite-action
+  - empty-string
+  - null-input
+  - fork-secrets
+patterns:
+  - regex: 'core\.getInput\('
+    flags: 'i'
+error_messages:
+  - 'No way to detect if input was provided vs set to empty string'
+root_cause: |
+  The `core.getInput(name)` function in `@actions/core` always returns an empty string ('')
+  when an input is either not provided by the caller or explicitly set to the empty string.
+  There is no `core.hasInput(name)` API or other mechanism to distinguish these two cases.
+  This creates several practical problems for action authors:
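+  - A required input set to '' is rejected by getInput(name, { required: true }) with the same
+    "Input required and not supplied" error as an omitted input, so the two cases cannot be reported differently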
+  - Fork pull requests inject secrets as empty strings (secrets are unavailable); an action cannot tell
+    if a secret input was omitted vs provided-as-empty-because-fork
+  - Composite action callers cannot express "I intentionally leave this blank" vs "I don't provide this at all"
+  - YAML null or ~ values (e.g., with: my_val: ~) are coerced to '' by the runner before the action sees them
+  Upstream GitHub toolkit issue #940 has been open since 2022 with 22 upvotes and no fix planned.
+fix: |
+  No direct fix exists — there is no core.hasInput() API. Workarounds depend on the use case:
+  - For sentinel detection: document a convention like 'none' or '__unset__' as the explicit absent value
+    and check getInput('x') === 'none'
+  - For fork secret detection: check github.event.pull_request.head.repo.fork == true and gate on that
+    rather than on whether the secret is empty
+  - For optional inputs: provide a well-documented default value in action.yml so callers always get a
+    predictable non-empty string when they omit the input
+  - For composite actions: use ${{ inputs.my_input != '' }} in if: conditions, documenting that
+    callers must pass a non-empty string to opt in
+fix_code:
+  - language: yaml
+    label: 'Use sentinel value convention to detect absent input'
+    code: |
+      # action.yml — declare sentinel default
+      inputs:
+        deploy_env:
+          description: 'Target environment (leave blank to skip deployment)'
+          required: false
+          default: '__unset__'
+      # In composite action steps
+      steps:
+        - name: Deploy
+          if: ${{ inputs.deploy_env != '__unset__' && inputs.deploy_env != '' }}
+          run: echo "Deploying to ${{ inputs.deploy_env }}"
+  - language: yaml
+    label: 'Detect fork PR to guard secret-gated steps instead of empty-check'
+    code: |
+      steps:
+        - name: Publish (skip on fork PRs)
+          if: >-
+            ${{ github.event_name != 'pull_request' ||
+                github.event.pull_request.head.repo.full_name == github.repository }}
+          run: npm config set "//registry.npmjs.org/:_authToken" "$NPM_TOKEN"
+          env:
+            NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+prevention:
+  - 'Document in action.yml that empty string and absent are treated identically by core.getInput'
+  - 'Use a non-empty sentinel default value (e.g. __unset__) instead of relying on empty-check logic'
+  - 'Never gate fork-secret logic on secret emptiness — use fork-detection via event context instead'
+  - 'For required inputs that must be non-empty, add an explicit validation step that fails with a helpful message'
+docs:
+  - url: 'https://github.com/actions/toolkit/issues/940'
+    label: 'actions/toolkit#940: Impossible to detect unset inputs from inputs set as empty string'
+  - url: 'https://github.com/actions/toolkit/tree/main/packages/core'
+    label: 'actions/toolkit core package — getInput API'
+  - url: 'https://docs.github.com/en/actions/sharing-automations/creating-actions/metadata-syntax-for-github-actions#inputs'
+    label: 'GitHub Docs: Action metadata — inputs'

package/errors/runner-environment/arc-task-cancelled-pod-eviction.yml ADDED Viewed

@@ -0,0 +1,90 @@
+id: 'runner-environment-083'
+title: 'Actions Runner Controller pods intermittently fail with "A task was cancelled" during large matrix jobs'
+category: runner-environment
+severity: error
+tags:
+  - arc
+  - kubernetes
+  - matrix
+  - task-cancelled
+  - pod-eviction
+  - self-hosted
+  - keda
+patterns:
+  - regex: 'A task was cancell?ed\.'
+    flags: 'i'
+  - regex: 'The operation was canceled\.'
+    flags: 'i'
+error_messages:
+  - 'Error: A task was cancelled.'
+  - 'Error: The operation was canceled.'
+root_cause: |
+  When using Actions Runner Controller (ARC) to run GitHub Actions on Kubernetes, ephemeral
+  runner pods can be evicted or preempted mid-job by the Kubernetes scheduler, producing
+  an "A task was cancelled" or "The operation was canceled" error with no application-level
+  log output before the failure.
+  Common causes:
+  - Kubernetes resource pressure: if a node is under memory or CPU pressure, the kubelet
+    evicts lower-priority pods. ARC runner pods have no PriorityClass by default and are
+    among the first to be evicted
+  - Node autoscaling: Cluster Autoscaler draining nodes for scale-down triggers eviction of
+    runner pods that have been running longer than the scale-down grace period
+  - KEDA queue-length scaling: KEDA scaling down the runner Deployment while jobs are
+    in-flight terminates runner pods before jobs complete
+  - OOM kills: matrix jobs that each consume significant memory can saturate node memory,
+    causing the OOM killer to terminate runner pods
+  The issue is especially common with large matrix builds (10+ parallel jobs) because the
+  aggregate resource demand spike can trigger autoscaler or eviction behavior. Because ARC
+  runner pods are ephemeral and have no restart policy, the job is permanently failed when
+  the pod is evicted.
+fix: |
+  Assign a high PriorityClass to ARC runner pods so the Kubernetes scheduler avoids evicting
+  them during resource pressure. Also set adequate resource requests/limits and configure
+  terminationGracePeriodSeconds to at least the expected maximum job duration.
+fix_code:
+  - language: yaml
+    label: 'Create a high-priority PriorityClass for ARC runner pods'
+    code: |
+      apiVersion: scheduling.k8s.io/v1
+      kind: PriorityClass
+      metadata:
+        name: github-runner-high
+      value: 1000000
+      globalDefault: false
+      description: 'High priority for GitHub Actions runner pods to prevent eviction'
+  - language: yaml
+    label: 'Reference PriorityClass and set resource limits in ARC AutoscalingRunnerSet values'
+    code: |
+      # Helm values for the ARC gha-runner-scale-set chart (renders the AutoscalingRunnerSet)
+      template:
+        spec:
+          priorityClassName: github-runner-high
+          # Allow jobs up to 1 hour to finish before pod is force-terminated
+          terminationGracePeriodSeconds: 3600
+          containers:
+            - name: runner
+              resources:
+                requests:
+                  memory: '2Gi'
+                  cpu: '500m'
+                limits:
+                  memory: '4Gi'
+                  cpu: '2000m'
+prevention:
+  - 'Assign a PriorityClass to ARC runner pods to prevent eviction under resource pressure'
+  - 'Set terminationGracePeriodSeconds to at least the expected maximum single-job duration'
+  - 'Set explicit resource requests and limits to avoid OOM kills during large matrix builds'
+  - 'Configure KEDA scale-down stabilization windows to prevent scaling down while jobs run'
+  - 'Monitor node resource utilization and right-size cluster nodes for peak matrix concurrency'
+  - 'Enable PodDisruptionBudgets for runner workloads to limit voluntary disruptions (evictions) during node drains'
+docs:
+  - url: 'https://github.com/actions/runner/issues/3819'
+    label: 'actions/runner#3819: A lot of random "A task was cancelled" errors'
+  - url: 'https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/'
+    label: 'Kubernetes: Pod Priority and Preemption'
+  - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/autoscaling-with-self-hosted-runners'
+    label: 'GitHub Docs: Autoscaling with self-hosted runners'
+  - url: 'https://github.com/actions/actions-runner-controller'
+    label: 'Actions Runner Controller (ARC) GitHub repository'

package/errors/runner-environment/bash-script-path-unquoted-spaces.yml ADDED Viewed

@@ -0,0 +1,83 @@
+id: 'runner-environment-084'
+title: 'Bash/sh script handler does not quote script path — fails when path contains spaces'
+category: runner-environment
+severity: error
+tags:
+  - bash
+  - shell
+  - path-spaces
+  - job-hooks
+  - self-hosted
+  - macos
+  - tart-vm
+patterns:
+  - regex: 'ACTIONS_RUNNER_HOOK_JOB_(?:STARTED|COMPLETED)'
+    flags: 'i'
+  - regex: 'bash.*No such file or directory'
+    flags: 'i'
+error_messages:
+  - 'bash: /Volumes/My Shared Files/hook.sh: No such file or directory'
+  - 'sh: /path with spaces/script.sh: not found'
+  - '/path/with: not found'
+root_cause: |
+  The GitHub Actions runner bash/sh script handler does not quote the script path
+  placeholder when building the shell invocation. The default bash arguments in
+  ScriptHandlerHelpers.cs are:
+    --noprofile --norc -e -o pipefail {0}
+  When {0} is replaced with a path containing spaces — e.g.,
+    /Volumes/My Shared Files/hook.sh
+  bash receives the path as two separate arguments due to word splitting:
+    bash --noprofile --norc -e -o pipefail /Volumes/My Shared Files/hook.sh
+  This causes a "No such file or directory" error for the first word-split token.
+  By contrast, the PowerShell and cmd handlers correctly quote the path:
+    pwsh:       -command "& '{0}'"
+    powershell: -command ". '{0}'"
+    cmd:        /D /E:ON /V:OFF /S /C "CALL "{0}""
+  Only bash and sh are affected (runner#4404, unresolved as of June 2026).
+  Practical impact:
+  - Job hooks (ACTIONS_RUNNER_HOOK_JOB_STARTED, ACTIONS_RUNNER_HOOK_JOB_COMPLETED) placed in
+    shared directories with spaces — common on macOS Tart VMs mounted at /Volumes/My Shared Files/
+  - Self-hosted runner workspaces on paths containing spaces (less common but possible)
+  - Any run: step using a working-directory with spaces in the resolved path
+fix: |
+  Ensure hook script paths and runner working directories never contain spaces.
+  On macOS Tart VMs, place hook scripts under a path without spaces (e.g., /Users/runner/hooks/).
+  Use a wrapper script at a space-free path that exec-delegates to the actual script if
+  it must reside under a shared mount with spaces in its path.
+  Monitor actions/runner#4404 for the upstream fix and upgrade when a patched runner ships.
+fix_code:
+  - language: yaml
+    label: 'Configure job hook at a space-free path (environment variable)'
+    code: |
+      # In the runner .env file (e.g., /home/runner/actions-runner/.env):
+      # Point hook variables to a path WITHOUT spaces
+      ACTIONS_RUNNER_HOOK_JOB_STARTED=/Users/runner/hooks/job-started.sh
+      ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/Users/runner/hooks/job-completed.sh
+      #
+      # Avoid paths like:
+      # /Volumes/My Shared Files/hooks/  ← spaces cause bash word-splitting error
+  - language: bash
+    label: 'Wrapper script at space-free path delegates to actual hook in shared mount'
+    code: |
+      #!/bin/bash
+      # /Users/runner/hooks/job-started.sh  (space-free path — registered as the hook)
+      #
+      # Exec-delegates to the actual hook that lives under a shared volume with spaces.
+      # Using exec preserves exit codes and avoids a subprocess layer.
+      exec "/Volumes/My Shared Files/hooks/actual-job-started.sh" "$@"
+prevention:
+  - 'Never place runner hooks, workspace paths, or working-directories in paths containing spaces'
+  - 'On macOS Tart VMs, configure shared mounts to use space-free mount points (e.g., /Volumes/SharedFiles)'
+  - 'Test runner hook invocations explicitly on macOS or Windows deployments with shared mounts'
+  - 'Watch actions/runner#4404 for the upstream fix; upgrade the runner version when it ships'
+docs:
+  - url: 'https://github.com/actions/runner/issues/4404'
+    label: 'actions/runner#4404: Bash script handler does not quote script path — breaks with spaces'
+  - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job'
+    label: 'GitHub Docs: Running scripts before or after a job (hooks)'
+  - url: 'https://github.com/actions/runner/blob/main/src/Runner.Worker/Handlers/ScriptHandlerHelpers.cs'
+    label: 'Runner source: ScriptHandlerHelpers.cs (unquoted bash path template)'

package/errors/runner-environment/self-hosted-runner-stuck-between-jobs.yml ADDED Viewed

@@ -0,0 +1,109 @@
+id: 'runner-environment-082'
+title: 'Self-hosted runner gets stuck "Waiting for a runner to pick up this job" between jobs in the same workflow'
+category: runner-environment
+severity: error
+tags:
+  - self-hosted
+  - runner
+  - multi-job
+  - queued
+  - stuck
+  - windows
+  - auto-update
+patterns:
+  - regex: 'Waiting for a runner to pick up this job'
+    flags: 'i'
+error_messages:
+  - 'Waiting for a runner to pick up this job...'
+root_cause: |
+  After completing the first job in a multi-job workflow, a self-hosted runner sometimes
+  fails to pick up subsequent jobs in the same workflow run. The subsequent jobs remain
+  in queued status indefinitely, with no timeout and no automatic retry.
+  Common root causes:
+  - Runner auto-update race condition: when the runner auto-updates between jobs, the post-job
+    cleanup of the first job can leave the runner in a state where it reports idle to the
+    broker but cannot accept new job messages
+  - Windows service restart latency: on Windows hosts, if the runner was auto-updated or
+    the service restarted between jobs, the new process may not have fully re-registered
+    with the GitHub Actions broker before the second job is dispatched
+  - JIT token expiry: in ephemeral/JIT runner setups, the registration token can expire
+    between jobs if the first job runs for a long time, and the runner cannot re-register
+  - Broker disconnect: a transient network interruption between jobs severs the long-poll
+    connection; the runner reconnects but the already-dispatched job message is missed
+  Manually cancelling and re-running the workflow, or restarting the runner service,
+  resolves the issue immediately, confirming the runner is functional but lost broker contact.
+fix: |
+  Immediate workaround: cancel the stuck workflow run and re-trigger it, or restart the
+  runner service:
+    Windows: Restart-Service "actions.runner.*"
+    Linux:   sudo systemctl restart actions.runner.*.<name>.service
+  Long-term fixes:
+  - Disable runner auto-update by registering the runner with the --disableupdate flag
+    (./config.sh ... --disableupdate) and pinning a specific runner version
+  - Use ephemeral runners (--ephemeral flag) so each job dispatches a fresh runner that
+    registers anew with the broker, eliminating the between-job reconnect window
+  - Split long workflows into separate workflow files triggered via workflow_run or
+    repository_dispatch so each workflow gets an independent runner session
+  - On Windows: ensure the runner service account has the "Log on as a service" right
+    and that antivirus is not blocking runner binary updates
+fix_code:
+  - language: yaml
+    label: 'Use ephemeral runners to avoid stuck-between-jobs on self-hosted'
+    code: |
+      # When configuring the runner, use the --ephemeral flag:
+      # ./config.sh --url https://github.com/OWNER/REPO --token TOKEN --ephemeral
+      #
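+      # For ARC (Actions Runner Controller), runners created by a runner scale set
+      # (gha-runner-scale-set chart) are always ephemeral — no extra setting is needed.
+      # With the legacy RunnerDeployment CRD, keep the default:
+      # spec:
+      #   template:
+      #     spec:
+      #       ephemeral: true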
+      #
+      # Each job gets a freshly-registered runner; no between-job broker reconnect issues.
+  - language: yaml
+    label: 'Split multi-job workflow into two workflows triggered by workflow_run'
+    code: |
+      # phase1.yml
+      name: phase1
+      on:
+        push:
+      jobs:
+        build:
+          runs-on: [self-hosted, linux]
+          steps:
+            - uses: actions/checkout@v4
+            - run: make build
+            - uses: actions/upload-artifact@v4
+              with:
+                name: build-output
+                path: dist/
+      # phase2.yml (fresh runner registration — no stuck-between-jobs risk)
+      # workflow_run matches the triggering workflow by its name, not its file name
+      on:
+        workflow_run:
+          workflows: [phase1]
+          types: [completed]
+      jobs:
+        test:
+          if: ${{ github.event.workflow_run.conclusion == 'success' }}
+          runs-on: [self-hosted, linux]
+          steps:
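+            - uses: actions/download-artifact@v4
+              with:
+                name: build-output
+                # The artifact belongs to the triggering run, so reference that run explicitly
+                run-id: ${{ github.event.workflow_run.id }}
+                github-token: ${{ secrets.GITHUB_TOKEN }}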
+            - run: make test
+prevention:
+  - 'Use ephemeral runners (--ephemeral) to ensure each job gets a fresh broker registration'
+  - 'Configure the runner service with Restart=on-failure to auto-recover from crashes between jobs'
+  - 'Pin runner versions and suppress auto-updates in production to prevent mid-workflow upgrades'
+  - 'Monitor for stuck runs via the GitHub Actions API and alert or auto-cancel them'
+docs:
+  - url: 'https://github.com/actions/runner/issues/3609'
+    label: 'actions/runner#3609: Self-hosted runner stuck on "Waiting for a runner to pick up this job"'
+  - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners'
+    label: 'GitHub Docs: About self-hosted runners'
+  - url: 'https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/autoscaling-with-self-hosted-runners'
+    label: 'GitHub Docs: Autoscaling with self-hosted runners'

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@htekdev/actions-debugger",
-  "version": "1.0.29",
+  "version": "1.0.30",
   "description": "65+ real GitHub Actions errors, queryable by agents. CLI + MCP server + Copilot skills + error database.",
   "type": "module",
   "main": "./dist/index.js",