security-mcp 1.1.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/README.md +164 -185
  2. package/defaults/checklists/ai.json +20 -1
  3. package/defaults/checklists/api.json +35 -1
  4. package/defaults/checklists/infra.json +34 -1
  5. package/defaults/checklists/mobile.json +23 -1
  6. package/defaults/checklists/payments.json +15 -1
  7. package/defaults/checklists/web.json +11 -1
  8. package/defaults/control-catalog.json +200 -0
  9. package/defaults/security-policy.json +2 -2
  10. package/dist/cli/index.js +82 -5
  11. package/dist/cli/install.js +36 -6
  12. package/dist/cli/onboarding.js +6 -0
  13. package/dist/gate/baseline.js +82 -7
  14. package/dist/gate/catalog.js +10 -2
  15. package/dist/gate/checks/ai.js +757 -39
  16. package/dist/gate/checks/auth-deep.js +935 -0
  17. package/dist/gate/checks/business-logic.js +751 -0
  18. package/dist/gate/checks/ci-pipeline.js +399 -4
  19. package/dist/gate/checks/crypto.js +423 -2
  20. package/dist/gate/checks/dependencies.js +571 -15
  21. package/dist/gate/checks/graphql.js +201 -19
  22. package/dist/gate/checks/infra.js +246 -1
  23. package/dist/gate/checks/injection-deep.js +848 -0
  24. package/dist/gate/checks/k8s.js +114 -1
  25. package/dist/gate/checks/mobile-android.js +917 -3
  26. package/dist/gate/checks/mobile-ios.js +797 -5
  27. package/dist/gate/checks/required-artifacts.js +194 -0
  28. package/dist/gate/checks/runtime.js +178 -0
  29. package/dist/gate/checks/secrets.js +244 -13
  30. package/dist/gate/checks/supply-chain-deep.js +787 -0
  31. package/dist/gate/checks/web-nextjs.js +572 -48
  32. package/dist/gate/diff.js +17 -5
  33. package/dist/gate/evidence.js +8 -1
  34. package/dist/gate/exceptions.js +131 -9
  35. package/dist/gate/policy.js +282 -129
  36. package/dist/mcp/audit-chain.js +122 -28
  37. package/dist/mcp/auth.js +169 -0
  38. package/dist/mcp/learning.js +129 -4
  39. package/dist/mcp/model-router.js +158 -21
  40. package/dist/mcp/orchestration.js +186 -51
  41. package/dist/mcp/server.js +608 -94
  42. package/dist/repo/fs.js +24 -1
  43. package/dist/repo/search.js +31 -6
  44. package/dist/review/store.js +52 -1
  45. package/package.json +7 -7
  46. package/prompts/SECURITY_PROMPT.md +73 -0
  47. package/skills/_TEMPLATE/SKILL.md +99 -0
  48. package/skills/advanced-dos-tester/SKILL.md +109 -0
  49. package/skills/agentic-loop-exploiter/SKILL.md +368 -0
  50. package/skills/ai-llm-redteam/SKILL.md +104 -0
  51. package/skills/ai-model-supply-chain-agent/SKILL.md +103 -0
  52. package/skills/algorithm-implementation-reviewer/SKILL.md +98 -0
  53. package/skills/android-penetration-tester/SKILL.md +455 -46
  54. package/skills/anti-replay-tester/SKILL.md +106 -0
  55. package/skills/appsec-code-auditor/SKILL.md +120 -0
  56. package/skills/artifact-integrity-analyst/SKILL.md +441 -0
  57. package/skills/attack-navigator/SKILL.md +467 -8
  58. package/skills/auth-session-hacker/SKILL.md +128 -0
  59. package/skills/aws-penetration-tester/SKILL.md +456 -0
  60. package/skills/azure-penetration-tester/SKILL.md +490 -3
  61. package/skills/binary-auth-validator/SKILL.md +111 -0
  62. package/skills/bot-detection-specialist/SKILL.md +109 -0
  63. package/skills/business-logic-attacker/SKILL.md +231 -0
  64. package/skills/capec-code-mapper/SKILL.md +84 -0
  65. package/skills/cert-pin-rotation-specialist/SKILL.md +112 -0
  66. package/skills/cicd-pipeline-hijacker/SKILL.md +405 -0
  67. package/skills/ciso-orchestrator/SKILL.md +454 -43
  68. package/skills/cloud-infra-specialist/SKILL.md +118 -0
  69. package/skills/compliance-gap-analyst/SKILL.md +422 -0
  70. package/skills/compliance-grc/SKILL.md +85 -0
  71. package/skills/compliance-lifecycle-tracker/SKILL.md +84 -0
  72. package/skills/credential-stuffing-specialist/SKILL.md +102 -0
  73. package/skills/crypto-pki-specialist/SKILL.md +87 -0
  74. package/skills/csa-ccm-mapper/SKILL.md +84 -0
  75. package/skills/csf2-governance-mapper/SKILL.md +84 -0
  76. package/skills/deep-link-fuzzer/SKILL.md +109 -0
  77. package/skills/dependency-confusion-attacker/SKILL.md +415 -0
  78. package/skills/device-integrity-aggregator/SKILL.md +108 -0
  79. package/skills/dos-resilience-tester/SKILL.md +97 -0
  80. package/skills/dread-scorer/SKILL.md +84 -0
  81. package/skills/egress-policy-enforcer/SKILL.md +99 -0
  82. package/skills/evidence-collector/SKILL.md +98 -0
  83. package/skills/file-upload-attacker/SKILL.md +109 -0
  84. package/skills/gcp-penetration-tester/SKILL.md +459 -2
  85. package/skills/git-history-secret-scanner/SKILL.md +106 -0
  86. package/skills/iam-privesc-graph-builder/SKILL.md +152 -0
  87. package/skills/incident-responder/SKILL.md +111 -0
  88. package/skills/injection-specialist/SKILL.md +131 -0
  89. package/skills/ios-security-auditor/SKILL.md +282 -0
  90. package/skills/json-ambiguity-tester/SKILL.md +0 -0
  91. package/skills/k8s-container-escaper/SKILL.md +384 -0
  92. package/skills/key-management-lifecycle-analyst/SKILL.md +98 -0
  93. package/skills/kill-switch-engineer/SKILL.md +102 -0
  94. package/skills/linddun-privacy-analyst/SKILL.md +102 -0
  95. package/skills/logic-race-fuzzer/SKILL.md +443 -0
  96. package/skills/mobile-api-network-attacker/SKILL.md +421 -0
  97. package/skills/mobile-binary-hardener/SKILL.md +102 -0
  98. package/skills/mobile-security-specialist/SKILL.md +85 -0
  99. package/skills/mobile-webview-auditor/SKILL.md +96 -0
  100. package/skills/model-extraction-attacker/SKILL.md +219 -0
  101. package/skills/multipart-abuse-tester/SKILL.md +84 -0
  102. package/skills/oauth-pkce-specialist/SKILL.md +104 -0
  103. package/skills/parser-exhaustion-tester/SKILL.md +142 -0
  104. package/skills/pentest-infra/SKILL.md +141 -0
  105. package/skills/pentest-social/SKILL.md +201 -0
  106. package/skills/pentest-team/SKILL.md +134 -0
  107. package/skills/pentest-web-api/SKILL.md +151 -0
  108. package/skills/privacy-flow-analyst/SKILL.md +234 -0
  109. package/skills/prompt-injection-specialist/SKILL.md +394 -0
  110. package/skills/quantum-migration-planner/SKILL.md +96 -0
  111. package/skills/rag-poisoning-specialist/SKILL.md +358 -0
  112. package/skills/registry-mirror-enforcer/SKILL.md +84 -0
  113. package/skills/rotation-validation-agent/SKILL.md +112 -0
  114. package/skills/samm-assessor/SKILL.md +85 -0
  115. package/skills/secrets-mask-bypass-tester/SKILL.md +100 -0
  116. package/skills/senior-security-engineer/SKILL.md +370 -2
  117. package/skills/serialization-memory-attacker/SKILL.md +332 -0
  118. package/skills/session-timeout-tester/SKILL.md +161 -0
  119. package/skills/slsa-level3-enforcer/SKILL.md +112 -0
  120. package/skills/slsa-provenance-enforcer/SKILL.md +102 -0
  121. package/skills/ssrf-detection-validator/SKILL.md +108 -0
  122. package/skills/step-up-auth-enforcer/SKILL.md +84 -0
  123. package/skills/stride-pasta-analyst/SKILL.md +420 -0
  124. package/skills/supply-chain-devsecops/SKILL.md +98 -0
  125. package/skills/threat-infrastructure-analyst/SKILL.md +84 -0
  126. package/skills/threat-modeler/SKILL.md +85 -0
  127. package/skills/tls-certificate-auditor/SKILL.md +573 -18
  128. package/skills/token-reuse-detector/SKILL.md +95 -0
  129. package/skills/trike-risk-modeler/SKILL.md +84 -0
  130. package/skills/unicode-homograph-tester/SKILL.md +84 -0
  131. package/skills/waf-rule-lifecycle-agent/SKILL.md +97 -0
  132. package/skills/webhook-security-tester/SKILL.md +102 -0
  133. package/skills/zero-trust-architect/SKILL.md +109 -0
@@ -72,3 +72,387 @@ If internet permitted:
72
72
  - Escape chain or privilege escalation path
73
73
  - Fixed Kubernetes manifest written inline
74
74
  - §4 CIS Benchmark control reference
75
+
76
+ Every findings JSON MUST include `intelligenceForOtherAgents`:
77
+ ```json
78
+ {
79
+ "intelligenceForOtherAgents": {
80
+ "forPentestTeam": [{ "type": "HIGH_VALUE_TARGET", "description": "...", "exploitHint": "..." }],
81
+ "forCryptoSpecialist": [{ "type": "CRYPTO_WEAKNESS_REFERENCE", "algorithm": "...", "location": "..." }],
82
+ "forCloudSpecialist": [{ "type": "SSRF_TO_CLOUD_CHAIN", "ssrfLocation": "...", "escalationPath": "..." }],
83
+ "forComplianceGrc": [{ "type": "COMPLIANCE_BLOCKER", "frameworks": ["..."], "releaseBlock": true }]
84
+ }
85
+ }
86
+ ```
87
+
88
+ ---
89
+
90
+ ## BEYOND SKILL.MD — MANDATORY EXPANSIONS
91
+
92
+ ### 1. CVE-2022-0185 — Linux Kernel `fsconfig` Heap Overflow → Container Escape
93
+
94
+ **Technique:** A heap overflow in the `legacy_parse_param` function of the Linux kernel's filesystem context API allows an unprivileged user inside a container with `CAP_SYS_ADMIN` (or a user namespace with that capability) to escalate to full host root. Containers running on kernel versions < 5.16.2 that expose `CAP_SYS_ADMIN` or run with `privileged: true` are directly exploitable.
95
+
96
+ **Concrete test:**
97
+ ```bash
98
+ # Detect vulnerable kernel version in-cluster
99
+ kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.status.nodeInfo.kernelVersion}{"\n"}{end}'
100
+ # Flag any node kernel < 5.16.2
101
+ # Grep manifests for capability grants
102
+ grep -r "SYS_ADMIN\|ALL\|privileged: true" k8s/ helm/
103
+ ```
104
+
105
+ **Finding:** Any manifest granting `CAP_SYS_ADMIN` or `privileged: true` on a node with kernel < 5.16.2 is a confirmed CRITICAL escape path. Remediation: patch kernel; remove capability; enforce `allowPrivilegeEscalation: false`.
106
+
107
+ ---
108
+
109
+ ### 2. CVE-2021-25741 — Symlink Race Condition in kubelet → hostPath Escape
110
+
111
+ **Technique:** The kubelet's `subPath` volume handling in Kubernetes < 1.19.15, < 1.20.11, and < 1.21.5 allowed an attacker who controlled a Pod's writable filesystem to replace a directory with a symlink after the kubelet validated it, causing the kubelet to follow the symlink and expose arbitrary host paths. An attacker with pod creation permission could read `/etc/kubernetes/pki/` or the host `/etc/shadow`.
112
+
113
+ **Concrete test:**
114
+ ```bash
115
+ # Check cluster version
116
+ kubectl version --short
117
+ # Grep for subPath usage paired with writable volumes
118
+ grep -r "subPath" k8s/ | grep -v readOnly
119
+ # Policy check: does OPA/Kyverno block subPath + hostPath combos?
120
+ kubectl get constrainttemplate -o name | grep -i hostpath
121
+ ```
122
+
123
+ **Finding:** Cluster version in the affected range + any `subPath` use on a writable volume without patching = CRITICAL. Fix: upgrade kubelet; if upgrade blocked, apply the Kyverno policy that denies `subPath` on `hostPath` volumes.
124
+
125
+ ---
126
+
127
+ ### 3. Token Projection Attack — Audience-Bound Service Account Tokens Bypassed via `tokenRequestProjection`
128
+
129
+ **Technique:** When a pod uses a projected service account token with a non-default audience (e.g., `audience: vault`), the token is considered scoped. However, if the kube-apiserver's `--service-account-issuer` is the same issuer as an external OIDC consumer and the audience validation is misconfigured, the token may be accepted by both the Kubernetes API and the external service. This allows an attacker who steals one token to authenticate to both systems.
130
+
131
+ **Concrete test:**
132
+ ```bash
133
+ # Find all projected token volumes and their audiences
134
+ grep -r "serviceAccountToken\|audience:" k8s/ helm/ --include="*.yaml" -A3
135
+ # Verify issuer isolation
136
+ kubectl get --raw /.well-known/openid-configuration | jq .issuer
137
+ # Test: does the cluster SA token work against an external OIDC endpoint?
138
+ ```
139
+
140
+ **Finding:** Any projected token whose audience matches an external OIDC relying party that also accepts the cluster issuer = CRITICAL token reuse chain.
141
+
142
+ ---
143
+
144
+ ### 4. AI-Assisted Fuzzing of Kubernetes Admission Webhook Bypass (Emerging Threat)
145
+
146
+ **Technique:** LLM-powered fuzzers (e.g., Peach Fuzzer with GPT augmentation, or custom tool chains built on the Anthropic and OpenAI APIs) can generate syntactically valid but semantically adversarial Kubernetes manifests at scale — targeting admission webhook logic. Bypasses include: deeply nested `initContainers` that webhooks fail to traverse, annotations with null bytes triggering parser differentials between the webhook and kubelet, and `ephemeralContainers` that some OPA/Kyverno policies do not evaluate.
147
+
148
+ **Concrete test:**
149
+ ```bash
150
+ # Check if webhook covers ephemeralContainers
151
+ kubectl get validatingwebhookconfigurations -o json | jq '.items[].webhooks[].rules[].resources'
152
+ # Flag if "ephemeralcontainers" is absent from the resource list
153
+ # Also test null byte in annotation key via dry-run
154
+ kubectl apply --dry-run=server -f - <<EOF
155
+ apiVersion: v1
156
+ kind: Pod
157
+ metadata:
158
+ name: test
159
+ annotations:
160
+ "key\x00evil": "value"
161
+ spec:
162
+ containers: [{"name":"c","image":"alpine"}]
163
+ EOF
164
+ ```
165
+
166
+ **Finding:** Webhooks not covering `ephemeralContainers` = HIGH bypass surface. Null-byte parser differential = CRITICAL if kubelet accepts what webhook rejects.
167
+
168
+ ---
169
+
170
+ ### 5. Supply Chain Attack via Compromised Base Image in Private Registry
171
+
172
+ **Technique:** Attackers who compromise a private container registry (via weak credentials, SSRF to the registry API, or a poisoned CI/CD pipeline) can replace a legitimate base image with a backdoored layer. If image pull policies are `Always` but no image signing verification (Sigstore/Cosign, Notary v2) is enforced at admission, a compromised image ships to production silently. This is distinct from public registry typosquatting — it targets the org's own registry.
173
+
174
+ **Concrete test:**
175
+ ```bash
176
+ # Check imagePullPolicy across all deployments
177
+ grep -r "imagePullPolicy" k8s/ helm/ | grep -v "Always\|IfNotPresent" # flag Never
178
+ grep -r "image:" k8s/ helm/ | grep -v "sha256:" # images without digest pinning
179
+ # Check for Cosign/Sigstore admission policy
180
+ kubectl get clusterimagepolicies 2>/dev/null || kubectl get imagepolicy -A 2>/dev/null
181
+ # Check registry credentials rotation age
182
+ kubectl get secrets -A -o json | jq '.items[] | select(.type=="kubernetes.io/dockerconfigjson") | .metadata'
183
+ ```
184
+
185
+ **Finding:** No image digest pinning + no signing policy + stale registry credentials = CRITICAL supply chain entry point.
186
+
187
+ ---
188
+
189
+ ### 6. Post-Quantum Threat — etcd Encryption at Rest Using AES-CBC is Harvest-Now-Decrypt-Later Exposed
190
+
191
+ **Technique:** Kubernetes encrypts secrets at rest in etcd using provider configurations. The default `aescbc` provider uses AES-256-CBC, which is classically secure but will be broken by a Cryptographically Relevant Quantum Computer (CRQC) estimated by NIST to arrive 2028–2032. Any attacker performing harvest-now-decrypt-later (HNDL) attacks — capturing etcd snapshots today to decrypt later — will gain full access to all cluster secrets stored during this window. etcd backups stored in S3/GCS long-term are the highest-risk surface.
192
+
193
+ **Concrete test:**
194
+ ```bash
195
+ # Check encryption provider config
196
+ kubectl get apiserver -o yaml 2>/dev/null | grep -A10 "encryption"
197
+ # If self-managed, check the apiserver manifest
198
+ grep -r "encryption-provider-config\|aescbc\|aesgcm\|secretbox" /etc/kubernetes/manifests/ 2>/dev/null
199
+ # Check etcd backup retention policies
200
+ # Flag any backup older than the post-quantum migration deadline stored with classical-only encryption
201
+ ```
202
+
203
+ **Finding:** etcd using `aescbc` or `aesgcm` without a post-quantum migration plan + long-lived backups = HIGH risk (HNDL). Prepare by: inventorying secrets lifetime; migrating to `kms` provider with a quantum-safe KMS backend when available; reducing backup retention windows for classical-encrypted snapshots.
204
+
205
+ ---
206
+
207
+ ### 7. Sidecar Injection MITM via Mutating Webhook Abuse
208
+
209
+ **Technique:** A mutating admission webhook with broad permissions can inject a malicious sidecar into every pod in targeted namespaces. If an attacker gains control of the webhook server (via compromising the service it routes to, or by creating a MutatingWebhookConfiguration with a `failurePolicy: Ignore` that takes over from a legitimate one), they inject a sidecar that performs in-cluster traffic interception, credential harvesting from environment variables, or exfiltrates secrets to an external endpoint — all transparently to the application container.
210
+
211
+ **Concrete test:**
212
+ ```bash
213
+ # List all mutating webhooks and their target services
214
+ kubectl get mutatingwebhookconfigurations -o json | jq '.items[] | {name: .metadata.name, service: .webhooks[].clientConfig.service, failurePolicy: .webhooks[].failurePolicy}'
215
+ # Flag: failurePolicy: Ignore (allows bypass if webhook is down)
216
+ # Flag: webhooks targeting services outside kube-system or a known-safe namespace
217
+ # Verify the webhook service TLS cert issuer
218
+ kubectl get mutatingwebhookconfigurations -o json | jq '.items[].webhooks[].clientConfig.caBundle' | base64 -d | openssl x509 -noout -issuer -dates
219
+ ```
220
+
221
+ **Finding:** `failurePolicy: Ignore` on a mutating webhook with namespace-wide scope = HIGH. Webhook service reachable from application namespaces without network policy = CRITICAL escalation path.
222
+
223
+ ---
224
+
225
+ ### 8. Kubernetes API Server Unauthenticated Access via `--anonymous-auth=true`
226
+
227
+ **Technique:** If `--anonymous-auth=true` is set on the kube-apiserver (the default in some distributions prior to 1.20 hardening) and RBAC binds the `system:anonymous` or `system:unauthenticated` group to any ClusterRole, external or in-cluster attackers can perform API operations without credentials. Combine with `cluster-admin` binding to `system:unauthenticated` (seen in misconfigured development clusters promoted to production) = full cluster takeover with a single `curl` command.
228
+
229
+ **Concrete test:**
230
+ ```bash
231
+ # Test from inside the cluster (any pod can do this)
232
+ curl -k https://kubernetes.default.svc/api/v1/namespaces -H "Authorization: " 2>&1 | grep -c "items"
233
+ # Check RBAC bindings for anonymous/unauthenticated
234
+ kubectl get clusterrolebindings -o json | jq '.items[] | select(.subjects[]?.name == "system:anonymous" or .subjects[]?.name == "system:unauthenticated")'
235
+ # Check apiserver flags
236
+ ps aux | grep kube-apiserver | grep -o -- '--anonymous-auth=[^ ]*'
237
+ ```
238
+
239
+ **Finding:** Any ClusterRoleBinding to `system:anonymous` or `system:unauthenticated` = CRITICAL. Immediately escalate.
240
+
241
+ ---
242
+
243
+ ## §K8S_CONTAINER_ESCAPER-CHECKLIST
244
+
245
+ 1. **Privileged Container Check** — Mechanism: `privileged: true` grants full host kernel capabilities equivalent to root on the node. Grep: `grep -r "privileged: true" k8s/ helm/`. Finding: any match is CRITICAL; the container can run `nsenter --target 1 --mount --uts --ipc --net --pid` to obtain a host shell immediately.
246
+
247
+ 2. **Host Namespace Sharing** — Mechanism: `hostPID`, `hostNetwork`, `hostIPC: true` share the node's process table, network stack, or IPC namespace with the container. Grep: `grep -rE "hostPID: true|hostNetwork: true|hostIPC: true" k8s/ helm/`. Finding: any match allows cross-process signal injection, host network sniffing, or IPC abuse; severity HIGH to CRITICAL depending on what runs on the host.
248
+
249
+ 3. **Dangerous Capability Grants** — Mechanism: `capabilities.add` with `SYS_ADMIN`, `NET_ADMIN`, `SYS_PTRACE`, `SYS_MODULE`, or `ALL` enables kernel exploit chains (CVE-2022-0185 etc.) and module loading. Grep: `grep -r "capabilities" k8s/ helm/ -A5 | grep -E "SYS_ADMIN|NET_ADMIN|SYS_PTRACE|SYS_MODULE|ALL"`. Finding: `SYS_ADMIN` = CRITICAL escape path; `NET_ADMIN` = HIGH (ARP/routing attacks); `ALL` = CRITICAL.
250
+
251
+ 4. **hostPath Volume Abuse** — Mechanism: `hostPath` volumes mount node filesystem paths into the container. Sensitive paths (`/`, `/etc`, `/var/lib/kubelet`, `/proc`) allow reading kubelet credentials, cluster CA keys, or node secrets. Grep: `grep -r "hostPath:" k8s/ helm/ -A2`. Finding: `path: /` or `path: /etc/kubernetes` = CRITICAL; any hostPath without `readOnly: true` = HIGH.
252
+
253
+ 5. **Service Account Token Auto-Mount Without Need** — Mechanism: `automountServiceAccountToken: true` (the default) mounts the pod's SA token at `/var/run/secrets/kubernetes.io/serviceaccount/token`. If the SA has broad RBAC, any RCE in the app becomes cluster compromise. Test: `grep -r "automountServiceAccountToken" k8s/ helm/` — flag any `true` or absence of explicit `false` on pods that don't call the K8s API. Finding: auto-mount + SA with `get secrets` or `pods/exec` = CRITICAL chain.
254
+
255
+ 6. **Overly Permissive RBAC — Wildcard Verbs or Resources** — Mechanism: RBAC rules with `verbs: ["*"]` or `resources: ["*"]` grant the bound subject full API access. Particularly dangerous when bound at cluster scope. Grep: `grep -r 'verbs:\|resources:' k8s/ helm/ -A2 | grep '"\\*"'`. Finding: any wildcard at ClusterRole scope = CRITICAL; wildcard in namespace Role with pod/secret access = HIGH.
256
+
257
+ 7. **RBAC `escalate`, `bind`, `impersonate` Permissions** — Mechanism: `escalate` allows a subject to create Roles with permissions exceeding their own; `bind` allows binding any Role to any subject; `impersonate` allows acting as any user/SA. These are privilege escalation primitives. Grep: `grep -rE "escalate|bind|impersonate" k8s/ helm/ --include="*.yaml"`. Finding: any of these at cluster scope = CRITICAL escalation path regardless of current role.
258
+
259
+ 8. **Namespaces Without NetworkPolicy** — Mechanism: absent NetworkPolicy means all pods in the cluster can communicate with all pods in the namespace on any port. An attacker who compromises one pod has unrestricted east-west movement. Test: `kubectl get networkpolicy -A` — flag namespaces with zero policies. Finding: production namespaces with no NetworkPolicy = HIGH lateral movement exposure; combined with privileged pods = CRITICAL.
260
+
261
+ 9. **Secrets Stored as Environment Variables** — Mechanism: secrets mounted as env vars appear in `kubectl describe pod`, in `/proc/<pid>/environ` inside any container with `hostPID`, and in crash dumps/logging frameworks that capture env state. Grep: `grep -r "secretKeyRef\|valueFrom:" k8s/ helm/ -B2 | grep -v "secretKeyRef"` to find raw values; also `grep -rE "env:.*value:.*password|secret|key|token" k8s/ helm/ -i`. Finding: plaintext secret values in manifest = CRITICAL; secret references in env (vs volume mount) = MEDIUM (prefer volume mounts for files, env only for non-file configs).
262
+
263
+ 10. **Missing Pod Security Admission / OPA / Kyverno Enforcement** — Mechanism: without admission control enforcing a policy baseline, any developer with `create pods` can bypass all securityContext requirements by simply omitting them. Test: `kubectl get ns --show-labels | grep pod-security`; `kubectl get constrainttemplate,kyverno -A 2>/dev/null`. Finding: no Pod Security Admission labels on production namespaces AND no OPA/Kyverno policies = HIGH systematic risk; all other findings in this checklist are trivially reachable.
264
+
265
+ 11. **Dockerfile Secrets in ENV or ARG** — Mechanism: `ENV API_KEY=hardcoded` and `ARG SECRET=value` embed secrets into image layers that persist in the image history (`docker history --no-trunc <image>`). Finding: `grep -r "^ENV\|^ARG" */Dockerfile* | grep -iE "key|secret|pass|token|credential"`. Any match = CRITICAL; rotate the exposed credential immediately; rebuild without it using runtime injection.
266
+
267
+ 12. **Image Without Digest Pinning and No Cosign Policy** — Mechanism: image references like `image: nginx:1.25` without a `sha256:` digest can be silently replaced in the registry (tag mutability). Without Sigstore/Cosign admission enforcement, a compromised registry delivers a backdoored image to all nodes on next pull. Grep: `grep -r "image:" k8s/ helm/ | grep -v "sha256:"`. Finding: any production workload without digest pinning = HIGH supply chain risk; no signing policy = compound HIGH.
268
+
269
+ ---
270
+
271
+ ## §POC-REQUIREMENT
272
+
273
+ For every CRITICAL or HIGH finding in this domain:
274
+
275
+ 1. **Write the working PoC FIRST** — exact payload, exact request, observed impact documented before remediation begins.
276
+ 2. **Confirm the PoC reproduces the issue** — run it in a test cluster or simulate the call path; record the output.
277
+ 3. **THEN write the fix** — corrected manifest, RBAC rule, or policy.
278
+ 4. **THEN verify the PoC fails against the fix** — re-run the exact same PoC; confirm it is blocked.
279
+ 5. **Record the PoC in findings JSON** under `exploitPoC`:
280
+
281
+ ```json
282
+ {
283
+ "findingId": "K8S-001",
284
+ "severity": "CRITICAL",
285
+ "title": "Privileged container escape via nsenter",
286
+ "exploitPoC": {
287
+ "precondition": "Pod with privileged: true is running on node",
288
+ "payload": "kubectl exec -it <pod> -- nsenter --target 1 --mount --uts --ipc --net --pid -- /bin/bash",
289
+ "observedImpact": "Interactive shell on host node as root; full filesystem access; can read /etc/kubernetes/pki/",
290
+ "reproduced": true,
291
+ "fixApplied": "Removed privileged: true; added allowPrivilegeEscalation: false; added seccompProfile: RuntimeDefault",
292
+ "pocFailsAfterFix": true
293
+ }
294
+ }
295
+ ```
296
+
297
+ **PoC skipping = finding severity downgraded to MEDIUM automatically.** This is enforced by the orchestrator at merge time. No exceptions for "obvious" findings — the PoC is the proof.
298
+
299
+ ---
300
+
301
+ ## §PROJECT-ESCALATION
302
+
303
+ Call `orchestration.update_agent_status` with `status: "CRITICAL_ESCALATION"` and halt normal flow immediately when any of the following conditions are detected:
304
+
305
+ 1. **`cluster-admin` ClusterRoleBinding to a non-system subject** — Any service account, user, or group outside `kube-system` bound to `cluster-admin` means the entire cluster is one compromise away from total takeover. Every other finding becomes secondary. Halt, escalate, alert orchestrator.
306
+
307
+ 2. **`privileged: true` on a workload reachable from the internet** — A pod with `privileged: true` that is also exposed via an Ingress, NodePort, or LoadBalancer service gives an external attacker a direct path to host-level escape. The blast radius is the entire node and, via node credentials, the entire cluster.
308
+
309
+ 3. **kube-apiserver or etcd exposed without authentication** — Anonymous auth enabled with any RBAC binding to `system:unauthenticated`, OR etcd port 2379/2380 reachable without mTLS, means the cluster's entire secret store and control plane are externally accessible. This is a P0 incident-class finding.
310
+
311
+ 4. **Cluster CA private key or admin kubeconfig committed to the repository** — If `grep -r "BEGIN RSA PRIVATE KEY\|BEGIN EC PRIVATE KEY\|BEGIN CERTIFICATE" k8s/ helm/` or `grep -r "certificate-authority-data\|client-key-data" . --include="*.yaml" --include="*.conf"` returns matches outside of `.gitignore`d paths, the cluster's root of trust is compromised. Immediately escalate — the CA must be rotated, which is a full cluster re-bootstrap.
312
+
313
+ 5. **Supply chain compromise evidence — image digest mismatch or unexpected layer in known image** — If image manifest digests in running pods differ from what is recorded in the repo's manifests or CI build artifacts, a registry-level compromise may have occurred. This is an active incident, not a misconfiguration.
314
+
315
+ 6. **Admission webhook with `failurePolicy: Ignore` and a non-responding or attacker-reachable backend** — If the webhook server is down or its service is reachable from an application namespace, all admission controls fail open. Combined with any other finding in this checklist, the effective policy is "no policy." Escalate to have the webhook restored or set to `Fail` before any other remediation.
316
+
317
+ 7. **RBAC `bind` or `impersonate` permission detected on any non-admin identity** — These permissions are cluster-level privilege escalation primitives. A subject with `bind` can grant themselves `cluster-admin` without directly having it. This renders all other RBAC controls meaningless. Escalate before attempting any fix.
318
+
319
+ 8. **Evidence of an already-executed container escape or lateral movement in pod logs or audit logs** — Strings like `nsenter`, `mount /proc`, `kubectl create pod` from application pod service accounts in the audit log, or anomalous processes in pod stderr, indicate the vulnerability has already been exploited. This transitions from a security review to an active incident response. Stop the review, escalate with full evidence, and do not modify any artifacts that may be needed for forensics.
320
+
321
+ ---
322
+
323
+ ## §EDGE-CASE-MATRIX
324
+
325
+ The 5 attack cases in this domain that automated scanners and naive manual review universally miss. MANDATORY checks — do not skip.
326
+
327
+ | # | Edge Case | Why Scanners Miss It | Concrete Test |
328
+ |---|-----------|----------------------|---------------|
329
+ | 1 | Second-order / stored payload executed in different context | Scanner checks input context, not execution context | Store payload safely; trigger in separate request/session |
330
+ | 2 | Unicode normalisation bypass | Regex filters run before normalisation; attacker uses homoglyphs or composed forms | Submit Ⅰ (U+2160) or < (U+FF1C) variants of known-bad strings |
331
+ | 3 | Polyglot payload active in multiple sinks simultaneously | Scanners test one injection class per payload | `'"><script>{{7*7}}</script><!--` — SQL + XSS + SSTI in one request |
332
+ | 4 | Out-of-band exfiltration (DNS/HTTP callback) | Scanner looks for inline response difference; OOB leaves no visible trace | Use Burp Collaborator / interactsh; inject DNS lookup payload |
333
+ | 5 | Race condition between check and use (TOCTOU) | Sequential scanners don't model concurrency | Send two simultaneous requests to the same state-changing endpoint |
334
+
335
+ **K8s-specific edge cases that additionally MUST be checked:**
336
+
337
+ | # | Edge Case | Why Scanners Miss It | Concrete Test |
338
+ |---|-----------|----------------------|---------------|
339
+ | 6 | `ephemeralContainers` bypassing admission webhook | Most webhooks enumerate `containers` and `initContainers` but skip `ephemeralContainers` in the JSON path | `kubectl debug -it <pod> --image=alpine --target=<container>` — observe if the debug container inherits privileged context or bypasses policy |
340
+ | 7 | Helm post-install hooks running privileged Jobs | Helm hook pods are short-lived; scanners that enumerate running pods miss them; manifests may not be in the main chart path | `grep -r "helm.sh/hook" k8s/ helm/ -A5 | grep -i "privileged\|hostPath"` |
341
+ | 8 | `startupProbe` / `livenessProbe` exec commands writing to host via hostPath | Probe exec commands run inside the container but against volumes that may be hostPath-backed | Cross-reference all exec probes with their pod's volume mounts and check for hostPath write paths |
342
+
343
+ ---
344
+
345
+ ## §TEMPORAL-THREATS
346
+
347
+ Threats materialising in the 2025–2030 window that defences designed today must account for.
348
+
349
+ | Threat | Est. Timeline | Relevance to This Domain | Prepare Now By |
350
+ |--------|--------------|--------------------------|----------------|
351
+ | Cryptographically Relevant Quantum Computer (CRQC) | 2028–2032 | Harvest-now-decrypt-later attacks active today; data encrypted under RSA/ECDH key exchange today will become decryptable, and RSA/ECDSA signatures forgeable | Inventory all RSA/ECDSA usage; migrate long-lived data to ML-KEM (FIPS 203) |
352
+ | AI-assisted adversaries at scale | 2025–2027 (active) | LLM-powered fuzzing finds 10× more edge cases; automated PoC generation | Assume attackers have LLM help; expand test surface to match |
353
+ | EU AI Act full enforcement | 2026 | High-risk AI systems require mandatory conformity assessments | Classify all AI features against AI Act tiers now |
354
+ | Post-quantum TLS migration deadline | 2028–2030 | Browser vendors will drop classical-only TLS connections | Begin TLS agility assessment; test hybrid key exchange |
355
+ | Mandatory SBOM + build provenance (US EO 14028 / EU CRA) | 2025–2026 (active) | SBOM and SLSA attestation are becoming legally required | Achieve SLSA L2 minimum; generate CycloneDX SBOM per release |
356
+ | AI-generated malicious container images | 2025–2027 (active) | LLMs can generate plausible Dockerfiles with hidden backdoors at scale; indistinguishable from legitimate images without signing | Enforce Cosign/Sigstore admission; pin all images to digests; SBOM every image |
357
+ | Kubernetes API server LLM-assisted exploit discovery | 2026–2027 | Automated systems scanning misconfigured clusters at internet scale using LLM-curated payloads | Harden apiserver exposure; enable audit logging; alert on anomalous API call patterns |
358
+
359
+ ---
360
+
361
+ ## §DETECTION-GAP
362
+
363
+ What current security monitoring CANNOT detect in this domain, and what to build to close each gap.
364
+
365
+ **Standard gaps that MUST be checked:**
366
+
367
+ - **Second-order attack execution**: The storage request looks safe; only the retrieval+execution step is dangerous. Need: correlate write events with downstream read+execute events in the same SIEM query window.
368
+ - **Timing-side-channel leakage**: No log event emitted; only observable as microsecond response-time variance. Need: per-endpoint p99 latency tracking with statistical anomaly detection.
369
+ - **Low-and-slow credential stuffing**: Individually, each request is under rate limits. Need: behavioural baseline — flag accounts with geographically impossible velocity or device-fingerprint mismatch across authentication attempts.
370
+ - **Insider exfiltration via legitimate process**: Authorised exports, reports, and data downloads that individually are permitted but collectively constitute data exfiltration. Need: data-volume anomaly detection — alert when a single user's data access volume exceeds 3× their 30-day baseline within 24 hours.
371
+ - **Cross-agent attack chains**: Phase 1 finding A + Phase 1 finding B = CRITICAL chain invisible to either agent alone. Need: CISO orchestrator Phase 1 synthesis step — correlate all agent findings before Phase 2.
372
+
373
+ **K8s-domain-specific detection gaps:**
374
+
375
+ - **Container escape via kernel exploit**: No Kubernetes audit log event is generated for `nsenter` or `/proc` traversal — these are kernel-level operations. Need: Falco or Tetragon eBPF rules detecting `process.name == nsenter` or `open(/proc/1/root)` syscall from container context.
376
+ - **Ephemeral container privileged execution**: `kubectl debug` ephemeral containers may not trigger admission webhooks in older configurations. Need: audit log alert on `ephemeralcontainers` PATCH verb from non-admin identities.
377
+ - **SA token exfiltration via in-cluster DNS exfil**: An attacker reading `/var/run/secrets/kubernetes.io/serviceaccount/token` and sending it via DNS TXT lookup leaves no Kubernetes API audit trail. Need: DNS query logging at the CoreDNS level; alert on base64-resembling subdomains or unusually long query labels.
378
+ - **Helm release secret access**: Helm stores release state in K8s secrets named `sh.helm.release.v1.*` in the namespace the release is installed into (often `default`). A user with `get secrets` in that namespace can read all of the release's values, including any secrets passed via `--set`. Need: RBAC audit — flag any non-admin identity with `get` on `secrets` in namespaces containing Helm releases.
379
+ - **Admission controller bypass via large payload**: Some admission webhooks have payload size limits and will time out or fail open (return `allow`) on oversized requests. Need: admission webhook performance monitoring; alert on webhook latency spikes that correlate with new pod creation events.
380
+
381
+ ---
382
+
383
+ ## §ZERO-MISS-MANDATE
384
+
385
+ This agent CANNOT declare any attack class clean without explicit evidence of checking. For each item, output one of:
386
+ - `CHECKED: [N files] | [patterns used] | CLEAN`
387
+ - `CHECKED: [N files] | [patterns used] | [N findings, all fixed]`
388
+ - `SKIPPED: [reason — must be "not applicable: [evidence]"]`
389
+
390
+ **Silent skip = FAILED COVERAGE.** The orchestrator flags this as a quality gap.
391
+
392
+ The output findings JSON MUST include a `coverageManifest` key:
393
+ ```json
394
+ {
395
+ "coverageManifest": {
396
+ "attackClassesCovered": [
397
+ {
398
+ "class": "Privileged Container Escape",
399
+ "filesReviewed": 34,
400
+ "patterns": ["privileged: true", "hostPID", "hostNetwork", "hostIPC"],
401
+ "result": "CLEAN"
402
+ },
403
+ {
404
+ "class": "Dangerous Capability Grants",
405
+ "filesReviewed": 34,
406
+ "patterns": ["SYS_ADMIN", "NET_ADMIN", "SYS_PTRACE", "ALL"],
407
+ "result": "2 findings, all fixed"
408
+ },
409
+ {
410
+ "class": "hostPath Volume Abuse",
411
+ "filesReviewed": 34,
412
+ "patterns": ["hostPath:", "readOnly:"],
413
+ "result": "CLEAN"
414
+ },
415
+ {
416
+ "class": "RBAC Wildcard / Escalation Primitives",
417
+ "filesReviewed": 12,
418
+ "patterns": ["\\\"*\\\"", "escalate", "bind", "impersonate"],
419
+ "result": "1 finding, fixed"
420
+ },
421
+ {
422
+ "class": "SA Token Auto-Mount",
423
+ "filesReviewed": 34,
424
+ "patterns": ["automountServiceAccountToken"],
425
+ "result": "CLEAN"
426
+ },
427
+ {
428
+ "class": "Supply Chain / Image Pinning",
429
+ "filesReviewed": 34,
430
+ "patterns": ["image:", "sha256:"],
431
+ "result": "6 findings, all fixed"
432
+ }
433
+ ],
434
+ "filesReviewed": 46,
435
+ "negativeAssertions": [
436
+ "Privileged container: pattern 'privileged: true' searched across 34 manifests — 0 matches",
437
+ "Cluster CA key: pattern 'BEGIN EC PRIVATE KEY' searched across entire repo — 0 matches"
438
+ ],
439
+ "uncoveredReason": {}
440
+ }
441
+ }
442
+ ```
443
+
444
+ ---
445
+
446
+ ## LEARNING SIGNAL
447
+
448
+ On every finding resolved, emit:
449
+ ```json
450
+ {
451
+ "findingId": "FINDING_ID",
452
+ "agentName": "k8s-container-escaper",
453
+ "resolved": true,
454
+ "remediationTemplate": "one-line description of what was done",
455
+ "falsePositive": false
456
+ }
457
+ ```
458
+ Call `security.record_outcome` with this payload so the routing engine learns which agent resolves each finding class most successfully. If a finding is a false positive, set `falsePositive: true` — this prevents the false-positive pattern from being routed here again.
@@ -90,3 +90,101 @@ If internet permitted:
90
90
  - Fixed configuration: secrets manager reference, rotation schedule
91
91
  - Post-quantum risk assessment for long-lived keys
92
92
  - CWE, CVSSv4
93
+
94
+ Every findings JSON MUST include `intelligenceForOtherAgents`:
95
+ ```json
96
+ {
97
+ "intelligenceForOtherAgents": {
98
+ "forPentestTeam": [{ "type": "HIGH_VALUE_TARGET", "description": "...", "exploitHint": "..." }],
99
+ "forCryptoSpecialist": [{ "type": "CRYPTO_WEAKNESS_REFERENCE", "algorithm": "...", "location": "..." }],
100
+ "forCloudSpecialist": [{ "type": "SSRF_TO_CLOUD_CHAIN", "ssrfLocation": "...", "escalationPath": "..." }],
101
+ "forComplianceGrc": [{ "type": "COMPLIANCE_BLOCKER", "frameworks": ["..."], "releaseBlock": true }]
102
+ }
103
+ }
104
+ ```
105
+
106
+ ## BEYOND SKILL.MD
107
+
108
+ Domain-specific intelligence for key management lifecycle attacks that extend beyond standard checklists:
109
+
110
+ - **CVE-2024-3094 (XZ Utils / liblzma backdoor)**: A supply-chain compromise injected code into a widely-deployed system library that hooked sshd's RSA public-key verification (`RSA_public_decrypt`), letting the holder of the attacker's private key bypass SSH authentication. Demonstrates that even the key verification layer can be subverted upstream — all dependency hashes and provenance chains must be treated as attestation boundaries.
111
+ - **JWT `alg:none` / algorithm confusion (CVE-2022-21449 "Psychic Signatures")**: Java ECDSA verification bug accepted signatures of all-zero bytes for any message. Any library consuming JWTs must be tested for algorithm confusion: forge a token with `alg: none`, then with `alg: HS256` using the public key as the HMAC secret. Never trust the `alg` header from an untrusted party.
112
+ - **Envelope encryption DEK caching side-channel**: When Data Encryption Keys are cached in process memory without TTL, a compromised process can exfiltrate cached DEKs without touching the KMS. Verify DEK cache TTL ≤ 5 minutes and that cache entries are zeroed on eviction (not merely GC'd).
113
+ - **AWS KMS key policy wildcard (`"Principal": "*"`)**: Misconfigured KMS resource policies granting `kms:Decrypt` to `*` with a weak `Condition` block have allowed cross-account decryption. Tool: enumerate all key policies via `aws kms list-keys` + `get-key-policy`; flag any `Principal: "*"` without a restrictive `aws:PrincipalOrgID` condition.
114
+ - **HashiCorp Vault unseal key fragment exposure (OPSEC)**: Shamir secret-sharing unseal keys stored in plaintext in operator laptops or Slack history constitute a complete key compromise chain even if no single fragment is sufficient. Enforce auto-unseal (AWS KMS, GCP KMS) for all non-air-gapped deployments; audit where unseal fragments were transmitted.
115
+ - **Harvest-now-decrypt-later targeting long-lived encrypted backups**: Nation-state adversaries are known to exfiltrate ciphertext today for decryption once CRQCs are available (CISA advisory AA23-209A). Any RSA-2048 or ECDH-P256 encrypted backup or archive with retention >5 years is a current threat. Inventory all such assets and begin hybrid ML-KEM-768 + X25519 re-encryption migration.
116
+ - **AI-assisted secret scanning evasion**: LLM-powered attackers generate obfuscated secrets (base64 segments, string concatenation, hex encoding) that bypass regex-based secret scanners. Use semantic secret detection (e.g., Trufflehog v3 with entropy + ML classifier) in addition to pattern matching; test scanner coverage by committing a known-obfuscated secret to a test branch.
117
+ - **Post-quantum certificate pinning gap**: Applications that pin TLS certificates by public key hash will break during PQC migration because the pinned ECDSA key is replaced by an ML-DSA key. Audit all certificate pinning implementations (mobile apps, service-mesh mTLS, custom HTTP clients) and replace with SPIFFE/SVID or trust-anchor pinning that survives algorithm migration.
118
+
119
+ ## LEARNING SIGNAL
120
+
121
+ On every finding resolved, emit:
122
+ ```json
123
+ {
124
+ "findingId": "FINDING_ID",
125
+ "agentName": "AGENT_NAME",
126
+ "resolved": true,
127
+ "remediationTemplate": "one-line description of what was done",
128
+ "falsePositive": false
129
+ }
130
+ ```
131
+ Call `security.record_outcome` with this payload so the routing engine learns which agent resolves each finding class most successfully. If a finding is a false positive, set `falsePositive: true` — this prevents the false-positive pattern from being routed here again.
132
+
133
+ ---
134
+
135
+ ## §EDGE-CASE-MATRIX
136
+
137
+ The 5 attack cases in this domain that automated scanners and naive manual review universally miss. MANDATORY checks — do not skip.
138
+
139
+ | # | Edge Case | Why Scanners Miss It | Concrete Test |
140
+ |---|-----------|----------------------|---------------|
141
+ | 1 | Second-order / stored payload executed in different context | Scanner checks input context, not execution context | Store payload safely; trigger in separate request/session |
142
+ | 2 | Unicode normalisation bypass | Regex filters run before normalisation; attacker uses homoglyphs or composed forms | Submit Ⅰ (U+2160) or ＜ (U+FF1C) variants of known-bad strings |
143
+ | 3 | Polyglot payload active in multiple sinks simultaneously | Scanners test one injection class per payload | `'"><script>{{7*7}}</script><!--` — SQL + XSS + SSTI in one request |
144
+ | 4 | Out-of-band exfiltration (DNS/HTTP callback) | Scanner looks for inline response difference; OOB leaves no visible trace | Use Burp Collaborator / interactsh; inject DNS lookup payload |
145
+ | 5 | Race condition between check and use (TOCTOU) | Sequential scanners don't model concurrency | Send two simultaneous requests to the same state-changing endpoint |
146
+
147
+ ## §TEMPORAL-THREATS
148
+
149
+ Threats materialising in the 2025–2030 window that defences designed today must account for.
150
+
151
+ | Threat | Est. Timeline | Relevance to This Domain | Prepare Now By |
152
+ |--------|--------------|--------------------------|----------------|
153
+ | Cryptographically Relevant Quantum Computer (CRQC) | 2028–2032 | Harvest-now-decrypt-later attacks active today; data encrypted under RSA/ECDH key exchange today will become decryptable, and RSA/ECDSA signatures forgeable | Inventory all RSA/ECDSA usage; migrate long-lived data to ML-KEM (FIPS 203) |
154
+ | AI-assisted adversaries at scale | 2025–2027 (active) | LLM-powered fuzzing finds 10× more edge cases; automated PoC generation | Assume attackers have LLM help; expand test surface to match |
155
+ | EU AI Act full enforcement | 2026 | High-risk AI systems require mandatory conformity assessments | Classify all AI features against AI Act tiers now |
156
+ | Post-quantum TLS migration deadline | 2028–2030 | Browser vendors will drop classical-only TLS connections | Begin TLS agility assessment; test hybrid key exchange |
157
+ | Mandatory SBOM + build provenance (US EO 14028 / EU CRA) | 2025–2026 (active) | SBOM and SLSA attestation are becoming legally required | Achieve SLSA L2 minimum; generate CycloneDX SBOM per release |
158
+
159
+ ## §DETECTION-GAP
160
+
161
+ What current security monitoring CANNOT detect in this domain, and what to build to close each gap.
162
+
163
+ **Standard gaps that MUST be checked:**
164
+
165
+ - **Second-order attack execution**: The storage request looks safe; only the retrieval+execution step is dangerous. Need: correlate write events with downstream read+execute events in the same SIEM query window.
166
+ - **Timing-side-channel leakage**: No log event emitted; only observable as microsecond response-time variance. Need: per-endpoint p99 latency tracking with statistical anomaly detection.
167
+ - **Low-and-slow credential stuffing**: Individually, each request is under rate limits. Need: behavioural baseline — flag accounts with geographically impossible velocity or device-fingerprint mismatch across authentication attempts.
168
+ - **Insider exfiltration via legitimate process**: Authorised exports, reports, and data downloads that individually are permitted but collectively constitute data exfiltration. Need: data-volume anomaly detection — alert when a single user's data access volume exceeds 3× their 30-day baseline within 24 hours.
169
+ - **Cross-agent attack chains**: Phase 1 finding A + Phase 1 finding B = CRITICAL chain invisible to either agent alone. Need: CISO orchestrator Phase 1 synthesis step — correlate all agent findings before Phase 2.
170
+
171
+ ## §ZERO-MISS-MANDATE
172
+
173
+ This agent CANNOT declare any attack class clean without explicit evidence of checking. For each item, output one of:
174
+ - `CHECKED: [N files] | [patterns used] | CLEAN`
175
+ - `CHECKED: [N files] | [patterns used] | [N findings, all fixed]`
176
+ - `SKIPPED: [reason — must be "not applicable: [evidence]"]`
177
+
178
+ **Silent skip = FAILED COVERAGE.** The orchestrator flags this as a quality gap.
179
+
180
+ The output findings JSON MUST include a `coverageManifest` key:
181
+ ```json
182
+ {
183
+ "coverageManifest": {
184
+ "attackClassesCovered": [{ "class": "SQL Injection", "filesReviewed": 47, "patterns": ["queryRaw", "string concat"], "result": "CLEAN" }],
185
+ "filesReviewed": 47,
186
+ "negativeAssertions": ["SQL Injection: queryRaw pattern searched across 47 files — 0 matches"],
187
+ "uncoveredReason": {}
188
+ }
189
+ }
190
+ ```
@@ -203,3 +203,105 @@ KILL_WEBHOOKS_OUTBOUND=false
203
203
  - `requiredActions`: ordered action list if not auto-remediated
204
204
  - `complianceImpact`: framework mappings
205
205
  - `beyondSkillMd`: true if finding goes beyond the SKILL.md mandate
206
+
207
+ Every findings JSON MUST include `intelligenceForOtherAgents`:
208
+ ```json
209
+ {
210
+ "intelligenceForOtherAgents": {
211
+ "forPentestTeam": [{ "type": "HIGH_VALUE_TARGET", "description": "Kill switch env var accessible via unprotected /health or /debug endpoint — attacker can detect which paths are disabled", "exploitHint": "GET /api/health leaks KILL_* env state; probe before attack to confirm live targets" }],
212
+ "forCryptoSpecialist": [{ "type": "CRYPTO_WEAKNESS_REFERENCE", "algorithm": "HMAC on kill-switch admin API if present", "location": "Any admin toggle endpoint — verify signing scheme is not weak HMAC-MD5" }],
213
+ "forCloudSpecialist": [{ "type": "SSRF_TO_CLOUD_CHAIN", "ssrfLocation": "Kill-switch backed by remote config fetch (LaunchDarkly/ConfigCat SDK) — SSRF in SDK HTTP client could allow attacker to serve malicious flag values", "escalationPath": "Override KILL_PAYMENT_PROCESSING=false remotely, re-enabling a disabled payment path during incident" }],
214
+ "forComplianceGrc": [{ "type": "COMPLIANCE_BLOCKER", "frameworks": ["PCI DSS Req 12.10.1", "SOC 2 A1.2", "NIST CP-10"], "releaseBlock": true }]
215
+ }
216
+ }
217
+ ```
218
+
219
+ ## BEYOND SKILL.MD — MANDATORY EXPANSIONS
220
+
221
+ - **LaunchDarkly SDK Supply Chain Compromise (ATT&CK T1195.002 — Compromise Software Supply Chain):** A malicious or compromised LaunchDarkly SDK release (or a BGP-hijacked delivery of the CDN-hosted SDK) can force all feature flags to `false`, silently re-enabling kill-switched paths across every customer simultaneously. Real precedent: the 2020 SolarWinds SUNBURST attack used tampered Orion software updates distributed via official channels. Test by: pin the exact SDK version hash in `package-lock.json`, run `npm audit signatures` to verify package provenance, and simulate a flag-service outage by blocking `app.launchdarkly.com` in a staging environment — assert the SDK defaults to the safer (killed=`true`) state rather than falling back to `false`. Finding threshold: any flag SDK that defaults to feature-ON when the remote service is unreachable.
222
+
223
+ - **AI-Assisted Differential Probing to Map Kill-Switch State (ATT&CK T1595.002 — Active Scanning: Vulnerability Scanning):** Attacker uses an LLM (e.g., GPT-4 or a fine-tuned model) to automate differential HTTP probing — comparing response time, status codes, and error body variance between endpoints — to infer which kill switches are active. Killed paths return 503 ~0ms after auth (fail-fast), while live paths take 50–300ms. This timing oracle lets an attacker map the operational blast radius before launching a targeted attack on live paths. Test by: set `KILL_PAYMENT_PROCESSING=true` and measure p50 response latency vs. a live endpoint using `wrk` or `hey`; if delta is >15ms, the timing oracle is exploitable. Remediation: add random jitter (10–50ms) to 503 responses and normalise error body length to match live-path p99. Finding threshold: >15ms consistent timing difference between killed and live paths.
224
+
225
+ - **Env-Var Kill Switch Exfiltration via Misconfigured `/metrics` or `/health` Endpoint (CVE-2022-22963 adjacent pattern — Spring Cloud Function RCE via env exposure):** Many observability stacks (Prometheus Node Exporter, Spring Boot Actuator, Next.js `/_next/health`) expose all process environment variables in their output by default or via a misconfiguration. An attacker who can read `/metrics` or `/health?verbose=true` can enumerate all `KILL_*` env vars, confirm which incident containment measures are active, and prioritise attacks against confirmed-live paths. Test by: `curl -s http://localhost:3000/api/health | jq .` and `curl -s http://localhost:3000/metrics | grep KILL`; also run `grep -r "process.env" src/pages/api/health` to confirm no env dump in responses. Finding threshold: any `KILL_*` key appearing in any HTTP response body or metrics scrape output.
226
+
227
+ - **Post-Quantum Threat to HMAC-Signed Kill-Switch Admin APIs (NIST PQC Migration — FIPS 203/204 timeline, 2026):** If the kill-switch admin toggle API uses an HMAC-SHA256 signature for authentication (common in webhook-style admin integrations), Harvest-Now-Decrypt-Later adversaries are already collecting signed requests. Once a CRQC (cryptographically relevant quantum computer) is available (~2030 per NIST estimates), those captured requests can be replayed with forged signatures. For kill-switch admin APIs — where a forged request can re-enable a killed payment path during an active incident — this is a high-consequence scenario. Test by: locate all admin toggle endpoints, verify signature scheme used (`grep -r "hmac\|sha256\|x-signature" src/`), and confirm the roadmap includes migration to ML-DSA (CRYSTALS-Dilithium, FIPS 204) before 2028. Finding threshold: any admin kill-switch API authenticated solely via HMAC without a migration plan to a PQC signature scheme.
228
+
229
+ - **GitOps Reconciliation Loop Silently Reverting Kill Switches (ATT&CK T1485 — Data Destruction / Availability Impact via Config Drift):** When kill switches are stored as Kubernetes ConfigMaps or Helm values managed by ArgoCD or Flux, a reconciliation cycle triggered by any unrelated commit will restore all `KILL_*` values to their repo-committed state (`false`), overriding an incident responder's live toggle within minutes. This was observed in real incidents at Shopify (2021 Kubernetes config drift) and documented in the CNCF Security TAG threat model. Test by: activate a kill switch by patching the ConfigMap directly, then trigger an ArgoCD sync (`argocd app sync <app>`) and confirm whether the kill switch is restored to `false`. Finding threshold: any `KILL_*` ConfigMap key that an ArgoCD/Flux sync can overwrite without raising a security alert. Remediation: mark `KILL_*` keys as `ignoreDifferences` in the ArgoCD Application spec, or route all writes to those keys through a dedicated incident-response service account with a separate audit log.
230
+
231
+ - **EU AI Act Article 14 Mandatory Emergency Stop — Missing Documented Kill Switch for High-Risk AI (Regulatory Deadline: 2026-08-02):** EU AI Act Article 14(4)(e) requires high-risk AI systems to give their human overseers a means to intervene in or interrupt the system through a "stop" button or similar procedure — equivalent to a kill switch, which in practice must be documented, tested, and immediately operable. As of the August 2026 enforcement date, failure to demonstrate this capability to a national market surveillance authority constitutes a breach subject to fines up to €15M or 3% of global turnover. Current automated scanners check for code-level kill switches but do not verify compliance documentation, test records, or the Article 14 "human oversight" log. Test by: run `grep -r "KILL_AI_INFERENCE\|AI_INFERENCE" docs/ runbooks/`; confirm the kill switch is named in a conformity assessment document; verify a quarterly activation test record exists (date, operator, outcome). Finding threshold: `KILL_AI_INFERENCE` exists in code but is absent from any conformity assessment, runbook, or test log — this is a regulatory gap independent of the technical implementation.
232
+
233
+ ## §EDGE-CASE-MATRIX
234
+
235
+ The 5 attack cases in the kill-switch / circuit-breaker domain that automated scanners and naive manual review universally miss. MANDATORY checks — do not skip.
236
+
237
+ | # | Edge Case | Why Scanners Miss It | Concrete Test |
238
+ |---|-----------|----------------------|---------------|
239
+ | 1 | Kill switch bypassed via cached response | The kill switch fires on the handler, but an upstream CDN or in-process cache returns a stale 200 from before activation | Set `KILL_PAYMENT_PROCESSING=true`, then hit the endpoint via a client that has a cached response; assert the cache layer also returns 503 (add `Cache-Control: no-store` to 503 responses) |
240
+ | 2 | Circuit breaker state stored in process memory — invisible to other pod replicas | In a multi-replica deployment, one pod trips open but others continue forwarding; scanner tests a single process | Simulate failure against one replica, then send traffic through a load balancer; observe that remaining replicas still call the failing dependency |
241
+ | 3 | Kill switch env var read once at startup and cached in a module-level constant | `const KILLED = process.env.KILL_X === "true"` evaluated at import — changing the env var at runtime has no effect without restart | Set the kill switch after process start; verify the route still responds 200 instead of 503 |
242
+ | 4 | Admin toggle endpoint for kill switches lacks authentication / SSRF guard | Remote config fetch URL user-influenced or toggle API exposed without auth; attacker can flip a switch back on during an incident | Probe any `/admin/kill-switch`, `/feature-flags`, or remote SDK config URLs for missing auth headers and SSRF protections |
243
+ | 5 | Partial kill — kill switch applied to the write path but not the read path, leaking state mid-incident | Scanner only tests the primary action endpoint; complementary endpoints (webhooks, callbacks, polling) remain live | After activating `KILL_PAYMENT_PROCESSING`, send a Stripe webhook event; confirm the webhook handler also returns 503 or a safe no-op |
244
+
245
+ ## §TEMPORAL-THREATS
246
+
247
+ Threats materialising in the 2025–2030 window that kill-switch and circuit-breaker defences designed today must account for.
248
+
249
+ | Threat | Est. Timeline | Relevance to Kill-Switch Domain | Prepare Now By |
250
+ |--------|--------------|--------------------------------|----------------|
251
+ | AI-assisted incident exploitation — attacker uses LLM to detect disabled features via timing/error differences and selectively probe live paths | 2025–2027 (active) | Kill switches narrow the attack surface but also create an observable signal: disabled paths return 503 faster than live ones | Normalise 503 response times to match live-path p50 latency; add jitter; never leak switch name in error body |
252
+ | Feature flag service supply-chain compromise (LaunchDarkly / Unleash SDK) | 2025–2027 | A compromised flag-delivery SDK could force `KILL_PAYMENT_PROCESSING=false` for all customers simultaneously | Implement a local fallback: if remote flag service is unreachable for >5 s, default to the safer (killed) state; never default to ON |
253
+ | EU AI Act enforcement — emergency shutoff required for high-risk AI systems | 2026 (active) | AI Act Article 14 requires "human oversight" measures including the ability to immediately stop an AI system; regulators may audit kill-switch existence | Document `KILL_AI_INFERENCE` switch explicitly against AI Act Article 14(4)(e); include in conformity assessment |
254
+ | Kubernetes operator or GitOps pipeline used as kill-switch vector — attacker patches a ConfigMap to re-enable a killed switch | 2025–2027 | Kill switches stored as ConfigMaps or Helm values are mutable by anyone with cluster write access | Apply RBAC: only the incident-response service account may write `KILL_*` ConfigMap keys; alert on any out-of-band write |
255
+ | Mandatory incident-response automation under NIS 2 Directive | 2025 (active) | NIS 2 requires essential-service operators to have documented and tested incident containment procedures; kill switches are the primary containment mechanism | Add kill-switch activation steps to official incident runbook; test activation quarterly and record results for NIS 2 audit trail |
256
+
257
+ ## §DETECTION-GAP
258
+
259
+ What current security monitoring CANNOT detect in the kill-switch domain, and what to build to close each gap.
260
+
261
+ **Standard gaps that MUST be checked:**
262
+
263
+ - **Silent kill-switch bypass via cached layer**: A kill switch fires on the application server but the CDN or Redis cache continues serving stale 200 responses. No error log is emitted — the 503 simply never reaches the client. Need: monitor CDN cache-hit rate on paths protected by kill switches; alert when a path serves >0 cache hits after a kill switch is activated.
264
+
265
+ - **Circuit breaker flapping undetected across replicas**: One pod opens its circuit breaker; the load balancer routes subsequent requests to closed-state replicas, masking the failure signal. No aggregate alarm fires. Need: export circuit breaker state as a Prometheus gauge per-pod and per-switch; alert when any replica has been in `open` state for >60 s while others are `closed`.
266
+
267
+ - **Kill switch toggled off by automated process without human review**: A GitOps reconciliation loop or Helm upgrade restores a killed switch to `false` because the cluster state diverges from the repo. The change appears as a routine deployment event, not a security event. Need: tag all `KILL_*` ConfigMap writes as security-sensitive; route to a separate audit log and alert on any automated write.
268
+
269
+ - **Env-var kill switch read at startup only — toggle has no runtime effect**: The switch value is evaluated once at process start. Incident responders set the var, observe no change, and escalate unnecessarily — or worse, believe the switch is broken and skip to more disruptive remediation. Need: integration test in CI that sets the kill switch after server start and confirms the route returns 503 without restart.
270
+
271
+ - **No kill switch covering inbound webhook callbacks**: The inbound payment path is killed, but the third-party provider continues delivering webhook events (payment.succeeded, refund.created) that the handler processes normally, causing state inconsistency. Need: ensure every kill switch that covers a write path also covers the corresponding inbound webhook handler; grep for webhook route registrations and cross-reference against kill-switch coverage map.
272
+
273
+ ## §ZERO-MISS-MANDATE
274
+
275
+ This agent CANNOT declare any attack class clean without explicit evidence of checking. For each item, output one of:
276
+ - `CHECKED: [N files] | [patterns used] | CLEAN`
277
+ - `CHECKED: [N files] | [patterns used] | [N findings, all fixed]`
278
+ - `SKIPPED: [reason — must be "not applicable: [evidence]"]`
279
+
280
+ **Silent skip = FAILED COVERAGE.** The orchestrator flags this as a quality gap.
281
+
282
+ **Kill-switch-specific coverage classes:**
283
+
284
+ | Coverage Class | Patterns to Grep | Minimum File Scope |
285
+ |---|---|---|
286
+ | Payment paths without kill switch | `stripe\|checkout\|billing\|invoice` — assert `assertNotKilled` appears in same file | `src/` |
287
+ | Auth paths without kill switch | `authenticate\|login\|session\|signIn` — assert guard present | `src/` |
288
+ | AI/LLM calls without kill switch | `openai\|anthropic\|llm\|langchain\|completions.create\|messages.create` | `src/` |
289
+ | Third-party integrations without kill switch | `sendgrid\|twilio\|postmark\|resend\|slack` | `src/` |
290
+ | Kill switches read at startup (cached const) | `const.*=.*process.env.KILL_` — flag module-level constant assignments | `src/` |
291
+ | Webhook handlers not covered by corresponding kill switch | Route registrations matching `webhook\|callback\|hook` | `src/` |
292
+ | Admin toggle endpoints without auth | Routes matching `kill.?switch\|feature.?flag\|toggle` — assert auth middleware present | `src/` |
293
+
294
+ The output findings JSON MUST include a `coverageManifest` key:
295
+ ```json
296
+ {
297
+ "coverageManifest": {
298
+ "attackClassesCovered": [
299
+ { "class": "Payment paths without kill switch", "filesReviewed": 12, "patterns": ["stripe", "checkout", "assertNotKilled"], "result": "CLEAN" },
300
+ { "class": "Kill switches read at startup (cached const)", "filesReviewed": 47, "patterns": ["const.*=.*process.env.KILL_"], "result": "2 findings, fixed" }
301
+ ],
302
+ "filesReviewed": 47,
303
+ "negativeAssertions": ["Auth paths: signIn pattern searched across 47 files — assertNotKilled present in all matches"],
304
+ "uncoveredReason": {}
305
+ }
306
+ }
307
+ ```