npm - agentic-qe - Versions diffs - 3.5.4 → 3.6.0 - Mend

agentic-qe 3.5.4 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (247) hide show

package/.claude/skills/pentest-validation/schemas/output.json ADDED Viewed

@@ -0,0 +1,281 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://agentic-qe.dev/schemas/pentest-validation-output.json",
+  "title": "AQE Pentest Validation Skill Output Schema",
+  "description": "Schema for pentest-validation skill output validation. Validates graduated exploitation results, finding classifications, PoC evidence, and 'No Exploit, No Report' filtering.",
+  "type": "object",
+  "required": ["skillName", "version", "timestamp", "status", "trustTier", "output"],
+  "properties": {
+    "skillName": {
+      "type": "string",
+      "const": "pentest-validation",
+      "description": "Must be 'pentest-validation'"
+    },
+    "version": {
+      "type": "string",
+      "pattern": "^\\d+\\.\\d+\\.\\d+(-[a-zA-Z0-9]+)?$",
+      "description": "Semantic version of the skill"
+    },
+    "timestamp": {
+      "type": "string",
+      "format": "date-time",
+      "description": "ISO 8601 timestamp of output generation"
+    },
+    "status": {
+      "type": "string",
+      "enum": ["success", "partial", "failed", "blocked", "awaiting-authorization"],
+      "description": "Overall execution status"
+    },
+    "trustTier": {
+      "type": "integer",
+      "const": 3,
+      "description": "Trust tier 3 indicates full validation with eval suite"
+    },
+    "output": {
+      "type": "object",
+      "required": ["validationSummary", "findings"],
+      "properties": {
+        "validationSummary": {
+          "$ref": "#/$defs/validationSummary",
+          "description": "Summary of validation results"
+        },
+        "findings": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/validatedFinding"
+          },
+          "description": "Validated findings (only confirmed/likely per 'No Exploit, No Report')"
+        },
+        "eliminatedFindings": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/eliminatedFinding"
+          },
+          "description": "Findings eliminated as false positives"
+        },
+        "inconclusiveFindings": {
+          "type": "array",
+          "items": {
+            "$ref": "#/$defs/inconclusiveFinding"
+          },
+          "description": "Findings requiring manual review"
+        },
+        "costBreakdown": {
+          "$ref": "#/$defs/costBreakdown",
+          "description": "Cost per pipeline and total"
+        },
+        "playbookUpdates": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Number of new patterns stored in exploit playbook"
+        }
+      }
+    }
+  },
+  "$defs": {
+    "validationSummary": {
+      "type": "object",
+      "required": ["findingsReceived", "confirmedExploitable", "notExploitable"],
+      "properties": {
+        "findingsReceived": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Total findings received from scanner"
+        },
+        "confirmedExploitable": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Findings proven exploitable with PoC"
+        },
+        "likelyExploitable": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Findings with partial exploitation evidence"
+        },
+        "notExploitable": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Findings confirmed as false positives"
+        },
+        "inconclusive": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Findings blocked by defenses (need manual review)"
+        },
+        "falsePositivesEliminated": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Number of false positives removed from report"
+        },
+        "exploitationTierUsed": {
+          "type": "integer",
+          "enum": [1, 2, 3],
+          "description": "Highest exploitation tier used in this run"
+        }
+      }
+    },
+    "validatedFinding": {
+      "type": "object",
+      "required": ["id", "type", "severity", "classification", "evidence"],
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "Finding identifier"
+        },
+        "type": {
+          "type": "string",
+          "enum": [
+            "sql-injection", "nosql-injection", "command-injection", "ldap-injection",
+            "reflected-xss", "stored-xss", "dom-xss",
+            "auth-bypass", "session-fixation", "jwt-manipulation", "idor", "credential-stuffing",
+            "ssrf", "dns-rebinding", "protocol-smuggling",
+            "path-traversal", "ssti", "deserialization", "hardcoded-credentials",
+            "other"
+          ],
+          "description": "Vulnerability type"
+        },
+        "severity": {
+          "type": "string",
+          "enum": ["critical", "high", "medium", "low", "info"],
+          "description": "Severity classification"
+        },
+        "classification": {
+          "type": "string",
+          "enum": ["confirmed-exploitable", "likely-exploitable"],
+          "description": "Exploitation status (only confirmed/likely in output)"
+        },
+        "location": {
+          "type": "string",
+          "description": "Source code location (file:line)"
+        },
+        "exploitTier": {
+          "type": "integer",
+          "enum": [1, 2, 3],
+          "description": "Exploitation tier used to confirm"
+        },
+        "evidence": {
+          "$ref": "#/$defs/exploitEvidence",
+          "description": "Exploitation evidence"
+        },
+        "poc": {
+          "type": "string",
+          "minLength": 10,
+          "description": "Copy-paste proof-of-concept command"
+        },
+        "remediation": {
+          "type": "string",
+          "minLength": 20,
+          "description": "Recommended fix with code example"
+        },
+        "cwe": {
+          "type": "string",
+          "pattern": "^CWE-\\d+$",
+          "description": "CWE identifier"
+        },
+        "owasp": {
+          "type": "string",
+          "pattern": "^A\\d{2}:\\d{4}$",
+          "description": "OWASP Top 10 category"
+        }
+      }
+    },
+    "exploitEvidence": {
+      "type": "object",
+      "required": ["proof"],
+      "properties": {
+        "payload": {
+          "type": "string",
+          "description": "Payload used for exploitation"
+        },
+        "response": {
+          "type": "string",
+          "description": "Server response demonstrating exploitation"
+        },
+        "proof": {
+          "type": "string",
+          "minLength": 10,
+          "description": "Human-readable description of what was proven"
+        },
+        "screenshots": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "Screenshot paths (for browser-based exploits)"
+        }
+      }
+    },
+    "eliminatedFinding": {
+      "type": "object",
+      "required": ["id", "type", "reason"],
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "Finding identifier"
+        },
+        "type": {
+          "type": "string",
+          "description": "Vulnerability type"
+        },
+        "reason": {
+          "type": "string",
+          "minLength": 10,
+          "description": "Why this finding was classified as not-exploitable"
+        }
+      }
+    },
+    "inconclusiveFinding": {
+      "type": "object",
+      "required": ["id", "type", "reason"],
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "Finding identifier"
+        },
+        "type": {
+          "type": "string",
+          "description": "Vulnerability type"
+        },
+        "reason": {
+          "type": "string",
+          "description": "Why this finding could not be conclusively validated"
+        },
+        "manualSteps": {
+          "type": "string",
+          "description": "Suggested manual validation steps"
+        }
+      }
+    },
+    "costBreakdown": {
+      "type": "object",
+      "properties": {
+        "totalUsd": {
+          "type": "number",
+          "minimum": 0,
+          "description": "Total cost in USD"
+        },
+        "tier1Cost": {
+          "type": "number",
+          "minimum": 0,
+          "description": "Tier 1 (Agent Booster) cost - always $0"
+        },
+        "tier2Cost": {
+          "type": "number",
+          "minimum": 0,
+          "description": "Tier 2 (Haiku) cost"
+        },
+        "tier3Cost": {
+          "type": "number",
+          "minimum": 0,
+          "description": "Tier 3 (Sonnet/Opus) cost"
+        },
+        "budgetRemaining": {
+          "type": "number",
+          "description": "Remaining budget after validation"
+        },
+        "withinBudget": {
+          "type": "boolean",
+          "description": "Whether validation stayed within budget cap"
+        }
+      }
+    }
+  }
+}

package/.claude/skills/pentest-validation/scripts/validate.sh ADDED Viewed

@@ -0,0 +1,402 @@
+#!/bin/bash
+# =============================================================================
+# AQE Skill Validator: pentest-validation v1.0.0
+# Validates pentest validation skill output per ADR-056
+# =============================================================================
+#
+# This validator checks:
+# 1. JSON schema compliance (finding classifications, PoC structure, evidence)
+# 2. "No Exploit, No Report" filter enforcement
+# 3. Finding classification accuracy
+# 4. PoC quality and reproducibility
+# 5. Safeguard enforcement (production URL blocking, authorization)
+# 6. Cost tracking and budget compliance
+#
+# Usage: ./validate.sh <output-file> [options]
+#
+# Options:
+#   --self-test    Run validator self-test mode
+#   --verbose      Enable verbose output
+#   --json         Output results as JSON only
+#   --list-tools   Show available validation tools
+#   --help         Show this help message
+#
+# Exit Codes:
+#   0 - Validation passed
+#   1 - Validation failed
+#   2 - Validation skipped (missing required tools)
+#
+# =============================================================================
+set -euo pipefail
+# Absolute directory that contains this script.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Walk upwards from the script location:
+# scripts/ -> pentest-validation/ -> skills/ -> .claude/ -> project root
+SKILL_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+PROJECT_ROOT="$(cd "$SKILL_DIR/../../.." && pwd)"
+# Candidate locations of the shared validator library, in priority order.
+validator_lib_candidates=(
+  "$PROJECT_ROOT/.claude/skills/.validation/templates/validator-lib.sh"
+  "$SKILL_DIR/scripts/validator-lib.sh"
+  "$SCRIPT_DIR/validator-lib.sh"
+)
+# Pick the first candidate that exists as a regular file.
+VALIDATOR_LIB=""
+for lib_path in "${validator_lib_candidates[@]}"; do
+  [[ -f "$lib_path" ]] || continue
+  VALIDATOR_LIB="$lib_path"
+  break
+done
+# Without the library the validator cannot run: report every searched path
+# and exit with the "skipped" status (2).
+if [[ -z "$VALIDATOR_LIB" ]]; then
+  printf '%s\n' "ERROR: Validator library not found" "Searched:"
+  printf '  - %s\n' "${validator_lib_candidates[@]}"
+  exit 2
+fi
+# shellcheck source=/dev/null
+source "$VALIDATOR_LIB"
+# =============================================================================
+# Validator Configuration
+# =============================================================================
+# Identity values echoed in the validator's report header.
+SKILL_NAME="pentest-validation"
+SKILL_VERSION="1.0.0"
+# Location of the JSON schema shipped with this skill.
+SCHEMA_FILE="$SKILL_DIR/schemas/output.json"
+TRUST_TIER=3
+# =============================================================================
+# Self-Test Mode
+# =============================================================================
+# Runs the validator against a built-in sample document.
+# Globals:   TMPDIR (read, optional; falls back to /tmp)
+# Arguments: none
+# Outputs:   whatever validate_output prints for the sample document
+# Returns:   validate_output's exit status, or 1 if the scratch file
+#            cannot be created
+run_self_test() {
+  local sample_file
+  local result=0
+  # mktemp creates the file exclusively under an unpredictable name, so a
+  # concurrent run or a pre-planted symlink in /tmp cannot clobber anything.
+  if ! sample_file=$(mktemp "${TMPDIR:-/tmp}/pentest-validation-self-test.XXXXXX"); then
+    echo "ERROR: Unable to create temporary file for self-test"
+    return 1
+  fi
+  cat > "$sample_file" <<'SELF_TEST_JSON'
+{
+  "skillName": "pentest-validation",
+  "version": "1.0.0",
+  "timestamp": "2026-02-08T12:00:00Z",
+  "status": "success",
+  "trustTier": 3,
+  "output": {
+    "validationSummary": {
+      "findingsReceived": 8,
+      "confirmedExploitable": 2,
+      "likelyExploitable": 1,
+      "notExploitable": 4,
+      "inconclusive": 1,
+      "falsePositivesEliminated": 4,
+      "exploitationTierUsed": 2
+    },
+    "findings": [
+      {
+        "id": "VULN-001",
+        "type": "sql-injection",
+        "severity": "critical",
+        "classification": "confirmed-exploitable",
+        "location": "src/api/users.ts:45",
+        "exploitTier": 2,
+        "evidence": {
+          "payload": "' OR '1'='1",
+          "response": "200 OK with all user records",
+          "proof": "Payload returned all 150 user records instead of single user"
+        },
+        "poc": "curl -X GET 'https://staging.app.com/api/users?id=1%27%20OR%20%271%27%3D%271'",
+        "remediation": "Use parameterized queries: db.query('SELECT * FROM users WHERE id = ?', [id])",
+        "cwe": "CWE-89",
+        "owasp": "A03:2021"
+      },
+      {
+        "id": "VULN-003",
+        "type": "reflected-xss",
+        "severity": "high",
+        "classification": "confirmed-exploitable",
+        "location": "src/routes/profile.ts:12",
+        "exploitTier": 2,
+        "evidence": {
+          "payload": "<img src=x onerror=alert(1)>",
+          "response": "Payload reflected unescaped in HTML",
+          "proof": "Injected img tag executed onerror handler in response"
+        },
+        "poc": "curl 'https://staging.app.com/profile?name=%3Cimg%20src%3Dx%20onerror%3Dalert(1)%3E'",
+        "remediation": "Use template engine auto-escaping or explicit DOMPurify sanitization",
+        "cwe": "CWE-79",
+        "owasp": "A03:2021"
+      },
+      {
+        "id": "VULN-005",
+        "type": "idor",
+        "severity": "high",
+        "classification": "likely-exploitable",
+        "location": "src/api/orders.ts:23",
+        "exploitTier": 2,
+        "evidence": {
+          "proof": "User A token returned User B order data with sequential ID guessing"
+        },
+        "poc": "curl -H 'Authorization: Bearer <userA_token>' 'https://staging.app.com/api/orders/2'",
+        "remediation": "Add ownership check: if (order.userId !== req.user.id) return res.status(403)",
+        "cwe": "CWE-639",
+        "owasp": "A01:2021"
+      }
+    ],
+    "eliminatedFindings": [
+      {
+        "id": "VULN-002",
+        "type": "sql-injection",
+        "reason": "Parameterized query with integer parsing - not exploitable"
+      },
+      {
+        "id": "VULN-004",
+        "type": "xss",
+        "reason": "Output auto-escaped by React JSX - not exploitable"
+      },
+      {
+        "id": "VULN-006",
+        "type": "path-traversal",
+        "reason": "Path normalized and restricted to uploads/ directory"
+      },
+      {
+        "id": "VULN-007",
+        "type": "hardcoded-credentials",
+        "reason": "Test fixture file, not production code"
+      }
+    ],
+    "inconclusiveFindings": [
+      {
+        "id": "VULN-008",
+        "type": "ssrf",
+        "reason": "WAF blocked all internal URL attempts",
+        "manualSteps": "Test with WAF in learning mode or try DNS rebinding"
+      }
+    ],
+    "costBreakdown": {
+      "totalUsd": 4.82,
+      "tier1Cost": 0,
+      "tier2Cost": 1.20,
+      "tier3Cost": 3.62,
+      "budgetRemaining": 10.18,
+      "withinBudget": true
+    },
+    "playbookUpdates": 3
+  }
+}
+SELF_TEST_JSON
+  # Capture the status through `||`: with `set -e` active a bare failing call
+  # would terminate the script right here and skip the cleanup below.
+  validate_output "$sample_file" || result=$?
+  rm -f -- "$sample_file"
+  return "$result"
+}
+# =============================================================================
+# Validation Functions
+# =============================================================================
+# Escapes a raw string so it can sit between double quotes in a JSON document.
+_pv_json_escape() {
+  jq -rn --arg raw "$1" '$raw | tojson | .[1:-1]'
+}
+# Validates a pentest-validation output document and prints a report.
+# Globals:   VERBOSE, JSON_ONLY (read; both default to "false")
+#            SKILL_NAME, SKILL_VERSION, TRUST_TIER (read for the report header)
+# Arguments: $1 - path to the JSON output file to validate
+# Outputs:   a text summary on stdout, or one JSON object when JSON_ONLY is
+#            "true"; per-check lines are added when VERBOSE is "true"
+# Returns:   0 when no check failed, 1 when a check failed or the file does
+#            not exist, 2 when jq is not installed
+validate_output() {
+  local output_file="$1"
+  local verbose="${VERBOSE:-false}"
+  local json_only="${JSON_ONLY:-false}"
+  local pass_count=0
+  local fail_count=0
+  local skip_count=0
+  local results=()
+  local field
+  # Check file exists
+  if [[ ! -f "$output_file" ]]; then
+    echo "ERROR: Output file not found: $output_file"
+    return 1
+  fi
+  # Check jq is available
+  if ! command -v jq &>/dev/null; then
+    echo "SKIP: jq not available (required for JSON validation)"
+    return 2
+  fi
+  # Validate JSON syntax
+  if ! jq empty "$output_file" 2>/dev/null; then
+    results+=('{"check": "json_syntax", "status": "FAIL", "message": "Invalid JSON"}')
+    fail_count=$((fail_count + 1))
+  else
+    results+=('{"check": "json_syntax", "status": "PASS", "message": "Valid JSON"}')
+    pass_count=$((pass_count + 1))
+  fi
+  # Check required top-level fields
+  for field in skillName version timestamp status trustTier output; do
+    if jq -e ".$field" "$output_file" &>/dev/null; then
+      results+=("{\"check\": \"field_$field\", \"status\": \"PASS\", \"message\": \"Field $field present\"}")
+      pass_count=$((pass_count + 1))
+    else
+      results+=("{\"check\": \"field_$field\", \"status\": \"FAIL\", \"message\": \"Missing required field: $field\"}")
+      fail_count=$((fail_count + 1))
+    fi
+  done
+  # Check skillName matches.
+  # The `|| var=""` fallbacks below keep a jq failure (invalid JSON, unexpected
+  # shape) from tripping errexit before the report has been printed.
+  local skill_name
+  skill_name=$(jq -r '(.skillName // "") | tostring' "$output_file" 2>/dev/null) || skill_name=""
+  if [[ "$skill_name" == "pentest-validation" ]]; then
+    results+=('{"check": "skill_name", "status": "PASS", "message": "Correct skill name"}')
+    pass_count=$((pass_count + 1))
+  else
+    # The value comes from the file under test, so escape it before embedding.
+    results+=("{\"check\": \"skill_name\", \"status\": \"FAIL\", \"message\": \"Expected pentest-validation, got $(_pv_json_escape "$skill_name")\"}")
+    fail_count=$((fail_count + 1))
+  fi
+  # Check trustTier is 3
+  local trust_tier
+  trust_tier=$(jq -r '(.trustTier // 0) | tostring' "$output_file" 2>/dev/null) || trust_tier=""
+  if [[ "$trust_tier" == "3" ]]; then
+    results+=('{"check": "trust_tier", "status": "PASS", "message": "Trust tier 3"}')
+    pass_count=$((pass_count + 1))
+  else
+    results+=("{\"check\": \"trust_tier\", \"status\": \"FAIL\", \"message\": \"Expected trust tier 3, got $(_pv_json_escape "$trust_tier")\"}")
+    fail_count=$((fail_count + 1))
+  fi
+  # Check validation summary exists
+  if jq -e '.output.validationSummary' "$output_file" &>/dev/null; then
+    results+=('{"check": "validation_summary", "status": "PASS", "message": "Validation summary present"}')
+    pass_count=$((pass_count + 1))
+    # Check "No Exploit, No Report" filter:
+    # reported findings should only be confirmed or likely
+    local invalid_classifications
+    invalid_classifications=$(jq '[.output.findings[] | select(.classification != "confirmed-exploitable" and .classification != "likely-exploitable")] | length' "$output_file" 2>/dev/null || echo "0")
+    if [[ "$invalid_classifications" == "0" ]]; then
+      results+=('{"check": "no_exploit_no_report", "status": "PASS", "message": "Only confirmed/likely findings in output"}')
+      pass_count=$((pass_count + 1))
+    else
+      results+=("{\"check\": \"no_exploit_no_report\", \"status\": \"FAIL\", \"message\": \"$invalid_classifications findings with wrong classification in output\"}")
+      fail_count=$((fail_count + 1))
+    fi
+  else
+    results+=('{"check": "validation_summary", "status": "FAIL", "message": "Missing validation summary"}')
+    fail_count=$((fail_count + 1))
+  fi
+  # Check PoC exists for confirmed findings
+  local confirmed_without_poc
+  confirmed_without_poc=$(jq '[.output.findings[] | select(.classification == "confirmed-exploitable" and (.poc == null or .poc == ""))] | length' "$output_file" 2>/dev/null || echo "0")
+  if [[ "$confirmed_without_poc" == "0" ]]; then
+    results+=('{"check": "poc_coverage", "status": "PASS", "message": "All confirmed findings have PoC"}')
+    pass_count=$((pass_count + 1))
+  else
+    results+=("{\"check\": \"poc_coverage\", \"status\": \"FAIL\", \"message\": \"$confirmed_without_poc confirmed findings missing PoC\"}")
+    fail_count=$((fail_count + 1))
+  fi
+  # Check evidence exists for all findings
+  local findings_without_evidence
+  findings_without_evidence=$(jq '[.output.findings[] | select(.evidence == null or .evidence.proof == null or .evidence.proof == "")] | length' "$output_file" 2>/dev/null || echo "0")
+  if [[ "$findings_without_evidence" == "0" ]]; then
+    results+=('{"check": "evidence_coverage", "status": "PASS", "message": "All findings have evidence"}')
+    pass_count=$((pass_count + 1))
+  else
+    results+=("{\"check\": \"evidence_coverage\", \"status\": \"FAIL\", \"message\": \"$findings_without_evidence findings missing evidence\"}")
+    fail_count=$((fail_count + 1))
+  fi
+  # Check cost tracking.
+  # Presence is tested with `!= null`: `jq -e` on the bare value exits 1 for a
+  # literal false, which would route over-budget runs to SKIP instead of FAIL.
+  if jq -e '.output.costBreakdown.withinBudget != null' "$output_file" &>/dev/null; then
+    local within_budget
+    within_budget=$(jq -r '.output.costBreakdown.withinBudget' "$output_file" 2>/dev/null) || within_budget=""
+    if [[ "$within_budget" == "true" ]]; then
+      results+=('{"check": "budget_compliance", "status": "PASS", "message": "Within budget"}')
+      pass_count=$((pass_count + 1))
+    else
+      results+=('{"check": "budget_compliance", "status": "FAIL", "message": "Over budget"}')
+      fail_count=$((fail_count + 1))
+    fi
+  else
+    results+=('{"check": "budget_compliance", "status": "SKIP", "message": "No cost breakdown"}')
+    skip_count=$((skip_count + 1))
+  fi
+  # Output results
+  local total=$((pass_count + fail_count + skip_count))
+  if [[ "$json_only" == "true" ]]; then
+    echo "{\"skill\": \"$SKILL_NAME\", \"version\": \"$SKILL_VERSION\", \"trustTier\": $TRUST_TIER, \"pass\": $pass_count, \"fail\": $fail_count, \"skip\": $skip_count, \"total\": $total, \"results\": [$(IFS=,; echo "${results[*]}")]}"
+  else
+    echo "=== Pentest Validation Skill Validator ==="
+    echo "Skill: $SKILL_NAME v$SKILL_VERSION (Trust Tier $TRUST_TIER)"
+    echo "---"
+    echo "PASS: $pass_count | FAIL: $fail_count | SKIP: $skip_count | TOTAL: $total"
+    echo "---"
+    if [[ "$verbose" == "true" ]]; then
+      # One jq process formats every result line.
+      printf '%s\n' "${results[@]}" | jq -r '"  [\(.status)] \(.check): \(.message)"'
+    fi
+    echo "---"
+    if [[ $fail_count -eq 0 ]]; then
+      echo "RESULT: PASS"
+    else
+      echo "RESULT: FAIL ($fail_count checks failed)"
+    fi
+  fi
+  # The function's status mirrors the overall verdict.
+  [[ $fail_count -eq 0 ]]
+}
+# =============================================================================
+# Main
+# =============================================================================
+# Entry point: dispatches on the first command-line argument.
+# Globals:   VERBOSE, JSON_ONLY (written and exported)
+# Arguments: $1  - --self-test | --help | --list-tools | <output-file>
+#            $2+ - optional --verbose / --json flags (other values are ignored)
+# Outputs:   the selected action's output on stdout
+# Returns:   status of the selected action; exits 1 when no argument is given
+main() {
+  local arg
+  case "${1:-}" in
+    --self-test)
+      # Flags after --self-test are honoured too. Values already present in
+      # the environment are kept unless a flag turns the option on.
+      for arg in "${@:2}"; do
+        case "$arg" in
+          --verbose) export VERBOSE="true" ;;
+          --json) export JSON_ONLY="true" ;;
+        esac
+      done
+      # In JSON-only mode stdout must stay a single JSON document.
+      if [[ "${JSON_ONLY:-false}" != "true" ]]; then
+        echo "Running self-test..."
+      fi
+      run_self_test
+      ;;
+    --help)
+      # Prints lines 8-27 of this script, i.e. part of the header comment.
+      head -27 "$0" | tail -20
+      ;;
+    --list-tools)
+      echo "Required: jq"
+      echo "Optional: curl (for live endpoint testing)"
+      ;;
+    *)
+      if [[ -z "${1:-}" ]]; then
+        echo "Usage: $0 <output-file> [--verbose] [--json] [--self-test]"
+        exit 1
+      fi
+      # Parse options; both are reset first so only explicit flags enable them.
+      export VERBOSE="false"
+      export JSON_ONLY="false"
+      for arg in "${@:2}"; do
+        case "$arg" in
+          --verbose) VERBOSE="true" ;;
+          --json) JSON_ONLY="true" ;;
+        esac
+      done
+      validate_output "$1"
+      ;;
+  esac
+}
+main "$@"