npm - cipher-security - Versions diffs - 2.1.0 → 2.2.0 - Mend

cipher-security 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/bin/cipher.js +10 -0
package/lib/analyze/consistency.js +566 -0
package/lib/analyze/constitution.js +110 -0
package/lib/analyze/sharding.js +251 -0
package/lib/autonomous/agent-tool.js +165 -0
package/lib/autonomous/framework.js +17 -0
package/lib/autonomous/handoff.js +506 -0
package/lib/autonomous/modes/blue.js +26 -0
package/lib/autonomous/modes/red.js +28 -0
package/lib/benchmark/agent.js +88 -26
package/lib/benchmark/baselines.js +3 -0
package/lib/benchmark/claude-code-solver.js +254 -0
package/lib/benchmark/cognitive.js +283 -0
package/lib/benchmark/index.js +12 -2
package/lib/benchmark/knowledge.js +281 -0
package/lib/benchmark/llm.js +156 -15
package/lib/benchmark/models.js +5 -2
package/lib/benchmark/nyu-ctf.js +192 -0
package/lib/benchmark/overthewire.js +347 -0
package/lib/benchmark/picoctf.js +281 -0
package/lib/benchmark/prompts.js +280 -0
package/lib/benchmark/registry.js +219 -0
package/lib/benchmark/remote-solver.js +356 -0
package/lib/benchmark/remote-target.js +263 -0
package/lib/benchmark/reporter.js +35 -0
package/lib/benchmark/runner.js +174 -10
package/lib/benchmark/sandbox.js +35 -0
package/lib/benchmark/scorer.js +22 -4
package/lib/benchmark/solver.js +34 -1
package/lib/benchmark/tools.js +262 -16
package/lib/commands.js +9 -0
package/lib/execution/council.js +434 -0
package/lib/execution/parallel.js +292 -0
package/lib/gates/circuit-breaker.js +135 -0
package/lib/gates/confidence.js +302 -0
package/lib/gates/corrections.js +219 -0
package/lib/gates/self-check.js +245 -0
package/lib/gateway/commands.js +727 -0
package/lib/guardrails/engine.js +364 -0
package/lib/mcp/server.js +349 -3
package/lib/memory/compressor.js +94 -7
package/lib/pipeline/hooks.js +288 -0
package/lib/pipeline/index.js +11 -0
package/lib/review/budget.js +210 -0
package/lib/review/engine.js +526 -0
package/lib/review/layers/acceptance-auditor.js +279 -0
package/lib/review/layers/blind-hunter.js +500 -0
package/lib/review/layers/defense-in-depth.js +209 -0
package/lib/review/layers/edge-case-hunter.js +266 -0
package/lib/review/panel.js +519 -0
package/lib/review/two-stage.js +244 -0
package/lib/session/cost-tracker.js +203 -0
package/lib/session/logger.js +349 -0
package/package.json +1 -1

package/lib/review/panel.js ADDED Viewed

@@ -0,0 +1,519 @@
+// Copyright (c) 2026 defconxt. All rights reserved.
+// Licensed under AGPL-3.0 — see LICENSE file for details.
+// CIPHER is a trademark of defconxt.
+/**
+ * CIPHER Expert Panel Simulation
+ *
+ * Simulates 3 security expert personas reviewing code independently,
+ * then synthesizes findings into a consensus report. Each persona
+ * filters and reweights findings from the review engine based on
+ * their focus area.
+ *
+ * Personas:
+ * - RedTeamExpert: attack surface, exploitation paths, offensive TTPs
+ * - BlueTeamExpert: detection gaps, hardening, logging, monitoring
+ * - ArchitectExpert: trust boundaries, data flow, auth design, OWASP
+ *
+ * @module review/panel
+ */
+import { createReviewEngine, resolveInput } from './engine.js';
+// ---------------------------------------------------------------------------
+// Expert Personas
+// ---------------------------------------------------------------------------
+/**
+ * @typedef {object} ExpertPersona
+ * @property {string} id         - Persona identifier
+ * @property {string} name       - Display name
+ * @property {string} title      - Professional title
+ * @property {string} focus      - What this expert focuses on
+ * @property {string[]} priorityCwes  - CWEs this expert prioritizes
+ * @property {string[]} priorityTags  - Tags this expert prioritizes
+ * @property {Map<string, number>} severityBump - Severity rank deltas keyed by CWE ID
+ * @property {function} commentary    - (finding) => expert perspective string
+ */
+const SEVERITY_RANK = { critical: 4, high: 3, medium: 2, low: 1, info: 0 };
+const RANK_TO_SEV = ['info', 'low', 'medium', 'high', 'critical'];
+function clampSeverity(rank) {
+  return RANK_TO_SEV[Math.max(0, Math.min(4, rank))];
+}
+/** @type {ExpertPersona} */
+const RED_TEAM_EXPERT = {
+  id: 'red-team',
+  name: 'Alex Chen',
+  title: 'Principal Offensive Security Engineer',
+  focus: 'Attack surface analysis, exploitation chains, OPSEC failures, privilege escalation paths',
+  priorityCwes: [
+    'CWE-89', 'CWE-78', 'CWE-95', 'CWE-502', 'CWE-918', 'CWE-22',
+    'CWE-798', 'CWE-269', 'CWE-79', 'CWE-1321',
+  ],
+  priorityTags: [
+    'T1190', 'T1059', 'T1078', 'T1552.001', 'T1105', 'T1557',
+    'owasp-a03', 'owasp-a01',
+  ],
+  severityBump: new Map([
+    ['CWE-89', 1],   // SQLi is always critical from attacker perspective
+    ['CWE-78', 1],   // Command injection → immediate RCE
+    ['CWE-502', 1],  // Deserialization → RCE
+    ['CWE-798', 1],  // Hardcoded creds → instant compromise
+    ['CWE-269', 1],  // Privilege escalation
+  ]),
+  commentary(finding) {
+    const cwes = finding.cweIds || [];
+    if (cwes.includes('CWE-89') || cwes.includes('CWE-78')) {
+      return 'Direct code execution path. Attacker can chain this for initial access or lateral movement.';
+    }
+    if (cwes.includes('CWE-798')) {
+      return 'Credential harvesting target. Leaked secrets enable persistence without exploitation.';
+    }
+    if (cwes.includes('CWE-269')) {
+      return 'Privilege escalation primitive. Attacker moves from low-priv to admin in one step.';
+    }
+    if (cwes.includes('CWE-918')) {
+      return 'SSRF enables internal network reconnaissance and cloud metadata access.';
+    }
+    if (cwes.includes('CWE-502')) {
+      return 'Deserialization to RCE. No auth required if the endpoint is reachable.';
+    }
+    if (cwes.includes('CWE-79')) {
+      return 'XSS enables session hijacking, credential theft, and phishing from trusted domain.';
+    }
+    return 'Exploitable attack surface — assess reachability and impact in context.';
+  },
+};
+/** @type {ExpertPersona} */
+const BLUE_TEAM_EXPERT = {
+  id: 'blue-team',
+  name: 'Sarah Martinez',
+  title: 'Senior Detection Engineer & SOC Lead',
+  focus: 'Detection coverage, logging gaps, monitoring blind spots, incident response readiness',
+  priorityCwes: [
+    'CWE-778', 'CWE-532', 'CWE-209', 'CWE-390', 'CWE-755',
+    'CWE-404', 'CWE-401', 'CWE-307', 'CWE-613', 'CWE-614',
+  ],
+  priorityTags: [
+    'reliability', 'observability', 'owasp-a09', 'owasp-a07',
+    'owasp-a05', 'performance',
+  ],
+  severityBump: new Map([
+    ['CWE-778', 1],  // Missing audit logging → blind spot
+    ['CWE-390', 1],  // Error swallowing → invisible failures
+    ['CWE-532', 1],  // Sensitive data in logs → compliance risk
+    ['CWE-307', 1],  // No rate limiting → brute force undetected
+  ]),
+  commentary(finding) {
+    const cwes = finding.cweIds || [];
+    if (cwes.includes('CWE-778')) {
+      return 'Detection blind spot. This operation has no audit trail — incident responders cannot reconstruct the attack timeline.';
+    }
+    if (cwes.includes('CWE-390') || cwes.includes('CWE-755')) {
+      return 'Silent failure. Errors are swallowed — the SOC has no signal to trigger investigation.';
+    }
+    if (cwes.includes('CWE-532')) {
+      return 'Log contamination. Sensitive data in logs creates compliance exposure and complicates log sharing.';
+    }
+    if (cwes.includes('CWE-307')) {
+      return 'No rate limiting means brute-force attacks are invisible until account compromise.';
+    }
+    if (cwes.includes('CWE-209')) {
+      return 'Error details leak internal architecture to attackers, aiding reconnaissance.';
+    }
+    if (cwes.includes('CWE-404') || cwes.includes('CWE-401')) {
+      return 'Resource leak degrades availability over time — difficult to attribute to an attack vs. organic growth.';
+    }
+    return 'Review detection coverage — ensure this behavior generates actionable alerts.';
+  },
+};
+/** @type {ExpertPersona} */
+const ARCHITECT_EXPERT = {
+  id: 'architect',
+  name: 'Dr. Priya Kapoor',
+  title: 'Security Architect & Privacy Officer',
+  focus: 'Trust boundaries, data flow integrity, auth/authz design, OWASP alignment, privacy by design',
+  priorityCwes: [
+    'CWE-306', 'CWE-862', 'CWE-863', 'CWE-200', 'CWE-942',
+    'CWE-295', 'CWE-601', 'CWE-16', 'CWE-20', 'CWE-328',
+  ],
+  priorityTags: [
+    'owasp-a01', 'owasp-a02', 'owasp-a05', 'owasp-a07',
+    'privacy', 'T1078',
+  ],
+  severityBump: new Map([
+    ['CWE-306', 1],  // Missing auth → architectural failure
+    ['CWE-862', 1],  // Missing authz → broken access control
+    ['CWE-200', 1],  // Data exposure → privacy violation
+    ['CWE-942', 1],  // CORS wildcard → trust boundary collapse
+  ]),
+  commentary(finding) {
+    const cwes = finding.cweIds || [];
+    if (cwes.includes('CWE-306') || cwes.includes('CWE-862')) {
+      return 'Broken access control — the most common critical vulnerability class (OWASP #1). This is an architectural gap, not a bug.';
+    }
+    if (cwes.includes('CWE-200')) {
+      return 'Data over-exposure violates least-privilege and creates GDPR/CCPA notification triggers.';
+    }
+    if (cwes.includes('CWE-942') || cwes.includes('CWE-295')) {
+      return 'Trust boundary violation. The security model assumes a perimeter that this bypasses.';
+    }
+    if (cwes.includes('CWE-20')) {
+      return 'Missing input validation at the trust boundary. Defense-in-depth requires validation at every layer crossing.';
+    }
+    if (cwes.includes('CWE-328') || cwes.includes('CWE-338')) {
+      return 'Cryptographic weakness undermines the security guarantees the architecture depends on.';
+    }
+    if (cwes.includes('CWE-16')) {
+      return 'Security misconfiguration — the defaults are insecure. Harden before deployment.';
+    }
+    return 'Evaluate against the threat model — does this weaken a trust boundary or violate a design invariant?';
+  },
+};
+// Every persona on the panel; reports iterate this array, so its order is the
+// order of the per-expert sections.
+const ALL_PERSONAS = [RED_TEAM_EXPERT, BLUE_TEAM_EXPERT, ARCHITECT_EXPERT];
+// ---------------------------------------------------------------------------
+// Expert Review
+// ---------------------------------------------------------------------------
+/**
+ * An expert's perspective on a single finding.
+ */
+class ExpertFinding {
+  constructor({ finding, persona, adjustedSeverity, comment }) {
+    this.finding = finding;
+    this.expertId = persona.id;
+    this.expertName = persona.name;
+    this.expertTitle = persona.title;
+    this.originalSeverity = finding.severity;
+    this.adjustedSeverity = adjustedSeverity;
+    this.comment = comment;
+    this.isPriority = false; // Set by reviewAsExpert
+  }
+}
+/**
+ * Review findings through a specific expert persona's lens.
+ *
+ * @param {import('./engine.js').ReviewFinding[]} findings
+ * @param {ExpertPersona} persona
+ * @returns {ExpertFinding[]}
+ */
+function reviewAsExpert(findings, persona) {
+  const expertFindings = [];
+  for (const f of findings) {
+    const cwes = f.cweIds || [];
+    const tags = f.tags || [];
+    // Check if this finding is in the expert's priority area
+    const isPriorityCwe = cwes.some((c) => persona.priorityCwes.includes(c));
+    const isPriorityTag = tags.some((t) => persona.priorityTags.includes(t));
+    const isPriority = isPriorityCwe || isPriorityTag;
+    // Adjust severity based on expert perspective
+    let rank = SEVERITY_RANK[f.severity] ?? 0;
+    for (const cwe of cwes) {
+      if (persona.severityBump.has(cwe)) {
+        rank += persona.severityBump.get(cwe);
+      }
+    }
+    const adjustedSeverity = clampSeverity(rank);
+    const ef = new ExpertFinding({
+      finding: f,
+      persona,
+      adjustedSeverity,
+      comment: persona.commentary(f),
+    });
+    ef.isPriority = isPriority;
+    expertFindings.push(ef);
+  }
+  // Sort: priority findings first, then by adjusted severity
+  expertFindings.sort((a, b) => {
+    if (a.isPriority !== b.isPriority) return a.isPriority ? -1 : 1;
+    const aRank = SEVERITY_RANK[a.adjustedSeverity] ?? 0;
+    const bRank = SEVERITY_RANK[b.adjustedSeverity] ?? 0;
+    return bRank - aRank;
+  });
+  return expertFindings;
+}
+// ---------------------------------------------------------------------------
+// Consensus Synthesis
+// ---------------------------------------------------------------------------
+/**
+ * A consensus finding — identified by 2+ experts.
+ */
+class ConsensusFinding {
+  constructor({ finding, experts, maxSeverity, comments }) {
+    this.finding = finding;
+    this.experts = experts;         // expert IDs that flagged this
+    this.expertCount = experts.length;
+    this.maxSeverity = maxSeverity;  // highest adjusted severity
+    this.comments = comments;        // {expertId: comment}
+    this.isUnanimous = experts.length === ALL_PERSONAS.length;
+  }
+}
+/**
+ * A conflict — experts disagree on severity by 2+ levels.
+ */
+class SeverityConflict {
+  constructor({ finding, assessments }) {
+    this.finding = finding;
+    this.assessments = assessments; // [{expertId, severity}]
+    this.spread = 0;
+    if (assessments.length >= 2) {
+      const ranks = assessments.map((a) => SEVERITY_RANK[a.severity] ?? 0);
+      this.spread = Math.max(...ranks) - Math.min(...ranks);
+    }
+  }
+}
+/**
+ * Synthesize expert findings into consensus.
+ *
+ * @param {Map<string, ExpertFinding[]>} expertResults - expertId → findings
+ * @returns {{ consensus: ConsensusFinding[], conflicts: SeverityConflict[] }}
+ */
+function synthesize(expertResults) {
+  // Group findings by identity (file:line:titleStem)
+  const findingMap = new Map(); // key → {finding, experts: Map<expertId, ExpertFinding>}
+  for (const [expertId, findings] of expertResults) {
+    for (const ef of findings) {
+      const f = ef.finding;
+      const stem = f.title.toLowerCase().replace(/[^a-z0-9]/g, '').slice(0, 30);
+      const key = `${f.file}:${f.line}:${stem}`;
+      if (!findingMap.has(key)) {
+        findingMap.set(key, { finding: f, experts: new Map() });
+      }
+      findingMap.get(key).experts.set(expertId, ef);
+    }
+  }
+  const consensus = [];
+  const conflicts = [];
+  for (const [, entry] of findingMap) {
+    const { finding, experts } = entry;
+    if (experts.size >= 2) {
+      // Consensus: 2+ experts flagged this
+      const expertIds = [...experts.keys()];
+      const maxRank = Math.max(
+        ...[...experts.values()].map((ef) => SEVERITY_RANK[ef.adjustedSeverity] ?? 0),
+      );
+      const comments = {};
+      for (const [eid, ef] of experts) {
+        comments[eid] = ef.comment;
+      }
+      consensus.push(new ConsensusFinding({
+        finding,
+        experts: expertIds,
+        maxSeverity: clampSeverity(maxRank),
+        comments,
+      }));
+    }
+    // Check for conflicts: severity spread >= 2 levels
+    if (experts.size >= 2) {
+      const assessments = [...experts.entries()].map(([eid, ef]) => ({
+        expertId: eid,
+        severity: ef.adjustedSeverity,
+      }));
+      const conflict = new SeverityConflict({ finding, assessments });
+      if (conflict.spread >= 2) {
+        conflicts.push(conflict);
+      }
+    }
+  }
+  // Sort consensus by expert count (unanimous first), then severity
+  consensus.sort((a, b) => {
+    if (b.expertCount !== a.expertCount) return b.expertCount - a.expertCount;
+    return (SEVERITY_RANK[b.maxSeverity] ?? 0) - (SEVERITY_RANK[a.maxSeverity] ?? 0);
+  });
+  return { consensus, conflicts };
+}
+// ---------------------------------------------------------------------------
+// PanelResult
+// ---------------------------------------------------------------------------
+export class PanelResult {
+  constructor({ expertResults, consensus, conflicts, reviewResult, totalTime = 0 } = {}) {
+    /** @type {Map<string, ExpertFinding[]>} */
+    this.expertResults = expertResults;
+    /** @type {ConsensusFinding[]} */
+    this.consensus = consensus;
+    /** @type {SeverityConflict[]} */
+    this.conflicts = conflicts;
+    /** @type {import('./engine.js').ReviewResult} */
+    this.reviewResult = reviewResult;
+    this.totalTime = totalTime;
+  }
+  toReport() {
+    const lines = [
+      '═══════════════════════════════════════════════════════',
+      '  CIPHER Expert Panel Assessment',
+      '═══════════════════════════════════════════════════════',
+      '',
+      `Base review: ${this.reviewResult.summary}`,
+      `Panel consensus: ${this.consensus.length} findings agreed by 2+ experts`,
+      `Conflicts: ${this.conflicts.length} severity disagreements`,
+      `Total time: ${this.totalTime}ms`,
+      '',
+    ];
+    // Per-expert summaries
+    for (const persona of ALL_PERSONAS) {
+      const findings = this.expertResults.get(persona.id) || [];
+      const priority = findings.filter((f) => f.isPriority);
+      lines.push(`── ${persona.name} (${persona.title}) ──`);
+      lines.push(`Focus: ${persona.focus}`);
+      lines.push(`Findings: ${findings.length} total, ${priority.length} in priority area`);
+      if (priority.length > 0) {
+        lines.push('Priority findings:');
+        for (const pf of priority.slice(0, 5)) {
+          const bump = pf.adjustedSeverity !== pf.originalSeverity
+            ? ` (${pf.originalSeverity}→${pf.adjustedSeverity})`
+            : '';
+          lines.push(`  • [${pf.adjustedSeverity.toUpperCase()}${bump}] ${pf.finding.title}`);
+          lines.push(`    ${pf.comment}`);
+        }
+      }
+      lines.push('');
+    }
+    // Consensus
+    if (this.consensus.length > 0) {
+      lines.push('── CONSENSUS (2+ experts agree) ──');
+      for (const cf of this.consensus) {
+        const tag = cf.isUnanimous ? '★ UNANIMOUS' : `${cf.expertCount} experts`;
+        lines.push(`  [${cf.maxSeverity.toUpperCase()}] ${cf.finding.title} — ${tag}`);
+        const loc = cf.finding.line ? `${cf.finding.file}:${cf.finding.line}` : cf.finding.file;
+        if (loc) lines.push(`    Location: ${loc}`);
+        for (const [eid, comment] of Object.entries(cf.comments)) {
+          const expert = ALL_PERSONAS.find((p) => p.id === eid);
+          lines.push(`    ${expert?.name ?? eid}: ${comment}`);
+        }
+      }
+      lines.push('');
+    }
+    // Conflicts
+    if (this.conflicts.length > 0) {
+      lines.push('── SEVERITY CONFLICTS ──');
+      for (const sc of this.conflicts) {
+        lines.push(`  ${sc.finding.title} (spread: ${sc.spread} levels)`);
+        for (const a of sc.assessments) {
+          const expert = ALL_PERSONAS.find((p) => p.id === a.expertId);
+          lines.push(`    ${expert?.name ?? a.expertId}: ${a.severity}`);
+        }
+      }
+      lines.push('');
+    }
+    lines.push('───────────────────────────────────────────────────────');
+    return lines.join('\n');
+  }
+  toJSON() {
+    const expertSummaries = {};
+    for (const persona of ALL_PERSONAS) {
+      const findings = this.expertResults.get(persona.id) || [];
+      expertSummaries[persona.id] = {
+        name: persona.name,
+        title: persona.title,
+        focus: persona.focus,
+        totalFindings: findings.length,
+        priorityFindings: findings.filter((f) => f.isPriority).length,
+        topFindings: findings.slice(0, 5).map((ef) => ({
+          title: ef.finding.title,
+          originalSeverity: ef.originalSeverity,
+          adjustedSeverity: ef.adjustedSeverity,
+          isPriority: ef.isPriority,
+          comment: ef.comment,
+          file: ef.finding.file,
+          line: ef.finding.line,
+        })),
+      };
+    }
+    return {
+      totalTime: this.totalTime,
+      baseReview: this.reviewResult.summary,
+      experts: expertSummaries,
+      consensus: this.consensus.map((cf) => ({
+        title: cf.finding.title,
+        severity: cf.maxSeverity,
+        expertCount: cf.expertCount,
+        isUnanimous: cf.isUnanimous,
+        experts: cf.experts,
+        comments: cf.comments,
+        file: cf.finding.file,
+        line: cf.finding.line,
+      })),
+      conflicts: this.conflicts.map((sc) => ({
+        title: sc.finding.title,
+        spread: sc.spread,
+        assessments: sc.assessments,
+      })),
+    };
+  }
+}
+// ---------------------------------------------------------------------------
+// Panel Review — main entry point
+// ---------------------------------------------------------------------------
+/**
+ * Run an expert panel review on code input.
+ *
+ * @param {string} input       - File path, directory, or raw code string
+ * @param {object} [options]
+ * @param {string} [options.language] - Override language detection
+ * @param {string} [options.format]   - 'text' or 'json'
+ * @returns {Promise<PanelResult>}
+ */
+export async function panelReview(input, options = {}) {
+  const t0 = Date.now();
+  // 1. Run base review engine
+  const engine = await createReviewEngine();
+  const reviewResult = await engine.review(input, {
+    language: options.language,
+  });
+  // 2. Each expert reviews the findings from their perspective
+  const expertResults = new Map();
+  for (const persona of ALL_PERSONAS) {
+    expertResults.set(persona.id, reviewAsExpert(reviewResult.findings, persona));
+  }
+  // 3. Synthesize consensus and conflicts
+  const { consensus, conflicts } = synthesize(expertResults);
+  return new PanelResult({
+    expertResults,
+    consensus,
+    conflicts,
+    reviewResult,
+    totalTime: Date.now() - t0,
+  });
+}