npm - cipher-security - Versions diffs - 2.0.8 → 2.2.0 - Mend

cipher-security 2.0.8 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/bin/cipher.js +11 -1
package/lib/agent-runtime/handlers/architect.js +199 -0
package/lib/agent-runtime/handlers/base.js +240 -0
package/lib/agent-runtime/handlers/blue.js +220 -0
package/lib/agent-runtime/handlers/incident.js +161 -0
package/lib/agent-runtime/handlers/privacy.js +190 -0
package/lib/agent-runtime/handlers/purple.js +209 -0
package/lib/agent-runtime/handlers/recon.js +174 -0
package/lib/agent-runtime/handlers/red.js +246 -0
package/lib/agent-runtime/handlers/researcher.js +170 -0
package/lib/agent-runtime/handlers.js +35 -0
package/lib/agent-runtime/index.js +196 -0
package/lib/agent-runtime/parser.js +316 -0
package/lib/analyze/consistency.js +566 -0
package/lib/analyze/constitution.js +110 -0
package/lib/analyze/sharding.js +251 -0
package/lib/autonomous/agent-tool.js +165 -0
package/lib/autonomous/feedback-loop.js +13 -6
package/lib/autonomous/framework.js +17 -0
package/lib/autonomous/handoff.js +506 -0
package/lib/autonomous/modes/blue.js +26 -0
package/lib/autonomous/modes/red.js +585 -0
package/lib/autonomous/modes/researcher.js +322 -0
package/lib/autonomous/researcher.js +12 -45
package/lib/autonomous/runner.js +9 -537
package/lib/benchmark/agent.js +88 -26
package/lib/benchmark/baselines.js +3 -0
package/lib/benchmark/claude-code-solver.js +254 -0
package/lib/benchmark/cognitive.js +283 -0
package/lib/benchmark/index.js +12 -2
package/lib/benchmark/knowledge.js +281 -0
package/lib/benchmark/llm.js +156 -15
package/lib/benchmark/models.js +5 -2
package/lib/benchmark/nyu-ctf.js +192 -0
package/lib/benchmark/overthewire.js +347 -0
package/lib/benchmark/picoctf.js +281 -0
package/lib/benchmark/prompts.js +280 -0
package/lib/benchmark/registry.js +219 -0
package/lib/benchmark/remote-solver.js +356 -0
package/lib/benchmark/remote-target.js +263 -0
package/lib/benchmark/reporter.js +35 -0
package/lib/benchmark/runner.js +174 -10
package/lib/benchmark/sandbox.js +35 -0
package/lib/benchmark/scorer.js +22 -4
package/lib/benchmark/solver.js +34 -1
package/lib/benchmark/tools.js +262 -16
package/lib/commands.js +9 -0
package/lib/execution/council.js +434 -0
package/lib/execution/parallel.js +292 -0
package/lib/gates/circuit-breaker.js +135 -0
package/lib/gates/confidence.js +302 -0
package/lib/gates/corrections.js +219 -0
package/lib/gates/self-check.js +245 -0
package/lib/gateway/commands.js +727 -0
package/lib/guardrails/engine.js +364 -0
package/lib/mcp/server.js +349 -3
package/lib/memory/compressor.js +94 -7
package/lib/pipeline/hooks.js +288 -0
package/lib/pipeline/index.js +11 -0
package/lib/review/budget.js +210 -0
package/lib/review/engine.js +526 -0
package/lib/review/layers/acceptance-auditor.js +279 -0
package/lib/review/layers/blind-hunter.js +500 -0
package/lib/review/layers/defense-in-depth.js +209 -0
package/lib/review/layers/edge-case-hunter.js +266 -0
package/lib/review/panel.js +519 -0
package/lib/review/two-stage.js +244 -0
package/lib/session/cost-tracker.js +203 -0
package/lib/session/logger.js +349 -0
package/package.json +1 -1

package/lib/gates/confidence.js ADDED Viewed

@@ -0,0 +1,302 @@
+// Copyright (c) 2026 defconxt. All rights reserved.
+// Licensed under AGPL-3.0 — see LICENSE file for details.
+// CIPHER is a trademark of defconxt.
+/**
+ * Security Confidence Checker — Pre-response confidence assessment.
+ *
+ * Prevents wrong-direction responses by scoring confidence across
+ * 5 dimensions before generating security advice. Inspired by
+ * SuperClaude's ConfidenceChecker pattern, adapted for security domain.
+ *
+ * Confidence levels:
+ *   HIGH   (≥0.90): Proceed with full response
+ *   MEDIUM (0.70–0.89): Respond with explicit caveats
+ *   LOW    (<0.70): Flag uncertainty, recommend verification
+ *
+ * @module gates/confidence
+ */
+// ---------------------------------------------------------------------------
+// Hedging language detection
+// ---------------------------------------------------------------------------
/**
 * Phrases that signal a hedged, unverified claim in generated security output.
 *
 * Every pattern is case-insensitive and anchored on word boundaries. None of
 * them carries the `g` flag, so `.test()` / `.match()` are stateless and the
 * list can be shared freely.
 *
 * The contraction pattern accepts both the ASCII apostrophe (') and the
 * typographic one (’) so that text using curly quotes is not missed.
 */
const HEDGING_PATTERNS = [
  /\bshould\s+(?:work|fix|resolve|detect|prevent|mitigate)\b/i,
  /\bprobably\s+(?:works?|fixes?|covers?|detects?)\b/i,
  /\blikely\s+sufficient\b/i,
  /\bseems?\s+to\s+(?:work|fix|detect)\b/i,
  /\bmight\s+(?:work|help|detect|prevent)\b/i,
  /\bI\s+(?:think|believe|assume)\s+(?:this|it|that)\b/i,
  /\bjust\s+this\s+once\b/i,
  /\bshould\s+be\s+(?:fine|enough|sufficient|okay)\b/i,
  /\bI['’]m\s+(?:fairly|pretty|quite)\s+(?:sure|confident)\b/i,
];
/**
 * Phrases that rationalize skipping verification, each tagged with the
 * category reported by `detectRationalizations`.
 *
 * Patterns are case-insensitive and non-global (stateless). The contraction
 * pattern accepts both the ASCII (') and typographic (’) apostrophe.
 */
const RATIONALIZATION_PATTERNS = [
  { pattern: /\bshould\s+work\s+now\b/i, category: 'unverified-claim' },
  { pattern: /\bI['’]m\s+confident\b/i, category: 'false-confidence' },
  { pattern: /\bjust\s+this\s+once\b/i, category: 'exception-seeking' },
  { pattern: /\bpartial\s+check\s+is\s+enough\b/i, category: 'incomplete-verification' },
  { pattern: /\bno\s+need\s+to\s+(?:test|verify|check)\b/i, category: 'verification-avoidance' },
  { pattern: /\bobviously\s+(?:works?|correct|right)\b/i, category: 'assumed-correctness' },
];
+// ---------------------------------------------------------------------------
+// SecurityConfidenceChecker
+// ---------------------------------------------------------------------------
+/**
+ * @typedef {Object} AssessmentContext
+ * @property {string} query — The user's query
+ * @property {string} mode — CIPHER mode (RED, BLUE, etc.)
+ * @property {number} knowledgeHits — Number of knowledge base matches
+ * @property {number} knowledgeRelevance — Relevance score of top match (0–1)
+ * @property {'current'|'recent'|'outdated'|'unknown'} topicRecency
+ * @property {boolean} hasConflictingGuidance — Whether sources conflict
+ * @property {boolean} authoritativeSourceFound — Whether authoritative source exists
+ */
+/**
+ * @typedef {Object} ConfidenceResult
+ * @property {number} score — Overall confidence (0–1)
+ * @property {'HIGH'|'MEDIUM'|'LOW'} level — Confidence level
+ * @property {boolean} shouldProceed — Whether to generate full response
+ * @property {string[]} checks — Individual check results
+ * @property {string[]} concerns — Identified concerns
+ * @property {string} recommendation — Action recommendation
+ */
/**
 * Keywords used to decide whether a query fits a CIPHER mode. Matching is a
 * plain lowercase substring test, so short keywords also match inside longer
 * words (for example `log` matches "login").
 *
 * A Map is used instead of an object literal so that mode names such as
 * "constructor" cannot resolve to inherited Object.prototype members.
 */
const MODE_TOPICS = new Map([
  ['RED', ['exploit', 'payload', 'attack', 'bypass', 'lateral', 'privesc', 'c2']],
  ['BLUE', ['detect', 'detection', 'sigma', 'siem', 'hunting', 'hardening', 'edr', 'log']],
  ['PURPLE', ['coverage', 'emulation', 'gap', 'detection engineering']],
  ['PRIVACY', ['gdpr', 'ccpa', 'hipaa', 'dpia', 'anonymization', 'data flow']],
  ['RECON', ['osint', 'reconnaissance', 'subdomain', 'footprinting']],
  ['INCIDENT', ['triage', 'forensics', 'containment', 'eradication', 'timeline']],
  ['ARCHITECT', ['design', 'architecture', 'threat model', 'zero trust']],
  ['RESEARCHER', ['research', 'analysis', 'technique', 'methodology']],
]);

/**
 * Scores how confident CIPHER should be before answering a security query.
 *
 * Five weighted checks (30 / 25 / 20 / 15 / 10) are summed. The total is kept
 * in integer percentage points and divided once at the end, so the reported
 * score is an exact two-decimal value (0.7, not 0.7000000000000001) and the
 * HIGH / MEDIUM thresholds are compared without floating-point drift.
 */
export class SecurityConfidenceChecker {
  /**
   * Assess confidence before generating a security response.
   *
   * @param {AssessmentContext} [context] — Missing fields are treated as
   *   "no evidence" for the corresponding check.
   * @returns {ConfidenceResult}
   */
  assess(context = {}) {
    let points = 0;
    const checks = [];
    const concerns = [];

    // Check 1: Knowledge base has relevant content (30%)
    const hasHits = context.knowledgeHits > 0;
    if (hasHits && context.knowledgeRelevance > 0.7) {
      points += 30;
      checks.push('✅ Knowledge base has relevant content');
    } else if (hasHits) {
      points += 15;
      checks.push('⚠️ Knowledge base has partial matches');
      concerns.push('Knowledge relevance below threshold');
    } else {
      checks.push('❌ No knowledge base matches');
      concerns.push('No authoritative knowledge for this topic');
    }

    // Check 2: Information recency (25%)
    const recency = context.topicRecency;
    if (recency === 'current' || recency === 'recent') {
      points += 25;
      checks.push('✅ Information is current');
    } else if (recency === 'outdated') {
      points += 10;
      checks.push('⚠️ Information may be outdated');
      concerns.push('Knowledge may not reflect latest threats/mitigations');
    } else {
      checks.push('❌ Information recency unknown');
      concerns.push('Cannot verify information currency');
    }

    // Check 3: Mode-appropriate response (20%) — a mismatch still earns half.
    if (this._isModeAligned(context)) {
      points += 20;
      checks.push('✅ Query aligns with active mode');
    } else {
      points += 10;
      checks.push('⚠️ Query may not align with active mode');
      concerns.push('Consider switching mode for better results');
    }

    // Check 4: No conflicting guidance (15%)
    if (context.hasConflictingGuidance) {
      checks.push('❌ Conflicting guidance exists');
      concerns.push('Multiple sources provide contradictory advice');
    } else {
      points += 15;
      checks.push('✅ No conflicting guidance detected');
    }

    // Check 5: Authoritative source verified (10%)
    if (context.authoritativeSourceFound) {
      points += 10;
      checks.push('✅ Authoritative source verified');
    } else {
      checks.push('⚠️ No authoritative source found');
      concerns.push('Response based on general knowledge, not verified source');
    }

    const score = points / 100;
    const shouldProceed = points >= 70;

    let level;
    let recommendation;
    if (points >= 90) {
      level = 'HIGH';
      recommendation = 'Proceed with full response — high confidence';
    } else if (shouldProceed) {
      level = 'MEDIUM';
      recommendation = 'Respond with explicit caveats — flag areas of uncertainty';
    } else {
      level = 'LOW';
      recommendation = 'Flag low confidence — recommend verification before acting on advice';
    }

    return { score, level, shouldProceed, checks, concerns, recommendation };
  }

  /**
   * Check if query aligns with the active CIPHER mode.
   *
   * A mode with no keyword list is treated as aligned so that unknown modes
   * are not penalized. A missing or non-string query is treated as empty text,
   * which aligns with no known mode.
   *
   * @private
   * @param {{ mode?: string, query?: string }} context
   * @returns {boolean}
   */
  _isModeAligned(context) {
    const topics = MODE_TOPICS.get(context.mode);
    if (!topics || topics.length === 0) return true; // Unknown mode — don't penalize

    const queryLower = typeof context.query === 'string' ? context.query.toLowerCase() : '';
    return topics.some((topic) => queryLower.includes(topic));
  }
}
+// ---------------------------------------------------------------------------
+// Hedging & Rationalization Detection
+// ---------------------------------------------------------------------------
/**
 * Detect hedging language in security output.
 *
 * Each pattern in HEDGING_PATTERNS contributes at most one entry — its first
 * occurrence in `text` — so `count` is the number of distinct patterns that
 * matched, not the number of hedging phrases in the text.
 *
 * Non-string input (null, undefined, ...) is reported as "no hedging" rather
 * than raising a TypeError.
 *
 * @param {string} text — Response text to check
 * @returns {{ hedgingFound: boolean, matches: string[], count: number }}
 */
export function detectHedging(text) {
  if (typeof text !== 'string') {
    return { hedgingFound: false, matches: [], count: 0 };
  }

  const matches = HEDGING_PATTERNS
    .map((pattern) => text.match(pattern))
    .filter((found) => found !== null)
    .map((found) => found[0]);

  return { hedgingFound: matches.length > 0, matches, count: matches.length };
}
/**
 * Detect rationalization patterns in security output.
 *
 * Each entry of RATIONALIZATION_PATTERNS is reported at most once, using its
 * first occurrence in `text`, together with the entry's category.
 *
 * Non-string input (null, undefined, ...) yields an empty result rather than
 * raising a TypeError.
 *
 * @param {string} text
 * @returns {{ found: boolean, rationalizations: Array<{ text: string, category: string }> }}
 */
export function detectRationalizations(text) {
  if (typeof text !== 'string') {
    return { found: false, rationalizations: [] };
  }

  const rationalizations = [];
  for (const { pattern, category } of RATIONALIZATION_PATTERNS) {
    const found = text.match(pattern);
    if (found !== null) {
      rationalizations.push({ text: found[0], category });
    }
  }

  return { found: rationalizations.length > 0, rationalizations };
}
+// ---------------------------------------------------------------------------
+// Verification Gate
+// ---------------------------------------------------------------------------
/**
 * Evidence types that satisfy the verification gate.
 *
 * Each type lists `validators` — predicates over the evidence text that must
 * ALL return true — and a human-readable `description` used in failure
 * messages. The validators are shallow shape checks; they do not prove the
 * evidence is semantically correct.
 */
const EVIDENCE_TYPES = {
  sigma_rule: {
    validators: [
      (text) => /^title:\s*.+/m.test(text),
      (text) => /^logsource:/m.test(text),
      (text) => /^detection:/m.test(text),
    ],
    description: 'Sigma rule must have title, logsource, and detection fields',
  },
  kql_query: {
    validators: [
      (text) => /\b(?:where|project|summarize|extend|join|let)\b/i.test(text),
    ],
    description: 'KQL query must contain valid operators',
  },
  spl_query: {
    validators: [
      // `index=` / `sourcetype=` deliberately have no trailing \b: a boundary
      // after "=" only exists when a word character follows, which would
      // reject quoted or wildcard values such as index="main" or index=*.
      (text) => /\b(?:index|sourcetype)=|\b(?:search|stats|eval|table)\b/i.test(text),
    ],
    description: 'SPL query must contain valid Splunk operators',
  },
  cve_reference: {
    validators: [
      (text) => /CVE-\d{4}-\d{4,}/.test(text),
    ],
    description: 'CVE reference must be a valid CVE ID',
  },
  mitre_reference: {
    validators: [
      // Word boundaries keep incidental text such as "PORT8080" from being
      // accepted as a technique ID.
      (text) => /\bT\d{4}(?:\.\d{3})?\b/.test(text),
    ],
    description: 'MITRE reference must be a valid technique ID',
  },
  command_output: {
    validators: [
      (text) => text.includes('$') || text.includes('#') || text.includes('>>>'),
    ],
    description: 'Command output must include shell prompt evidence',
  },
};
/**
 * Verification gate — checks that security claims have supporting evidence.
 *
 * The evidence must be a non-blank string. When `evidenceType` names an entry
 * of EVIDENCE_TYPES, every validator of that entry must accept the evidence;
 * any other type is accepted as untyped evidence. `claim` is not inspected.
 *
 * The type lookup only considers EVIDENCE_TYPES' own keys, so names such as
 * "constructor" or "__proto__" are treated as unknown types instead of
 * resolving to inherited members and crashing.
 *
 * @param {string} claim — The security claim being made
 * @param {string} evidence — Supporting evidence
 * @param {string} evidenceType — Type of evidence expected
 * @returns {{ verified: boolean, reason: string, evidenceType: string }}
 */
export function verifyEvidence(claim, evidence, evidenceType) {
  // Non-string evidence cannot be validated as text; treat it as absent.
  if (typeof evidence !== 'string' || evidence.trim().length === 0) {
    return { verified: false, reason: 'No evidence provided', evidenceType };
  }

  const isKnownType = Object.prototype.hasOwnProperty.call(EVIDENCE_TYPES, evidenceType);
  if (!isKnownType) {
    // Unknown evidence type — accept if non-empty
    return { verified: true, reason: 'Evidence provided (untyped)', evidenceType };
  }

  const typeSpec = EVIDENCE_TYPES[evidenceType];
  const satisfiesAll = typeSpec.validators.every((validate) => validate(evidence));
  if (!satisfiesAll) {
    return {
      verified: false,
      reason: `Evidence does not satisfy ${evidenceType} requirements: ${typeSpec.description}`,
      evidenceType,
    };
  }

  return { verified: true, reason: `Evidence verified as valid ${evidenceType}`, evidenceType };
}
/**
 * Run `verifyEvidence` over a list of claim/evidence pairs and summarize.
 *
 * Every result carries the original `claim` alongside the fields returned by
 * `verifyEvidence` for that item, in input order.
 *
 * @param {Array<{ claim: string, evidence: string, evidenceType: string }>} items
 * @returns {{ allVerified: boolean, results: Array, unverifiedCount: number }}
 */
export function verifyBatch(items) {
  const results = [];
  let unverifiedCount = 0;

  for (const entry of items) {
    const outcome = verifyEvidence(entry.claim, entry.evidence, entry.evidenceType);
    if (!outcome.verified) {
      unverifiedCount += 1;
    }
    results.push({ claim: entry.claim, ...outcome });
  }

  const allVerified = unverifiedCount === 0;
  return { allVerified, results, unverifiedCount };
}

package/lib/gates/corrections.js ADDED Viewed

@@ -0,0 +1,219 @@
+// Copyright (c) 2026 defconxt. All rights reserved.
+// Licensed under AGPL-3.0 — see LICENSE file for details.
+// CIPHER is a trademark of defconxt.
+/**
+ * Reflexion Error Learning — corrections log for pattern-based error prevention.
+ *
+ * Stores error patterns and corrections in a JSONL file. Before generating
+ * structured output (Sigma rules, KQL queries, etc.), checks the corrections
+ * log and applies known fixes automatically.
+ *
+ * Inspired by SuperClaude's ReflectionEngine pattern.
+ *
+ * @module gates/corrections
+ */
import {
  appendFileSync,
  existsSync,
  mkdirSync,
  readFileSync,
  renameSync,
  writeFileSync,
} from 'node:fs';
import { homedir } from 'node:os';
import { dirname, join } from 'node:path';
+// ---------------------------------------------------------------------------
+// Correction entry
+// ---------------------------------------------------------------------------
+/**
+ * @typedef {Object} CorrectionEntry
+ * @property {string} id — Unique correction ID
+ * @property {string} category — Error category (sigma, kql, spl, hardening, general)
+ * @property {string} pattern — Regex pattern string that matches the error
+ * @property {string} replacement — Replacement string or fix description
+ * @property {string} description — Human-readable description of the error
+ * @property {string} timestamp — ISO timestamp
+ * @property {number} applyCount — Times this correction has been applied
+ */
+// ---------------------------------------------------------------------------
+// CorrectionsLog
+// ---------------------------------------------------------------------------
/** Directory of the default corrections log: `<home>/.cipher/data`. */
const DEFAULT_DIR = join(homedir(), '.cipher', 'data');
// NOTE(review): not referenced by any code in this module — presumably an
// intended global cap on stored entries; confirm or remove.
const MAX_ENTRIES = 500;

/**
 * JSONL-backed log of known error patterns and their fixes.
 *
 * Entries are appended one JSON object per line and cached in memory after the
 * first read. `applyCount` is only updated on the cached objects: it is never
 * written back to disk and is lost whenever the cache is invalidated
 * (`record`, `prune`).
 */
export class CorrectionsLog {
  /**
   * @param {string} [logPath] — Path to corrections.jsonl file; defaults to
   *   `corrections.jsonl` inside DEFAULT_DIR.
   */
  constructor(logPath) {
    this._path = logPath || join(DEFAULT_DIR, 'corrections.jsonl');
    this._entries = null; // Lazy load
  }

  /**
   * Load entries from disk, or return the cached list if already loaded.
   * A missing file yields an empty list. Blank lines, malformed JSON and
   * JSON values that are not objects are skipped.
   * @returns {CorrectionEntry[]}
   */
  _load() {
    if (this._entries !== null) return this._entries;

    this._entries = [];
    if (!existsSync(this._path)) return this._entries;

    const lines = readFileSync(this._path, 'utf8').split('\n');
    for (const line of lines) {
      if (line.trim() === '') continue;
      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch {
        continue; // Best effort: one corrupt line must not hide the rest.
      }
      // A bare `null`, number or string parses fine but is not an entry.
      if (parsed !== null && typeof parsed === 'object') {
        this._entries.push(parsed);
      }
    }
    return this._entries;
  }

  /**
   * Record a new correction by appending it to the log file (creating the
   * parent directory if needed) and invalidating the in-memory cache.
   * @param {{ category: string, pattern: string, replacement: string, description: string }} opts
   * @returns {CorrectionEntry}
   */
  record(opts) {
    const entry = {
      id: `COR-${Date.now().toString(36)}`,
      category: opts.category || 'general',
      pattern: opts.pattern,
      replacement: opts.replacement || '',
      description: opts.description || '',
      timestamp: new Date().toISOString(),
      applyCount: 0,
    };
    mkdirSync(dirname(this._path), { recursive: true });
    appendFileSync(this._path, `${JSON.stringify(entry)}\n`);
    // Invalidate cache
    this._entries = null;
    return entry;
  }

  /**
   * Get all corrections, optionally filtered by category.
   * @param {string} [category] — When falsy, every entry is returned.
   * @returns {CorrectionEntry[]}
   */
  getAll(category) {
    const entries = this._load();
    return category ? entries.filter((e) => e.category === category) : entries;
  }

  /**
   * Apply known corrections to text.
   *
   * Each matching entry's pattern is compiled with the `g` flag and every
   * occurrence is replaced; `replacement` is passed to `String.replace`, so
   * `$1`-style group references are honored. Entries are applied in log order
   * to the progressively corrected text.
   *
   * @param {string} text — Input text to correct
   * @param {string} category — Category to filter corrections by
   * @returns {{ corrected: string, applied: string[], count: number }}
   */
  apply(text, category) {
    let corrected = text;
    const applied = [];

    for (const entry of this.getAll(category)) {
      // A missing or empty pattern compiles to a regex that matches the empty
      // string at every position, which would splice the replacement between
      // every character of the text.
      if (typeof entry.pattern !== 'string' || entry.pattern === '') continue;

      try {
        const regex = new RegExp(entry.pattern, 'g');
        if (!regex.test(corrected)) continue;
        // test() on a global regex advances lastIndex; start the replace clean.
        regex.lastIndex = 0;
        corrected = corrected.replace(regex, entry.replacement ?? '');
        applied.push(entry.id);
        entry.applyCount = (entry.applyCount || 0) + 1;
      } catch {
        // Invalid regex (or text that is not a string) — skip this entry.
      }
    }

    return { corrected, applied, count: applied.length };
  }

  /**
   * Get correction statistics.
   * @returns {{ total: number, byCategory: Record<string, number>, topApplied: CorrectionEntry[] }}
   *   `topApplied` holds up to 10 entries ordered by descending `applyCount`.
   */
  stats() {
    const entries = this._load();
    const byCategory = {};
    for (const e of entries) {
      byCategory[e.category] = (byCategory[e.category] || 0) + 1;
    }
    const topApplied = [...entries]
      .sort((a, b) => (b.applyCount || 0) - (a.applyCount || 0))
      .slice(0, 10);
    return { total: entries.length, byCategory, topApplied };
  }

  /**
   * Prune old entries, keeping only the most recent per category (by
   * `timestamp`). When anything is dropped the log file is rewritten.
   *
   * The rewrite goes to a temporary sibling file that is then renamed over the
   * log, so a failed write leaves the existing log untouched.
   *
   * @param {number} [maxPerCategory=100]
   * @returns {number} — Number of entries pruned
   */
  prune(maxPerCategory = 100) {
    const entries = this._load();
    // If the whole log fits in one category's quota, no category can exceed it.
    if (entries.length <= maxPerCategory) return 0;

    // Map rather than a plain object: categories come from the log file and
    // may collide with Object.prototype member names.
    const byCategory = new Map();
    for (const e of entries) {
      const bucket = byCategory.get(e.category);
      if (bucket) {
        bucket.push(e);
      } else {
        byCategory.set(e.category, [e]);
      }
    }

    const kept = [];
    for (const bucket of byCategory.values()) {
      bucket.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
      kept.push(...bucket.slice(0, maxPerCategory));
    }

    const pruned = entries.length - kept.length;
    if (pruned > 0) {
      mkdirSync(dirname(this._path), { recursive: true });
      const content = `${kept.map((e) => JSON.stringify(e)).join('\n')}\n`;
      const tmpPath = `${this._path}.${process.pid}.tmp`;
      writeFileSync(tmpPath, content);
      renameSync(tmpPath, this._path);
      this._entries = null;
    }
    return pruned;
  }

  /** Number of entries */
  get size() {
    return this._load().length;
  }
}
+// ---------------------------------------------------------------------------
+// Pre-built Sigma corrections
+// ---------------------------------------------------------------------------
/**
 * Common Sigma rule errors that LLMs make.
 *
 * Each item has the shape accepted by `CorrectionsLog.record()`. `pattern` is
 * a regex source string; `CorrectionsLog.apply()` compiles it with the `g`
 * flag only and passes `replacement` to `String.replace`, so `$1` refers to
 * the first capture group.
 */
export const SIGMA_CORRECTIONS = [
  {
    category: 'sigma',
    // Only rewrites the abbreviated value. Valid `level:` lines are left
    // alone; the previous pattern matched them and replaced them with ''.
    pattern: '\\blevel:[ \\t]*info\\b',
    replacement: 'level: informational',
    description: 'Sigma level must be one of: critical, high, medium, low, informational',
  },
  {
    category: 'sigma',
    pattern: 'status:\\s*(?:new|draft)\\b',
    replacement: 'status: experimental',
    description: 'Sigma status "new" or "draft" should be "experimental"',
  },
  {
    category: 'sigma',
    // The capture group keeps the rule's own indentation instead of forcing
    // four spaces.
    // NOTE(review): `service` is itself a valid Sigma logsource attribute, so
    // this rewrite can alter correct rules — confirm it is wanted.
    pattern: '(logsource:\\s*\\n\\s*)service:\\s*',
    replacement: '$1category: ',
    description: 'Sigma logsource should use "category" not "service" as primary field',
  },
];