npm - cipher-security - Versions diffs - 2.0.8 → 2.2.0 - Mend

cipher-security 2.0.8 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/bin/cipher.js +11 -1
package/lib/agent-runtime/handlers/architect.js +199 -0
package/lib/agent-runtime/handlers/base.js +240 -0
package/lib/agent-runtime/handlers/blue.js +220 -0
package/lib/agent-runtime/handlers/incident.js +161 -0
package/lib/agent-runtime/handlers/privacy.js +190 -0
package/lib/agent-runtime/handlers/purple.js +209 -0
package/lib/agent-runtime/handlers/recon.js +174 -0
package/lib/agent-runtime/handlers/red.js +246 -0
package/lib/agent-runtime/handlers/researcher.js +170 -0
package/lib/agent-runtime/handlers.js +35 -0
package/lib/agent-runtime/index.js +196 -0
package/lib/agent-runtime/parser.js +316 -0
package/lib/analyze/consistency.js +566 -0
package/lib/analyze/constitution.js +110 -0
package/lib/analyze/sharding.js +251 -0
package/lib/autonomous/agent-tool.js +165 -0
package/lib/autonomous/feedback-loop.js +13 -6
package/lib/autonomous/framework.js +17 -0
package/lib/autonomous/handoff.js +506 -0
package/lib/autonomous/modes/blue.js +26 -0
package/lib/autonomous/modes/red.js +585 -0
package/lib/autonomous/modes/researcher.js +322 -0
package/lib/autonomous/researcher.js +12 -45
package/lib/autonomous/runner.js +9 -537
package/lib/benchmark/agent.js +88 -26
package/lib/benchmark/baselines.js +3 -0
package/lib/benchmark/claude-code-solver.js +254 -0
package/lib/benchmark/cognitive.js +283 -0
package/lib/benchmark/index.js +12 -2
package/lib/benchmark/knowledge.js +281 -0
package/lib/benchmark/llm.js +156 -15
package/lib/benchmark/models.js +5 -2
package/lib/benchmark/nyu-ctf.js +192 -0
package/lib/benchmark/overthewire.js +347 -0
package/lib/benchmark/picoctf.js +281 -0
package/lib/benchmark/prompts.js +280 -0
package/lib/benchmark/registry.js +219 -0
package/lib/benchmark/remote-solver.js +356 -0
package/lib/benchmark/remote-target.js +263 -0
package/lib/benchmark/reporter.js +35 -0
package/lib/benchmark/runner.js +174 -10
package/lib/benchmark/sandbox.js +35 -0
package/lib/benchmark/scorer.js +22 -4
package/lib/benchmark/solver.js +34 -1
package/lib/benchmark/tools.js +262 -16
package/lib/commands.js +9 -0
package/lib/execution/council.js +434 -0
package/lib/execution/parallel.js +292 -0
package/lib/gates/circuit-breaker.js +135 -0
package/lib/gates/confidence.js +302 -0
package/lib/gates/corrections.js +219 -0
package/lib/gates/self-check.js +245 -0
package/lib/gateway/commands.js +727 -0
package/lib/guardrails/engine.js +364 -0
package/lib/mcp/server.js +349 -3
package/lib/memory/compressor.js +94 -7
package/lib/pipeline/hooks.js +288 -0
package/lib/pipeline/index.js +11 -0
package/lib/review/budget.js +210 -0
package/lib/review/engine.js +526 -0
package/lib/review/layers/acceptance-auditor.js +279 -0
package/lib/review/layers/blind-hunter.js +500 -0
package/lib/review/layers/defense-in-depth.js +209 -0
package/lib/review/layers/edge-case-hunter.js +266 -0
package/lib/review/panel.js +519 -0
package/lib/review/two-stage.js +244 -0
package/lib/session/cost-tracker.js +203 -0
package/lib/session/logger.js +349 -0
package/package.json +1 -1

package/lib/review/two-stage.js ADDED Viewed

@@ -0,0 +1,244 @@
+// Copyright (c) 2026 defconxt. All rights reserved.
+// Licensed under AGPL-3.0 — see LICENSE file for details.
+// CIPHER is a trademark of defconxt.
+/**
+ * CIPHER Two-Stage Review Pipeline
+ *
+ * Stage 1 — Spec Compliance: security requirements gate (auth, validation, error handling)
+ * Stage 2 — Quality Gate: code quality patterns (only runs if Stage 1 passes threshold)
+ *
+ * Uses the review engine from M011 with hookable stages from M013.
+ *
+ * @module review/two-stage
+ */
+import { createReviewEngine } from './engine.js';
+import { PipelineHooks, HookContext, hookableStage } from '../pipeline/hooks.js';
+// ---------------------------------------------------------------------------
+// Severity thresholds
+// ---------------------------------------------------------------------------
+const SEVERITY_RANK = { critical: 4, high: 3, medium: 2, low: 1, info: 0 };
+/**
+ * @typedef {object} StageConfig
+ * @property {string} [minSeverity] - Filter findings at or above this level
+ * @property {number} [maxCritical] - Max critical findings before stage fails
+ * @property {number} [maxHigh]     - Max high findings before stage fails
+ */
+/** Default stage configs */
+const STAGE_DEFAULTS = {
+  compliance: {
+    minSeverity: 'medium',
+    maxCritical: 0,
+    maxHigh: 3,
+  },
+  quality: {
+    minSeverity: 'low',
+    maxCritical: 0,
+    maxHigh: 5,
+  },
+};
+// ---------------------------------------------------------------------------
+// Two-Stage Review
+// ---------------------------------------------------------------------------
+/**
+ * Two-stage review pipeline result.
+ */
+export class TwoStageResult {
+  constructor({ stage1 = null, stage2 = null, gatesPassed = false, abortReason = '', totalTime = 0 } = {}) {
+    this.stage1 = stage1;
+    this.stage2 = stage2;
+    this.gatesPassed = gatesPassed;
+    this.abortReason = abortReason;
+    this.totalTime = totalTime;
+  }
+  get allFindings() {
+    const findings = [];
+    if (this.stage1?.findings) findings.push(...this.stage1.findings);
+    if (this.stage2?.findings) findings.push(...this.stage2.findings);
+    return findings;
+  }
+  toReport() {
+    const lines = [
+      '═══════════════════════════════════════════════════════',
+      '  CIPHER Two-Stage Review Report',
+      '═══════════════════════════════════════════════════════',
+      '',
+    ];
+    // Stage 1
+    lines.push('── Stage 1: Spec Compliance ──');
+    if (this.stage1) {
+      lines.push(`Result: ${this.stage1.passed ? '✓ PASSED' : '✗ FAILED'}`);
+      lines.push(this.stage1.result?.summary ?? 'No results');
+      if (!this.stage1.passed && this.stage1.failReason) {
+        lines.push(`Reason: ${this.stage1.failReason}`);
+      }
+    } else {
+      lines.push('Not executed');
+    }
+    lines.push('');
+    // Stage 2
+    lines.push('── Stage 2: Quality Gate ──');
+    if (this.stage2) {
+      lines.push(`Result: ${this.stage2.passed ? '✓ PASSED' : '✗ FAILED'}`);
+      lines.push(this.stage2.result?.summary ?? 'No results');
+      if (!this.stage2.passed && this.stage2.failReason) {
+        lines.push(`Reason: ${this.stage2.failReason}`);
+      }
+    } else {
+      lines.push(this.abortReason ? `Skipped: ${this.abortReason}` : 'Not executed');
+    }
+    lines.push('');
+    // Overall
+    lines.push('───────────────────────────────────────────────────────');
+    lines.push(`Overall: ${this.gatesPassed ? '✓ ALL GATES PASSED' : '✗ GATES FAILED'}`);
+    lines.push(`Total time: ${this.totalTime}ms`);
+    lines.push(`Total findings: ${this.allFindings.length}`);
+    return lines.join('\n');
+  }
+  toJSON() {
+    return {
+      gatesPassed: this.gatesPassed,
+      abortReason: this.abortReason,
+      totalTime: this.totalTime,
+      totalFindings: this.allFindings.length,
+      stage1: this.stage1 ? {
+        passed: this.stage1.passed,
+        failReason: this.stage1.failReason,
+        findings: this.stage1.result?.findings?.length ?? 0,
+        summary: this.stage1.result?.summary ?? '',
+      } : null,
+      stage2: this.stage2 ? {
+        passed: this.stage2.passed,
+        failReason: this.stage2.failReason,
+        findings: this.stage2.result?.findings?.length ?? 0,
+        summary: this.stage2.result?.summary ?? '',
+      } : null,
+    };
+  }
+}
+/**
+ * Run a two-stage code review.
+ *
+ * @param {string} input          - File path, directory, or code string
+ * @param {object} [options]
+ * @param {StageConfig} [options.compliance] - Stage 1 config
+ * @param {StageConfig} [options.quality]    - Stage 2 config
+ * @param {PipelineHooks} [options.hooks]    - Pipeline hooks
+ * @param {string} [options.language]        - Override language detection
+ * @returns {Promise<TwoStageResult>}
+ */
+export async function twoStageReview(input, options = {}) {
+  const t0 = Date.now();
+  const complianceCfg = { ...STAGE_DEFAULTS.compliance, ...options.compliance };
+  const qualityCfg = { ...STAGE_DEFAULTS.quality, ...options.quality };
+  const hooks = options.hooks ?? new PipelineHooks();
+  const engine = await createReviewEngine();
+  // Stage 1: Spec Compliance
+  const stage1Fn = async (target, opts) => {
+    return engine.review(target, {
+      language: options.language,
+      minSeverity: complianceCfg.minSeverity,
+    });
+  };
+  const hookedStage1 = hookableStage(hooks, 'review', stage1Fn);
+  let stage1Result;
+  let stage1Passed = true;
+  let stage1FailReason = '';
+  try {
+    stage1Result = await hookedStage1(input, { stage: 'compliance' });
+    // Check thresholds
+    const counts = stage1Result.severityCounts;
+    if (complianceCfg.maxCritical !== undefined && counts.critical > complianceCfg.maxCritical) {
+      stage1Passed = false;
+      stage1FailReason = `${counts.critical} critical findings exceed threshold of ${complianceCfg.maxCritical}`;
+    }
+    if (stage1Passed && complianceCfg.maxHigh !== undefined && counts.high > complianceCfg.maxHigh) {
+      stage1Passed = false;
+      stage1FailReason = `${counts.high} high findings exceed threshold of ${complianceCfg.maxHigh}`;
+    }
+  } catch (err) {
+    stage1Passed = false;
+    stage1FailReason = err.message;
+  }
+  const stage1 = {
+    passed: stage1Passed,
+    failReason: stage1FailReason,
+    result: stage1Result ?? null,
+  };
+  // Stage 2: Quality Gate — only if Stage 1 passed
+  let stage2 = null;
+  let abortReason = '';
+  if (stage1Passed) {
+    const stage2Fn = async (target, opts) => {
+      return engine.review(target, {
+        language: options.language,
+        minSeverity: qualityCfg.minSeverity,
+      });
+    };
+    const hookedStage2 = hookableStage(hooks, 'analyze', stage2Fn);
+    let stage2Passed = true;
+    let stage2FailReason = '';
+    try {
+      const stage2Result = await hookedStage2(input, { stage: 'quality' });
+      const counts = stage2Result.severityCounts;
+      if (qualityCfg.maxCritical !== undefined && counts.critical > qualityCfg.maxCritical) {
+        stage2Passed = false;
+        stage2FailReason = `${counts.critical} critical findings exceed threshold of ${qualityCfg.maxCritical}`;
+      }
+      if (stage2Passed && qualityCfg.maxHigh !== undefined && counts.high > qualityCfg.maxHigh) {
+        stage2Passed = false;
+        stage2FailReason = `${counts.high} high findings exceed threshold of ${qualityCfg.maxHigh}`;
+      }
+      stage2 = {
+        passed: stage2Passed,
+        failReason: stage2FailReason,
+        result: stage2Result,
+      };
+    } catch (err) {
+      stage2 = {
+        passed: false,
+        failReason: err.message,
+        result: null,
+      };
+    }
+  } else {
+    abortReason = `Stage 1 failed: ${stage1FailReason}`;
+  }
+  const gatesPassed = stage1Passed && (stage2?.passed ?? false);
+  return new TwoStageResult({
+    stage1,
+    stage2,
+    gatesPassed,
+    abortReason,
+    totalTime: Date.now() - t0,
+  });
+}

package/lib/session/cost-tracker.js ADDED Viewed

@@ -0,0 +1,203 @@
+// Copyright (c) 2026 defconxt. All rights reserved.
+// Licensed under AGPL-3.0 — see LICENSE file for details.
+// CIPHER is a trademark of defconxt.
+/**
+ * Cost Tracker — Per-session spending limits for autonomous agents.
+ *
+ * Tracks token usage, estimates cost per interaction using model-specific
+ * pricing, and enforces a configurable budget limit. Integrates with
+ * SessionLogger for persistent cost records.
+ *
+ * @module session/cost-tracker
+ */
+// ---------------------------------------------------------------------------
+// Model pricing (per 1M tokens)
+// ---------------------------------------------------------------------------
+/**
+ * Token pricing by model family.
+ * @type {Record<string, { input: number, output: number }>}
+ */
+export const MODEL_PRICING = {
+  // Claude models
+  'claude-sonnet': { input: 3.0, output: 15.0 },
+  'claude-haiku': { input: 0.25, output: 1.25 },
+  'claude-opus': { input: 15.0, output: 75.0 },
+  // OpenAI models
+  'gpt-4o': { input: 2.5, output: 10.0 },
+  'gpt-4': { input: 5.0, output: 15.0 },
+  'gpt-3.5': { input: 0.5, output: 1.5 },
+  // Local models (free)
+  'ollama': { input: 0, output: 0 },
+  'local': { input: 0, output: 0 },
+  // Default fallback
+  'default': { input: 3.0, output: 15.0 },
+};
+// ---------------------------------------------------------------------------
+// PriceLimitExceeded
+// ---------------------------------------------------------------------------
+/**
+ * Error raised when a session has spent more than its budget.
+ * The figures involved are kept on the instance for callers to inspect.
+ */
+export class PriceLimitExceeded extends Error {
+  /**
+   * @param {number} currentCost - Cumulative session cost in USD
+   * @param {number} limit - Budget that was exceeded, in USD
+   * @param {string} [sessionId] - Session identifier; omitted from the message when empty
+   */
+  constructor(currentCost, limit, sessionId = '') {
+    const spent = currentCost.toFixed(4);
+    const cap = limit.toFixed(2);
+    const sessionSuffix = sessionId ? ` (session: ${sessionId})` : '';
+    super(`Session cost $${spent} exceeds budget limit $${cap}${sessionSuffix}`);
+    Object.assign(this, {
+      name: 'PriceLimitExceeded',
+      currentCost,
+      limit,
+      sessionId,
+    });
+  }
+}
+// ---------------------------------------------------------------------------
+// CostTracker
+// ---------------------------------------------------------------------------
+/**
+ * Per-session cost tracker with budget enforcement.
+ */
+export class CostTracker {
+  /**
+   * @param {object} [opts]
+   * @param {number} [opts.budgetUSD] - Maximum spend (default from env or $5)
+   * @param {string} [opts.model] - Model identifier for pricing lookup
+   * @param {string} [opts.sessionId] - Associated session ID
+   * @param {import('./logger.js').SessionLogger} [opts.logger] - Optional session logger
+   */
+  constructor(opts = {}) {
+    this._budgetUSD = opts.budgetUSD ?? parseFloat(process.env.CIPHER_PRICE_LIMIT || '5');
+    this._model = opts.model || 'default';
+    this._sessionId = opts.sessionId || '';
+    this._logger = opts.logger || null;
+    this._totalCostUSD = 0;
+    this._totalTokensIn = 0;
+    this._totalTokensOut = 0;
+    this._interactionCount = 0;
+  }
+  /** Current total cost. */
+  get totalCost() { return this._totalCostUSD; }
+  /** Configured budget. */
+  get budget() { return this._budgetUSD; }
+  /** Remaining budget. */
+  get remaining() { return Math.max(0, this._budgetUSD - this._totalCostUSD); }
+  /** Total input tokens tracked. */
+  get totalTokensIn() { return this._totalTokensIn; }
+  /** Total output tokens tracked. */
+  get totalTokensOut() { return this._totalTokensOut; }
+  /** Number of interactions tracked. */
+  get interactionCount() { return this._interactionCount; }
+  /**
+   * Look up pricing for the configured model.
+   * @returns {{ input: number, output: number }}
+   */
+  getPricing() {
+    const modelLower = this._model.toLowerCase();
+    for (const [key, pricing] of Object.entries(MODEL_PRICING)) {
+      if (modelLower.includes(key)) return pricing;
+    }
+    return MODEL_PRICING.default;
+  }
+  /**
+   * Calculate cost for a given number of tokens.
+   *
+   * @param {number} tokensIn
+   * @param {number} tokensOut
+   * @returns {number} Cost in USD
+   */
+  calculateCost(tokensIn, tokensOut) {
+    const pricing = this.getPricing();
+    return (tokensIn / 1_000_000) * pricing.input + (tokensOut / 1_000_000) * pricing.output;
+  }
+  /**
+   * Track an interaction's token usage and check budget.
+   *
+   * @param {number} tokensIn
+   * @param {number} tokensOut
+   * @returns {{ costUSD: number, cumulativeCostUSD: number, remaining: number, withinBudget: boolean }}
+   * @throws {PriceLimitExceeded} If cumulative cost exceeds budget
+   */
+  track(tokensIn, tokensOut) {
+    const costUSD = this.calculateCost(tokensIn, tokensOut);
+    this._totalCostUSD += costUSD;
+    this._totalTokensIn += tokensIn;
+    this._totalTokensOut += tokensOut;
+    this._interactionCount += 1;
+    const withinBudget = this._totalCostUSD <= this._budgetUSD;
+    // Log to session if available
+    if (this._logger) {
+      const status = withinBudget ? 'ok' : 'exceeded';
+      this._logger.logCost(costUSD, this._budgetUSD, status);
+    }
+    if (!withinBudget) {
+      throw new PriceLimitExceeded(this._totalCostUSD, this._budgetUSD, this._sessionId);
+    }
+    return {
+      costUSD,
+      cumulativeCostUSD: this._totalCostUSD,
+      remaining: this.remaining,
+      withinBudget,
+    };
+  }
+  /**
+   * Check if the budget would be exceeded by additional tokens.
+   * Does NOT track — just estimates.
+   *
+   * @param {number} tokensIn
+   * @param {number} tokensOut
+   * @returns {{ wouldExceed: boolean, estimatedCost: number, projectedTotal: number }}
+   */
+  wouldExceed(tokensIn, tokensOut) {
+    const estimatedCost = this.calculateCost(tokensIn, tokensOut);
+    const projectedTotal = this._totalCostUSD + estimatedCost;
+    return {
+      wouldExceed: projectedTotal > this._budgetUSD,
+      estimatedCost,
+      projectedTotal,
+    };
+  }
+  /**
+   * Return a status summary.
+   * @returns {object}
+   */
+  getStatus() {
+    return {
+      totalCostUSD: Math.round(this._totalCostUSD * 10000) / 10000,
+      budgetUSD: this._budgetUSD,
+      remainingUSD: Math.round(this.remaining * 10000) / 10000,
+      totalTokensIn: this._totalTokensIn,
+      totalTokensOut: this._totalTokensOut,
+      interactionCount: this._interactionCount,
+      model: this._model,
+      utilizationPct: this._budgetUSD > 0
+        ? Math.round((this._totalCostUSD / this._budgetUSD) * 10000) / 100
+        : 0,
+    };
+  }
+}