npm - cipher-security - Versions diffs - 2.0.8 → 2.2.0 - Mend

cipher-security 2.0.8 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

package/bin/cipher.js +11 -1
package/lib/agent-runtime/handlers/architect.js +199 -0
package/lib/agent-runtime/handlers/base.js +240 -0
package/lib/agent-runtime/handlers/blue.js +220 -0
package/lib/agent-runtime/handlers/incident.js +161 -0
package/lib/agent-runtime/handlers/privacy.js +190 -0
package/lib/agent-runtime/handlers/purple.js +209 -0
package/lib/agent-runtime/handlers/recon.js +174 -0
package/lib/agent-runtime/handlers/red.js +246 -0
package/lib/agent-runtime/handlers/researcher.js +170 -0
package/lib/agent-runtime/handlers.js +35 -0
package/lib/agent-runtime/index.js +196 -0
package/lib/agent-runtime/parser.js +316 -0
package/lib/analyze/consistency.js +566 -0
package/lib/analyze/constitution.js +110 -0
package/lib/analyze/sharding.js +251 -0
package/lib/autonomous/agent-tool.js +165 -0
package/lib/autonomous/feedback-loop.js +13 -6
package/lib/autonomous/framework.js +17 -0
package/lib/autonomous/handoff.js +506 -0
package/lib/autonomous/modes/blue.js +26 -0
package/lib/autonomous/modes/red.js +585 -0
package/lib/autonomous/modes/researcher.js +322 -0
package/lib/autonomous/researcher.js +12 -45
package/lib/autonomous/runner.js +9 -537
package/lib/benchmark/agent.js +88 -26
package/lib/benchmark/baselines.js +3 -0
package/lib/benchmark/claude-code-solver.js +254 -0
package/lib/benchmark/cognitive.js +283 -0
package/lib/benchmark/index.js +12 -2
package/lib/benchmark/knowledge.js +281 -0
package/lib/benchmark/llm.js +156 -15
package/lib/benchmark/models.js +5 -2
package/lib/benchmark/nyu-ctf.js +192 -0
package/lib/benchmark/overthewire.js +347 -0
package/lib/benchmark/picoctf.js +281 -0
package/lib/benchmark/prompts.js +280 -0
package/lib/benchmark/registry.js +219 -0
package/lib/benchmark/remote-solver.js +356 -0
package/lib/benchmark/remote-target.js +263 -0
package/lib/benchmark/reporter.js +35 -0
package/lib/benchmark/runner.js +174 -10
package/lib/benchmark/sandbox.js +35 -0
package/lib/benchmark/scorer.js +22 -4
package/lib/benchmark/solver.js +34 -1
package/lib/benchmark/tools.js +262 -16
package/lib/commands.js +9 -0
package/lib/execution/council.js +434 -0
package/lib/execution/parallel.js +292 -0
package/lib/gates/circuit-breaker.js +135 -0
package/lib/gates/confidence.js +302 -0
package/lib/gates/corrections.js +219 -0
package/lib/gates/self-check.js +245 -0
package/lib/gateway/commands.js +727 -0
package/lib/guardrails/engine.js +364 -0
package/lib/mcp/server.js +349 -3
package/lib/memory/compressor.js +94 -7
package/lib/pipeline/hooks.js +288 -0
package/lib/pipeline/index.js +11 -0
package/lib/review/budget.js +210 -0
package/lib/review/engine.js +526 -0
package/lib/review/layers/acceptance-auditor.js +279 -0
package/lib/review/layers/blind-hunter.js +500 -0
package/lib/review/layers/defense-in-depth.js +209 -0
package/lib/review/layers/edge-case-hunter.js +266 -0
package/lib/review/panel.js +519 -0
package/lib/review/two-stage.js +244 -0
package/lib/session/cost-tracker.js +203 -0
package/lib/session/logger.js +349 -0
package/package.json +1 -1

package/lib/benchmark/llm.js CHANGED

@@ -4,20 +4,148 @@
 /**
  * CIPHER Benchmark — LLM client factory for the security agent.
  *
- * Auto-detects the best available backend and returns an
- * Anthropic-SDK-compatible client for tool-use.
+ * Returns an Anthropic-SDK-compatible client for tool-use, wrapping
+ * Ollama's OpenAI-compatible API into the Anthropic messages format
+ * that SecurityAgent expects.
  */
+// ---------------------------------------------------------------------------
+// Ollama → Anthropic adapter
+// ---------------------------------------------------------------------------
+/**
+ * Wraps an OpenAI-compatible client to present an Anthropic-style interface.
+ * Translates tool schemas and response formats.
+ */
+class OllamaAnthropicAdapter {
+  constructor(openaiClient, model) {
+    this._client = openaiClient;
+    this._model = model;
+    this.messages = { create: this._create.bind(this) };
+  }
+  async _create({ model, max_tokens, tools, messages, system }) {
+    // Convert Anthropic tool schemas to OpenAI function schemas
+    const openaiTools = (tools || []).map(t => ({
+      type: 'function',
+      function: {
+        name: t.name,
+        description: t.description,
+        parameters: t.input_schema,
+      },
+    }));
+    // Convert Anthropic messages to OpenAI format
+    const openaiMessages = [];
+    if (system) {
+      openaiMessages.push({ role: 'system', content: system });
+    }
+    for (const msg of messages) {
+      if (msg.role === 'user') {
+        if (typeof msg.content === 'string') {
+          openaiMessages.push({ role: 'user', content: msg.content });
+        } else if (Array.isArray(msg.content)) {
+          // Tool results from Anthropic format
+          for (const block of msg.content) {
+            if (block.type === 'tool_result') {
+              openaiMessages.push({
+                role: 'tool',
+                tool_call_id: block.tool_use_id,
+                content: typeof block.content === 'string' ? block.content : JSON.stringify(block.content),
+              });
+            }
+          }
+        }
+      } else if (msg.role === 'assistant') {
+        if (typeof msg.content === 'string') {
+          openaiMessages.push({ role: 'assistant', content: msg.content });
+        } else if (Array.isArray(msg.content)) {
+          // Convert Anthropic assistant blocks to OpenAI format
+          let textContent = '';
+          const toolCalls = [];
+          for (const block of msg.content) {
+            if (block.type === 'text') {
+              textContent += block.text;
+            } else if (block.type === 'tool_use') {
+              toolCalls.push({
+                id: block.id,
+                type: 'function',
+                function: { name: block.name, arguments: JSON.stringify(block.input) },
+              });
+            }
+          }
+          const assistantMsg = { role: 'assistant', content: textContent || null };
+          if (toolCalls.length > 0) assistantMsg.tool_calls = toolCalls;
+          openaiMessages.push(assistantMsg);
+        }
+      }
+    }
+    // Call OpenAI-compatible endpoint
+    const response = await this._client.chat.completions.create({
+      model: model || this._model,
+      max_tokens: max_tokens || 4096,
+      tools: openaiTools.length > 0 ? openaiTools : undefined,
+      messages: openaiMessages,
+    });
+    // Convert OpenAI response to Anthropic format
+    const choice = response.choices?.[0];
+    if (!choice) {
+      return {
+        content: [{ type: 'text', text: '' }],
+        stop_reason: 'end_turn',
+        usage: { input_tokens: response.usage?.prompt_tokens || 0, output_tokens: response.usage?.completion_tokens || 0 },
+      };
+    }
+    const content = [];
+    if (choice.message?.content) {
+      content.push({ type: 'text', text: choice.message.content });
+    }
+    if (choice.message?.tool_calls) {
+      for (const tc of choice.message.tool_calls) {
+        let parsedArgs = {};
+        try { parsedArgs = JSON.parse(tc.function.arguments); } catch { /* ignore */ }
+        content.push({
+          type: 'tool_use',
+          id: tc.id,
+          name: tc.function.name,
+          input: parsedArgs,
+        });
+      }
+    }
+    const stopReason = choice.finish_reason === 'tool_calls' ? 'tool_use' : 'end_turn';
+    return {
+      content: content.length > 0 ? content : [{ type: 'text', text: '' }],
+      stop_reason: stopReason,
+      usage: {
+        input_tokens: response.usage?.prompt_tokens || 0,
+        output_tokens: response.usage?.completion_tokens || 0,
+      },
+    };
+  }
+}
+// ---------------------------------------------------------------------------
+// Client factory
+// ---------------------------------------------------------------------------
 /**
  * Create an LLM client for the benchmark agent.
+ * Always returns an Anthropic-SDK-compatible interface.
+ *
  * @param {object} [opts]
  * @param {string} [opts.backendOverride]
- * @returns {{ client: object, model: string }}
+ * @returns {Promise<{ client: object, model: string }>}
  */
 export async function makeAgentClient(opts = {}) {
   const backendOverride = opts.backendOverride;
-  // Try explicit override first
+  // Try Claude first
   if (backendOverride === 'claude' || (!backendOverride && process.env.ANTHROPIC_API_KEY)) {
     try {
       const { default: Anthropic } = await import('@anthropic-ai/sdk');
@@ -28,26 +156,39 @@ export async function makeAgentClient(opts = {}) {
     }
   }
+  // Try Ollama via OpenAI-compatible API
   if (backendOverride === 'ollama' || !backendOverride) {
-    // Check if Ollama is running
     try {
       const { request } = await import('node:http');
       const alive = await new Promise((resolve) => {
         const req = request({ hostname: '127.0.0.1', port: 11434, path: '/api/tags', timeout: 2000 }, (res) => {
-          res.resume();
-          resolve(res.statusCode === 200);
+          let body = '';
+          res.on('data', (d) => body += d);
+          res.on('end', () => {
+            try {
+              const data = JSON.parse(body);
+              resolve(data);
+            } catch { resolve(null); }
+          });
         });
-        req.on('error', () => resolve(false));
-        req.on('timeout', () => { req.destroy(); resolve(false); });
+        req.on('error', () => resolve(null));
+        req.on('timeout', () => { req.destroy(); resolve(null); });
         req.end();
       });
       if (alive) {
-        // Use OpenAI SDK pointed at Ollama
-        try {
-          const { default: OpenAI } = await import('openai');
-          const client = new OpenAI({ baseURL: 'http://127.0.0.1:11434/v1', apiKey: 'ollama' });
-          return { client, model: 'qwen2.5-coder:14b' };
-        } catch { /* fall through */ }
+        // Pick the best available model
+        const models = (alive.models || []).map(m => m.name);
+        const preferred = ['qwen2.5:32b', 'cipher:latest', 'qwen2.5-coder:14b', 'qwen2.5:14b', 'llama3.1:70b'];
+        let selectedModel = models[0] || 'qwen2.5:32b';
+        for (const p of preferred) {
+          if (models.includes(p)) { selectedModel = p; break; }
+        }
+        const { default: OpenAI } = await import('openai');
+        const rawClient = new OpenAI({ baseURL: 'http://127.0.0.1:11434/v1', apiKey: 'ollama' });
+        const client = new OllamaAnthropicAdapter(rawClient, selectedModel);
+        return { client, model: selectedModel };
       }
     } catch { /* fall through */ }
   }

package/lib/benchmark/models.js CHANGED

@@ -9,8 +9,8 @@ import { readFileSync } from 'node:fs';
 import { basename } from 'node:path';
 export class BenchmarkConfig {
-  constructor({ name, description, level, tags = [], winCondition = 'flag', files = [], path = '.' }) {
-    if (![1, 2, 3].includes(level)) throw new Error(`level must be 1, 2, or 3, got ${level}`);
+  constructor({ name, description, level, tags = [], winCondition = 'flag', files = [], path = '.', category = '' }) {
+    if (level < 1 || level > 5) throw new Error(`level must be 1-5, got ${level}`);
     if (!['flag', 'question'].includes(winCondition)) throw new Error(`winCondition must be 'flag' or 'question'`);
     this.name = name;
     this.description = description;
@@ -19,6 +19,7 @@ export class BenchmarkConfig {
     this.winCondition = winCondition;
     this.files = files;
     this.path = path;
+    this.category = category;
   }
   get dirName() { return basename(this.path); }
@@ -61,6 +62,8 @@ export class SolverResult {
   constructor(opts = {}) {
     this.flagFound = opts.flagFound ?? false;
     this.flagValue = opts.flagValue ?? '';
+    this.answerFound = opts.answerFound ?? false;
+    this.answerValue = opts.answerValue ?? '';
     this.durationS = opts.durationS ?? 0;
     this.tokensIn = opts.tokensIn ?? 0;
     this.tokensOut = opts.tokensOut ?? 0;

package/lib/benchmark/nyu-ctf.js ADDED

@@ -0,0 +1,192 @@
+// Copyright (c) 2026 defconxt. All rights reserved.
+// Licensed under AGPL-3.0 — see LICENSE file for details.
+// CIPHER is a trademark of defconxt.
+/**
+ * NYU CTF Bench — Challenge loader for the NYU CTF benchmark suite.
+ *
+ * Loads challenges from the NYU CTF Bench format (NeurIPS 2024):
+ * - challenge.json with name, description, category, difficulty, flag
+ * - Per-challenge docker-compose.yaml for target services
+ * - 6 categories: web, pwn, rev, crypto, forensics, misc
+ * - 255 challenges across 5 difficulty levels
+ *
+ * @module benchmark/nyu-ctf
+ */
+import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
+import { join, basename } from 'node:path';
+import { homedir } from 'node:os';
+import { BenchmarkConfig } from './models.js';
+import { CompetitorBaseline } from './models.js';
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+export const NYU_CTF_REPO_URL = 'https://github.com/NYU-LLM-CTF/LLM_CTF_Database.git';
+export const NYU_CTF_CLONE_DIR = join(homedir(), '.cipher', 'benchmarks', 'nyu-ctf-bench');
+/** NYU CTF categories with XBOW tag mappings. */
+export const NYU_CATEGORIES = ['web', 'pwn', 'rev', 'crypto', 'forensics', 'misc'];
+/** Difficulty mapping: NYU uses string labels, we use 1-5. */
+const DIFFICULTY_MAP = {
+  'easy': 1,
+  'medium': 2,
+  'hard': 3,
+  'very hard': 4,
+  'expert': 5,
+  // Numeric fallbacks
+  '1': 1, '2': 2, '3': 3, '4': 4, '5': 5,
+};
+// ---------------------------------------------------------------------------
+// NYU CTF Baselines
+// ---------------------------------------------------------------------------
+export const CRAKEN_BASELINE = new CompetitorBaseline({
+  name: 'CRAKEN',
+  overallPct: 22.0,
+  overallPassed: 56,
+  overallTotal: 255,
+  levelPct: {},
+  medianCostUsd: 0,
+  medianTimeS: 0,
+  source: 'NYU CTF Bench — CRAKEN (NeurIPS 2024)',
+});
+export const DCIPHER_BASELINE = new CompetitorBaseline({
+  name: 'D-CIPHER',
+  overallPct: 19.0,
+  overallPassed: 48,
+  overallTotal: 255,
+  levelPct: {},
+  medianCostUsd: 0,
+  medianTimeS: 0,
+  source: 'NYU CTF Bench — D-CIPHER (NeurIPS 2024)',
+});
+export const ENIGMA_BASELINE = new CompetitorBaseline({
+  name: 'EnIGMA',
+  overallPct: 13.5,
+  overallPassed: 34,
+  overallTotal: 255,
+  levelPct: {},
+  medianCostUsd: 0,
+  medianTimeS: 0,
+  source: 'NYU CTF Bench — EnIGMA (NeurIPS 2024)',
+});
+export const NYU_BASELINES = [CRAKEN_BASELINE, DCIPHER_BASELINE, ENIGMA_BASELINE];
+// ---------------------------------------------------------------------------
+// Challenge loader
+// ---------------------------------------------------------------------------
+/**
+ * Parse a NYU CTF challenge.json file into a BenchmarkConfig.
+ *
+ * Expected format:
+ * {
+ *   "name": "challenge-name",
+ *   "description": "Challenge description",
+ *   "category": "web",
+ *   "difficulty": "medium",
+ *   "flag": "flag{...}",
+ *   "files": ["file1.py", "file2.txt"],
+ *   "docker_compose": true
+ * }
+ *
+ * @param {string} configPath - Path to challenge.json
+ * @returns {BenchmarkConfig}
+ */
+export function loadNyuChallenge(configPath) {
+  const data = JSON.parse(readFileSync(configPath, 'utf8'));
+  const dir = configPath.replace(/\/[^/]+$/, '');
+  const category = (data.category || 'misc').toLowerCase();
+  const difficultyStr = String(data.difficulty || 'medium').toLowerCase();
+  const level = DIFFICULTY_MAP[difficultyStr] ?? 2;
+  // Build tags from category + any explicit tags
+  const tags = [category];
+  if (data.tags) tags.push(...data.tags);
+  // Detect files
+  const files = (data.files || []).map(f => (typeof f === 'string' ? { name: f } : f));
+  return new BenchmarkConfig({
+    name: data.name || basename(dir),
+    description: data.description || '',
+    level: Math.min(5, Math.max(1, level)),
+    tags,
+    winCondition: 'flag',
+    files,
+    path: dir,
+    category,
+  });
+}
+/**
+ * Enumerate all NYU CTF challenges in a directory.
+ *
+ * Walks the directory tree looking for challenge.json files.
+ * Supports both flat (all in one dir) and nested (category/challenge/) layouts.
+ *
+ * @param {string} baseDir - Root directory to scan
+ * @returns {BenchmarkConfig[]}
+ */
+export function enumerateNyuChallenges(baseDir) {
+  const configs = [];
+  if (!existsSync(baseDir)) return configs;
+  function walk(dir, depth = 0) {
+    if (depth > 3) return; // Don't recurse too deep
+    for (const entry of readdirSync(dir).sort()) {
+      const entryPath = join(dir, entry);
+      if (!statSync(entryPath).isDirectory() && entry === 'challenge.json') {
+        try {
+          configs.push(loadNyuChallenge(entryPath));
+        } catch { /* skip malformed */ }
+        return; // Don't recurse further in this dir
+      }
+    }
+    // Recurse into subdirectories
+    for (const entry of readdirSync(dir).sort()) {
+      const entryPath = join(dir, entry);
+      if (statSync(entryPath).isDirectory()) {
+        walk(entryPath, depth + 1);
+      }
+    }
+  }
+  walk(baseDir);
+  return configs;
+}
+/**
+ * Get the expected flag for a NYU CTF challenge.
+ *
+ * @param {string} configPath - Path to challenge.json
+ * @returns {string|null}
+ */
+export function getNyuFlag(configPath) {
+  try {
+    const data = JSON.parse(readFileSync(configPath, 'utf8'));
+    return data.flag || null;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Check if a directory has a docker-compose file for the challenge.
+ *
+ * @param {string} challengeDir
+ * @returns {boolean}
+ */
+export function hasDockerCompose(challengeDir) {
+  return existsSync(join(challengeDir, 'docker-compose.yml'))
+    || existsSync(join(challengeDir, 'docker-compose.yaml'));
+}