npm - cipher-security - Versions diffs - 2.0.8 → 2.2.0 - Mend

cipher-security 2.0.8 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/bin/cipher.js +11 -1
package/lib/agent-runtime/handlers/architect.js +199 -0
package/lib/agent-runtime/handlers/base.js +240 -0
package/lib/agent-runtime/handlers/blue.js +220 -0
package/lib/agent-runtime/handlers/incident.js +161 -0
package/lib/agent-runtime/handlers/privacy.js +190 -0
package/lib/agent-runtime/handlers/purple.js +209 -0
package/lib/agent-runtime/handlers/recon.js +174 -0
package/lib/agent-runtime/handlers/red.js +246 -0
package/lib/agent-runtime/handlers/researcher.js +170 -0
package/lib/agent-runtime/handlers.js +35 -0
package/lib/agent-runtime/index.js +196 -0
package/lib/agent-runtime/parser.js +316 -0
package/lib/analyze/consistency.js +566 -0
package/lib/analyze/constitution.js +110 -0
package/lib/analyze/sharding.js +251 -0
package/lib/autonomous/agent-tool.js +165 -0
package/lib/autonomous/feedback-loop.js +13 -6
package/lib/autonomous/framework.js +17 -0
package/lib/autonomous/handoff.js +506 -0
package/lib/autonomous/modes/blue.js +26 -0
package/lib/autonomous/modes/red.js +585 -0
package/lib/autonomous/modes/researcher.js +322 -0
package/lib/autonomous/researcher.js +12 -45
package/lib/autonomous/runner.js +9 -537
package/lib/benchmark/agent.js +88 -26
package/lib/benchmark/baselines.js +3 -0
package/lib/benchmark/claude-code-solver.js +254 -0
package/lib/benchmark/cognitive.js +283 -0
package/lib/benchmark/index.js +12 -2
package/lib/benchmark/knowledge.js +281 -0
package/lib/benchmark/llm.js +156 -15
package/lib/benchmark/models.js +5 -2
package/lib/benchmark/nyu-ctf.js +192 -0
package/lib/benchmark/overthewire.js +347 -0
package/lib/benchmark/picoctf.js +281 -0
package/lib/benchmark/prompts.js +280 -0
package/lib/benchmark/registry.js +219 -0
package/lib/benchmark/remote-solver.js +356 -0
package/lib/benchmark/remote-target.js +263 -0
package/lib/benchmark/reporter.js +35 -0
package/lib/benchmark/runner.js +174 -10
package/lib/benchmark/sandbox.js +35 -0
package/lib/benchmark/scorer.js +22 -4
package/lib/benchmark/solver.js +34 -1
package/lib/benchmark/tools.js +262 -16
package/lib/commands.js +9 -0
package/lib/execution/council.js +434 -0
package/lib/execution/parallel.js +292 -0
package/lib/gates/circuit-breaker.js +135 -0
package/lib/gates/confidence.js +302 -0
package/lib/gates/corrections.js +219 -0
package/lib/gates/self-check.js +245 -0
package/lib/gateway/commands.js +727 -0
package/lib/guardrails/engine.js +364 -0
package/lib/mcp/server.js +349 -3
package/lib/memory/compressor.js +94 -7
package/lib/pipeline/hooks.js +288 -0
package/lib/pipeline/index.js +11 -0
package/lib/review/budget.js +210 -0
package/lib/review/engine.js +526 -0
package/lib/review/layers/acceptance-auditor.js +279 -0
package/lib/review/layers/blind-hunter.js +500 -0
package/lib/review/layers/defense-in-depth.js +209 -0
package/lib/review/layers/edge-case-hunter.js +266 -0
package/lib/review/panel.js +519 -0
package/lib/review/two-stage.js +244 -0
package/lib/session/cost-tracker.js +203 -0
package/lib/session/logger.js +349 -0
package/package.json +1 -1

package/lib/analyze/sharding.js ADDED Viewed

@@ -0,0 +1,251 @@
+// Copyright (c) 2026 defconxt. All rights reserved.
+// Licensed under AGPL-3.0 — see LICENSE file for details.
+// CIPHER is a trademark of defconxt.
+/**
+ * CIPHER Semantic Document Sharding
+ *
+ * Splits large markdown documents into semantically coherent chunks
+ * at heading boundaries. Produces numbered shard files with an index.
+ *
+ * @module analyze/sharding
+ */
+import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
+import { join, basename, dirname, extname } from 'node:path';
+// ---------------------------------------------------------------------------
+// Shard
+// ---------------------------------------------------------------------------
+/**
+ * A single document shard.
+ */
+export class Shard {
+  /**
+   * @param {object} opts
+   * @param {number} opts.number   - Shard number (1-indexed)
+   * @param {string} opts.title    - Section heading
+   * @param {string} opts.content  - Section content (including heading)
+   * @param {number} opts.lines    - Line count
+   * @param {number} opts.startLine - Start line in source (1-indexed)
+   * @param {number} opts.endLine   - End line in source (1-indexed)
+   */
+  constructor(opts = {}) {
+    this.number = opts.number ?? 0;
+    this.title = opts.title ?? '';
+    this.content = opts.content ?? '';
+    this.lines = opts.lines ?? 0;
+    this.startLine = opts.startLine ?? 0;
+    this.endLine = opts.endLine ?? 0;
+  }
+  /** Generate shard filename. */
+  filename(sourceBase) {
+    const padded = String(this.number).padStart(2, '0');
+    const slug = this.title
+      .toLowerCase()
+      .replace(/[^a-z0-9]+/g, '-')
+      .replace(/^-|-$/g, '')
+      .slice(0, 50);
+    return `${sourceBase}-${padded}-${slug}.md`;
+  }
+  /** Generate shard content with frontmatter. */
+  toFile(sourcePath, totalShards) {
+    return [
+      '---',
+      `source: ${sourcePath}`,
+      `shard: ${this.number}/${totalShards}`,
+      `title: "${this.title}"`,
+      `lines: ${this.startLine}-${this.endLine}`,
+      '---',
+      '',
+      this.content,
+    ].join('\n');
+  }
+}
+// ---------------------------------------------------------------------------
+// ShardResult
+// ---------------------------------------------------------------------------
+export class ShardResult {
+  /**
+   * @param {object} opts
+   * @param {Shard[]} opts.shards
+   * @param {string} opts.sourcePath
+   * @param {number} opts.sourceLines
+   * @param {number} opts.headingLevel
+   */
+  constructor(opts = {}) {
+    this.shards = opts.shards ?? [];
+    this.sourcePath = opts.sourcePath ?? '';
+    this.sourceLines = opts.sourceLines ?? 0;
+    this.headingLevel = opts.headingLevel ?? 2;
+  }
+  /** Generate index content. */
+  toIndex() {
+    const lines = [
+      `# Shard Index — ${basename(this.sourcePath)}`,
+      '',
+      `Source: \`${this.sourcePath}\``,
+      `Total lines: ${this.sourceLines}`,
+      `Heading level: H${this.headingLevel}`,
+      `Shards: ${this.shards.length}`,
+      '',
+      '| # | Title | Lines | Range |',
+      '|---|-------|-------|-------|',
+    ];
+    const sourceBase = basename(this.sourcePath, extname(this.sourcePath));
+    for (const shard of this.shards) {
+      const fn = shard.filename(sourceBase);
+      lines.push(`| ${shard.number} | [${shard.title}](${fn}) | ${shard.lines} | ${shard.startLine}-${shard.endLine} |`);
+    }
+    return lines.join('\n');
+  }
+  toReport() {
+    const lines = [
+      `Sharding: ${basename(this.sourcePath)}`,
+      `  Source: ${this.sourceLines} lines → ${this.shards.length} shards at H${this.headingLevel}`,
+      '',
+    ];
+    for (const shard of this.shards) {
+      lines.push(`  [${String(shard.number).padStart(2, '0')}] ${shard.title} (${shard.lines} lines)`);
+    }
+    return lines.join('\n');
+  }
+  toJSON() {
+    return {
+      sourcePath: this.sourcePath,
+      sourceLines: this.sourceLines,
+      headingLevel: this.headingLevel,
+      shardCount: this.shards.length,
+      shards: this.shards.map((s) => ({
+        number: s.number,
+        title: s.title,
+        lines: s.lines,
+        startLine: s.startLine,
+        endLine: s.endLine,
+      })),
+    };
+  }
+}
+// ---------------------------------------------------------------------------
+// Sharding logic
+// ---------------------------------------------------------------------------
+/**
+ * Split a markdown document into shards at heading boundaries.
+ *
+ * @param {string} filePath     - Path to the markdown file
+ * @param {object} [options]
+ * @param {number} [options.level=2]     - Heading level to split at (1-6)
+ * @param {string} [options.output]      - Output directory (default: <filename>-shards/)
+ * @param {boolean} [options.dryRun=false] - Preview only, don't write files
+ * @returns {ShardResult}
+ */
+export function shardDocument(filePath, options = {}) {
+  const level = options.level ?? 2;
+  const dryRun = options.dryRun ?? false;
+  if (!existsSync(filePath)) {
+    throw new Error(`File not found: ${filePath}`);
+  }
+  const content = readFileSync(filePath, 'utf-8');
+  const lines = content.split('\n');
+  const headingPrefix = '#'.repeat(level) + ' ';
+  // Find heading positions
+  const headings = [];
+  for (let i = 0; i < lines.length; i++) {
+    if (lines[i].startsWith(headingPrefix)) {
+      headings.push({
+        line: i,
+        title: lines[i].slice(headingPrefix.length).trim(),
+      });
+    }
+  }
+  // Handle edge cases
+  if (headings.length === 0) {
+    // No headings at this level — return entire doc as one shard
+    return new ShardResult({
+      shards: [new Shard({
+        number: 1,
+        title: basename(filePath, extname(filePath)),
+        content: content,
+        lines: lines.length,
+        startLine: 1,
+        endLine: lines.length,
+      })],
+      sourcePath: filePath,
+      sourceLines: lines.length,
+      headingLevel: level,
+    });
+  }
+  // Build shards
+  const shards = [];
+  // Preamble (content before first heading)
+  if (headings[0].line > 0) {
+    const preambleContent = lines.slice(0, headings[0].line).join('\n').trim();
+    if (preambleContent.length > 0) {
+      shards.push(new Shard({
+        number: shards.length + 1,
+        title: 'Preamble',
+        content: preambleContent,
+        lines: headings[0].line,
+        startLine: 1,
+        endLine: headings[0].line,
+      }));
+    }
+  }
+  // Heading sections
+  for (let h = 0; h < headings.length; h++) {
+    const start = headings[h].line;
+    const end = h + 1 < headings.length ? headings[h + 1].line : lines.length;
+    const sectionContent = lines.slice(start, end).join('\n').trimEnd();
+    shards.push(new Shard({
+      number: shards.length + 1,
+      title: headings[h].title,
+      content: sectionContent,
+      lines: end - start,
+      startLine: start + 1,
+      endLine: end,
+    }));
+  }
+  const result = new ShardResult({
+    shards,
+    sourcePath: filePath,
+    sourceLines: lines.length,
+    headingLevel: level,
+  });
+  // Write files unless dry run
+  if (!dryRun) {
+    const sourceBase = basename(filePath, extname(filePath));
+    const outputDir = options.output ?? join(dirname(filePath), `${sourceBase}-shards`);
+    mkdirSync(outputDir, { recursive: true });
+    // Write index
+    writeFileSync(join(outputDir, 'INDEX.md'), result.toIndex());
+    // Write shard files
+    for (const shard of shards) {
+      const fn = shard.filename(sourceBase);
+      writeFileSync(join(outputDir, fn), shard.toFile(filePath, shards.length));
+    }
+  }
+  return result;
+}

package/lib/autonomous/agent-tool.js ADDED Viewed

@@ -0,0 +1,165 @@
+// Copyright (c) 2026 defconxt. All rights reserved.
+// Licensed under AGPL-3.0 — see LICENSE file for details.
+// CIPHER is a trademark of defconxt.
+/**
+ * Agent-as-Tool — Use one mode agent as a callable tool for another.
+ *
+ * Enables patterns like PURPLE invoking RED for a quick scan without
+ * fully handing off control. The calling agent keeps its context;
+ * the sub-agent runs independently and returns structured results.
+ *
+ * Key exports:
+ * - agentAsTool: creates a tool schema + handler for any mode
+ * - AgentToolResult: structured sub-agent response
+ *
+ * @module autonomous/agent-tool
+ */
+import { ModeAgentResult } from './framework.js';
+// ---------------------------------------------------------------------------
+// AgentToolResult
+// ---------------------------------------------------------------------------
+/**
+ * Structured result from a sub-agent invocation.
+ */
+export class AgentToolResult {
+  /**
+   * @param {object} opts
+   * @param {string} opts.mode - Sub-agent mode
+   * @param {string} opts.outputText - Sub-agent text output
+   * @param {object} opts.outputData - Sub-agent structured data
+   * @param {boolean} opts.valid - Whether sub-agent validation passed
+   * @param {number} opts.durationS - Sub-agent wall-clock seconds
+   * @param {number} opts.tokensIn - Sub-agent input tokens
+   * @param {number} opts.tokensOut - Sub-agent output tokens
+   * @param {string|null} [opts.error] - Error if sub-agent failed
+   */
+  constructor(opts = {}) {
+    this.mode = opts.mode ?? '';
+    this.outputText = opts.outputText ?? '';
+    this.outputData = opts.outputData ?? {};
+    this.valid = opts.valid ?? false;
+    this.durationS = opts.durationS ?? 0;
+    this.tokensIn = opts.tokensIn ?? 0;
+    this.tokensOut = opts.tokensOut ?? 0;
+    this.error = opts.error ?? null;
+  }
+  /** Format as a string for tool output. */
+  toString() {
+    if (this.error) {
+      return `[Agent-as-Tool: ${this.mode}] ERROR: ${this.error}`;
+    }
+    const parts = [
+      `[Agent-as-Tool: ${this.mode}]`,
+      `Valid: ${this.valid}`,
+      `Duration: ${this.durationS.toFixed(2)}s`,
+      `Tokens: ${this.tokensIn}in/${this.tokensOut}out`,
+      '',
+      this.outputText,
+    ];
+    return parts.join('\n');
+  }
+}
+// ---------------------------------------------------------------------------
+// agentAsTool
+// ---------------------------------------------------------------------------
+/**
+ * Create a tool schema + handler that invokes a mode agent as a sub-tool.
+ *
+ * The returned tool can be registered in any mode's ToolRegistry. When called,
+ * it runs the target mode as a sub-agent and returns structured results.
+ *
+ * @param {string} mode - Target mode to invoke as a tool
+ * @param {string} name - Tool name (e.g. 'invoke_red_scan')
+ * @param {string} description - Tool description for the LLM
+ * @param {object} [opts]
+ * @param {Function} [opts.agentRunner] - Injectable runner for testing
+ * @param {import('./handoff.js').HandoffEngine} [opts.handoffEngine] - Shared depth tracking
+ * @returns {{ schema: object, handler: Function }}
+ */
+export function agentAsTool(mode, name, description, opts = {}) {
+  const targetMode = mode.toUpperCase();
+  const schema = {
+    name,
+    description: description || `Invoke ${targetMode} mode as a sub-agent tool.`,
+    input_schema: {
+      type: 'object',
+      properties: {
+        task: {
+          type: 'string',
+          description: 'Task description for the sub-agent',
+        },
+        parameters: {
+          type: 'object',
+          description: 'Optional parameters for the sub-agent task',
+        },
+      },
+      required: ['task'],
+    },
+  };
+  /**
+   * Handler function — invoked by ToolRegistry.dispatch().
+   *
+   * @param {*} context - Calling agent's context (not passed to sub-agent)
+   * @param {object} toolInput - { task: string, parameters?: object }
+   * @returns {Promise<string>}
+   */
+  async function handler(context, toolInput) {
+    const engine = opts.handoffEngine || null;
+    // Check depth limit if engine is available
+    if (engine) {
+      if (engine.depth >= engine.maxDepth) {
+        return new AgentToolResult({
+          mode: targetMode,
+          error: `Max depth (${engine.maxDepth}) reached — cannot invoke sub-agent`,
+        }).toString();
+      }
+      engine.incrementDepth();
+    }
+    const runner = opts.agentRunner || (await import('./runner.js')).runAutonomous;
+    const taskInput = {
+      task: toolInput.task,
+      user_message: `[Sub-agent invocation] ${toolInput.task}`,
+      ...(toolInput.parameters || {}),
+    };
+    let subResult;
+    try {
+      subResult = await runner(targetMode, taskInput, null, null);
+    } catch (e) {
+      if (engine) engine.decrementDepth();
+      return new AgentToolResult({
+        mode: targetMode,
+        error: e.message,
+      }).toString();
+    }
+    if (engine) engine.decrementDepth();
+    const result = new AgentToolResult({
+      mode: targetMode,
+      outputText: subResult.outputText,
+      outputData: subResult.outputData,
+      valid: subResult.validation?.valid ?? false,
+      durationS: subResult.durationS,
+      tokensIn: subResult.tokensIn,
+      tokensOut: subResult.tokensOut,
+      error: subResult.error,
+    });
+    return result.toString();
+  }
+  return { schema, handler };
+}

package/lib/autonomous/feedback-loop.js CHANGED Viewed

@@ -177,13 +177,20 @@ export class SkillQualityAnalyzer {
     if (existsSync(agentJs)) {
       const agentContent = readFileSync(agentJs, 'utf-8');
       const agentLines = agentContent.trim().split('\n');
-      if (agentLines.length < SkillQualityAnalyzer.MIN_AGENT_PY_LINES) {
-        issues.push(`agent.js too short (${agentLines.length} lines)`);
+      // New runtime pattern: 3-line wrapper importing from agent-runtime
+      const usesRuntime = agentContent.includes('agent-runtime');
+      if (usesRuntime) {
+        // Runtime wrapper delegates to shared runtime — full quality
+        scores.agent_quality = 1.0;
+      } else {
+        // Legacy standalone agent.js — validate inline content
+        if (agentLines.length < SkillQualityAnalyzer.MIN_AGENT_PY_LINES) {
+          issues.push(`agent.js too short (${agentLines.length} lines)`);
+        }
+        scores.agent_quality = Math.min(agentLines.length / 50, 1.0);
+        if (!agentContent.includes('process.argv')) issues.push('agent.js missing CLI dispatch');
+        if (!agentContent.includes('json')) issues.push('agent.js missing JSON output');
       }
-      scores.agent_quality = Math.min(agentLines.length / 50, 1.0);
-      if (!agentContent.includes('process.argv')) issues.push('agent.js missing CLI dispatch');
-      if (!agentContent.includes('json')) issues.push('agent.js missing JSON output');
-      if (!agentContent.includes('process.argv')) issues.push('agent.js missing CLI entry point');
     } else {
       issues.push('scripts/agent.js missing');
       scores.agent_quality = 0;

package/lib/autonomous/framework.js CHANGED Viewed

@@ -109,6 +109,8 @@ export class ModeAgentResult {
     this.validation = validation;
     this.error = error;
     this.durationS = durationS;
+    /** @type {import('../gates/self-check.js').SelfCheckResult|null} */
+    this.selfCheck = null;
   }
 }
@@ -240,6 +242,7 @@ export class ModeAgentConfig {
    * @param {boolean} [opts.requiresSandbox=false]
    * @param {Function|null} [opts.completionCheck=null] - (text: string) => boolean
    * @param {Function|null} [opts.outputParser=null] - (text: string) => Object
+   * @param {import('./handoff.js').HandoffFilter|null} [opts.handoffFilter=null] - Context filter for incoming handoffs
    */
   constructor({
     mode,
@@ -252,6 +255,7 @@ export class ModeAgentConfig {
     requiresSandbox = false,
     completionCheck = null,
     outputParser = null,
+    handoffFilter = null,
   }) {
     this.mode = mode;
     this.toolRegistry = toolRegistry;
@@ -263,6 +267,7 @@ export class ModeAgentConfig {
     this.requiresSandbox = requiresSandbox;
     this.completionCheck = completionCheck;
     this.outputParser = outputParser;
+    this.handoffFilter = handoffFilter;
   }
 }
@@ -504,6 +509,18 @@ export class BaseAgent {
     // --- Post-loop: validation ---
     result.validation = this._config.validator.validate(result);
+    // --- Post-loop: self-check (hallucination/hedging detection) ---
+    try {
+      const { SelfChecker } = await import('../gates/self-check.js');
+      const checker = new SelfChecker();
+      result.selfCheck = checker.check(lastAssistantText);
+      if (result.selfCheck.findings.length > 0) {
+        debug(`Self-check: ${result.selfCheck.findings.length} findings, score ${result.selfCheck.score}`);
+      }
+    } catch {
+      // Self-check is non-critical — don't fail the agent if it can't load
+    }
     // --- Duration ---
     result.durationS = (performance.now() / 1000) - startTime;