cipher-security 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/bin/cipher.js +10 -0
  2. package/lib/analyze/consistency.js +566 -0
  3. package/lib/analyze/constitution.js +110 -0
  4. package/lib/analyze/sharding.js +251 -0
  5. package/lib/autonomous/agent-tool.js +165 -0
  6. package/lib/autonomous/framework.js +17 -0
  7. package/lib/autonomous/handoff.js +506 -0
  8. package/lib/autonomous/modes/blue.js +26 -0
  9. package/lib/autonomous/modes/red.js +28 -0
  10. package/lib/benchmark/agent.js +88 -26
  11. package/lib/benchmark/baselines.js +3 -0
  12. package/lib/benchmark/claude-code-solver.js +254 -0
  13. package/lib/benchmark/cognitive.js +283 -0
  14. package/lib/benchmark/index.js +12 -2
  15. package/lib/benchmark/knowledge.js +281 -0
  16. package/lib/benchmark/llm.js +156 -15
  17. package/lib/benchmark/models.js +5 -2
  18. package/lib/benchmark/nyu-ctf.js +192 -0
  19. package/lib/benchmark/overthewire.js +347 -0
  20. package/lib/benchmark/picoctf.js +281 -0
  21. package/lib/benchmark/prompts.js +280 -0
  22. package/lib/benchmark/registry.js +219 -0
  23. package/lib/benchmark/remote-solver.js +356 -0
  24. package/lib/benchmark/remote-target.js +263 -0
  25. package/lib/benchmark/reporter.js +35 -0
  26. package/lib/benchmark/runner.js +174 -10
  27. package/lib/benchmark/sandbox.js +35 -0
  28. package/lib/benchmark/scorer.js +22 -4
  29. package/lib/benchmark/solver.js +34 -1
  30. package/lib/benchmark/tools.js +262 -16
  31. package/lib/commands.js +9 -0
  32. package/lib/execution/council.js +434 -0
  33. package/lib/execution/parallel.js +292 -0
  34. package/lib/gates/circuit-breaker.js +135 -0
  35. package/lib/gates/confidence.js +302 -0
  36. package/lib/gates/corrections.js +219 -0
  37. package/lib/gates/self-check.js +245 -0
  38. package/lib/gateway/commands.js +727 -0
  39. package/lib/guardrails/engine.js +364 -0
  40. package/lib/mcp/server.js +349 -3
  41. package/lib/memory/compressor.js +94 -7
  42. package/lib/pipeline/hooks.js +288 -0
  43. package/lib/pipeline/index.js +11 -0
  44. package/lib/review/budget.js +210 -0
  45. package/lib/review/engine.js +526 -0
  46. package/lib/review/layers/acceptance-auditor.js +279 -0
  47. package/lib/review/layers/blind-hunter.js +500 -0
  48. package/lib/review/layers/defense-in-depth.js +209 -0
  49. package/lib/review/layers/edge-case-hunter.js +266 -0
  50. package/lib/review/panel.js +519 -0
  51. package/lib/review/two-stage.js +244 -0
  52. package/lib/session/cost-tracker.js +203 -0
  53. package/lib/session/logger.js +349 -0
  54. package/package.json +1 -1
@@ -0,0 +1,251 @@
1
+ // Copyright (c) 2026 defconxt. All rights reserved.
2
+ // Licensed under AGPL-3.0 — see LICENSE file for details.
3
+ // CIPHER is a trademark of defconxt.
4
+
5
+ /**
6
+ * CIPHER Semantic Document Sharding
7
+ *
8
+ * Splits large markdown documents into semantically coherent chunks
9
+ * at heading boundaries. Produces numbered shard files with an index.
10
+ *
11
+ * @module analyze/sharding
12
+ */
13
+
14
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
15
+ import { join, basename, dirname, extname } from 'node:path';
16
+
17
+ // ---------------------------------------------------------------------------
18
+ // Shard
19
+ // ---------------------------------------------------------------------------
20
+
21
/**
 * A single document shard: one section of a source markdown document
 * together with its position (number and line range) in that document.
 */
export class Shard {
  /**
   * @param {object} opts
   * @param {number} opts.number - Shard number (1-indexed)
   * @param {string} opts.title - Section heading
   * @param {string} opts.content - Section content (including heading)
   * @param {number} opts.lines - Line count
   * @param {number} opts.startLine - Start line in source (1-indexed)
   * @param {number} opts.endLine - End line in source (1-indexed)
   */
  constructor(opts = {}) {
    this.number = opts.number ?? 0;
    this.title = opts.title ?? '';
    this.content = opts.content ?? '';
    this.lines = opts.lines ?? 0;
    this.startLine = opts.startLine ?? 0;
    this.endLine = opts.endLine ?? 0;
  }

  /**
   * Generate shard filename.
   *
   * The name is `<sourceBase>-<NN>-<slug>.md`, where `NN` is the shard number
   * zero-padded to two digits and `slug` is the lower-cased title with every
   * run of characters outside `[a-z0-9]` collapsed to a single hyphen,
   * limited to 50 characters. When the title yields no slug at all the name
   * is `<sourceBase>-<NN>.md`.
   *
   * @param {string} sourceBase - Source file name without extension
   * @returns {string}
   */
  filename(sourceBase) {
    const padded = String(this.number).padStart(2, '0');
    const slug = String(this.title)
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-|-$/g, '')
      .slice(0, 50)
      // Truncation can cut right after a separator; never end on a hyphen.
      .replace(/-$/, '');
    // A title without ASCII alphanumerics produces an empty slug; drop the
    // separator instead of emitting a dangling "-.md".
    return slug.length > 0
      ? `${sourceBase}-${padded}-${slug}.md`
      : `${sourceBase}-${padded}.md`;
  }

  /**
   * Generate shard content with frontmatter.
   *
   * @param {string} sourcePath - Path of the source document, recorded as `source`
   * @param {number} totalShards - Total shard count, recorded as `shard: n/total`
   * @returns {string} Frontmatter block, a blank line, then the shard content
   */
  toFile(sourcePath, totalShards) {
    // JSON string syntax is also a valid YAML double-quoted scalar, so quotes
    // and backslashes in the heading are escaped instead of breaking the
    // frontmatter. Plain titles serialize exactly as before.
    const quotedTitle = JSON.stringify(String(this.title));
    return [
      '---',
      `source: ${sourcePath}`,
      `shard: ${this.number}/${totalShards}`,
      `title: ${quotedTitle}`,
      `lines: ${this.startLine}-${this.endLine}`,
      '---',
      '',
      this.content,
    ].join('\n');
  }
}
68
+
69
+ // ---------------------------------------------------------------------------
70
+ // ShardResult
71
+ // ---------------------------------------------------------------------------
72
+
73
/**
 * Outcome of sharding one document: the shards plus source metadata, with
 * renderers for a markdown index, a plain-text report and a JSON summary.
 */
export class ShardResult {
  /**
   * @param {object} opts
   * @param {Shard[]} opts.shards
   * @param {string} opts.sourcePath
   * @param {number} opts.sourceLines
   * @param {number} opts.headingLevel
   */
  constructor(opts = {}) {
    this.shards = opts.shards ?? [];
    this.sourcePath = opts.sourcePath ?? '';
    this.sourceLines = opts.sourceLines ?? 0;
    this.headingLevel = opts.headingLevel ?? 2;
  }

  /**
   * Generate index content.
   *
   * Produces a markdown document with a summary header and one table row per
   * shard; each row links the shard title to the name returned by
   * `shard.filename()`.
   *
   * @returns {string}
   */
  toIndex() {
    const lines = [
      `# Shard Index — ${basename(this.sourcePath)}`,
      '',
      `Source: \`${this.sourcePath}\``,
      `Total lines: ${this.sourceLines}`,
      `Heading level: H${this.headingLevel}`,
      `Shards: ${this.shards.length}`,
      '',
      '| # | Title | Lines | Range |',
      '|---|-------|-------|-------|',
    ];
    const sourceBase = basename(this.sourcePath, extname(this.sourcePath));
    for (const shard of this.shards) {
      const fn = shard.filename(sourceBase);
      // A raw "|" would split the table cell and "[" / "]" would break the
      // link text, so backslash-escape them (and the backslash itself).
      const linkText = String(shard.title).replace(/[\\|\[\]]/g, '\\$&');
      lines.push(`| ${shard.number} | [${linkText}](${fn}) | ${shard.lines} | ${shard.startLine}-${shard.endLine} |`);
    }
    return lines.join('\n');
  }

  /**
   * Generate a plain-text summary: a header followed by one line per shard
   * with its zero-padded number, title and line count.
   *
   * @returns {string}
   */
  toReport() {
    const lines = [
      `Sharding: ${basename(this.sourcePath)}`,
      ` Source: ${this.sourceLines} lines → ${this.shards.length} shards at H${this.headingLevel}`,
      '',
    ];
    for (const shard of this.shards) {
      lines.push(` [${String(shard.number).padStart(2, '0')}] ${shard.title} (${shard.lines} lines)`);
    }
    return lines.join('\n');
  }

  /**
   * Serializable summary. Shard content is omitted; only the number, title,
   * line count and line range of each shard are included.
   *
   * @returns {object}
   */
  toJSON() {
    return {
      sourcePath: this.sourcePath,
      sourceLines: this.sourceLines,
      headingLevel: this.headingLevel,
      shardCount: this.shards.length,
      shards: this.shards.map((s) => ({
        number: s.number,
        title: s.title,
        lines: s.lines,
        startLine: s.startLine,
        endLine: s.endLine,
      })),
    };
  }
}
137
+
138
+ // ---------------------------------------------------------------------------
139
+ // Sharding logic
140
+ // ---------------------------------------------------------------------------
141
+
142
/**
 * Locate heading lines that start with `headingPrefix`, skipping any line
 * that sits inside a fenced code block (``` or ~~~).
 *
 * @param {string[]} lines - Document split into lines
 * @param {string} headingPrefix - e.g. '## ' for level 2
 * @returns {{ line: number, title: string }[]} 0-indexed line and trimmed title
 */
function findHeadingLines(lines, headingPrefix) {
  const fencePattern = /^ {0,3}(`{3,}|~{3,})(.*)$/;
  const headings = [];
  let openFence = null; // { marker, length } while inside a fenced block

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const fence = fencePattern.exec(line);

    if (openFence !== null) {
      // A closing fence uses the same marker, is at least as long as the
      // opener, and carries nothing but whitespace after it.
      if (
        fence !== null &&
        fence[1][0] === openFence.marker &&
        fence[1].length >= openFence.length &&
        fence[2].trim() === ''
      ) {
        openFence = null;
      }
      continue;
    }

    // The info string of a backtick fence may not itself contain backticks
    // (otherwise the line is inline code, not a fence opener).
    if (fence !== null && !(fence[1][0] === '`' && fence[2].includes('`'))) {
      openFence = { marker: fence[1][0], length: fence[1].length };
      continue;
    }

    if (line.startsWith(headingPrefix)) {
      headings.push({
        line: i,
        title: line.slice(headingPrefix.length).trim(),
      });
    }
  }
  return headings;
}

/**
 * Split a markdown document into shards at heading boundaries.
 *
 * Lines that begin with exactly `level` '#' characters followed by a space
 * start a new shard; such lines inside fenced code blocks are ignored.
 * Non-blank content before the first heading becomes a "Preamble" shard.
 * Unless `dryRun` is set, an `INDEX.md` plus one file per shard are written
 * to the output directory (created if needed).
 *
 * NOTE(review): when the document has no heading at the requested level the
 * whole document is returned as a single shard and nothing is written to
 * disk, even when `dryRun` is false — confirm this is intended.
 *
 * @param {string} filePath - Path to the markdown file
 * @param {object} [options]
 * @param {number} [options.level=2] - Heading level to split at (1-6)
 * @param {string} [options.output] - Output directory (default: <filename>-shards/)
 * @param {boolean} [options.dryRun=false] - Preview only, don't write files
 * @returns {ShardResult}
 * @throws {Error} If `filePath` does not exist
 * @throws {RangeError} If `options.level` is not an integer from 1 to 6
 */
export function shardDocument(filePath, options = {}) {
  // Number() keeps numeric strings (e.g. from a CLI flag) working.
  const level = Number(options.level ?? 2);
  const dryRun = options.dryRun ?? false;

  if (!existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }
  // Level 0 or NaN would yield the prefix ' ' and match every line that
  // starts with a space, so reject anything outside the markdown range.
  if (!Number.isInteger(level) || level < 1 || level > 6) {
    throw new RangeError(`Invalid heading level: ${options.level} (expected an integer from 1 to 6)`);
  }

  const content = readFileSync(filePath, 'utf-8');
  const lines = content.split('\n');
  const headingPrefix = '#'.repeat(level) + ' ';

  // Find heading positions
  const headings = findHeadingLines(lines, headingPrefix);

  // Handle edge cases
  if (headings.length === 0) {
    // No headings at this level — return entire doc as one shard
    return new ShardResult({
      shards: [new Shard({
        number: 1,
        title: basename(filePath, extname(filePath)),
        content: content,
        lines: lines.length,
        startLine: 1,
        endLine: lines.length,
      })],
      sourcePath: filePath,
      sourceLines: lines.length,
      headingLevel: level,
    });
  }

  // Build shards
  const shards = [];

  // Preamble (content before first heading)
  if (headings[0].line > 0) {
    const preambleContent = lines.slice(0, headings[0].line).join('\n').trim();
    if (preambleContent.length > 0) {
      shards.push(new Shard({
        number: shards.length + 1,
        title: 'Preamble',
        content: preambleContent,
        lines: headings[0].line,
        startLine: 1,
        endLine: headings[0].line,
      }));
    }
  }

  // Heading sections: each runs from its heading up to (not including) the
  // next heading, or to the end of the document for the last one.
  for (let h = 0; h < headings.length; h++) {
    const start = headings[h].line;
    const end = h + 1 < headings.length ? headings[h + 1].line : lines.length;
    const sectionContent = lines.slice(start, end).join('\n').trimEnd();

    shards.push(new Shard({
      number: shards.length + 1,
      title: headings[h].title,
      content: sectionContent,
      lines: end - start,
      startLine: start + 1, // 0-indexed position -> 1-indexed line number
      endLine: end,
    }));
  }

  const result = new ShardResult({
    shards,
    sourcePath: filePath,
    sourceLines: lines.length,
    headingLevel: level,
  });

  // Write files unless dry run
  if (!dryRun) {
    const sourceBase = basename(filePath, extname(filePath));
    const outputDir = options.output ?? join(dirname(filePath), `${sourceBase}-shards`);
    mkdirSync(outputDir, { recursive: true });

    // Write index
    writeFileSync(join(outputDir, 'INDEX.md'), result.toIndex());

    // Write shard files
    for (const shard of shards) {
      const fn = shard.filename(sourceBase);
      writeFileSync(join(outputDir, fn), shard.toFile(filePath, shards.length));
    }
  }

  return result;
}
@@ -0,0 +1,165 @@
1
+ // Copyright (c) 2026 defconxt. All rights reserved.
2
+ // Licensed under AGPL-3.0 — see LICENSE file for details.
3
+ // CIPHER is a trademark of defconxt.
4
+
5
+ /**
6
+ * Agent-as-Tool — Use one mode agent as a callable tool for another.
7
+ *
8
+ * Enables patterns like PURPLE invoking RED for a quick scan without
9
+ * fully handing off control. The calling agent keeps its context;
10
+ * the sub-agent runs independently and returns structured results.
11
+ *
12
+ * Key exports:
13
+ * - agentAsTool: creates a tool schema + handler for any mode
14
+ * - AgentToolResult: structured sub-agent response
15
+ *
16
+ * @module autonomous/agent-tool
17
+ */
18
+
19
+ import { ModeAgentResult } from './framework.js';
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // AgentToolResult
23
+ // ---------------------------------------------------------------------------
24
+
25
/**
 * Value object describing what a sub-agent invocation produced.
 */
export class AgentToolResult {
  /**
   * Every field falls back to a neutral default when it is missing or null.
   *
   * @param {object} opts
   * @param {string} opts.mode - Sub-agent mode
   * @param {string} opts.outputText - Sub-agent text output
   * @param {object} opts.outputData - Sub-agent structured data
   * @param {boolean} opts.valid - Whether sub-agent validation passed
   * @param {number} opts.durationS - Sub-agent wall-clock seconds
   * @param {number} opts.tokensIn - Sub-agent input tokens
   * @param {number} opts.tokensOut - Sub-agent output tokens
   * @param {string|null} [opts.error] - Error if sub-agent failed
   */
  constructor(opts = {}) {
    const {
      mode,
      outputText,
      outputData,
      valid,
      durationS,
      tokensIn,
      tokensOut,
      error,
    } = opts;

    // `??` (rather than destructuring defaults) so null is replaced as well.
    this.mode = mode ?? '';
    this.outputText = outputText ?? '';
    this.outputData = outputData ?? {};
    this.valid = valid ?? false;
    this.durationS = durationS ?? 0;
    this.tokensIn = tokensIn ?? 0;
    this.tokensOut = tokensOut ?? 0;
    this.error = error ?? null;
  }

  /**
   * Render the result as the text handed back as tool output.
   *
   * A truthy `error` produces a single error line; otherwise a header with
   * validity, duration and token counts is followed by a blank line and the
   * sub-agent's text output.
   *
   * @returns {string}
   */
  toString() {
    const tag = `[Agent-as-Tool: ${this.mode}]`;
    if (this.error) {
      return `${tag} ERROR: ${this.error}`;
    }

    const seconds = this.durationS.toFixed(2);
    const header = [
      tag,
      `Valid: ${this.valid}`,
      `Duration: ${seconds}s`,
      `Tokens: ${this.tokensIn}in/${this.tokensOut}out`,
    ];
    return header.concat('', this.outputText).join('\n');
  }
}
67
+
68
+ // ---------------------------------------------------------------------------
69
+ // agentAsTool
70
+ // ---------------------------------------------------------------------------
71
+
72
/**
 * Create a tool schema + handler that invokes a mode agent as a sub-tool.
 *
 * The returned tool can be registered in any mode's ToolRegistry. When called,
 * it runs the target mode as a sub-agent and returns structured results
 * formatted as a string. Failures of the sub-agent (including failure to load
 * the default runner) are reported in that string rather than thrown.
 *
 * @param {string} mode - Target mode to invoke as a tool (upper-cased internally)
 * @param {string} name - Tool name (e.g. 'invoke_red_scan')
 * @param {string} description - Tool description for the LLM; a generic one is used when empty
 * @param {object} [opts]
 * @param {Function} [opts.agentRunner] - Injectable runner for testing
 * @param {import('./handoff.js').HandoffEngine} [opts.handoffEngine] - Shared depth tracking
 * @returns {{ schema: object, handler: Function }}
 */
export function agentAsTool(mode, name, description, opts = {}) {
  const targetMode = mode.toUpperCase();

  const schema = {
    name,
    description: description || `Invoke ${targetMode} mode as a sub-agent tool.`,
    input_schema: {
      type: 'object',
      properties: {
        task: {
          type: 'string',
          description: 'Task description for the sub-agent',
        },
        parameters: {
          type: 'object',
          description: 'Optional parameters for the sub-agent task',
        },
      },
      required: ['task'],
    },
  };

  /**
   * Handler function — invoked by ToolRegistry.dispatch().
   *
   * @param {*} context - Calling agent's context (not passed to sub-agent)
   * @param {object} toolInput - { task: string, parameters?: object }
   * @returns {Promise<string>}
   */
  async function handler(context, toolInput) {
    const engine = opts.handoffEngine || null;

    // Check depth limit if engine is available
    if (engine) {
      if (engine.depth >= engine.maxDepth) {
        return new AgentToolResult({
          mode: targetMode,
          error: `Max depth (${engine.maxDepth}) reached — cannot invoke sub-agent`,
        }).toString();
      }
      engine.incrementDepth();
    }

    // Everything after the increment runs under try/finally so the depth is
    // restored on every exit path, including a failed runner import or a
    // malformed toolInput.
    try {
      const runner = opts.agentRunner || (await import('./runner.js')).runAutonomous;

      const taskInput = {
        // Spread first: caller-supplied parameters must not be able to
        // overwrite the task or the sub-agent marker message.
        ...(toolInput.parameters || {}),
        task: toolInput.task,
        user_message: `[Sub-agent invocation] ${toolInput.task}`,
      };

      const subResult = await runner(targetMode, taskInput, null, null);

      return new AgentToolResult({
        mode: targetMode,
        outputText: subResult.outputText,
        outputData: subResult.outputData,
        valid: subResult.validation?.valid ?? false,
        durationS: subResult.durationS,
        tokensIn: subResult.tokensIn,
        tokensOut: subResult.tokensOut,
        error: subResult.error,
      }).toString();
    } catch (e) {
      // Non-Error throwables have no `.message`; stringify them so the
      // failure is still reported as an error rather than an empty result.
      const message = (e instanceof Error && e.message) || String(e);
      return new AgentToolResult({
        mode: targetMode,
        error: message,
      }).toString();
    } finally {
      if (engine) engine.decrementDepth();
    }
  }

  return { schema, handler };
}
@@ -109,6 +109,8 @@ export class ModeAgentResult {
109
109
  this.validation = validation;
110
110
  this.error = error;
111
111
  this.durationS = durationS;
112
+ /** @type {import('../gates/self-check.js').SelfCheckResult|null} */
113
+ this.selfCheck = null;
112
114
  }
113
115
  }
114
116
 
@@ -240,6 +242,7 @@ export class ModeAgentConfig {
240
242
  * @param {boolean} [opts.requiresSandbox=false]
241
243
  * @param {Function|null} [opts.completionCheck=null] - (text: string) => boolean
242
244
  * @param {Function|null} [opts.outputParser=null] - (text: string) => Object
245
+ * @param {import('./handoff.js').HandoffFilter|null} [opts.handoffFilter=null] - Context filter for incoming handoffs
243
246
  */
244
247
  constructor({
245
248
  mode,
@@ -252,6 +255,7 @@ export class ModeAgentConfig {
252
255
  requiresSandbox = false,
253
256
  completionCheck = null,
254
257
  outputParser = null,
258
+ handoffFilter = null,
255
259
  }) {
256
260
  this.mode = mode;
257
261
  this.toolRegistry = toolRegistry;
@@ -263,6 +267,7 @@ export class ModeAgentConfig {
263
267
  this.requiresSandbox = requiresSandbox;
264
268
  this.completionCheck = completionCheck;
265
269
  this.outputParser = outputParser;
270
+ this.handoffFilter = handoffFilter;
266
271
  }
267
272
  }
268
273
 
@@ -504,6 +509,18 @@ export class BaseAgent {
504
509
  // --- Post-loop: validation ---
505
510
  result.validation = this._config.validator.validate(result);
506
511
 
512
+ // --- Post-loop: self-check (hallucination/hedging detection) ---
513
+ try {
514
+ const { SelfChecker } = await import('../gates/self-check.js');
515
+ const checker = new SelfChecker();
516
+ result.selfCheck = checker.check(lastAssistantText);
517
+ if (result.selfCheck.findings.length > 0) {
518
+ debug(`Self-check: ${result.selfCheck.findings.length} findings, score ${result.selfCheck.score}`);
519
+ }
520
+ } catch {
521
+ // Self-check is non-critical — don't fail the agent if it can't load
522
+ }
523
+
507
524
  // --- Duration ---
508
525
  result.durationS = (performance.now() / 1000) - startTime;
509
526