cipher-security 2.0.8 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/bin/cipher.js +11 -1
  2. package/lib/agent-runtime/handlers/architect.js +199 -0
  3. package/lib/agent-runtime/handlers/base.js +240 -0
  4. package/lib/agent-runtime/handlers/blue.js +220 -0
  5. package/lib/agent-runtime/handlers/incident.js +161 -0
  6. package/lib/agent-runtime/handlers/privacy.js +190 -0
  7. package/lib/agent-runtime/handlers/purple.js +209 -0
  8. package/lib/agent-runtime/handlers/recon.js +174 -0
  9. package/lib/agent-runtime/handlers/red.js +246 -0
  10. package/lib/agent-runtime/handlers/researcher.js +170 -0
  11. package/lib/agent-runtime/handlers.js +35 -0
  12. package/lib/agent-runtime/index.js +196 -0
  13. package/lib/agent-runtime/parser.js +316 -0
  14. package/lib/analyze/consistency.js +566 -0
  15. package/lib/analyze/constitution.js +110 -0
  16. package/lib/analyze/sharding.js +251 -0
  17. package/lib/autonomous/agent-tool.js +165 -0
  18. package/lib/autonomous/feedback-loop.js +13 -6
  19. package/lib/autonomous/framework.js +17 -0
  20. package/lib/autonomous/handoff.js +506 -0
  21. package/lib/autonomous/modes/blue.js +26 -0
  22. package/lib/autonomous/modes/red.js +585 -0
  23. package/lib/autonomous/modes/researcher.js +322 -0
  24. package/lib/autonomous/researcher.js +12 -45
  25. package/lib/autonomous/runner.js +9 -537
  26. package/lib/benchmark/agent.js +88 -26
  27. package/lib/benchmark/baselines.js +3 -0
  28. package/lib/benchmark/claude-code-solver.js +254 -0
  29. package/lib/benchmark/cognitive.js +283 -0
  30. package/lib/benchmark/index.js +12 -2
  31. package/lib/benchmark/knowledge.js +281 -0
  32. package/lib/benchmark/llm.js +156 -15
  33. package/lib/benchmark/models.js +5 -2
  34. package/lib/benchmark/nyu-ctf.js +192 -0
  35. package/lib/benchmark/overthewire.js +347 -0
  36. package/lib/benchmark/picoctf.js +281 -0
  37. package/lib/benchmark/prompts.js +280 -0
  38. package/lib/benchmark/registry.js +219 -0
  39. package/lib/benchmark/remote-solver.js +356 -0
  40. package/lib/benchmark/remote-target.js +263 -0
  41. package/lib/benchmark/reporter.js +35 -0
  42. package/lib/benchmark/runner.js +174 -10
  43. package/lib/benchmark/sandbox.js +35 -0
  44. package/lib/benchmark/scorer.js +22 -4
  45. package/lib/benchmark/solver.js +34 -1
  46. package/lib/benchmark/tools.js +262 -16
  47. package/lib/commands.js +9 -0
  48. package/lib/execution/council.js +434 -0
  49. package/lib/execution/parallel.js +292 -0
  50. package/lib/gates/circuit-breaker.js +135 -0
  51. package/lib/gates/confidence.js +302 -0
  52. package/lib/gates/corrections.js +219 -0
  53. package/lib/gates/self-check.js +245 -0
  54. package/lib/gateway/commands.js +727 -0
  55. package/lib/guardrails/engine.js +364 -0
  56. package/lib/mcp/server.js +349 -3
  57. package/lib/memory/compressor.js +94 -7
  58. package/lib/pipeline/hooks.js +288 -0
  59. package/lib/pipeline/index.js +11 -0
  60. package/lib/review/budget.js +210 -0
  61. package/lib/review/engine.js +526 -0
  62. package/lib/review/layers/acceptance-auditor.js +279 -0
  63. package/lib/review/layers/blind-hunter.js +500 -0
  64. package/lib/review/layers/defense-in-depth.js +209 -0
  65. package/lib/review/layers/edge-case-hunter.js +266 -0
  66. package/lib/review/panel.js +519 -0
  67. package/lib/review/two-stage.js +244 -0
  68. package/lib/session/cost-tracker.js +203 -0
  69. package/lib/session/logger.js +349 -0
  70. package/package.json +1 -1
@@ -0,0 +1,364 @@
1
+ // Copyright (c) 2026 defconxt. All rights reserved.
2
+ // Licensed under AGPL-3.0 — see LICENSE file for details.
3
+ // CIPHER is a trademark of defconxt.
4
+
5
+ /**
6
+ * CIPHER Guardrail Tripwire Architecture
7
+ *
8
+ * Input/output guardrails with tripwire pattern for autonomous agents.
9
+ * Guardrails run in parallel with agent execution and can halt processing
10
+ * immediately when a tripwire fires.
11
+ *
12
+ * Input guardrails: detect prompt injection, scope violations, malicious payloads
13
+ * Output guardrails: detect dangerous commands, data leaks, scope non-compliance
14
+ *
15
+ * @module guardrails/engine
16
+ */
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Tripwire
20
+ // ---------------------------------------------------------------------------
21
+
22
/**
 * Outcome of a single guardrail evaluation.
 *
 * A plain value object: every field falls back to a default when it is
 * missing (or `null`/`undefined`) in the options bag.
 */
export class TripwireResult {
  /**
   * @param {object} [opts]
   * @param {boolean} [opts.tripped=false] - Whether the guardrail fired
   * @param {string} [opts.guardrail=''] - Guardrail identifier
   * @param {string} [opts.type='input'] - 'input' or 'output'
   * @param {string} [opts.severity='medium'] - critical|high|medium|low
   * @param {string} [opts.reason=''] - Why it tripped
   * @param {string} [opts.evidence=''] - The triggering content
   * @param {string} [opts.action='halt'] - Recommended action (halt|warn|log)
   */
  constructor(opts = {}) {
    // A default parameter only replaces `undefined`; an explicit `null`
    // would otherwise throw on the first property access below.
    const source = opts ?? {};

    this.tripped = source.tripped ?? false;
    this.guardrail = source.guardrail ?? '';
    this.type = source.type ?? 'input';
    this.severity = source.severity ?? 'medium';
    this.reason = source.reason ?? '';
    this.evidence = source.evidence ?? '';
    this.action = source.action ?? 'halt';
  }
}
46
+
47
+ // ---------------------------------------------------------------------------
48
+ // Input Guardrails
49
+ // ---------------------------------------------------------------------------
50
+
51
+ /** @typedef {(input: string, context?: object) => Promise<TripwireResult>} GuardrailFn */
52
+
53
/**
 * Detect prompt injection attempts in input.
 *
 * The input is tested against a fixed list of case-insensitive patterns, in
 * order; the first pattern that matches produces a tripped, critical result
 * whose evidence is the matched text truncated to 100 characters. When no
 * pattern matches, a non-tripped result is returned.
 *
 * Non-string input is coerced to a string (`null`/`undefined` become the
 * empty string) so the guardrail always yields a verdict instead of throwing.
 *
 * @type {GuardrailFn}
 * @param {string} input - Text to inspect
 * @param {object} [context] - Unused; accepted to satisfy the GuardrailFn shape
 * @returns {Promise<TripwireResult>}
 */
export async function promptInjectionGuardrail(input, context = {}) {
  const text = typeof input === 'string' ? input : String(input ?? '');

  const INJECTION_PATTERNS = [
    // Direct instruction override
    { pattern: /ignore\s+(?:all\s+)?(?:previous|prior|above)\s+(?:instructions|prompts|rules)/i, reason: 'Direct instruction override attempt' },
    { pattern: /disregard\s+(?:all\s+)?(?:previous|prior|above)/i, reason: 'Instruction disregard attempt' },
    { pattern: /forget\s+(?:everything|all|your)\s+(?:instructions|rules|training)/i, reason: 'Memory wipe attempt' },
    // Role manipulation
    { pattern: /you\s+are\s+(?:now|actually|really)\s+(?:a|an|the)\s/i, reason: 'Role reassignment attempt' },
    { pattern: /act\s+as\s+(?:if\s+you\s+(?:are|were)|a\s+different)/i, reason: 'Role manipulation attempt' },
    { pattern: /pretend\s+(?:you\s+are|to\s+be)\s/i, reason: 'Identity spoofing attempt' },
    // System prompt extraction
    { pattern: /(?:print|show|reveal|display|output)\s+(?:your\s+)?(?:system\s+)?(?:prompt|instructions|rules)/i, reason: 'System prompt extraction attempt' },
    { pattern: /what\s+(?:are|is)\s+your\s+(?:system\s+)?(?:prompt|instructions|rules)/i, reason: 'System prompt probing' },
    // Encoding bypass
    { pattern: /base64\s*(?:decode|encode)\s*(?:the\s+following|this)/i, reason: 'Encoding bypass attempt' },
    { pattern: /(?:decode|translate)\s+(?:from|this)\s+(?:hex|base64|rot13|binary)/i, reason: 'Encoding bypass via translation' },
    // Delimiter injection
    { pattern: /```system\b/i, reason: 'System block injection' },
    { pattern: /<\/?(?:system|assistant|user)\s*>/i, reason: 'Message role injection via XML tags' },
    // Indirect injection
    { pattern: /when\s+(?:you\s+)?(?:read|see|encounter)\s+this/i, reason: 'Indirect injection trigger' },
    { pattern: /(?:AI|assistant|model|agent)[,:]?\s+(?:please\s+)?(?:execute|run|do)\s+the\s+following/i, reason: 'Embedded instruction for AI' },
  ];

  for (const { pattern, reason } of INJECTION_PATTERNS) {
    const match = pattern.exec(text);
    if (match) {
      return new TripwireResult({
        tripped: true,
        guardrail: 'prompt-injection',
        type: 'input',
        severity: 'critical',
        reason,
        // Cap the echoed evidence so a huge match cannot bloat the result.
        evidence: match[0].slice(0, 100),
        action: 'halt',
      });
    }
  }

  return new TripwireResult({ tripped: false, guardrail: 'prompt-injection', type: 'input' });
}
100
+
101
/**
 * Detect scope violations — input requesting actions outside authorized scope.
 *
 * Two checks run in order:
 *  1. When `context.scope.allowedTargets` is a non-empty list, every IPv4-like
 *     token and every `label.tld` token in the input must match an allowed
 *     entry; the first token that does not produces a tripped, high-severity
 *     result.
 *  2. The input is tested against a fixed list of prohibited-action patterns;
 *     the first match produces a tripped, critical result.
 *
 * Non-string input is coerced to a string and a missing/null context is
 * treated as "no scope configured".
 *
 * @type {GuardrailFn}
 * @param {string} input - Text to inspect
 * @param {object} [context]
 * @param {object} [context.scope]
 * @param {string[]} [context.scope.allowedTargets] - Authorized hosts/IPs
 * @returns {Promise<TripwireResult>}
 */
export async function scopeComplianceGuardrail(input, context = {}) {
  const text = typeof input === 'string' ? input : String(input ?? '');

  const configured = context?.scope?.allowedTargets;
  const rawAllowed = typeof configured === 'string' ? [configured] : configured;
  // Host names are case-insensitive, so compare in lower case. Empty entries
  // are dropped: `''` is a substring of everything and would silently allow
  // every target.
  const allowedTargets = Array.isArray(rawAllowed)
    ? rawAllowed.map((entry) => String(entry).trim().toLowerCase()).filter((entry) => entry.length > 0)
    : [];

  // Check for targets outside scope
  if (allowedTargets.length > 0) {
    // Look for IP addresses and domains in the input
    const ipRe = /\b(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\b/g;
    const domainRe = /\b([a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.(?:[a-z]{2,}))\b/gi;

    for (const re of [ipRe, domainRe]) {
      for (const [, target] of text.matchAll(re)) {
        const needle = target.toLowerCase();
        // NOTE(review): matching is a two-way substring test, so an allowed
        // "example.com" also admits "ample.co", and "10.0.0.1" admits
        // "110.0.0.10". Kept for compatibility with allow-list entries that
        // are URLs or longer host names; tighten once the entry format is fixed.
        const inScope = allowedTargets.some((entry) => needle.includes(entry) || entry.includes(needle));
        if (!inScope) {
          return new TripwireResult({
            tripped: true,
            guardrail: 'scope-compliance',
            type: 'input',
            severity: 'high',
            reason: `Target "${target}" is outside authorized scope`,
            evidence: target,
            action: 'halt',
          });
        }
      }
    }
  }

  // Check for prohibited actions
  const PROHIBITED = [
    { pattern: /(?:format|wipe|destroy)\s+(?:the\s+)?(?:disk|drive|partition|volume)/i, reason: 'Destructive disk operation requested' },
    { pattern: /rm\s+-rf\s+\//i, reason: 'Root filesystem deletion requested' },
    // Matches the phrase "fork bomb" or the classic shell form `:(){ :|:& };:`
    // (whitespace-tolerant). The parentheses must be escaped literals.
    { pattern: /(?:fork\s+bomb|:\s*\(\s*\)\s*\{\s*:\s*\|\s*:\s*&\s*\})/i, reason: 'Fork bomb detected' },
    { pattern: /(?:bitcoin|monero|crypto)\s*(?:miner|mining)/i, reason: 'Cryptocurrency mining requested' },
  ];

  for (const { pattern, reason } of PROHIBITED) {
    const match = pattern.exec(text);
    if (match) {
      return new TripwireResult({
        tripped: true,
        guardrail: 'scope-compliance',
        type: 'input',
        severity: 'critical',
        reason,
        evidence: match[0].slice(0, 100),
        action: 'halt',
      });
    }
  }

  return new TripwireResult({ tripped: false, guardrail: 'scope-compliance', type: 'input' });
}
158
+
159
+ // ---------------------------------------------------------------------------
160
+ // Output Guardrails
161
+ // ---------------------------------------------------------------------------
162
+
163
/**
 * Detect dangerous commands in agent output.
 *
 * The output is tested against a fixed list of case-insensitive patterns, in
 * order; the first match produces a tripped, critical result whose evidence
 * is the matched text truncated to 100 characters. When nothing matches, a
 * non-tripped result is returned.
 *
 * @type {GuardrailFn}
 * @param {string} output - Agent output to inspect
 * @param {object} [context] - Unused; accepted to satisfy the GuardrailFn shape
 * @returns {Promise<TripwireResult>}
 */
export async function dangerousCommandGuardrail(output, context = {}) {
  const text = typeof output === 'string' ? output : String(output ?? '');

  const DANGEROUS = [
    { pattern: /rm\s+-rf\s+(?:\/|~\/|\$HOME)/i, reason: 'Destructive file deletion' },
    { pattern: /mkfs\b/i, reason: 'Filesystem format command' },
    { pattern: /dd\s+.*of=\/dev\//i, reason: 'Raw device write' },
    { pattern: />\s*\/dev\/sd[a-z]/i, reason: 'Raw disk overwrite' },
    { pattern: /chmod\s+(?:777|666)\s+\//i, reason: 'World-writable root permissions' },
    { pattern: /iptables\s+-F/i, reason: 'Firewall flush — removes all rules' },
    { pattern: /systemctl\s+(?:stop|disable)\s+(?:firewalld|ufw|iptables)/i, reason: 'Firewall disable' },
    { pattern: /curl\s+.*\|\s*(?:sudo\s+)?(?:bash|sh)\b/i, reason: 'Pipe-to-shell execution' },
    // Mirrors the curl rule: accepts a short-option cluster ending in `O`
    // followed by `-` (`-O -`, `-O-`, `-qO-`) anywhere in the wget arguments,
    // an optional `sudo`, and requires a word boundary after the shell name
    // so that `| sha256sum` / `| shuf` are not mistaken for `| sh`.
    { pattern: /wget\s+(?:.*\s)?-[a-z]*O\s*-.*\|\s*(?:sudo\s+)?(?:bash|sh)\b/i, reason: 'Pipe-to-shell via wget' },
    { pattern: /python[23]?\s+-c\s+['"]import\s+os;\s*os\.system/i, reason: 'Python shell escape' },
    { pattern: /nc\s+-[el]+\s.*(?:bash|sh|cmd)/i, reason: 'Netcat reverse shell' },
    { pattern: /bash\s+-i\s+>&\s*\/dev\/tcp/i, reason: 'Bash reverse shell' },
    { pattern: /\/etc\/shadow/i, reason: 'Shadow file access' },
    { pattern: /passwd\s+--delete\s+root/i, reason: 'Root password removal' },
  ];

  for (const { pattern, reason } of DANGEROUS) {
    const match = pattern.exec(text);
    if (match) {
      return new TripwireResult({
        tripped: true,
        guardrail: 'dangerous-command',
        type: 'output',
        severity: 'critical',
        reason,
        evidence: match[0].slice(0, 100),
        action: 'halt',
      });
    }
  }

  return new TripwireResult({ tripped: false, guardrail: 'dangerous-command', type: 'output' });
}
202
+
203
/**
 * Detect data leak patterns in agent output.
 *
 * The output is tested against a fixed list of secret/PII patterns, in order;
 * the first match produces a tripped result carrying that pattern's severity.
 * The matched text is never echoed back: evidence is always `[REDACTED]`.
 *
 * @type {GuardrailFn}
 * @param {string} output - Agent output to inspect
 * @param {object} [context] - Unused; accepted to satisfy the GuardrailFn shape
 * @returns {Promise<TripwireResult>}
 */
export async function dataLeakGuardrail(output, context = {}) {
  const text = typeof output === 'string' ? output : String(output ?? '');

  // Only "does it occur at all" matters here, so none of the patterns need
  // the stateful `g` flag.
  const LEAK_PATTERNS = [
    { pattern: /(?:AKIA[0-9A-Z]{16})/, reason: 'AWS access key exposed', severity: 'critical' },
    // Any PEM-style private key header: bare, RSA, EC, DSA, OPENSSH,
    // ENCRYPTED, and PGP "PRIVATE KEY BLOCK".
    { pattern: /-----BEGIN\s+(?:[A-Z0-9]+\s+)*PRIVATE\s+KEY(?:\s+BLOCK)?-----/i, reason: 'Private key exposed', severity: 'critical' },
    { pattern: /(?:eyJ[A-Za-z0-9_-]{20,}\.eyJ[A-Za-z0-9_-]{20,})/, reason: 'JWT token exposed', severity: 'high' },
    { pattern: /\b\d{3}-\d{2}-\d{4}\b/, reason: 'Possible SSN pattern exposed', severity: 'high' },
    { pattern: /\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})\b/, reason: 'Possible credit card number', severity: 'high' },
    { pattern: /(?:password|passwd|pwd)\s*[:=]\s*['"][^'"]{4,}['"]/i, reason: 'Password in output', severity: 'high' },
  ];

  for (const { pattern, reason, severity } of LEAK_PATTERNS) {
    if (pattern.test(text)) {
      return new TripwireResult({
        tripped: true,
        guardrail: 'data-leak',
        type: 'output',
        severity: severity ?? 'high',
        reason,
        evidence: '[REDACTED]', // Don't echo the leaked data
        action: 'halt',
      });
    }
  }

  return new TripwireResult({ tripped: false, guardrail: 'data-leak', type: 'output' });
}
235
+
236
+ // ---------------------------------------------------------------------------
237
+ // Guardrail Engine
238
+ // ---------------------------------------------------------------------------
239
+
240
/**
 * Guardrail engine — runs registered input/output guardrails concurrently
 * and reports the most severe tripwire.
 *
 * The engine fails closed: a guardrail that throws or rejects is reported as
 * a tripped, high-severity result instead of being silently treated as a pass.
 */
export class GuardrailEngine {
  constructor() {
    /** @type {GuardrailFn[]} */
    this._inputGuardrails = [];
    /** @type {GuardrailFn[]} */
    this._outputGuardrails = [];
  }

  /**
   * Register an input guardrail.
   * @param {GuardrailFn} fn
   * @returns {GuardrailEngine} this engine, for chaining
   * @throws {TypeError} if `fn` is not a function
   */
  addInput(fn) {
    if (typeof fn !== 'function') {
      throw new TypeError('addInput expects a guardrail function');
    }
    this._inputGuardrails.push(fn);
    return this;
  }

  /**
   * Register an output guardrail.
   * @param {GuardrailFn} fn
   * @returns {GuardrailEngine} this engine, for chaining
   * @throws {TypeError} if `fn` is not a function
   */
  addOutput(fn) {
    if (typeof fn !== 'function') {
      throw new TypeError('addOutput expects a guardrail function');
    }
    this._outputGuardrails.push(fn);
    return this;
  }

  /**
   * Run all input guardrails concurrently and return the most severe trip,
   * or null when none tripped (or none are registered).
   *
   * All guardrails are awaited before a verdict is returned; there is no
   * early exit on the first trip.
   *
   * @param {string} input
   * @param {object} [context]
   * @returns {Promise<TripwireResult|null>}
   */
  async checkInput(input, context = {}) {
    if (this._inputGuardrails.length === 0) return null;
    return this._raceGuardrails(this._inputGuardrails, input, context, 'input');
  }

  /**
   * Run all output guardrails concurrently and return the most severe trip,
   * or null when none tripped (or none are registered).
   *
   * @param {string} output
   * @param {object} [context]
   * @returns {Promise<TripwireResult|null>}
   */
  async checkOutput(output, context = {}) {
    if (this._outputGuardrails.length === 0) return null;
    return this._raceGuardrails(this._outputGuardrails, output, context, 'output');
  }

  /**
   * Run all guardrails (input + output) against the same text and return
   * every tripped result, input guardrails first, in registration order.
   * Used for auditing — collects ALL trips, not just the most severe.
   *
   * @param {string} text
   * @param {object} [context]
   * @returns {Promise<TripwireResult[]>}
   */
  async audit(text, context = {}) {
    const [inputTrips, outputTrips] = await Promise.all([
      this._collectTrips(this._inputGuardrails, text, context, 'input'),
      this._collectTrips(this._outputGuardrails, text, context, 'output'),
    ]);
    return [...inputTrips, ...outputTrips];
  }

  /**
   * Return the most severe trip (critical > high > medium > low), or null if
   * every guardrail passed. On equal severity the earliest-registered
   * guardrail wins.
   *
   * @private
   * @param {GuardrailFn[]} guardrails
   * @param {string} text
   * @param {object} context
   * @param {string} [type='input'] - Label used for synthesized failure results
   * @returns {Promise<TripwireResult|null>}
   */
  async _raceGuardrails(guardrails, text, context, type = 'input') {
    const RANK = { critical: 4, high: 3, medium: 2, low: 1 };
    const trips = await this._collectTrips(guardrails, text, context, type);

    let worst = null;
    for (const trip of trips) {
      if (!worst || (RANK[trip.severity] ?? 0) > (RANK[worst.severity] ?? 0)) {
        worst = trip;
      }
    }
    return worst;
  }

  /**
   * Execute every guardrail concurrently and return the tripped results in
   * registration order. A guardrail that throws (synchronously or not) is
   * converted into a tripped result so a broken check cannot wave content
   * through; a guardrail that resolves to a non-result value is ignored.
   *
   * @private
   * @param {GuardrailFn[]} guardrails
   * @param {string} text
   * @param {object} context
   * @param {string} type - 'input' or 'output'
   * @returns {Promise<TripwireResult[]>}
   */
  async _collectTrips(guardrails, text, context, type) {
    const settled = await Promise.allSettled(
      // The async wrapper turns a synchronous throw into a rejection.
      guardrails.map(async (fn) => fn(text, context)),
    );

    const trips = [];
    settled.forEach((outcome, index) => {
      if (outcome.status === 'fulfilled') {
        if (outcome.value?.tripped) trips.push(outcome.value);
        return;
      }
      const failure = outcome.reason;
      const message = failure instanceof Error ? failure.message : String(failure);
      trips.push(new TripwireResult({
        tripped: true,
        guardrail: guardrails[index].name || 'anonymous-guardrail',
        type,
        severity: 'high',
        reason: `Guardrail failed to execute: ${message}`,
        evidence: '',
        action: 'halt',
      }));
    });
    return trips;
  }
}
342
+
343
+ // ---------------------------------------------------------------------------
344
+ // Factory — create engine with standard guardrails
345
+ // ---------------------------------------------------------------------------
346
+
347
/**
 * Build a GuardrailEngine pre-loaded with the standard guardrails:
 * prompt-injection and scope-compliance on the input side, dangerous-command
 * and data-leak on the output side.
 *
 * @returns {GuardrailEngine}
 */
export function createGuardrailEngine() {
  // addInput/addOutput return the engine, so registration can be chained.
  return new GuardrailEngine()
    .addInput(promptInjectionGuardrail)
    .addInput(scopeComplianceGuardrail)
    .addOutput(dangerousCommandGuardrail)
    .addOutput(dataLeakGuardrail);
}