cipher-security 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/bin/cipher.js +10 -0
  2. package/lib/analyze/consistency.js +566 -0
  3. package/lib/analyze/constitution.js +110 -0
  4. package/lib/analyze/sharding.js +251 -0
  5. package/lib/autonomous/agent-tool.js +165 -0
  6. package/lib/autonomous/framework.js +17 -0
  7. package/lib/autonomous/handoff.js +506 -0
  8. package/lib/autonomous/modes/blue.js +26 -0
  9. package/lib/autonomous/modes/red.js +28 -0
  10. package/lib/benchmark/agent.js +88 -26
  11. package/lib/benchmark/baselines.js +3 -0
  12. package/lib/benchmark/claude-code-solver.js +254 -0
  13. package/lib/benchmark/cognitive.js +283 -0
  14. package/lib/benchmark/index.js +12 -2
  15. package/lib/benchmark/knowledge.js +281 -0
  16. package/lib/benchmark/llm.js +156 -15
  17. package/lib/benchmark/models.js +5 -2
  18. package/lib/benchmark/nyu-ctf.js +192 -0
  19. package/lib/benchmark/overthewire.js +347 -0
  20. package/lib/benchmark/picoctf.js +281 -0
  21. package/lib/benchmark/prompts.js +280 -0
  22. package/lib/benchmark/registry.js +219 -0
  23. package/lib/benchmark/remote-solver.js +356 -0
  24. package/lib/benchmark/remote-target.js +263 -0
  25. package/lib/benchmark/reporter.js +35 -0
  26. package/lib/benchmark/runner.js +174 -10
  27. package/lib/benchmark/sandbox.js +35 -0
  28. package/lib/benchmark/scorer.js +22 -4
  29. package/lib/benchmark/solver.js +34 -1
  30. package/lib/benchmark/tools.js +262 -16
  31. package/lib/commands.js +9 -0
  32. package/lib/execution/council.js +434 -0
  33. package/lib/execution/parallel.js +292 -0
  34. package/lib/gates/circuit-breaker.js +135 -0
  35. package/lib/gates/confidence.js +302 -0
  36. package/lib/gates/corrections.js +219 -0
  37. package/lib/gates/self-check.js +245 -0
  38. package/lib/gateway/commands.js +727 -0
  39. package/lib/guardrails/engine.js +364 -0
  40. package/lib/mcp/server.js +349 -3
  41. package/lib/memory/compressor.js +94 -7
  42. package/lib/pipeline/hooks.js +288 -0
  43. package/lib/pipeline/index.js +11 -0
  44. package/lib/review/budget.js +210 -0
  45. package/lib/review/engine.js +526 -0
  46. package/lib/review/layers/acceptance-auditor.js +279 -0
  47. package/lib/review/layers/blind-hunter.js +500 -0
  48. package/lib/review/layers/defense-in-depth.js +209 -0
  49. package/lib/review/layers/edge-case-hunter.js +266 -0
  50. package/lib/review/panel.js +519 -0
  51. package/lib/review/two-stage.js +244 -0
  52. package/lib/session/cost-tracker.js +203 -0
  53. package/lib/session/logger.js +349 -0
  54. package/package.json +1 -1
@@ -0,0 +1,244 @@
1
+ // Copyright (c) 2026 defconxt. All rights reserved.
2
+ // Licensed under AGPL-3.0 — see LICENSE file for details.
3
+ // CIPHER is a trademark of defconxt.
4
+
5
+ /**
6
+ * CIPHER Two-Stage Review Pipeline
7
+ *
8
+ * Stage 1 — Spec Compliance: security requirements gate (auth, validation, error handling)
9
+ * Stage 2 — Quality Gate: code quality patterns (only runs if Stage 1 passes threshold)
10
+ *
11
+ * Uses the review engine from M011 with hookable stages from M013.
12
+ *
13
+ * @module review/two-stage
14
+ */
15
+
16
+ import { createReviewEngine } from './engine.js';
17
+ import { PipelineHooks, HookContext, hookableStage } from '../pipeline/hooks.js';
18
+
19
+ // ---------------------------------------------------------------------------
20
+ // Severity thresholds
21
+ // ---------------------------------------------------------------------------
22
+
23
/**
 * Numeric rank per severity label; a larger number means a more severe level
 * (critical = 4 down to info = 0).
 */
const SEVERITY_RANK = Object.fromEntries(
  ['critical', 'high', 'medium', 'low', 'info'].map((level, idx, all) => [
    level,
    all.length - 1 - idx,
  ]),
);

/**
 * Per-stage gate configuration.
 *
 * @typedef {object} StageConfig
 * @property {string} [minSeverity] - Filter findings at or above this level
 * @property {number} [maxCritical] - Max critical findings before stage fails
 * @property {number} [maxHigh] - Max high findings before stage fails
 */

/**
 * Default gate configuration for each stage, keyed by stage name.
 * Each row below is: stage name, minSeverity, maxCritical, maxHigh.
 *
 * @type {{ compliance: StageConfig, quality: StageConfig }}
 */
const STAGE_DEFAULTS = Object.fromEntries(
  [
    ['compliance', 'medium', 0, 3],
    ['quality', 'low', 0, 5],
  ].map(([stage, minSeverity, maxCritical, maxHigh]) => [
    stage,
    { minSeverity, maxCritical, maxHigh },
  ]),
);
45
+
46
+ // ---------------------------------------------------------------------------
47
+ // Two-Stage Review
48
+ // ---------------------------------------------------------------------------
49
+
50
/**
 * Collect the findings attached to one stage record.
 *
 * Stage records carry the review result under `result`, so the findings live
 * at `stage.result.findings`. A flat `stage.findings` list is still accepted
 * so records built in the older shape keep working.
 *
 * @param {object|null} stage - Stage record or null when the stage did not run
 * @returns {Array} A new array of findings (empty when there are none)
 */
function findingsOfStage(stage) {
  const nested = stage?.result?.findings;
  if (nested) return [...nested];
  const flat = stage?.findings;
  return flat ? [...flat] : [];
}

/**
 * Render the report section for a single stage.
 *
 * @param {string} heading - Section heading line
 * @param {object|null} stage - Stage record or null when the stage did not run
 * @param {string} notRunText - Line to print when `stage` is null
 * @returns {string[]} Report lines, ending with a blank separator line
 */
function stageReportLines(heading, stage, notRunText) {
  const lines = [heading];
  if (stage) {
    lines.push(`Result: ${stage.passed ? '✓ PASSED' : '✗ FAILED'}`);
    lines.push(stage.result?.summary ?? 'No results');
    if (!stage.passed && stage.failReason) {
      lines.push(`Reason: ${stage.failReason}`);
    }
  } else {
    lines.push(notRunText);
  }
  lines.push('');
  return lines;
}

/**
 * Build the JSON summary for a single stage.
 *
 * @param {object|null} stage - Stage record or null when the stage did not run
 * @returns {{ passed: boolean, failReason: string, findings: number, summary: string }|null}
 */
function stageSummary(stage) {
  if (!stage) return null;
  return {
    passed: stage.passed,
    failReason: stage.failReason,
    findings: findingsOfStage(stage).length,
    summary: stage.result?.summary ?? '',
  };
}

/**
 * Two-stage review pipeline result.
 *
 * Holds the record of each stage (`{ passed, failReason, result }`, or null
 * when the stage did not run), the overall gate verdict, the reason stage 2
 * was skipped (if it was), and the elapsed time in milliseconds.
 */
export class TwoStageResult {
  /**
   * @param {object} [fields]
   * @param {object|null} [fields.stage1] - Stage 1 record
   * @param {object|null} [fields.stage2] - Stage 2 record
   * @param {boolean} [fields.gatesPassed] - True when both stages passed
   * @param {string} [fields.abortReason] - Why stage 2 was skipped, if it was
   * @param {number} [fields.totalTime] - Elapsed time in milliseconds
   */
  constructor({ stage1 = null, stage2 = null, gatesPassed = false, abortReason = '', totalTime = 0 } = {}) {
    this.stage1 = stage1;
    this.stage2 = stage2;
    this.gatesPassed = gatesPassed;
    this.abortReason = abortReason;
    this.totalTime = totalTime;
  }

  /**
   * Findings of stage 1 followed by findings of stage 2.
   *
   * @returns {Array}
   */
  get allFindings() {
    return [...findingsOfStage(this.stage1), ...findingsOfStage(this.stage2)];
  }

  /**
   * Render a human-readable, multi-line text report.
   *
   * @returns {string}
   */
  toReport() {
    const lines = [
      '═══════════════════════════════════════════════════════',
      ' CIPHER Two-Stage Review Report',
      '═══════════════════════════════════════════════════════',
      '',
      ...stageReportLines('── Stage 1: Spec Compliance ──', this.stage1, 'Not executed'),
      // Stage 2 is the only stage that can be skipped, so only it reports the abort reason.
      ...stageReportLines(
        '── Stage 2: Quality Gate ──',
        this.stage2,
        this.abortReason ? `Skipped: ${this.abortReason}` : 'Not executed',
      ),
      '───────────────────────────────────────────────────────',
      `Overall: ${this.gatesPassed ? '✓ ALL GATES PASSED' : '✗ GATES FAILED'}`,
      `Total time: ${this.totalTime}ms`,
      `Total findings: ${this.allFindings.length}`,
    ];

    return lines.join('\n');
  }

  /**
   * Compact, serializable summary: verdict, timing, and per-stage counts
   * (finding lists themselves are not included).
   *
   * @returns {object}
   */
  toJSON() {
    return {
      gatesPassed: this.gatesPassed,
      abortReason: this.abortReason,
      totalTime: this.totalTime,
      totalFindings: this.allFindings.length,
      stage1: stageSummary(this.stage1),
      stage2: stageSummary(this.stage2),
    };
  }
}
133
+
134
/**
 * Turn anything that was thrown into a non-empty, printable reason.
 *
 * @param {unknown} err - The caught value (not necessarily an Error)
 * @returns {string}
 */
function describeStageError(err) {
  const text = err instanceof Error ? err.message : String(err);
  return text || 'Unknown error';
}

/**
 * Compare severity counts against a stage's limits.
 * Critical findings are checked before high findings; the first breach wins.
 *
 * @param {{ critical?: number, high?: number }} counts
 * @param {StageConfig} cfg
 * @returns {string} Failure reason, or '' when every limit is respected
 */
function findThresholdBreach(counts, cfg) {
  if (cfg.maxCritical !== undefined && counts.critical > cfg.maxCritical) {
    return `${counts.critical} critical findings exceed threshold of ${cfg.maxCritical}`;
  }
  if (cfg.maxHigh !== undefined && counts.high > cfg.maxHigh) {
    return `${counts.high} high findings exceed threshold of ${cfg.maxHigh}`;
  }
  return '';
}

/**
 * Run one review stage through the pipeline hooks and gate its result.
 *
 * @param {object} args
 * @param {PipelineHooks} args.hooks - Hook registry the stage is wrapped with
 * @param {string} args.hookName - Stage name passed to `hookableStage`
 * @param {string} args.stageLabel - Value of `stage` in the context object passed to the hooked stage
 * @param {(target: string) => Promise<object>} args.review - Performs the review
 * @param {string} args.input - Review target
 * @param {StageConfig} args.cfg - Limits for this stage
 * @returns {Promise<{ passed: boolean, failReason: string, result: object|null }>}
 */
async function runGatedStage({ hooks, hookName, stageLabel, review, input, cfg }) {
  const hooked = hookableStage(hooks, hookName, review);

  let result;
  try {
    result = await hooked(input, { stage: stageLabel });
  } catch (err) {
    return { passed: false, failReason: describeStageError(err), result: null };
  }

  // Fail closed: without counts the gate cannot be evaluated, so the stage must not pass.
  const counts = result?.severityCounts;
  if (!counts) {
    return {
      passed: false,
      failReason: 'Review result is missing severityCounts',
      result: result ?? null,
    };
  }

  const failReason = findThresholdBreach(counts, cfg);
  return { passed: failReason === '', failReason, result };
}

/**
 * Run a two-stage code review.
 *
 * Stage 1 (compliance) always runs. Stage 2 (quality) runs only when stage 1
 * passed its limits; otherwise it is skipped and the result carries an abort
 * reason. Stage failures (engine/hook errors or limit breaches) are reported
 * in the returned result rather than thrown.
 *
 * @param {string} input - File path, directory, or code string
 * @param {object} [options]
 * @param {StageConfig} [options.compliance] - Stage 1 config
 * @param {StageConfig} [options.quality] - Stage 2 config
 * @param {PipelineHooks} [options.hooks] - Pipeline hooks
 * @param {string} [options.language] - Override language detection
 * @returns {Promise<TwoStageResult>}
 */
export async function twoStageReview(input, options = {}) {
  const t0 = Date.now();
  const complianceCfg = { ...STAGE_DEFAULTS.compliance, ...options.compliance };
  const qualityCfg = { ...STAGE_DEFAULTS.quality, ...options.quality };
  const hooks = options.hooks ?? new PipelineHooks();

  const engine = await createReviewEngine();

  // Both stages use the same engine; only the severity filter differs.
  const reviewerFor = (cfg) => async (target) => {
    return engine.review(target, {
      language: options.language,
      minSeverity: cfg.minSeverity,
    });
  };

  // Stage 1: Spec Compliance
  const stage1 = await runGatedStage({
    hooks,
    hookName: 'review',
    stageLabel: 'compliance',
    review: reviewerFor(complianceCfg),
    input,
    cfg: complianceCfg,
  });

  // Stage 2: Quality Gate — only if Stage 1 passed
  let stage2 = null;
  let abortReason = '';

  if (stage1.passed) {
    stage2 = await runGatedStage({
      hooks,
      hookName: 'analyze',
      stageLabel: 'quality',
      review: reviewerFor(qualityCfg),
      input,
      cfg: qualityCfg,
    });
  } else {
    abortReason = `Stage 1 failed: ${stage1.failReason}`;
  }

  return new TwoStageResult({
    stage1,
    stage2,
    gatesPassed: stage1.passed && (stage2?.passed ?? false),
    abortReason,
    totalTime: Date.now() - t0,
  });
}
@@ -0,0 +1,203 @@
1
+ // Copyright (c) 2026 defconxt. All rights reserved.
2
+ // Licensed under AGPL-3.0 — see LICENSE file for details.
3
+ // CIPHER is a trademark of defconxt.
4
+
5
+ /**
6
+ * Cost Tracker — Per-session spending limits for autonomous agents.
7
+ *
8
+ * Tracks token usage, estimates cost per interaction using model-specific
9
+ * pricing, and enforces a configurable budget limit. Integrates with
10
+ * SessionLogger for persistent cost records.
11
+ *
12
+ * @module session/cost-tracker
13
+ */
14
+
15
+ // ---------------------------------------------------------------------------
16
+ // Model pricing (per 1M tokens)
17
+ // ---------------------------------------------------------------------------
18
+
19
/**
 * Token pricing by model family, in USD per 1M tokens.
 *
 * Each row below is: family key, input price, output price. Rows are grouped
 * as Claude models, OpenAI models, local models (free), then the default
 * fallback. Key order is kept as listed because CostTracker.getPricing returns
 * the first key contained in the model name (so 'gpt-4o' must precede 'gpt-4').
 *
 * @type {Record<string, { input: number, output: number }>}
 */
export const MODEL_PRICING = Object.fromEntries(
  [
    ['claude-sonnet', 3.0, 15.0],
    ['claude-haiku', 0.25, 1.25],
    ['claude-opus', 15.0, 75.0],
    ['gpt-4o', 2.5, 10.0],
    ['gpt-4', 5.0, 15.0],
    ['gpt-3.5', 0.5, 1.5],
    ['ollama', 0, 0],
    ['local', 0, 0],
    ['default', 3.0, 15.0],
  ].map(([family, input, output]) => [family, { input, output }]),
);
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // PriceLimitExceeded
41
+ // ---------------------------------------------------------------------------
42
+
43
/**
 * Error raised once a session has spent more than its configured budget.
 * Carries the spend, the limit, and the session ID as own properties.
 */
export class PriceLimitExceeded extends Error {
  /**
   * @param {number} currentCost - Cumulative session cost in USD
   * @param {number} limit - Budget limit in USD
   * @param {string} sessionId - Session identifier; omitted from the message when empty
   */
  constructor(currentCost, limit, sessionId = '') {
    const spent = currentCost.toFixed(4);
    const cap = limit.toFixed(2);
    const sessionNote = sessionId ? ` (session: ${sessionId})` : '';

    super(`Session cost $${spent} exceeds budget limit $${cap}${sessionNote}`);

    Object.assign(this, {
      name: 'PriceLimitExceeded',
      currentCost,
      limit,
      sessionId,
    });
  }
}
63
+
64
+ // ---------------------------------------------------------------------------
65
+ // CostTracker
66
+ // ---------------------------------------------------------------------------
67
+
68
/**
 * Decide the session budget in USD.
 *
 * An explicit budget wins. Otherwise the CIPHER_PRICE_LIMIT environment
 * variable is parsed, and if that is unset or not a number the default of $5
 * is used. A NaN budget is never returned: every `cost <= NaN` comparison is
 * false, which would make the very first tracked interaction look over budget.
 *
 * @param {number|undefined|null} requested - Budget passed by the caller
 * @returns {number}
 */
function resolveBudgetUSD(requested) {
  const FALLBACK_USD = 5;
  if (requested != null && !Number.isNaN(requested)) return requested;
  const fromEnv = Number.parseFloat(process.env.CIPHER_PRICE_LIMIT || '');
  return Number.isNaN(fromEnv) ? FALLBACK_USD : fromEnv;
}

/**
 * Per-session cost tracker with budget enforcement.
 *
 * Accumulates token counts and estimated cost for one session and throws
 * PriceLimitExceeded as soon as the cumulative cost goes above the budget.
 */
export class CostTracker {
  /**
   * @param {object} [opts]
   * @param {number} [opts.budgetUSD] - Maximum spend (default from env or $5)
   * @param {string} [opts.model] - Model identifier for pricing lookup
   * @param {string} [opts.sessionId] - Associated session ID
   * @param {import('./logger.js').SessionLogger} [opts.logger] - Optional session logger
   */
  constructor(opts = {}) {
    this._budgetUSD = resolveBudgetUSD(opts.budgetUSD);
    this._model = opts.model || 'default';
    this._sessionId = opts.sessionId || '';
    this._logger = opts.logger || null;
    this._totalCostUSD = 0;
    this._totalTokensIn = 0;
    this._totalTokensOut = 0;
    this._interactionCount = 0;
  }

  /** Current total cost. */
  get totalCost() { return this._totalCostUSD; }

  /** Configured budget. */
  get budget() { return this._budgetUSD; }

  /** Remaining budget (never negative). */
  get remaining() { return Math.max(0, this._budgetUSD - this._totalCostUSD); }

  /** Total input tokens tracked. */
  get totalTokensIn() { return this._totalTokensIn; }

  /** Total output tokens tracked. */
  get totalTokensOut() { return this._totalTokensOut; }

  /** Number of interactions tracked. */
  get interactionCount() { return this._interactionCount; }

  /**
   * Look up pricing for the configured model.
   *
   * Returns the first MODEL_PRICING entry whose key is a substring of the
   * lower-cased model name, or the `default` entry when none matches.
   *
   * NOTE(review): IDs such as 'claude-3-opus-…' do not contain the key
   * 'claude-opus' and therefore get the default rate — confirm whether that
   * is intended.
   *
   * @returns {{ input: number, output: number }}
   */
  getPricing() {
    const modelLower = this._model.toLowerCase();
    const match = Object.entries(MODEL_PRICING).find(([key]) => modelLower.includes(key));
    return match ? match[1] : MODEL_PRICING.default;
  }

  /**
   * Calculate cost for a given number of tokens (prices are per 1M tokens).
   *
   * @param {number} tokensIn
   * @param {number} tokensOut
   * @returns {number} Cost in USD
   */
  calculateCost(tokensIn, tokensOut) {
    const { input, output } = this.getPricing();
    return (tokensIn / 1_000_000) * input + (tokensOut / 1_000_000) * output;
  }

  /**
   * Track an interaction's token usage and check budget.
   *
   * The usage is always added to the running totals (and logged, when a
   * logger is configured) before the budget check, so the totals stay
   * accurate even when this call throws.
   *
   * @param {number} tokensIn
   * @param {number} tokensOut
   * @returns {{ costUSD: number, cumulativeCostUSD: number, remaining: number, withinBudget: boolean }}
   * @throws {PriceLimitExceeded} If cumulative cost exceeds budget
   */
  track(tokensIn, tokensOut) {
    const costUSD = this.calculateCost(tokensIn, tokensOut);
    this._totalCostUSD += costUSD;
    this._totalTokensIn += tokensIn;
    this._totalTokensOut += tokensOut;
    this._interactionCount += 1;

    const withinBudget = this._totalCostUSD <= this._budgetUSD;

    // Log to session if available
    if (this._logger) {
      this._logger.logCost(costUSD, this._budgetUSD, withinBudget ? 'ok' : 'exceeded');
    }

    if (!withinBudget) {
      throw new PriceLimitExceeded(this._totalCostUSD, this._budgetUSD, this._sessionId);
    }

    return {
      costUSD,
      cumulativeCostUSD: this._totalCostUSD,
      remaining: this.remaining,
      withinBudget,
    };
  }

  /**
   * Check if the budget would be exceeded by additional tokens.
   * Does NOT track — just estimates.
   *
   * @param {number} tokensIn
   * @param {number} tokensOut
   * @returns {{ wouldExceed: boolean, estimatedCost: number, projectedTotal: number }}
   */
  wouldExceed(tokensIn, tokensOut) {
    const estimatedCost = this.calculateCost(tokensIn, tokensOut);
    const projectedTotal = this._totalCostUSD + estimatedCost;
    return {
      wouldExceed: projectedTotal > this._budgetUSD,
      estimatedCost,
      projectedTotal,
    };
  }

  /**
   * Return a status summary. Dollar amounts are rounded to 4 decimals and
   * utilization to 2 decimals; utilization is 0 when the budget is not positive.
   *
   * @returns {object}
   */
  getStatus() {
    const round4 = (usd) => Math.round(usd * 10000) / 10000;
    return {
      totalCostUSD: round4(this._totalCostUSD),
      budgetUSD: this._budgetUSD,
      remainingUSD: round4(this.remaining),
      totalTokensIn: this._totalTokensIn,
      totalTokensOut: this._totalTokensOut,
      interactionCount: this._interactionCount,
      model: this._model,
      utilizationPct: this._budgetUSD > 0
        ? Math.round((this._totalCostUSD / this._budgetUSD) * 10000) / 100
        : 0,
    };
  }
}