cipher-security 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cipher.js +566 -0
- package/lib/api/billing.js +321 -0
- package/lib/api/compliance.js +693 -0
- package/lib/api/controls.js +1401 -0
- package/lib/api/index.js +49 -0
- package/lib/api/marketplace.js +467 -0
- package/lib/api/openai-proxy.js +383 -0
- package/lib/api/server.js +685 -0
- package/lib/autonomous/feedback-loop.js +554 -0
- package/lib/autonomous/framework.js +512 -0
- package/lib/autonomous/index.js +97 -0
- package/lib/autonomous/leaderboard.js +594 -0
- package/lib/autonomous/modes/architect.js +412 -0
- package/lib/autonomous/modes/blue.js +386 -0
- package/lib/autonomous/modes/incident.js +684 -0
- package/lib/autonomous/modes/privacy.js +369 -0
- package/lib/autonomous/modes/purple.js +294 -0
- package/lib/autonomous/modes/recon.js +250 -0
- package/lib/autonomous/parallel.js +587 -0
- package/lib/autonomous/researcher.js +583 -0
- package/lib/autonomous/runner.js +955 -0
- package/lib/autonomous/scheduler.js +615 -0
- package/lib/autonomous/task-parser.js +127 -0
- package/lib/autonomous/validators/forensic.js +266 -0
- package/lib/autonomous/validators/osint.js +216 -0
- package/lib/autonomous/validators/privacy.js +296 -0
- package/lib/autonomous/validators/purple.js +298 -0
- package/lib/autonomous/validators/sigma.js +248 -0
- package/lib/autonomous/validators/threat-model.js +363 -0
- package/lib/benchmark/agent.js +119 -0
- package/lib/benchmark/baselines.js +43 -0
- package/lib/benchmark/builder.js +143 -0
- package/lib/benchmark/config.js +35 -0
- package/lib/benchmark/coordinator.js +91 -0
- package/lib/benchmark/index.js +20 -0
- package/lib/benchmark/llm.js +58 -0
- package/lib/benchmark/models.js +137 -0
- package/lib/benchmark/reporter.js +103 -0
- package/lib/benchmark/runner.js +103 -0
- package/lib/benchmark/sandbox.js +96 -0
- package/lib/benchmark/scorer.js +32 -0
- package/lib/benchmark/solver.js +166 -0
- package/lib/benchmark/tools.js +62 -0
- package/lib/bot/bot.js +238 -0
- package/lib/brand.js +105 -0
- package/lib/commands.js +100 -0
- package/lib/complexity.js +377 -0
- package/lib/config.js +213 -0
- package/lib/gateway/client.js +309 -0
- package/lib/gateway/commands.js +991 -0
- package/lib/gateway/config-validate.js +109 -0
- package/lib/gateway/gateway.js +367 -0
- package/lib/gateway/index.js +62 -0
- package/lib/gateway/mode.js +309 -0
- package/lib/gateway/plugins.js +222 -0
- package/lib/gateway/prompt.js +214 -0
- package/lib/mcp/server.js +262 -0
- package/lib/memory/compressor.js +425 -0
- package/lib/memory/engine.js +763 -0
- package/lib/memory/evolution.js +668 -0
- package/lib/memory/index.js +58 -0
- package/lib/memory/orchestrator.js +506 -0
- package/lib/memory/retriever.js +515 -0
- package/lib/memory/synthesizer.js +333 -0
- package/lib/pipeline/async-scanner.js +510 -0
- package/lib/pipeline/binary-analysis.js +1043 -0
- package/lib/pipeline/dom-xss-scanner.js +435 -0
- package/lib/pipeline/github-actions.js +792 -0
- package/lib/pipeline/index.js +124 -0
- package/lib/pipeline/osint.js +498 -0
- package/lib/pipeline/sarif.js +373 -0
- package/lib/pipeline/scanner.js +880 -0
- package/lib/pipeline/template-manager.js +525 -0
- package/lib/pipeline/xss-scanner.js +353 -0
- package/lib/setup-wizard.js +288 -0
- package/package.json +31 -0
|
@@ -0,0 +1,668 @@
|
|
|
1
|
+
// Copyright (c) 2026 defconxt. All rights reserved.
|
|
2
|
+
// Licensed under AGPL-3.0 — see LICENSE file for details.
|
|
3
|
+
// CIPHER is a trademark of defconxt.
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* CIPHER Evolution Engine — Self-Improving Skill System
|
|
7
|
+
*
|
|
8
|
+
* Inspired by MetaClaw's RL-driven skill evolution:
|
|
9
|
+
* - PRM Scorer: Evaluates CIPHER outputs for quality (+1/-1/0)
|
|
10
|
+
* - Skill Evolver: Generates new skills from failed engagements
|
|
11
|
+
* - Failure Pipeline: Captures failures → analyzes patterns → creates skills
|
|
12
|
+
*
|
|
13
|
+
* Ported from Python memory/core/evolution.py.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { randomUUID } from 'node:crypto';
|
|
17
|
+
import { mkdirSync, writeFileSync, appendFileSync, chmodSync } from 'node:fs';
|
|
18
|
+
import { dirname, join } from 'node:path';
|
|
19
|
+
import { extractSecurityEntities } from './compressor.js';
|
|
20
|
+
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// ScoredResponse
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
/**
 * Value object describing one engagement response together with its score.
 *
 * Every field is optional; a field that is `null` or `undefined` in `opts`
 * falls back to the default listed below.
 */
class ScoredResponse {
  /**
   * @param {object} [opts]
   * @param {string} [opts.responseId] - Defaults to a freshly generated UUID.
   * @param {string} [opts.query] - Defaults to ''.
   * @param {string} [opts.response] - Defaults to ''.
   * @param {string} [opts.mode] - Defaults to ''.
   * @param {string} [opts.skillUsed] - Defaults to ''.
   * @param {number} [opts.score] - Defaults to 0.0.
   * @param {number[]} [opts.votes] - Defaults to a new empty array.
   * @param {string} [opts.feedback] - Defaults to ''.
   * @param {string} [opts.timestamp] - Defaults to the current time as an ISO-8601 string.
   */
  constructor(opts = {}) {
    // Keys are listed in the order they should appear on the instance.
    Object.assign(this, {
      responseId: opts.responseId ?? randomUUID(),
      query: opts.query ?? '',
      response: opts.response ?? '',
      mode: opts.mode ?? '',
      skillUsed: opts.skillUsed ?? '',
      score: opts.score ?? 0.0,
      votes: opts.votes ?? [],
      feedback: opts.feedback ?? '',
      timestamp: opts.timestamp ?? new Date().toISOString(),
    });
  }
}
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// ResponseScorer
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
/**
 * Process Reward Model (PRM) style scorer for CIPHER outputs.
 *
 * A response receives three independent heuristic votes, each one of
 * +1 (helpful), 0 (ambiguous) or -1 (unhelpful):
 *   1. Structural — are the elements expected for the mode present?
 *   2. Content    — quality signals, refusal/deflection signals, length, entities.
 *   3. Domain     — plausibility of ATT&CK IDs, CVE years and Sigma rule layout.
 *
 * The final score is the majority of the three votes; ties resolve to 0.
 */
class ResponseScorer {
  // Substrings (matched case-insensitively) a response should contain, per mode.
  static MODE_REQUIREMENTS = {
    RED: ['ATT&CK', 'DETECTION OPPORTUNITIES'],
    BLUE: ['Sigma', 'detection', 'rule'],
    INCIDENT: ['Triage', 'Containment', 'Evidence'],
    PRIVACY: ['GDPR', 'data flow', 'DPIA'],
    RECON: ['source', 'confidence'],
    ARCHITECT: ['threat model', 'trust boundary'],
    PURPLE: ['detection coverage', 'gap'],
  };

  // Patterns whose presence raises the content score.
  static POSITIVE_SIGNALS = [
    /CVE-\d{4}-\d{4,}/, // Specific CVE reference
    /T\d{4}(?:\.\d{3})?/, // MITRE ATT&CK ID
    /CIS\s+\d+\.\d+/, // CIS Control reference
    /NIST\s+\d{3}-\d{2,3}/, // NIST SP reference
    /```/, // Code blocks (showing work)
    /\[CONFIRMED\]|\[INFERRED\]/, // Confidence tags
    /Severity\s*:\s*\w+/, // Finding format
  ];

  // Patterns whose presence lowers the content score.
  static NEGATIVE_SIGNALS = [
    /I (?:can't|cannot|am unable to)/i, // Refusals
    /as an AI/i, // Disclaimers
    /I don't have (?:access|information)/i, // Hedging
    /consult (?:a |your )(?:professional|expert)/i, // Deflection
  ];

  /**
   * Both options are stored on the instance; no method of this class reads
   * them, scoring here is purely heuristic.
   *
   * @param {{ llmClient?: any, prmVotes?: number }} opts
   */
  constructor(opts = {}) {
    this.llmClient = opts.llmClient ?? null;
    this.prmVotes = opts.prmVotes ?? 3;
  }

  /**
   * Score a CIPHER response with the three heuristic votes.
   *
   * @param {string} query - The prompt that produced the response.
   * @param {string} response - The response text to evaluate.
   * @param {string} mode - Operating mode (a key of MODE_REQUIREMENTS) or ''.
   * @param {string} skillUsed - Name of the skill that produced the response.
   * @returns {ScoredResponse} Score, individual votes and textual feedback.
   */
  score(query, response, mode = '', skillUsed = '') {
    // Order matters: _generateFeedback reads votes by position.
    const votes = [
      this._scoreStructural(response, mode),
      this._scoreContent(response, query),
      this._scoreDomain(response, mode),
    ];

    return new ScoredResponse({
      query,
      response,
      mode,
      skillUsed,
      score: ResponseScorer._majorityVote(votes),
      votes,
      feedback: this._generateFeedback(votes, response, mode),
    });
  }

  /**
   * Check whether the response has the structural elements expected for `mode`.
   *
   * For an empty or unknown mode a generic check is used instead: the response
   * must contain a Markdown-like marker and be longer than 100 characters.
   *
   * @private
   * @param {string} response
   * @param {string} mode
   * @returns {number} 1.0, 0.0 or -1.0 (the generic check never returns -1.0).
   */
  _scoreStructural(response, mode) {
    const requirementsByMode = ResponseScorer.MODE_REQUIREMENTS;
    // Own-property test: `in` would also accept inherited names such as
    // "constructor" or "toString" and then crash iterating a function.
    const isKnownMode =
      Boolean(mode) && Object.prototype.hasOwnProperty.call(requirementsByMode, mode);

    if (!isKnownMode) {
      const hasStructure = ['```', '##', '- ', '1.', '|'].some((marker) =>
        response.includes(marker),
      );
      return hasStructure && response.length > 100 ? 1.0 : 0.0;
    }

    const requirements = requirementsByMode[mode];
    const haystack = response.toLowerCase();
    const matched = requirements.filter((req) => haystack.includes(req.toLowerCase())).length;
    const ratio = matched / requirements.length;

    if (ratio >= 0.7) return 1.0;
    if (ratio >= 0.3) return 0.0;
    return -1.0;
  }

  /**
   * Check whether the response is actionable and specific.
   *
   * Adds 0.3 per positive signal, subtracts 0.5 per negative signal, applies
   * a length adjustment and adds 0.2 when `extractSecurityEntities` finds at
   * least one entity. The total is then bucketed at +/-0.3.
   *
   * @private
   * @param {string} response
   * @param {string} query - Accepted for signature symmetry; not used.
   * @returns {number} 1.0, 0.0 or -1.0.
   */
  _scoreContent(response, query) {
    let score = 0.0;

    for (const pattern of ResponseScorer.POSITIVE_SIGNALS) {
      if (pattern.test(response)) score += 0.3;
    }

    for (const pattern of ResponseScorer.NEGATIVE_SIGNALS) {
      if (pattern.test(response)) score -= 0.5;
    }

    // Very short answers are penalized, longer ones get a small bonus.
    if (response.length < 50) {
      score -= 0.5;
    } else if (response.length > 200) {
      score += 0.1;
    }

    // Specificity: any extracted entity counts once.
    const entities = extractSecurityEntities(response);
    const entityCount = Object.values(entities).reduce((sum, arr) => sum + arr.length, 0);
    if (entityCount > 0) score += 0.2;

    if (score > 0.3) return 1.0;
    if (score < -0.3) return -1.0;
    return 0.0;
  }

  /**
   * Check domain-specific plausibility of references in the response.
   *
   * - ATT&CK IDs: +0.3 if every `Tnnnn` number lies in 1001..1999, else -0.2.
   * - CVE IDs: +0.2 if every year lies in 1999..2030, else -0.3.
   * - BLUE mode with a `detection:` key: +0.5 when `title:`, `logsource:` and
   *   `condition:` are all present.
   *
   * @private
   * @param {string} response
   * @param {string} mode - An empty mode short-circuits to 0.0.
   * @returns {number} 1.0, 0.0 or -1.0 (bucketed at +/-0.2).
   */
  _scoreDomain(response, mode) {
    if (!mode) return 0.0;

    let score = 0.0;

    const mitreRefs = response.match(/T\d{4}(?:\.\d{3})?/g) || [];
    if (mitreRefs.length > 0) {
      const allValid = mitreRefs.every((ref) => {
        const numMatch = ref.match(/(\d{4})/);
        if (!numMatch) return false;
        const num = parseInt(numMatch[1], 10);
        return num >= 1001 && num <= 1999;
      });
      score += allValid ? 0.3 : -0.2;
    }

    const cves = [...response.matchAll(/CVE-(\d{4})-(\d+)/g)];
    if (cves.length > 0) {
      const allValid = cves.every(([, year]) => {
        const y = parseInt(year, 10);
        return y >= 1999 && y <= 2030;
      });
      score += allValid ? 0.2 : -0.3;
    }

    if (mode === 'BLUE' && response.toLowerCase().includes('detection:')) {
      const hasTitle = /title:/i.test(response);
      const hasLogsource = /logsource:/i.test(response);
      const hasCondition = /condition:/i.test(response);
      if (hasTitle && hasLogsource && hasCondition) {
        score += 0.5;
      }
    }

    if (score > 0.2) return 1.0;
    if (score < -0.2) return -1.0;
    return 0.0;
  }

  /**
   * Majority vote over a list of scores.
   *
   * @param {number[]} votes
   * @returns {number} The most frequent value, or 0.0 when `votes` is empty
   *   or when two or more values share the highest count.
   */
  static _majorityVote(votes) {
    if (!votes || votes.length === 0) return 0.0;

    const counter = new Map();
    for (const v of votes) {
      counter.set(v, (counter.get(v) ?? 0) + 1);
    }

    let topValue = 0.0;
    let topCount = 0;
    for (const [value, count] of counter) {
      if (count > topCount) {
        topCount = count;
        topValue = value;
      }
    }

    // A shared top count means there is no majority: report "ambiguous".
    let tiedCount = 0;
    for (const count of counter.values()) {
      if (count === topCount) tiedCount++;
    }
    if (tiedCount > 1) return 0.0;

    return topValue;
  }

  /**
   * Turn the positional votes into a human-readable summary.
   *
   * @private
   * @param {number[]} votes - [structural, content, domain].
   * @param {string} response - Accepted for signature symmetry; not used.
   * @param {string} mode - Interpolated into the structural message.
   * @returns {string} Issues joined by '; ', or a fixed "meets standards" text.
   */
  _generateFeedback(votes, response, mode) {
    const issues = [];

    if (votes[0] < 0) {
      issues.push(`Missing required structural elements for ${mode} mode`);
    }
    if (votes[1] < 0) {
      issues.push('Response lacks specificity or contains deflections');
    }
    if (votes.length > 2 && votes[2] < 0) {
      issues.push('Domain-specific references may be inaccurate');
    }

    if (issues.length === 0) return 'Response meets quality standards';
    return issues.join('; ');
  }
}
|
|
292
|
+
|
|
293
|
+
// ---------------------------------------------------------------------------
|
|
294
|
+
// EvolutionRecord
|
|
295
|
+
// ---------------------------------------------------------------------------
|
|
296
|
+
|
|
297
|
+
/**
 * Summary of a single evolution run: when it happened, how many failures
 * were looked at, and which skills and failure patterns came out of it.
 */
class EvolutionRecord {
  /**
   * @param {object} [opts]
   * @param {string} [opts.timestamp] - Defaults to ''.
   * @param {number} [opts.failuresAnalyzed] - Defaults to 0.
   * @param {number} [opts.skillsGenerated] - Defaults to 0.
   * @param {string[]} [opts.skillNames] - Defaults to a new empty array.
   * @param {string[]} [opts.failurePatterns] - Defaults to a new empty array.
   */
  constructor(opts = {}) {
    // null/undefined fall back to the default; any other value is kept as-is.
    Object.assign(this, {
      timestamp: opts.timestamp ?? '',
      failuresAnalyzed: opts.failuresAnalyzed ?? 0,
      skillsGenerated: opts.skillsGenerated ?? 0,
      skillNames: opts.skillNames ?? [],
      failurePatterns: opts.failurePatterns ?? [],
    });
  }
}
|
|
306
|
+
|
|
307
|
+
// ---------------------------------------------------------------------------
|
|
308
|
+
// SkillEvolver
|
|
309
|
+
// ---------------------------------------------------------------------------
|
|
310
|
+
|
|
311
|
+
/**
 * Generates new CIPHER skills from failed engagement patterns.
 *
 * Given a list of failed responses, the evolver:
 *   1. Classifies each failure into one of the FAILURE_TEMPLATES patterns.
 *   2. De-duplicates by (pattern, mode).
 *   3. Builds skill metadata from the matching template.
 *   4. Unless in dry-run mode, writes a skill scaffold (SKILL.md,
 *      scripts/agent.js, references/api-reference.md) under
 *      `<skillsDir>/<domain>/techniques/<name>`.
 *   5. Records the run in memory and, when `historyPath` is set, as one JSON
 *      line appended to that file.
 */
class SkillEvolver {
  // Failure pattern → skill template. `{technique}`, `{target_type}` and
  // `{vuln_class}` are placeholders, all filled with the extracted technique.
  static FAILURE_TEMPLATES = {
    missing_detection_rule: {
      name: 'writing-detection-rules-for-{technique}',
      domain: 'detection-engineering',
      description: 'Generate Sigma/KQL/SPL detection rules for {technique}',
      template: 'detection',
    },
    incomplete_finding: {
      name: 'complete-vulnerability-assessment-{target_type}',
      domain: 'vulnerability-management',
      description: 'Thorough vulnerability assessment with proof-of-concept for {target_type}',
      template: 'finding',
    },
    missing_mitre_mapping: {
      name: 'mapping-{technique}-to-attack',
      domain: 'purple-team',
      description: 'Map observed technique to MITRE ATT&CK framework with detection',
      template: 'mapping',
    },
    weak_remediation: {
      name: 'remediation-guidance-{vuln_class}',
      domain: 'vulnerability-management',
      description: 'Specific remediation steps with verification for {vuln_class}',
      template: 'remediation',
    },
  };

  /**
   * `llmClient` is stored on the instance; no method of this class reads it.
   *
   * @param {{ skillsDir: string, llmClient?: any, maxNewSkills?: number, historyPath?: string }} opts
   */
  constructor(opts = {}) {
    this.skillsDir = opts.skillsDir ?? '';
    this.llmClient = opts.llmClient ?? null;
    this.maxNewSkills = opts.maxNewSkills ?? 3;
    this.historyPath = opts.historyPath ?? null;
    /** @type {EvolutionRecord[]} */
    this.evolutionHistory = [];
  }

  /**
   * Check if the success rate is low enough to warrant skill evolution.
   *
   * @param {ScoredResponse[]} scoredResponses
   * @param {number} threshold - Evolve when the share of responses with
   *   `score > 0` is strictly below this value.
   * @returns {boolean} Always false for an empty or missing list.
   */
  shouldEvolve(scoredResponses, threshold = 0.4) {
    if (!scoredResponses || scoredResponses.length === 0) return false;
    const successes = scoredResponses.filter((s) => s.score > 0).length;
    return successes / scoredResponses.length < threshold;
  }

  /**
   * Analyze failures and generate new skills.
   *
   * At most `maxNewSkills` of the de-duplicated patterns are turned into
   * skills. Filesystem errors from writing a skill propagate to the caller.
   *
   * @param {ScoredResponse[]} failedResponses
   * @param {boolean} dryRun — If true, don't write skill files (the run is
   *   still recorded in the history).
   * @returns {object[]} Generated skill metadata
   */
  evolve(failedResponses, dryRun = false) {
    if (!failedResponses || failedResponses.length === 0) return [];

    const patterns = this._analyzeFailures(failedResponses);

    const generated = [];
    for (const [pattern, context] of patterns.slice(0, this.maxNewSkills)) {
      const skillMeta = this._generateSkill(pattern, context);
      if (!skillMeta) continue;
      if (!dryRun) this._writeSkill(skillMeta);
      generated.push(skillMeta);
    }

    const record = new EvolutionRecord({
      timestamp: new Date().toISOString(),
      failuresAnalyzed: failedResponses.length,
      skillsGenerated: generated.length,
      skillNames: generated.map((s) => s.name || ''),
      failurePatterns: patterns.map(([p]) => p),
    });
    this.evolutionHistory.push(record);

    if (this.historyPath) {
      this._persistHistory(record);
    }

    return generated;
  }

  /**
   * Classify each failed response into a failure pattern and de-duplicate.
   *
   * Classification, first match wins:
   *   - BLUE mode and feedback mentions "detection" → missing_detection_rule
   *   - feedback mentions "specificity"            → incomplete_finding
   *   - feedback mentions "structural" and mode set → missing_mitre_mapping
   *   - otherwise                                   → weak_remediation
   *
   * Missing `query` / `feedback` fields are treated as empty strings so that
   * plain objects can be passed as well as ScoredResponse instances.
   *
   * @private
   * @param {ScoredResponse[]} responses
   * @returns {Array<[string, object]>} Unique [pattern, context] pairs in
   *   first-seen order; uniqueness is by pattern plus mode.
   */
  _analyzeFailures(responses) {
    const seen = new Set();
    const unique = [];

    for (const resp of responses) {
      const query = String(resp.query ?? '');
      const feedback = String(resp.feedback ?? '');
      const loweredFeedback = feedback.toLowerCase();

      const context = {
        query: query.slice(0, 200),
        mode: resp.mode,
        feedback,
        skill_used: resp.skillUsed,
      };

      let pattern;
      if (resp.mode === 'BLUE' && loweredFeedback.includes('detection')) {
        pattern = 'missing_detection_rule';
      } else if (loweredFeedback.includes('specificity')) {
        pattern = 'incomplete_finding';
      } else if (loweredFeedback.includes('structural') && resp.mode) {
        pattern = 'missing_mitre_mapping';
      } else {
        pattern = 'weak_remediation';
      }

      const key = `${pattern}:${context.mode || ''}`;
      if (seen.has(key)) continue;
      seen.add(key);
      unique.push([pattern, context]);
    }

    return unique;
  }

  /**
   * Build skill metadata from a failure pattern and its context.
   *
   * @private
   * @param {string} pattern - Key of FAILURE_TEMPLATES.
   * @param {object} context - Context produced by `_analyzeFailures`.
   * @returns {object|null} Skill metadata, or null for an unknown pattern.
   */
  _generateSkill(pattern, context) {
    const templates = SkillEvolver.FAILURE_TEMPLATES;
    // Own-property lookup so inherited names ("constructor", ...) are unknown.
    if (!Object.prototype.hasOwnProperty.call(templates, pattern)) return null;
    const template = templates[pattern];

    const technique = this._extractTechnique(context);
    // Every placeholder is filled with the same extracted technique.
    const fill = (text) =>
      text
        .replace('{technique}', technique)
        .replace('{target_type}', technique)
        .replace('{vuln_class}', technique);

    return {
      name: fill(template.name),
      domain: template.domain,
      description: fill(template.description),
      mode: context.mode || '',
      generated_from: pattern,
      source_query: context.query || '',
    };
  }

  /**
   * Derive a short technique label from the failure context.
   *
   * Preference order: first MITRE ATT&CK ID in the query, first CVE ID in the
   * query (both lower-cased), then a per-mode default, then 'security-pattern'.
   *
   * @private
   * @param {object} context
   * @returns {string}
   */
  _extractTechnique(context) {
    const query = context.query || '';

    const mitre = query.match(/T\d{4}(?:\.\d{3})?/g);
    if (mitre) return mitre[0].toLowerCase();

    const cves = query.match(/CVE-\d{4}-\d+/g);
    if (cves) return cves[0].toLowerCase();

    const modeDefaults = {
      RED: 'attack-technique',
      BLUE: 'detection-gap',
      INCIDENT: 'incident-pattern',
      PRIVACY: 'data-exposure',
    };
    // Own-property lookup: a mode like "constructor" must not resolve to an
    // inherited Object.prototype member.
    const hasDefault = Object.prototype.hasOwnProperty.call(modeDefaults, context.mode);
    return (hasDefault && modeDefaults[context.mode]) || 'security-pattern';
  }

  /**
   * Write the generated skill scaffold to the filesystem.
   *
   * Creates `<skillsDir>/<domain>/techniques/<name>/` containing SKILL.md,
   * scripts/agent.js (mode 0o755) and references/api-reference.md.
   * Filesystem errors are not caught here.
   *
   * @private
   * @param {object} meta - Skill metadata as returned by `_generateSkill`.
   */
  _writeSkill(meta) {
    const name = meta.name;
    const domain = meta.domain;
    const skillDir = join(this.skillsDir, domain, 'techniques', name);
    mkdirSync(skillDir, { recursive: true });

    // SKILL.md
    const skillMd = `<!-- Copyright (c) 2026 defconxt. All rights reserved. -->
<!-- Licensed under AGPL-3.0 — see LICENSE file for details. -->
<!-- CIPHER is a trademark of defconxt. -->

---
name: ${name}
description: ${meta.description}
domain: cybersecurity
subdomain: ${domain}
tags:
- auto-generated
- evolution-engine
version: "1.0"
---

# ${name.replace(/-/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase())}

> Auto-generated by CIPHER Evolution Engine from engagement failure analysis.
> Pattern: ${meta.generated_from || 'unknown'}
> Source: ${(meta.source_query || 'N/A').slice(0, 100)}

## Quick Reference

\`\`\`bash
node scripts/agent.js analyze --target <scope>
\`\`\`

## Workflow

1. **Identify** — Recognize the pattern: ${meta.description}
2. **Analyze** — Gather context and assess scope
3. **Execute** — Apply technique with proper tooling
4. **Verify** — Confirm results and document findings
5. **Report** — Generate structured output with evidence

## Verification

- [ ] Output matches expected format
- [ ] All references are valid (CVE, MITRE ATT&CK)
- [ ] Remediation steps are actionable
- [ ] Detection opportunities documented

## References

- MITRE ATT&CK Framework
- NIST Cybersecurity Framework
- OWASP Testing Guide
`;
    writeFileSync(join(skillDir, 'SKILL.md'), skillMd);

    // scripts/agent.js
    const scriptsDir = join(skillDir, 'scripts');
    mkdirSync(scriptsDir, { recursive: true });
    const agentJs = `#!/usr/bin/env node
/**
* Agent script for ${name} — auto-generated by CIPHER Evolution Engine.
*/

import { parseArgs } from 'node:util';

const { values } = parseArgs({
options: {
target: { type: 'string', short: 't' },
command: { type: 'string', short: 'c', default: 'analyze' },
},
});

if (values.command === 'analyze') {
const result = {
skill: '${name}',
domain: '${domain}',
target: values.target || '',
status: 'analyzed',
auto_generated: true,
findings: [],
recommendations: [],
};
console.log(JSON.stringify(result, null, 2));
} else {
console.error('Usage: agent.js --command analyze --target <scope>');
process.exit(1);
}
`;
    writeFileSync(join(scriptsDir, 'agent.js'), agentJs);
    chmodSync(join(scriptsDir, 'agent.js'), 0o755);

    // references/api-reference.md
    const refsDir = join(skillDir, 'references');
    mkdirSync(refsDir, { recursive: true });
    const refMd = `<!-- Copyright (c) 2026 defconxt. All rights reserved. -->
<!-- Licensed under AGPL-3.0 — see LICENSE file for details. -->
<!-- CIPHER is a trademark of defconxt. -->

# API Reference — ${name}

| Command | Description |
|---------|-------------|
| \`analyze --target <scope>\` | Run analysis on target |

## Auto-Generated

This skill was generated by the CIPHER Evolution Engine.
Pattern: ${meta.generated_from || 'unknown'}
`;
    writeFileSync(join(refsDir, 'api-reference.md'), refMd);
  }

  /**
   * Append one evolution record to the JSONL history file.
   *
   * Best effort: a failure to create the directory or append the line is
   * reported on stderr and otherwise ignored, so it never aborts `evolve`.
   *
   * @private
   * @param {EvolutionRecord} record
   */
  _persistHistory(record) {
    if (!this.historyPath) return;
    try {
      mkdirSync(dirname(this.historyPath), { recursive: true });
      const line = `${JSON.stringify({
        timestamp: record.timestamp,
        failures_analyzed: record.failuresAnalyzed,
        skills_generated: record.skillsGenerated,
        skill_names: record.skillNames,
        failure_patterns: record.failurePatterns,
      })}\n`;
      appendFileSync(this.historyPath, line, 'utf-8');
    } catch (err) {
      // Non-fatal: make the failure visible instead of dropping it silently.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(`[cipher:evolution] could not persist evolution history: ${reason}`);
    }
  }

  /**
   * Get a summary of all evolution runs recorded on this instance.
   *
   * @returns {object} Counts plus flattened skill names and failure patterns.
   */
  getSummary() {
    const history = this.evolutionHistory;
    return {
      total_evolutions: history.length,
      total_skills_generated: history.reduce((sum, r) => sum + r.skillsGenerated, 0),
      all_skill_names: history.flatMap((r) => r.skillNames),
      failure_patterns: history.flatMap((r) => r.failurePatterns),
    };
  }
}
|
|
662
|
+
|
|
663
|
+
export {
|
|
664
|
+
ScoredResponse,
|
|
665
|
+
ResponseScorer,
|
|
666
|
+
EvolutionRecord,
|
|
667
|
+
SkillEvolver,
|
|
668
|
+
};
|