sicario-red-team 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src-cli/commands/hit.js +4 -3
- package/src-cli/nodes/breacher.js +0 -44
- package/src-cli/nodes/critic.js +187 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sicario-red-team",
|
|
3
|
-
"version": "0.5.2",
|
|
3
|
+
"version": "0.5.4",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous Agentic Red-Teaming Swarm Protocol",
|
|
6
6
|
"repository": {
|
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"convex": "^1.10.0",
|
|
44
44
|
"dotenv": "^17.3.1",
|
|
45
45
|
"framer-motion": "^12.38.0",
|
|
46
|
+
"ini": "^6.0.0",
|
|
46
47
|
"jspdf": "^4.2.1",
|
|
47
48
|
"jspdf-autotable": "^5.0.7",
|
|
48
49
|
"lucide-react": "^0.363.0",
|
|
@@ -64,7 +65,6 @@
|
|
|
64
65
|
"eslint": "^8.57.0",
|
|
65
66
|
"eslint-plugin-react-hooks": "^4.6.0",
|
|
66
67
|
"eslint-plugin-react-refresh": "^0.4.6",
|
|
67
|
-
"ini": "^6.0.0",
|
|
68
68
|
"postcss": "^8.4.38",
|
|
69
69
|
"tailwindcss": "^3.4.3",
|
|
70
70
|
"typescript": "^5.2.2",
|
package/src-cli/commands/hit.js
CHANGED
|
@@ -6,14 +6,15 @@ import pc from 'picocolors';
|
|
|
6
6
|
import 'dotenv/config';
|
|
7
7
|
import { ConvexClient } from 'convex/browser';
|
|
8
8
|
import { runScout } from '../nodes/scout.js';
|
|
9
|
-
import { runBreacher, runCritic } from '../nodes/breacher.js';
|
|
9
|
+
import { runBreacher } from '../nodes/breacher.js';
|
|
10
|
+
import { CriticNode } from '../nodes/critic.js';
|
|
10
11
|
import { theme } from '../utils/theme.js';
|
|
11
12
|
|
|
12
13
|
// Initialize Convex Client (will use CONVEX_URL from .env)
|
|
13
14
|
const client = process.env.CONVEX_URL ? new ConvexClient(process.env.CONVEX_URL) : null;
|
|
14
15
|
|
|
15
16
|
export async function hitCommand(target, options) {
|
|
16
|
-
|
|
17
|
+
const critic = new CriticNode();
|
|
17
18
|
let finalTarget = target;
|
|
18
19
|
if (!finalTarget) {
|
|
19
20
|
finalTarget = await text({
|
|
@@ -113,7 +114,7 @@ export async function hitCommand(target, options) {
|
|
|
113
114
|
for (const breachReport of breachReports) {
|
|
114
115
|
// 5.1 [Critic] Verification (Internal Affairs)
|
|
115
116
|
log.step(`[Critic] : Verifying ${breachReport.title}...`);
|
|
116
|
-
const audit = await runCritic(breachReport, elements);
|
|
117
|
+
const audit = await critic.crossExamine(breachReport, elements);
|
|
117
118
|
const isReal = audit.isReal;
|
|
118
119
|
|
|
119
120
|
if (!isReal) {
|
|
@@ -77,47 +77,3 @@ Return a VALID JSON object. DO NOT use "null".
|
|
|
77
77
|
}
|
|
78
78
|
}
|
|
79
79
|
|
|
80
|
-
/**
|
|
81
|
-
* Critic Node: Acts as 'Internal Affairs' to verify or debunk findings.
|
|
82
|
-
*/
|
|
83
|
-
export async function runCritic(finding, elements) {
|
|
84
|
-
const client = new Cerebras({
|
|
85
|
-
apiKey: process.env.CEREBRAS_API_KEY,
|
|
86
|
-
});
|
|
87
|
-
|
|
88
|
-
const systemPrompt = `
|
|
89
|
-
You are the "Internal Affairs" auditor for the Sicario Swarm.
|
|
90
|
-
Your goal is to mercilessly debunk high-value findings from the Breacher nodes to ensure zero-false-positive reports.
|
|
91
|
-
|
|
92
|
-
ADVERSARIAL AUDIT PROTOCOL:
|
|
93
|
-
1. BUG vs. EXPLOIT: Is it just a "best practice" violation (e.g. missing label) or a real technical path to catastrophe? Debunk the former.
|
|
94
|
-
2. FRAMEWORK GUARDS: Modern frameworks (React, Next.js, Django) auto-sanitize many vectors. Does this finding survive server-side auto-escaping?
|
|
95
|
-
3. SKEPTICISM: Treat every finding as a hallucination until you can map a specific, realistic user-flow to exploitation.
|
|
96
|
-
4. IDOR/STATE FOCUS: Prioritize Insecure Direct Object References and state-bypass. Be ruthless with generic "Input Validation" reports.
|
|
97
|
-
|
|
98
|
-
RETURN VALID JSON:
|
|
99
|
-
{
|
|
100
|
-
"isReal": boolean,
|
|
101
|
-
"confidence": "0-100",
|
|
102
|
-
"reasoning": "Technical justification for your skepticism.",
|
|
103
|
-
"enhancedMitigation": "Granular, code-level fix if real."
|
|
104
|
-
}
|
|
105
|
-
`;
|
|
106
|
-
|
|
107
|
-
const userPrompt = `Finding: ${JSON.stringify(finding)}\nContext: ${JSON.stringify(elements)}`;
|
|
108
|
-
|
|
109
|
-
try {
|
|
110
|
-
const completion = await client.chat.completions.create({
|
|
111
|
-
messages: [
|
|
112
|
-
{ role: 'system', content: systemPrompt },
|
|
113
|
-
{ role: 'user', content: userPrompt }
|
|
114
|
-
],
|
|
115
|
-
model: 'llama3.1-8b',
|
|
116
|
-
response_format: { type: 'json_object' }
|
|
117
|
-
});
|
|
118
|
-
|
|
119
|
-
return JSON.parse(completion.choices[0].message.content);
|
|
120
|
-
} catch (error) {
|
|
121
|
-
return { isReal: true, confidence: "50", reasoning: "Audit failed." }; // Fail-safe
|
|
122
|
-
}
|
|
123
|
-
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import Cerebras from '@cerebras/cerebras_cloud_sdk';
|
|
2
|
+
import 'dotenv/config';
|
|
3
|
+
import pc from 'picocolors';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* CriticNode: A multi-tier, rate-limited validation engine for security findings.
|
|
7
|
+
* Designed for Cerebras Inference Cloud Free Tier limits:
|
|
8
|
+
* - 30 Requests Per Minute (RPM)
|
|
9
|
+
* - 60,000 Tokens Per Minute (TPM) on Llama 3.1 8B
|
|
10
|
+
* - 30,000 Tokens Per Minute (TPM) on Qwen 235B
|
|
11
|
+
*/
|
|
12
|
+
export class CriticNode {
|
|
13
|
+
constructor(apiKey) {
|
|
14
|
+
this.ai = new Cerebras({
|
|
15
|
+
apiKey: apiKey || process.env.CEREBRAS_API_KEY
|
|
16
|
+
});
|
|
17
|
+
this.llamaModel = 'llama3.1-8b';
|
|
18
|
+
this.qwenModel = 'qwen-3-235b-a22b-instruct-2507';
|
|
19
|
+
|
|
20
|
+
// Throttle to respect the 30 Requests/Min limit (~1 request every 2s)
|
|
21
|
+
this.lastRequestTime = 0;
|
|
22
|
+
this.minDelayMs = 2100; // Buffer for networking and multi-thread safety
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Internal throttler to prevent 429 errors from Cerebras API.
|
|
27
|
+
*/
|
|
28
|
+
async _throttle() {
|
|
29
|
+
const now = Date.now();
|
|
30
|
+
const timeSinceLast = now - this.lastRequestTime;
|
|
31
|
+
if (timeSinceLast < this.minDelayMs) {
|
|
32
|
+
await new Promise(resolve => setTimeout(resolve, this.minDelayMs - timeSinceLast));
|
|
33
|
+
}
|
|
34
|
+
this.lastRequestTime = Date.now();
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Tier 0: Local Heuristics & RegEx
|
|
39
|
+
* Eliminates obvious junk without hitting the API.
|
|
40
|
+
*/
|
|
41
|
+
_tierZeroFilter(finding) {
|
|
42
|
+
// Basic status code filtering
|
|
43
|
+
const ignoredCodes = [404, 400, 429];
|
|
44
|
+
if (finding.statusCode && ignoredCodes.includes(finding.statusCode)) {
|
|
45
|
+
return false;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Mapping Sicario fields
|
|
49
|
+
const title = (finding.title || '').toUpperCase();
|
|
50
|
+
const thought = (finding.thoughtProcess || finding.body || '').toLowerCase();
|
|
51
|
+
const vector = (finding.vector || '').toLowerCase();
|
|
52
|
+
|
|
53
|
+
// False Positive: Safely encoded XSS
|
|
54
|
+
if (title.includes('XSS') && (thought.includes('<') || vector.includes('<'))) {
|
|
55
|
+
return false;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// False Positive: Generic Label/Alt tag missing reports
|
|
59
|
+
if (title.includes('ACCESSIBILITY') || title.includes('LABEL MISSING')) {
|
|
60
|
+
return false;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
return true; // Passed Tier 0
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Tier 1: Llama 8B Triage (High Speed, Minimal Context)
|
|
68
|
+
* Quickly determines if a finding is a hallucination.
|
|
69
|
+
*/
|
|
70
|
+
async _tierOneTriage(finding) {
|
|
71
|
+
await this._throttle();
|
|
72
|
+
|
|
73
|
+
// Strip payload to save on 60k TPM limit
|
|
74
|
+
const bodyContext = (finding.thoughtProcess || finding.body || '').substring(0, 800);
|
|
75
|
+
|
|
76
|
+
const prompt = `
|
|
77
|
+
You are a strict security analyst. Review this potential vulnerability finding:
|
|
78
|
+
Type: ${finding.title}
|
|
79
|
+
Vector: ${finding.vector}
|
|
80
|
+
Reasoning: ${bodyContext}
|
|
81
|
+
|
|
82
|
+
Is this explicitly a False Positive or AI Hallucination?
|
|
83
|
+
Reply ONLY with "YES" (if junk) or "NO" (if it deserves deep analysis).
|
|
84
|
+
`;
|
|
85
|
+
|
|
86
|
+
try {
|
|
87
|
+
const response = await this.ai.chat.completions.create({
|
|
88
|
+
model: this.llamaModel,
|
|
89
|
+
messages: [{ role: 'user', content: prompt }],
|
|
90
|
+
temperature: 0.1,
|
|
91
|
+
max_tokens: 5
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
const answer = response.choices[0].message.content.trim().toUpperCase();
|
|
95
|
+
return !answer.includes('YES');
|
|
96
|
+
} catch (err) {
|
|
97
|
+
console.warn(pc.yellow(` [!] Llama Triage failed. Escalating to deep review...`));
|
|
98
|
+
return true; // Fail open to deep analysis
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* Tier 2: Qwen 235B Deep Analysis (Lead Critic Node)
|
|
104
|
+
* Final judgment point for surviving findings.
|
|
105
|
+
*/
|
|
106
|
+
async _tierTwoJudgment(finding, context) {
|
|
107
|
+
await this._throttle();
|
|
108
|
+
|
|
109
|
+
// Limit DOM context to stay within 30k TPM quota window effectively
|
|
110
|
+
const domSummary = JSON.stringify(context).substring(0, 4000);
|
|
111
|
+
|
|
112
|
+
const prompt = `
|
|
113
|
+
You are the Lead Critic Node of the Sicario Red-Teaming Swarm.
|
|
114
|
+
Analyze this potential business logic vulnerability finding:
|
|
115
|
+
|
|
116
|
+
TITLE: ${finding.title}
|
|
117
|
+
VECTOR: ${finding.vector}
|
|
118
|
+
BREACHER REASONING: ${finding.thoughtProcess}
|
|
119
|
+
DOM CONTEXT: ${domSummary}
|
|
120
|
+
|
|
121
|
+
CRITERIA:
|
|
122
|
+
1. Can this bypass authentication or financial logic?
|
|
123
|
+
2. Is it a real техническая vulnerability or just a best-practice bug?
|
|
124
|
+
|
|
125
|
+
VERDICT: Provide a 1-sentence technical explanation, followed by "VERDICT: EXPLOITABLE" or "VERDICT: SAFE".
|
|
126
|
+
`;
|
|
127
|
+
|
|
128
|
+
try {
|
|
129
|
+
const response = await this.ai.chat.completions.create({
|
|
130
|
+
model: this.qwenModel,
|
|
131
|
+
messages: [{ role: 'user', content: prompt }],
|
|
132
|
+
temperature: 0.2
|
|
133
|
+
});
|
|
134
|
+
|
|
135
|
+
return response.choices[0].message.content;
|
|
136
|
+
} catch (err) {
|
|
137
|
+
console.error(pc.red(` [x] Critic Core (Qwen 235B) Offline: Rate Limit or Quota reached.`));
|
|
138
|
+
return "VERDICT: MANUAL_REVIEW_REQUIRED";
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* crossExamine: The main verification pipeline.
|
|
144
|
+
*/
|
|
145
|
+
async crossExamine(finding, context) {
|
|
146
|
+
// Tier 0
|
|
147
|
+
if (!this._tierZeroFilter(finding)) {
|
|
148
|
+
return { isReal: false, reasoning: "Killed by Tier 0 (Local Heuristics)" };
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// Tier 1
|
|
152
|
+
const survivesTriage = await this._tierOneTriage(finding);
|
|
153
|
+
if (!survivesTriage) {
|
|
154
|
+
return { isReal: false, reasoning: "Killed by Tier 1 (Llama 8B - High Skepticism)" };
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
console.log(pc.yellow(` [!] Finding flagged for Deep Analysis (Qwen 235B Critic)...`));
|
|
158
|
+
|
|
159
|
+
// Tier 2
|
|
160
|
+
const finalVerdict = await this._tierTwoJudgment(finding, context);
|
|
161
|
+
|
|
162
|
+
if (finalVerdict.includes("EXPLOITABLE")) {
|
|
163
|
+
return {
|
|
164
|
+
isReal: true,
|
|
165
|
+
reasoning: finalVerdict,
|
|
166
|
+
enhancedMitigation: "Implement server-side state validation and cryptographic integrity checks."
|
|
167
|
+
};
|
|
168
|
+
} else {
|
|
169
|
+
return { isReal: false, reasoning: finalVerdict };
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/**
|
|
175
|
+
* Functional wrapper for backward compatibility with hitCommand.
|
|
176
|
+
*/
|
|
177
|
+
export async function runCritic(finding, context) {
|
|
178
|
+
// For single-shot usage, though singleton instantiation in hit.js is preferred
|
|
179
|
+
const critic = new CriticNode();
|
|
180
|
+
const result = await critic.crossExamine(finding, context);
|
|
181
|
+
return {
|
|
182
|
+
isReal: result.isReal,
|
|
183
|
+
reasoning: result.reasoning,
|
|
184
|
+
confidence: result.isReal ? "95" : "15",
|
|
185
|
+
enhancedMitigation: result.enhancedMitigation
|
|
186
|
+
};
|
|
187
|
+
}
|