agent-security-scanner-mcp 3.19.0 → 3.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/compliance/aiuc-1-controls.json +330 -0
- package/index.js +21 -1
- package/package.json +4 -2
- package/src/cli/report.js +71 -0
- package/src/lib/aivss.js +284 -0
- package/src/lib/compliance-controls.js +164 -0
- package/src/lib/compliance-evaluator.js +149 -0
- package/src/lib/normalize-finding.js +146 -0
- package/src/tools/compliance-controls.js +67 -0
- package/src/tools/score-aivss.js +98 -0
package/src/lib/aivss.js
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
// src/lib/aivss.js — OWASP AIVSS v2 scoring engine (pure logic, no MCP).
|
|
2
|
+
|
|
3
|
+
/** Identity and component weights of the scoring model implemented in this file. */
export const AIVSS_MODEL = {
  name: 'owasp-aivss',
  version: 'v2',
  source_ref: 'OWASP/www-project-ai-security@a1b2c3d/calculatorV2.py',
  retrieved: '2026-03-14',
  weights: { base: 0.25, ai_specific: 0.45, impact: 0.30 },
};

// Fresh five-step VeryHigh..VeryLow scale (shared by MR, DS, EI, DC, AD).
// A factory is used so every metric owns a distinct object.
const fiveStepScale = () => ({ VeryHigh: 1.0, High: 0.8, Medium: 0.6, Low: 0.4, VeryLow: 0.2 });

// Fresh None..Critical scale (shared by C, I, A, SI).
const impactScale = () => ({ None: 0.0, Low: 0.22, Medium: 0.56, High: 0.85, Critical: 1.0 });

/** Numeric multiplier for every accepted label of every metric. */
export const METRIC_VALUES = {
  AV: { Network: 0.85, Adjacent: 0.62, Local: 0.55, Physical: 0.20 },
  AC: { Low: 0.77, High: 0.44 },
  PR: { None: 0.85, Low: 0.62, High: 0.27 },
  UI: { None: 0.85, Required: 0.62 },
  S: { Unchanged: 1.0, Changed: 1.5 },
  MR: fiveStepScale(),
  DS: fiveStepScale(),
  EI: fiveStepScale(),
  DC: fiveStepScale(),
  AD: fiveStepScale(),
  C: impactScale(),
  I: impactScale(),
  A: impactScale(),
  SI: impactScale(),
};

// Rating bands, ordered from the highest lower bound to the lowest.
const RATING_THRESHOLDS = [
  [9.0, 'Critical'],
  [7.0, 'High'],
  [4.0, 'Medium'],
  [0.1, 'Low'],
  [0, 'None'],
].map(([min, rating]) => ({ min, rating }));

// Metric profile used by both exfiltration category names.
const exfiltrationProfile = () => ({
  AV: 'Network', MR: 'High', DS: 'High', EI: 'High', C: 'High', SI: 'Medium',
});

// Category → inferred metric defaults (heuristic)
const CATEGORY_METRIC_MAP = {
  'exfiltration': exfiltrationProfile(),
  'data-exfiltration': exfiltrationProfile(),
  'prompt-injection': {
    AV: 'Network', MR: 'VeryHigh', DS: 'Medium', EI: 'High', C: 'Medium', I: 'High',
  },
  'prompt-injection-jailbreak': {
    AV: 'Network', MR: 'VeryHigh', DS: 'High', EI: 'VeryHigh', C: 'Medium', I: 'High', SI: 'High',
  },
  'prompt-injection-content': {
    AV: 'Network', MR: 'High', DS: 'Medium', EI: 'High', C: 'Medium', I: 'Medium',
  },
  'malicious-injection': {
    AV: 'Network', MR: 'High', EI: 'High', C: 'High', I: 'High', A: 'Medium',
  },
  'system-manipulation': {
    AV: 'Local', MR: 'Medium', DC: 'High', C: 'Medium', I: 'High', A: 'High',
  },
  'social-engineering': {
    AV: 'Network', MR: 'Medium', DS: 'Low', EI: 'Medium', C: 'Low', I: 'Medium',
  },
  'obfuscation': {
    AV: 'Network', MR: 'Medium', DC: 'High', EI: 'Medium', C: 'Low', I: 'Medium',
  },
  'agent-manipulation': {
    AV: 'Network', MR: 'High', DS: 'Medium', EI: 'High', C: 'Medium', I: 'High', SI: 'Medium',
  },
  'injection': {
    AV: 'Network', AC: 'Low', MR: 'Medium', EI: 'Medium', C: 'High', I: 'High',
  },
  'crypto': {
    AV: 'Network', AC: 'High', MR: 'Low', DS: 'Low', C: 'Medium', I: 'Low',
  },
  'info-exposure': {
    AV: 'Network', AC: 'Low', MR: 'Low', DS: 'Low', C: 'Medium',
  },
  'permissions': {
    AV: 'Local', AC: 'Low', MR: 'Medium', DC: 'Medium', C: 'Medium', I: 'Medium',
  },
  'supply-chain': {
    AV: 'Network', AC: 'High', MR: 'Medium', DS: 'Medium', AD: 'High', C: 'Medium', I: 'High', SI: 'Medium',
  },
};

// Severity → baseline metric defaults
const SEVERITY_METRIC_MAP = {
  CRITICAL: {
    AC: 'Low', PR: 'None', UI: 'None', S: 'Changed', DC: 'Medium', AD: 'Medium', A: 'High',
  },
  HIGH: {
    AC: 'Low', PR: 'None', UI: 'None', S: 'Unchanged', DC: 'Low', AD: 'Low', A: 'Medium',
  },
  MEDIUM: {
    AC: 'High', PR: 'Low', UI: 'None', S: 'Unchanged', DC: 'Low', AD: 'Low', A: 'Low',
  },
  LOW: {
    AC: 'High', PR: 'Low', UI: 'Required', S: 'Unchanged', DC: 'VeryLow', AD: 'VeryLow', A: 'None',
  },
  INFO: {
    AC: 'High', PR: 'High', UI: 'Required', S: 'Unchanged', DC: 'VeryLow', AD: 'VeryLow', A: 'None',
  },
};

// All metric keys in vector string order. METRIC_VALUES is declared in exactly
// that order, so its key list is reused instead of being spelled out twice.
const ALL_METRICS = Object.keys(METRIC_VALUES);

// Default values for all metrics
const METRIC_DEFAULTS = {
  AV: 'Network',
  AC: 'Low',
  PR: 'None',
  UI: 'None',
  S: 'Unchanged',
  MR: 'Medium',
  DS: 'Medium',
  EI: 'Medium',
  DC: 'Medium',
  AD: 'Medium',
  C: 'Low',
  I: 'Low',
  A: 'Low',
  SI: 'Low',
};
|
|
73
|
+
|
|
74
|
+
/**
 * Compute the raw AIVSS score from a set of metric labels.
 *
 * Each label is translated to its multiplier through METRIC_VALUES. A metric
 * that is missing, or whose label is not an own key of its table, contributes 0.
 *
 * @param {object} metrics - Object with keys AV, AC, PR, UI, S, MR, DS, EI, DC, AD, C, I, A, SI
 * @returns {{ score: number, components: { base: number, ai_specific: number, impact: number } }}
 *   `score` is rounded to 2 decimals and clamped to [0, 10]; components are rounded to 4 decimals.
 */
export function computeAivss(metrics) {
  const labels = metrics ?? {};

  // Own-property lookup only: with the `in` operator a label such as
  // 'constructor' or 'toString' resolves to an inherited function and turns
  // the whole score into NaN.
  const mv = (key) => {
    const table = METRIC_VALUES[key];
    const label = labels[key];
    if (!table || !Object.prototype.hasOwnProperty.call(table, label)) return 0;
    return table[label];
  };

  // Base = min(AV × AC × PR × UI × S, 10)
  // NOTE(review): unlike the two components below, this product is not scaled
  // by 10 — confirm against the reference calculator.
  const base = Math.min(mv('AV') * mv('AC') * mv('PR') * mv('UI') * mv('S'), 10);

  // AI-Specific = MR × DS × EI × DC × AD × 10
  const aiSpecific = mv('MR') * mv('DS') * mv('EI') * mv('DC') * mv('AD') * 10;

  // Impact = (C + I + A + SI) / 4 × 10
  const impact = (mv('C') + mv('I') + mv('A') + mv('SI')) / 4 * 10;

  // AIVSS = 0.25 × Base + 0.45 × AI-Specific + 0.30 × Impact
  const score = 0.25 * base + 0.45 * aiSpecific + 0.30 * impact;

  return {
    score: Math.min(10, Math.max(0, parseFloat(score.toFixed(2)))),
    components: {
      base: parseFloat(base.toFixed(4)),
      ai_specific: parseFloat(aiSpecific.toFixed(4)),
      impact: parseFloat(impact.toFixed(4)),
    },
  };
}
|
|
109
|
+
|
|
110
|
+
/**
 * Translate a numeric score into its rating label.
 *
 * Returns the rating of the first RATING_THRESHOLDS entry whose `min` the
 * score reaches, or 'None' when no entry matches (for example a NaN score).
 */
function getRating(score) {
  const band = RATING_THRESHOLDS.find(({ min }) => score >= min);
  return band ? band.rating : 'None';
}
|
|
119
|
+
|
|
120
|
+
// Labels whose abbreviation is not simply their first character.
const MULTI_CHAR_ABBREVIATIONS = new Map([
  ['VeryHigh', 'VH'],
  ['VeryLow', 'VL'],
  ['Critical', 'CR'],
]);

/**
 * Render the final metrics as an `AIVSS:2.0/...` vector string.
 *
 * Metrics are emitted in ALL_METRICS order as `KEY:ABBREV`. A metric with a
 * falsy value falls back to its METRIC_DEFAULTS label. Every label is
 * abbreviated to its first character except VeryHigh (VH), VeryLow (VL) and
 * Critical (CR).
 */
function buildVectorString(metrics) {
  const segments = [];
  for (const key of ALL_METRICS) {
    const label = metrics[key] || METRIC_DEFAULTS[key];
    const code = MULTI_CHAR_ABBREVIATIONS.has(label)
      ? MULTI_CHAR_ABBREVIATIONS.get(label)
      : label.charAt(0);
    segments.push(`${key}:${code}`);
  }
  return `AIVSS:2.0/${segments.join('/')}`;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Infer AIVSS metrics from a normalized finding.
 *
 * Metrics are layered: METRIC_DEFAULTS, then the severity baseline (MEDIUM when
 * the finding's severity is not a known key), then the category profile when
 * the category is known, and finally a one-step reduction of MR/DS/EI when the
 * finding's confidence is 'LOW'.
 *
 * @param {object} finding - Normalized finding; reads `severity`, `category`, `confidence`
 * @returns {{ inferred: object, notes: string[], mappingConfidence: string }}
 *   `mappingConfidence` is 'HIGH' for a known category with HIGH confidence,
 *   'LOW' when the category is missing or confidence is LOW, otherwise 'MEDIUM'.
 */
function inferMetrics(finding) {
  // Own-key lookup so that values like 'constructor' or 'toString' are not
  // mistaken for known severities/categories via the prototype chain.
  const ownEntry = (table, key) =>
    (typeof key === 'string' && Object.prototype.hasOwnProperty.call(table, key)
      ? table[key]
      : undefined);

  const notes = [];
  const inferred = { ...METRIC_DEFAULTS };

  // Layer 1: severity-based defaults
  const sevMap = ownEntry(SEVERITY_METRIC_MAP, finding.severity);
  Object.assign(inferred, sevMap ?? SEVERITY_METRIC_MAP.MEDIUM);
  notes.push(
    sevMap
      ? `Baseline metrics from severity: ${finding.severity}`
      // Report the baseline that was actually applied, not the raw input.
      : `Baseline metrics from severity: MEDIUM (fallback; finding severity was ${String(finding.severity)})`,
  );

  // Layer 2: category-based overrides
  const catMap = ownEntry(CATEGORY_METRIC_MAP, finding.category);
  if (catMap) {
    Object.assign(inferred, catMap);
    notes.push(`Category-specific metrics from: ${finding.category}`);
  }

  // Layer 3: confidence adjustment
  if (finding.confidence === 'LOW') {
    // Reduce AI-specific metrics for low confidence
    for (const k of ['MR', 'DS', 'EI']) {
      if (inferred[k] === 'VeryHigh') inferred[k] = 'High';
      else if (inferred[k] === 'High') inferred[k] = 'Medium';
    }
    notes.push('AI-specific metrics reduced due to LOW confidence');
  }

  // Determine mapping confidence
  let mappingConfidence = 'MEDIUM';
  if (catMap && finding.confidence === 'HIGH') {
    mappingConfidence = 'HIGH';
  } else if (!finding.category || finding.confidence === 'LOW') {
    mappingConfidence = 'LOW';
  }

  return { inferred, notes, mappingConfidence };
}
|
|
180
|
+
|
|
181
|
+
/**
 * Score a single normalized finding.
 *
 * Metrics are first inferred from the finding, then any valid manual override
 * replaces the inferred label. An override is applied only when its key is a
 * known metric and its value is a string that is an own key of that metric's
 * METRIC_VALUES table; anything else is ignored.
 *
 * @param {object} normalizedFinding - Output of normalizeFinding()
 * @param {object} [overrides] - Manual AIVSS metric overrides
 * @returns {object} Scored finding: rule_id, aivss_score, rating, vector_string,
 *   metrics ({ inferred, overridden, final, mapping_confidence, mapping_notes }) and components
 */
export function scoreAivss(normalizedFinding, overrides = {}) {
  const { inferred, notes, mappingConfidence } = inferMetrics(normalizedFinding);

  // Merge: overrides win
  const final = { ...inferred };
  const overriddenKeys = {};
  // `?? {}` covers an explicit null, which the default parameter does not.
  for (const [key, val] of Object.entries(overrides ?? {})) {
    const allowed = ALL_METRICS.includes(key) ? METRIC_VALUES[key] : undefined;
    // Require a string that is an OWN key of the table: `in` would also accept
    // inherited names ('toString') and coercible non-strings (['Low']), which
    // later yield NaN scores or crash the vector-string builder.
    const isValid = Boolean(allowed)
      && typeof val === 'string'
      && Object.prototype.hasOwnProperty.call(allowed, val);
    if (!isValid) continue;

    overriddenKeys[key] = val;
    final[key] = val;
    notes.push(`${key} overridden to ${val}`);
  }

  const { score, components } = computeAivss(final);

  return {
    rule_id: normalizedFinding.rule_id,
    aivss_score: score,
    rating: getRating(score),
    vector_string: buildVectorString(final),
    metrics: {
      inferred,
      overridden: Object.keys(overriddenKeys).length > 0 ? overriddenKeys : undefined,
      final,
      mapping_confidence: mappingConfidence,
      mapping_notes: notes,
    },
    components,
  };
}
|
|
218
|
+
|
|
219
|
+
/**
 * Score a batch of normalized findings and compute aggregate posture.
 *
 * Posture is a custom aggregate: max(max_score, mean + 1σ) capped at 10, where
 * σ is the population standard deviation of the per-finding scores. The mean
 * and σ are computed at full precision; only the reported values are rounded.
 *
 * @param {object[]} normalizedFindings - Array of normalized findings
 * @param {object} [overrides] - Overrides applied to all findings
 * @returns {{ findings: object[], posture: object }} Per-finding results in input
 *   order, plus posture statistics (max/p95/mean score, distribution, posture score and rating)
 */
export function scoreBatch(normalizedFindings, overrides = {}) {
  // Single place for the fields every posture object carries.
  const buildPosture = (stats) => ({
    ...stats,
    aggregate_method: 'house-posture-v1',
    aggregate_note: 'Custom aggregation: max(max_score, mean + 1σ). Per-finding AIVSS scores are standards-based; this aggregate is not.',
    model: AIVSS_MODEL,
  });
  const score_distribution = { critical: 0, high: 0, medium: 0, low: 0, none: 0 };

  if (!normalizedFindings || normalizedFindings.length === 0) {
    return {
      findings: [],
      posture: buildPosture({
        max_score: 0,
        p95_score: 0,
        mean_score: 0,
        score_distribution,
        posture_score: 0,
        posture_rating: 'None',
      }),
    };
  }

  const scored = normalizedFindings.map((f) => scoreAivss(f, overrides));
  const scores = scored.map((s) => s.aivss_score).sort((a, b) => a - b);
  const count = scores.length;

  const max_score = scores[count - 1];

  // Keep the unrounded mean for the statistics below; rounding it first would
  // leak rounding error into the variance and the posture score.
  const mean = scores.reduce((a, b) => a + b, 0) / count;
  const mean_score = parseFloat(mean.toFixed(2));

  // P95
  const p95Idx = Math.min(Math.ceil(count * 0.95) - 1, count - 1);
  const p95_score = scores[p95Idx];

  // Standard deviation
  const variance = scores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / count;
  const stdDev = Math.sqrt(variance);

  // Posture = max(max_score, mean + 1σ), capped at 10
  const posture_score = parseFloat(Math.min(10, Math.max(max_score, mean + stdDev)).toFixed(2));

  // Distribution: bucket through getRating so per-finding ratings and the
  // distribution can never disagree about the thresholds.
  for (const s of scores) {
    const bucket = getRating(s).toLowerCase();
    score_distribution[bucket] = (score_distribution[bucket] || 0) + 1;
  }

  return {
    findings: scored,
    posture: buildPosture({
      max_score,
      p95_score,
      mean_score,
      score_distribution,
      posture_score,
      posture_rating: getRating(posture_score),
    }),
  };
}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
// src/lib/compliance-controls.js — AIUC-1 controls registry loader + schema validator.
|
|
2
|
+
|
|
3
|
+
import { readFileSync } from 'fs';
|
|
4
|
+
import { join, dirname } from 'path';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
6
|
+
|
|
7
|
+
// Directory of this module, used to locate the bundled registry file.
// Falls back to the current working directory when import.meta.url cannot be
// converted to a file path.
function resolveModuleDir() {
  try {
    return dirname(fileURLToPath(import.meta.url));
  } catch {
    return process.cwd();
  }
}

const __dirname = resolveModuleDir();

// Values accepted for a control's "domain" field.
const KNOWN_DOMAINS = new Set(['security', 'safety']);

// Scanner tool names a control may reference.
const KNOWN_TOOLS = new Set([
  'scan_security',
  'scan_agent_prompt',
  'scan_project',
  'scan_skill',
  'scan_mcp_server',
  'scan_agent_action',
  'scan_git_diff',
]);

// OWASP LLM tags look like "LLM" followed by exactly two digits.
const OWASP_TAG_RE = /^LLM\d{2}$/;

// Validated registry, populated on first successful load.
let _cache = null;
|
|
22
|
+
|
|
23
|
+
/**
 * Validate the controls registry schema.
 *
 * Checks, per control: required fields (id, title, domain, evaluation),
 * duplicate ids, known domain, known scanner tools, OWASP tag format, and the
 * types of the recognised `evaluation` fields. Never throws on malformed
 * entries; every problem is reported as a string.
 *
 * @param {*} data - Parsed registry (expected shape: `{ controls: object[] }`)
 * @returns {string[]} Error messages; an empty array means the registry is valid
 */
export function validateRegistry(data) {
  const errors = [];

  if (!data || typeof data !== 'object') {
    errors.push('Registry must be a non-null object');
    return errors;
  }

  if (!Array.isArray(data.controls)) {
    errors.push('Registry must have a "controls" array');
    return errors;
  }

  const ids = new Set();
  data.controls.forEach((ctrl, index) => {
    // A null or primitive entry would make every property access below throw.
    if (!ctrl || typeof ctrl !== 'object') {
      errors.push(`Control at index ${index} must be an object`);
      return;
    }

    // Required fields
    if (!ctrl.id) errors.push(`Control missing "id"`);
    if (!ctrl.title) errors.push(`Control ${ctrl.id || '?'} missing "title"`);
    if (!ctrl.domain) errors.push(`Control ${ctrl.id || '?'} missing "domain"`);
    if (!ctrl.evaluation) errors.push(`Control ${ctrl.id || '?'} missing "evaluation"`);

    // Duplicate ID check (only real ids are tracked)
    if (ctrl.id) {
      if (ids.has(ctrl.id)) errors.push(`Duplicate control ID: ${ctrl.id}`);
      ids.add(ctrl.id);
    }

    // Domain validation
    if (ctrl.domain && !KNOWN_DOMAINS.has(ctrl.domain)) {
      errors.push(`Control ${ctrl.id}: unknown domain "${ctrl.domain}"`);
    }

    // Scanner tools validation
    if (Array.isArray(ctrl.scanner_tools)) {
      for (const tool of ctrl.scanner_tools) {
        if (!KNOWN_TOOLS.has(tool)) {
          errors.push(`Control ${ctrl.id}: unknown scanner tool "${tool}"`);
        }
      }
    }

    // OWASP tags validation
    if (Array.isArray(ctrl.owasp_llm)) {
      for (const tag of ctrl.owasp_llm) {
        if (!OWASP_TAG_RE.test(tag)) {
          errors.push(`Control ${ctrl.id}: invalid OWASP tag "${tag}" (expected LLM\\d{2})`);
        }
      }
    }

    // Evaluation field types
    const ev = ctrl.evaluation;
    if (!ev) return;
    if (typeof ev !== 'object' || Array.isArray(ev)) {
      errors.push(`Control ${ctrl.id}: evaluation must be an object`);
      return;
    }

    if (ev.max_aivss_posture !== undefined && typeof ev.max_aivss_posture !== 'number') {
      errors.push(`Control ${ctrl.id}: evaluation.max_aivss_posture must be a number`);
    }
    if (ev.max_critical_findings !== undefined && typeof ev.max_critical_findings !== 'number') {
      errors.push(`Control ${ctrl.id}: evaluation.max_critical_findings must be a number`);
    }
    if (ev.required_tools !== undefined) {
      if (!Array.isArray(ev.required_tools)) {
        errors.push(`Control ${ctrl.id}: evaluation.required_tools must be an array`);
      } else {
        for (const tool of ev.required_tools) {
          if (!KNOWN_TOOLS.has(tool)) {
            errors.push(`Control ${ctrl.id}: evaluation.required_tools references unknown tool "${tool}"`);
          }
        }
      }
    }
    if (ev.fail_on_severities !== undefined && !Array.isArray(ev.fail_on_severities)) {
      errors.push(`Control ${ctrl.id}: evaluation.fail_on_severities must be an array`);
    }
    if (ev.fail_on_actions !== undefined && !Array.isArray(ev.fail_on_actions)) {
      errors.push(`Control ${ctrl.id}: evaluation.fail_on_actions must be an array`);
    }
    if (ev.min_grade !== undefined && typeof ev.min_grade !== 'string') {
      errors.push(`Control ${ctrl.id}: evaluation.min_grade must be a string`);
    }
  });

  return errors;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Return the AIUC-1 controls registry, reading it from disk on first use.
 *
 * The JSON file is resolved relative to this module
 * (`../../compliance/aiuc-1-controls.json`), validated with validateRegistry,
 * and cached; later calls return the cached object.
 *
 * @returns {object} The full registry object
 * @throws {Error} When the registry fails schema validation
 */
export function loadControls() {
  if (!_cache) {
    const registryFile = join(__dirname, '..', '..', 'compliance', 'aiuc-1-controls.json');
    const registry = JSON.parse(readFileSync(registryFile, 'utf-8'));

    const problems = validateRegistry(registry);
    if (problems.length > 0) {
      throw new Error(`AIUC-1 controls registry validation failed:\n${problems.join('\n')}`);
    }

    _cache = registry;
  }
  return _cache;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Select controls from the registry by domain, control IDs and/or OWASP tags.
 *
 * Every supplied filter must match (logical AND). When no filter is active the
 * registry's own `controls` array is returned as-is.
 *
 * @param {object} [filters]
 * @param {string} [filters.domain] - 'security', 'safety', or 'all'
 * @param {string[]} [filters.controlIds] - Specific control IDs
 * @param {string[]} [filters.owaspFilter] - OWASP LLM tags to match
 * @returns {object[]} Filtered controls
 */
export function filterControls({ domain, controlIds, owaspFilter } = {}) {
  const { controls } = loadControls();
  const predicates = [];

  if (domain && domain !== 'all') {
    predicates.push((control) => control.domain === domain);
  }

  if (controlIds && controlIds.length > 0) {
    const wantedIds = new Set(controlIds);
    predicates.push((control) => wantedIds.has(control.id));
  }

  if (owaspFilter && owaspFilter.length > 0) {
    const wantedTags = new Set(owaspFilter);
    predicates.push((control) =>
      Array.isArray(control.owasp_llm) && control.owasp_llm.some((tag) => wantedTags.has(tag)));
  }

  if (predicates.length === 0) return controls;
  return controls.filter((control) => predicates.every((matches) => matches(control)));
}
|
|
160
|
+
|
|
161
|
+
/**
 * Drop the cached registry so the next loadControls() call reloads it.
 * Intended for tests.
 */
export function _resetCache() {
  _cache = null;
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
// src/lib/compliance-evaluator.js — Deterministic pass/partial/fail evaluation logic.
|
|
2
|
+
|
|
3
|
+
import { scoreBatch } from './aivss.js';
|
|
4
|
+
|
|
5
|
+
// Letter grades ranked from best (A = 4) down to worst (F = 0).
const GRADE_ORDER = Object.fromEntries(
  ['A', 'B', 'C', 'D', 'F'].map((letter, position, all) => [letter, all.length - 1 - position]),
);

/**
 * Tell whether `actual` ranks strictly below `threshold`.
 * A grade with no GRADE_ORDER entry (including null/undefined) ranks as F (0).
 */
function gradeIsWorse(actual, threshold) {
  const [actualRank, thresholdRank] = [actual, threshold].map((grade) => GRADE_ORDER[grade] ?? 0);
  return actualRank < thresholdRank;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Evaluate a single control against evidence.
 *
 * Outcome rules, in order:
 *  - malformed control, or any `required_tools` entry absent from
 *    `evidence.toolsRun` → 'not_evaluated';
 *  - a hit on fail_on_severities / fail_on_actions, a scoped AIVSS posture above
 *    max_aivss_posture, or too many CRITICAL findings → 'fail';
 *  - a grade below min_grade (a missing grade counts as F) → 'partial', unless
 *    the control has already failed;
 *  - otherwise 'pass'.
 * Findings are limited to those whose `source_tool` is listed in the control's
 * `scanner_tools` (all findings when the control lists none).
 *
 * @param {object} control - A control from the registry
 * @param {object} [evidence] - Missing evidence is treated as empty evidence
 * @param {object|null} evidence.aivssPosture - Posture from scoreBatch, or null (not read here; posture is recomputed per control)
 * @param {object[]} evidence.findings - Normalized findings from all available tools
 * @param {object} evidence.grades - Map of tool/scope → grade (e.g. { project: 'B' })
 * @param {string[]} evidence.toolsRun - Array of tool names whose output is available
 * @returns {{ control_id: string, status: string, reasons: string[] }}
 */
export function evaluateControl(control, evidence) {
  if (!control || !control.id || !control.evaluation) {
    return {
      control_id: control?.id || 'unknown',
      status: 'not_evaluated',
      reasons: ['Malformed control: missing id or evaluation'],
    };
  }

  const ev = control.evaluation;
  const reasons = [];
  // Tolerate a missing evidence object rather than throwing on property access.
  const toolsRun = evidence?.toolsRun || [];
  const allFindings = evidence?.findings || [];
  const grades = evidence?.grades || {};

  // 1. Check required_tools
  if (Array.isArray(ev.required_tools)) {
    for (const tool of ev.required_tools) {
      if (!toolsRun.includes(tool)) {
        return {
          control_id: control.id,
          status: 'not_evaluated',
          reasons: [`Missing required tool: ${tool}`],
        };
      }
    }
  }

  let status = 'pass';

  // Scope findings to this control's relevant tools
  const relevantTools = Array.isArray(control.scanner_tools) ? new Set(control.scanner_tools) : null;
  const relevantFindings = relevantTools
    ? allFindings.filter((f) => relevantTools.has(f.source_tool))
    : allFindings.slice();

  // 2. Check fail_on_severities
  if (Array.isArray(ev.fail_on_severities) && ev.fail_on_severities.length > 0) {
    const sevSet = new Set(ev.fail_on_severities);
    const matched = relevantFindings.filter((f) => sevSet.has(f.severity));
    if (matched.length > 0) {
      status = 'fail';
      reasons.push(`${matched.length} finding(s) with severity in [${ev.fail_on_severities.join(', ')}]`);
    }
  }

  // 3. Check fail_on_actions
  if (Array.isArray(ev.fail_on_actions) && ev.fail_on_actions.length > 0) {
    const actSet = new Set(ev.fail_on_actions);
    const matched = relevantFindings.filter((f) => f.action && actSet.has(f.action));
    if (matched.length > 0) {
      status = 'fail';
      reasons.push(`${matched.length} finding(s) with action in [${ev.fail_on_actions.join(', ')}]`);
    }
  }

  // 4. Check max_aivss_posture (scoped to this control's relevant findings)
  if (typeof ev.max_aivss_posture === 'number' && relevantFindings.length > 0) {
    const scopedPosture = scoreBatch(relevantFindings).posture;
    if (scopedPosture.posture_score > ev.max_aivss_posture) {
      status = 'fail';
      reasons.push(`AIVSS posture ${scopedPosture.posture_score} exceeds max ${ev.max_aivss_posture}`);
    }
  }

  // 5. Check max_critical_findings (scoped to this control's tools)
  if (typeof ev.max_critical_findings === 'number') {
    const critCount = relevantFindings.filter((f) => f.severity === 'CRITICAL').length;
    if (critCount > ev.max_critical_findings) {
      status = 'fail';
      reasons.push(`${critCount} CRITICAL finding(s) exceeds max ${ev.max_critical_findings}`);
    }
  }

  // 6. Check min_grade (scoped to control's relevant grade keys)
  if (ev.min_grade) {
    // Only consider grades for tools this control cares about; a grade key
    // matches either as-is or with a "scan_" prefix.
    const relevantGradeKeys = relevantTools
      ? Object.keys(grades).filter((k) => relevantTools.has(k) || relevantTools.has(`scan_${k}`))
      : Object.keys(grades);
    const gradeValues = relevantGradeKeys.map((k) => grades[k]);

    if (gradeValues.length > 0) {
      const worstGrade = gradeValues.reduce(
        (worst, g) => (gradeIsWorse(g, worst) ? g : worst),
        gradeValues[0],
      );
      if (gradeIsWorse(worstGrade, ev.min_grade)) {
        if (status !== 'fail') status = 'partial';
        reasons.push(`Grade ${worstGrade || 'F'} below minimum ${ev.min_grade}`);
      }
    } else if (gradeIsWorse(null, ev.min_grade)) {
      // No relevant grades available → treat as F. The reason is recorded even
      // when the control already failed, matching the branch above; only the
      // status downgrade is skipped.
      if (status !== 'fail') status = 'partial';
      reasons.push(`No relevant grade available (treated as F), below minimum ${ev.min_grade}`);
    }
  }

  return { control_id: control.id, status, reasons };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Run evaluateControl over every control and tally the outcomes.
 *
 * @param {object[]} controls - Array of controls from registry
 * @param {object} evidence - Same shape as evaluateControl
 * @returns {{ controls_evaluated: number, pass: number, partial: number, fail: number, not_evaluated: number, results: object[] }}
 *   Per-status counts plus the individual results in input order.
 */
export function evaluateAll(controls, evidence) {
  const results = controls.map((control) => evaluateControl(control, evidence));

  const tally = results.reduce(
    (counts, { status }) => {
      counts[status] = (counts[status] || 0) + 1;
      return counts;
    },
    { pass: 0, partial: 0, fail: 0, not_evaluated: 0 },
  );

  return { controls_evaluated: controls.length, ...tally, results };
}
|