corpus-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/autofix.d.ts +41 -0
- package/dist/autofix.js +159 -0
- package/dist/constants.d.ts +9 -0
- package/dist/constants.js +9 -0
- package/dist/cve-database.json +396 -0
- package/dist/cve-patterns.d.ts +54 -0
- package/dist/cve-patterns.js +124 -0
- package/dist/engine.d.ts +6 -0
- package/dist/engine.js +71 -0
- package/dist/graph-engine.d.ts +56 -0
- package/dist/graph-engine.js +412 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +17 -0
- package/dist/log.d.ts +7 -0
- package/dist/log.js +33 -0
- package/dist/logger.d.ts +1 -0
- package/dist/logger.js +12 -0
- package/dist/memory.d.ts +67 -0
- package/dist/memory.js +261 -0
- package/dist/pattern-learner.d.ts +82 -0
- package/dist/pattern-learner.js +420 -0
- package/dist/scanners/code-safety.d.ts +13 -0
- package/dist/scanners/code-safety.js +114 -0
- package/dist/scanners/confidence-calibrator.d.ts +25 -0
- package/dist/scanners/confidence-calibrator.js +58 -0
- package/dist/scanners/context-poisoning.d.ts +18 -0
- package/dist/scanners/context-poisoning.js +48 -0
- package/dist/scanners/cross-user-firewall.d.ts +10 -0
- package/dist/scanners/cross-user-firewall.js +24 -0
- package/dist/scanners/dependency-checker.d.ts +15 -0
- package/dist/scanners/dependency-checker.js +203 -0
- package/dist/scanners/exfiltration-guard.d.ts +19 -0
- package/dist/scanners/exfiltration-guard.js +49 -0
- package/dist/scanners/index.d.ts +12 -0
- package/dist/scanners/index.js +12 -0
- package/dist/scanners/injection-firewall.d.ts +12 -0
- package/dist/scanners/injection-firewall.js +71 -0
- package/dist/scanners/scope-enforcer.d.ts +10 -0
- package/dist/scanners/scope-enforcer.js +30 -0
- package/dist/scanners/secret-detector.d.ts +34 -0
- package/dist/scanners/secret-detector.js +188 -0
- package/dist/scanners/session-hijack.d.ts +16 -0
- package/dist/scanners/session-hijack.js +53 -0
- package/dist/scanners/trust-score.d.ts +34 -0
- package/dist/scanners/trust-score.js +164 -0
- package/dist/scanners/undo-integrity.d.ts +9 -0
- package/dist/scanners/undo-integrity.js +38 -0
- package/dist/subprocess.d.ts +10 -0
- package/dist/subprocess.js +103 -0
- package/dist/types.d.ts +117 -0
- package/dist/types.js +16 -0
- package/dist/yaml-evaluator.d.ts +12 -0
- package/dist/yaml-evaluator.js +105 -0
- package/package.json +36 -0
- package/src/cve-database.json +396 -0
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Corpus Pattern Learner
|
|
3
|
+
*
|
|
4
|
+
* Analyzes findings across multiple repos to learn which patterns
|
|
5
|
+
* are real issues vs false positives. Builds a statistical model
|
|
6
|
+
* that evolves as more repos are scanned.
|
|
7
|
+
*
|
|
8
|
+
* This is the "learning" part of the immune system.
|
|
9
|
+
*/
|
|
10
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
11
|
+
import path from 'path';
|
|
12
|
+
const TEST_INDICATORS = ['test', 'spec', '__tests__', '__mocks__', 'fixture', 'mock', 'stub', 'e2e', 'integration-test'];
const BUILD_INDICATORS = ['build', 'scripts', 'tools', 'bin', 'cli', 'webpack', 'rollup', 'esbuild', 'vite.config', 'jest.config'];
/**
 * Compile each indicator into a whole-word matcher.
 *
 * An indicator matches only when it is delimited by the start/end of the path
 * or by a non-alphanumeric character, with an optional plural "s". Plain
 * substring matching treated "client" as "cli", "latest" as "test" and
 * "binding" as "bin", which classified production files as test/build files.
 */
function compileIndicatorMatchers(indicators) {
    return indicators.map((indicator) => {
        const escaped = indicator.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        return new RegExp(`(?:^|[^a-z0-9])${escaped}s?(?:$|[^a-z0-9])`);
    });
}
// Compiled once at module load so the per-file checks do not rebuild regexes.
const TEST_MATCHERS = compileIndicatorMatchers(TEST_INDICATORS);
const BUILD_MATCHERS = compileIndicatorMatchers(BUILD_INDICATORS);
/**
 * Report whether any matcher hits the path.
 * camelCase / PascalCase words are split first ("userTest" -> "user-Test",
 * "XMLTest" -> "XML-Test") so names like "UserSpec.js" still count as tests.
 * Non-string input never matches.
 */
function pathMatchesIndicator(filepath, matchers) {
    if (typeof filepath !== 'string')
        return false;
    const words = filepath
        .replace(/([a-z])([A-Z])/g, '$1-$2')
        .replace(/([A-Z]+)([A-Z][a-z])/g, '$1-$2')
        .toLowerCase();
    return matchers.some((matcher) => matcher.test(words));
}
/**
 * True when the path contains one of TEST_INDICATORS as a whole word.
 */
function isTestFile(filepath) {
    return pathMatchesIndicator(filepath, TEST_MATCHERS);
}
/**
 * True when the path contains one of BUILD_INDICATORS as a whole word.
 */
function isBuildFile(filepath) {
    return pathMatchesIndicator(filepath, BUILD_MATCHERS);
}
|
|
22
|
+
/**
 * Classify the semantic context of a file based on its path and content.
 *
 * Checks run in a fixed priority order and the first hit wins:
 * bundler config (path), route handler, middleware, auth module, test
 * harness, build script, database access, API client; anything else is
 * 'production'.
 *
 * @param filepath Path of the file being classified.
 * @param content Text to inspect for code patterns. A non-string value is
 *   treated as empty text, so classification falls back to the path.
 * @returns One of 'webpack-config', 'route-handler', 'middleware',
 *   'auth-module', 'test-harness', 'build-script', 'database-access',
 *   'api-client' or 'production'.
 */
export function classifyContext(filepath, content) {
    const lowerPath = filepath.toLowerCase();
    // Callers may pass a missing value (e.g. a finding without a message).
    const text = typeof content === 'string' ? content : '';
    // webpack/rollup/vite config. "vite." must start a name component so that
    // paths such as "invite.ts" are not mistaken for bundler configuration.
    if (/webpack|rollup|(?:^|[\\/._-])vite\./.test(lowerPath))
        return 'webpack-config';
    // Route handler patterns (express, hono, koa, etc.)
    if (/\bapp\.(get|post|put|delete|patch|use)\s*\(|\brouter\.(get|post|put|delete|patch|use)\s*\(/.test(text))
        return 'route-handler';
    // Middleware patterns
    if (/\b(req|request)\s*,\s*(res|response)\s*,\s*next\b/.test(text))
        return 'middleware';
    // Auth module
    if (/auth|login|session|passport|jwt|oauth/i.test(lowerPath) || /\b(authenticate|authorize|login|logout|session|jwt|oauth|passport)\b/i.test(text))
        return 'auth-module';
    // Test harness
    if (isTestFile(filepath))
        return 'test-harness';
    // Build script
    if (isBuildFile(filepath))
        return 'build-script';
    // Database access
    if (/\b(SELECT|INSERT|UPDATE|DELETE|CREATE TABLE)\b/.test(text) || /\b(prisma|sequelize|typeorm|knex|mongoose|mongodb)\b/i.test(text))
        return 'database-access';
    // API client
    if (/\bfetch\s*\(|\baxios\b|\bhttp\.(get|post|put|delete)\b|\bgot\b|\brequest\s*\(/i.test(text))
        return 'api-client';
    return 'production';
}
|
|
54
|
+
/**
 * Learn patterns from a set of findings across multiple repos.
 * Updates the learned patterns file at `<projectRoot>/.corpus/patterns.json`.
 *
 * For every finding type in `findings`, the matching pattern record is created
 * or updated (occurrence counters, up to five examples, context breakdown),
 * its false-positive rate is recomputed and its adjusted severity re-derived
 * from the base severity. Repo prevalence is then refreshed for every stored
 * pattern, same-file co-occurrence is computed for this batch, and the store
 * is sorted by production occurrences and written back.
 *
 * A store file that cannot be parsed, or that is not a JSON object, is treated
 * as empty and replaced on save.
 *
 * @param projectRoot Directory that holds (or will hold) the `.corpus` folder.
 * @param findings Findings to learn from; each is read for `type`, `file`,
 *   `repo`, `severity` and `message`.
 * @returns The updated store object that was written to disk.
 */
export function learnFromFindings(projectRoot, findings) {
    const corpusDir = path.join(projectRoot, '.corpus');
    const patternsPath = path.join(corpusDir, 'patterns.json');
    // Load existing patterns
    let existing = null;
    if (existsSync(patternsPath)) {
        try {
            existing = JSON.parse(readFileSync(patternsPath, 'utf-8'));
        }
        catch {
            existing = null;
        }
    }
    if (existing === null || typeof existing !== 'object' || Array.isArray(existing)) {
        existing = {
            version: 1,
            learnedFrom: 0,
            totalFindings: 0,
            patterns: [],
            lastUpdated: new Date().toISOString(),
            knownPackages: [],
            repoCategories: {},
        };
    }
    // Ensure expected fields exist on loaded data
    if (!Array.isArray(existing.patterns))
        existing.patterns = [];
    if (typeof existing.learnedFrom !== 'number')
        existing.learnedFrom = 0;
    if (typeof existing.totalFindings !== 'number')
        existing.totalFindings = 0;
    if (!existing.knownPackages)
        existing.knownPackages = [];
    if (!existing.repoCategories)
        existing.repoCategories = {};
    // Group findings by type
    const byType = new Map();
    for (const f of findings) {
        if (!byType.has(f.type))
            byType.set(f.type, []);
        byType.get(f.type).push(f);
    }
    // Update or create pattern signatures
    for (const [type, typeFindings] of byType) {
        let pattern = existing.patterns.find(p => p.type === type);
        if (!pattern) {
            pattern = {
                type,
                totalOccurrences: 0,
                inTestFiles: 0,
                inProductionFiles: 0,
                inBuildTools: 0,
                falsePositiveRate: 0,
                severity: typeFindings[0].severity,
                adjustedSeverity: typeFindings[0].severity,
                description: typeFindings[0].message,
                examples: [],
                repoCount: 0,
                repoPrevalence: 0,
                contextBreakdown: {},
                coOccursWith: [],
                linkedCVEs: [],
                categoryWeights: {},
            };
            existing.patterns.push(pattern);
        }
        // Ensure new fields exist on patterns loaded from disk
        if (!pattern.examples)
            pattern.examples = [];
        if (!pattern.contextBreakdown)
            pattern.contextBreakdown = {};
        if (!pattern.coOccursWith)
            pattern.coOccursWith = [];
        if (!pattern.linkedCVEs)
            pattern.linkedCVEs = [];
        if (!pattern.categoryWeights)
            pattern.categoryWeights = {};
        if (pattern.repoCount === undefined)
            pattern.repoCount = 0;
        if (pattern.repoPrevalence === undefined)
            pattern.repoPrevalence = 0;
        // Track unique repos for this pattern
        const uniqueRepos = new Set();
        for (const f of typeFindings) {
            pattern.totalOccurrences++;
            if (isTestFile(f.file))
                pattern.inTestFiles++;
            else if (isBuildFile(f.file))
                pattern.inBuildTools++;
            else
                pattern.inProductionFiles++;
            if (pattern.examples.length < 5) {
                pattern.examples.push({ repo: f.repo, file: f.file });
            }
            uniqueRepos.add(f.repo);
            // Classify context and update breakdown. Only the finding message is
            // available here, so it stands in for the file content.
            const context = classifyContext(f.file, f.message);
            pattern.contextBreakdown[context] = (pattern.contextBreakdown[context] || 0) + 1;
        }
        // Update repo count (prevalence is refreshed for all patterns below)
        pattern.repoCount += uniqueRepos.size;
        // Calculate false positive rate and adjust severity
        const nonProd = pattern.inTestFiles + pattern.inBuildTools;
        pattern.falsePositiveRate = pattern.totalOccurrences > 0
            ? Math.round((nonProd / pattern.totalOccurrences) * 100)
            : 0;
        if (pattern.falsePositiveRate > 80) {
            pattern.adjustedSeverity = 'SUPPRESSED';
        }
        else if (pattern.falsePositiveRate > 50) {
            // Downgrade severity by one level
            if (pattern.severity === 'CRITICAL')
                pattern.adjustedSeverity = 'WARNING';
            else if (pattern.severity === 'WARNING')
                pattern.adjustedSeverity = 'INFO';
            else
                // Nothing lower to downgrade to: reset to the base severity so a
                // value left over from an earlier run (e.g. SUPPRESSED) is not kept.
                pattern.adjustedSeverity = pattern.severity;
        }
        else {
            pattern.adjustedSeverity = pattern.severity;
        }
        // CVE-linked patterns should never be suppressed
        if (pattern.linkedCVEs.length > 0) {
            if (pattern.adjustedSeverity === 'SUPPRESSED' || pattern.adjustedSeverity === 'INFO') {
                pattern.adjustedSeverity = 'WARNING';
            }
        }
    }
    // Update repo prevalence for all patterns against the new repo total
    const updatedLearnedFrom = existing.learnedFrom + new Set(findings.map(f => f.repo)).size;
    for (const pattern of existing.patterns) {
        // Patterns not seen in this batch were not back-filled above and may
        // come from an older store, so read their fields defensively.
        const breakdown = pattern.contextBreakdown || {};
        const linkedCVEs = pattern.linkedCVEs || [];
        pattern.repoPrevalence = updatedLearnedFrom > 0
            ? Math.round(((pattern.repoCount || 0) / updatedLearnedFrom) * 100)
            : 0;
        // Aggressive suppression: high prevalence + mostly non-production
        const totalContext = Object.values(breakdown).reduce((a, b) => a + b, 0);
        const productionContextCount = (breakdown['production'] || 0)
            + (breakdown['route-handler'] || 0)
            + (breakdown['auth-module'] || 0)
            + (breakdown['database-access'] || 0);
        const nonProductionRatio = totalContext > 0 ? (totalContext - productionContextCount) / totalContext : 0;
        if (pattern.repoPrevalence > 60 && nonProductionRatio > 0.7 && linkedCVEs.length === 0) {
            pattern.adjustedSeverity = 'SUPPRESSED';
        }
    }
    // Compute co-occurrence: patterns appearing in the same file
    const fileToPatterns = new Map();
    for (const f of findings) {
        const key = `${f.repo}::${f.file}`;
        if (!fileToPatterns.has(key))
            fileToPatterns.set(key, new Set());
        fileToPatterns.get(key).add(f.type);
    }
    const coOccurrenceCount = new Map();
    const patternFileCount = new Map();
    for (const patternSet of fileToPatterns.values()) {
        const types = Array.from(patternSet);
        for (const t of types) {
            patternFileCount.set(t, (patternFileCount.get(t) || 0) + 1);
        }
        for (let i = 0; i < types.length; i++) {
            for (let j = i + 1; j < types.length; j++) {
                const key = [types[i], types[j]].sort().join('::');
                coOccurrenceCount.set(key, (coOccurrenceCount.get(key) || 0) + 1);
            }
        }
    }
    // Update co-occurrence on each pattern
    for (const pattern of existing.patterns) {
        // Only patterns present in this batch had adjustedSeverity re-derived
        // from the base severity. Escalating the others again from their stored
        // co-occurrence data would ratchet INFO -> WARNING -> CRITICAL over runs.
        if (!byType.has(pattern.type))
            continue;
        const newCoOccurs = [];
        for (const [key, count] of coOccurrenceCount) {
            const [a, b] = key.split('::');
            const other = a === pattern.type ? b : b === pattern.type ? a : null;
            if (!other)
                continue;
            const myFiles = patternFileCount.get(pattern.type) || 1;
            const correlation = Math.round((count / myFiles) * 100) / 100;
            const combinedRisk = correlation > 0.5 ? 'ELEVATED' : 'NORMAL';
            newCoOccurs.push({ pattern: other, correlation, combinedRisk });
        }
        if (newCoOccurs.length > 0) {
            pattern.coOccursWith = newCoOccurs;
        }
        // An ELEVATED co-occurrence upgrades severity by one level
        const hasElevated = pattern.coOccursWith.some(co => co.combinedRisk === 'ELEVATED');
        if (hasElevated && pattern.adjustedSeverity === 'INFO') {
            pattern.adjustedSeverity = 'WARNING';
        }
        else if (hasElevated && pattern.adjustedSeverity === 'WARNING') {
            pattern.adjustedSeverity = 'CRITICAL';
        }
    }
    // Sort by production occurrences (most important first)
    existing.patterns.sort((a, b) => b.inProductionFiles - a.inProductionFiles);
    existing.learnedFrom = updatedLearnedFrom;
    existing.totalFindings += findings.length;
    existing.lastUpdated = new Date().toISOString();
    // Save ({ recursive: true } makes mkdirSync a no-op when the folder exists)
    mkdirSync(corpusDir, { recursive: true });
    writeFileSync(patternsPath, JSON.stringify(existing, null, 2));
    return existing;
}
|
|
248
|
+
/**
 * Get learned patterns for display.
 *
 * Reads `<projectRoot>/.corpus/patterns.json`.
 *
 * @param projectRoot Directory that contains the `.corpus` folder.
 * @returns The parsed store, or null when the file is missing, unreadable,
 *   not valid JSON, or not an object with a `patterns` array. Callers index
 *   into `patterns` right after a null check, so a wrong-shaped file is
 *   reported as "nothing learned" rather than handed back.
 */
export function getLearnedPatterns(projectRoot) {
    const patternsPath = path.join(projectRoot, '.corpus', 'patterns.json');
    let parsed;
    try {
        // A missing file surfaces here as a read error, so no separate
        // existence check is needed.
        parsed = JSON.parse(readFileSync(patternsPath, 'utf-8'));
    }
    catch {
        return null;
    }
    if (parsed === null || typeof parsed !== 'object' || !Array.isArray(parsed.patterns))
        return null;
    return parsed;
}
|
|
262
|
+
// Contexts where certain pattern types are always safe (always suppress)
const SAFE_CONTEXT_OVERRIDES = {
    'webpack-config': ['eval_usage', 'dynamic_require', 'process_env_access'],
    'build-script': ['eval_usage', 'dynamic_require', 'shell_exec', 'process_env_access'],
    'test-harness': ['eval_usage', 'hardcoded_secret', 'shell_exec', 'dynamic_require'],
};
// Contexts where certain pattern types are always dangerous (never suppress)
const DANGEROUS_CONTEXT_OVERRIDES = {
    'route-handler': ['eval_usage', 'shell_exec', 'sql_injection', 'prototype_pollution'],
    'auth-module': ['hardcoded_secret', 'weak_crypto', 'eval_usage'],
    'middleware': ['eval_usage', 'shell_exec', 'prototype_pollution'],
    'database-access': ['sql_injection', 'eval_usage'],
    'api-client': ['hardcoded_secret', 'ssrf'],
};
/**
 * Check if a finding should be suppressed based on learned patterns.
 *
 * Decision order:
 *   1. No learned data, or no record for `type`: do not suppress.
 *   2. CVE-linked pattern: never suppress. This is checked before any
 *      context override so a safe context cannot hide a CVE-linked finding.
 *   3. Type listed as dangerous for the file's context: do not suppress.
 *   4. Type listed as safe for the file's context: suppress.
 *   5. Learned adjusted severity is 'SUPPRESSED': suppress.
 *   6. Test file and the pattern occurs more in tests than production: suppress.
 *
 * @param projectRoot Directory that contains the `.corpus` folder.
 * @param type Pattern type of the finding.
 * @param file Path of the file the finding was reported in.
 * @param content Optional text used for context classification; falsy values
 *   are treated as empty.
 * @returns `{ suppress: false }`, or `{ suppress: true, reason }`.
 */
export function shouldSuppress(projectRoot, type, file, content) {
    const patterns = getLearnedPatterns(projectRoot);
    if (!patterns || !Array.isArray(patterns.patterns))
        return { suppress: false };
    const pattern = patterns.patterns.find(p => p.type === type);
    if (!pattern)
        return { suppress: false };
    // CVE-linked patterns are never suppressed
    if (pattern.linkedCVEs && pattern.linkedCVEs.length > 0) {
        return { suppress: false };
    }
    // Determine the context of this specific file
    const context = classifyContext(file, content || '');
    // Context-aware overrides: certain patterns in dangerous contexts never suppress
    const dangerousPatterns = DANGEROUS_CONTEXT_OVERRIDES[context];
    if (dangerousPatterns && dangerousPatterns.includes(type)) {
        return { suppress: false };
    }
    // Context-aware overrides: certain patterns in safe contexts always suppress
    const safePatterns = SAFE_CONTEXT_OVERRIDES[context];
    if (safePatterns && safePatterns.includes(type)) {
        return {
            suppress: true,
            reason: `Pattern '${type}' in '${context}' context is a known safe usage. Suppressed.`,
        };
    }
    if (pattern.adjustedSeverity === 'SUPPRESSED') {
        return {
            suppress: true,
            reason: `Pattern '${type}' has ${pattern.falsePositiveRate}% false positive rate across ${pattern.totalOccurrences} occurrences in ${patterns.learnedFrom} repos. Likely not a real issue.`,
        };
    }
    if (isTestFile(file) && pattern.inTestFiles > pattern.inProductionFiles) {
        return {
            suppress: true,
            reason: `Pattern '${type}' is ${Math.round((pattern.inTestFiles / pattern.totalOccurrences) * 100)}% test-only. Suppressed in test files.`,
        };
    }
    return { suppress: false };
}
|
|
319
|
+
/**
 * Get intelligence verdict for a specific pattern type.
 *
 * Confidence is the average of two capped scores: occurrences out of 50 and
 * repos out of 10, each expressed as a percentage and limited to 100.
 *
 * Records written by an older version may lack `linkedCVEs`, `coOccursWith`,
 * `contextBreakdown`, `repoCount` or `repoPrevalence`; those are read as
 * empty / zero instead of throwing.
 *
 * @param projectRoot Directory that contains the `.corpus` folder.
 * @param type Pattern type to look up.
 * @returns `{ verdict, confidence, reasoning }`, or null when there is no
 *   learned data or no record for `type`.
 */
export function getPatternIntelligence(projectRoot, type) {
    const patterns = getLearnedPatterns(projectRoot);
    if (!patterns || !Array.isArray(patterns.patterns))
        return null;
    const pattern = patterns.patterns.find(p => p.type === type);
    if (!pattern)
        return null;
    const linkedCVEs = pattern.linkedCVEs || [];
    const coOccursWith = pattern.coOccursWith || [];
    const contextBreakdown = pattern.contextBreakdown || {};
    const repoCount = pattern.repoCount || 0;
    const repoPrevalence = pattern.repoPrevalence || 0;
    // Confidence is based on sample size
    const sampleConfidence = Math.min(100, Math.round((pattern.totalOccurrences / 50) * 100));
    const repoConfidence = Math.min(100, Math.round((repoCount / 10) * 100));
    const confidence = Math.round((sampleConfidence + repoConfidence) / 2);
    const reasons = [];
    reasons.push(`Seen ${pattern.totalOccurrences} times across ${repoCount} repos (${repoPrevalence}% prevalence).`);
    if (pattern.falsePositiveRate > 50) {
        reasons.push(`High false positive rate: ${pattern.falsePositiveRate}%.`);
    }
    if (linkedCVEs.length > 0) {
        reasons.push(`Linked to CVEs: ${linkedCVEs.join(', ')}.`);
    }
    const elevatedCoOccurs = coOccursWith.filter(co => co.combinedRisk === 'ELEVATED');
    if (elevatedCoOccurs.length > 0) {
        reasons.push(`Elevated risk when combined with: ${elevatedCoOccurs.map(co => co.pattern).join(', ')}.`);
    }
    // Three most frequent contexts, formatted as "name(count)"
    const topContexts = Object.entries(contextBreakdown)
        .sort(([, a], [, b]) => b - a)
        .slice(0, 3)
        .map(([ctx, count]) => `${ctx}(${count})`)
        .join(', ');
    if (topContexts) {
        reasons.push(`Top contexts: ${topContexts}.`);
    }
    return {
        verdict: pattern.adjustedSeverity,
        confidence,
        reasoning: reasons.join(' '),
    };
}
|
|
359
|
+
/**
 * Register known legitimate npm packages to reduce false positives.
 *
 * Merges `packages` into the `knownPackages` list stored in
 * `<projectRoot>/.corpus/patterns.json`, dropping duplicates while keeping
 * first-seen order, refreshes `lastUpdated`, and writes the store back
 * (creating the `.corpus` folder if needed).
 *
 * A store file that cannot be parsed, or that is not a JSON object, is treated
 * as empty and replaced. A stored `knownPackages` value that is not an array
 * is ignored.
 *
 * @param projectRoot Directory that holds (or will hold) the `.corpus` folder.
 * @param packages Iterable of package names to register.
 */
export function addKnownPackages(projectRoot, packages) {
    const corpusDir = path.join(projectRoot, '.corpus');
    const patternsPath = path.join(corpusDir, 'patterns.json');
    let existing = null;
    if (existsSync(patternsPath)) {
        try {
            existing = JSON.parse(readFileSync(patternsPath, 'utf-8'));
        }
        catch {
            existing = null;
        }
    }
    if (existing === null || typeof existing !== 'object' || Array.isArray(existing)) {
        existing = {
            version: 1,
            learnedFrom: 0,
            totalFindings: 0,
            patterns: [],
            lastUpdated: new Date().toISOString(),
            knownPackages: [],
            repoCategories: {},
        };
    }
    // Guard against a non-array value: new Set('abc') would add single characters.
    const packageSet = new Set(Array.isArray(existing.knownPackages) ? existing.knownPackages : []);
    for (const pkg of packages) {
        packageSet.add(pkg);
    }
    existing.knownPackages = Array.from(packageSet);
    existing.lastUpdated = new Date().toISOString();
    // { recursive: true } makes mkdirSync a no-op when the folder exists.
    mkdirSync(corpusDir, { recursive: true });
    writeFileSync(patternsPath, JSON.stringify(existing, null, 2));
}
|
|
392
|
+
/**
 * Categorize a repo for context-weighted pattern analysis.
 *
 * Records `category` under `repoCategories[repo]` in
 * `<projectRoot>/.corpus/patterns.json`, refreshes `lastUpdated`, and writes
 * the store back (creating the `.corpus` folder if needed). An existing entry
 * for the same repo is overwritten.
 *
 * A store file that cannot be parsed, or that is not a JSON object, is treated
 * as empty and replaced. A stored `repoCategories` value that is not a plain
 * object is discarded.
 *
 * @param projectRoot Directory that holds (or will hold) the `.corpus` folder.
 * @param repo Repo identifier used as the key.
 * @param category Category to associate with the repo.
 */
export function categorizeRepo(projectRoot, repo, category) {
    const corpusDir = path.join(projectRoot, '.corpus');
    const patternsPath = path.join(corpusDir, 'patterns.json');
    let existing = null;
    if (existsSync(patternsPath)) {
        try {
            existing = JSON.parse(readFileSync(patternsPath, 'utf-8'));
        }
        catch {
            existing = null;
        }
    }
    if (existing === null || typeof existing !== 'object' || Array.isArray(existing)) {
        existing = {
            version: 1,
            learnedFrom: 0,
            totalFindings: 0,
            patterns: [],
            lastUpdated: new Date().toISOString(),
            knownPackages: [],
            repoCategories: {},
        };
    }
    const categories = existing.repoCategories;
    if (!categories || typeof categories !== 'object' || Array.isArray(categories))
        existing.repoCategories = {};
    existing.repoCategories[repo] = category;
    existing.lastUpdated = new Date().toISOString();
    // { recursive: true } makes mkdirSync a no-op when the folder exists.
    mkdirSync(corpusDir, { recursive: true });
    writeFileSync(patternsPath, JSON.stringify(existing, null, 2));
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/** Severity label attached to a safety rule and to each finding it produces. */
export type SafetySeverity = 'CRITICAL' | 'WARNING' | 'INFO';
/** One rule match reported by `checkCodeSafety`. */
export interface SafetyFinding {
    /** Severity of the rule that matched. */
    severity: SafetySeverity;
    /** Name of the rule that matched. */
    rule: string;
    /** 1-based line number on which the match starts. */
    line: number;
    /** The `filepath` that was passed to `checkCodeSafety`. */
    file: string;
    /** Description of the problem, taken from the rule. */
    message: string;
    /** Remediation advice, taken from the rule. */
    suggestion: string;
}
/**
 * Scans file content for unsafe code patterns.
 *
 * @param content Text of the file to scan.
 * @param filepath Path of the file; copied into each finding.
 * @returns One finding per rule match.
 */
export declare function checkCodeSafety(content: string, filepath: string): Array<SafetyFinding>;
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
// Each rule has a name, a global regex, a severity, a message and a
// suggestion. A rule with `fileFilter` only runs on paths that match it.
const SAFETY_RULES = [
    // Security
    {
        name: 'eval_usage',
        regex: /\beval\s*\(/g,
        severity: 'CRITICAL',
        message: 'eval() usage detected. This enables arbitrary code execution.',
        suggestion: 'Use JSON.parse() for data, or Function() constructor if dynamic code is unavoidable.',
    },
    {
        name: 'innerHTML_assignment',
        regex: /\.innerHTML\s*=/g,
        severity: 'WARNING',
        message: 'Direct innerHTML assignment. Risk of XSS if content is user-controlled.',
        suggestion: 'Use textContent for text, or sanitize HTML with DOMPurify before insertion.',
    },
    {
        name: 'sql_concatenation',
        regex: /(?:SELECT|INSERT|UPDATE|DELETE|DROP)\s+.*\+\s*(?:req\.|params\.|query\.|body\.)/gi,
        severity: 'CRITICAL',
        message: 'SQL query built with string concatenation. SQL injection risk.',
        suggestion: 'Use parameterized queries or an ORM.',
    },
    {
        name: 'exec_usage',
        regex: /(?:child_process\.)?exec\s*\(\s*(?:`[^`]*\$\{|['"][^'"]*\+)/g,
        severity: 'CRITICAL',
        message: 'Shell command built with dynamic input. Command injection risk.',
        suggestion: 'Use execFile() with an argument array instead of exec() with string interpolation.',
    },
    {
        name: 'disabled_auth',
        regex: /(?:auth|authentication|authorize)\s*[=:]\s*(?:false|null|undefined|'none'|"none")/gi,
        severity: 'WARNING',
        message: 'Authentication appears to be disabled.',
        suggestion: 'Ensure this is intentional and only in development/test environments.',
    },
    {
        name: 'hardcoded_ip',
        regex: /['"](?:0\.0\.0\.0|127\.0\.0\.1)(?::\d+)?['"]/g,
        severity: 'INFO',
        message: 'Hardcoded IP address. May bind to all interfaces (0.0.0.0) in production.',
        suggestion: 'Use environment variable for host binding. 0.0.0.0 exposes the service to all network interfaces.',
    },
    {
        name: 'chmod_777',
        regex: /chmod\s+777/g,
        severity: 'WARNING',
        message: 'chmod 777 gives all users read/write/execute permissions.',
        suggestion: 'Use the minimum required permissions (e.g., 755 for executables, 644 for files).',
    },
    {
        name: 'console_log_sensitive',
        regex: /console\.log\s*\([^)]*(?:password|secret|token|key|credential|auth)[^)]*\)/gi,
        severity: 'WARNING',
        message: 'console.log may be logging sensitive data.',
        suggestion: 'Remove or redact sensitive values before logging.',
    },
    {
        name: 'todo_security',
        regex: /\/\/\s*TODO:?\s*(?:fix|add|implement)\s*(?:auth|security|validation|sanitiz)/gi,
        severity: 'INFO',
        message: 'Security-related TODO found. This may indicate incomplete security implementation.',
        suggestion: 'Address security TODOs before shipping to production.',
    },
    {
        name: 'no_verify_flag',
        regex: /--no-verify|--no-check|--insecure|--skip-ssl/g,
        severity: 'WARNING',
        message: 'Security verification bypass flag detected.',
        suggestion: 'Remove bypass flags before shipping to production.',
        fileFilter: /\.(sh|bash|yml|yaml|json|toml)$/,
    },
    {
        name: 'debug_endpoint',
        regex: /(?:app|router|server)\.\s*(?:get|post|use)\s*\(\s*['"]\/(?:debug|test|admin|internal)/g,
        severity: 'WARNING',
        message: 'Debug/admin endpoint detected. May be accessible in production.',
        suggestion: 'Gate behind authentication or remove before deployment.',
    },
    {
        name: 'wildcard_permissions',
        regex: /"(?:Action|Resource)"\s*:\s*"\*"/g,
        severity: 'CRITICAL',
        message: 'Wildcard IAM permission detected. Overly permissive.',
        suggestion: 'Use least-privilege permissions. Specify exact actions and resources.',
        fileFilter: /\.(json|yaml|yml|tf)$/,
    },
];
/**
 * Scans file content for unsafe code patterns.
 *
 * Every rule in SAFETY_RULES whose `fileFilter` (if any) accepts `filepath` is
 * run over the whole content, and each match becomes one finding. Findings
 * are grouped by rule in SAFETY_RULES order, and by position within a rule.
 *
 * @param content Text of the file to scan.
 * @param filepath Path of the file; used for `fileFilter` checks and copied
 *   into each finding.
 * @returns Findings with `severity`, `rule`, 1-based `line`, `file`,
 *   `message` and `suggestion`.
 */
export function checkCodeSafety(content, filepath) {
    const findings = [];
    for (const rule of SAFETY_RULES) {
        if (rule.fileFilter && !rule.fileFilter.test(filepath))
            continue;
        // The regexes are global and shared between calls; restart from the top.
        rule.regex.lastIndex = 0;
        // exec() on a global regex returns matches in ascending position, so the
        // line number is advanced incrementally instead of re-splitting the whole
        // prefix for every match.
        let line = 1;
        let scanned = 0;
        let match;
        while ((match = rule.regex.exec(content)) !== null) {
            for (let i = scanned; i < match.index; i++) {
                if (content.charCodeAt(i) === 10)
                    line++;
            }
            scanned = match.index;
            findings.push({
                severity: rule.severity,
                rule: rule.name,
                line,
                file: filepath,
                message: rule.message,
                suggestion: rule.suggestion,
            });
        }
    }
    return findings;
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/** One logged action together with the user's reaction to it. */
export interface CalibrationEntry {
    /** Intent label used for grouping; an empty value is grouped as 'unknown'. */
    intent: string;
    /** Confidence reported for the action; compared against 0.72 by `auditCalibration`. */
    confidence: number;
    /** Verdict recorded for the action. */
    verdict: string;
    /** What the user did, or null when no decision was recorded. */
    userDecision: 'CONFIRMED' | 'CANCELLED' | null;
}
/** Calibration statistics for a single intent. */
export interface CalibrationResult {
    /** Intent these statistics describe. */
    intent: string;
    /** Number of log entries for this intent. */
    totalActions: number;
    /** Mean confidence across the entries, rounded to three decimals. */
    meanConfidence: number;
    /** Entries with confidence above 0.72 that the user cancelled. */
    overconfidentCount: number;
    /** Entries with confidence below 0.72 that the user confirmed. */
    underconfidentCount: number;
    /** Suggested threshold, limited to the range 0.3–0.95 and rounded to two decimals. */
    recommendedThreshold: number;
    /** True when more than 15% of entries are overconfident, or more than 15% underconfident. */
    isMiscalibrated: boolean;
}
/** Result of `auditCalibration`. */
export interface CalibrationReport {
    /** One result per intent that met the minimum sample size. */
    results: CalibrationResult[];
    /** Intents whose result has `isMiscalibrated` set. */
    miscalibratedIntents: string[];
    /** One-line human-readable summary. */
    summary: string;
}
/**
 * Analyzes action logs for per-intent confidence calibration.
 * Requires at least `minSampleSize` entries per intent to produce results.
 *
 * @param actionLog Entries to analyze. Declared readonly because the log is
 *   only read, so frozen or `readonly` arrays are accepted as well.
 * @param minSampleSize Minimum entries an intent needs to be reported
 *   (the implementation defaults to 20).
 */
export declare function auditCalibration(actionLog: readonly CalibrationEntry[], minSampleSize?: number): CalibrationReport;
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
 * Analyzes action logs for per-intent confidence calibration.
 * Requires at least `minSampleSize` entries per intent to produce results.
 *
 * Entries are grouped by `intent` (empty intents become 'unknown'). For each
 * group that is large enough:
 *   - an entry is overconfident when its confidence is above 0.72 and the
 *     user cancelled, underconfident when below 0.72 and the user confirmed;
 *   - the intent is miscalibrated when either kind exceeds 15% of the group;
 *   - the recommended threshold is the highest cancelled confidence + 0.05,
 *     or, if nothing was cancelled, the lowest confirmed confidence - 0.05,
 *     or 0.72 if there were no decisions; it is limited to 0.3–0.95.
 *
 * @param actionLog Entries with `intent`, `confidence` and `userDecision`.
 * @param minSampleSize Minimum entries an intent needs to be reported.
 * @returns `{ results, miscalibratedIntents, summary }`.
 */
export function auditCalibration(actionLog, minSampleSize = 20) {
    const THRESHOLD = 0.72;
    const byIntent = new Map();
    for (const entry of actionLog) {
        const intent = entry.intent || 'unknown';
        if (!byIntent.has(intent))
            byIntent.set(intent, []);
        byIntent.get(intent).push(entry);
    }
    const results = [];
    const miscalibrated = [];
    for (const [intent, entries] of byIntent) {
        if (entries.length < minSampleSize)
            continue;
        const total = entries.length;
        // Single pass with running min/max. Spreading a whole group into
        // Math.max(...)/Math.min(...) passes one argument per entry and throws a
        // RangeError once a group is large enough.
        let confidenceSum = 0;
        let overconfidentCount = 0;
        let underconfidentCount = 0;
        let cancelledCount = 0;
        let confirmedCount = 0;
        let highestCancelled = -Infinity;
        let lowestConfirmed = Infinity;
        for (const e of entries) {
            confidenceSum += e.confidence;
            if (e.userDecision === 'CANCELLED') {
                cancelledCount++;
                highestCancelled = Math.max(highestCancelled, e.confidence);
                if (e.confidence > THRESHOLD)
                    overconfidentCount++;
            }
            else if (e.userDecision === 'CONFIRMED') {
                confirmedCount++;
                lowestConfirmed = Math.min(lowestConfirmed, e.confidence);
                if (e.confidence < THRESHOLD)
                    underconfidentCount++;
            }
        }
        const meanConf = confidenceSum / total;
        const ocRate = overconfidentCount / total;
        const ucRate = underconfidentCount / total;
        const isMiscalibrated = ocRate > 0.15 || ucRate > 0.15;
        let recommended = THRESHOLD;
        if (cancelledCount > 0) {
            recommended = highestCancelled + 0.05;
        }
        else if (confirmedCount > 0) {
            recommended = lowestConfirmed - 0.05;
        }
        recommended = Math.max(0.3, Math.min(0.95, recommended));
        if (isMiscalibrated)
            miscalibrated.push(intent);
        results.push({
            intent,
            totalActions: total,
            meanConfidence: Math.round(meanConf * 1000) / 1000,
            overconfidentCount,
            underconfidentCount,
            recommendedThreshold: Math.round(recommended * 100) / 100,
            isMiscalibrated,
        });
    }
    return {
        results,
        miscalibratedIntents: miscalibrated,
        summary: miscalibrated.length > 0
            ? `${miscalibrated.length} intent(s) miscalibrated: ${miscalibrated.join(', ')}`
            : 'All intents well-calibrated',
    };
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Outcome label of a poisoning scan. */
export type PoisoningSeverity = 'CLEAN' | 'SUSPICIOUS' | 'POISONED';
/** Result of scanning one memory chunk. */
export interface PoisoningScanResult {
    /** Scan outcome for the chunk. */
    severity: PoisoningSeverity;
    /** Pattern associated with the result, or null (presumably when nothing matched — confirm against the implementation). */
    pattern: string | null;
    /** Identifier of the scanned chunk. */
    chunkId: string;
    /** Human-readable description of the result. */
    message: string;
}
/**
 * Scans a memory chunk for poisoning signatures.
 *
 * @param content Text of the chunk.
 * @param chunkId Optional identifier for the chunk.
 * @param retrievalQuery Optional query associated with the chunk's retrieval.
 */
export declare function scanMemoryChunk(content: string, chunkId?: string, retrievalQuery?: string): PoisoningScanResult;
/**
 * Batch scan multiple memory chunks. Returns only problematic ones.
 *
 * @param chunks Chunks to scan, each with `content` and an optional `id`.
 * @param retrievalQuery Optional query associated with the chunks' retrieval.
 */
export declare function scanMemoryChunks(chunks: Array<{
    content: string;
    id?: string;
}>, retrievalQuery?: string): Array<PoisoningScanResult>;
|