muaddib-scanner 2.7.6 → 2.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +3 -2
- package/src/intent-graph.js +203 -3
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -568,8 +568,9 @@ async function run(targetPath, options = {}) {
|
|
|
568
568
|
// A malware package typically has 1-3 occurrences, not dozens.
|
|
569
569
|
applyFPReductions(deduped, reachableFiles, packageName);
|
|
570
570
|
|
|
571
|
-
// Intent coherence analysis: detect source→sink pairs
|
|
572
|
-
|
|
571
|
+
// Intent coherence analysis: detect source→sink pairs within files
|
|
572
|
+
// Pass targetPath for destination-aware SDK pattern detection
|
|
573
|
+
const intentResult = buildIntentPairs(deduped, targetPath);
|
|
573
574
|
// Add intent threats to deduped before enrichment so they get rules/playbooks
|
|
574
575
|
if (intentResult.intentThreats) {
|
|
575
576
|
for (const it of intentResult.intentThreats) {
|
package/src/intent-graph.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
3
6
|
// ============================================
|
|
4
7
|
// INTENT GRAPH — Intra-File Coherence Analysis
|
|
5
8
|
// ============================================
|
|
@@ -14,6 +17,7 @@
|
|
|
14
17
|
// 3. Sources = ONLY high-confidence credential access (NOT env_access, NOT suspicious_dataflow)
|
|
15
18
|
// 4. Sinks = ONLY threats already identified by scanners (NO content-based scanning)
|
|
16
19
|
// 5. No double-counting — suspicious_dataflow is already a compound detection
|
|
20
|
+
// 6. Destination-aware: SDK patterns (env key matches API domain) are NOT exfiltration
|
|
17
21
|
|
|
18
22
|
// ============================================
|
|
19
23
|
// SOURCE CLASSIFICATION
|
|
@@ -32,6 +36,165 @@ const SOURCE_TYPES = {
|
|
|
32
36
|
// Sensitive env var patterns — env_access referencing these is credential theft, not config
|
|
33
37
|
const SENSITIVE_ENV_PATTERNS = /TOKEN|KEY|SECRET|PASSWORD|CREDENTIAL|API_KEY|AUTH/i;
|
|
34
38
|
|
|
39
|
+
// ============================================
|
|
40
|
+
// DESTINATION-AWARE SDK DETECTION
|
|
41
|
+
// ============================================
|
|
42
|
+
// Curated allowlist: when an env var matching the pattern is sent to a matching domain,
|
|
43
|
+
// it is legitimate SDK usage, not credential exfiltration.
|
|
44
|
+
// Safe-by-default: unknown env vars or unknown domains remain CRITICAL.
|
|
45
|
+
const SDK_ENV_DOMAIN_MAP = [
|
|
46
|
+
{ envPattern: /^AWS_/i, domains: ['amazonaws.com', 'aws.amazon.com'] },
|
|
47
|
+
{ envPattern: /^AZURE_/i, domains: ['azure.com', 'microsoft.com'] },
|
|
48
|
+
{ envPattern: /^GOOGLE_|^GCP_/i, domains: ['googleapis.com', 'google.com'] },
|
|
49
|
+
{ envPattern: /^FIREBASE_/i, domains: ['firebase.com', 'googleapis.com'] },
|
|
50
|
+
{ envPattern: /^SALESFORCE_/i, domains: ['salesforce.com', 'force.com'] },
|
|
51
|
+
{ envPattern: /^SUPABASE_/i, domains: ['supabase.co', 'supabase.com'] },
|
|
52
|
+
{ envPattern: /^MAILGUN_/i, domains: ['mailgun.net', 'mailgun.com'] },
|
|
53
|
+
{ envPattern: /^STRIPE_/i, domains: ['stripe.com'] },
|
|
54
|
+
{ envPattern: /^TWILIO_/i, domains: ['twilio.com'] },
|
|
55
|
+
{ envPattern: /^SENDGRID_/i, domains: ['sendgrid.com', 'sendgrid.net'] },
|
|
56
|
+
{ envPattern: /^DATADOG_/i, domains: ['datadoghq.com'] },
|
|
57
|
+
{ envPattern: /^SENTRY_/i, domains: ['sentry.io'] },
|
|
58
|
+
{ envPattern: /^SLACK_/i, domains: ['slack.com'] },
|
|
59
|
+
{ envPattern: /^GITHUB_/i, domains: ['github.com', 'githubusercontent.com'] },
|
|
60
|
+
{ envPattern: /^GITLAB_/i, domains: ['gitlab.com'] },
|
|
61
|
+
{ envPattern: /^CLOUDFLARE_/i, domains: ['cloudflare.com'] },
|
|
62
|
+
{ envPattern: /^OPENAI_/i, domains: ['openai.com'] },
|
|
63
|
+
{ envPattern: /^ANTHROPIC_/i, domains: ['anthropic.com'] },
|
|
64
|
+
{ envPattern: /^MONGODB_|^MONGO_/i, domains: ['mongodb.com', 'mongodb.net'] },
|
|
65
|
+
{ envPattern: /^AUTH0_/i, domains: ['auth0.com'] },
|
|
66
|
+
{ envPattern: /^HUBSPOT_/i, domains: ['hubspot.com', 'hubapi.com'] },
|
|
67
|
+
{ envPattern: /^CONTENTFUL_/i, domains: ['contentful.com'] },
|
|
68
|
+
];
|
|
69
|
+
|
|
70
|
+
// Tokens stripped when extracting brand keyword from env var name
|
|
71
|
+
const ENV_NOISE_TOKENS = new Set([
|
|
72
|
+
'API', 'KEY', 'SECRET', 'TOKEN', 'PASSWORD', 'CREDENTIAL',
|
|
73
|
+
'AUTH', 'ACCESS', 'PRIVATE', 'PUBLIC', 'CLIENT', 'ID', 'URL'
|
|
74
|
+
]);
|
|
75
|
+
|
|
76
|
+
// Suspicious tunneling/proxy domains — never considered legitimate SDK destinations
|
|
77
|
+
const SUSPICIOUS_DOMAIN_PATTERNS = /ngrok|serveo|localtunnel|burpcollaborator|requestbin|pipedream|webhook\.site/i;
|
|
78
|
+
|
|
79
|
+
// URL extraction regex (matches http/https URLs in source code)
|
|
80
|
+
const URL_EXTRACT_RE = /https?:\/\/[a-zA-Z0-9\-._~:/?#[\]@!$&'()*+,;=%]+/g;
|
|
81
|
+
|
|
82
|
+
// Hostname extraction from Node.js request options: hostname: 'domain.com' or host: 'domain.com'
|
|
83
|
+
const HOSTNAME_OPTION_RE = /(?:hostname|host)\s*:\s*['"`]([a-zA-Z0-9\-._]+)['"`]/g;
|
|
84
|
+
|
|
85
|
+
/**
 * Extract an env var name from the messages of intent source threats.
 *
 * Threats are scanned in order. For each message an explicit
 * `process.env.NAME` reference is tried first (case-insensitive), then a bare
 * UPPER_SNAKE_CASE identifier containing at least one underscore. The first
 * hit wins.
 * Messages look like: "process.env.SALESFORCE_API_KEY", "env var MAILGUN_API_KEY accessed"
 *
 * Malformed input (non-iterable argument, null entries, non-string messages)
 * is skipped instead of throwing.
 *
 * @param {Iterable<{message?: string}>} sourceThreats - threats classified as sources
 * @returns {string|null} the env var name, or null when none can be found
 */
function extractEnvVarFromMessage(sourceThreats) {
  if (sourceThreats == null || typeof sourceThreats[Symbol.iterator] !== 'function') {
    return null;
  }

  for (const threat of sourceThreats) {
    const message = threat == null ? undefined : threat.message;
    if (typeof message !== 'string' || message.length === 0) continue;

    // Explicit property access: process.env.VAR_NAME
    const envMatch = message.match(/process\.env\.([A-Z_][A-Z0-9_]*)/i);
    if (envMatch) return envMatch[1];

    // Standalone VAR_NAME token (e.g., "SALESFORCE_API_KEY")
    const varMatch = message.match(/\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)\b/);
    if (varMatch) return varMatch[1];
  }

  return null;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Derive the brand keyword of an env var name: the first underscore-separated
 * token (upper-cased) that is non-empty and is not a noise token.
 * MAILGUN_API_KEY → MAILGUN, SALESFORCE_CLIENT_SECRET → SALESFORCE
 *
 * @param {string} envVarName - e.g., "MAILGUN_API_KEY"
 * @returns {string|null} upper-cased brand token, or null when every token is noise
 */
function extractBrandFromEnvVar(envVarName) {
  for (const token of envVarName.toUpperCase().split('_')) {
    if (token.length === 0) continue;
    if (ENV_NOISE_TOKENS.has(token)) continue;
    return token;
  }
  return null;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Extract the host name from an http(s) URL string.
 *
 * The authority component is `[userinfo@]host[:port]`. The host is taken
 * AFTER the last "@" so that a URL such as
 * `https://stripe.com:x@evil.example/` resolves to `evil.example` (the host
 * a client would actually connect to) instead of the decoy in the userinfo.
 *
 * @param {string} url - candidate URL; anything not starting with http:// or https:// yields null
 * @returns {string|null} lower-cased host without userinfo or port, or null
 */
function extractDomain(url) {
  if (typeof url !== 'string') return null;

  const authorityMatch = url.match(/^https?:\/\/([^/?#]*)/i);
  if (!authorityMatch) return null;

  // Drop "user:password@" — lastIndexOf returns -1 when absent, so slice(0) keeps everything.
  const authority = authorityMatch[1];
  const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);

  let host;
  if (hostAndPort.startsWith('[')) {
    // Bracketed IPv6 literal: colons inside the brackets are not a port separator.
    const closing = hostAndPort.indexOf(']');
    host = closing === -1 ? hostAndPort : hostAndPort.slice(0, closing + 1);
  } else {
    const portSeparator = hostAndPort.indexOf(':');
    host = portSeparator === -1 ? hostAndPort : hostAndPort.slice(0, portSeparator);
  }

  return host.length > 0 ? host.toLowerCase() : null;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Suffix-match a host name against a list of expected SDK domains.
 * A host matches when it equals an expected domain or is a subdomain of it:
 * api.mailgun.net matches mailgun.net, sub.api.stripe.com matches stripe.com
 *
 * @param {string} domain - host name to test
 * @param {string[]} expectedDomains - allowed base domains
 * @returns {boolean} true if at least one expected domain matches
 */
function domainMatchesSuffix(domain, expectedDomains) {
  return expectedDomains.some(
    (base) => domain === base || domain.endsWith('.' + base)
  );
}
|
|
135
|
+
|
|
136
|
+
/**
 * Return the label immediately left of the top-level label of a host name
 * ("api.acme.com" → "acme"), or null for single-label hosts.
 *
 * This is a conservative stand-in for a Public Suffix List lookup: hosts under
 * multi-label suffixes (e.g. "acme.co.uk") resolve to the suffix label ("co")
 * and therefore never match a brand.
 */
function getRegistrableLabel(domain) {
  const labels = domain.split('.');
  return labels.length >= 2 ? labels[labels.length - 2] : null;
}

/**
 * Check if an env var + file content represents a legitimate SDK pattern.
 *
 * Returns true ONLY if:
 *   1. At least one destination is found in the file (full http/https URLs or
 *      `hostname:` / `host:` request options)
 *   2. No destination is a suspicious tunneling/proxy domain or a raw IPv4 address
 *   3. Either the env var matches a curated allowlist entry and EVERY destination
 *      is one of that entry's domains (or a subdomain), or — when no entry
 *      matches — EVERY destination has the env var's brand keyword as the
 *      label directly left of the TLD (brand "ACME" matches "api.acme.com").
 *
 * The brand must sit in that position, not just anywhere in the host:
 * otherwise "acme.attacker.net" (free subdomain) or "collector.app" for
 * APP_SECRET (TLD collision) would be accepted as SDK traffic.
 *
 * @param {string} envVarName - e.g., "SALESFORCE_API_KEY"
 * @param {string} fileContent - source code of the file
 * @returns {boolean} true if SDK pattern (should skip intent pair)
 */
function isSDKPattern(envVarName, fileContent) {
  // Anything we cannot analyse stays suspicious.
  if (typeof envVarName !== 'string' || typeof fileContent !== 'string') return false;

  // Destinations from full URLs (https://api.stripe.com/v1/charges)
  const destinations = new Set();
  for (const url of fileContent.match(URL_EXTRACT_RE) || []) {
    const domain = extractDomain(url);
    if (domain) destinations.add(domain);
  }

  // Destinations from Node.js request options (hostname: 'api.stripe.com').
  // A fresh regex keeps the shared /g pattern's lastIndex untouched.
  const hostnameRe = new RegExp(HOSTNAME_OPTION_RE.source, 'g');
  let hostnameMatch;
  while ((hostnameMatch = hostnameRe.exec(fileContent)) !== null) {
    destinations.add(hostnameMatch[1].toLowerCase());
  }

  // No destination found — can't confirm SDK pattern, default to suspicious
  if (destinations.size === 0) return false;

  const domains = [...destinations];

  // Tunneling/proxy domains and raw IPv4 addresses are never SDK endpoints
  const isDisqualifying = (domain) =>
    SUSPICIOUS_DOMAIN_PATTERNS.test(domain) ||
    /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(domain);
  if (domains.some(isDisqualifying)) return false;

  // 1. Curated allowlist: the first matching entry decides the outcome
  for (const mapping of SDK_ENV_DOMAIN_MAP) {
    if (mapping.envPattern.test(envVarName)) {
      return domains.every((domain) => domainMatchesSuffix(domain, mapping.domains));
    }
  }

  // 2. Heuristic fallback: brand keyword must be the registrable label
  const brand = extractBrandFromEnvVar(envVarName);
  if (!brand || brand.length < 3) return false; // Too short for reliable matching

  const brandLower = brand.toLowerCase();
  return domains.every((domain) => getRegistrableLabel(domain) === brandLower);
}
|
|
196
|
+
|
|
197
|
+
|
|
35
198
|
// ============================================
|
|
36
199
|
// SINK CLASSIFICATION (from existing threats only)
|
|
37
200
|
// ============================================
|
|
@@ -149,9 +312,10 @@ function classifySink(threat) {
|
|
|
149
312
|
* Cross-file detection is handled by module-graph.js (cross_file_dataflow).
|
|
150
313
|
*
|
|
151
314
|
* @param {Array} threats - deduplicated threat array
|
|
315
|
+
* @param {string} [targetPath] - root path for reading source files (SDK pattern detection)
|
|
152
316
|
* @returns {Object} { pairs, intentScore, intentThreats }
|
|
153
317
|
*/
|
|
154
|
-
function buildIntentPairs(threats) {
|
|
318
|
+
function buildIntentPairs(threats, targetPath) {
|
|
155
319
|
// Only consider MEDIUM+ threats. LOW severity means applyFPReductions already
|
|
156
320
|
// determined this is noise (bundler artifact, dist/ file, count threshold exceeded).
|
|
157
321
|
// Re-elevating LOW threats via intent pairing would undo FP reductions.
|
|
@@ -169,15 +333,23 @@ function buildIntentPairs(threats) {
|
|
|
169
333
|
const pairs = [];
|
|
170
334
|
let intentScore = 0;
|
|
171
335
|
|
|
336
|
+
// Cache file contents for SDK pattern checks (lazy, per file)
|
|
337
|
+
const fileContentCache = new Map();
|
|
338
|
+
|
|
172
339
|
// Only pair sources and sinks within the SAME file
|
|
173
340
|
for (const [file, fileThreats] of byFile) {
|
|
174
341
|
const sources = [];
|
|
175
342
|
const sinks = [];
|
|
343
|
+
// Track which threats are credential sources (for env var extraction)
|
|
344
|
+
const sourceThreats = [];
|
|
176
345
|
|
|
177
346
|
for (const t of fileThreats) {
|
|
178
347
|
const srcType = classifySource(t);
|
|
179
348
|
const sinkType = classifySink(t);
|
|
180
|
-
if (srcType)
|
|
349
|
+
if (srcType) {
|
|
350
|
+
sources.push(srcType);
|
|
351
|
+
sourceThreats.push(t);
|
|
352
|
+
}
|
|
181
353
|
if (sinkType) sinks.push(sinkType);
|
|
182
354
|
}
|
|
183
355
|
|
|
@@ -194,6 +366,30 @@ function buildIntentPairs(threats) {
|
|
|
194
366
|
|
|
195
367
|
const pairKey = `${srcType}:${sinkType}:${file}`;
|
|
196
368
|
if (pairSet.has(pairKey)) continue;
|
|
369
|
+
|
|
370
|
+
// Destination-aware SDK check: credential_read → network_external
|
|
371
|
+
// If the env var matches the API domain, this is legitimate SDK usage
|
|
372
|
+
if (srcType === 'credential_read' && sinkType === 'network_external' && targetPath) {
|
|
373
|
+
const envVarName = extractEnvVarFromMessage(sourceThreats);
|
|
374
|
+
if (envVarName) {
|
|
375
|
+
try {
|
|
376
|
+
let content = fileContentCache.get(file);
|
|
377
|
+
if (content === undefined) {
|
|
378
|
+
const filePath = path.join(targetPath, file);
|
|
379
|
+
content = fs.readFileSync(filePath, 'utf8');
|
|
380
|
+
fileContentCache.set(file, content);
|
|
381
|
+
}
|
|
382
|
+
if (isSDKPattern(envVarName, content)) {
|
|
383
|
+
// SDK pattern confirmed — skip this pair
|
|
384
|
+
pairSet.add(pairKey); // Mark as seen to avoid re-checking
|
|
385
|
+
continue;
|
|
386
|
+
}
|
|
387
|
+
} catch {
|
|
388
|
+
// File read error — default to suspicious (CRITICAL)
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
|
|
197
393
|
pairSet.add(pairKey);
|
|
198
394
|
|
|
199
395
|
pairs.push({
|
|
@@ -235,5 +431,9 @@ module.exports = {
|
|
|
235
431
|
classifySource,
|
|
236
432
|
classifySink,
|
|
237
433
|
buildIntentPairs,
|
|
238
|
-
COHERENCE_MATRIX
|
|
434
|
+
COHERENCE_MATRIX,
|
|
435
|
+
isSDKPattern,
|
|
436
|
+
extractEnvVarFromMessage,
|
|
437
|
+
extractBrandFromEnvVar,
|
|
438
|
+
SDK_ENV_DOMAIN_MAP
|
|
239
439
|
};
|