muaddib-scanner 2.11.114 → 2.11.116
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.114.json → self-scan-v2.11.116.json} +1 -1
- package/src/intent-graph.js +34 -192
- package/src/pipeline/executor.js +5 -1
- package/src/pipeline/processor.js +15 -7
- package/src/scanner/ast-detectors/handle-post-walk.js +9 -2
- package/src/scanner/ast.js +11 -4
- package/src/scanner/dataflow.js +5 -53
- package/src/scanner/env-var-classification.js +75 -0
- package/src/scanner/module-graph/annotate-sinks.js +8 -5
- package/src/scanner/module-graph/annotate-tainted.js +18 -0
- package/src/scanner/module-graph/constants.js +15 -2
- package/src/scanner/module-graph/detect-cross-file.js +56 -4
- package/src/scanner/module-graph/index.js +2 -2
- package/src/scanner/module-graph/parse-utils.js +13 -1
- package/src/sdk-destination.js +328 -0
package/package.json
CHANGED
package/src/intent-graph.js
CHANGED
|
@@ -36,180 +36,17 @@ const SOURCE_TYPES = {
|
|
|
36
36
|
// Sensitive env var patterns — env_access referencing these is credential theft, not config
|
|
37
37
|
const SENSITIVE_ENV_PATTERNS = /TOKEN|KEY|SECRET|PASSWORD|CREDENTIAL|API_KEY|AUTH/i;
|
|
38
38
|
|
|
39
|
-
//
|
|
40
|
-
//
|
|
41
|
-
//
|
|
42
|
-
//
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
{ envPattern: /^SALESFORCE_/i, domains: ['salesforce.com', 'force.com'] },
|
|
51
|
-
{ envPattern: /^SUPABASE_/i, domains: ['supabase.co', 'supabase.com'] },
|
|
52
|
-
{ envPattern: /^MAILGUN_/i, domains: ['mailgun.net', 'mailgun.com'] },
|
|
53
|
-
{ envPattern: /^STRIPE_/i, domains: ['stripe.com'] },
|
|
54
|
-
{ envPattern: /^TWILIO_/i, domains: ['twilio.com'] },
|
|
55
|
-
{ envPattern: /^SENDGRID_/i, domains: ['sendgrid.com', 'sendgrid.net'] },
|
|
56
|
-
{ envPattern: /^DATADOG_/i, domains: ['datadoghq.com'] },
|
|
57
|
-
{ envPattern: /^SENTRY_/i, domains: ['sentry.io'] },
|
|
58
|
-
{ envPattern: /^SLACK_/i, domains: ['slack.com'] },
|
|
59
|
-
{ envPattern: /^GITHUB_/i, domains: ['github.com', 'githubusercontent.com'] },
|
|
60
|
-
{ envPattern: /^GITLAB_/i, domains: ['gitlab.com'] },
|
|
61
|
-
{ envPattern: /^CLOUDFLARE_/i, domains: ['cloudflare.com'] },
|
|
62
|
-
{ envPattern: /^OPENAI_/i, domains: ['openai.com'] },
|
|
63
|
-
{ envPattern: /^ANTHROPIC_/i, domains: ['anthropic.com'] },
|
|
64
|
-
{ envPattern: /^MONGODB_|^MONGO_/i, domains: ['mongodb.com', 'mongodb.net'] },
|
|
65
|
-
{ envPattern: /^AUTH0_/i, domains: ['auth0.com'] },
|
|
66
|
-
{ envPattern: /^HUBSPOT_/i, domains: ['hubspot.com', 'hubapi.com'] },
|
|
67
|
-
{ envPattern: /^CONTENTFUL_/i, domains: ['contentful.com'] },
|
|
68
|
-
];
|
|
69
|
-
|
|
70
|
-
// Tokens stripped when extracting brand keyword from env var name
|
|
71
|
-
const ENV_NOISE_TOKENS = new Set([
|
|
72
|
-
'API', 'KEY', 'SECRET', 'TOKEN', 'PASSWORD', 'CREDENTIAL',
|
|
73
|
-
'AUTH', 'ACCESS', 'PRIVATE', 'PUBLIC', 'CLIENT', 'ID', 'URL'
|
|
74
|
-
]);
|
|
75
|
-
|
|
76
|
-
// Suspicious tunneling/proxy domains — never considered legitimate SDK destinations
|
|
77
|
-
const SUSPICIOUS_DOMAIN_PATTERNS = /ngrok|serveo|localtunnel|burpcollaborator|requestbin|pipedream|webhook\.site/i;
|
|
78
|
-
|
|
79
|
-
// URL extraction regex (matches http/https URLs in source code)
|
|
80
|
-
const URL_EXTRACT_RE = /https?:\/\/[a-zA-Z0-9\-._~:/?#[\]@!$&'()*+,;=%]+/g;
|
|
81
|
-
|
|
82
|
-
// Hostname extraction from Node.js request options: hostname: 'domain.com' or host: 'domain.com'
|
|
83
|
-
const HOSTNAME_OPTION_RE = /(?:hostname|host)\s*:\s*['"`]([a-zA-Z0-9\-._]+)['"`]/g;
|
|
84
|
-
|
|
85
|
-
/**
|
|
86
|
-
* Extract env var name from an intent source threat message.
|
|
87
|
-
* Messages look like: "process.env.SALESFORCE_API_KEY", "env var MAILGUN_API_KEY accessed"
|
|
88
|
-
*/
|
|
89
|
-
function extractEnvVarFromMessage(sourceThreats) {
|
|
90
|
-
for (const t of sourceThreats) {
|
|
91
|
-
if (!t.message) continue;
|
|
92
|
-
// Match process.env.VAR_NAME pattern
|
|
93
|
-
const envMatch = t.message.match(/process\.env\.([A-Z_][A-Z0-9_]*)/i);
|
|
94
|
-
if (envMatch) return envMatch[1];
|
|
95
|
-
// Match standalone VAR_NAME patterns (e.g., "SALESFORCE_API_KEY")
|
|
96
|
-
const varMatch = t.message.match(/\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)\b/);
|
|
97
|
-
if (varMatch) return varMatch[1];
|
|
98
|
-
}
|
|
99
|
-
return null;
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
/**
|
|
103
|
-
* Extract brand keyword from env var name by removing noise tokens.
|
|
104
|
-
* MAILGUN_API_KEY → MAILGUN, SALESFORCE_CLIENT_SECRET → SALESFORCE
|
|
105
|
-
*/
|
|
106
|
-
function extractBrandFromEnvVar(envVarName) {
|
|
107
|
-
const parts = envVarName.toUpperCase().split('_');
|
|
108
|
-
const brandParts = parts.filter(p => !ENV_NOISE_TOKENS.has(p) && p.length > 0);
|
|
109
|
-
return brandParts.length > 0 ? brandParts[0] : null;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
/**
|
|
113
|
-
* Extract domain from a URL string.
|
|
114
|
-
* Returns the hostname (without port).
|
|
115
|
-
*/
|
|
116
|
-
function extractDomain(url) {
|
|
117
|
-
try {
|
|
118
|
-
const match = url.match(/^https?:\/\/([^/:?#]+)/i);
|
|
119
|
-
return match ? match[1].toLowerCase() : null;
|
|
120
|
-
} catch {
|
|
121
|
-
return null;
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
/**
|
|
126
|
-
* Check if a domain matches any of the expected SDK domains (suffix match).
|
|
127
|
-
* api.mailgun.net matches mailgun.net, sub.api.stripe.com matches stripe.com
|
|
128
|
-
*/
|
|
129
|
-
function domainMatchesSuffix(domain, expectedDomains) {
|
|
130
|
-
for (const expected of expectedDomains) {
|
|
131
|
-
if (domain === expected || domain.endsWith('.' + expected)) return true;
|
|
132
|
-
}
|
|
133
|
-
return false;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
/**
|
|
137
|
-
* Check if an env var + file content represents a legitimate SDK pattern.
|
|
138
|
-
*
|
|
139
|
-
* Returns true ONLY if:
|
|
140
|
-
* 1. The env var matches a known SDK mapping (allowlist) OR heuristic brand match
|
|
141
|
-
* 2. ALL URLs in the file point to domains matching the expected SDK
|
|
142
|
-
* 3. No suspicious tunneling/proxy domains are present
|
|
143
|
-
*
|
|
144
|
-
* @param {string} envVarName - e.g., "SALESFORCE_API_KEY"
|
|
145
|
-
* @param {string} fileContent - source code of the file
|
|
146
|
-
* @returns {boolean} true if SDK pattern (should skip intent pair)
|
|
147
|
-
*/
|
|
148
|
-
function isSDKPattern(envVarName, fileContent) {
|
|
149
|
-
// Extract domains from full URLs (https://api.stripe.com/v1/charges)
|
|
150
|
-
const urls = fileContent.match(URL_EXTRACT_RE) || [];
|
|
151
|
-
const domains = urls.map(u => extractDomain(u)).filter(Boolean);
|
|
152
|
-
|
|
153
|
-
// Also extract hostnames from Node.js request options (hostname: 'api.stripe.com')
|
|
154
|
-
let hostnameMatch;
|
|
155
|
-
const hostnameRe = new RegExp(HOSTNAME_OPTION_RE.source, 'g');
|
|
156
|
-
while ((hostnameMatch = hostnameRe.exec(fileContent)) !== null) {
|
|
157
|
-
const hostname = hostnameMatch[1].toLowerCase();
|
|
158
|
-
if (hostname && !domains.includes(hostname)) {
|
|
159
|
-
domains.push(hostname);
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
// No URLs found — can't confirm SDK pattern, default to suspicious
|
|
164
|
-
if (domains.length === 0) return false;
|
|
165
|
-
|
|
166
|
-
// Check for suspicious tunneling domains — immediate fail
|
|
167
|
-
for (const domain of domains) {
|
|
168
|
-
if (SUSPICIOUS_DOMAIN_PATTERNS.test(domain)) return false;
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
// Check for raw IP addresses — immediate fail
|
|
172
|
-
for (const domain of domains) {
|
|
173
|
-
if (/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(domain)) return false;
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
// 1. Try curated allowlist first (strict: ALL domains must match)
|
|
177
|
-
// Curated allowlist is authoritative — no relaxation here to prevent
|
|
178
|
-
// attacker injecting a legitimate domain alongside their C2 domain.
|
|
179
|
-
for (const mapping of SDK_ENV_DOMAIN_MAP) {
|
|
180
|
-
if (mapping.envPattern.test(envVarName)) {
|
|
181
|
-
return domains.every(d => domainMatchesSuffix(d, mapping.domains));
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
// R2: credential-suffixed env vars get relaxed domain matching (at least ONE match).
|
|
186
|
-
// SDKs commonly call their own API + CDN/logging/analytics domains.
|
|
187
|
-
// Safety: suspicious domains and raw IPs are already rejected above.
|
|
188
|
-
// Only applies to the heuristic fallback — curated allowlist stays strict.
|
|
189
|
-
const CREDENTIAL_SUFFIXES = ['_API_KEY', '_SECRET', '_TOKEN', '_SECRET_KEY', '_ACCESS_KEY'];
|
|
190
|
-
const upperName = envVarName.toUpperCase();
|
|
191
|
-
const hasCredentialSuffix = CREDENTIAL_SUFFIXES.some(s => upperName.endsWith(s));
|
|
192
|
-
|
|
193
|
-
// 2. Heuristic fallback: extract brand keyword and check domain labels
|
|
194
|
-
const brand = extractBrandFromEnvVar(envVarName);
|
|
195
|
-
if (!brand || brand.length < 3) return false; // Too short for reliable matching
|
|
196
|
-
|
|
197
|
-
const brandLower = brand.toLowerCase();
|
|
198
|
-
// 2a. Strict check: every domain matches brand (existing behavior)
|
|
199
|
-
// e.g., brand "ACME" matches "api.acme.com" (label "acme") but not "api.acmetech.com"
|
|
200
|
-
if (domains.every(d => {
|
|
201
|
-
const labels = d.split('.');
|
|
202
|
-
return labels.some(label => label === brandLower);
|
|
203
|
-
})) return true;
|
|
204
|
-
|
|
205
|
-
// 2b. R2 relaxed: credential suffix + at least one domain matches brand
|
|
206
|
-
if (hasCredentialSuffix && domains.some(d => {
|
|
207
|
-
const labels = d.split('.');
|
|
208
|
-
return labels.some(label => label === brandLower);
|
|
209
|
-
})) return true;
|
|
210
|
-
|
|
211
|
-
return false;
|
|
212
|
-
}
|
|
39
|
+
// Destination-aware SDK detection — extracted to a shared leaf module
|
|
40
|
+
// (src/sdk-destination.js) so the same logic gates dataflow.js and the cross-file /
|
|
41
|
+
// detached taint detectors, not just intent coherence. Re-exported below for
|
|
42
|
+
// backward compatibility (dataflow.js imports isSDKPattern from this module).
|
|
43
|
+
const {
|
|
44
|
+
isSDKPattern,
|
|
45
|
+
networkDestinationsAllBenign,
|
|
46
|
+
extractEnvVarFromMessage,
|
|
47
|
+
extractBrandFromEnvVar,
|
|
48
|
+
SDK_ENV_DOMAIN_MAP,
|
|
49
|
+
} = require('./sdk-destination.js');
|
|
213
50
|
|
|
214
51
|
|
|
215
52
|
// ============================================
|
|
@@ -384,26 +221,31 @@ function buildIntentPairs(threats, targetPath) {
|
|
|
384
221
|
const pairKey = `${srcType}:${sinkType}:${file}`;
|
|
385
222
|
if (pairSet.has(pairKey)) continue;
|
|
386
223
|
|
|
387
|
-
// Destination-aware
|
|
388
|
-
//
|
|
224
|
+
// Destination-aware check: credential_read → network_external. Two
|
|
225
|
+
// complementary gates, EITHER ⇒ legitimate, skip the pair:
|
|
226
|
+
// (1) isSDKPattern — per-env-var: the env var brand matches its API domain
|
|
227
|
+
// (e.g. STRIPE_API_KEY → stripe.com).
|
|
228
|
+
// (2) networkDestinationsAllBenign — env-var-independent: EVERY network host
|
|
229
|
+
// in the file is a provider/local/reserved destination. Catches multi-
|
|
230
|
+
// provider CLIs (reads GEMINI_API_KEY *and* ANTHROPIC_API_KEY, calls both)
|
|
231
|
+
// and providers absent from the curated env→domain map. Same anti-evasion
|
|
232
|
+
// floor (any suspicious/unknown/public-IP host ⇒ keep firing).
|
|
389
233
|
if (srcType === 'credential_read' && sinkType === 'network_external' && targetPath) {
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
continue;
|
|
403
|
-
}
|
|
404
|
-
} catch {
|
|
405
|
-
// File read error — default to suspicious (CRITICAL)
|
|
234
|
+
try {
|
|
235
|
+
let content = fileContentCache.get(file);
|
|
236
|
+
if (content === undefined) {
|
|
237
|
+
const filePath = path.join(targetPath, file);
|
|
238
|
+
content = fs.readFileSync(filePath, 'utf8');
|
|
239
|
+
fileContentCache.set(file, content);
|
|
240
|
+
}
|
|
241
|
+
const envVarName = extractEnvVarFromMessage(sourceThreats);
|
|
242
|
+
if ((envVarName && isSDKPattern(envVarName, content)) || networkDestinationsAllBenign(content)) {
|
|
243
|
+
// First-party/SDK destination — skip this pair
|
|
244
|
+
pairSet.add(pairKey); // Mark as seen to avoid re-checking
|
|
245
|
+
continue;
|
|
406
246
|
}
|
|
247
|
+
} catch {
|
|
248
|
+
// File read error — default to suspicious (CRITICAL)
|
|
407
249
|
}
|
|
408
250
|
}
|
|
409
251
|
|
package/src/pipeline/executor.js
CHANGED
|
@@ -17,7 +17,7 @@ const { scanGitHubActions } = require('../scanner/github-actions.js');
|
|
|
17
17
|
const { scanEntropy } = require('../scanner/entropy.js');
|
|
18
18
|
const { scanAIConfig } = require('../scanner/ai-config.js');
|
|
19
19
|
const { deobfuscate } = require('../scanner/deobfuscate.js');
|
|
20
|
-
const { buildModuleGraph, annotateTaintedExports, detectCrossFileFlows, annotateSinkExports, detectCallbackCrossFileFlows, detectEventEmitterFlows } = require('../scanner/module-graph');
|
|
20
|
+
const { buildModuleGraph, annotateTaintedExports, detectCrossFileFlows, filterFirstPartyNetworkFlows, annotateSinkExports, detectCallbackCrossFileFlows, detectEventEmitterFlows } = require('../scanner/module-graph');
|
|
21
21
|
const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
22
22
|
const { normalizePythonName } = require('../scanner/python.js');
|
|
23
23
|
const { scanPythonSource } = require('../scanner/python-source.js');
|
|
@@ -173,6 +173,10 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
173
173
|
// EventEmitter cross-module flow detection
|
|
174
174
|
const emitterFlows = await yieldThen(() => detectEventEmitterFlows(graph, tainted, sinkAnnotations, targetPath));
|
|
175
175
|
crossFileFlows = crossFileFlows.concat(emitterFlows);
|
|
176
|
+
// FP gate (segment A): drop cross_file_dataflow flows whose network sink targets
|
|
177
|
+
// only first-party/local/provider destinations — legit SDK calls, not exfil.
|
|
178
|
+
// Suspicious/unknown/public-IP destinations and exec sinks are kept (ecto stays).
|
|
179
|
+
crossFileFlows = filterFirstPartyNetworkFlows(crossFileFlows, targetPath);
|
|
176
180
|
};
|
|
177
181
|
let graphTimerId;
|
|
178
182
|
const timeout = new Promise((_, reject) => {
|
|
@@ -8,6 +8,7 @@ const { applyFPReductions, applyCompoundBoosts, calculateRiskScore, getSeverityW
|
|
|
8
8
|
const { loadPriorVersionSignatures, computeSignatures, saveCachedSignatures } = require('../scoring/delta-multiplier.js');
|
|
9
9
|
const { annotateConfidenceTiers } = require('../rules/confidence-tiers.js');
|
|
10
10
|
const { buildIntentPairs } = require('../intent-graph.js');
|
|
11
|
+
const { networkDestinationsAllBenign } = require('../sdk-destination.js');
|
|
11
12
|
const { debugLog } = require('../utils.js');
|
|
12
13
|
const { getPackageMetadata } = require('../scanner/npm-registry.js');
|
|
13
14
|
const { checkReleaseZero } = require('../scanner/release-zero.js');
|
|
@@ -351,13 +352,20 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
|
|
|
351
352
|
const hasCredFlow = fileThreats.some(t => t.type === 'suspicious_dataflow');
|
|
352
353
|
const alreadyCompound = fileThreats.some(t => t.type === 'detached_credential_exfil');
|
|
353
354
|
if (hasDetached && hasCredFlow && !alreadyCompound) {
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
355
|
+
// FP gate (segment A): skip when the file's network destinations are ALL
|
|
356
|
+
// first-party/local/provider (legit SDK/agent), not exfil. Unknown/suspicious/
|
|
357
|
+
// public-IP host — or unreadable file — keeps it firing (confirmed-benign only).
|
|
358
|
+
let destAllBenign = false;
|
|
359
|
+
try { destAllBenign = networkDestinationsAllBenign(fs.readFileSync(path.join(targetPath, file), 'utf8')); } catch { /* unreadable → not benign */ }
|
|
360
|
+
if (!destAllBenign) {
|
|
361
|
+
deduped.push({
|
|
362
|
+
type: 'detached_credential_exfil',
|
|
363
|
+
severity: 'CRITICAL',
|
|
364
|
+
message: 'Detached process + credential dataflow — background exfiltration (cross-scanner compound).',
|
|
365
|
+
file,
|
|
366
|
+
count: 1
|
|
367
|
+
});
|
|
368
|
+
}
|
|
361
369
|
}
|
|
362
370
|
}
|
|
363
371
|
}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
const { networkDestinationsAllBenign } = require('../../sdk-destination.js');
|
|
4
|
+
|
|
3
5
|
function handlePostWalk(ctx) {
|
|
4
6
|
// SANDWORM_MODE: zlib inflate + base64 decode + eval/Function/Module._compile = obfuscated payload
|
|
5
7
|
if (ctx.hasZlibInflate && ctx.hasBase64Decode && ctx.hasDynamicExec) {
|
|
@@ -322,7 +324,12 @@ function handlePostWalk(ctx) {
|
|
|
322
324
|
const hasSensitiveEnvInFile = ctx.threats.some(t =>
|
|
323
325
|
t.file === ctx.relFile && t.type === 'env_access' && t.severity === 'HIGH'
|
|
324
326
|
);
|
|
325
|
-
|
|
327
|
+
// FP gate (segment A): suppress this credential→network compound when EVERY network
|
|
328
|
+
// destination in the file is first-party/local/provider (e.g. an otel collector on
|
|
329
|
+
// localhost, an SDK POST to its own API). A suspicious/unknown/public-IP host — or no
|
|
330
|
+
// literal host at all — leaves it firing (conservative: confirmed-benign only).
|
|
331
|
+
const destAllBenign = ctx._content ? networkDestinationsAllBenign(ctx._content) : false;
|
|
332
|
+
if (hasDetachedInFile && hasSensitiveEnvInFile && ctx.hasNetworkCallInFile && !destAllBenign) {
|
|
326
333
|
ctx.threats.push({
|
|
327
334
|
type: 'detached_credential_exfil',
|
|
328
335
|
severity: 'CRITICAL',
|
|
@@ -334,7 +341,7 @@ function handlePostWalk(ctx) {
|
|
|
334
341
|
// Audit v3 bypass fix: uncaughtException + env access + network = silent exfiltration
|
|
335
342
|
// Pattern: process.on('uncaughtException', handler) that reads env vars and sends to network.
|
|
336
343
|
// Never legitimate — error handlers don't need to send credentials to external servers.
|
|
337
|
-
if (ctx.hasUncaughtExceptionHandler && hasSensitiveEnvInFile && ctx.hasNetworkCallInFile) {
|
|
344
|
+
if (ctx.hasUncaughtExceptionHandler && hasSensitiveEnvInFile && ctx.hasNetworkCallInFile && !destAllBenign) {
|
|
338
345
|
ctx.threats.push({
|
|
339
346
|
type: 'uncaught_exception_exfil',
|
|
340
347
|
severity: 'CRITICAL',
|
package/src/scanner/ast.js
CHANGED
|
@@ -17,6 +17,13 @@ const {
|
|
|
17
17
|
// Check if credential keywords appear INSIDE regex literals or new RegExp() patterns.
|
|
18
18
|
// Only true when the keyword is part of the regex pattern itself, not just a string elsewhere in the file.
|
|
19
19
|
const CREDENTIAL_REGEX_KEYWORDS = /bearer|password|secret|token|credential|api.?key/i;
|
|
20
|
+
// axios call shapes — a network call the legacy regexes miss (caught only by ioc_string_match).
|
|
21
|
+
// Covers BOTH the identifier form (axios(...) / axios.get|post|...(...)) and the inline-require
|
|
22
|
+
// form (require('axios').get(...) — the chalk-pro/jsonkeeper staged-loader shape). Call-shaped
|
|
23
|
+
// only, so it never matches a bare `require('axios')` import, `import axios`, `myaxios.get`, or
|
|
24
|
+
// `axios` in a comment/string. Bare instance-var calls (const c = axios.create(); c.get()) are a
|
|
25
|
+
// known follow-up gap; the create() verb catches the create site itself.
|
|
26
|
+
const AXIOS_NETWORK_CALL_RE = /(?:\baxios|require\s*\(\s*['"]axios['"]\s*\))\s*(?:\(|\.\s*(?:get|post|put|patch|delete|request|head|options|create)\s*\()/;
|
|
20
27
|
function hasCredentialInsideRegex(content) {
|
|
21
28
|
// Check regex literals: /...pattern.../flags
|
|
22
29
|
const regexLiteralRe = /\/(?!\*)(?:[^/\\]|\\.)+\/[gimsuy]*/g;
|
|
@@ -172,9 +179,9 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
172
179
|
// SANDWORM_MODE P2: env harvesting co-occurrence
|
|
173
180
|
hasEnvEnumeration: false, // Object.entries/keys/values(process.env)
|
|
174
181
|
hasEnvHarvestPattern: /\b(KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|NPM|AWS|SSH|WEBHOOK)\b/.test(content),
|
|
175
|
-
hasNetworkCallInFile: /\b(fetch|https?\.request|https?\.get|dns\.resolve)\b/.test(content),
|
|
176
|
-
// C5: Non-fetch network calls indicate independent network channel (NOT WASM loading)
|
|
177
|
-
hasNonFetchNetworkCall: /\bhttps?\.request\b|\bhttps?\.get\b|\bdns\.resolve\b/.test(content),
|
|
182
|
+
hasNetworkCallInFile: /\b(fetch|https?\.request|https?\.get|dns\.resolve)\b/.test(content) || AXIOS_NETWORK_CALL_RE.test(content),
|
|
183
|
+
// C5: Non-fetch network calls indicate independent network channel (NOT WASM loading). axios is non-fetch.
|
|
184
|
+
hasNonFetchNetworkCall: /\bhttps?\.request\b|\bhttps?\.get\b|\bdns\.resolve\b/.test(content) || AXIOS_NETWORK_CALL_RE.test(content),
|
|
178
185
|
// Credential regex harvesting: regex literals or new RegExp() whose PATTERN contains credential keywords
|
|
179
186
|
// Must check that the keyword is inside the regex, not just anywhere in the file
|
|
180
187
|
hasCredentialRegex: hasCredentialInsideRegex(content),
|
|
@@ -197,7 +204,7 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
197
204
|
gitHooksPathVars: new Map(),
|
|
198
205
|
ideConfigPathVars: new Map(),
|
|
199
206
|
// Wave 4: compound detection — fetch + decrypt + eval chain
|
|
200
|
-
hasRemoteFetch: /\bhttps?\.(get|request)\b/.test(content) || /\bfetch\s*\(/.test(content),
|
|
207
|
+
hasRemoteFetch: /\bhttps?\.(get|request)\b/.test(content) || /\bfetch\s*\(/.test(content) || AXIOS_NETWORK_CALL_RE.test(content),
|
|
201
208
|
// Safe domain exclusion: if ALL URLs in file are from known registries, suppress download_exec_binary
|
|
202
209
|
fetchOnlySafeDomains: false, // computed below after URL extraction
|
|
203
210
|
hasCryptoDecipher: /\bcreateDecipher(iv)?\s*\(/.test(content),
|
package/src/scanner/dataflow.js
CHANGED
|
@@ -1149,58 +1149,10 @@ function isCredentialPath(arg, sensitivePathVars) {
|
|
|
1149
1149
|
return false;
|
|
1150
1150
|
}
|
|
1151
1151
|
|
|
1152
|
-
//
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
// Env var prefixes for tool-internal configuration (not external credentials)
|
|
1159
|
-
const SAFE_ENV_PREFIXES = ['MUADDIB_', 'npm_config_', 'npm_lifecycle_', 'npm_package_'];
|
|
1160
|
-
|
|
1161
|
-
// P6: Node.js runtime config env vars that are not credentials.
|
|
1162
|
-
// NODE_TLS_REJECT_UNAUTHORIZED matches "AUTH" in "UNAUTHORIZED" → false positive.
|
|
1163
|
-
// Real credential exfiltration targets API_KEY, TOKEN, SECRET, PASSWORD.
|
|
1164
|
-
const DATAFLOW_SAFE_ENV_VARS = new Set([
|
|
1165
|
-
'NODE_TLS_REJECT_UNAUTHORIZED', 'NODE_OPTIONS', 'NODE_EXTRA_CA_CERTS',
|
|
1166
|
-
'NODE_ENV', 'NODE_PATH', 'NODE_DEBUG',
|
|
1167
|
-
'DEBUG', 'CI', 'HTTPS_PROXY', 'HTTP_PROXY', 'NO_PROXY',
|
|
1168
|
-
'LANG', 'TZ', 'PORT', 'HOST'
|
|
1169
|
-
// Note: HOME, USER, HOSTNAME stay sensitive — fingerprint exfiltration detection.
|
|
1170
|
-
]);
|
|
1171
|
-
|
|
1172
|
-
function isSensitiveEnv(name) {
|
|
1173
|
-
const upper = name.toUpperCase();
|
|
1174
|
-
if (DATAFLOW_SAFE_ENV_VARS.has(upper)) return false;
|
|
1175
|
-
if (SYSTEM_IDENTITY_ENVS.has(upper)) return true;
|
|
1176
|
-
if (SAFE_ENV_PREFIXES.some(p => upper.startsWith(p))) return false;
|
|
1177
|
-
const sensitive = ['TOKEN', 'SECRET', 'KEY', 'PASSWORD', 'CREDENTIAL', 'AUTH', 'NPM', 'AWS', 'AZURE', 'GCP'];
|
|
1178
|
-
return sensitive.some(s => upper.includes(s));
|
|
1179
|
-
}
|
|
1180
|
-
|
|
1181
|
-
// Audit 2026-05 DF-C4: credential-tier env vars distinguished from generic env_read.
|
|
1182
|
-
// These represent authentication material (NPM_TOKEN, GITHUB_TOKEN, AWS_SECRET_ACCESS_KEY,
|
|
1183
|
-
// STRIPE_API_KEY etc.) — strictly narrower than isSensitiveEnv. Sources of this type
|
|
1184
|
-
// participate in hasHighRiskSource so credential exfil patterns are NOT downgraded by the
|
|
1185
|
-
// HIGH→MEDIUM graduation. System identity vars (HOME, USER) remain plain env_read since
|
|
1186
|
-
// they are fingerprinting signals, not credentials.
|
|
1187
|
-
const KNOWN_CREDENTIAL_ENV_VARS = new Set([
|
|
1188
|
-
'NPM_TOKEN', 'GITHUB_TOKEN', 'GH_TOKEN', 'NODE_AUTH_TOKEN',
|
|
1189
|
-
'CIRCLE_TOKEN', 'GITLAB_TOKEN', 'CARGO_REGISTRY_TOKEN', 'PYPI_TOKEN',
|
|
1190
|
-
'GOOGLE_APPLICATION_CREDENTIALS', 'AZURE_CLIENT_SECRET',
|
|
1191
|
-
'SENTRY_AUTH_TOKEN', 'NPM_AUTH_TOKEN', 'NPM_CONFIG_AUTHTOKEN'
|
|
1192
|
-
]);
|
|
1193
|
-
|
|
1194
|
-
const CREDENTIAL_ENV_SUFFIX_RE = /(?:^|_)(?:TOKEN|SECRET|PASSWORD|PASSPHRASE|CREDENTIAL|CREDENTIALS|API_KEY|ACCESS_KEY|ACCESS_KEY_ID|SECRET_KEY|PRIVATE_KEY|SIGNING_KEY|SESSION_TOKEN|REFRESH_TOKEN|AUTH_TOKEN)$/;
|
|
1195
|
-
|
|
1196
|
-
function isCredentialEnv(name) {
|
|
1197
|
-
const upper = name.toUpperCase();
|
|
1198
|
-
// System identity vars are fingerprinting, not credentials
|
|
1199
|
-
if (SYSTEM_IDENTITY_ENVS.has(upper)) return false;
|
|
1200
|
-
// Public keys are not credentials (e.g., SSH_PUBLIC_KEY, GPG_PUBLIC_KEY)
|
|
1201
|
-
if (upper.includes('PUBLIC_KEY') || upper.includes('PUBKEY')) return false;
|
|
1202
|
-
if (KNOWN_CREDENTIAL_ENV_VARS.has(upper)) return true;
|
|
1203
|
-
return CREDENTIAL_ENV_SUFFIX_RE.test(upper);
|
|
1204
|
-
}
|
|
1152
|
+
// Env-var credential classification (isSensitiveEnv / isCredentialEnv + their sets) was
|
|
1153
|
+
// extracted to a shared leaf module so the module-graph cross-file taint can apply the EXACT
|
|
1154
|
+
// same credential-vs-config distinction (it previously tainted any process.env read). Imported
|
|
1155
|
+
// at module load → in scope for the analyzeDataFlow call sites above. Behavior unchanged here.
|
|
1156
|
+
const { isSensitiveEnv, isCredentialEnv } = require('./env-var-classification.js');
|
|
1205
1157
|
|
|
1206
1158
|
module.exports = { analyzeDataFlow };
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Environment-variable credential classification (shared leaf module).
|
|
4
|
+
// Extracted verbatim from dataflow.js so the SAME credential-vs-config distinction can
|
|
5
|
+
// gate every taint source — dataflow (scanner/dataflow.js) AND the module-graph cross-file
|
|
6
|
+
// taint (scanner/module-graph/annotate-tainted.js), which previously tainted ANY process.env
|
|
7
|
+
// read indiscriminately (config vars like STORYBOARD_SERVER_URL → false credential_exfil).
|
|
8
|
+
// No project dependencies → safe to require from any scanner, no cycles.
|
|
9
|
+
|
|
10
|
+
// System identity env vars used for fingerprinting/exfiltration
|
|
11
|
+
const SYSTEM_IDENTITY_ENVS = new Set([
|
|
12
|
+
'USER', 'USERNAME', 'LOGNAME', 'HOME', 'HOSTNAME',
|
|
13
|
+
'USERPROFILE', 'COMPUTERNAME', 'WHOAMI'
|
|
14
|
+
]);
|
|
15
|
+
|
|
16
|
+
// Env var prefixes for tool-internal configuration (not external credentials)
|
|
17
|
+
const SAFE_ENV_PREFIXES = ['MUADDIB_', 'npm_config_', 'npm_lifecycle_', 'npm_package_'];
|
|
18
|
+
|
|
19
|
+
// P6: Node.js runtime config env vars that are not credentials.
|
|
20
|
+
// NODE_TLS_REJECT_UNAUTHORIZED matches "AUTH" in "UNAUTHORIZED" → false positive.
|
|
21
|
+
// Real credential exfiltration targets API_KEY, TOKEN, SECRET, PASSWORD.
|
|
22
|
+
const DATAFLOW_SAFE_ENV_VARS = new Set([
|
|
23
|
+
'NODE_TLS_REJECT_UNAUTHORIZED', 'NODE_OPTIONS', 'NODE_EXTRA_CA_CERTS',
|
|
24
|
+
'NODE_ENV', 'NODE_PATH', 'NODE_DEBUG',
|
|
25
|
+
'DEBUG', 'CI', 'HTTPS_PROXY', 'HTTP_PROXY', 'NO_PROXY',
|
|
26
|
+
'LANG', 'TZ', 'PORT', 'HOST'
|
|
27
|
+
// Note: HOME, USER, HOSTNAME stay sensitive — fingerprint exfiltration detection.
|
|
28
|
+
]);
|
|
29
|
+
|
|
30
|
+
// True when an env var name is a sensitive source (credential material OR system-identity
|
|
31
|
+
// fingerprinting). Config vars (URL/HOST/PORT/NODE_ENV/proxy/...) return false. This is the
|
|
32
|
+
// classification module-graph cross-file taint now shares (it formerly tainted every read).
|
|
33
|
+
function isSensitiveEnv(name) {
|
|
34
|
+
const upper = name.toUpperCase();
|
|
35
|
+
if (DATAFLOW_SAFE_ENV_VARS.has(upper)) return false;
|
|
36
|
+
if (SYSTEM_IDENTITY_ENVS.has(upper)) return true;
|
|
37
|
+
if (SAFE_ENV_PREFIXES.some(p => upper.startsWith(p))) return false;
|
|
38
|
+
const sensitive = ['TOKEN', 'SECRET', 'KEY', 'PASSWORD', 'CREDENTIAL', 'AUTH', 'NPM', 'AWS', 'AZURE', 'GCP'];
|
|
39
|
+
return sensitive.some(s => upper.includes(s));
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Audit 2026-05 DF-C4: credential-tier env vars distinguished from generic env_read.
|
|
43
|
+
// These represent authentication material (NPM_TOKEN, GITHUB_TOKEN, AWS_SECRET_ACCESS_KEY,
|
|
44
|
+
// STRIPE_API_KEY etc.) — strictly narrower than isSensitiveEnv. Sources of this type
|
|
45
|
+
// participate in hasHighRiskSource so credential exfil patterns are NOT downgraded by the
|
|
46
|
+
// HIGH→MEDIUM graduation. System identity vars (HOME, USER) remain plain env_read since
|
|
47
|
+
// they are fingerprinting signals, not credentials.
|
|
48
|
+
const KNOWN_CREDENTIAL_ENV_VARS = new Set([
|
|
49
|
+
'NPM_TOKEN', 'GITHUB_TOKEN', 'GH_TOKEN', 'NODE_AUTH_TOKEN',
|
|
50
|
+
'CIRCLE_TOKEN', 'GITLAB_TOKEN', 'CARGO_REGISTRY_TOKEN', 'PYPI_TOKEN',
|
|
51
|
+
'GOOGLE_APPLICATION_CREDENTIALS', 'AZURE_CLIENT_SECRET',
|
|
52
|
+
'SENTRY_AUTH_TOKEN', 'NPM_AUTH_TOKEN', 'NPM_CONFIG_AUTHTOKEN'
|
|
53
|
+
]);
|
|
54
|
+
|
|
55
|
+
const CREDENTIAL_ENV_SUFFIX_RE = /(?:^|_)(?:TOKEN|SECRET|PASSWORD|PASSPHRASE|CREDENTIAL|CREDENTIALS|API_KEY|ACCESS_KEY|ACCESS_KEY_ID|SECRET_KEY|PRIVATE_KEY|SIGNING_KEY|SESSION_TOKEN|REFRESH_TOKEN|AUTH_TOKEN)$/;
|
|
56
|
+
|
|
57
|
+
function isCredentialEnv(name) {
|
|
58
|
+
const upper = name.toUpperCase();
|
|
59
|
+
// System identity vars are fingerprinting, not credentials
|
|
60
|
+
if (SYSTEM_IDENTITY_ENVS.has(upper)) return false;
|
|
61
|
+
// Public keys are not credentials (e.g., SSH_PUBLIC_KEY, GPG_PUBLIC_KEY)
|
|
62
|
+
if (upper.includes('PUBLIC_KEY') || upper.includes('PUBKEY')) return false;
|
|
63
|
+
if (KNOWN_CREDENTIAL_ENV_VARS.has(upper)) return true;
|
|
64
|
+
return CREDENTIAL_ENV_SUFFIX_RE.test(upper);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
module.exports = {
|
|
68
|
+
SYSTEM_IDENTITY_ENVS,
|
|
69
|
+
SAFE_ENV_PREFIXES,
|
|
70
|
+
DATAFLOW_SAFE_ENV_VARS,
|
|
71
|
+
KNOWN_CREDENTIAL_ENV_VARS,
|
|
72
|
+
CREDENTIAL_ENV_SUFFIX_RE,
|
|
73
|
+
isSensitiveEnv,
|
|
74
|
+
isCredentialEnv,
|
|
75
|
+
};
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const path = require('path');
|
|
4
|
-
const { SINK_CALLEE_NAMES, SINK_MEMBER_METHODS, SINK_INSTANCE_METHODS } = require('./constants.js');
|
|
4
|
+
const { SINK_CALLEE_NAMES, SINK_MEMBER_METHODS, SINK_INSTANCE_METHODS, NON_NETWORK_SINK_RECEIVER_ROOTS } = require('./constants.js');
|
|
5
5
|
const {
|
|
6
6
|
parseFile, walkAST, isRequireCall, isModuleExportsAssign,
|
|
7
|
-
getExportName, getFunctionBody, getMemberChain
|
|
7
|
+
getExportName, getFunctionBody, getMemberChain, getReceiverRootName
|
|
8
8
|
} = require('./parse-utils.js');
|
|
9
9
|
|
|
10
10
|
/**
|
|
@@ -87,11 +87,14 @@ function analyzeSinkExports(filePath) {
|
|
|
87
87
|
return;
|
|
88
88
|
}
|
|
89
89
|
}
|
|
90
|
-
// .write(), .send(), .connect()
|
|
90
|
+
// .write(), .send(), .connect() — but not process.*/console.* (local I/O, not network)
|
|
91
91
|
const method = node.callee.property.name || node.callee.property.value;
|
|
92
92
|
if (SINK_INSTANCE_METHODS.has(method)) {
|
|
93
|
-
|
|
94
|
-
|
|
93
|
+
const root = getReceiverRootName(node.callee);
|
|
94
|
+
if (!(root && NON_NETWORK_SINK_RECEIVER_ROOTS.has(root))) {
|
|
95
|
+
found = method + '()';
|
|
96
|
+
return;
|
|
97
|
+
}
|
|
95
98
|
}
|
|
96
99
|
}
|
|
97
100
|
}
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
const path = require('path');
|
|
4
4
|
const { SENSITIVE_MODULES } = require('./constants.js');
|
|
5
|
+
const { isSensitiveEnv } = require('../env-var-classification.js');
|
|
5
6
|
const {
|
|
6
7
|
parseFile, walkAST, isRequireCall, isModuleExportsAssign,
|
|
7
8
|
getExportName, getFunctionBody, getMemberChain, extractLiteralArg
|
|
@@ -272,6 +273,11 @@ function checkNodeTaint(node, moduleVars) {
|
|
|
272
273
|
const chain = getMemberChain(node);
|
|
273
274
|
if (chain.startsWith('process.env')) {
|
|
274
275
|
const detail = chain.length > 'process.env'.length ? chain.slice('process.env.'.length) : '';
|
|
276
|
+
// Source-precision (segment A): a SPECIFIC non-credential config var (URL/HOST/PORT/
|
|
277
|
+
// NODE_ENV/...) is not a credential taint source — consistent with dataflow.js's
|
|
278
|
+
// isSensitiveEnv (DF-C4). Whole-object `process.env` (detail '') stays tainted: an env
|
|
279
|
+
// dump exfiltrates every secret. System-identity vars (HOME/USER) stay tainted too.
|
|
280
|
+
if (detail && !isSensitiveEnv(detail)) return null;
|
|
275
281
|
return { source: 'process.env', detail };
|
|
276
282
|
}
|
|
277
283
|
}
|
|
@@ -332,9 +338,21 @@ function scanBodyForTaint(body, moduleVars, taintedVars) {
|
|
|
332
338
|
// Collect local tainted vars within this function scope too
|
|
333
339
|
const localTainted = Object.assign(Object.create(null), taintedVars);
|
|
334
340
|
|
|
341
|
+
// A `process.env.X` read is matched as a unit by checkNodeTaint (gated on isSensitiveEnv).
|
|
342
|
+
// Record the inner bare `process.env` node of each so the pre-order walker does not RE-match
|
|
343
|
+
// it standalone (detail '' = whole-env dump), which would defeat the per-var gate for config
|
|
344
|
+
// vars. A genuinely standalone `process.env` (a different node) still taints as a dump.
|
|
345
|
+
const innerEnvNodes = new WeakSet();
|
|
346
|
+
|
|
335
347
|
let found = null;
|
|
336
348
|
walkAST({ type: 'Program', body }, (node) => {
|
|
337
349
|
if (found) return;
|
|
350
|
+
if (innerEnvNodes.has(node)) return;
|
|
351
|
+
|
|
352
|
+
if (node.type === 'MemberExpression' && node.object &&
|
|
353
|
+
node.object.type === 'MemberExpression' && getMemberChain(node.object) === 'process.env') {
|
|
354
|
+
innerEnvNodes.add(node.object);
|
|
355
|
+
}
|
|
338
356
|
|
|
339
357
|
// Variable assignment inside function
|
|
340
358
|
if (node.type === 'VariableDeclaration') {
|
|
@@ -18,16 +18,29 @@ const ACORN_OPTIONS = {
|
|
|
18
18
|
};
|
|
19
19
|
|
|
20
20
|
// --- Sink patterns for cross-file detection ---
|
|
21
|
-
const SINK_CALLEE_NAMES = new Set(['fetch', 'eval', 'Function', 'WebSocket', 'XMLHttpRequest']);
|
|
21
|
+
const SINK_CALLEE_NAMES = new Set(['fetch', 'eval', 'Function', 'WebSocket', 'XMLHttpRequest', 'axios']);
|
|
22
22
|
const SINK_MEMBER_METHODS = new Set([
|
|
23
23
|
'https.request', 'https.get', 'http.request', 'http.get',
|
|
24
24
|
'child_process.exec', 'child_process.execSync', 'child_process.spawn',
|
|
25
25
|
'dns.resolveTxt', 'dns.resolve', 'dns.resolve4', 'dns.resolve6',
|
|
26
|
+
// axios as a cross-file network sink (axios.get(taintedData) etc.). Instance form
|
|
27
|
+
// (const c = axios.create(); c.get(...)) is NOT added to SINK_INSTANCE_METHODS — that
|
|
28
|
+
// would match every .get/.post receiver and explode FPs; it's a known follow-up gap.
|
|
29
|
+
'axios.get', 'axios.post', 'axios.put', 'axios.patch', 'axios.delete', 'axios.request',
|
|
26
30
|
]);
|
|
27
31
|
const SINK_INSTANCE_METHODS = new Set(['connect', 'write', 'send']);
|
|
28
32
|
|
|
33
|
+
// Receiver roots that make connect/write/send LOCAL I/O or IPC, never external-network
|
|
34
|
+
// exfil: `process.stdout/stderr.write`, `process.send` (child IPC to the parent), and any
|
|
35
|
+
// `console.*`. SINK_INSTANCE_METHODS matches by method name alone, so without this a
|
|
36
|
+
// console/stderr write of a tainted value reads as a cross-file network sink (segment-A FP
|
|
37
|
+
// driver: contextdevkit, amicus). Real socket/ws/req sinks (receivers `socket`/`ws`/`req`/
|
|
38
|
+
// `net.connect()`…) are unaffected. Globals are trusted here as they are everywhere else.
|
|
39
|
+
const NON_NETWORK_SINK_RECEIVER_ROOTS = new Set(['process', 'console']);
|
|
40
|
+
|
|
29
41
|
|
|
30
42
|
module.exports = {
|
|
31
43
|
MAX_GRAPH_NODES, MAX_GRAPH_EDGES, MAX_FLOWS, MAX_TAINT_DEPTH,
|
|
32
|
-
SENSITIVE_MODULES, ACORN_OPTIONS, SINK_CALLEE_NAMES, SINK_MEMBER_METHODS, SINK_INSTANCE_METHODS
|
|
44
|
+
SENSITIVE_MODULES, ACORN_OPTIONS, SINK_CALLEE_NAMES, SINK_MEMBER_METHODS, SINK_INSTANCE_METHODS,
|
|
45
|
+
NON_NETWORK_SINK_RECEIVER_ROOTS
|
|
33
46
|
};
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const path = require('path');
|
|
4
|
+
const fs = require('fs');
|
|
4
5
|
const { debugLog } = require('../../utils');
|
|
5
|
-
const {
|
|
6
|
+
const { networkDestinationsAllBenign } = require('../../sdk-destination.js');
|
|
7
|
+
const { MAX_FLOWS, SINK_CALLEE_NAMES, SINK_MEMBER_METHODS, SINK_INSTANCE_METHODS, NON_NETWORK_SINK_RECEIVER_ROOTS } = require('./constants.js');
|
|
6
8
|
const {
|
|
7
9
|
parseFile, walkAST, isRequireCall, isLocalImport, isModuleExportsAssign,
|
|
8
|
-
getExportName, getMemberChain, resolveLocal
|
|
10
|
+
getExportName, getMemberChain, getReceiverRootName, resolveLocal
|
|
9
11
|
} = require('./parse-utils.js');
|
|
10
12
|
|
|
11
13
|
/**
|
|
@@ -596,7 +598,12 @@ function getSinkName(callNode) {
|
|
|
596
598
|
// instance.connect(), socket.write(), ws.send()
|
|
597
599
|
const method = callee.property.name || callee.property.value;
|
|
598
600
|
if (SINK_INSTANCE_METHODS.has(method)) {
|
|
599
|
-
|
|
601
|
+
// Reject process.*/console.* receivers: process.stdout/stderr.write,
|
|
602
|
+
// process.send (IPC), console.* are local I/O, never external-network exfil.
|
|
603
|
+
const root = getReceiverRootName(callee);
|
|
604
|
+
if (!(root && NON_NETWORK_SINK_RECEIVER_ROOTS.has(root))) {
|
|
605
|
+
return `${method}()`;
|
|
606
|
+
}
|
|
600
607
|
}
|
|
601
608
|
}
|
|
602
609
|
|
|
@@ -954,4 +961,49 @@ function findPipeChainCrossFileFlows(ast, relFile, graph, taintedExports, sinkEx
|
|
|
954
961
|
}
|
|
955
962
|
|
|
956
963
|
|
|
957
|
-
|
|
964
|
+
// A network sink carries a destination host we can judge from the literal URLs / host
// options of the sink file; exec/command sinks (eval, Function, child_process.*) do not,
// and are never destination-gated.
//
// dns.* lookups are not destination-gated either: their destination is the queried name
// itself (typically built from the tainted data), which the URL / host-option extraction
// behind the gate never sees. Gating them would let a DNS-exfil flow be suppressed merely
// because the same file mentions one benign URL.
// NOTE(review): assumes member sinks are rendered as '<module>.<method>()' (e.g.
// 'dns.resolveTxt()'), mirroring the existing child_process check — confirm in getSinkName.
//
// @param {*} sink - sink descriptor string of a flow (coerced with String())
// @returns {boolean} true when the sink may be judged by its destination host
function isNetworkSinkDescriptor(sink) {
  const descriptor = String(sink || '');
  if (/^(eval|Function)\(\)$/.test(descriptor)) return false; // exec sink
  if (/^child_process\./.test(descriptor)) return false; // command sink
  if (/^dns\./.test(descriptor)) return false; // destination is the query name, not a literal host
  return true; // fetch / http(s).request|get / axios.* / WebSocket / XMLHttpRequest / connect|write|send
}
|
|
972
|
+
|
|
973
|
+
/**
 * FP gate (segment A — destination-aware). Removes a cross_file_dataflow whose NETWORK
 * sink lives in a file where every extracted destination is benign, as decided by
 * networkDestinationsAllBenign() on that file's source text. A legitimate SDK that reads
 * a key and POSTs it to its own provider is not exfiltration.
 *
 * Always kept: flows of any other type, flows without a sinkFile, flows whose sink is
 * not destination-gated (isNetworkSinkDescriptor() === false), and flows whose sink file
 * cannot be read or is empty (nothing to confirm, so the flow keeps firing).
 * Rationale + corpus: FPR-segment-A-diagnosis-2026-06-14.md.
 *
 * @param {Array} flows - assembled cross-file flows (main + callback + emitter)
 * @param {string} packagePath - package root, used to resolve each flow's sinkFile
 * @returns {Array} the input itself when it is empty or not an array, otherwise a new
 *   array with first-party network FPs removed (original order preserved)
 */
function filterFirstPartyNetworkFlows(flows, packagePath) {
  if (!Array.isArray(flows) || flows.length === 0) return flows;

  // Sink-file source text memoised per sinkFile; unreadable files are cached as ''.
  const sourceBySinkFile = new Map();
  const loadSinkSource = (sinkFile) => {
    const cached = sourceBySinkFile.get(sinkFile);
    if (cached !== undefined) return cached;
    let text;
    try {
      text = fs.readFileSync(path.resolve(packagePath, sinkFile), 'utf8');
    } catch {
      text = '';
    }
    sourceBySinkFile.set(sinkFile, text);
    return text;
  };

  const survivors = [];
  for (const flow of flows) {
    const destinationGated =
      flow &&
      flow.type === 'cross_file_dataflow' &&
      flow.sinkFile &&
      isNetworkSinkDescriptor(flow.sink);

    if (destinationGated) {
      const sinkSource = loadSinkSource(flow.sinkFile);
      if (sinkSource && networkDestinationsAllBenign(sinkSource)) {
        debugLog(`[MODULE-GRAPH] cross_file_dataflow suppressed (first-party/local dest): ${flow.sourceFile} -> ${flow.sink} in ${flow.sinkFile}`);
        continue; // first-party/local network destination → FP, drop
      }
    }
    survivors.push(flow);
  }
  return survivors;
}
|
|
1008
|
+
|
|
1009
|
+
module.exports = { detectCrossFileFlows, expandTaintThroughReexports, collectImportTaint, propagateLocalTaint, getSinkName, findTaintedArgument, isNetworkSinkDescriptor, filterFirstPartyNetworkFlows };
|
|
@@ -4,13 +4,13 @@ const { MAX_GRAPH_NODES, MAX_GRAPH_EDGES, MAX_FLOWS, MAX_TAINT_DEPTH } = require
|
|
|
4
4
|
const { parseFile, resolveLocal, isLocalImport, toRel, isFileExists } = require('./parse-utils.js');
|
|
5
5
|
const { buildModuleGraph, extractLocalImports, tryResolveConcatRequire } = require('./build-graph.js');
|
|
6
6
|
const { annotateTaintedExports } = require('./annotate-tainted.js');
|
|
7
|
-
const { detectCrossFileFlows } = require('./detect-cross-file.js');
|
|
7
|
+
const { detectCrossFileFlows, filterFirstPartyNetworkFlows } = require('./detect-cross-file.js');
|
|
8
8
|
const { annotateSinkExports } = require('./annotate-sinks.js');
|
|
9
9
|
const { detectCallbackCrossFileFlows } = require('./detect-callback-flows.js');
|
|
10
10
|
const { detectEventEmitterFlows } = require('./detect-event-flows.js');
|
|
11
11
|
|
|
12
12
|
module.exports = {
|
|
13
|
-
buildModuleGraph, annotateTaintedExports, detectCrossFileFlows,
|
|
13
|
+
buildModuleGraph, annotateTaintedExports, detectCrossFileFlows, filterFirstPartyNetworkFlows,
|
|
14
14
|
annotateSinkExports, detectCallbackCrossFileFlows, detectEventEmitterFlows,
|
|
15
15
|
resolveLocal, extractLocalImports, parseFile, isLocalImport, toRel, isFileExists,
|
|
16
16
|
tryResolveConcatRequire,
|
|
@@ -107,6 +107,18 @@ function getMemberChain(node, depth) {
|
|
|
107
107
|
return '';
|
|
108
108
|
}
|
|
109
109
|
|
|
110
|
+
// Root identifier of a call's receiver: `process` for process.stdout.write() and
// process.send(), `console` for console.error(), `sender` for sender.send().
// Walks down the `.object` links of nested MemberExpressions until it reaches a node of
// another type. Returns null when `callee` is not a MemberExpression or when the chain
// does not bottom out in a plain Identifier (e.g. this.x.write(), foo().bar()).
// Used to reject local-IO/IPC receivers (process/console) from the write/send/connect
// instance-method sink set, which matches by method name alone.
function getReceiverRootName(callee) {
  if (!callee || callee.type !== 'MemberExpression') return null;

  let receiver = callee.object;
  for (;;) {
    if (!receiver) return null;
    if (receiver.type !== 'MemberExpression') break;
    receiver = receiver.object;
  }
  return receiver.type === 'Identifier' ? receiver.name : null;
}
|
|
121
|
+
|
|
110
122
|
function extractLiteralArg(args) {
|
|
111
123
|
if (!args || args.length === 0) return '';
|
|
112
124
|
const first = args[0];
|
|
@@ -136,6 +148,6 @@ function toRel(abs, packagePath) {
|
|
|
136
148
|
|
|
137
149
|
module.exports = {
|
|
138
150
|
parseFile, walkAST, isRequireCall, isLocalImport, isModuleExportsAssign,
|
|
139
|
-
getExportName, getFunctionBody, getMemberChain, extractLiteralArg,
|
|
151
|
+
getExportName, getFunctionBody, getMemberChain, getReceiverRootName, extractLiteralArg,
|
|
140
152
|
resolveLocal, isFileExists, toRel
|
|
141
153
|
};
|
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// ============================================
|
|
4
|
+
// DESTINATION-AWARE SDK DETECTION (shared leaf module)
|
|
5
|
+
// ============================================
|
|
6
|
+
// Extracted from intent-graph.js (v2.11.x) so the same destination logic can gate
|
|
7
|
+
// every credential→network taint detector — intent coherence (intent-graph.js),
|
|
8
|
+
// dataflow (scanner/dataflow.js), cross-file flow (scanner/module-graph) and the
|
|
9
|
+
// detached/uncaught compounds (scanner/ast-detectors). No project dependencies
|
|
10
|
+
// (only the Node stdlib via callers) → safe to require from any scanner, no cycles.
|
|
11
|
+
//
|
|
12
|
+
// Curated allowlist: when an env var matching `envPattern` is sent to one of `domains`
// (or a subdomain of it), that is legitimate SDK usage, not credential exfiltration.
// Safe-by-default: unknown env vars or unknown domains remain CRITICAL.
// Entries are consulted in order; the first matching envPattern decides.
const SDK_ENV_DOMAIN_MAP = [
  { envPattern: /^AWS_/i, domains: ['amazonaws.com', 'aws.amazon.com'] },
  { envPattern: /^AZURE_/i, domains: ['azure.com', 'microsoft.com'] },
  { envPattern: /^GOOGLE_|^GCP_/i, domains: ['googleapis.com', 'google.com'] },
  { envPattern: /^FIREBASE_/i, domains: ['firebase.com', 'googleapis.com'] },
  { envPattern: /^SALESFORCE_/i, domains: ['salesforce.com', 'force.com'] },
  { envPattern: /^SUPABASE_/i, domains: ['supabase.co', 'supabase.com'] },
  { envPattern: /^MAILGUN_/i, domains: ['mailgun.net', 'mailgun.com'] },
  { envPattern: /^STRIPE_/i, domains: ['stripe.com'] },
  { envPattern: /^TWILIO_/i, domains: ['twilio.com'] },
  { envPattern: /^SENDGRID_/i, domains: ['sendgrid.com', 'sendgrid.net'] },
  { envPattern: /^DATADOG_/i, domains: ['datadoghq.com'] },
  { envPattern: /^SENTRY_/i, domains: ['sentry.io'] },
  { envPattern: /^SLACK_/i, domains: ['slack.com'] },
  { envPattern: /^GITHUB_/i, domains: ['github.com', 'githubusercontent.com'] },
  { envPattern: /^GITLAB_/i, domains: ['gitlab.com'] },
  { envPattern: /^CLOUDFLARE_/i, domains: ['cloudflare.com'] },
  { envPattern: /^OPENAI_/i, domains: ['openai.com'] },
  { envPattern: /^ANTHROPIC_/i, domains: ['anthropic.com'] },
  { envPattern: /^MONGODB_|^MONGO_/i, domains: ['mongodb.com', 'mongodb.net'] },
  { envPattern: /^AUTH0_/i, domains: ['auth0.com'] },
  { envPattern: /^HUBSPOT_/i, domains: ['hubspot.com', 'hubapi.com'] },
  { envPattern: /^CONTENTFUL_/i, domains: ['contentful.com'] },
];

// Generic tokens dropped from an env var name when looking for its brand keyword
// (MAILGUN_API_KEY → MAILGUN).
const ENV_NOISE_TOKENS = new Set([
  'API', 'KEY', 'SECRET', 'TOKEN', 'PASSWORD', 'CREDENTIAL',
  'AUTH', 'ACCESS', 'PRIVATE', 'PUBLIC', 'CLIENT', 'ID', 'URL'
]);

// Suspicious tunneling/proxy domains — never considered legitimate SDK destinations.
const SUSPICIOUS_DOMAIN_PATTERNS = /ngrok|serveo|localtunnel|burpcollaborator|requestbin|pipedream|webhook\.site/i;

// http/https URLs in source text. The character class includes quote/paren characters,
// so a match can carry trailing punctuation such as `')`; hosts are cut out afterwards.
const URL_EXTRACT_RE = /https?:\/\/[a-zA-Z0-9\-._~:/?#[\]@!$&'()*+,;=%]+/g;

// Quoted host in Node.js request options: hostname: 'domain.com' or host: 'domain.com'.
// Carries the `g` flag: build a fresh RegExp from `.source` before looping with exec().
const HOSTNAME_OPTION_RE = /(?:hostname|host)\s*:\s*['"`]([a-zA-Z0-9\-._]+)['"`]/g;
|
|
54
|
+
|
|
55
|
+
/**
 * Extract an env var name from intent source threat messages.
 * Messages look like: "process.env.SALESFORCE_API_KEY", "env var MAILGUN_API_KEY accessed".
 *
 * Threats are scanned in order and the first hit wins. Within one message an explicit
 * `process.env.NAME` reference (matched case-insensitively) takes precedence over a
 * standalone upper-case NAME_WITH_UNDERSCORES token.
 *
 * Tolerates malformed input instead of throwing: a missing or non-iterable list yields
 * null, and entries that are null or whose `message` is not a non-empty string are skipped.
 *
 * @param {Iterable<{message?: string}>} sourceThreats - threats to inspect
 * @returns {string|null} the env var name, or null when none is found
 */
function extractEnvVarFromMessage(sourceThreats) {
  if (sourceThreats == null || typeof sourceThreats[Symbol.iterator] !== 'function') return null;

  for (const threat of sourceThreats) {
    const message = threat ? threat.message : undefined;
    if (typeof message !== 'string' || message === '') continue;

    // Match process.env.VAR_NAME pattern
    const envMatch = message.match(/process\.env\.([A-Z_][A-Z0-9_]*)/i);
    if (envMatch) return envMatch[1];

    // Match standalone VAR_NAME patterns (e.g., "SALESFORCE_API_KEY")
    const varMatch = message.match(/\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)\b/);
    if (varMatch) return varMatch[1];
  }
  return null;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Pick the brand keyword of an env var name: the first `_`-separated token (after
 * upper-casing) that is non-empty and not listed in ENV_NOISE_TOKENS.
 * MAILGUN_API_KEY → MAILGUN, SALESFORCE_CLIENT_SECRET → SALESFORCE
 *
 * @param {string} envVarName - environment variable name (any case)
 * @returns {string|null} upper-case brand token, or null when every token is noise
 */
function extractBrandFromEnvVar(envVarName) {
  for (const token of envVarName.toUpperCase().split('_')) {
    if (token.length > 0 && !ENV_NOISE_TOKENS.has(token)) return token;
  }
  return null;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Extract the host of an http/https URL string, lower-cased and without port.
 *
 * Only valid hostname characters are captured, so a path-less URL immediately followed
 * by a quote/paren (e.g. fetch('https://api.openai.com')) does not absorb the trailing
 * `')` into the host.
 *
 * Userinfo is skipped: in `https://api.openai.com@evil.tld/` the request goes to
 * `evil.tld`, so that is what is returned (everything up to the last `@` of the
 * authority is credentials, not host). The authority is cut at the first `/`, `?`, `#`,
 * backslash, quote or whitespace, so an `@` later in the path (`/pkg@1.2.3`) or beyond
 * the end of a quoted string literal is never mistaken for a userinfo separator.
 *
 * Bracketed IPv6 literals are not supported and yield null.
 *
 * @param {string} url - candidate URL (may carry trailing source-code punctuation)
 * @returns {string|null} host name, or null when `url` is not an http(s) URL string
 */
function extractDomain(url) {
  if (typeof url !== 'string') return null;

  const authorityMatch = url.match(/^https?:\/\/([^\/?#\\'"`\s]*)/i);
  if (!authorityMatch) return null;

  let authority = authorityMatch[1];
  const userinfoEnd = authority.lastIndexOf('@');
  if (userinfoEnd !== -1) authority = authority.slice(userinfoEnd + 1);

  const hostMatch = authority.match(/^[a-zA-Z0-9.\-]+/);
  return hostMatch ? hostMatch[0].toLowerCase() : null;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Label-anchored suffix match of a domain against a list of expected SDK domains:
 * the domain must equal an expected entry or end with "." + that entry.
 * api.mailgun.net matches mailgun.net, sub.api.stripe.com matches stripe.com,
 * but evilx.ai does not match x.ai.
 *
 * @param {string} domain - host to test (callers pass it lower-cased)
 * @param {Iterable<string>} expectedDomains - allowed registrable domains
 * @returns {boolean} true when at least one expected domain matches
 */
function domainMatchesSuffix(domain, expectedDomains) {
  return [...expectedDomains].some(
    (suffix) => domain === suffix || domain.endsWith('.' + suffix)
  );
}
|
|
108
|
+
|
|
109
|
+
/**
 * Check if an env var + file content represents a legitimate SDK pattern.
 *
 * Hosts are collected from http(s) URLs and from quoted `hostname:` / `host:` options.
 * The result is false when no host is found, when any host matches
 * SUSPICIOUS_DOMAIN_PATTERNS, or when any host is a raw IPv4 literal. Otherwise:
 *   1. If the env var matches a curated SDK_ENV_DOMAIN_MAP entry, that entry decides
 *      alone: ALL hosts must match its domains (no relaxation, so an attacker cannot
 *      hide a C2 domain next to a legitimate one).
 *   2. Heuristic fallback on the brand keyword of the env var (at least 3 characters):
 *      a. true when EVERY host has a label equal to the brand;
 *      b. R2 relaxed: true when the env var ends with a credential suffix and AT LEAST
 *         ONE host has such a label (SDKs also call CDN/logging/analytics domains).
 *
 * @param {string} envVarName - e.g., "SALESFORCE_API_KEY"
 * @param {string} fileContent - source code of the file
 * @returns {boolean} true if SDK pattern (should skip intent pair)
 */
function isSDKPattern(envVarName, fileContent) {
  // Hosts from full URLs (https://api.stripe.com/v1/charges)
  const domains = (fileContent.match(URL_EXTRACT_RE) || [])
    .map((url) => extractDomain(url))
    .filter(Boolean);

  // Hosts from Node.js request options (hostname: 'api.stripe.com'); fresh RegExp so the
  // shared constant's lastIndex is never touched.
  const hostOptionRe = new RegExp(HOSTNAME_OPTION_RE.source, 'g');
  for (let hit = hostOptionRe.exec(fileContent); hit !== null; hit = hostOptionRe.exec(fileContent)) {
    const optionHost = hit[1].toLowerCase();
    if (optionHost && !domains.includes(optionHost)) domains.push(optionHost);
  }

  // No hosts found — can't confirm SDK pattern, default to suspicious
  if (domains.length === 0) return false;

  // Suspicious tunneling domains and raw IP addresses — immediate fail
  if (domains.some((d) => SUSPICIOUS_DOMAIN_PATTERNS.test(d))) return false;
  if (domains.some((d) => /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(d))) return false;

  // 1. Curated allowlist is authoritative and strict: ALL domains must match.
  const curated = SDK_ENV_DOMAIN_MAP.find((entry) => entry.envPattern.test(envVarName));
  if (curated) {
    return domains.every((d) => domainMatchesSuffix(d, curated.domains));
  }

  // R2: credential-suffixed env vars qualify for relaxed matching in the fallback only.
  const upperName = envVarName.toUpperCase();
  const hasCredentialSuffix = ['_API_KEY', '_SECRET', '_TOKEN', '_SECRET_KEY', '_ACCESS_KEY']
    .some((suffix) => upperName.endsWith(suffix));

  // 2. Heuristic fallback: brand keyword must appear as a whole domain label,
  // e.g. brand "ACME" matches "api.acme.com" (label "acme") but not "api.acmetech.com".
  const brand = extractBrandFromEnvVar(envVarName);
  if (!brand || brand.length < 3) return false; // Too short for reliable matching
  const brandLabel = brand.toLowerCase();
  const mentionsBrand = (d) => d.split('.').includes(brandLabel);

  // 2a. Strict: every domain carries the brand label.
  if (domains.every(mentionsBrand)) return true;

  // 2b. R2 relaxed: credential suffix + at least one domain carries the brand label.
  return hasCredentialSuffix && domains.some(mentionsBrand);
}
|
|
186
|
+
|
|
187
|
+
// ============================================
|
|
188
|
+
// DESTINATION-BENIGNNESS GATE (env-var-independent)
|
|
189
|
+
// ============================================
|
|
190
|
+
// isSDKPattern() needs a single extractable env var + matching domain. That model
|
|
191
|
+
// breaks on (a) multi-provider files (a CLI that reads GEMINI_API_KEY *and*
|
|
192
|
+
// ANTHROPIC_API_KEY and calls both), and (b) flows whose credential source has no
|
|
193
|
+
// extractable env var (cross_file_dataflow / detached compounds). For those, judge the
|
|
194
|
+
// DESTINATIONS, not the env var: a credential→network flow is benign iff EVERY network
|
|
195
|
+
// host in scope is provably non-exfil.
|
|
196
|
+
//
|
|
197
|
+
// Benign classes (NOT attacker-spoofable):
|
|
198
|
+
// - loopback / RFC1918 private / link-local IPs, localhost, *.local (local IPC)
|
|
199
|
+
// - reserved test domains (example.com, *.test, *.invalid) (RFC 2606/6761)
|
|
200
|
+
// - curated SaaS/cloud/AI provider API domains (cannot echo a
|
|
201
|
+
// POST body back to a third party — UNLIKE paste sites / bot webhooks, deliberately
|
|
202
|
+
// EXCLUDED, see SUSPICIOUS_DOMAIN_PATTERNS + the exclusion note below)
|
|
203
|
+
// Deliberately NOT benign: package "own domain" from package.json (attacker writes it),
|
|
204
|
+
// unknown domains, public IPs, suspicious tunnels/paste hosts. Any of those ⇒ keep firing.
|
|
205
|
+
// Safe-by-default: no extractable host ⇒ NOT benign (do not suppress).
|
|
206
|
+
|
|
207
|
+
// AI providers (2025-26) absent from the env→domain map. Bot/messaging/paste channels
// (telegram, discord webhooks, pastebin, gist, transfer.sh, …) are intentionally absent:
// they CAN relay an exfil POST to the attacker, so they must keep firing.
const AI_PROVIDER_DOMAIN_SUFFIXES = [
  'claude.com',
  'openrouter.ai',
  'deepseek.com',
  'x.ai',
  'mistral.ai',
  'cohere.ai',
  'cohere.com',
  'huggingface.co',
  'perplexity.ai',
  'groq.com',
  'together.ai',
  'together.xyz',
  'replicate.com',
  'fireworks.ai',
  'anyscale.com',
  'ai21.com',
  'voyageai.com',
  'deepinfra.com',
];

// Flat, de-duplicated suffix list: every domain curated in SDK_ENV_DOMAIN_MAP (in map
// order) followed by the AI extras. Derived rather than duplicated so the two stay in
// sync. Intended to be matched via domainMatchesSuffix, which is label-anchored:
// 'evilx.ai' does NOT match 'x.ai'.
const PROVIDER_DOMAIN_SUFFIXES = [
  ...new Set(
    SDK_ENV_DOMAIN_MAP
      .flatMap(({ domains }) => domains)
      .concat(AI_PROVIDER_DOMAIN_SUFFIXES)
  ),
];
|
|
224
|
+
|
|
225
|
+
// Normalise a host for classification: trim, lower-case, and drop the port.
//   [::1]:443 / [::1] → ::1      (bracketed IPv6, optional port)
//   host:8080         → host     (only when there is exactly one colon)
// A bare IPv6 address such as ::1 has several colons and is returned untouched.
function stripPort(host) {
  const normalized = String(host).trim().toLowerCase();

  const bracketed = normalized.match(/^\[([^\]]+)\]/);
  if (bracketed) return bracketed[1];

  const colonCount = (normalized.match(/:/g) || []).length;
  return colonCount === 1 ? normalized.replace(/:\d+$/, '') : normalized;
}

// loopback / RFC1918 private / link-local / localhost / reserved-test domain.
//
// IPv4 literals are only trusted in canonical dotted-decimal form. URL parsers and
// inet_aton-style resolvers read an octet with a leading zero as octal, so `010.1.1.1`
// is really 8.1.1.1 (public) even though it looks like 10.0.0.0/8. Any quad with a
// leading-zero octet or an octet above 255 is therefore NOT provably local and returns
// false; isPublicIpHost() then reports it as public, so the flow keeps firing.
function isLocalOrReservedHost(host) {
  const h = stripPort(host);
  if (!h) return false;
  if (h === 'localhost' || h.endsWith('.localhost') || h.endsWith('.local')) return true;
  if (h === '::1' || h === '0:0:0:0:0:0:0:1') return true; // IPv6 loopback
  if (h === 'example.com' || h === 'example.org' || h === 'example.net') return true;
  if (h.endsWith('.example.com') || h.endsWith('.example.org') || h.endsWith('.example.net')) return true;
  if (h.endsWith('.example') || h.endsWith('.test') || h.endsWith('.invalid')) return true;

  const quad = h.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
  if (!quad) return false;

  const octets = quad.slice(1);
  const nonCanonical = octets.some(
    (octet) => (octet.length > 1 && octet.startsWith('0')) || Number(octet) > 255
  );
  if (nonCanonical) return false; // octal-looking or out-of-range: cannot prove it is local

  const a = Number(octets[0]);
  const b = Number(octets[1]);
  if (a === 127 || a === 0) return true;              // loopback / this-host
  if (a === 10) return true;                          // 10.0.0.0/8
  if (a === 172 && b >= 16 && b <= 31) return true;   // 172.16.0.0/12
  if (a === 192 && b === 168) return true;            // 192.168.0.0/16
  if (a === 169 && b === 254) return true;            // 169.254.0.0/16 link-local
  return false;                                       // any other IPv4 literal = public
}

// A public (non-loopback/private) IPv4 literal — a direct-IP exfil endpoint (ecto pattern).
// Any dotted quad that isLocalOrReservedHost() does not accept counts as public.
function isPublicIpHost(host) {
  const h = stripPort(host);
  if (!/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(h)) return false;
  return !isLocalOrReservedHost(h);
}
|
|
264
|
+
|
|
265
|
+
// Extract every network host referenced in a file. Sources, in order:
//   1. http/https URLs (URL_EXTRACT_RE → extractDomain); duplicates are kept here;
//   2. ws/wss URLs — WebSocket is one of the destination-gated sinks, so a socket
//      endpoint must be visible to the all-benign check like any other destination;
//   3. quoted `hostname:` / `host:` request options (HOSTNAME_OPTION_RE);
//   4. bare quoted IPv4 / localhost literals (see below).
// Hosts from 2–4 are lower-cased and appended only when not already present.
// Returns [] for empty/missing content.
function extractHostsFromContent(fileContent) {
  if (!fileContent) return [];
  const urls = fileContent.match(URL_EXTRACT_RE) || [];
  const hosts = urls.map((u) => extractDomain(u)).filter(Boolean);
  const addHost = (candidate) => {
    if (candidate && !hosts.includes(candidate)) hosts.push(candidate);
  };

  // ws:// and wss:// URLs. The scheme is swapped for https: purely so the existing
  // http(s)-only host parser can be reused; `\b` keeps `news://…` from matching.
  const WS_URL_RE = /\bwss?:\/\/[a-zA-Z0-9\-._~:/?#[\]@!$&'()*+,;=%]+/gi;
  for (const wsUrl of fileContent.match(WS_URL_RE) || []) {
    addHost(extractDomain(wsUrl.replace(/^wss?:/i, 'https:')));
  }

  // Fresh RegExp so the shared /g constant's lastIndex is never touched.
  let m;
  const re = new RegExp(HOSTNAME_OPTION_RE.source, 'g');
  while ((m = re.exec(fileContent)) !== null) {
    addHost(m[1].toLowerCase());
  }

  // Bare host literals assigned as defaults, e.g. `process.env.HOST || "127.0.0.1"` then
  // used as `host: HOST` (common "configurable local collector" shape — the variable host
  // isn't matched above). Capture quoted IPv4 / localhost / 0.0.0.0 literals. Safe: any
  // co-present public IP or unknown host still fails the all-benign check downstream, so
  // a loopback/private literal can only relax a file whose every other host is benign.
  const LITERAL_HOST_RE = /['"`](localhost|0\.0\.0\.0|(?:\d{1,3}\.){3}\d{1,3})['"`]/g;
  while ((m = LITERAL_HOST_RE.exec(fileContent)) !== null) {
    addHost(m[1].toLowerCase());
  }
  return hosts;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Destination-benignness gate for credential→network taint flows whose env var is not
 * (or need not be) known. Returns true ONLY if EVERY host extracted from the file is
 * provably non-exfil: local/reserved, or a curated provider domain (or subdomain of one).
 * A host matching SUSPICIOUS_DOMAIN_PATTERNS, a public IP, or an unknown domain ⇒ false.
 * No hosts found ⇒ false (cannot confirm).
 *
 * NOTE(review): provider matching is by suffix, so subdomains of curated domains such as
 * gist.github.com or hooks.slack.com are accepted here even though paste/webhook relays
 * are meant to keep firing — confirm whether those need an explicit exclusion.
 *
 * @param {string} fileContent - source of the file containing the network sink
 * @returns {boolean} true ⇒ first-party/local, safe to downgrade the taint flow
 */
function networkDestinationsAllBenign(fileContent) {
  const hosts = extractHostsFromContent(fileContent);
  if (hosts.length === 0) return false;

  return hosts.every((host) => {
    if (SUSPICIOUS_DOMAIN_PATTERNS.test(host)) return false;
    if (isPublicIpHost(host)) return false;
    // Anything that is neither local/reserved nor a curated provider is unknown → keep firing.
    return isLocalOrReservedHost(host) || domainMatchesSuffix(host, PROVIDER_DOMAIN_SUFFIXES);
  });
}
|
|
310
|
+
|
|
311
|
+
module.exports = {
|
|
312
|
+
SDK_ENV_DOMAIN_MAP,
|
|
313
|
+
ENV_NOISE_TOKENS,
|
|
314
|
+
SUSPICIOUS_DOMAIN_PATTERNS,
|
|
315
|
+
URL_EXTRACT_RE,
|
|
316
|
+
HOSTNAME_OPTION_RE,
|
|
317
|
+
PROVIDER_DOMAIN_SUFFIXES,
|
|
318
|
+
extractEnvVarFromMessage,
|
|
319
|
+
extractBrandFromEnvVar,
|
|
320
|
+
extractDomain,
|
|
321
|
+
domainMatchesSuffix,
|
|
322
|
+
isSDKPattern,
|
|
323
|
+
stripPort,
|
|
324
|
+
isLocalOrReservedHost,
|
|
325
|
+
isPublicIpHost,
|
|
326
|
+
extractHostsFromContent,
|
|
327
|
+
networkDestinationsAllBenign,
|
|
328
|
+
};
|