jaku.sh 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/LICENSE +52 -0
  2. package/README.md +636 -0
  3. package/action.yml +264 -0
  4. package/bin/jaku +2 -0
  5. package/package.json +62 -0
  6. package/src/agents/ai-agent.js +175 -0
  7. package/src/agents/api-agent.js +95 -0
  8. package/src/agents/base-agent.js +158 -0
  9. package/src/agents/crawl-agent.js +175 -0
  10. package/src/agents/event-bus.js +59 -0
  11. package/src/agents/findings-ledger.js +410 -0
  12. package/src/agents/logic-agent.js +144 -0
  13. package/src/agents/orchestrator.js +323 -0
  14. package/src/agents/qa-agent.js +149 -0
  15. package/src/agents/security-agent.js +211 -0
  16. package/src/cli.js +423 -0
  17. package/src/core/accessibility-checker.js +171 -0
  18. package/src/core/ai/ai-endpoint-detector.js +227 -0
  19. package/src/core/ai/guardrail-prober.js +362 -0
  20. package/src/core/ai/indirect-injector.js +106 -0
  21. package/src/core/ai/jailbreak-tester.js +212 -0
  22. package/src/core/ai/model-dos-tester.js +174 -0
  23. package/src/core/ai/model-fingerprinter.js +246 -0
  24. package/src/core/ai/multi-turn-attacker.js +297 -0
  25. package/src/core/ai/output-analyzer.js +182 -0
  26. package/src/core/ai/prompt-injector.js +543 -0
  27. package/src/core/ai/system-prompt-extractor.js +244 -0
  28. package/src/core/api/api-key-auditor.js +266 -0
  29. package/src/core/api/auth-flow-tester.js +430 -0
  30. package/src/core/api/cors-ws-tester.js +263 -0
  31. package/src/core/api/graphql-tester.js +287 -0
  32. package/src/core/api/oauth-prober.js +343 -0
  33. package/src/core/auth-manager.js +902 -0
  34. package/src/core/broken-flow-detector.js +207 -0
  35. package/src/core/browser-manager.js +119 -0
  36. package/src/core/console-monitor.js +111 -0
  37. package/src/core/crawler.js +430 -0
  38. package/src/core/csr-waiter.js +410 -0
  39. package/src/core/form-validator.js +240 -0
  40. package/src/core/logic/abuse-pattern-scanner.js +291 -0
  41. package/src/core/logic/access-boundary-tester.js +448 -0
  42. package/src/core/logic/business-rule-inferrer.js +196 -0
  43. package/src/core/logic/graphql-auditor.js +298 -0
  44. package/src/core/logic/parameter-polluter.js +212 -0
  45. package/src/core/logic/pricing-exploiter.js +299 -0
  46. package/src/core/logic/race-condition-detector.js +222 -0
  47. package/src/core/logic/workflow-enforcer.js +284 -0
  48. package/src/core/performance-checker.js +204 -0
  49. package/src/core/responsive-checker.js +228 -0
  50. package/src/core/security/cors-prober.js +150 -0
  51. package/src/core/security/csrf-prober.js +217 -0
  52. package/src/core/security/dependency-auditor.js +182 -0
  53. package/src/core/security/file-upload-tester.js +340 -0
  54. package/src/core/security/header-analyzer.js +324 -0
  55. package/src/core/security/infra-scanner.js +391 -0
  56. package/src/core/security/path-traversal.js +112 -0
  57. package/src/core/security/prototype-pollution.js +147 -0
  58. package/src/core/security/secret-detector.js +517 -0
  59. package/src/core/security/sqli-prober.js +257 -0
  60. package/src/core/security/tls-checker.js +223 -0
  61. package/src/core/security/xss-scanner.js +225 -0
  62. package/src/core/test-generator.js +339 -0
  63. package/src/core/test-runner.js +398 -0
  64. package/src/reporting/diff-reporter.js +172 -0
  65. package/src/reporting/report-generator.js +408 -0
  66. package/src/reporting/sarif-generator.js +190 -0
  67. package/src/utils/config.js +57 -0
  68. package/src/utils/finding.js +67 -0
  69. package/src/utils/logger.js +50 -0
@@ -0,0 +1,517 @@
1
+ import { chromium } from 'playwright';
2
+ import { createFinding } from '../../utils/finding.js';
3
+
4
/**
 * Secret Detector — scans pages, JS files, and HTTP responses for leaked secrets.
 *
 * Three passes are run by {@link SecretDetector#detect}:
 *   1. regex-scan rendered page source, inline scripts and external scripts;
 *   2. probe a fixed list of sensitive paths (.env, .git, debug endpoints, ...);
 *   3. probe for publicly served JavaScript source maps.
 *
 * Every hit is appended to `this.findings` as the object returned by `createFinding`.
 */
export class SecretDetector {
  /**
   * @param {object} [logger] - Optional logger. Only `info` and `debug` are used,
   *   and always through optional chaining, so it may be omitted.
   */
  constructor(logger) {
    this.logger = logger;
    this.findings = [];
    this.checkedUrls = new Set();
    // Keys of secrets already reported, so one secret is not reported twice for the same URL.
    this.seenSecrets = new Set();
  }

  // Regex patterns for common secret formats
  static PATTERNS = [
    { name: 'AWS Access Key', regex: /AKIA[0-9A-Z]{16}/g, severity: 'critical' },
    { name: 'AWS Secret Key', regex: /(?:aws_secret_access_key|aws_secret)\s*[=:]\s*['"]?([A-Za-z0-9/+=]{40})['"]?/gi, severity: 'critical' },
    { name: 'Google API Key', regex: /AIza[0-9A-Za-z\-_]{35}/g, severity: 'high' },
    { name: 'Stripe Live Key', regex: /sk_live_[0-9a-zA-Z]{24,}/g, severity: 'critical' },
    { name: 'Stripe Publishable Key', regex: /pk_live_[0-9a-zA-Z]{24,}/g, severity: 'medium' },
    { name: 'Stripe Test Key', regex: /sk_test_[0-9a-zA-Z]{24,}/g, severity: 'low' },
    { name: 'GitHub Token', regex: /gh[ps]_[A-Za-z0-9_]{36,}/g, severity: 'critical' },
    { name: 'GitHub OAuth', regex: /gho_[A-Za-z0-9_]{36,}/g, severity: 'high' },
    { name: 'Twilio API Key', regex: /SK[0-9a-fA-F]{32}/g, severity: 'high' },
    { name: 'SendGrid API Key', regex: /SG\.[A-Za-z0-9\-_]{22,}\.[A-Za-z0-9\-_]{43,}/g, severity: 'high' },
    { name: 'Slack Token', regex: /xox[baprs]-[0-9a-zA-Z\-]{10,}/g, severity: 'high' },
    { name: 'Slack Webhook', regex: /hooks\.slack\.com\/services\/[A-Za-z0-9/]+/g, severity: 'medium' },
    { name: 'Firebase Key', regex: /AAAA[A-Za-z0-9_-]{7}:[A-Za-z0-9_-]{140}/g, severity: 'high' },
    { name: 'Private Key', regex: /-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----/g, severity: 'critical' },
    { name: 'JWT Token', regex: /eyJ[A-Za-z0-9-_]+\.eyJ[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+/g, severity: 'medium' },
    { name: 'Generic API Key', regex: /(?:api[_-]?key|apikey|api[_-]?secret)\s*[=:]\s*['"]([^'"]{8,})['"/]/gi, severity: 'medium' },
    { name: 'Generic Secret', regex: /(?:secret|password|passwd|pwd|token)\s*[=:]\s*['"]([^'"]{8,})['"/]/gi, severity: 'medium' },
    { name: 'Database URL', regex: /(?:mongodb|postgres|mysql|redis):\/\/[^\s'"<>]+/gi, severity: 'critical' },
    { name: 'Bearer Token in Code', regex: /['"](Bearer\s+[A-Za-z0-9\-._~+/]+=*)['"]/g, severity: 'high' },
  ];

  // Patterns without a distinctive prefix; matches must also pass the entropy filter.
  static LOW_CONFIDENCE_PATTERNS = new Set([
    'Generic API Key', 'Generic Secret', 'Bearer Token in Code', 'JWT Token',
  ]);

  // Patterns that bypass the surrounding-code heuristic in _isCodeContext.
  static HIGH_CONFIDENCE_PATTERNS = new Set([
    'AWS Access Key', 'AWS Secret Key', 'Stripe Live Key', 'Stripe Test Key',
    'GitHub Token', 'GitHub OAuth', 'SendGrid API Key', 'Slack Token', 'Private Key', 'Database URL',
  ]);

  // Mount-point ids used to recognise an SPA shell (React/Vue/Angular/Next/Nuxt).
  // Deliberately not /g so .test() stays stateless.
  static SPA_MOUNT_RE = /id=["'](root|app|__next|__nuxt)["']/;

  // Common sensitive file paths to probe
  static SENSITIVE_PATHS = [
    { path: '/.env', desc: 'Environment variables file' },
    { path: '/.env.local', desc: 'Local environment file' },
    { path: '/.env.production', desc: 'Production environment file' },
    { path: '/.env.development', desc: 'Development environment file' },
    { path: '/.git/config', desc: 'Git configuration' },
    { path: '/.git/HEAD', desc: 'Git HEAD reference' },
    { path: '/wp-config.php', desc: 'WordPress configuration' },
    { path: '/config.json', desc: 'Configuration file' },
    { path: '/config.yaml', desc: 'Configuration file' },
    { path: '/config.yml', desc: 'Configuration file' },
    { path: '/.DS_Store', desc: 'macOS directory metadata' },
    { path: '/debug', desc: 'Debug endpoint' },
    { path: '/_debug', desc: 'Debug endpoint' },
    { path: '/graphiql', desc: 'GraphQL IDE (should not be public)' },
    { path: '/graphql', desc: 'GraphQL endpoint' },
    { path: '/__debug', desc: 'Debug endpoint' },
    { path: '/phpinfo.php', desc: 'PHP info page' },
    { path: '/server-status', desc: 'Apache server status' },
    { path: '/elmah.axd', desc: '.NET error log' },
    { path: '/actuator', desc: 'Spring Boot actuator' },
    { path: '/actuator/env', desc: 'Spring Boot environment' },
  ];

  /**
   * Run secret detection on all crawled surfaces.
   *
   * @param {object} surfaceInventory - Crawl result; `pages`, `baseUrl` and
   *   (optionally) `apiEndpoints` are read.
   * @returns {Promise<Array>} The accumulated `this.findings` array.
   */
  async detect(surfaceInventory) {
    // 1. Scan page sources for secrets
    await this._scanPageSources(surfaceInventory);

    // 2. Probe for sensitive file exposure
    await this._probeSensitivePaths(surfaceInventory.baseUrl);

    // 3. Check for source map exposure
    await this._checkSourceMaps(surfaceInventory);

    this.logger?.info?.(`Secret detector found ${this.findings.length} issues`);
    return this.findings;
  }

  /**
   * Load every successfully crawled page in headless Chromium and scan its
   * rendered HTML, inline scripts and external scripts for secret patterns.
   *
   * Pages whose `status` is not a number below 400, or whose URL was already
   * checked, are skipped. The browser and each page are released in `finally`
   * blocks so a failing page cannot leak them.
   *
   * @param {object} surfaceInventory - `pages` is iterated; each entry's `url` and `status` are read.
   */
  async _scanPageSources(surfaceInventory) {
    const browser = await chromium.launch({ headless: true });
    try {
      const context = await browser.newContext({ ignoreHTTPSErrors: true });

      for (const page of surfaceInventory.pages ?? []) {
        if (typeof page.status !== 'number' || page.status >= 400) continue;
        if (this.checkedUrls.has(page.url)) continue;
        this.checkedUrls.add(page.url);

        let browserPage;
        try {
          browserPage = await context.newPage();
          await browserPage.goto(page.url, { waitUntil: 'networkidle', timeout: 15000 });

          // Full serialized document (includes inline script text)
          const source = await browserPage.content();

          // Collect every <script>: `src` for external ones, text for inline ones
          const scripts = await browserPage.evaluate(() =>
            Array.from(document.querySelectorAll('script')).map((s) => ({
              src: s.src || null,
              content: s.textContent || '',
            })),
          );

          this._scanText(source, page.url, 'page source');

          // Inline scripts are usually already part of `source`; _scanText
          // de-duplicates, so this only adds what the serialized HTML missed.
          for (const script of scripts) {
            if (script.content) {
              this._scanText(script.content, page.url, 'inline script');
            }
          }

          for (const script of scripts) {
            if (script.src) await this._scanExternalScript(script.src);
          }
        } catch (err) {
          this.logger?.debug?.(`Secret scan failed for ${page.url}: ${err.message}`);
        } finally {
          // Closing is best effort: a page that failed to open has nothing to close,
          // and a close error must not abort the remaining pages.
          await browserPage?.close().catch(() => {});
        }
      }
    } finally {
      await browser.close();
    }
  }

  /**
   * Fetch one external script (once per URL) and scan its body. Best effort:
   * network errors and timeouts are logged at debug level and otherwise ignored.
   *
   * @param {string} src - Absolute script URL.
   */
  async _scanExternalScript(src) {
    if (this.checkedUrls.has(src)) return;
    this.checkedUrls.add(src);
    try {
      const resp = await fetch(src, { signal: AbortSignal.timeout(5000) });
      if (resp.ok) {
        this._scanText(await resp.text(), src, 'external JavaScript');
      }
    } catch (err) {
      this.logger?.debug?.(`Could not fetch script ${src}: ${err.message}`);
    }
  }

  /**
   * Scan a text block for secret patterns and record a finding per new secret.
   *
   * A match is dropped when it is a known false positive, when a low-confidence
   * pattern has Shannon entropy below 3.5, when the surrounding 60 characters
   * look like code rather than a literal secret, or when the same pattern/URL/value
   * was already reported.
   *
   * @param {string} text - Content to scan.
   * @param {string} url - Where the content came from (used in the finding).
   * @param {string} context - Human-readable location, e.g. 'page source'.
   */
  _scanText(text, url, context) {
    for (const pattern of SecretDetector.PATTERNS) {
      // Fresh RegExp per scan: the shared /g literals would carry lastIndex between calls.
      const regex = new RegExp(pattern.regex.source, pattern.regex.flags);

      for (let match = regex.exec(text); match !== null; match = regex.exec(text)) {
        // Prefer the capture group (the secret itself) over the whole match
        const secretValue = match[1] || match[0];

        // Skip obvious false positives (deny-list)
        if (this._isFalsePositive(secretValue, pattern.name)) continue;

        // Entropy filter: generic patterns below the threshold are likely
        // placeholders or variable names. Prefixed patterns skip this filter.
        if (
          SecretDetector.LOW_CONFIDENCE_PATTERNS.has(pattern.name) &&
          this._shannonEntropy(secretValue) < 3.5
        ) {
          continue;
        }

        // Context-aware check: examine the surrounding text for code patterns
        const surroundStart = Math.max(0, match.index - 60);
        const surroundEnd = Math.min(text.length, match.index + match[0].length + 60);
        const surrounding = text.substring(surroundStart, surroundEnd);
        if (this._isCodeContext(surrounding, secretValue, pattern.name)) continue;

        // Report each secret once per URL, even if it shows up in both the
        // serialized page source and the inline script pass.
        const dedupeKey = JSON.stringify([pattern.name, url, secretValue]);
        if (this.seenSecrets.has(dedupeKey)) continue;
        this.seenSecrets.add(dedupeKey);

        const maskedValue = this._maskSecret(secretValue);

        this.findings.push(createFinding({
          module: 'security',
          title: `Exposed ${pattern.name}: ${maskedValue}`,
          severity: pattern.severity,
          affected_surface: url,
          description: `A ${pattern.name} was found in the ${context} at ${url}. Exposed secrets can be exploited by attackers to gain unauthorized access to services, data, and infrastructure.\n\nDetected value: ${maskedValue}`,
          reproduction: [
            `1. Navigate to ${url}`,
            `2. View the ${context}`,
            `3. Search for the pattern matching ${pattern.name}`,
            `4. Found: ${maskedValue}`,
          ],
          evidence: JSON.stringify({ pattern: pattern.name, maskedValue, location: context }),
          remediation: `Immediately rotate this ${pattern.name}. Remove it from client-side code and store in server-side environment variables or a secrets manager. Never commit secrets to version control.`,
          references: ['https://owasp.org/www-community/vulnerabilities/Use_of_hard-coded_password'],
        }));
      }
    }
  }

  /**
   * Shannon entropy — measures randomness of a string, in bits per character.
   * Characters are counted as Unicode code points, and the total uses the same
   * unit so the probabilities always sum to 1.
   *
   * @param {string} str
   * @returns {number} entropy in bits (0 for empty or falsy input)
   */
  _shannonEntropy(str) {
    if (!str || str.length === 0) return 0;

    const counts = new Map();
    let total = 0;
    for (const ch of str) {
      counts.set(ch, (counts.get(ch) ?? 0) + 1);
      total += 1;
    }

    let entropy = 0;
    for (const count of counts.values()) {
      const p = count / total;
      entropy -= p * Math.log2(p);
    }
    return entropy;
  }

  /**
   * Probe known sensitive file paths under `baseUrl` and record a finding for
   * each one that returns HTTP 200 with content that looks like the real file
   * (not an SPA catch-all page).
   *
   * @param {string} baseUrl - Site root the paths are resolved against.
   */
  async _probeSensitivePaths(baseUrl) {
    // Without a parseable base there is nothing to resolve the paths against.
    let base;
    try {
      base = new URL(baseUrl);
    } catch {
      this.logger?.debug?.(`Skipping sensitive path probe: invalid base URL "${baseUrl}"`);
      return;
    }

    // Step 1: Fingerprint the SPA shell by recording the homepage body length
    let homepageLength = null;
    try {
      const homeResp = await fetch(baseUrl, {
        method: 'GET',
        redirect: 'follow',
        signal: AbortSignal.timeout(5000),
      });
      if (homeResp.ok) {
        homepageLength = (await homeResp.text()).length;
      }
    } catch (err) {
      // The fingerprint is optional; the other SPA checks still apply.
      this.logger?.debug?.(`Homepage fingerprint failed for ${baseUrl}: ${err.message}`);
    }

    for (const { path, desc } of SecretDetector.SENSITIVE_PATHS) {
      const url = new URL(path, base).toString();

      try {
        const resp = await fetch(url, {
          method: 'GET',
          redirect: 'follow',
          signal: AbortSignal.timeout(5000),
        });
        if (resp.status !== 200) continue;

        const contentType = resp.headers.get('content-type') || '';
        const body = await resp.text();

        // Filter out obviously empty responses
        if (body.trim().length < 5) continue;

        // SPA catch-all: the server answered with its app shell, not the file
        if (this._isSpaCatchAll(path, body, contentType, homepageLength)) continue;

        // Verify the response looks like the expected file type
        if (!this._isLegitimateFileContent(path, body, contentType)) continue;

        const isSecret = path.includes('.env') || path.includes('.git') || path.includes('config');
        const severity = isSecret ? 'critical' : 'high';

        this.findings.push(createFinding({
          module: 'security',
          title: `Sensitive File Accessible: ${path}`,
          severity,
          affected_surface: url,
          description: `The file "${path}" (${desc}) is publicly accessible and returned a ${resp.status} response. This file may contain sensitive configuration, credentials, or repository metadata.\n\nContent-Type: ${contentType}\nResponse length: ${body.length} bytes`,
          reproduction: [
            `1. Navigate to ${url}`,
            `2. File returns HTTP ${resp.status} with ${body.length} bytes`,
          ],
          evidence: body.substring(0, 500),
          remediation: `Block access to "${path}" via web server configuration. Add rules to deny access to dotfiles and sensitive configuration files.`,
          references: ['https://owasp.org/www-project-web-security-testing-guide/stable/4-Web_Application_Security_Testing/02-Configuration_and_Deployment_Management_Testing/04-Review_Old_Backup_and_Unreferenced_Files_for_Sensitive_Information'],
        }));
      } catch {
        // Path not accessible — this is fine
      }
    }
  }

  /**
   * Decide whether an HTML response is an SPA catch-all page rather than the
   * requested file. Non-HTML responses are never treated as catch-alls.
   *
   * @param {string} path - Probed path, e.g. '/.env'.
   * @param {string} body - Response body.
   * @param {string} contentType - Response Content-Type header ('' if absent).
   * @param {number|null} homepageLength - Homepage body length, or null if unknown.
   * @returns {boolean}
   */
  _isSpaCatchAll(path, body, contentType, homepageLength) {
    const isHTML = contentType.includes('text/html') || /<!doctype\s+html/i.test(body);
    if (!isHTML) return false;

    // Same body length as the homepage (within 10 chars); this also covers an identical body
    if (homepageLength !== null && Math.abs(body.length - homepageLength) < 10) return true;

    // Contains an SPA root mount point
    if (SecretDetector.SPA_MOUNT_RE.test(body)) return true;

    // Generic HTML page that never mentions the probed path
    if (body.includes('<!doctype html') || body.includes('<!DOCTYPE html')) {
      const pathName = path.replace(/[/._\-]/g, '').toLowerCase();
      if (!body.toLowerCase().includes(pathName)) return true;
    }

    return false;
  }

  /**
   * Validate that response content actually matches the expected sensitive file type.
   * Catches SPA catch-all where HTML is served for all routes.
   *
   * @param {string} path - Probed path.
   * @param {string} body - Response body.
   * @param {string} contentType - Response Content-Type header ('' if absent).
   * @returns {boolean} true when the body plausibly is the real file.
   */
  _isLegitimateFileContent(path, body, contentType) {
    const isHTML = contentType.includes('text/html') || /<!doctype\s+html/i.test(body);
    const isSpaShell = isHTML && SecretDetector.SPA_MOUNT_RE.test(body);

    // .env files should contain KEY=VALUE pairs, not HTML.
    // Keys may contain digits (S3_BUCKET) and may be prefixed with `export`.
    if (path.includes('.env')) {
      if (isHTML) return false;
      return /^\s*(?:export\s+)?[A-Za-z_][A-Za-z0-9_]*\s*=\s*\S/m.test(body);
    }

    // .git files should not be HTML
    if (path.includes('.git')) {
      if (isHTML) return false;
      if (path.includes('HEAD')) return /^ref:\s/.test(body.trim());
      if (path.includes('config')) return body.includes('[core]') || body.includes('[remote');
      return true;
    }

    // PHP output may legitimately be HTML, but not an SPA shell
    if (path.endsWith('.php')) {
      return !isSpaShell;
    }

    // Config files (json/yaml/yml) should contain structured data, not HTML
    if (/config\.(json|yaml|yml)$/.test(path)) {
      if (isHTML) return false;
      if (path.endsWith('.json')) {
        try {
          JSON.parse(body);
          return true;
        } catch {
          return false; // claimed JSON but does not parse
        }
      }
      return true;
    }

    // .DS_Store is binary, should not be HTML
    if (path.includes('.DS_Store')) {
      return !isHTML;
    }

    // Debug/admin endpoints — HTML is OK but SPA shells are not
    return !isSpaShell;
  }

  /**
   * Check for exposed source maps: for every crawled page or API endpoint whose
   * URL path ends in `.js`, request `<path>.map` and record a finding when the
   * response really is a source map (not an HTML catch-all page).
   *
   * @param {object} surfaceInventory - `pages` and optional `apiEndpoints` are read.
   */
  async _checkSourceMaps(surfaceInventory) {
    const candidates = [
      ...(surfaceInventory.pages || []),
      ...(surfaceInventory.apiEndpoints || []),
    ];

    const mapUrls = new Set();
    for (const entry of candidates) {
      const mapUrl = this._sourceMapUrlFor(entry?.url);
      if (mapUrl) mapUrls.add(mapUrl);
    }

    for (const mapUrl of mapUrls) {
      try {
        const resp = await fetch(mapUrl, { signal: AbortSignal.timeout(5000) });
        if (!resp.ok) continue;

        const contentType = resp.headers.get('content-type') || '';
        const body = await resp.text();
        if (!this._looksLikeSourceMap(body, contentType)) continue;

        this.findings.push(createFinding({
          module: 'security',
          title: `Source Map Exposed: ${new URL(mapUrl).pathname}`,
          severity: 'medium',
          affected_surface: mapUrl,
          description: 'A JavaScript source map file is publicly accessible. Source maps contain the original source code, which can reveal business logic, internal API endpoints, and potential vulnerabilities.',
          reproduction: [
            `1. Navigate to ${mapUrl}`,
            '2. Source map file is returned successfully',
          ],
          remediation: 'Remove source maps from production builds or restrict access to them via server configuration. Most bundlers have options to disable source map generation for production.',
        }));
      } catch {
        // Not accessible — fine
      }
    }
  }

  /**
   * Derive the conventional source-map URL for a script URL by appending
   * `.map` to its path (query string is kept, fragment is dropped).
   *
   * @param {*} url - Candidate URL; anything but an absolute URL string yields null.
   * @returns {string|null} The `.map` URL, or null when `url` is not a `.js` resource.
   */
  _sourceMapUrlFor(url) {
    if (typeof url !== 'string' || url.length === 0) return null;
    try {
      const parsed = new URL(url);
      if (!parsed.pathname.endsWith('.js')) return null;
      parsed.pathname += '.map';
      parsed.hash = '';
      return parsed.toString();
    } catch {
      // Relative or malformed URL: fetch could not request it anyway
      return null;
    }
  }

  /**
   * Check that a response body is a source map: non-HTML JSON carrying a
   * `mappings` (regular map) or `sections` (index map) field.
   *
   * @param {string} body - Response body.
   * @param {string} contentType - Response Content-Type header ('' if absent).
   * @returns {boolean}
   */
  _looksLikeSourceMap(body, contentType) {
    if (contentType.includes('text/html') || /<!doctype\s+html/i.test(body)) return false;

    // Some servers prepend an XSSI guard line ")]}'" before the JSON payload
    const json = body.replace(/^\)\]\}'[^\n]*\n/, '');
    try {
      const map = JSON.parse(json);
      return map !== null && typeof map === 'object' && ('mappings' in map || 'sections' in map);
    } catch {
      return false; // not JSON, so not a source map
    }
  }

  /**
   * Mask a secret for display. At most a quarter of the value is shown at each
   * end (capped at 4 characters), so short secrets are never fully revealed.
   *
   * @param {string} value
   * @returns {string} e.g. 'AKIA****MPLE'; '****' for values shorter than 8 chars.
   */
  _maskSecret(value) {
    if (!value || value.length < 8) return '****';
    const visible = Math.min(4, Math.floor(value.length / 4));
    return value.substring(0, visible) + '****' + value.substring(value.length - visible);
  }

  /**
   * Deny-list check for a matched value.
   *
   * @param {string} value - The matched secret candidate.
   * @param {string} patternName - `name` of the PATTERNS entry that matched.
   * @returns {boolean} true when the match should be discarded.
   */
  _isFalsePositive(value, patternName) {
    if (!value) return true;
    // Skip very short matches
    if (value.length < 8) return true;

    // Skip placeholder/example values. The comparison is case-insensitive, so
    // the list is lower case. A Stripe test key always contains "test" in its
    // own prefix; strip that prefix so the pattern can still be reported.
    const placeholders = ['example', 'test', 'placeholder', 'your_', 'xxx', 'todo', 'changeme', 'sample', 'dummy', 'mock'];
    const candidate = (patternName === 'Stripe Test Key' ? value.replace(/^sk_test_/, '') : value).toLowerCase();
    if (placeholders.some((p) => candidate.includes(p))) return true;

    // Skip if all same character
    if (/^(.)\1+$/.test(value)) return true;

    // ── Bearer Token in Code false positives ──
    if (patternName === 'Bearer Token in Code') {
      // Skip format strings and template literals: Bearer ${token}, Bearer "+token, Bearer '+variable
      if (/Bearer\s+[\$`{"'+]/.test(value)) return true;
      // Skip concatenation patterns: Bearer "+, Bearer '+
      if (/Bearer\s*["']\s*\+/.test(value)) return true;
      // Skip if value after "Bearer " is a common variable name
      const afterBearer = value.replace(/^Bearer\s+/, '');
      const varNames = ['token', 'accesstoken', 'access_token', 'authtoken', 'auth_token', 'jwt', 'idtoken', 'id_token'];
      if (varNames.includes(afterBearer.toLowerCase().replace(/["'`]/g, ''))) return true;
    }

    // ── JWT Token false positives ──
    if (patternName === 'JWT Token') {
      const [headerPart, payloadPart] = value.split('.');
      try {
        // Header is parsed only to confirm the token is well-formed
        JSON.parse(Buffer.from(headerPart, 'base64url').toString());
        const payload = JSON.parse(Buffer.from(payloadPart, 'base64url').toString());
        // Skip if payload contains test/example indicators
        if (['test', 'example', '1234567890'].includes(payload?.sub)) return true;
        if (payload?.name === 'John Doe') return true; // jwt.io example
      } catch {
        // Undecodable token: fall through to the generic string checks below
      }
    }

    // ── Enhanced false-positive detection for Generic Secret / Generic API Key ──
    if (patternName === 'Generic Secret' || patternName === 'Generic API Key') {
      // Reject common variable/property names that aren't actual secrets
      const commonNames = [
        'access_token', 'accesstoken', 'refresh_token', 'refreshtoken',
        'client_secret', 'clientsecret', 'client_id', 'clientid',
        'token_type', 'tokentype', 'grant_type', 'granttype',
        'auth_token', 'authtoken', 'id_token', 'idtoken',
        'session_token', 'sessiontoken', 'csrf_token', 'csrftoken',
        'xsrf_token', 'xsrftoken', 'bearer_token', 'bearertoken',
        'password_hash', 'passwordhash', 'password_salt', 'passwordsalt',
        'secret_key', 'secretkey', 'api_secret', 'apisecret',
        'token_secret', 'tokensecret', 'token_key', 'tokenkey',
      ];
      const cleanValue = value.toLowerCase().replace(/[\s"'`]/g, '');
      if (commonNames.includes(cleanValue)) return true;

      // Reject if the value is just a common word/identifier (no special chars, low entropy)
      if (/^[a-z_][a-z0-9_]*$/i.test(value) && value.length < 20) return true;

      // Reject minified JS fragments: high ratio of special chars
      const specialCharRatio = (value.match(/[(){}\[\],;!?@#$%^&*~`<>|\\]/g) || []).length / value.length;
      if (specialCharRatio > 0.15) return true;

      // Reject if it looks like a code expression (contains JS operators)
      if (/[(){}\[\];,]/.test(value) && /[a-z]\(/i.test(value)) return true;

      // Reject common JS code patterns
      if (/\b(function|return|const|var|let|this|window|document|null|undefined|true|false)\b/.test(value)) return true;

      // Reject URL-like values that aren't secrets
      if (/^(https?:\/\/|data:|blob:|javascript:)/i.test(value)) return true;

      // Reject hex color codes
      if (/^#[0-9a-fA-F]{3,8}$/.test(value)) return true;

      // Require a minimum share of distinct characters for generic matches
      const uniqueChars = new Set(value).size;
      if (uniqueChars < value.length * 0.3) return true;
    }

    // ── Extended deny-list for known false-positive shapes ──
    // UUIDs (e.g. analytics IDs, tracking tokens, feature flags)
    if (/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(value)) return true;
    // Hex hashes (MD5, SHA1, SHA256) — likely content hashes from bundlers
    if (/^[0-9a-f]{32,64}$/i.test(value) && !/(?:key|token|secret|password|auth)/i.test(patternName)) return true;
    // Base64 image/font data URIs
    if (/^data:(?:image|font|application)\//.test(value)) return true;
    // npm/yarn package checksums (sha512- prefix)
    if (/^sha[0-9]+-/.test(value)) return true;
    // Short alphanumeric identifiers (very likely variable/class names, not secrets)
    if (/^[a-zA-Z][a-zA-Z0-9]{2,10}$/.test(value)) return true;

    return false;
  }

  /**
   * Context-aware check: examines the text surrounding a match to detect code patterns.
   *
   * @param {string} surrounding - The match plus up to 60 characters on each side.
   * @param {string} value - The matched value (currently not inspected).
   * @param {string} patternName - `name` of the PATTERNS entry that matched.
   * @returns {boolean} true if the match appears in a code context and should be skipped.
   */
  _isCodeContext(surrounding, value, patternName) {
    // High-confidence patterns (unique prefixes) are never discarded on context
    if (SecretDetector.HIGH_CONFIDENCE_PATTERNS.has(patternName)) return false;

    // Template literal interpolation nearby: likely a built string, not a real secret
    if (/\$\{[^}]*\}/.test(surrounding)) return true;

    if (patternName === 'Generic Secret' || patternName === 'Generic API Key') {
      // ORM/schema definitions: type: "string", required: true, etc.
      if (/type\s*:\s*["']string["']/.test(surrounding)) return true;
      // Config key definitions: { password: "", token: "" }
      if (/["']\s*:\s*["']["']/.test(surrounding)) return true;
    }

    return false;
  }
}
516
+
517
+ export default SecretDetector;