@clear-capabilities/agentic-security-scanner 0.77.0 → 0.78.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/bin/.agentic-security/findings.json +1907 -0
  2. package/bin/.agentic-security/last-scan.json +1907 -0
  3. package/bin/.agentic-security/last-scan.json.sig +1 -0
  4. package/bin/.agentic-security/scan-history.json +115 -0
  5. package/bin/.agentic-security/streak.json +20 -0
  6. package/bin/agentic-security.js +33 -2
  7. package/dist/178.index.js +1 -1
  8. package/dist/384.index.js +1 -1
  9. package/dist/637.index.js +1 -1
  10. package/dist/718.index.js +106 -0
  11. package/dist/824.index.js +126 -0
  12. package/dist/838.index.js +1 -1
  13. package/dist/agentic-security.mjs +32 -32
  14. package/dist/agentic-security.mjs.sha256 +1 -1
  15. package/package.json +3 -3
  16. package/src/.agentic-security/findings.json +82642 -0
  17. package/src/.agentic-security/last-scan.json +82642 -0
  18. package/src/.agentic-security/last-scan.json.sig +1 -0
  19. package/src/.agentic-security/scan-history.json +10054 -0
  20. package/src/.agentic-security/streak.json +21 -0
  21. package/src/dataflow/.agentic-security/findings.json +3515 -0
  22. package/src/dataflow/.agentic-security/last-scan.json +3515 -0
  23. package/src/dataflow/.agentic-security/last-scan.json.sig +1 -0
  24. package/src/dataflow/.agentic-security/scan-history.json +702 -0
  25. package/src/dataflow/.agentic-security/streak.json +22 -0
  26. package/src/dataflow/async-sequencing.js +16 -7
  27. package/src/dataflow/builtin-summaries.js +131 -0
  28. package/src/dataflow/catalog.js +107 -0
  29. package/src/dataflow/cross-repo.js +75 -1
  30. package/src/dataflow/engine.js +129 -0
  31. package/src/dataflow/implicit-flow.js +24 -6
  32. package/src/dataflow/stub-aware-filter.js +69 -11
  33. package/src/dataflow/summaries.js +28 -3
  34. package/src/engine-parallel.js +70 -0
  35. package/src/engine.js +165 -15
  36. package/src/ir/.agentic-security/findings.json +3777 -0
  37. package/src/ir/.agentic-security/last-scan.json +3777 -0
  38. package/src/ir/.agentic-security/last-scan.json.sig +1 -0
  39. package/src/ir/.agentic-security/scan-history.json +771 -0
  40. package/src/ir/.agentic-security/streak.json +21 -0
  41. package/src/ir/index.js +22 -1
  42. package/src/ir/parser-go.js +403 -0
  43. package/src/ir/parser-js.js +2 -0
  44. package/src/ir/parser-php.js +330 -0
  45. package/src/ir/parser-py.helper.py +137 -11
  46. package/src/ir/parser-rb.js +309 -0
  47. package/src/posture/.agentic-security/findings.json +51562 -0
  48. package/src/posture/.agentic-security/last-scan.json +51562 -0
  49. package/src/posture/.agentic-security/last-scan.json.sig +1 -0
  50. package/src/posture/.agentic-security/scan-history.json +650 -0
  51. package/src/posture/.agentic-security/streak.json +20 -0
  52. package/src/posture/calibration.js +14 -0
  53. package/src/posture/triage.js +13 -0
  54. package/src/report/.agentic-security/findings.json +80 -0
  55. package/src/report/.agentic-security/last-scan.json +80 -0
  56. package/src/report/.agentic-security/last-scan.json.sig +1 -0
  57. package/src/report/.agentic-security/scan-history.json +35 -0
  58. package/src/report/.agentic-security/streak.json +22 -0
  59. package/src/report/index.js +23 -2
  60. package/src/sast/.agentic-security/findings.json +5190 -0
  61. package/src/sast/.agentic-security/last-scan.json +5190 -0
  62. package/src/sast/.agentic-security/last-scan.json.sig +1 -0
  63. package/src/sast/.agentic-security/scan-history.json +408 -0
  64. package/src/sast/.agentic-security/streak.json +20 -0
  65. package/src/sast/cache-poisoning.js +77 -0
  66. package/src/sast/comparison-safety.js +73 -0
  67. package/src/sast/db-taint.js +54 -0
  68. package/src/sast/graphql.js +127 -0
  69. package/src/sast/llm-stored-prompt.js +57 -0
  70. package/src/sast/mutation-xss.js +43 -0
  71. package/src/sast/nosql-injection.js +5 -0
  72. package/src/sast/null-byte-injection.js +76 -0
  73. package/src/sast/redos-nfa.js +338 -0
  74. package/src/sast/sensitive-data-logging.js +73 -0
  75. package/src/sast/weak-password-hash.js +77 -0
  76. package/src/sast/weak-randomness.js +100 -0
  77. package/src/sca/.agentic-security/findings.json +1587 -0
  78. package/src/sca/.agentic-security/last-scan.json +1587 -0
  79. package/src/sca/.agentic-security/last-scan.json.sig +1 -0
  80. package/src/sca/.agentic-security/scan-history.json +36 -0
  81. package/src/sca/.agentic-security/streak.json +21 -0
  82. package/src/sca/llm-function-extract.js +107 -0
  83. package/src/sca/vendor-detect.js +91 -0
@@ -0,0 +1,127 @@
1
+ // GraphQL security detector.
2
+ //
3
+ // Coverage:
4
+ // 1. Query injection — string-concat/template building GraphQL queries from user input
5
+ // 2. Depth/complexity DoS — ApolloServer/express-graphql without depth limiting
6
+ // 3. Introspection in production — introspection enabled or not explicitly disabled
7
+ // 4. Batching DoS — missing batch-size limits
8
+ // 5. Field suggestions — error messages leaking field names
9
+
10
/**
 * Convert a character offset into a 1-based line number.
 *
 * @param {string} raw - Full file contents.
 * @param {number} idx - Character offset into `raw`.
 * @returns {number} Line number (1-based) containing `idx`.
 */
function _line(raw, idx) {
  return raw.slice(0, idx).split('\n').length;
}

/**
 * Heuristic, regex-based GraphQL security scan of a single source file.
 *
 * Checks performed:
 *   1. Query/mutation strings built by template interpolation or concatenation.
 *   2. A GraphQL server mention with no depth/complexity limiter anywhere in the file.
 *   3. `introspection: true`.
 *   4. `allowBatchedHttpRequests: true` with no batch-size limit.
 *   5. `debug: true` in a file that also mentions ApolloServer/graphqlHTTP.
 *
 * @param {string} fp - File path; only js/jsx/ts/tsx/mjs/cjs/py/go/rb files are scanned.
 * @param {string} raw - File contents; files over 500,000 characters are skipped.
 * @returns {Array<object>} Finding objects (empty when nothing matched or input is unusable).
 */
export function scanGraphQL(fp, raw) {
  if (!fp || !raw || typeof raw !== 'string') return [];
  if (raw.length > 500_000) return [];
  if (!/\.(?:js|jsx|ts|tsx|mjs|cjs|py|go|rb)$/i.test(fp)) return [];

  const findings = [];

  // 1. Query injection: string concat/template into GraphQL query strings
  const queryConcat = /(?:query|mutation)\s*[:=]\s*(?:`[^`]*\$\{[^}]*\}|["'][^"']*["']\s*\+\s*\w)/g;
  for (const m of raw.matchAll(queryConcat)) {
    const ln = _line(raw, m.index);
    const after = raw.slice(m.index, m.index + 300);
    // Only report when the nearby text actually looks like GraphQL usage.
    if (/\b(?:gql|graphql|\.query|\.mutate)\b/i.test(after) || /\b(?:query|mutation)\s*\{/.test(after)) {
      findings.push({
        id: `graphql-injection:${fp}:${ln}`,
        file: fp, line: ln,
        vuln: 'GraphQL Query Injection — user input concatenated into query string',
        severity: 'high',
        family: 'graphql-injection',
        cwe: 'CWE-943',
        parser: 'GRAPHQL',
        confidence: 0.75,
        description: 'GraphQL query is built via string concatenation or template interpolation with variables that may contain user input. An attacker can inject additional fields, aliases, or mutations.',
        remediation: 'Use parameterized GraphQL queries with variables: `query GetUser($id: ID!) { user(id: $id) { name } }` and pass variables separately.',
      });
    }
  }

  // 2. Depth/complexity DoS: ApolloServer/createYoga/express-graphql without depth limit
  if (/\b(?:ApolloServer|createYoga|graphqlHTTP|makeExecutableSchema)\b/.test(raw)) {
    if (!/\b(?:depthLimit|graphql-depth-limit|graphql-validation-complexity|costAnalysis|maxDepth|queryDepthLimit)\b/.test(raw)) {
      // makeExecutableSchema alone opens the gate above but is not a reportable anchor.
      const m = raw.match(/\b(ApolloServer|createYoga|graphqlHTTP)\b/);
      if (m) {
        const ln = _line(raw, m.index);
        findings.push({
          id: `graphql-depth-dos:${fp}:${ln}`,
          file: fp, line: ln,
          vuln: 'GraphQL Depth/Complexity DoS — no depth limiting configured',
          severity: 'medium',
          family: 'graphql-dos',
          cwe: 'CWE-400',
          parser: 'GRAPHQL',
          confidence: 0.70,
          description: 'GraphQL server is configured without query depth or complexity limits. An attacker can send deeply nested queries that exhaust server resources.',
          remediation: 'Add graphql-depth-limit or graphql-query-complexity to validationRules: new ApolloServer({ validationRules: [depthLimit(10)] }).',
        });
      }
    }
  }

  // 3. Introspection in production (first occurrence only)
  const introspection = raw.match(/\bintrospection\s*:\s*true\b/);
  if (introspection) {
    const ln = _line(raw, introspection.index);
    findings.push({
      id: `graphql-introspection:${fp}:${ln}`,
      file: fp, line: ln,
      vuln: 'GraphQL Introspection Enabled — schema exposed to clients',
      severity: 'medium',
      family: 'graphql-introspection',
      cwe: 'CWE-200',
      parser: 'GRAPHQL',
      confidence: 0.80,
      description: 'Introspection is explicitly enabled. Attackers can query __schema to discover all types, fields, and mutations — accelerating further attacks.',
      remediation: 'Disable introspection in production: new ApolloServer({ introspection: process.env.NODE_ENV !== "production" }).',
    });
  }

  // 4. Batching DoS: missing batch limits
  if (!/\b(?:allowBatchedHttpRequests\s*:\s*false|maxBatchSize|batching\s*:\s*false)\b/.test(raw)) {
    // Examine every mention, not just the first: the first one is typically the
    // import statement, which can sit more than 500 characters above the
    // constructor call that carries the option.
    for (const m of raw.matchAll(/\b(?:ApolloServer|ApolloGateway)\b/g)) {
      const after = raw.slice(m.index, m.index + 500);
      if (!/allowBatchedHttpRequests\s*:\s*true/.test(after)) continue;
      if (/maxBatchSize/.test(after)) continue;
      const ln = _line(raw, m.index);
      findings.push({
        id: `graphql-batch-dos:${fp}:${ln}`,
        file: fp, line: ln,
        vuln: 'GraphQL Batch DoS — batching enabled without size limit',
        severity: 'medium',
        family: 'graphql-dos',
        cwe: 'CWE-400',
        parser: 'GRAPHQL',
        confidence: 0.65,
        description: 'Batched HTTP requests are enabled without a maxBatchSize limit. Attackers can send thousands of operations in a single HTTP request.',
        remediation: 'Set allowBatchedHttpRequests: false, or add maxBatchSize: 10 to limit batch size.',
      });
      break; // at most one batching finding per file
    }
  }

  // 5. Field suggestions leaking schema info
  if (/\b(?:includeStacktraceInErrorResponses|formatError|debug\s*:\s*true)\b/.test(raw)) {
    if (/\b(?:ApolloServer|graphqlHTTP)\b/.test(raw)) {
      for (const m of raw.matchAll(/\bdebug\s*:\s*true\b/g)) {
        const ln = _line(raw, m.index);
        findings.push({
          id: `graphql-debug:${fp}:${ln}`,
          file: fp, line: ln,
          vuln: 'GraphQL Debug Mode — error details exposed to clients',
          severity: 'low',
          family: 'graphql-introspection',
          cwe: 'CWE-209',
          parser: 'GRAPHQL',
          confidence: 0.70,
          description: 'Debug mode exposes stack traces and field suggestion in error responses, leaking internal schema structure.',
          remediation: 'Set debug: false or includeStacktraceInErrorResponses: false in production.',
        });
      }
    }
  }

  return findings;
}
@@ -101,3 +101,60 @@ export function scanStoredPromptInjection(fp, raw) {
101
101
  }
102
102
  return findings;
103
103
  }
104
+
105
// Heuristic for text that reads a record through an ORM / query layer.
// The `.query(` alternative is kept outside the \b-delimited group: a trailing
// \b after `(` would require a word character to follow, so calls whose first
// argument is a string literal (`db.query("SELECT ...")`) would never match.
const ORM_READ_RE = /\b(?:findOne|findUnique|findFirst|findById|findByPk|get_object_or_404|objects\.get|objects\.filter)\b|\.query\s*\(/;
106
+
107
/**
 * Cross-file heuristic: pair a variable assigned on an ORM-read line in one
 * file with an LLM call in a DIFFERENT file that uses a variable of the same
 * name, and report each pair as a stored-prompt-injection finding.
 *
 * Relies on `_lang`, `_lineOf`, `LLM_CALL_PATTERNS` and `ORM_READ_RE` from the
 * enclosing module. NOTE(review): `LLM_CALL_PATTERNS` entries are assumed to be
 * `[lang, RegExp, label]` with capture group 1 holding the prompt expression —
 * confirm against its definition.
 *
 * @param {Record<string, string>} fileContents - Map of file path → file text.
 * @returns {Array<object>} Findings, one per unique (read site, sink site) pair.
 */
export function scanStoredPromptInjectionCrossFile(fileContents) {
  if (!fileContents || typeof fileContents !== 'object') return [];

  const llmSinks = [];
  // varName → ORM read sites, in discovery order, so the join below is a
  // lookup instead of a sinks × reads nested scan.
  const readsByVar = new Map();

  for (const [fp, raw] of Object.entries(fileContents)) {
    if (!raw || typeof raw !== 'string' || raw.length > 500_000) continue;
    const lang = _lang(fp);
    if (!lang) continue;

    for (const [plang, pat, label] of LLM_CALL_PATTERNS) {
      if (plang !== lang) continue;
      // matchAll requires the g flag and always advances past empty matches,
      // so a pattern declared without /g cannot spin forever here.
      const flags = pat.flags.includes('g') ? pat.flags : `${pat.flags}g`;
      for (const m of raw.matchAll(new RegExp(pat.source, flags))) {
        const varName = (m[1] || '').split('.')[0];
        if (varName) llmSinks.push({ file: fp, line: _lineOf(raw, m.index), varName, label });
      }
    }

    // Cheap whole-file pre-check before the per-line pass.
    if (!ORM_READ_RE.test(raw)) continue;
    raw.split('\n').forEach((text, i) => {
      if (!ORM_READ_RE.test(text)) return;
      const assignMatch = text.match(/(\w+)\s*=\s*/);
      if (!assignMatch) return;
      const read = { file: fp, line: i + 1, varName: assignMatch[1] };
      const bucket = readsByVar.get(read.varName);
      if (bucket) bucket.push(read);
      else readsByVar.set(read.varName, [read]);
    });
  }

  const findings = [];
  const seen = new Set();
  for (const sink of llmSinks) {
    for (const read of readsByVar.get(sink.varName) || []) {
      // Same-file flows are out of scope for the cross-file pass.
      if (sink.file === read.file) continue;
      const id = `llm-stored-prompt-xfile:${read.file}:${read.line}->${sink.file}:${sink.line}`;
      if (seen.has(id)) continue;
      seen.add(id);
      findings.push({
        id,
        file: sink.file, line: sink.line,
        vuln: `LLM Stored-Prompt Injection — cross-file ORM→LLM (${sink.label})`,
        severity: 'high',
        cwe: 'CWE-1336',
        family: 'llm-prompt-injection',
        parser: 'LLM-STORED-PROMPT-XFILE',
        confidence: 0.55,
        description: `Variable "${sink.varName}" loaded from ORM at ${read.file}:${read.line} is used as LLM system prompt at ${sink.file}:${sink.line}. An attacker who can modify the DB record can inject instructions.`,
        remediation: 'Validate stored prompts against a schema or signing key. Wrap DB-loaded text in delimiters and a role-isolation frame.',
        source: { file: read.file, line: read.line, label: `ORM read → ${read.varName}` },
        sink: { file: sink.file, line: sink.line, label: `LLM ${sink.label}` },
      });
    }
  }
  return findings;
}
@@ -83,5 +83,48 @@ export function scanMutationXSS(fp, raw) {
83
83
  });
84
84
  }
85
85
 
86
+ // Email template XSS: user data rendered into HTML email body
87
+ const emailSinkRe = /\b(?:sendMail|transporter\.sendMail|sg\.send|ses\.sendEmail|mailgun\.messages\.create|send_email|mail\.send)\s*\(/g;
88
+ for (const em of code.matchAll(emailSinkRe)) {
89
+ const after = code.slice(em.index, em.index + 500);
90
+ if (!/\bhtml\s*:/i.test(after)) continue;
91
+ const taintHint = /(?:req\.|request\.|params|body|query|user\.\w+|data\.\w+)/.test(after);
92
+ const templateHint = /(?:ejs\.render|pug\.render|mustache\.render|handlebars\.compile|marked\.parse|render_template|Jinja2|\.render\s*\()/.test(after);
93
+ if (!taintHint && !templateHint) continue;
94
+ const line = lineOf(raw, em.index);
95
+ push({
96
+ id: `email-template-xss:${fp}:${line}`,
97
+ file: fp, line,
98
+ vuln: 'Email Template XSS — user data rendered into HTML email body',
99
+ severity: 'high',
100
+ cwe: 'CWE-79',
101
+ family: 'email-template-xss',
102
+ stride: 'Tampering',
103
+ snippet: (raw.split('\n')[line - 1] || '').trim().slice(0, 200),
104
+ remediation: 'HTML-escape user-supplied data before inserting into email templates. Use the template engine\'s auto-escape mode. Consider rendering text-only emails for user-generated content.',
105
+ parser: 'EMAIL-XSS',
106
+ confidence: 0.65,
107
+ });
108
+ }
109
+
110
+ // Markdown → HTML → innerHTML chain
111
+ const markdownHtmlRe = /\bmarked\.parse\s*\([^)]*(?:req\.|request\.|params|body|query|user)/g;
112
+ for (const mm of code.matchAll(markdownHtmlRe)) {
113
+ const line = lineOf(raw, mm.index);
114
+ push({
115
+ id: `markdown-xss:${fp}:${line}`,
116
+ file: fp, line,
117
+ vuln: 'Markdown→HTML XSS — user-supplied Markdown rendered to HTML without sanitization',
118
+ severity: 'high',
119
+ cwe: 'CWE-79',
120
+ family: 'xss',
121
+ stride: 'Tampering',
122
+ snippet: (raw.split('\n')[line - 1] || '').trim().slice(0, 200),
123
+ remediation: 'Pipe marked output through DOMPurify: `const html = DOMPurify.sanitize(marked.parse(userInput))`. Or use marked with `sanitize: true` option.',
124
+ parser: 'MARKDOWN-XSS',
125
+ confidence: 0.70,
126
+ });
127
+ }
128
+
86
129
  return findings;
87
130
  }
@@ -18,6 +18,9 @@ const DYNAMO_EXPR_CONCAT_RE = /(?:FilterExpression|ConditionExpression|KeyCondit
18
18
 
19
19
  const PY_MONGO_FIND_REQ = /\.\s*(?:find|find_one|update_one|update_many|delete_one|delete_many)\s*\(\s*request\s*\.\s*(?:json|data|args|form)/g;
20
20
 
21
// Matches `.aggregate([` followed, within 300 characters, by a `{ $match | $expr |
// $where | $function | $redact | $lookup :` stage key, followed within a further
// 200 characters by one of the tokens req/request/params/query/body/input ending
// at a word boundary. Global flag: intended for repeated scanning of one file.
+ const MONGO_AGGREGATE_RE = /\.\s*aggregate\s*\(\s*\[[\s\S]{0,300}?\{\s*\$(?:match|expr|where|function|redact|lookup)\s*:[\s\S]{0,200}?(?:req|request|params|query|body|input)\b/g;
22
// Matches `.mapReduce(` followed, within 300 characters, by one of the tokens
// req/request/params/query/body/input ending at a word boundary.
+ const MONGO_MAPREDUCE_RE = /\.\s*mapReduce\s*\([\s\S]{0,300}?(?:req|request|params|query|body|input)\b/g;
23
+
21
24
  function lineOf(raw, idx) { return raw.substring(0, idx).split('\n').length; }
22
25
 
23
26
  export function scanNoSQLInjection(fp, raw) {
@@ -33,6 +36,8 @@ export function scanNoSQLInjection(fp, raw) {
33
36
  [MONGO_WHERE_RE, 'mongo-where', 'NoSQL Injection: MongoDB $where with user-controlled string', 0.90],
34
37
  [MONGO_FIND_REQ_OBJ_RE, 'mongo-find', 'NoSQL Injection: MongoDB query with raw request object (operator injection)', 0.80],
35
38
  [DYNAMO_EXPR_CONCAT_RE, 'dynamo-expr', 'NoSQL Injection: DynamoDB Expression built via string concatenation', 0.85],
39
+ [MONGO_AGGREGATE_RE, 'mongo-aggregate', 'NoSQL Injection: MongoDB aggregate pipeline with user-controlled stage', 0.80],
40
+ [MONGO_MAPREDUCE_RE, 'mongo-mapreduce', 'NoSQL Injection: MongoDB mapReduce with user-controlled function', 0.85],
36
41
  ]) {
37
42
  const r = new RegExp(re.source, re.flags);
38
43
  while ((m = r.exec(code))) {
@@ -0,0 +1,76 @@
1
+ // Null-byte and path normalization injection detector.
2
+ //
3
+ // Detects patterns where file extension checks can be bypassed via
4
+ // null-byte truncation (%00, \0) or missing path normalization before
5
+ // filesystem operations.
6
+
7
/** Convert a character offset in `raw` into a 1-based line number. */
function _line(raw, idx) { return raw.slice(0, idx).split('\n').length; }

// Extension allow-list style checks: .endsWith('.jpg'), .includes(".png"), ...
const EXT_CHECK_RE = /\.(?:endsWith|match|test|includes)\s*\(\s*['"]\.(?:jpg|jpeg|png|gif|pdf|doc|docx|csv|txt|zip|svg|webp|mp4)/gi;
// Extension extraction helpers across Node, Python, Go and PHP.
const SPLITEXT_RE = /(?:path\.extname|os\.path\.splitext|filepath\.Ext|pathinfo)\s*\(/g;

// Filesystem sinks. Two deliberate details:
//  - no `g` flag: this regex is used with .test()/.search(), and a global regex
//    would carry `lastIndex` from one .test() call into the next;
//  - the `open` alternative carries no `\(` of its own, because the shared
//    `\s*\(` suffix already consumes the opening parenthesis.
const FS_SINK_RE = /(?:fs\.readFile|fs\.readFileSync|fs\.createReadStream|fs\.writeFile|fs\.writeFileSync|open|os\.open|sendFile|send_file|send_from_directory|filepath\.Join|os\.path\.join|path\.join)\s*\(/;

// Calls that count as path normalization.
const NORMALIZATION_RE = /(?:path\.normalize|path\.resolve|os\.path\.abspath|os\.path\.realpath|filepath\.Clean|filepath\.Abs|realpath|basename)\s*\(/;
// Explicit null-byte stripping via replace(...) of \0, \x00 or %00.
const NULL_STRIP_RE = /(?:replace\s*\(\s*\/\\0\/|replace\s*\(\s*['"]\\0['"]|\.replace\s*\(\s*\/\\x00\/|\.replace\s*\(\s*['"]%00['"])/;

/**
 * Heuristic scan for extension checks that can be bypassed by null-byte
 * truncation or missing path normalization.
 *
 * Pattern 1: an extension check followed (within 1000 characters) by a
 * filesystem sink, with no normalization / null-byte stripping in between.
 * Pattern 2: an extension-extraction call with user-input hints nearby
 * (±200 characters) and no normalization / null-byte stripping in that window.
 *
 * @param {string} fp - File path; only js/jsx/ts/tsx/mjs/cjs/py/go/rb/php/phtml are scanned.
 * @param {string} raw - File contents; files over 500,000 characters are skipped.
 * @returns {Array<object>} Finding objects, at most one per (pattern, line).
 */
export function scanNullByteInjection(fp, raw) {
  if (!fp || !raw || typeof raw !== 'string') return [];
  if (raw.length > 500_000) return [];
  if (!/\.(?:js|jsx|ts|tsx|mjs|cjs|py|go|rb|php|phtml)$/i.test(fp)) return [];

  const findings = [];
  const seen = new Set();

  // Pattern: extension check without null-byte stripping before FS operation
  for (const extMatch of raw.matchAll(EXT_CHECK_RE)) {
    const checkLine = _line(raw, extMatch.index);
    // Look ahead for an FS operation in the next 1000 characters.
    const after = raw.slice(extMatch.index, extMatch.index + 1000);
    const sinkAt = after.search(FS_SINK_RE);
    if (sinkAt === -1) continue;
    // Check if normalization or null-byte stripping exists between check and sink
    const between = after.slice(0, sinkAt);
    if (NORMALIZATION_RE.test(between) || NULL_STRIP_RE.test(between)) continue;
    const id = `null-byte:${fp}:${checkLine}`;
    if (seen.has(id)) continue;
    seen.add(id);
    findings.push({
      id,
      file: fp, line: checkLine,
      vuln: 'Null-Byte Injection Risk — extension check without path normalization before FS operation',
      severity: 'medium',
      family: 'path-normalization',
      cwe: 'CWE-158',
      parser: 'NULL-BYTE',
      confidence: 0.60,
      description: 'A file extension check is performed, then the path is passed to a filesystem operation without normalization or null-byte stripping. An attacker can bypass the extension check with a null byte: "malicious.php%00.jpg" passes the .jpg check but the filesystem may truncate at the null byte.',
      remediation: 'Always normalize paths before extension checks: path.resolve(uploadsDir, path.basename(filename)). Strip null bytes: filename.replace(/\\0/g, ""). Validate the resolved path is within the expected directory.',
    });
  }

  // Pattern: splitext/extname without null-byte stripping
  for (const splitMatch of raw.matchAll(SPLITEXT_RE)) {
    const line = _line(raw, splitMatch.index);
    const context = raw.slice(Math.max(0, splitMatch.index - 200), splitMatch.index + 200);
    if (NULL_STRIP_RE.test(context) || NORMALIZATION_RE.test(context)) continue;
    // Only fire if user input is nearby
    if (!/(?:req\.|request\.|params|query|body|upload|file|filename|user_input|\$_FILES|\$_GET)/i.test(context)) continue;
    const id = `path-norm:${fp}:${line}`;
    if (seen.has(id)) continue;
    seen.add(id);
    findings.push({
      id,
      file: fp, line,
      vuln: 'Path Normalization Gap — extension extraction without null-byte/traversal sanitization',
      severity: 'medium',
      family: 'path-normalization',
      cwe: 'CWE-176',
      parser: 'NULL-BYTE',
      confidence: 0.55,
      description: 'Path extension is extracted from user-supplied input without prior normalization. Unicode normalization attacks or null-byte truncation can bypass the extension check.',
      remediation: 'Normalize the path first: const safeName = path.basename(userInput).replace(/\\0/g, ""); then check the extension.',
    });
  }

  return findings;
}
@@ -0,0 +1,338 @@
1
+ // ReDoS NFA analyzer — detects catastrophic backtracking in regex patterns.
2
+ //
3
+ // Builds a simplified NFA from a regex body string and detects superlinear
4
+ // ambiguity: two distinct paths through a quantifier cycle that accept the
5
+ // same character. This is the core condition for exponential backtracking.
6
+ //
7
+ // Scope: character classes, alternation, quantifiers (+*?{n,m}), groups,
8
+ // escapes, anchors. Unknown constructs → treated as safe (opaque atom).
9
+ // Body-length cap: 500 chars → skip (too complex for static analysis).
10
+ //
11
+ // Also exports extractors for Python re.compile() and Java Pattern.compile().
12
+
13
+ const MAX_BODY_LEN = 500;
14
+
15
+ // ── Regex parser ────────────────────────────────────────────────────────────
16
+
17
/**
 * Parse a regex body into a small AST used by the ambiguity checks.
 *
 * Node shapes produced:
 *   { type: 'literal', ch }          single character ('' for anchors / empty)
 *   { type: 'class', chars }         set of characters as a string, or 'ANY'
 *   { type: 'group', child }
 *   { type: 'concat', items }
 *   { type: 'alt', branches }
 *   { type: 'star' | 'plus' | 'opt', child }
 *
 * `{n,m}` / `{n}` quantifiers with a maximum above 1 are modelled as 'star'.
 * Lazy modifiers (`+?`, `*?`) are consumed and ignored.
 *
 * @param {string} body - Regex source without delimiters or flags.
 * @returns {{ok: boolean, tree: object|null}} `ok: false` if parsing threw.
 */
function parseRegex(body) {
  const DIGIT_CHARS = '0123456789';
  // Full membership is required: overlap detection works by character lookup,
  // so an abbreviated set would hide overlaps such as \w vs. the literal 'b'.
  const WORD_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
  const SPACE_CHARS = ' \t\n\r';
  // Ranges wider than this are treated as "matches anything" instead of expanded.
  const MAX_RANGE_SPAN = 512;

  let pos = 0;
  const src = body;

  function peek() { return pos < src.length ? src[pos] : null; }
  function advance() { return src[pos++]; }

  function parseAlternation() {
    const branches = [parseConcat()];
    while (peek() === '|') {
      advance();
      branches.push(parseConcat());
    }
    return branches.length === 1 ? branches[0] : { type: 'alt', branches };
  }

  function parseConcat() {
    const items = [];
    while (pos < src.length && peek() !== ')' && peek() !== '|') {
      items.push(parseQuantified());
    }
    return items.length === 1 ? items[0] : { type: 'concat', items };
  }

  function parseQuantified() {
    let atom = parseAtom();
    if (!atom) return { type: 'literal', ch: '' };
    while (pos < src.length) {
      const c = peek();
      if (c === '*') { advance(); atom = { type: 'star', child: atom }; }
      else if (c === '+') { advance(); atom = { type: 'plus', child: atom }; }
      else if (c === '?') { advance(); atom = { type: 'opt', child: atom }; }
      else if (c === '{') {
        const saved = pos;
        advance();
        let numStr = '';
        while (pos < src.length && /[\d,]/.test(peek())) numStr += advance();
        if (peek() === '}') {
          advance();
          const parts = numStr.split(',');
          const max = parts.length > 1
            ? (parts[1] ? Number.parseInt(parts[1], 10) : Infinity)
            : Number.parseInt(parts[0], 10);
          if (max > 1 || max === Infinity) {
            atom = { type: 'star', child: atom };
          }
        } else {
          // Not a well-formed {n,m}: rewind so '{' is parsed as a literal.
          pos = saved;
          break;
        }
      } else break;
      if (peek() === '?') advance(); // lazy modifier
    }
    return atom;
  }

  function parseAtom() {
    const c = peek();
    if (c === null || c === ')' || c === '|') return null;
    if (c === '(') {
      advance();
      if (peek() === '?') {
        advance();
        // Named group (?<name>…) / (?P<name>…): skip the name so its letters
        // are not mistaken for pattern text. Lookbehinds (?<= / (?<! do not
        // match here because '=' and '!' cannot start a name.
        const named = /^P?<[A-Za-z_$][\w$]*>/.exec(src.slice(pos, pos + 80));
        if (named) {
          pos += named[0].length;
        } else {
          // Non-capturing group or lookahead — skip modifier chars
          while (pos < src.length && peek() !== ':' && peek() !== ')' && /[imsx<!=P]/.test(peek())) advance();
          if (peek() === ':' || peek() === ')') {
            if (peek() === ':') advance();
            if (peek() === ')') { advance(); return { type: 'literal', ch: '' }; }
          }
        }
      }
      const inner = parseAlternation();
      if (peek() === ')') advance();
      return { type: 'group', child: inner };
    }
    if (c === '[') return parseCharClass();
    if (c === '\\') {
      advance();
      const esc = advance();
      if (!esc) return { type: 'literal', ch: '\\' };
      if (esc === 'd') return { type: 'class', chars: DIGIT_CHARS };
      if (esc === 'w') return { type: 'class', chars: WORD_CHARS };
      if (esc === 's') return { type: 'class', chars: SPACE_CHARS };
      if (esc === 'D' || esc === 'W' || esc === 'S') return { type: 'class', chars: 'ANY' };
      if (esc === 'b' || esc === 'B') return { type: 'literal', ch: '' }; // anchor
      return { type: 'literal', ch: esc };
    }
    if (c === '.') { advance(); return { type: 'class', chars: 'ANY' }; }
    if (c === '^' || c === '$') { advance(); return { type: 'literal', ch: '' }; }
    advance();
    return { type: 'literal', ch: c };
  }

  // Read one member of a character class: a plain char or an escape.
  function readClassMember() {
    if (peek() !== '\\') return advance();
    advance();
    const esc = advance();
    if (esc === 'd') return DIGIT_CHARS;
    if (esc === 'w') return WORD_CHARS;
    if (esc === 's') return SPACE_CHARS;
    return esc || '';
  }

  function parseCharClass() {
    advance(); // [
    let chars = '';
    let negated = false;
    let wild = false;
    if (peek() === '^') { negated = true; advance(); }
    if (peek() === ']') { chars += advance(); }
    while (pos < src.length && peek() !== ']') {
      const plain = peek() !== '\\';
      const lo = readClassMember();
      const hi = src[pos + 1];
      // A range needs two unescaped endpoints around '-', e.g. a-z. A trailing
      // '-' (as in [a-]) is a literal hyphen.
      const isRange = plain && peek() === '-' && hi !== undefined && hi !== ']' && hi !== '\\';
      if (!isRange) {
        chars += lo;
        continue;
      }
      advance(); // -
      advance(); // hi
      const from = lo.charCodeAt(0);
      const to = hi.charCodeAt(0);
      if (to < from) {
        chars += `${lo}-${hi}`; // not a valid range; keep the raw characters
      } else if (to - from > MAX_RANGE_SPAN) {
        wild = true;
      } else {
        for (let code = from; code <= to; code++) chars += String.fromCharCode(code);
      }
    }
    if (peek() === ']') advance();
    return { type: 'class', chars: negated || wild ? 'ANY' : chars };
  }

  try {
    const tree = parseAlternation();
    return { ok: true, tree };
  } catch {
    // Deliberate best-effort: an unparseable pattern is reported as "not ok"
    // and the caller treats it as safe.
    return { ok: false, tree: null };
  }
}
136
+
137
+ // ── Ambiguity detection ─────────────────────────────────────────────────────
138
+
139
/**
 * Decide whether two character sets share at least one character.
 *
 * Each set is a string of member characters; the sentinel string 'ANY'
 * stands for "matches every character" and overlaps with anything.
 *
 * @param {string} a - First character set.
 * @param {string} b - Second character set.
 * @returns {boolean} True when some character belongs to both sets.
 */
function classOverlaps(a, b) {
  const WILDCARD = 'ANY';
  if (a === WILDCARD || b === WILDCARD) return true;
  return [...a].some((member) => b.includes(member));
}
146
+
147
/**
 * Compute the FIRST set of an AST node: every character set that can begin a
 * match of the node. Each entry is either a single literal character or a
 * class string (possibly 'ANY').
 *
 * For a concatenation the sets of successive items are accumulated for as
 * long as the preceding items can match the empty string, so FIRST(a?b) is
 * [a, b] rather than just [a].
 *
 * @param {object|null} node - AST node produced by the regex parser.
 * @returns {string[]} Character sets that may start a match (empty if none).
 */
function collectFirstChars(node) {
  if (!node) return [];
  switch (node.type) {
    case 'literal':
      return node.ch ? [node.ch] : [];
    case 'class':
      return [node.chars];
    case 'group':
    case 'star':
    case 'plus':
    case 'opt':
      return collectFirstChars(node.child);
    case 'concat': {
      const firsts = [];
      for (const item of (node.items || [])) {
        firsts.push(...collectFirstChars(item));
        // A non-nullable item must consume input, so nothing after it can
        // supply the first character.
        if (!canBeEmpty(item)) break;
      }
      return firsts;
    }
    case 'alt':
      return (node.branches || []).flatMap((branch) => collectFirstChars(branch));
    default:
      return [];
  }
}

/**
 * Report whether a node can match the empty string.
 *
 * @param {object|null} node - AST node produced by the regex parser.
 * @returns {boolean} True when the node is nullable (a missing node counts as nullable).
 */
function canBeEmpty(node) {
  if (!node) return true;
  switch (node.type) {
    case 'literal': return !node.ch;
    case 'class': return false;
    case 'group': return canBeEmpty(node.child);
    case 'concat': return (node.items || []).every((item) => canBeEmpty(item));
    case 'alt': return (node.branches || []).some((branch) => canBeEmpty(branch));
    case 'star': case 'opt': return true;
    case 'plus': return canBeEmpty(node.child);
    default: return false;
  }
}
187
+
188
/**
 * Run the ambiguity walk over a parsed regex tree and summarise the outcome.
 *
 * @param {object|null} tree - Root AST node, or a falsy value for "nothing to check".
 * @returns {{unsafe: boolean, reason?: string}} `unsafe: true` plus the first
 *   reason recorded by the walk, otherwise `{ unsafe: false }`.
 */
function detectSuperlinear(tree) {
  if (!tree) return { unsafe: false };

  const found = [];
  _walk(tree, found, 0);
  if (found.length === 0) return { unsafe: false };

  const [reason] = found;
  return { unsafe: true, reason };
}
194
+
195
/**
 * Recursive ambiguity walk. Appends a human-readable reason to `reasons` when
 * a risky construct is found under a `*` / `+` quantifier:
 *   - a `*` / `+` nested inside another `*` / `+`;
 *   - an alternation under a quantifier whose branches can start with the
 *     same character;
 *   - a concatenation under a quantifier where an item following a nullable
 *     item can start with the same character as the first item.
 *
 * @param {object|null} node - Current AST node.
 * @param {string[]} reasons - Output list; the walk stops descending once it is non-empty.
 * @param {number} quantifierDepth - Number of enclosing `*` / `+` nodes.
 * @returns {void}
 */
function _walk(node, reasons, quantifierDepth) {
  if (!node || reasons.length) return;

  // True when any set on the left overlaps any set on the right.
  const anyOverlap = (left, right) =>
    left.some((a) => right.some((b) => classOverlaps(a, b)));

  switch (node.type) {
    case 'opt':
    case 'group':
      // Transparent wrappers: they do not add a quantifier level.
      _walk(node.child, reasons, quantifierDepth);
      return;
    case 'concat':
      for (const part of (node.items || [])) _walk(part, reasons, quantifierDepth);
      return;
    case 'alt':
      for (const branch of (node.branches || [])) _walk(branch, reasons, quantifierDepth);
      return;
    case 'star':
    case 'plus':
      break;
    default:
      return;
  }

  // From here on the node is a repeating quantifier.
  if (quantifierDepth > 0) {
    reasons.push('nested quantifier');
    return;
  }
  _walk(node.child, reasons, quantifierDepth + 1);

  // Unwrap group to check inner structure
  const child = node.child;
  const body = child && child.type === 'group' ? child.child : child;
  if (!body) return;

  if (body.type === 'alt') {
    const firsts = (body.branches || []).map((branch) => collectFirstChars(branch));
    for (let i = 0; i < firsts.length; i++) {
      for (let j = i + 1; j < firsts.length; j++) {
        if (anyOverlap(firsts[i], firsts[j])) {
          reasons.push('alternation ambiguity under quantifier');
          return;
        }
      }
    }
    return;
  }

  if (body.type !== 'concat') return;
  const items = body.items || [];
  if (items.length < 2) return;

  const head = collectFirstChars(items[0]);
  for (let k = 1; k < items.length; k++) {
    const prev = items[k - 1];
    // "Every item before k is nullable" implies the item just before k is
    // nullable, so testing that single item covers both conditions.
    const prevNullable = canBeEmpty(prev) || prev.type === 'star' || prev.type === 'opt';
    if (!prevNullable) continue;
    if (anyOverlap(head, collectFirstChars(items[k]))) {
      reasons.push('overlapping nullable prefix in quantifier');
      return;
    }
  }
}
264
+
265
+ // ── Public API ──────────────────────────────────────────────────────────────
266
+
267
/**
 * Classify a regex body as potentially vulnerable to catastrophic backtracking.
 *
 * Bodies that are not non-empty strings, exceed MAX_BODY_LEN, contain none of
 * `*`, `+`, `{`, or fail to parse are all reported as safe.
 *
 * @param {string} body - Regex source without delimiters or flags.
 * @returns {{unsafe: boolean, reason?: string}} Verdict, with a reason when unsafe.
 */
export function isUnsafeRegex(body) {
  const safe = () => ({ unsafe: false });

  if (typeof body !== 'string' || body.length === 0) return safe();
  if (body.length > MAX_BODY_LEN) return safe();

  // Without a repeating quantifier there is no cycle to backtrack through.
  const hasRepeat = ['*', '+', '{'].some((mark) => body.includes(mark));
  if (!hasRepeat) return safe();

  const { ok, tree } = parseRegex(body);
  return ok ? detectSuperlinear(tree) : safe();
}
275
+
276
/**
 * Extract candidate regex bodies from JavaScript/TypeScript source text.
 *
 * Two sources are scanned, in this order:
 *   1. Slash-delimited literals on a single line. Escaped characters (`\/`)
 *      and character classes (`[/]`) are consumed as units so a slash inside
 *      them does not terminate the body early.
 *   2. `new RegExp("...")` / `new RegExp('...')` with a string-literal
 *      argument. Doubled backslashes from the string literal are collapsed so
 *      the body reads as the regex engine would see it.
 *
 * This is a textual heuristic: division expressions or slashes inside
 * comments/strings can still produce spurious bodies.
 *
 * @param {string} code - Source text.
 * @returns {Array<{body: string, line: number}>} Bodies with 1-based line numbers.
 */
export function extractJsRegexBodies(code) {
  const out = [];
  const lineAt = (idx) => code.slice(0, idx).split('\n').length;

  // Regex literals: /pattern/flags
  const literalRe = /\/((?:\\.|\[(?:\\.|[^\]\\\n])*\]|[^/\\\n\[])+)\/[dgimsuvy]*/g;
  for (const m of code.matchAll(literalRe)) {
    out.push({ body: m[1], line: lineAt(m.index) });
  }

  // new RegExp("pattern") — the closing quote must match the opening one.
  const ctorRe = /new\s+RegExp\s*\(\s*(['"])((?:\\.|(?!\1)[^\\\n])+)\1/g;
  for (const m of code.matchAll(ctorRe)) {
    out.push({ body: m[2].replace(/\\\\/g, '\\'), line: lineAt(m.index) });
  }
  return out;
}
288
+
289
/**
 * Extract regex bodies passed as string literals to Python `re` functions
 * (compile, match, search, sub, findall, fullmatch), with or without the
 * `r` raw-string prefix.
 *
 * @param {string} code - Python source text.
 * @returns {Array<{body: string, line: number}>} Bodies with 1-based line numbers.
 */
export function extractPyRegexBodies(code) {
  const callRe = /\bre\.(?:compile|match|search|sub|findall|fullmatch)\s*\(\s*r?['"]((?:\\.|[^'"\n])+)['"]/g;
  return Array.from(code.matchAll(callRe), (hit) => {
    const newlinesBefore = (code.slice(0, hit.index).match(/\n/g) || []).length;
    return { body: hit[1], line: newlinesBefore + 1 };
  });
}
296
+
297
/**
 * Extract regex bodies from Java source: string literals passed to
 * `Pattern.compile(...)` first, then those passed to `.matches(...)`.
 * Doubled backslashes from the Java string literal are collapsed to one.
 *
 * @param {string} code - Java source text.
 * @returns {Array<{body: string, line: number}>} Bodies with 1-based line numbers.
 */
export function extractJavaRegexBodies(code) {
  const callPatterns = [
    /\bPattern\.compile\s*\(\s*"((?:\\.|[^"\n])+)"/g,
    /\.matches\s*\(\s*"((?:\\.|[^"\n])+)"/g,
  ];
  const toEntry = (hit) => ({
    body: hit[1].replace(/\\\\/g, '\\'),
    line: code.slice(0, hit.index).split('\n').length,
  });
  return callPatterns.flatMap((pattern) => Array.from(code.matchAll(pattern), toEntry));
}
307
+
308
/**
 * Scan one source file for regex patterns flagged by the NFA ambiguity check.
 *
 * The extractor is chosen from the file extension (JS/TS family, Python,
 * Java); any other extension yields no findings.
 *
 * @param {string} file - File path used for extension dispatch and in finding ids.
 * @param {string} raw - File contents; files over 500,000 characters are skipped.
 * @returns {Array<object>} One finding per extracted body judged unsafe.
 */
export function scanRegexReDoS(file, raw) {
  if (!file || !raw || typeof raw !== 'string') return [];
  if (raw.length > 500_000) return [];

  // First matching extension wins.
  const extractors = [
    [/\.(?:js|jsx|ts|tsx|mjs|cjs)$/i, extractJsRegexBodies],
    [/\.py$/i, extractPyRegexBodies],
    [/\.java$/i, extractJavaRegexBodies],
  ];
  const chosen = extractors.find(([extension]) => extension.test(file));
  if (!chosen) return [];
  const [, extract] = chosen;

  const findings = [];
  for (const { body, line } of extract(raw)) {
    const verdict = isUnsafeRegex(body);
    if (!verdict.unsafe) continue;
    findings.push({
      id: `redos-nfa:${file}:${line}`,
      file,
      line,
      vuln: 'ReDoS — Catastrophic Backtracking (NFA analysis)',
      severity: 'high',
      family: 'redos',
      cwe: 'CWE-1333',
      parser: 'NFA',
      confidence: 0.85,
      description: `Regex pattern has ${verdict.reason}. A crafted input can cause exponential backtracking, consuming 100% CPU.`,
      remediation: 'Rewrite the regex to avoid nested quantifiers and overlapping alternation. Consider using the re2 library for guaranteed linear-time matching.',
      snippet: `/${body.slice(0, 60)}${body.length > 60 ? '...' : ''}/`,
    });
  }
  return findings;
}