@activemind/scd 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/LICENSE.md +35 -0
  2. package/README.md +417 -0
  3. package/bin/scd.js +140 -0
  4. package/lib/audit-report.js +93 -0
  5. package/lib/audit-sync.js +172 -0
  6. package/lib/audit.js +356 -0
  7. package/lib/cli-helpers.js +108 -0
  8. package/lib/commands/accept.js +28 -0
  9. package/lib/commands/audit.js +17 -0
  10. package/lib/commands/configure.js +200 -0
  11. package/lib/commands/doctor.js +14 -0
  12. package/lib/commands/exceptions.js +19 -0
  13. package/lib/commands/export-findings.js +46 -0
  14. package/lib/commands/findings.js +306 -0
  15. package/lib/commands/ignore.js +28 -0
  16. package/lib/commands/init.js +16 -0
  17. package/lib/commands/insights.js +24 -0
  18. package/lib/commands/install.js +15 -0
  19. package/lib/commands/list.js +109 -0
  20. package/lib/commands/remove.js +16 -0
  21. package/lib/commands/repo.js +862 -0
  22. package/lib/commands/report.js +234 -0
  23. package/lib/commands/resolve.js +25 -0
  24. package/lib/commands/rules.js +185 -0
  25. package/lib/commands/scan.js +519 -0
  26. package/lib/commands/scope.js +341 -0
  27. package/lib/commands/sync.js +40 -0
  28. package/lib/commands/uninstall.js +15 -0
  29. package/lib/commands/version.js +33 -0
  30. package/lib/comment-map.js +388 -0
  31. package/lib/config.js +325 -0
  32. package/lib/context-modifiers.js +211 -0
  33. package/lib/deep-analyzer.js +225 -0
  34. package/lib/doctor.js +236 -0
  35. package/lib/exception-manager.js +675 -0
  36. package/lib/export-findings.js +376 -0
  37. package/lib/file-context.js +380 -0
  38. package/lib/file-filter.js +204 -0
  39. package/lib/file-manifest.js +145 -0
  40. package/lib/git-utils.js +102 -0
  41. package/lib/global-config.js +239 -0
  42. package/lib/hooks-manager.js +130 -0
  43. package/lib/init-repo.js +147 -0
  44. package/lib/insights-analyzer.js +416 -0
  45. package/lib/insights-output.js +160 -0
  46. package/lib/installer.js +128 -0
  47. package/lib/output-constants.js +32 -0
  48. package/lib/output-terminal.js +407 -0
  49. package/lib/push-queue.js +322 -0
  50. package/lib/remove-repo.js +108 -0
  51. package/lib/repo-context.js +187 -0
  52. package/lib/report-html.js +1154 -0
  53. package/lib/report-index.js +157 -0
  54. package/lib/report-json.js +136 -0
  55. package/lib/report-markdown.js +250 -0
  56. package/lib/resolve-manager.js +148 -0
  57. package/lib/rule-registry.js +205 -0
  58. package/lib/scan-cache.js +171 -0
  59. package/lib/scan-context.js +312 -0
  60. package/lib/scan-schema.js +67 -0
  61. package/lib/scanner-full.js +681 -0
  62. package/lib/scanner-manual.js +348 -0
  63. package/lib/scanner-secrets.js +83 -0
  64. package/lib/scope.js +331 -0
  65. package/lib/store-verify.js +395 -0
  66. package/lib/store.js +310 -0
  67. package/lib/taint-register.js +196 -0
  68. package/lib/version-check.js +46 -0
  69. package/package.json +37 -0
  70. package/rules/rule-loader.js +324 -0
  71. package/rules/rules-aspx-cs.json +399 -0
  72. package/rules/rules-aspx.json +222 -0
  73. package/rules/rules-infra-leakage.json +434 -0
  74. package/rules/rules-js.json +664 -0
  75. package/rules/rules-php.json +521 -0
  76. package/rules/rules-python.json +466 -0
  77. package/rules/rules-secrets.json +99 -0
  78. package/rules/rules-sensitive-files.json +475 -0
  79. package/rules/rules-ts.json +76 -0
@@ -0,0 +1,196 @@
1
+ /**
2
+ * taint-register.js
3
+ * Pre-scan single-file taint tracking.
4
+ *
5
+ * Builds a register of variables that are assigned from external/user-controlled
6
+ * sources (HTTP input, CLI args, environment). Passed to the scanner so rules
7
+ * can detect when a tainted variable reaches a dangerous sink.
8
+ *
9
+ * Scope: single-file, single-assignment. Does not track:
10
+ * - Cross-function taint propagation
11
+ * - Chained assignments ($a = $b; $b = $_GET['x'])
12
+ * - Conditional assignments
13
+ *
14
+ * These limitations are acceptable for the current regex-based engine.
15
+ * Full taint analysis is on the roadmap as a future architectural improvement.
16
+ *
17
+ * Usage:
18
+ * const { buildTaintRegister } = require('./taint-register');
19
+ * const taint = buildTaintRegister(fileContent, 'php');
20
+ * // taint.has('id') → true if $id was assigned from $_GET/$_POST etc.
21
+ * // taint.getLine('id') → line number of the assignment
22
+ * // taint.getSource('id') → '$_GET["id"]'
23
+ */
24
+
25
+ 'use strict';
26
+
27
+ // ── Source patterns per language ──────────────────────────────────────────
28
+ // Each pattern captures: group 1 = variable name
29
+
30
// Patterns shared by JavaScript and TypeScript sources. They are kept in one
// list so both languages always detect the same taint sources (the TypeScript
// list previously lacked the `request.*` pattern although it was documented
// as "Same as JS").
const JS_SOURCE_PATTERNS = [
  // const/let/var name = req.query.x / req.body.x / req.params.x / req.query['x']
  /^\s*(?:const|let|var)\s+(\w+)\s*=\s*(req\.(?:query|body|params)(?:\.\w+|\[['"][^'"]{0,40}['"]\]))/,
  // const/let/var name = request.query / request.body / request.params (any suffix)
  /^\s*(?:const|let|var)\s+(\w+)\s*=\s*(request\.(?:query|body|params))/,
  // destructuring: const { id } = req.query ← handled separately in buildTaintRegister
];

/**
 * Taint source patterns, keyed by language.
 *
 * Every pattern is anchored at the start of a line and captures:
 *   group 1 = name of the variable being assigned
 *   group 2 = the (start of the) source expression it is assigned from
 */
const SOURCE_PATTERNS = {
  php: [
    // $varname = $_GET['key'] / $_POST['key'] / $_REQUEST / $_COOKIE / $_SESSION
    /^\s*\$(\w+)\s*=\s*(\$_(?:GET|POST|REQUEST|COOKIE|SESSION)\s*\[['"][^'"]{0,60}['"]\])/,
    // $varname = $_SERVER['key'] (HTTP_*, REQUEST_URI, QUERY_STRING, PATH_INFO only)
    /^\s*\$(\w+)\s*=\s*(\$_SERVER\s*\[['"](?:HTTP_\w+|REQUEST_URI|QUERY_STRING|PATH_INFO)['"]\])/,
    // $varname = htmlspecialchars_decode(...) ← still tainted after decode
    /^\s*\$(\w+)\s*=\s*(htmlspecialchars_decode\s*\(\s*\$_(?:GET|POST|REQUEST)\s*\[)/,
    // $varname = trim/strip/addslashes of superglobal ← still tainted (insufficient sanitisation)
    /^\s*\$(\w+)\s*=\s*(?:trim|strip_tags|addslashes|stripslashes|htmlentities)\s*\(\s*(\$_(?:GET|POST|REQUEST|COOKIE)\s*\[)/,
  ],

  python: [
    // var = request.args.get('key') / request.args['key'] (also form, values, files)
    /^\s*(\w+)\s*=\s*(request\.(?:args|form|values|files)(?:\.get\s*\(|(?:\[)))/,
    // var = request.json.get / request.json['key']
    /^\s*(\w+)\s*=\s*(request\.json(?:\.get\s*\(|\[))/,
    // var = flask.request.args / .form / .json / .values
    /^\s*(\w+)\s*=\s*(flask\.request\.(?:args|form|json|values))/,
    // var = sys.argv[n] — note the \w+ alternative also accepts 0 and identifiers
    /^\s*(\w+)\s*=\s*(sys\.argv\s*\[(?:[1-9]|\w+)\])/,
  ],

  js: [...JS_SOURCE_PATTERNS],

  // TypeScript uses exactly the same source expressions as JavaScript.
  ts: [...JS_SOURCE_PATTERNS],
};
66
+
67
+ // ── TaintRegister class ───────────────────────────────────────────────────
68
+
69
/**
 * Registry of tainted variable names for a single file.
 *
 * Each entry maps a variable name to the line it was first recorded on and
 * the source expression it was assigned from. The first recording of a name
 * wins; later recordings of the same name are ignored.
 */
class TaintRegister {
  constructor() {
    // varName → { line, source }
    this._vars = new Map();
  }

  /**
   * Record a tainted variable. A name that is already present is left
   * untouched, so the earliest assignment is the one reported.
   *
   * @param {string} varName     Variable name (without any sigil)
   * @param {number} lineNumber  Line of the assignment
   * @param {string} source      Source expression text
   */
  add(varName, lineNumber, source) {
    if (this._vars.has(varName)) return;
    this._vars.set(varName, { line: lineNumber, source });
  }

  /**
   * @param {string} varName
   * @returns {boolean} true when varName has been recorded as tainted
   */
  has(varName) {
    return this._vars.has(varName);
  }

  /**
   * @param {string} varName
   * @returns {number|null} line where varName was recorded, or null if unknown
   */
  getLine(varName) {
    const entry = this._vars.get(varName);
    return entry?.line ?? null;
  }

  /**
   * @param {string} varName
   * @returns {string|null} source expression (e.g. '$_GET["id"]'), or null if unknown
   */
  getSource(varName) {
    const entry = this._vars.get(varName);
    return entry?.source ?? null;
  }

  /**
   * @returns {string[]} every recorded variable name, in insertion order
   */
  all() {
    return Array.from(this._vars.keys());
  }

  /**
   * @returns {boolean} true when nothing has been recorded
   */
  isEmpty() {
    return this._vars.size === 0;
  }
}
119
+
120
+ // ── Builder ───────────────────────────────────────────────────────────────
121
+
122
// PHP: extract($_GET) / extract($_POST) / extract($_REQUEST)
const PHP_EXTRACT_RE = /extract\s*\(\s*\$_(?:GET|POST|REQUEST)/;

// JS/TS: const { id, name } = req.query  (also request.query / body / params)
const JS_DESTRUCTURE_RE =
  /^\s*(?:const|let|var)\s*\{([^}]{1,200})\}\s*=\s*(req(?:uest)?\.(?:query|body|params))/;

/**
 * Extract the local binding names from the inside of an object destructuring
 * pattern (the text between the braces).
 *
 *   "id"              → id
 *   "id: userId"      → userId   (the alias is the variable in scope, not the key)
 *   "page = 1"        → page     (default value is ignored)
 *   "id: userId = 0"  → userId
 *   "...rest"         → rest
 *
 * Entries that do not reduce to a plain identifier (e.g. nested patterns) are
 * skipped.
 *
 * @param {string} list  Text between `{` and `}`
 * @returns {string[]}   Binding names in source order
 */
function destructuredBindingNames(list) {
  const names = [];
  for (const rawEntry of list.split(',')) {
    let entry = rawEntry.trim();
    if (entry.startsWith('...')) entry = entry.slice(3).trim();

    // Strip the default value first: it may itself contain a ':' (ternary).
    const eq = entry.indexOf('=');
    if (eq !== -1) entry = entry.slice(0, eq).trim();

    // "key: alias" binds the alias.
    const colon = entry.indexOf(':');
    if (colon !== -1) entry = entry.slice(colon + 1).trim();

    if (/^\w+$/.test(entry)) names.push(entry);
  }
  return names;
}

/**
 * Build a TaintRegister from file content.
 *
 * The content is scanned line by line. Lines whose trimmed text starts with
 * `//`, `#`, `*` or `/*` are treated as comments and skipped. For every other
 * line the first matching entry of SOURCE_PATTERNS[language] records
 * (variable, line number, source expression). In addition:
 *   - PHP: `extract($_GET|$_POST|$_REQUEST…)` records the wildcard name '*'
 *     with source 'extract()', because the imported keys cannot be known.
 *   - JS/TS: object destructuring from req/request .query/.body/.params
 *     records every local binding introduced by the pattern.
 *
 * An unknown language or non-string content yields an empty register.
 *
 * @param {string} content - Full file content
 * @param {string} language - 'php' | 'python' | 'js' | 'ts'
 * @returns {TaintRegister}
 */
function buildTaintRegister(content, language) {
  const register = new TaintRegister();

  // Own-property lookup so names such as "constructor" are not mistaken for
  // a language entry inherited from Object.prototype.
  const patterns = Object.hasOwn(SOURCE_PATTERNS, language) ? SOURCE_PATTERNS[language] : [];
  if (patterns.length === 0 || typeof content !== 'string') return register;

  const isPhp = language === 'php';
  const isJsLike = language === 'js' || language === 'ts';

  // Accept both LF and CRLF line endings; line numbers are unaffected.
  const lines = content.split(/\r?\n/);

  for (let i = 0; i < lines.length; i += 1) {
    const line = lines[i];
    const lineNum = i + 1;

    // Skip comments
    const trimmed = line.trim();
    if (
      trimmed.startsWith('//') ||
      trimmed.startsWith('#') ||
      trimmed.startsWith('*') ||
      trimmed.startsWith('/*')
    ) {
      continue;
    }

    for (const pattern of patterns) {
      const m = line.match(pattern);
      if (m) {
        register.add(m[1], lineNum, m[2]);
        break; // one pattern match per line is enough
      }
    }

    // PHP: extract() makes every key a variable — we can't know the keys,
    // so mark a wildcard.
    if (isPhp && PHP_EXTRACT_RE.test(line)) {
      register.add('*', lineNum, 'extract()');
    }

    // JS/TS: destructuring const { id, name } = req.query
    if (isJsLike) {
      const destructure = line.match(JS_DESTRUCTURE_RE);
      if (destructure) {
        const source = destructure[2];
        for (const name of destructuredBindingNames(destructure[1])) {
          register.add(name, lineNum, source);
        }
      }
    }
  }

  return register;
}
182
+
183
// File extension (lowercase, no dot) → language key used by SOURCE_PATTERNS.
// A Map is used so that inherited Object members ("constructor", "toString",
// …) can never be returned as a language.
const EXT_LANGUAGE_MAP = new Map([
  ['php', 'php'], ['php5', 'php'], ['phtml', 'php'],
  ['py', 'python'],
  ['js', 'js'], ['mjs', 'js'], ['cjs', 'js'],
  ['ts', 'ts'], ['tsx', 'ts'],
]);

/**
 * Map file extension to language key.
 *
 * The lookup is case-insensitive and tolerates a single leading dot, so both
 * 'php' and '.PHP' (the form returned by path.extname) resolve to 'php'.
 *
 * @param {string} ext  File extension, with or without a leading dot
 * @returns {string|null} 'php' | 'python' | 'js' | 'ts', or null when the
 *                        extension is unknown or not a string
 */
function extToLanguage(ext) {
  if (typeof ext !== 'string') return null;
  const key = ext.trim().toLowerCase().replace(/^\./, '');
  return EXT_LANGUAGE_MAP.get(key) ?? null;
}
195
+
196
+ module.exports = { buildTaintRegister, extToLanguage, TaintRegister };
@@ -0,0 +1,46 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * version-check.js
5
+ * Compares the local scd version against the server's minimum required version.
6
+ * Used by interactive commands to warn when the CLI needs upgrading.
7
+ *
8
+ * The server's min_cli_version is cached in ~/.scd/config after each successful
9
+ * batch flush or health check — no extra network call needed at command time.
10
+ */
11
+
12
+ const pkg = require('../package.json');
13
+
14
/**
 * Parse a version string into its [major, minor, patch] numbers.
 *
 * A leading "v" is ignored, and any pre-release ("-rc1") or build ("+sha")
 * suffix is dropped before parsing. Missing or non-numeric components count
 * as 0.
 *
 * @param {*} version  Version string such as "1.4.0", "v2.0" or "1.4.10-rc1"
 * @returns {number[]} Exactly three non-NaN numbers
 */
function parseVersionCore(version) {
  const core = String(version).trim().replace(/^v/i, '').split(/[-+]/)[0];
  const fields = core.split('.');
  return [0, 1, 2].map((i) => {
    const n = Number.parseInt(fields[i], 10);
    return Number.isFinite(n) ? n : 0;
  });
}

/**
 * Returns true when version `a` is strictly lower than version `b`,
 * comparing major, then minor, then patch numerically.
 *
 * Pre-release and build suffixes are ignored, so "1.4.0-beta" compares equal
 * to "1.4.0" (this helper does not implement semver pre-release ordering).
 *
 * @param {string} a
 * @param {string} b
 * @returns {boolean}
 */
function semverLt(a, b) {
  const pa = parseVersionCore(a);
  const pb = parseVersionCore(b);
  for (let i = 0; i < 3; i += 1) {
    if (pa[i] !== pb[i]) return pa[i] < pb[i];
  }
  return false; // equal
}
24
+
25
+ /**
26
+ * Returns a warning string if the local CLI version is below the server's
27
+ * minimum required version, or null if everything is fine or no server info
28
+ * is cached yet.
29
+ */
30
+ function getVersionWarning() {
31
+ try {
32
+ const { getMinCliVersion, getServerVersion } = require('./global-config');
33
+ const minVer = getMinCliVersion();
34
+ if (!minVer) return null; // no server info cached yet
35
+ if (!semverLt(pkg.version, minVer)) return null; // up to date
36
+
37
+ const serverVer = getServerVersion();
38
+ const serverPart = serverVer ? ` (server: v${serverVer})` : '';
39
+ return `⚠ scd v${pkg.version} is outdated — scd-server requires v${minVer} or later${serverPart}.\n` +
40
+ ` Run: npm install -g @activemind/scd`;
41
+ } catch {
42
+ return null; // never let this break a command
43
+ }
44
+ }
45
+
46
+ module.exports = { getVersionWarning };
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "@activemind/scd",
3
+ "version": "1.4.0",
4
+ "description": "Secure Code by Design – automated security scanning for development teams",
5
+ "author": "Activemind Solutions AB",
6
+ "license": "SEE LICENSE IN LICENSE.md",
7
+ "homepage": "https://securecodebydesign.com",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "git+https://github.com/activemindsolutions/scd.git"
11
+ },
12
+ "engines": {
13
+ "node": ">=22.0.0"
14
+ },
15
+ "bin": {
16
+ "scd": "bin/scd.js"
17
+ },
18
+ "main": "./bin/scd.js",
19
+ "files": [
20
+ "bin/",
21
+ "lib/",
22
+ "rules/",
23
+ "README.md",
24
+ "LICENSE"
25
+ ],
26
+ "scripts": {
27
+ "link": "npm link",
28
+ "unlink": "npm unlink -g @activemind/scd",
29
+ "test": "node --test tests/**/*.test.js",
30
+ "test:integrity": "node --test tests/integrity/*.test.js",
31
+ "test:smoke": "node --test tests/smoke/*.test.js",
32
+ "test:rules": "node --test tests/rules/*.test.js"
33
+ },
34
+ "dependencies": {
35
+ "commander": "^14.0.3"
36
+ }
37
+ }
@@ -0,0 +1,324 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * rule-loader.js
5
+ * Compiles a raw rule definition (from JSON) into a runtime rule object
6
+ * compatible with scanner-full.js.
7
+ *
8
+ * JSON rules cannot contain RegExp literals or functions — rule-loader
9
+ * bridges that gap by compiling patterns and normalising confidence at
10
+ * load time.
11
+ *
12
+ * ── Schema version ──────────────────────────────────────────────────────────
13
+ * schema_version: 1
14
+ *
15
+ * ── Full field reference ────────────────────────────────────────────────────
16
+ *
17
+ * Required:
18
+ * id {string} Stable rule ID, e.g. "INFRA-001". Plugin rules
19
+ * must use a prefix matching their pack_id to avoid
20
+ * collisions with built-in rules.
21
+ * name {string} Human-readable rule name
22
+ * severity {string} "CRITICAL" | "HIGH" | "MEDIUM" | "EXPOSURE"
23
+ * category {string} OWASP category or custom grouping
24
+ * pattern {string} Regex pattern string (compiled to RegExp at load time)
25
+ *
26
+ * Optional:
27
+ * flags {string} Regex flags for pattern (default: "gi")
28
+ * antipattern {string} Regex pattern string — if matched near the finding,
29
+ * the finding is suppressed. Compiled with antipattern_flags.
30
+ * antipattern_flags {string} Flags for antipattern regex (default: "i")
31
+ * antipattern_preset {string} Named preset expanding to a standard antipattern.
32
+ * Applied in addition to antipattern if both are set.
33
+ * Available presets: "DEV_CONTEXT", "ADDR_AS_DATA",
34
+ * "LINK_EXAMPLE", "ENV_VAR_REF"
35
+ * lookahead {number} Chars to scan ahead of match for antipattern (default: 300)
36
+ * lookbehind {number} Chars to scan behind match for antipattern (default: 120)
37
+ * exclude_file_types {string[]} File extensions to skip entirely, e.g. ["md","txt"]
38
+ * file_types {string[]} File extensions this rule applies to (sensitive-file rules)
39
+ * match_mode {string} "content" (default) | "filename"
40
+ * taint_aware {boolean} True if rule requires taint tracking
41
+ * taint_extract {string} Taint extraction strategy: "concat" | "interpolation" | "func_concat"
42
+ * service {string} Service name for secrets rules
43
+ * resolve_hint {string} Additional remediation hint
44
+ * why {string} Explanation of the vulnerability
45
+ * scenario {string} Attack scenario description
46
+ * fix {string} Remediation guidance
47
+ * checklist {string[]} Step-by-step remediation checklist
48
+ * source {string} Set by loader — "builtin" | pack_id. Never set in JSON.
49
+ *
50
+ * ── Confidence ──────────────────────────────────────────────────────────────
51
+ * Omit confidence_rules → defaults to "HIGH" (same as current JS rules).
52
+ *
53
+ * confidence_rules: array of condition objects, evaluated top-to-bottom.
54
+ * First matching condition wins. Always include a { "default": "..." } last.
55
+ *
56
+ * Available conditions (evaluated top-to-bottom, first match wins):
57
+ * if_file_context {string|string[]} Matches classifyFileContext() result.
58
+ * Values: "frontend" | "backend" | "config" | "doc" | "test"
59
+ * if_path_contains {string[]} filePath contains any term (case-insensitive)
60
+ * if_line_contains {string[]} lineRaw contains any term (case-insensitive)
61
+ * if_value_matches {string} Captured group 1 (or full match) tests against regex
62
+ * if_value_not_matches {string} Captured group 1 does NOT test against regex
63
+ * if_value_shorter_than {number} Captured group 1 length < threshold
64
+ * default {string} Fallback — always matches
65
+ *
66
+ * Example:
67
+ * "confidence_rules": [
68
+ * { "if_value_matches": "^(?:sk-|ghp_|AKIA)", "then": "HIGH" },
69
+ * { "if_value_not_matches": "\\d", "then": "LOW" },
70
+ * { "if_value_shorter_than": 12, "then": "LOW" },
71
+ * { "if_file_context": ["test", "doc"], "then": "LOW" },
72
+ * { "if_path_contains": ["auth", "login"], "then": "HIGH" },
73
+ * { "default": "MEDIUM" }
74
+ * ]
75
+ *
76
+ * ── Plugin / pack rules ──────────────────────────────────────────────────────
77
+ * Rule packs are JSON files placed in ~/.scd/plugins/rules/ (community/custom)
78
+ * or delivered to ~/.scd/packs/ (commercial, via scd-server).
79
+ *
80
+ * Pack file format:
81
+ * {
82
+ * "schema_version": 1,
83
+ * "pack_id": "my-pack",
84
+ * "pack_name": "My Rule Pack",
85
+ * "pack_version": "1.0.0",
86
+ * "author": "...",
87
+ * "rules": [ { ... }, { ... } ]
88
+ * }
89
+ *
90
+ * Rule IDs in a pack must use a prefix matching pack_id to avoid collisions
91
+ * with built-in rules (e.g. pack_id "fintech" → rule IDs "FINTECH-001" etc.)
92
+ *
93
+ * License enforcement is server-side: scd-server validates rule source against
94
+ * the customer's license. The CLI runs offline (trust on install for plugins).
95
+ */
96
+
97
+ // ── File context classifier ─────────────────────────────────────────────────
98
+ // Kept here (not imported from rules-infra-leakage.js) so rule-loader is
99
+ // self-contained and usable by scanner-full without creating a circular dep.
100
+
101
const FRONTEND_EXTS = new Set([
  'js', 'ts', 'jsx', 'tsx', 'mjs', 'cjs', 'vue', 'svelte', 'html', 'htm',
]);
const DOC_EXTS = new Set(['md', 'txt', 'log', 'rst', 'adoc']);
const CONFIG_EXTS = new Set([
  'json', 'yml', 'yaml', 'toml', 'ini', 'env', 'conf', 'cfg', 'properties', 'xml',
]);

/**
 * Classify a file path into a coarse context label.
 *
 * Precedence:
 *   1. empty / missing path                                  → 'backend'
 *   2. a tests?/spec/__tests__/__mocks__/fixtures path segment,
 *      or a ".test.<ext>" / ".spec.<ext>" file name suffix   → 'test'
 *   3. extension in FRONTEND_EXTS                            → 'frontend'
 *   4. extension in DOC_EXTS                                 → 'doc'
 *   5. extension in CONFIG_EXTS                              → 'config'
 *   6. anything else                                         → 'backend'
 *
 * The "extension" is the text after the last '.' in the whole path (the
 * whole path when it contains no '.'), compared case-insensitively.
 *
 * @param {string} filePath
 * @returns {'test'|'frontend'|'doc'|'config'|'backend'}
 */
function classifyFileContext(filePath) {
  if (!filePath) return 'backend';

  const ext = filePath.slice(filePath.lastIndexOf('.') + 1).toLowerCase();
  const lower = filePath.toLowerCase();

  // The test check runs first so that e.g. tests/app.js is 'test', not 'frontend'.
  const looksLikeTest =
    /(?:^|[/\\])(?:tests?|spec|__tests__|__mocks__|fixtures)(?:[/\\]|$)|\.(?:test|spec)\.[a-z]+$/.test(lower);
  if (looksLikeTest) return 'test';

  const labelledSets = [
    [FRONTEND_EXTS, 'frontend'],
    [DOC_EXTS, 'doc'],
    [CONFIG_EXTS, 'config'],
  ];
  for (const [extensions, label] of labelledSets) {
    if (extensions.has(ext)) return label;
  }
  return 'backend';
}
117
+
118
+ // ── Antipattern presets ─────────────────────────────────────────────────────
119
+ // Shared patterns used by multiple built-in rules. Plugin rules can reference
120
+ // these by name via antipattern_preset for convenience — but inline patterns
121
+ // are always preferred for transparency.
122
+
123
// Wrap a list of regex alternatives in a single non-capturing group.
const presetAlternation = (...alternatives) => `(?:${alternatives.join('|')})`;

/**
 * Named antipattern presets (regex source strings, not compiled RegExps).
 * A rule selects one with `antipattern_preset`; loadRule compiles it together
 * with any inline antipattern.
 */
const ANTIPATTERN_PRESETS = {
  // Words that indicate example, placeholder, test or development-only content.
  DEV_CONTEXT: presetAlternation(
    'example', 'sample', 'placeholder', 'TODO', 'FIXME', 'NOTE', 'demo', 'mock',
    'fake', 'dummy', 'test', 'spec', 'localhost_only', 'dev.only', 'development.only',
  ),

  // An address used as data: compared, checked, explained in prose or logged.
  ADDR_AS_DATA: presetAlternation(
    '==\\s*[\'"`]',
    '!=\\s*[\'"`]',
    '\\.startswith\\s*\\(',
    'netloc\\s*==',
    '\\.host\\s*==',
    'is_loopback',
    'is_private',
    'is_reserved',
    '_has_ipv6',
    'check.*local',
    'local.*check',
    'returns\\s+(?:True|False)\\s+if',
    'e\\.g\\.',
    'i\\.e\\.',
    'for\\s+example',
    '#.*if\\s+ip\\s*=',
    '#.*ip\\s*=',
    'log\\.(?:debug|info|warning|error|critical)\\s*\\(',
  ),

  // Documentation-style example hosts and host placeholders.
  LINK_EXAMPLE: presetAlternation(
    'example\\.com',
    'example\\.org',
    'example\\.net',
    'your[-_]?(?:host|domain|server|url)',
    '<host>',
    '<server>',
    '\\[host\\]',
    '\\[server\\]',
  ),

  // The value is read from an environment variable rather than hard-coded.
  ENV_VAR_REF: presetAlternation(
    'process\\.env',
    'os\\.environ',
    'getenv',
    'System\\.getenv',
    '\\$\\{',
    '\\$[A-Z_]+\\b',
  ),
};
129
+
130
+ // ── Confidence rule evaluator ───────────────────────────────────────────────
131
+
132
/**
 * Normalise an `if_*_contains` value to a list of lowercase terms.
 * Accepts a single string as well as an array; returns null when unset.
 */
function toLowerTerms(terms) {
  if (terms == null) return null;
  const list = Array.isArray(terms) ? terms : [terms];
  return list.map((term) => String(term).toLowerCase());
}

/**
 * Compile an `if_value_*` regex once. Returns null when the condition is
 * unset or the pattern is invalid (the condition then never matches).
 */
function compileValueRegex(pattern, field) {
  if (pattern === undefined) return null;
  try {
    return new RegExp(pattern);
  } catch (err) {
    console.warn(`[scd] Ignoring invalid ${field} regex "${pattern}": ${err.message}`);
    return null;
  }
}

/**
 * Pre-process one confidence_rules entry so the per-match evaluator does no
 * regex compilation or term normalisation.
 */
function compileConfidenceRule(rule) {
  if ('default' in rule) {
    return { isDefault: true, result: rule.default };
  }

  let fileContexts = null;
  if (rule.if_file_context != null) {
    fileContexts = Array.isArray(rule.if_file_context)
      ? rule.if_file_context
      : [rule.if_file_context];
  }

  return {
    isDefault: false,
    result: rule.then,
    fileContexts,
    pathTerms: toLowerTerms(rule.if_path_contains),
    lineTerms: toLowerTerms(rule.if_line_contains),
    valueMatches: compileValueRegex(rule.if_value_matches, 'if_value_matches'),
    valueNotMatches: compileValueRegex(rule.if_value_not_matches, 'if_value_not_matches'),
    shorterThan: rule.if_value_shorter_than,
  };
}

/**
 * Build a confidence function from a confidence_rules array.
 * Returns a function with the same signature as the existing JS confidence
 * functions: (matchObj, lineRaw, filePath) => 'HIGH' | 'MEDIUM' | 'LOW'
 *
 * Rules are evaluated top-to-bottom and the first matching entry wins:
 *   - an entry with a `default` key always matches and returns that value;
 *   - any other entry matches when at least one of its `if_*` conditions
 *     holds, and returns its `then` value.
 * If nothing matches and there is no default, the result is 'HIGH'.
 *
 * All conditions are prepared once, here, rather than on every match:
 * regexes are compiled up front (an invalid regex is reported once and its
 * condition never matches) and `if_path_contains` / `if_line_contains` accept
 * either an array or a single string. Entries that are not objects are
 * ignored.
 *
 * @param {Array} rules Array of condition objects from JSON
 * @returns {function}
 */
function buildConfidenceFunction(rules) {
  const compiledRules = (Array.isArray(rules) ? rules : [])
    .filter((rule) => rule !== null && typeof rule === 'object')
    .map(compileConfidenceRule);

  return function confidenceFromRules(matchObj, lineRaw, filePath) {
    const fileCtx = classifyFileContext(filePath);
    const lineLow = (lineRaw || '').toLowerCase();
    const pathLow = (filePath || '').toLowerCase();

    // Captured value: group 1 of the raw RegExp match array, or the full
    // match when group 1 is absent or empty. Used by the if_value_* conditions.
    const value = (matchObj && (matchObj[1] || matchObj[0])) || '';

    for (const rule of compiledRules) {
      if (rule.isDefault) return rule.result;

      const matched =
        (rule.fileContexts !== null && rule.fileContexts.includes(fileCtx)) ||
        (rule.pathTerms !== null && rule.pathTerms.some((term) => pathLow.includes(term))) ||
        (rule.lineTerms !== null && rule.lineTerms.some((term) => lineLow.includes(term))) ||
        (rule.valueMatches !== null && rule.valueMatches.test(value)) ||
        (rule.valueNotMatches !== null && !rule.valueNotMatches.test(value)) ||
        (rule.shorterThan !== undefined && value.length < rule.shorterThan);

      if (matched) return rule.result;
    }

    // No rule matched and no default — fall back to HIGH
    return 'HIGH';
  };
}
206
+
207
+
208
+ // ── Core loader ─────────────────────────────────────────────────────────────
209
+
210
/**
 * Compile a regex from rule JSON, reporting which rule and field is broken.
 * The thrown error is still a SyntaxError (as `new RegExp` throws), with the
 * original error attached as `cause`.
 */
function compileRuleRegex(pattern, flags, ruleId, field) {
  try {
    return new RegExp(pattern, flags);
  } catch (err) {
    throw new SyntaxError(`Rule ${ruleId} has an invalid ${field}: ${err.message}`, { cause: err });
  }
}

/**
 * Load and compile a single raw rule definition from JSON.
 * Returns a runtime rule object compatible with scanner-full.js.
 *
 * The result is a copy of `raw` in which:
 *   - `pattern` is a RegExp (flags from `raw.flags`, default "gi");
 *   - `antipattern` is one RegExp combining `antipattern`, `antipatterns[]`
 *     and the `antipattern_preset` (flags from `antipattern_flags`, default
 *     "i"), or null when none is given;
 *   - snake_case fields are exposed under camelCase names and the JSON-only
 *     originals are removed;
 *   - `confidence` is a function (from `confidence_rules`), a static string,
 *     or absent.
 *
 * @param {object} raw Parsed rule object from JSON
 * @param {string} source "builtin" or pack_id — set by caller
 * @returns {object} Compiled rule ready for use by scanner-full
 * @throws {TypeError} if raw is not an object
 * @throws {Error} if `id` or `pattern` is missing
 * @throws {SyntaxError} if the pattern or combined antipattern is not a valid regex
 */
function loadRule(raw, source) {
  if (raw === null || typeof raw !== 'object') {
    throw new TypeError('Rule definition must be an object');
  }
  if (!raw.id) throw new Error('Rule missing required field: id');
  if (!raw.pattern) throw new Error(`Rule ${raw.id} missing required field: pattern`);

  // ── Pattern ─────────────────────────────────────────────────────────────
  const compiled = {
    ...raw,
    // Rename snake_case JSON fields to camelCase for scanner-full compatibility
    id: raw.id,
    name: raw.name || raw.title || raw.id,
    severity: raw.severity || 'HIGH',
    category: raw.category || 'Uncategorised',
    pattern: compileRuleRegex(raw.pattern, raw.flags || 'gi', raw.id, 'pattern'),
    lookahead: raw.lookahead ?? undefined,
    lookbehind: raw.lookbehind ?? undefined,
    fileTypes: raw.file_types ?? undefined,
    excludeFileTypes: raw.exclude_file_types ?? undefined,
    extensions: raw.extensions ?? undefined,
    skipForFileTypes: raw.skip_for_file_types ?? undefined,
    matchMode: raw.match_mode ?? undefined,
    taintAware: raw.taint_aware ?? undefined,
    taintExtract: raw.taint_extract ?? undefined,
    service: raw.service ?? undefined,
    resolve_hint: raw.resolve_hint ?? undefined,
    source: source || 'builtin',
    why: raw.why || raw.description || undefined,
    // scan_comments: true — rule intentionally matches comment line content.
    // Opts out of the global comment-line suppression in scanFileWithRules().
    // Use only for rules whose pattern explicitly targets comment syntax
    // (e.g. @ts-ignore, TODO/FIXME with sensitive data, commented-out secrets).
    scanComments: raw.scan_comments ?? false,
  };

  // ── Antipattern ─────────────────────────────────────────────────────────
  // Combine inline antipattern + optional preset into a single RegExp.
  const apFlags = raw.antipattern_flags || 'i';
  const parts = [];

  if (raw.antipattern) {
    parts.push(raw.antipattern);
  }

  if (Array.isArray(raw.antipatterns)) {
    // An empty alternative would make the combined regex match everything
    // and suppress every finding, so blank / non-string entries are dropped.
    parts.push(...raw.antipatterns.filter((p) => typeof p === 'string' && p !== ''));
  }

  if (raw.antipattern_preset) {
    // Own-property lookup: a preset name such as "constructor" must not
    // resolve to a member inherited from Object.prototype.
    const preset = Object.hasOwn(ANTIPATTERN_PRESETS, raw.antipattern_preset)
      ? ANTIPATTERN_PRESETS[raw.antipattern_preset]
      : undefined;
    if (!preset) {
      console.warn(`[scd] Unknown antipattern_preset "${raw.antipattern_preset}" in rule ${raw.id}`);
    } else {
      parts.push(preset);
    }
  }

  compiled.antipattern = parts.length > 0
    ? compileRuleRegex(parts.join('|'), apFlags, raw.id, 'antipattern')
    : null;

  // Clean up JSON-only fields that scanner-full doesn't need
  delete compiled.flags;
  delete compiled.antipattern_flags;
  delete compiled.antipattern_preset;
  delete compiled.antipatterns;        // array form — compiled into compiled.antipattern above
  delete compiled.file_types;
  delete compiled.exclude_file_types;
  delete compiled.skip_for_file_types; // camelCase version kept as skipForFileTypes
  delete compiled.match_mode;
  delete compiled.taint_aware;
  delete compiled.taint_extract;
  delete compiled.schema_version;
  delete compiled.variant;             // variant is internal — scanner sees only id
  delete compiled.title;               // alias for name — compiled.name is set above
  delete compiled.description;         // alias for why — compiled.why is set above

  // ── Confidence ──────────────────────────────────────────────────────────
  // confidence_rules array → compiled function
  // Static string → kept as-is (scanner-full handles both)
  // Omitted → no confidence property, scanner-full defaults to HIGH
  if (Array.isArray(raw.confidence_rules)) {
    compiled.confidence = buildConfidenceFunction(raw.confidence_rules);
    delete compiled.confidence_rules;
  } else if (raw.confidence && typeof raw.confidence === 'string') {
    compiled.confidence = raw.confidence;
  } else {
    delete compiled.confidence;
    delete compiled.confidence_rules;
  }

  return compiled;
}
309
+
310
/**
 * Load all rules from a pack object (parsed pack JSON).
 * Returns an array of compiled rule objects, in the order they appear in the
 * pack. Every rule is tagged with the pack's `pack_id` as its source, or
 * 'unknown' when the pack declares none.
 *
 * @param {object} pack Parsed pack JSON with pack_id and rules array
 * @returns {object[]} Array of compiled rules
 * @throws {Error} if the pack is missing or has no `rules` array; errors
 *                 thrown by loadRule for an individual rule propagate as-is
 */
function loadPack(pack) {
  // Optional chaining so a pack file that parsed to null/undefined yields the
  // same descriptive error as a pack without rules.
  if (!Array.isArray(pack?.rules)) {
    throw new Error(`Pack "${pack?.pack_id || '?'}" has no rules array`);
  }
  const source = pack.pack_id || 'unknown';
  return pack.rules.map((raw) => loadRule(raw, source));
}
323
+
324
+ module.exports = { loadRule, loadPack, classifyFileContext, ANTIPATTERN_PRESETS };