swynx-lite 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +113 -0
  2. package/bin/swynx-lite +3 -0
  3. package/package.json +47 -0
  4. package/src/clean.mjs +280 -0
  5. package/src/cli.mjs +264 -0
  6. package/src/config.mjs +121 -0
  7. package/src/output/console.mjs +298 -0
  8. package/src/output/json.mjs +76 -0
  9. package/src/output/progress.mjs +57 -0
  10. package/src/scan.mjs +143 -0
  11. package/src/security.mjs +62 -0
  12. package/src/shared/fixer/barrel-cleaner.mjs +192 -0
  13. package/src/shared/fixer/import-cleaner.mjs +237 -0
  14. package/src/shared/fixer/quarantine.mjs +218 -0
  15. package/src/shared/scanner/analysers/buildSystems.mjs +647 -0
  16. package/src/shared/scanner/analysers/configParsers.mjs +1086 -0
  17. package/src/shared/scanner/analysers/deadcode.mjs +6194 -0
  18. package/src/shared/scanner/analysers/entryPointDetector.mjs +634 -0
  19. package/src/shared/scanner/analysers/generatedCode.mjs +297 -0
  20. package/src/shared/scanner/analysers/imports.mjs +60 -0
  21. package/src/shared/scanner/discovery.mjs +240 -0
  22. package/src/shared/scanner/parse-worker.mjs +82 -0
  23. package/src/shared/scanner/parsers/assets.mjs +44 -0
  24. package/src/shared/scanner/parsers/csharp.mjs +400 -0
  25. package/src/shared/scanner/parsers/css.mjs +60 -0
  26. package/src/shared/scanner/parsers/go.mjs +445 -0
  27. package/src/shared/scanner/parsers/java.mjs +364 -0
  28. package/src/shared/scanner/parsers/javascript.mjs +823 -0
  29. package/src/shared/scanner/parsers/kotlin.mjs +350 -0
  30. package/src/shared/scanner/parsers/python.mjs +497 -0
  31. package/src/shared/scanner/parsers/registry.mjs +233 -0
  32. package/src/shared/scanner/parsers/rust.mjs +427 -0
  33. package/src/shared/scanner/scan-dead-code.mjs +316 -0
  34. package/src/shared/security/patterns.mjs +349 -0
  35. package/src/shared/security/proximity.mjs +84 -0
  36. package/src/shared/security/scanner.mjs +269 -0
@@ -0,0 +1,316 @@
1
+ // src/shared/scanner/scan-dead-code.mjs
2
+ // Standalone dead code scanning function — the unified entry point for dead code analysis.
3
+ // Replaces both scanner-legacy/index.mjs and the inline scanDeadCodeOnly() in scan-repo-worker.mjs.
4
+
5
+ import { availableParallelism } from 'os';
6
+ import { Worker } from 'worker_threads';
7
+ import { fileURLToPath } from 'url';
8
+ import { dirname, join } from 'path';
9
+ import { readFileSync } from 'fs';
10
+ import { discoverFiles, categoriseFiles } from './discovery.mjs';
11
+ import { parseJavaScript } from './parsers/javascript.mjs';
12
+ import { parseFile } from './parsers/registry.mjs';
13
+ import { analyseImports } from './analysers/imports.mjs';
14
+ import { findDeadCode } from './analysers/deadcode.mjs';
15
+
16
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Worker entry script, resolved relative to this module's directory.
const WORKER_PATH = join(__dirname, 'parse-worker.mjs');

// Upper bound applied when the worker count is derived from the machine.
const MAX_AUTO_WORKERS = 8;

/**
 * Resolve the parse-worker count from the raw SWYNX_WORKERS value.
 *
 * A positive integer is used as-is. Anything else (unset, empty, `0`,
 * negative, or non-numeric) falls back to the machine's available
 * parallelism, capped at MAX_AUTO_WORKERS.
 *
 * @param {string|undefined} raw - Raw environment value
 * @returns {number} Worker count, always >= 1
 */
function resolveWorkerCount(raw) {
  // Explicit radix: the value is a decimal count, never hex/octal.
  const requested = Number.parseInt(raw ?? '', 10);
  if (Number.isInteger(requested) && requested > 0) return requested;
  return Math.min(availableParallelism(), MAX_AUTO_WORKERS);
}

const DEFAULT_WORKER_COUNT = resolveWorkerCount(process.env.SWYNX_WORKERS);

const CHUNK_THRESHOLD = 10000; // B3: chunk parsing when file count exceeds this
const CHUNK_SIZE = 5000;       // B3: files per parse chunk
23
+
24
/**
 * Parse `files` on a pool of worker threads.
 *
 * Returns `null` (not a promise) when the input is too small to parallelise
 * (fewer than 100 files, or at most one worker would be used) so the caller
 * can fall back to in-process parsing.
 *
 * Otherwise the files are split into contiguous slices, one per worker, and
 * each worker receives `{ files, parserType }` as its `workerData`. Workers
 * report through messages of type `batch` (intermediate results), `done`
 * (final results) or `error`.
 *
 * The returned promise never rejects: a failed worker simply contributes no
 * further results, and the promise resolves once every worker has finished,
 * failed, or exited.
 *
 * @param {Array} files - Files to parse
 * @param {string} parserType - Parser selector forwarded to the worker
 * @returns {Promise<Array>|null} Accumulated results, or null for small inputs
 */
function parallelParse(files, parserType) {
  const maxWorkers = DEFAULT_WORKER_COUNT;
  const workerCount = Math.min(maxWorkers, Math.ceil(files.length / 50));
  if (workerCount <= 1 || files.length < 100) return null;

  return new Promise((resolve) => {
    const chunkSize = Math.ceil(files.length / workerCount);
    const allResults = [];
    let pendingWorkers = 0;

    // Append without spread so a very large batch cannot overflow the
    // argument limit, and so a message without `results` is ignored
    // instead of throwing inside the event handler.
    const appendResults = (results) => {
      if (!Array.isArray(results)) return;
      for (const result of results) allResults.push(result);
    };

    for (let i = 0; i < workerCount; i++) {
      const chunk = files.slice(i * chunkSize, (i + 1) * chunkSize);
      if (chunk.length === 0) continue;
      pendingWorkers++;

      const worker = new Worker(WORKER_PATH, {
        workerData: { files: chunk, parserType }
      });

      // Each worker is counted exactly once, whichever terminal signal
      // arrives first ('done' / 'error' message, 'error' event, or 'exit').
      let settled = false;
      const settle = () => {
        if (settled) return;
        settled = true;
        pendingWorkers--;
        if (pendingWorkers === 0) resolve(allResults);
      };

      worker.on('message', (msg) => {
        if (msg?.type === 'batch') {
          // B1: Handle batch messages from worker (intermediate results)
          appendResults(msg.results);
        } else if (msg?.type === 'done') {
          appendResults(msg.results);
          settle();
        } else if (msg?.type === 'error') {
          settle();
        }
      });

      worker.on('error', settle);

      // A worker that exits without posting 'done' would otherwise leave
      // the promise pending forever.
      worker.on('exit', settle);
    }
  });
}
65
+
66
/**
 * B3: Chunked parse pipeline.
 *
 * Walks `files` in consecutive slices of CHUNK_SIZE so only one slice is being
 * parsed at a time. Each slice is handed to parallelParse; when that declines
 * (returns null) the slice is parsed in-process, one file at a time, and the
 * `content` field of every result is cleared before it is kept (B2).
 * Files whose parse throws or yields nothing are skipped.
 *
 * @param {Array} files - Files to parse
 * @param {string} parserType - 'javascript' selects parseJavaScript; anything else uses parseFile
 * @param {Function} onProgress - Progress callback ({ phase, message })
 * @returns {Promise<Array>} Parse results for all chunks, in chunk order
 */
async function chunkedParse(files, parserType, onProgress) {
  const chunkTotal = Math.ceil(files.length / CHUNK_SIZE);
  const collected = [];

  for (let index = 0; index < chunkTotal; index += 1) {
    const offset = index * CHUNK_SIZE;
    const batch = files.slice(offset, offset + CHUNK_SIZE);
    onProgress({ phase: 'parse', message: `Parsing chunk ${index + 1}/${chunkTotal} (${batch.length} files)...` });

    const pooled = parallelParse(batch, parserType);
    if (pooled) {
      collected.push(...await pooled);
      continue;
    }

    // Small slice: parse in-process instead of spinning up workers.
    const parseOne = parserType === 'javascript' ? parseJavaScript : parseFile;
    for (const entry of batch) {
      try {
        const parsed = await parseOne(entry);
        if (!parsed) continue;
        // Strip content like workers do (B2)
        parsed.content = null;
        collected.push(parsed);
      } catch { /* skip */ }
    }
  }

  return collected;
}
100
+
101
// Ordered [extension pattern, language label] pairs; the first match wins.
// Kotlin (.kt) is reported under the 'java' label.
const LANGUAGE_RULES = [
  [/\.[mc]?[jt]sx?$/, 'javascript'],
  [/\.py$/, 'python'],
  [/\.go$/, 'go'],
  [/\.(java|kt)$/, 'java'],
  [/\.php$/, 'php'],
  [/\.rb$/, 'ruby'],
  [/\.rs$/, 'rust'],
  [/\.cs$/, 'csharp'],
  [/\.dart$/, 'dart'],
  [/\.swift$/, 'swift'],
  [/\.scala$/, 'scala'],
  [/\.ex$|\.exs$/, 'elixir'],
];

/**
 * Map a file path to a language label based on its extension
 * (used for the legacy-compatible summary).
 *
 * @param {string} filePath - Path or file name to classify
 * @returns {string} Language label, or 'other' when no rule matches
 */
function detectLanguage(filePath) {
  const rule = LANGUAGE_RULES.find(([pattern]) => pattern.test(filePath));
  return rule ? rule[1] : 'other';
}
119
+
120
// Glob patterns excluded from discovery when the caller supplies no `exclude` option.
const DEFAULT_EXCLUDE = [
  // Dependencies, VCS metadata, build output, quarantine, coverage, minified assets
  '**/node_modules/**',
  '**/bower_components/**',
  '**/.git/**',
  '**/dist/**',
  '**/build/**',
  '**/.swynx-quarantine/**',
  '**/coverage/**',
  '**/*.min.js',
  '**/*.min.css',

  // Logs
  '**/logs/**',
  '**/log/**',
  '**/*.log',

  // Temporary files and caches
  '**/tmp/**',
  '**/temp/**',
  '**/.cache/**',
  '**/cache/**',

  // Python bytecode and tool caches
  '**/__pycache__/**',
  '**/*.pyc',
  '**/*.pyo',
  '**/.pytest_cache/**',
  '**/.mypy_cache/**',

  // SQL scripts and database files
  '**/*.sql',
  '**/*.sqlite',
  '**/*.sqlite3',
  '**/*.db',

  // Test baselines, snapshots, fixtures and test data
  '**/tests/baselines/**',
  '**/test/baselines/**',
  '**/__snapshots__/**',
  '**/snapshots/**',
  '**/test-fixtures/**',
  '**/test_fixtures/**',
  '**/__fixtures__/**',
  '**/fixtures/**',
  '**/fixture/**',
  '**/testdata/**',
  '**/test-data/**',

  // Vendored code
  '**/vendor/**',

  // Mock data and testing-only packages
  '**/__mockdata__/**',
  '**/__mock__/**',
  '**/__for-testing__/**',
  '**/pkg-tests-fixtures/**',
  '**/pkg-tests-specs/**',
  '**/type-tests/**',
  '**/type-test/**',

  // Test fixture / baseline directories (huge in compiler repos)
  '**/TestData/**',
  '**/testData/**',
  '**/test-cases/**',
  '**/test_cases/**',
  '**/conformance/**',
  '**/testcases/**',

  // Compiler test input directories
  '**/cases/**/*.ts',
  '**/test/cases/**',

  // IDE/editor test fixtures
  '**/test-fixture/**',

  // C# intermediate / compiled output
  '**/obj/**',
  '**/bin/Debug/**',
  '**/bin/Release/**',

  // C# scaffolding baselines (test-generated output)
  '**/Scaffolding/Baselines/**',

  // Rust compiler test inputs (standalone files compiled by test harness, not source code)
  '**/tests/ui/**',
  '**/tests/derive_ui/**',
  '**/tests/compile-fail/**',
  '**/tests/run-pass/**',
  '**/tests/run-fail/**',
  '**/tests/ui-fulldeps/**',
  '**/tests/pretty/**',
  '**/tests/mir-opt/**',
  '**/tests/assembly/**',
  '**/tests/codegen/**',
  '**/tests/debuginfo/**',
  '**/tests/incremental/**',
  '**/tests/codegen-llvm/**',
  '**/tests/rustdoc-html/**',
  '**/tests/crashes/**',
  '**/tests/assembly-llvm/**',
  '**/tests/rustdoc-ui/**',
  '**/tests/rustdoc-js/**',
  '**/tests/rustdoc-json/**',
  '**/tests/codegen-units/**',
  '**/tests/coverage-run-rustdoc/**',

  // Cypress/E2E system test fixture projects (standalone apps used as test targets)
  '**/system-tests/projects/**',
  '**/system-tests/project-fixtures/**',

  // RustPython test snippet inputs
  '**/extra_tests/snippets/**',

  // Python stdlib copies (RustPython, cpython)
  '**/Lib/encodings/**',

  // Python vendored third-party code
  '**/_vendor/**',
  '**/_distutils/**',

  // Compiled/bundled static assets (Phoenix/Elixir)
  '**/static/assets/**',

  // Generated protobuf/gRPC output directories
  '**/gen/proto/**',
];
173
+
174
/**
 * Parse files one at a time in-process, skipping any file whose parser
 * throws or returns nothing.
 *
 * @param {Array} files - Files to parse
 * @param {Function} parseFn - Parser to apply to each file
 * @returns {Promise<Array>} Successful parse results, in input order
 */
async function parseSequentially(files, parseFn) {
  const parsed = [];
  for (const file of files) {
    try {
      const result = await parseFn(file);
      if (result) parsed.push(result);
    } catch { /* skip */ }
  }
  return parsed;
}

/**
 * Standalone dead code scan.
 *
 * Phases: discover files, parse JS/TS, parse other languages, build the
 * import graph from the JS/TS results, detect dead code, then shape the
 * result into legacy-compatible and full-scanner fields.
 *
 * @param {string} projectPath - Absolute path to the project root
 * @param {Object} [options]
 * @param {string[]} [options.exclude] - Glob patterns to exclude (replaces DEFAULT_EXCLUDE when given)
 * @param {number} [options.workers] - Max parallel parse workers.
 *   NOTE(review): this option is not read by this function; the worker count
 *   comes from the module-level DEFAULT_WORKER_COUNT.
 * @param {Function} [options.onProgress] - Progress callback ({ phase, message })
 * @returns {Promise<Object>} Result with both legacy-compatible and full-scanner fields:
 *   `deadFiles`, `entryPoints`, `summary`, `fullyDeadFiles`, `partiallyDeadFiles`,
 *   `skippedDynamic`, `excludedGenerated`, `elapsed` (seconds, as a string) and `totalFiles`
 */
export async function scanDeadCode(projectPath, options = {}) {
  const { exclude = DEFAULT_EXCLUDE, onProgress = () => {} } = options;
  const t0 = Date.now();

  // Phase 1: Discover files
  onProgress({ phase: 'discovery', message: 'Discovering files...' });
  const files = await discoverFiles(projectPath, { exclude });
  const categorised = categoriseFiles(files);
  const totalFiles = files.length;
  onProgress({ phase: 'discovery', message: `${totalFiles} files discovered` });

  // Phase 2: Parse JS/TS — use chunked pipeline for large repos
  const jsFiles = categorised.javascript || [];
  onProgress({ phase: 'parse', message: `Parsing ${jsFiles.length} JS/TS files...` });
  let jsAnalysis;
  if (jsFiles.length > CHUNK_THRESHOLD) {
    // B3: Chunked parse for truly massive repos
    jsAnalysis = await chunkedParse(jsFiles, 'javascript', onProgress);
  } else {
    const jsParallel = parallelParse(jsFiles, 'javascript');
    // Sequential fallback is guarded like the other-language path so a single
    // unparseable file is skipped instead of aborting the whole scan.
    jsAnalysis = jsParallel
      ? await jsParallel
      : await parseSequentially(jsFiles, parseJavaScript);
  }
  onProgress({ phase: 'parse', message: `Parsed ${jsAnalysis.length} JS/TS files` });

  // Phase 3: Parse other languages
  const otherLangFiles = [
    ...categorised.python || [],
    ...categorised.java || [],
    ...categorised.kotlin || [],
    ...categorised.csharp || [],
    ...categorised.go || [],
    ...categorised.rust || []
  ];
  let otherLangAnalysis = [];
  if (otherLangFiles.length > 0) {
    onProgress({ phase: 'parse', message: `Parsing ${otherLangFiles.length} other-language files...` });
    if (otherLangFiles.length > CHUNK_THRESHOLD) {
      // B3: Chunked parse for large non-JS repos
      otherLangAnalysis = await chunkedParse(otherLangFiles, 'other', onProgress);
    } else {
      const otherParallel = parallelParse(otherLangFiles, 'other');
      otherLangAnalysis = otherParallel
        ? await otherParallel
        : await parseSequentially(otherLangFiles, parseFile);
    }
    onProgress({ phase: 'parse', message: `Parsed ${otherLangAnalysis.length} other-language files` });
  }

  // Phase 4: Build import graph (from the JS/TS results only)
  onProgress({ phase: 'graph', message: 'Building import graph...' });
  const importGraph = await analyseImports(jsAnalysis);

  // Phase 5: Find dead code
  onProgress({ phase: 'detection', message: 'Detecting dead code...' });
  let packageJson = {};
  try {
    packageJson = JSON.parse(readFileSync(join(projectPath, 'package.json'), 'utf-8'));
  } catch { /* no package.json */ }

  const allCodeAnalysis = [...jsAnalysis, ...otherLangAnalysis];
  const deadCode = await findDeadCode(allCodeAnalysis, importGraph, projectPath, packageJson, {});

  const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
  onProgress({ phase: 'done', message: `Done in ${elapsed}s` });

  const fullyDeadFiles = deadCode.fullyDeadFiles || [];
  const partiallyDeadFiles = deadCode.partiallyDeadFiles || [];
  const entryPoints = deadCode.entryPoints || [];

  // Build legacy-compatible deadFiles array from fullyDeadFiles + partiallyDeadFiles
  const deadFiles = [...fullyDeadFiles, ...partiallyDeadFiles].map(f => ({
    file: f.file,
    size: f.sizeBytes || f.size || 0,
    lines: f.lineCount || f.lines || 0,
    language: f.language || detectLanguage(f.file),
    // Bare export names are normalised to { name, type } objects
    exports: (f.exports || []).map(e => typeof e === 'string' ? { name: e, type: 'unknown' } : e)
  }));

  // Sort by size descending
  deadFiles.sort((a, b) => b.size - a.size);

  // Build language counts from all discovered files
  const languages = {};
  for (const file of files) {
    const rel = typeof file === 'string' ? file : file.relativePath || file;
    const lang = detectLanguage(rel);
    if (lang !== 'other') {
      languages[lang] = (languages[lang] || 0) + 1;
    }
  }

  const deadCount = deadFiles.length;
  const deadRate = totalFiles > 0 ? ((deadCount / totalFiles) * 100).toFixed(2) : '0.00';
  const totalDeadBytes = deadFiles.reduce((sum, f) => sum + f.size, 0);

  return {
    // Legacy-compatible fields (used by toReporterShape in cli.mjs and scan-all-repos.mjs)
    deadFiles,
    entryPoints,
    summary: {
      totalFiles,
      entryPoints: entryPoints.length,
      // Clamped: dead files and entry points are counted independently, so
      // their sum can exceed the number of discovered files.
      reachableFiles: Math.max(0, totalFiles - deadCount - entryPoints.length),
      deadFiles: deadCount,
      deadRate: `${deadRate}%`,
      totalDeadBytes,
      languages
    },

    // Full scanner fields (richer detail)
    fullyDeadFiles,
    partiallyDeadFiles,
    skippedDynamic: deadCode.skippedDynamic || [],
    excludedGenerated: deadCode.excludedGenerated || [],

    // Metadata
    elapsed,
    totalFiles
  };
}
@@ -0,0 +1,349 @@
1
+ // src/shared/security/patterns.mjs
2
+ // CWE pattern definitions for security analysis across all code
3
+
4
/**
 * CWE patterns for detecting dangerous code patterns.
 *
 * Each entry has:
 *   id          - unique pattern identifier
 *   cwe         - CWE number (e.g. 'CWE-78')
 *   cweName     - human-readable CWE title
 *   severity    - 'CRITICAL' | 'HIGH' | 'MEDIUM' | 'LOW'
 *   pattern     - RegExp tested against source text
 *   description - what the pattern detects
 *   risk        - why the finding matters in dead code
 *   languages   - language keys the pattern applies to (empty = all)
 */
export const CWE_PATTERNS = [
  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-78: OS Command Injection
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-78-exec',
    cwe: 'CWE-78',
    cweName: 'OS Command Injection',
    severity: 'CRITICAL',
    pattern: /child_process\.(exec|execSync)\s*\(/,
    description: 'child_process.exec() with potential command injection',
    risk: 'Dead code with command execution could be revived without security review',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-78-spawn-shell',
    cwe: 'CWE-78',
    cweName: 'OS Command Injection',
    severity: 'CRITICAL',
    pattern: /child_process\.spawn\s*\([^)]*shell\s*:\s*true/,
    description: 'child_process.spawn() with shell: true',
    risk: 'Shell mode spawn in dead code enables injection if revived',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-78-os-system',
    cwe: 'CWE-78',
    cweName: 'OS Command Injection',
    severity: 'CRITICAL',
    pattern: /os\.system\s*\(|subprocess\.(Popen|call|run)\s*\(/,
    description: 'Python OS command execution',
    risk: 'Dead code with system calls could be revived without security review',
    languages: ['py']
  },
  {
    id: 'CWE-78-go-exec',
    cwe: 'CWE-78',
    cweName: 'OS Command Injection',
    severity: 'CRITICAL',
    pattern: /exec\.Command\s*\(/,
    description: 'Go exec.Command() call',
    risk: 'Dead code with command execution could be revived without security review',
    languages: ['go']
  },

  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-94: Code Injection
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-94-eval',
    cwe: 'CWE-94',
    cweName: 'Code Injection',
    severity: 'CRITICAL',
    pattern: /\beval\s*\(/,
    description: 'eval() with dynamic code execution',
    risk: 'Dead eval() could be exploited if code is revived or imported',
    languages: ['js', 'ts', 'py']
  },
  {
    id: 'CWE-94-new-function',
    cwe: 'CWE-94',
    cweName: 'Code Injection',
    severity: 'CRITICAL',
    pattern: /new\s+Function\s*\(/,
    description: 'new Function() constructor for dynamic code',
    risk: 'Dynamic function creation in dead code increases attack surface',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-94-vm-run',
    cwe: 'CWE-94',
    cweName: 'Code Injection',
    severity: 'CRITICAL',
    pattern: /vm\.(runInNewContext|runInThisContext|runInContext|compileFunction)\s*\(/,
    description: 'Node.js vm module execution',
    risk: 'VM context execution in dead code is a sandbox escape risk',
    languages: ['js', 'ts']
  },

  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-798: Hardcoded Credentials
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-798-password',
    cwe: 'CWE-798',
    cweName: 'Hardcoded Credentials',
    severity: 'CRITICAL',
    pattern: /(password|passwd|secret|api_key|apikey|api_secret)\s*[:=]\s*["'][^"']{4,}/i,
    description: 'Hardcoded password or secret',
    risk: 'Credentials in dead code are often forgotten and exposed in version control',
    languages: []
  },
  {
    id: 'CWE-798-aws-key',
    cwe: 'CWE-798',
    cweName: 'Hardcoded Credentials',
    severity: 'CRITICAL',
    pattern: /AKIA[0-9A-Z]{16}/,
    description: 'AWS Access Key ID',
    risk: 'AWS credentials in dead code may still be active',
    languages: []
  },
  {
    id: 'CWE-798-private-key',
    cwe: 'CWE-798',
    cweName: 'Hardcoded Credentials',
    severity: 'CRITICAL',
    pattern: /-----BEGIN (RSA |EC |DSA )?PRIVATE KEY-----/,
    description: 'Embedded private key',
    risk: 'Private keys in dead code are often overlooked during rotation',
    languages: []
  },
  {
    id: 'CWE-798-openai-key',
    cwe: 'CWE-798',
    cweName: 'Hardcoded Credentials',
    severity: 'HIGH',
    pattern: /sk-[a-zA-Z0-9]{20,}/,
    description: 'Potential API key (sk-... pattern)',
    risk: 'API keys in dead code may remain active and billable',
    languages: []
  },

  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-22: Path Traversal
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-22-path-join-params',
    cwe: 'CWE-22',
    cweName: 'Path Traversal',
    severity: 'HIGH',
    pattern: /path\.join\s*\([^)]*req\.(params|query|body)/,
    description: 'path.join with user input from request',
    risk: 'Path traversal in dead code could be revived as a file access vulnerability',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-22-readfile-params',
    cwe: 'CWE-22',
    cweName: 'Path Traversal',
    severity: 'HIGH',
    pattern: /(readFile|readFileSync|createReadStream)\s*\([^)]*req\.(params|query|body)/,
    description: 'File read with user-controlled path',
    risk: 'Unvalidated file read in dead code is a path traversal risk if revived',
    languages: ['js', 'ts']
  },

  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-502: Unsafe Deserialization
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-502-pickle',
    cwe: 'CWE-502',
    cweName: 'Unsafe Deserialization',
    severity: 'HIGH',
    pattern: /pickle\.(load|loads)\s*\(/,
    description: 'Python pickle deserialization',
    risk: 'pickle.load() in dead code can execute arbitrary code if revived',
    languages: ['py']
  },
  {
    id: 'CWE-502-yaml-load',
    cwe: 'CWE-502',
    cweName: 'Unsafe Deserialization',
    severity: 'HIGH',
    // The negative lookahead sits directly after the opening paren so it scans
    // the argument list (up to the first ')') for a SafeLoader/CSafeLoader
    // keyword argument. Placed after a greedy [^)]* it could never reject.
    // Limitation: arguments containing their own ')' end the scan early.
    pattern: /yaml\.load\s*\((?![^)]*Loader\s*=\s*(?:yaml\.)?C?SafeLoader)/,
    description: 'yaml.load() without SafeLoader',
    risk: 'Unsafe YAML loading in dead code can execute arbitrary code',
    languages: ['py']
  },
  {
    id: 'CWE-502-unserialize',
    cwe: 'CWE-502',
    cweName: 'Unsafe Deserialization',
    severity: 'HIGH',
    pattern: /\bunserialize\s*\(/,
    description: 'PHP unserialize() call',
    risk: 'Unsafe deserialization in dead code enables object injection if revived',
    languages: ['php']
  },

  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-79: Cross-Site Scripting (XSS)
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-79-innerhtml',
    cwe: 'CWE-79',
    cweName: 'Cross-Site Scripting',
    severity: 'HIGH',
    pattern: /\.innerHTML\s*=/,
    description: 'Direct innerHTML assignment',
    risk: 'innerHTML in dead code could introduce XSS if component is re-enabled',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-79-dangerously',
    cwe: 'CWE-79',
    cweName: 'Cross-Site Scripting',
    severity: 'HIGH',
    pattern: /dangerouslySetInnerHTML/,
    description: 'React dangerouslySetInnerHTML',
    risk: 'Dangerous React prop in dead component could introduce XSS if revived',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-79-document-write',
    cwe: 'CWE-79',
    cweName: 'Cross-Site Scripting',
    severity: 'HIGH',
    pattern: /document\.write\s*\(/,
    description: 'document.write() call',
    risk: 'document.write in dead code bypasses CSP if revived',
    languages: ['js', 'ts']
  },

  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-327: Broken Cryptography
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-327-md5',
    cwe: 'CWE-327',
    cweName: 'Broken Cryptography',
    severity: 'MEDIUM',
    pattern: /createHash\s*\(\s*['"]md5['"]\s*\)/,
    description: 'MD5 hash usage',
    risk: 'Weak hash in dead code may be copied to new code without upgrading',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-327-sha1',
    cwe: 'CWE-327',
    cweName: 'Broken Cryptography',
    severity: 'MEDIUM',
    pattern: /hashlib\.(md5|sha1)\s*\(/,
    description: 'Python weak hash algorithm',
    risk: 'Weak hash in dead code may be copied to new code without upgrading',
    languages: ['py']
  },
  {
    id: 'CWE-327-des-rc4',
    cwe: 'CWE-327',
    cweName: 'Broken Cryptography',
    severity: 'MEDIUM',
    pattern: /createCipher(iv)?\s*\(\s*['"](des|rc4|des-ede|des-ede3)['"]/i,
    description: 'Weak cipher algorithm (DES/RC4)',
    risk: 'Broken cipher in dead code sets bad precedent if used as reference',
    languages: ['js', 'ts']
  },

  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-918: Server-Side Request Forgery (SSRF)
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-918-fetch-params',
    cwe: 'CWE-918',
    cweName: 'Server-Side Request Forgery',
    severity: 'HIGH',
    pattern: /fetch\s*\(\s*req\.(params|query|body)/,
    description: 'fetch() with user-controlled URL',
    risk: 'SSRF in dead code could be revived to access internal services',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-918-requests-dynamic',
    cwe: 'CWE-918',
    cweName: 'Server-Side Request Forgery',
    severity: 'MEDIUM',
    pattern: /requests\.(get|post|put|delete)\s*\(\s*f?["']/,
    description: 'Python requests with potentially dynamic URL',
    risk: 'Outbound requests in dead code may target internal services if revived',
    languages: ['py']
  },

  // ═══════════════════════════════════════════════════════════════════════════
  // CWE-200: Information Exposure
  // ═══════════════════════════════════════════════════════════════════════════
  {
    id: 'CWE-200-console-sensitive',
    cwe: 'CWE-200',
    cweName: 'Information Exposure',
    severity: 'LOW',
    pattern: /console\.(log|debug|info)\s*\([^)]*\b(password|secret|token|key|credential)/i,
    description: 'Logging potentially sensitive data',
    risk: 'Sensitive data logging in dead code may leak if code is re-enabled',
    languages: ['js', 'ts']
  },
  {
    id: 'CWE-200-stack-trace',
    cwe: 'CWE-200',
    cweName: 'Information Exposure',
    severity: 'LOW',
    pattern: /res\.(send|json)\s*\(\s*(err|error)\.(stack|message)/,
    description: 'Stack trace exposure in response',
    risk: 'Error detail exposure in dead endpoint leaks info if route is re-enabled',
    languages: ['js', 'ts']
  }
];
305
+
306
/**
 * Lookup table from file extension (lowercase, with leading dot) to the
 * language key used in each pattern's `languages` list.
 */
const EXT_TO_LANG = Object.fromEntries([
  // JavaScript
  ['.js', 'js'],
  ['.mjs', 'js'],
  ['.cjs', 'js'],
  ['.jsx', 'js'],
  // TypeScript
  ['.ts', 'ts'],
  ['.tsx', 'ts'],
  ['.mts', 'ts'],
  ['.cts', 'ts'],
  // Other languages
  ['.py', 'py'],
  ['.go', 'go'],
  ['.php', 'php'],
  ['.rb', 'rb'],
  ['.java', 'java'],
  ['.kt', 'kt'],
  ['.kts', 'kt'],
  ['.cs', 'cs'],
  ['.rs', 'rs'],
  // C / C++ (headers map to their respective language key)
  ['.c', 'c'],
  ['.cpp', 'cpp'],
  ['.h', 'c'],
  ['.hpp', 'cpp'],
]);
332
+
333
/**
 * Get CWE patterns applicable to a file extension.
 *
 * Returns all language-agnostic patterns (empty `languages` list) plus the
 * patterns whose `languages` list includes the language mapped from `ext`.
 * JS and TS are treated as one family: a pattern listed for either applies
 * to both.
 *
 * The extension is matched case-insensitively, so '.JS' and '.js' behave the
 * same. An unmapped or non-string extension yields only the
 * language-agnostic patterns.
 *
 * @param {string} ext - File extension including the leading dot (e.g. '.ts')
 * @returns {Array<Object>} Matching entries from CWE_PATTERNS, in table order
 */
export function getPatternsForLanguage(ext) {
  const key = typeof ext === 'string' ? ext.toLowerCase() : ext;
  // Own-property check so keys such as 'constructor' never resolve to
  // something inherited from Object.prototype.
  const lang = Object.prototype.hasOwnProperty.call(EXT_TO_LANG, key)
    ? EXT_TO_LANG[key]
    : undefined;
  const isJsFamily = lang === 'js' || lang === 'ts';

  return CWE_PATTERNS.filter(({ languages }) => {
    // Language-agnostic patterns apply to all files
    if (languages.length === 0) return true;
    // No mapping for this extension — only return language-agnostic
    if (!lang) return false;
    // JS and TS share patterns
    if (isJsFamily && (languages.includes('js') || languages.includes('ts'))) return true;
    return languages.includes(lang);
  });
}