swynx-lite 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +113 -0
  2. package/bin/swynx-lite +3 -0
  3. package/package.json +47 -0
  4. package/src/clean.mjs +280 -0
  5. package/src/cli.mjs +264 -0
  6. package/src/config.mjs +121 -0
  7. package/src/output/console.mjs +298 -0
  8. package/src/output/json.mjs +76 -0
  9. package/src/output/progress.mjs +57 -0
  10. package/src/scan.mjs +143 -0
  11. package/src/security.mjs +62 -0
  12. package/src/shared/fixer/barrel-cleaner.mjs +192 -0
  13. package/src/shared/fixer/import-cleaner.mjs +237 -0
  14. package/src/shared/fixer/quarantine.mjs +218 -0
  15. package/src/shared/scanner/analysers/buildSystems.mjs +647 -0
  16. package/src/shared/scanner/analysers/configParsers.mjs +1086 -0
  17. package/src/shared/scanner/analysers/deadcode.mjs +6194 -0
  18. package/src/shared/scanner/analysers/entryPointDetector.mjs +634 -0
  19. package/src/shared/scanner/analysers/generatedCode.mjs +297 -0
  20. package/src/shared/scanner/analysers/imports.mjs +60 -0
  21. package/src/shared/scanner/discovery.mjs +240 -0
  22. package/src/shared/scanner/parse-worker.mjs +82 -0
  23. package/src/shared/scanner/parsers/assets.mjs +44 -0
  24. package/src/shared/scanner/parsers/csharp.mjs +400 -0
  25. package/src/shared/scanner/parsers/css.mjs +60 -0
  26. package/src/shared/scanner/parsers/go.mjs +445 -0
  27. package/src/shared/scanner/parsers/java.mjs +364 -0
  28. package/src/shared/scanner/parsers/javascript.mjs +823 -0
  29. package/src/shared/scanner/parsers/kotlin.mjs +350 -0
  30. package/src/shared/scanner/parsers/python.mjs +497 -0
  31. package/src/shared/scanner/parsers/registry.mjs +233 -0
  32. package/src/shared/scanner/parsers/rust.mjs +427 -0
  33. package/src/shared/scanner/scan-dead-code.mjs +316 -0
  34. package/src/shared/security/patterns.mjs +349 -0
  35. package/src/shared/security/proximity.mjs +84 -0
  36. package/src/shared/security/scanner.mjs +269 -0
@@ -0,0 +1,297 @@
1
+ // src/scanner/analysers/generatedCode.mjs
2
+ // Detection and exclusion of generated code files
3
+
4
+ import { readFileSync, existsSync } from 'fs';
5
+ import { basename, dirname } from 'path';
6
+
7
/**
 * Default patterns for generated code files.
 * Files whose path matches any of these are excluded from dead code analysis.
 *
 * Directory patterns are anchored with `(?:^|\/)` so they match both nested
 * directories ("packages/a/dist/x.js") and directories at the start of a
 * relative path ("dist/x.js"). Patterns assume forward-slash separators.
 */
export const DEFAULT_GENERATED_PATTERNS = [
  // JavaScript/TypeScript
  /\.generated\.(ts|tsx|js|jsx|mjs)$/,
  /\.g\.(ts|js)$/,
  /(?:^|\/)generated\//,
  /(?:^|\/)__generated__\//,
  /(?:^|\/)codegen\//,

  // GraphQL
  /(?:^|\/)graphql\.(ts|tsx|js)$/,
  /(?:^|\/)gql\.(ts|tsx|js)$/,
  /\.graphql\.(ts|tsx|js)$/,
  /types\.generated\.(ts|tsx|js)$/,
  /(?:^|\/)__graphql__\//,

  // Protocol Buffers
  /_pb\.(js|d\.ts)$/,
  /_pb2\.py$/,
  /_pb2_grpc\.py$/,
  /\.pb\.(go|cc|h)$/,

  // OpenAPI/Swagger
  /(?:^|\/)api-client\//,
  /(?:^|\/)swagger-client\//,
  /(?:^|\/)openapi\/.*\.generated\./,

  // Java build outputs
  /(?:^|\/)target\/generated-sources\//,
  /(?:^|\/)target\/generated-test-sources\//,
  /(?:^|\/)build\/generated\//,
  /(?:^|\/)build\/generated-sources\//,
  // Trailing-underscore classes. Originally labelled "MapStruct generated".
  // NOTE(review): this naming looks like the JPA static metamodel (Foo_.java) — confirm.
  /_\.java$/,

  // .NET build outputs
  /(?:^|\/)obj\//,
  /\.Designer\.cs$/,
  /\.g\.cs$/,
  /\.g\.i\.cs$/,
  /\/Migrations\/.*\.cs$/,
  /^Migrations\/.*\.cs$/, // Migrations at repo root (relative paths)
  /\.AssemblyAttributes\.cs$/, // Auto-generated assembly attributes
  /GlobalSuppressions\.cs$/, // Auto-generated code analysis suppressions

  // Go generated
  /_gen\.go$/,
  /mock_.*\.go$/,
  /.*_mock\.go$/,
  /(?:^|\/)mocks\/.*\.go$/,
  /_string\.go$/, // stringer

  // Rust
  /\.rs\.bk$/,

  // Build outputs (all languages)
  /(?:^|\/)dist\//,
  /(?:^|\/)build\//,
  /(?:^|\/)out\//,
  /(?:^|\/)output\//,
  // The leading dot is escaped so that e.g. "/anext/" is not mistaken for "/.next/".
  /(?:^|\/)\.next\//,
  /(?:^|\/)\.nuxt\//,
  /(?:^|\/)\.output\//,
  /(?:^|\/)node_modules\//,
  /(?:^|\/)vendor\//,

  // Bazel outputs
  /(?:^|\/)bazel-bin\//,
  /(?:^|\/)bazel-out\//,
  /(?:^|\/)bazel-testlogs\//
];
80
+
81
/**
 * Header comments that indicate generated code.
 *
 * These patterns are tested against the first lines of a file joined into a
 * single string, so line-start anchors need the `m` flag to match a marker
 * that is not on the very first line (for example a Go "Code generated"
 * comment placed under a licence header).
 */
const GENERATED_HEADERS = [
  // Explicit generator markers: may appear at the start of any header line.
  /^\/\/ Code generated .* DO NOT EDIT/im,
  /^\/\/ AUTO-GENERATED/im,
  /^\/\/ GENERATED CODE/im,
  /^\/\/ This file was auto-?generated/im,
  /^# Generated by/im,
  /^# DO NOT EDIT/im,
  /^\s*\* @generated/m,
  // Weak heuristics: intentionally left without `m`, so they only count when
  // they open the file. Hand-written files also use these directives.
  /^\/\*\s*eslint-disable\s*\*\//, // Often in generated files
  /^\/\/ @ts-nocheck/, // Often in generated files
  // Unanchored markers: match anywhere in the header text.
  /@generated/,
  /DO NOT EDIT THIS FILE/i,
  /This file is auto-?generated/i,
  /Generated from /i
];
99
+
100
/**
 * Check if a file path matches generated code patterns.
 *
 * The default patterns are tried first, then `customPatterns`, and the first
 * match wins.
 *
 * @param {string} filePath - File path to check
 * @param {RegExp[]} customPatterns - Additional patterns to check
 * @returns {Object} - { isGenerated: boolean, matchedPattern: string|null, reason: 'path'|null }
 */
export function isGeneratedPath(filePath, customPatterns = []) {
  const allPatterns = [...DEFAULT_GENERATED_PATTERNS, ...customPatterns];

  for (const pattern of allPatterns) {
    // Global/sticky regexes remember where the previous test() stopped.
    // Reset so that a caller-supplied /.../g pattern gives the same answer
    // for every path instead of alternating between calls.
    pattern.lastIndex = 0;
    if (pattern.test(filePath)) {
      return {
        isGenerated: true,
        matchedPattern: pattern.toString(),
        reason: 'path'
      };
    }
  }

  // Same keys as the positive result so callers can read `reason` unconditionally.
  return { isGenerated: false, matchedPattern: null, reason: null };
}
121
+
122
/**
 * Decide from a file's leading lines whether it looks generated.
 *
 * The first `linesToCheck` lines are re-joined into one string and each header
 * pattern is tested against that string; the first pattern that matches wins.
 *
 * @param {string} content - File content
 * @param {number} linesToCheck - Number of lines to check from start (default 20)
 * @returns {Object} - { isGenerated: boolean, matchedPattern: string|null }
 *   plus `reason: 'header'` on a match; falsy content yields only { isGenerated: false }
 */
export function isGeneratedContent(content, linesToCheck = 20) {
  if (!content) {
    return { isGenerated: false };
  }

  const headerText = content
    .split('\n')
    .slice(0, linesToCheck)
    .join('\n');

  const matched = GENERATED_HEADERS.find((candidate) => candidate.test(headerText));
  if (matched === undefined) {
    return { isGenerated: false, matchedPattern: null };
  }

  return {
    isGenerated: true,
    matchedPattern: matched.toString(),
    reason: 'header'
  };
}
146
+
147
/**
 * Check if a file is generated, first by its path and then by its content.
 *
 * @param {string} filePath - File path
 * @param {string} [content] - File content (optional; when falsy and the path
 *   exists on disk, the file is read as UTF-8)
 * @param {Object} options - Options
 * @param {RegExp[]} [options.customPatterns] - Extra path patterns
 * @param {boolean} [options.checkContent] - Set to false to skip the content check
 * @returns {Object} - the matching path/content check result, otherwise
 *   { isGenerated: false, reason: null }
 */
export function isGeneratedFile(filePath, content = null, options = {}) {
  const { customPatterns = [], checkContent = true } = options;

  // The path check is cheapest, so it runs first and short-circuits.
  const byPath = isGeneratedPath(filePath, customPatterns);
  if (byPath.isGenerated) {
    return byPath;
  }

  if (!checkContent) {
    return { isGenerated: false, reason: null };
  }

  let text = content;
  if (!text && existsSync(filePath)) {
    try {
      text = readFileSync(filePath, 'utf-8');
    } catch {
      // Can't read file, assume not generated
      return { isGenerated: false };
    }
  }

  if (!text) {
    return { isGenerated: false, reason: null };
  }

  const byContent = isGeneratedContent(text);
  return byContent.isGenerated ? byContent : { isGenerated: false, reason: null };
}
185
+
186
/**
 * Split a list of files into those kept for analysis and those recognised as
 * generated.
 *
 * Each entry may be a file object or a bare path string. The path checked is
 * `relativePath`, else `path`, else the entry itself.
 *
 * @param {Array} files - Array of file objects with path/relativePath
 * @param {Object} options - Options, passed through to isGeneratedFile
 * @returns {Object} - { included: Array, excluded: Array } where excluded
 *   entries are { file, reason, pattern }
 */
export function filterGeneratedFiles(files, options = {}) {
  const result = { included: [], excluded: [] };

  for (const entry of files) {
    const target = entry.relativePath || entry.path || entry;
    const verdict = isGeneratedFile(target, entry.content || null, options);

    if (!verdict.isGenerated) {
      result.included.push(entry);
      continue;
    }

    result.excluded.push({
      file: target,
      reason: verdict.reason,
      pattern: verdict.matchedPattern
    });
  }

  return result;
}
214
+
215
/**
 * Get patterns for a specific codegen type.
 *
 * A fresh array is built on every call, so callers may mutate the result.
 * Unknown types, including names that exist on Object.prototype such as
 * "constructor" or "toString", yield an empty array.
 *
 * @param {string} type - Codegen type (graphql, protobuf, openapi, thrift, grpc)
 * @returns {RegExp[]} - Patterns for that type, or [] when the type is unknown
 */
export function getPatternsForCodegenType(type) {
  // A Map is used instead of a plain object so that lookups never fall
  // through to inherited members of Object.prototype.
  const typePatterns = new Map([
    ['graphql', [
      /\.graphql\.(ts|tsx|js)$/,
      /\/graphql\.(ts|tsx|js)$/,
      /\/gql\.(ts|tsx|js)$/,
      /types\.generated\.(ts|tsx|js)$/,
      /\/__generated__\//,
      /operations\.(ts|tsx|js)$/
    ]],
    ['protobuf', [
      /_pb\.(js|d\.ts)$/,
      /_pb2\.py$/,
      /_pb2_grpc\.py$/,
      /\.pb\.(go|cc|h)$/,
      /\.pb\.ts$/
    ]],
    ['openapi', [
      /\/api-client\//,
      /\/swagger-client\//,
      /\/openapi\/.*\.generated\./,
      /api\.generated\.(ts|js)$/
    ]],
    ['thrift', [
      /_types\.(js|ts)$/,
      /\.thrift\.ts$/
    ]],
    ['grpc', [
      /_grpc_pb\.(js|ts)$/,
      /_grpc\.pb\.go$/
    ]]
  ]);

  return typePatterns.get(type) || [];
}
255
+
256
/**
 * Look for well-known codegen configuration files directly under a project root.
 *
 * Built-in candidate file names are keyed by codegen type. Entries in
 * `codegenConfigs` replace the built-in list for the same type and may add
 * new types.
 *
 * @param {string} projectPath - Path to project root
 * @param {Object} codegenConfigs - Config file patterns by type
 * @returns {Array} - Found config files as { type, file, path }
 */
export function findCodegenConfigs(projectPath, codegenConfigs = {}) {
  const candidatesByType = {
    graphql: ['codegen.yml', 'codegen.yaml', 'codegen.ts', 'codegen.js', '.graphqlrc.yml', '.graphqlrc.json'],
    protobuf: ['buf.yaml', 'buf.gen.yaml', 'buf.work.yaml'],
    openapi: ['openapi.yaml', 'openapi.yml', 'openapi.json', 'swagger.yaml', 'swagger.json'],
    grpc: ['grpc-tools.config.js'],
    // Caller-supplied lists win over the built-in ones.
    ...codegenConfigs
  };

  return Object.entries(candidatesByType).flatMap(([type, names]) =>
    [...names]
      .map((file) => ({ type, file, path: `${projectPath}/${file}` }))
      .filter((candidate) => existsSync(candidate.path))
  );
}
288
+
289
+ export default {
290
+ isGeneratedPath,
291
+ isGeneratedContent,
292
+ isGeneratedFile,
293
+ filterGeneratedFiles,
294
+ getPatternsForCodegenType,
295
+ findCodegenConfigs,
296
+ DEFAULT_GENERATED_PATTERNS
297
+ };
@@ -0,0 +1,60 @@
1
+ // src/scanner/analysers/imports.mjs
2
+ // Import/export graph analysis
3
+
4
/**
 * Analyse import relationships.
 *
 * Walks the parsed files once, recording per file the module specifiers it
 * imports and the exports it declares, and collecting the names of imported
 * packages (specifiers that start with neither "." nor "/").
 *
 * @param {Array} jsAnalysis - Parsed file results; each entry exposes `file`
 *   (a path string or an object with `relativePath`) and optional
 *   `imports` / `exports` arrays
 * @param {Function} onProgress - Called with { current, total, file }
 * @returns {Promise<Object>} - { graph: Map, usedPackages: Set, unusedExports: Array, fileCount: number }
 *   `unusedExports` is returned empty and `importedBy` lists are never filled here.
 */
export async function analyseImports(jsAnalysis, onProgress = () => {}) {
  const graph = new Map();
  const usedPackages = new Set();
  const unusedExports = [];
  const total = jsAnalysis.length;

  // Fetch the graph node for a file, creating it on first use.
  const nodeFor = (key) => {
    if (!graph.has(key)) {
      graph.set(key, { imports: [], exports: [], importedBy: [] });
    }
    return graph.get(key);
  };

  // "@scope/name/sub" -> "@scope/name"; "name/sub" -> "name".
  const toPackageName = (specifier) => {
    const segments = specifier.split('/');
    return specifier.startsWith('@') ? segments.slice(0, 2).join('/') : segments[0];
  };

  for (let index = 0; index < jsAnalysis.length; index++) {
    const entry = jsAnalysis[index];
    const isLast = index === total - 1;

    // Report progress every 2 files and yield to event loop
    if (index % 2 === 0 || isLast) {
      onProgress({ current: index + 1, total, file: entry.file?.relativePath || entry.file });
      await new Promise((resume) => setImmediate(resume));
    }

    const filePath = entry.file?.relativePath || entry.file;

    for (const { module: specifier } of entry.imports || []) {
      if (typeof specifier !== 'string') continue;

      const isLocal = specifier.startsWith('.') || specifier.startsWith('/');
      if (!isLocal) {
        usedPackages.add(toPackageName(specifier));
      }

      nodeFor(filePath).imports.push(specifier);
    }

    for (const exported of entry.exports || []) {
      nodeFor(filePath).exports.push(exported);
    }
  }

  return { graph, usedPackages, unusedExports, fileCount: graph.size };
}
59
+
60
+ export default { analyseImports };
@@ -0,0 +1,240 @@
1
+ // src/scanner/discovery.mjs
2
+ // File discovery utilities
3
+
4
+ import { statSync, existsSync, readFileSync } from 'fs';
5
+ import { join, extname, relative } from 'path';
6
+ import { glob } from 'glob';
7
+
8
/**
 * Read the project's .gitmodules file and turn every `path = ...` entry into
 * a glob that covers the whole submodule directory.
 *
 * @param {string} projectPath - Project root path
 * @returns {string[]} - Array of submodule paths (as glob patterns); empty when
 *   the file is missing or cannot be read
 */
function getGitSubmodulePaths(projectPath) {
  const gitmodulesFile = join(projectPath, '.gitmodules');
  if (!existsSync(gitmodulesFile)) {
    return [];
  }

  try {
    const text = readFileSync(gitmodulesFile, 'utf-8');
    // Match: path = vendor/shared-lib
    const entries = text.matchAll(/^\s*path\s*=\s*(.+)$/gm);
    // "<dir>/**" excludes everything below the submodule directory.
    return Array.from(entries, ([, rawPath]) => `${rawPath.trim()}/**`);
  } catch {
    // Best effort: an unreadable .gitmodules simply contributes no excludes.
    return [];
  }
}
32
+
33
/**
 * Glob patterns ignored during file discovery when the caller does not pass
 * its own `exclude` list.
 */
const DEFAULT_EXCLUDE = [
  // Package manager directories, VCS metadata and build output
  '**/node_modules/**',
  '**/bower_components/**',
  '**/jspm_packages/**',
  '**/web_modules/**',
  '**/.git/**',
  '**/dist/**',
  '**/build/**',
  '**/.swynx-quarantine/**',
  '**/coverage/**',
  '**/*.min.js',
  '**/*.min.css',

  // Vendored third-party code
  '**/third_party/**',
  '**/3rdparty/**',
  '**/vendor/**',

  // Log directories and files (can be huge, not code)
  '**/logs/**',
  '**/log/**',
  '**/*.log',

  // Temp/cache directories
  '**/tmp/**',
  '**/temp/**',
  '**/.cache/**',
  '**/cache/**',

  // Python cache
  '**/__pycache__/**',
  '**/*.pyc',
  '**/*.pyo',

  // Other common non-JS caches
  '**/.pytest_cache/**',
  '**/.mypy_cache/**',

  // Data files
  '**/*.sql',
  '**/*.sqlite',
  '**/*.sqlite3',
  '**/*.db',

  // Large binary/media (analyzed separately via assets)
  '**/*.mp4',
  '**/*.mov',
  '**/*.avi',
  '**/*.zip',
  '**/*.tar',
  '**/*.gz',
  '**/*.rar',

  // Test fixture / baseline directories
  '**/testdata/**',
  '**/test-data/**',
  '**/test_data/**',
  '**/fixtures/**',
  '**/fixture/**',
  '**/TestData/**',
  '**/test-cases/**',
  '**/test_cases/**',
  '**/testcases/**',
  '**/conformance/**',
  '**/test-fixture/**',
  '**/tests/baselines/**',
  '**/test/baselines/**',

  // Compiler test input directories
  '**/cases/**/*.ts',
  '**/test/cases/**',

  // C# intermediate / compiled output
  '**/obj/**',
  '**/bin/Debug/**',
  '**/bin/Release/**',

  // C# scaffolding baselines (test-generated output)
  '**/Scaffolding/Baselines/**',

  // Rust compiler test inputs (standalone files compiled by test harness, not source code)
  '**/tests/ui/**',
  '**/tests/derive_ui/**',
  '**/tests/compile-fail/**',
  '**/tests/run-pass/**',
  '**/tests/run-fail/**',
  '**/tests/ui-fulldeps/**',
  '**/tests/pretty/**',
  '**/tests/mir-opt/**',
  '**/tests/assembly/**',
  '**/tests/codegen/**',
  '**/tests/debuginfo/**',
  '**/tests/incremental/**',
  '**/tests/codegen-llvm/**',
  '**/tests/rustdoc-html/**',
  '**/tests/crashes/**',
  '**/tests/assembly-llvm/**',
  '**/tests/rustdoc-ui/**',
  '**/tests/rustdoc-js/**',
  '**/tests/rustdoc-json/**',
  '**/tests/codegen-units/**',
  '**/tests/coverage-run-rustdoc/**',

  // Cypress/E2E system test fixture projects (standalone apps used as test targets)
  '**/system-tests/projects/**',
  '**/system-tests/project-fixtures/**',

  // RustPython test snippet inputs
  '**/extra_tests/snippets/**',

  // Python stdlib copies (RustPython, cpython)
  '**/Lib/encodings/**',

  // Python vendored third-party code
  '**/_vendor/**',
  '**/_distutils/**',

  // Compiled/bundled static assets (Phoenix/Elixir)
  '**/static/assets/**',

  // Generated protobuf/gRPC output directories
  '**/gen/proto/**',

  // Snapshots
  '**/__snapshots__/**',
  '**/snapshots/**',
];
125
+
126
/**
 * Discover all files in project.
 *
 * Runs one glob per include pattern relative to `projectPath`, ignoring the
 * exclude list plus every git submodule directory, and stats each match.
 * A file matched by more than one include pattern is returned only once.
 *
 * @param {string} projectPath - Project root to scan
 * @param {Object} options - Options
 * @param {string[]} [options.exclude] - Ignore globs; replaces DEFAULT_EXCLUDE when given
 * @param {string[]} [options.include] - Globs to scan (default ['**\/*'])
 * @param {Function} [options.onProgress] - Called with { current, total, file }
 * @returns {Promise<Array>} - Entries of { path, relativePath, size, mtime, ext }
 */
export async function discoverFiles(projectPath, options = {}) {
  // Get git submodule paths to exclude
  const submodulePaths = getGitSubmodulePaths(projectPath);

  // Combine default excludes with submodule paths
  const exclude = [...(options.exclude || DEFAULT_EXCLUDE), ...submodulePaths];
  const include = options.include || ['**/*'];
  const onProgress = options.onProgress || (() => {});

  const files = [];
  // Relative paths already handled, so overlapping include patterns
  // (e.g. '**/*' together with 'src/**') do not produce duplicate entries.
  const seen = new Set();

  // Report that we're starting the glob (this allows heartbeat to show activity)
  onProgress({ current: 0, total: 0, file: 'Scanning directory structure...' });

  for (const pattern of include) {
    // Use async glob to allow event loop to run (enables heartbeat during large scans)
    const matches = await glob(pattern, {
      cwd: projectPath,
      ignore: exclude,
      nodir: true,
      absolute: false
    });

    const total = matches.length;
    let processed = 0;

    // Report that glob is complete, now processing files
    onProgress({ current: 0, total, file: `Found ${total} files, processing...` });

    for (const match of matches) {
      if (!seen.has(match)) {
        seen.add(match);
        const fullPath = join(projectPath, match);
        try {
          // statSync throws for paths that vanished or cannot be read, so no
          // separate existence check is needed.
          const stats = statSync(fullPath);
          files.push({
            path: fullPath,
            relativePath: match,
            size: stats.size,
            mtime: stats.mtime.toISOString(),
            ext: extname(match).toLowerCase()
          });
        } catch {
          // Skip files we can't stat
        }
      }

      processed++;
      // Report progress every 500 files (was every 2 — 50K setImmediate calls for 100K files)
      if (processed % 500 === 0 || processed === total) {
        onProgress({ current: processed, total, file: match });
        // Yield to event loop to allow heartbeat to fire
        await new Promise((resolve) => setImmediate(resolve));
      }
    }
  }

  return files;
}
187
+
188
/**
 * Categorise files by type.
 *
 * Each file is placed in exactly one bucket according to its `ext` value;
 * anything whose extension is not listed ends up in `other`.
 *
 * @param {Array} files - File entries exposing an `ext` property (e.g. '.ts')
 * @returns {Object} - Arrays of files keyed by javascript, python, java,
 *   kotlin, csharp, go, rust, css, assets and other
 */
export function categoriseFiles(files) {
  const extensionGroups = [
    ['javascript', ['.js', '.mjs', '.cjs', '.jsx', '.ts', '.mts', '.cts', '.tsx', '.vue', '.svelte']],
    ['python', ['.py', '.pyi']],
    ['java', ['.java']],
    ['kotlin', ['.kt', '.kts']],
    ['csharp', ['.cs']],
    ['go', ['.go']],
    ['rust', ['.rs']],
    ['css', ['.css', '.scss', '.sass', '.less']],
    ['assets', ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.woff', '.woff2', '.ttf', '.eot']]
  ];

  // Flatten the groups into an extension -> bucket lookup and create the
  // (initially empty) buckets in the same order.
  const bucketByExt = new Map();
  const categories = {};
  for (const [bucket, extensions] of extensionGroups) {
    categories[bucket] = [];
    for (const extension of extensions) {
      bucketByExt.set(extension, bucket);
    }
  }
  categories.other = [];

  for (const file of files) {
    const bucket = bucketByExt.get(file.ext) || 'other';
    categories[bucket].push(file);
  }

  return categories;
}
232
+
233
/**
 * Add up the `size` of every file; entries with a missing or falsy size
 * contribute 0.
 *
 * @param {Array} files - File entries exposing a `size` property
 * @returns {number} - Sum of all sizes (0 for an empty list)
 */
export function getTotalSize(files) {
  return files.reduce((runningTotal, { size }) => runningTotal + (size || 0), 0);
}
239
+
240
+ export default { discoverFiles, categoriseFiles, getTotalSize };
@@ -0,0 +1,82 @@
1
+ // src/scanner/parse-worker.mjs
2
+ // Worker thread for parallel file parsing
3
+ // Receives a chunk of files and a parser type, returns parsed results
4
+
5
+ import { parentPort, workerData } from 'worker_threads';
6
+
7
const { files, parserType } = workerData;

// Send results in batches to reduce structured clone overhead
const BATCH_SIZE = 200;

// Lazy loaders: the parser module is imported only for the requested type.
const PARSER_LOADERS = new Map([
  ['javascript', async () => (await import('./parsers/javascript.mjs')).parseJavaScript],
  ['css', async () => (await import('./parsers/css.mjs')).parseCSS],
  ['assets', async () => (await import('./parsers/assets.mjs')).analyseAssets],
  ['other', async () => (await import('./parsers/registry.mjs')).parseFile]
]);

/**
 * B1: Strip content and function/method bodies before postMessage.
 * Content is only needed for DI/C# analysis in deadcode.mjs — those will re-read from disk.
 * Mutates `result` in place.
 */
function stripHeavyFields(result) {
  result.content = null;

  for (const fn of result.functions || []) {
    fn.body = undefined;
  }

  for (const cls of result.classes || []) {
    cls.body = undefined;
    for (const method of cls.methods || []) {
      method.body = undefined;
    }
  }
}

/**
 * Parse every file handed to this worker and stream the outcome to the parent:
 * 'progress' every 100 files (and after the last one), 'batch' whenever
 * BATCH_SIZE results have accumulated, and a final 'done' carrying the rest.
 * Files whose parser throws or returns a falsy result are left out.
 */
async function run() {
  const loadParser = PARSER_LOADERS.get(parserType);
  if (!loadParser) {
    throw new Error(`Unknown parser type: ${parserType}`);
  }
  const parseFn = await loadParser();

  let pending = [];
  const lastIndex = files.length - 1;

  for (let index = 0; index <= lastIndex; index++) {
    try {
      const parsed = await parseFn(files[index]);
      if (parsed) {
        stripHeavyFields(parsed);
        pending.push(parsed);
      }
    } catch {
      // Skip files that fail to parse
    }

    const done = index + 1;
    // Report progress every 100 files
    if (done % 100 === 0 || index === lastIndex) {
      parentPort.postMessage({ type: 'progress', done, total: files.length });
    }

    // Send results in batches of BATCH_SIZE to reduce peak structured clone memory
    if (pending.length >= BATCH_SIZE) {
      parentPort.postMessage({ type: 'batch', results: pending });
      pending = [];
    }
  }

  // Send any remaining results and signal completion
  parentPort.postMessage({ type: 'done', results: pending });
}

run().catch((err) => {
  parentPort.postMessage({ type: 'error', message: err.message });
});