swynx-lite 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -0
- package/bin/swynx-lite +3 -0
- package/package.json +47 -0
- package/src/clean.mjs +280 -0
- package/src/cli.mjs +264 -0
- package/src/config.mjs +121 -0
- package/src/output/console.mjs +298 -0
- package/src/output/json.mjs +76 -0
- package/src/output/progress.mjs +57 -0
- package/src/scan.mjs +143 -0
- package/src/security.mjs +62 -0
- package/src/shared/fixer/barrel-cleaner.mjs +192 -0
- package/src/shared/fixer/import-cleaner.mjs +237 -0
- package/src/shared/fixer/quarantine.mjs +218 -0
- package/src/shared/scanner/analysers/buildSystems.mjs +647 -0
- package/src/shared/scanner/analysers/configParsers.mjs +1086 -0
- package/src/shared/scanner/analysers/deadcode.mjs +6194 -0
- package/src/shared/scanner/analysers/entryPointDetector.mjs +634 -0
- package/src/shared/scanner/analysers/generatedCode.mjs +297 -0
- package/src/shared/scanner/analysers/imports.mjs +60 -0
- package/src/shared/scanner/discovery.mjs +240 -0
- package/src/shared/scanner/parse-worker.mjs +82 -0
- package/src/shared/scanner/parsers/assets.mjs +44 -0
- package/src/shared/scanner/parsers/csharp.mjs +400 -0
- package/src/shared/scanner/parsers/css.mjs +60 -0
- package/src/shared/scanner/parsers/go.mjs +445 -0
- package/src/shared/scanner/parsers/java.mjs +364 -0
- package/src/shared/scanner/parsers/javascript.mjs +823 -0
- package/src/shared/scanner/parsers/kotlin.mjs +350 -0
- package/src/shared/scanner/parsers/python.mjs +497 -0
- package/src/shared/scanner/parsers/registry.mjs +233 -0
- package/src/shared/scanner/parsers/rust.mjs +427 -0
- package/src/shared/scanner/scan-dead-code.mjs +316 -0
- package/src/shared/security/patterns.mjs +349 -0
- package/src/shared/security/proximity.mjs +84 -0
- package/src/shared/security/scanner.mjs +269 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
// src/shared/scanner/analysers/generatedCode.mjs
|
|
2
|
+
// Detection and exclusion of generated code files
|
|
3
|
+
|
|
4
|
+
import { readFileSync, existsSync } from 'fs';
|
|
5
|
+
import { basename, dirname } from 'path';
|
|
6
|
+
|
|
7
|
+
/**
 * Default path patterns that identify generated code and build output.
 * A file whose path matches any of these is treated as generated and is
 * excluded from dead code analysis.
 *
 * Most directory patterns begin with "/" and therefore expect at least one
 * path segment in front of the directory name.
 */
export const DEFAULT_GENERATED_PATTERNS = [
  // JavaScript/TypeScript
  /\.generated\.(ts|tsx|js|jsx|mjs)$/,
  /\.g\.(ts|js)$/,
  /\/generated\//,
  /\/__generated__\//,
  /\/codegen\//,

  // GraphQL
  /\/graphql\.(ts|tsx|js)$/,
  /\/gql\.(ts|tsx|js)$/,
  /\.graphql\.(ts|tsx|js)$/,
  /types\.generated\.(ts|tsx|js)$/,
  /\/__graphql__\//,

  // Protocol Buffers
  /_pb\.(js|d\.ts)$/,
  /_pb2\.py$/,
  /_pb2_grpc\.py$/,
  /\.pb\.(go|cc|h)$/,

  // OpenAPI/Swagger
  /\/api-client\//,
  /\/swagger-client\//,
  /\/openapi\/.*\.generated\./,

  // Java build outputs
  /\/target\/generated-sources\//,
  /\/target\/generated-test-sources\//,
  /\/build\/generated\//,
  /\/build\/generated-sources\//,
  /_\.java$/, // MapStruct generated

  // .NET build outputs
  /\/obj\//,
  /\.Designer\.cs$/,
  /\.g\.cs$/,
  /\.g\.i\.cs$/,
  /\/Migrations\/.*\.cs$/,
  /^Migrations\/.*\.cs$/, // Migrations at repo root (relative paths)
  /\.AssemblyAttributes\.cs$/, // Auto-generated assembly attributes
  /GlobalSuppressions\.cs$/, // Auto-generated code analysis suppressions

  // Go generated
  /_gen\.go$/,
  /mock_.*\.go$/,
  /.*_mock\.go$/,
  /\/mocks\/.*\.go$/,
  /_string\.go$/, // stringer

  // Rust
  /\.rs\.bk$/,

  // Build outputs (all languages)
  /\/dist\//,
  /\/build\//,
  /\/out\//,
  /\/output\//,
  // The dot is escaped so only the literal dot-directories match;
  // an unescaped "." would also accept e.g. "/xnext/".
  /\/\.next\//,
  /\/\.nuxt\//,
  /\/\.output\//,
  /\/node_modules\//,
  /\/vendor\//,

  // Bazel outputs
  /\/bazel-bin\//,
  /\/bazel-out\//,
  /\/bazel-testlogs\//
];
|
|
80
|
+
|
|
81
|
+
/**
 * Header-comment patterns that indicate generated code.
 *
 * These are tested against the first lines of a file joined into a single
 * multi-line string (see isGeneratedContent). The explicit "generated" markers
 * carry the `m` flag so that `^` matches at the start of any header line;
 * without it they would only match when the marker is the very first line,
 * missing files that put a licence or coding line first.
 *
 * The eslint-disable / ts-nocheck patterns are weaker hints, so they stay
 * anchored to the very start of the file.
 */
const GENERATED_HEADERS = [
  /^\/\/ Code generated .* DO NOT EDIT/im,
  /^\/\/ AUTO-GENERATED/im,
  /^\/\/ GENERATED CODE/im,
  /^\/\/ This file was auto-?generated/im,
  /^# Generated by/im,
  /^# DO NOT EDIT/im,
  /^\s*\* @generated/m,
  /^\/\*\s*eslint-disable\s*\*\//, // Often in generated files (start of file only)
  /^\/\/ @ts-nocheck/, // Often in generated files (start of file only)
  /@generated/,
  /DO NOT EDIT THIS FILE/i,
  /This file is auto-?generated/i,
  /Generated from /i
];
|
|
99
|
+
|
|
100
|
+
/**
 * Check if a file path matches generated code patterns.
 *
 * The path is tested in up to three forms so that the "/dir/" style patterns
 * behave the same regardless of how the caller spells the path:
 *   - as given,
 *   - with backslashes converted to forward slashes,
 *   - with a leading "/" added when the path does not start with one, so a
 *     root-relative path such as "dist/index.js" matches /\/dist\//.
 *
 * @param {string} filePath - File path to check
 * @param {RegExp[]} customPatterns - Additional patterns to check
 * @returns {Object} - { isGenerated: boolean, matchedPattern: string|null, reason: 'path'|null }
 */
export function isGeneratedPath(filePath, customPatterns = []) {
  const raw = String(filePath);
  const normalized = raw.replace(/\\/g, '/');

  // A Set removes duplicates when the forms are identical.
  const candidates = new Set([raw, normalized]);
  if (!normalized.startsWith('/')) {
    candidates.add(`/${normalized}`);
  }

  for (const pattern of [...DEFAULT_GENERATED_PATTERNS, ...customPatterns]) {
    for (const candidate of candidates) {
      // Patterns with the g/y flag keep state in lastIndex between test()
      // calls; reset it so every check starts from the beginning.
      pattern.lastIndex = 0;
      const matched = pattern.test(candidate);
      pattern.lastIndex = 0;

      if (matched) {
        return {
          isGenerated: true,
          matchedPattern: pattern.toString(),
          reason: 'path'
        };
      }
    }
  }

  return { isGenerated: false, matchedPattern: null, reason: null };
}
|
|
121
|
+
|
|
122
|
+
/**
 * Decide from a file's leading lines whether it carries a "generated" header.
 * @param {string} content - File content
 * @param {number} linesToCheck - How many lines from the top are inspected (default 20)
 * @returns {Object} - { isGenerated: boolean, matchedPattern: string|null }
 */
export function isGeneratedContent(content, linesToCheck = 20) {
  // Nothing to inspect (null, undefined, empty string).
  if (!content) {
    return { isGenerated: false };
  }

  // Only the top of the file is considered a header.
  const header = content.split('\n').slice(0, linesToCheck).join('\n');
  const hit = GENERATED_HEADERS.find((candidate) => candidate.test(header));

  if (hit === undefined) {
    return { isGenerated: false, matchedPattern: null };
  }

  return {
    isGenerated: true,
    matchedPattern: hit.toString(),
    reason: 'header'
  };
}
|
|
146
|
+
|
|
147
|
+
/**
 * Decide whether a file is generated, looking at its path first and then,
 * unless disabled, at its header content.
 * @param {string} filePath - File path
 * @param {string} [content] - File content; when falsy the file is read from disk if it exists
 * @param {Object} options - Options ({ customPatterns, checkContent })
 * @returns {Object} - { isGenerated: boolean, reason: string|null }
 */
export function isGeneratedFile(filePath, content = null, options = {}) {
  const { customPatterns = [], checkContent = true } = options;

  // A path match is decisive; content is not consulted.
  const byPath = isGeneratedPath(filePath, customPatterns);
  if (byPath.isGenerated) {
    return byPath;
  }

  if (!checkContent) {
    return { isGenerated: false, reason: null };
  }

  let text = content;
  if (!text && existsSync(filePath)) {
    try {
      text = readFileSync(filePath, 'utf-8');
    } catch {
      // Unreadable file: treat as not generated
      return { isGenerated: false };
    }
  }

  if (!text) {
    return { isGenerated: false, reason: null };
  }

  const byHeader = isGeneratedContent(text);
  return byHeader.isGenerated ? byHeader : { isGenerated: false, reason: null };
}
|
|
185
|
+
|
|
186
|
+
/**
 * Split a list of files into those kept for analysis and those recognised
 * as generated.
 * @param {Array} files - File objects (with relativePath/path and optional content) or plain paths
 * @param {Object} options - Options forwarded to isGeneratedFile
 * @returns {Object} - { included: Array, excluded: Array }
 */
export function filterGeneratedFiles(files, options = {}) {
  const result = { included: [], excluded: [] };

  for (const entry of files) {
    // Prefer the relative path, then the absolute one, then the entry itself.
    const target = entry.relativePath || entry.path || entry;
    const verdict = isGeneratedFile(target, entry.content || null, options);

    if (!verdict.isGenerated) {
      result.included.push(entry);
      continue;
    }

    result.excluded.push({
      file: target,
      reason: verdict.reason,
      pattern: verdict.matchedPattern
    });
  }

  return result;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Get patterns for a specific codegen type.
 *
 * Only the types listed in the table below are recognised. Any other value,
 * including names that exist on Object.prototype such as "constructor" or
 * "toString", yields an empty array.
 *
 * @param {string} type - Codegen type (graphql, protobuf, openapi, thrift, grpc)
 * @returns {RegExp[]} - Patterns for that type, or [] when the type is unknown
 */
export function getPatternsForCodegenType(type) {
  const typePatterns = {
    graphql: [
      /\.graphql\.(ts|tsx|js)$/,
      /\/graphql\.(ts|tsx|js)$/,
      /\/gql\.(ts|tsx|js)$/,
      /types\.generated\.(ts|tsx|js)$/,
      /\/__generated__\//,
      /operations\.(ts|tsx|js)$/
    ],
    protobuf: [
      /_pb\.(js|d\.ts)$/,
      /_pb2\.py$/,
      /_pb2_grpc\.py$/,
      /\.pb\.(go|cc|h)$/,
      /\.pb\.ts$/
    ],
    openapi: [
      /\/api-client\//,
      /\/swagger-client\//,
      /\/openapi\/.*\.generated\./,
      /api\.generated\.(ts|js)$/
    ],
    thrift: [
      /_types\.(js|ts)$/,
      /\.thrift\.ts$/
    ],
    grpc: [
      /_grpc_pb\.(js|ts)$/,
      /_grpc\.pb\.go$/
    ]
  };

  // Own-property check: a plain `typePatterns[type]` lookup would also find
  // inherited members (e.g. typePatterns['constructor'] is the Object function).
  if (!Object.prototype.hasOwnProperty.call(typePatterns, type)) {
    return [];
  }

  return typePatterns[type];
}
|
|
255
|
+
|
|
256
|
+
/**
 * Look in the project root for well-known codegen configuration files.
 * @param {string} projectPath - Path to project root
 * @param {Object} codegenConfigs - Config file names by type; an entry replaces the default list for that type
 * @returns {Array} - One { type, file, path } record per config file that exists
 */
export function findCodegenConfigs(projectPath, codegenConfigs = {}) {
  const lookup = {
    graphql: ['codegen.yml', 'codegen.yaml', 'codegen.ts', 'codegen.js', '.graphqlrc.yml', '.graphqlrc.json'],
    protobuf: ['buf.yaml', 'buf.gen.yaml', 'buf.work.yaml'],
    openapi: ['openapi.yaml', 'openapi.yml', 'openapi.json', 'swagger.yaml', 'swagger.json'],
    grpc: ['grpc-tools.config.js'],
    // Caller-supplied entries are applied last and win per type.
    ...codegenConfigs
  };

  return Object.entries(lookup).flatMap(([type, candidates]) =>
    [...candidates]
      .map((file) => ({ type, file, path: `${projectPath}/${file}` }))
      .filter((candidate) => existsSync(candidate.path))
  );
}
|
|
288
|
+
|
|
289
|
+
// Default export: the module's named exports bundled into one object.
const generatedCodeApi = {
  isGeneratedPath,
  isGeneratedContent,
  isGeneratedFile,
  filterGeneratedFiles,
  getPatternsForCodegenType,
  findCodegenConfigs,
  DEFAULT_GENERATED_PATTERNS
};

export default generatedCodeApi;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
// src/shared/scanner/analysers/imports.mjs
|
|
2
|
+
// Import/export graph analysis
|
|
3
|
+
|
|
4
|
+
/**
 * Build a per-file import/export graph and collect the package names that
 * are imported anywhere.
 *
 * @param {Array} jsAnalysis - Parsed file records ({ file, imports, exports })
 * @param {Function} onProgress - Called with { current, total, file } on every second file and on the last one
 * @returns {Promise<Object>} - { graph: Map, usedPackages: Set, unusedExports: Array, fileCount: number }
 */
export async function analyseImports(jsAnalysis, onProgress = () => {}) {
  const graph = new Map();
  const usedPackages = new Set();
  const unusedExports = [];
  const total = jsAnalysis.length;

  // Identifier of a record: its relative path when present, else `file` itself.
  const labelOf = (entry) => entry.file?.relativePath || entry.file;

  // Fetch the graph node for a file, creating it on first use.
  const nodeFor = (key) => {
    if (!graph.has(key)) {
      graph.set(key, { imports: [], exports: [], importedBy: [] });
    }
    return graph.get(key);
  };

  for (let index = 0; index < jsAnalysis.length; index += 1) {
    const entry = jsAnalysis[index];

    // Report progress and yield to the event loop on even indices and at the end.
    const isLast = index === total - 1;
    if (index % 2 === 0 || isLast) {
      onProgress({ current: index + 1, total, file: labelOf(entry) });
      await new Promise((resolve) => setImmediate(resolve));
    }

    const filePath = labelOf(entry);

    for (const imp of entry.imports || []) {
      const specifier = imp.module;
      if (typeof specifier !== "string") {
        continue;
      }

      // Anything not starting with "." or "/" is counted as a package import.
      const isLocal = specifier.startsWith('.') || specifier.startsWith('/');
      if (!isLocal) {
        const segments = specifier.split('/');
        // Scoped packages keep "@scope/name"; others keep the first segment.
        const packageName = specifier.startsWith('@')
          ? segments.slice(0, 2).join('/')
          : segments[0];
        usedPackages.add(packageName);
      }

      nodeFor(filePath).imports.push(specifier);
    }

    for (const exp of entry.exports || []) {
      nodeFor(filePath).exports.push(exp);
    }
  }

  return {
    graph,
    usedPackages,
    unusedExports,
    fileCount: graph.size
  };
}
|
|
59
|
+
|
|
60
|
+
// Default export: the module's single public function wrapped in an object.
const importsApi = { analyseImports };

export default importsApi;
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
// src/shared/scanner/discovery.mjs
|
|
2
|
+
// File discovery utilities
|
|
3
|
+
|
|
4
|
+
import { statSync, existsSync, readFileSync } from 'fs';
|
|
5
|
+
import { join, extname, relative } from 'path';
|
|
6
|
+
import { glob } from 'glob';
|
|
7
|
+
|
|
8
|
+
/**
 * Read the project's .gitmodules file and turn every `path = ...` entry
 * into a glob that covers the whole submodule directory.
 * @param {string} projectPath - Project root path
 * @returns {string[]} - Glob patterns ("<path>/**"); empty when the file is missing or unreadable
 */
function getGitSubmodulePaths(projectPath) {
  const manifest = join(projectPath, '.gitmodules');
  if (!existsSync(manifest)) {
    return [];
  }

  let text;
  try {
    text = readFileSync(manifest, 'utf-8');
  } catch {
    // Best effort: an unreadable file means no extra excludes
    return [];
  }

  // Example of a matching line: path = vendor/shared-lib
  return Array.from(
    text.matchAll(/^\s*path\s*=\s*(.+)$/gm),
    ([, rawPath]) => `${rawPath.trim()}/**`
  );
}
|
|
32
|
+
|
|
33
|
+
// Glob patterns ignored during discovery when the caller supplies no
// `exclude` option of its own.
const DEFAULT_EXCLUDE = [
  // Package manager and VCS directories, build output, coverage, minified bundles
  '**/node_modules/**',
  '**/bower_components/**',
  '**/jspm_packages/**',
  '**/web_modules/**',
  '**/.git/**',
  '**/dist/**',
  '**/build/**',
  '**/.swynx-quarantine/**',
  '**/coverage/**',
  '**/*.min.js',
  '**/*.min.css',

  // Vendored third-party code
  '**/third_party/**',
  '**/3rdparty/**',
  '**/vendor/**',

  // Logs (can be huge, not code)
  '**/logs/**',
  '**/log/**',
  '**/*.log',

  // Temp and cache directories
  '**/tmp/**',
  '**/temp/**',
  '**/.cache/**',
  '**/cache/**',

  // Python bytecode and tool caches
  '**/__pycache__/**',
  '**/*.pyc',
  '**/*.pyo',
  '**/.pytest_cache/**',
  '**/.mypy_cache/**',

  // Data files
  '**/*.sql',
  '**/*.sqlite',
  '**/*.sqlite3',
  '**/*.db',

  // Large binary/media files (analyzed separately via assets)
  '**/*.mp4',
  '**/*.mov',
  '**/*.avi',
  '**/*.zip',
  '**/*.tar',
  '**/*.gz',
  '**/*.rar',

  // Test fixture / baseline directories
  '**/testdata/**',
  '**/test-data/**',
  '**/test_data/**',
  '**/fixtures/**',
  '**/fixture/**',
  '**/TestData/**',
  '**/test-cases/**',
  '**/test_cases/**',
  '**/testcases/**',
  '**/conformance/**',
  '**/test-fixture/**',
  '**/tests/baselines/**',
  '**/test/baselines/**',

  // Compiler test input directories
  '**/cases/**/*.ts',
  '**/test/cases/**',

  // C# intermediate / compiled output
  '**/obj/**',
  '**/bin/Debug/**',
  '**/bin/Release/**',

  // C# scaffolding baselines (test-generated output)
  '**/Scaffolding/Baselines/**',

  // Rust compiler test inputs (standalone files compiled by the test harness, not source code)
  '**/tests/ui/**',
  '**/tests/derive_ui/**',
  '**/tests/compile-fail/**',
  '**/tests/run-pass/**',
  '**/tests/run-fail/**',
  '**/tests/ui-fulldeps/**',
  '**/tests/pretty/**',
  '**/tests/mir-opt/**',
  '**/tests/assembly/**',
  '**/tests/codegen/**',
  '**/tests/debuginfo/**',
  '**/tests/incremental/**',
  '**/tests/codegen-llvm/**',
  '**/tests/rustdoc-html/**',
  '**/tests/crashes/**',
  '**/tests/assembly-llvm/**',
  '**/tests/rustdoc-ui/**',
  '**/tests/rustdoc-js/**',
  '**/tests/rustdoc-json/**',
  '**/tests/codegen-units/**',
  '**/tests/coverage-run-rustdoc/**',

  // Cypress/E2E system test fixture projects (standalone apps used as test targets)
  '**/system-tests/projects/**',
  '**/system-tests/project-fixtures/**',

  // RustPython test snippet inputs
  '**/extra_tests/snippets/**',

  // Python stdlib copies (RustPython, cpython)
  '**/Lib/encodings/**',

  // Python vendored third-party code
  '**/_vendor/**',
  '**/_distutils/**',

  // Compiled/bundled static assets (Phoenix/Elixir)
  '**/static/assets/**',

  // Generated protobuf/gRPC output directories
  '**/gen/proto/**',

  // Snapshots
  '**/__snapshots__/**',
  '**/snapshots/**',
];
|
|
125
|
+
|
|
126
|
+
/**
 * Walk the project with glob and return a record for every file found.
 *
 * Options read here:
 *   - exclude: ignore globs used instead of DEFAULT_EXCLUDE (git submodule
 *     directories are appended either way)
 *   - include: glob patterns to search (default ['**\/*'])
 *   - onProgress: callback receiving { current, total, file }
 *
 * @param {string} projectPath - Project root path
 * @param {Object} options - See above
 * @returns {Promise<Array>} - Records of shape { path, relativePath, size, mtime, ext }
 */
export async function discoverFiles(projectPath, options = {}) {
  const submoduleGlobs = getGitSubmodulePaths(projectPath);
  const ignore = [...(options.exclude || DEFAULT_EXCLUDE), ...submoduleGlobs];
  const patterns = options.include || ['**/*'];
  const notify = options.onProgress || (() => {});
  const discovered = [];

  // Announce the start so a caller's heartbeat has something to show while glob runs
  notify({ current: 0, total: 0, file: 'Scanning directory structure...' });

  for (const pattern of patterns) {
    // Async glob keeps the event loop free during large scans
    const relativePaths = await glob(pattern, {
      cwd: projectPath,
      ignore,
      nodir: true,
      absolute: false
    });
    const total = relativePaths.length;

    notify({ current: 0, total, file: `Found ${total} files, processing...` });

    let processed = 0;
    for (const relativePath of relativePaths) {
      const absolutePath = join(projectPath, relativePath);

      if (existsSync(absolutePath)) {
        try {
          const { size, mtime } = statSync(absolutePath);
          discovered.push({
            path: absolutePath,
            relativePath,
            size,
            mtime: mtime.toISOString(),
            ext: extname(relativePath).toLowerCase()
          });
        } catch (e) {
          // Files that cannot be stat'ed are left out
        }
      }

      processed += 1;

      // Progress every 500 files and after the last one, then yield to the event loop
      const atCheckpoint = processed % 500 === 0 || processed === total;
      if (atCheckpoint) {
        notify({ current: processed, total, file: relativePath });
        await new Promise((resolve) => setImmediate(resolve));
      }
    }
  }

  return discovered;
}
|
|
187
|
+
|
|
188
|
+
/**
 * Group file records by language/kind according to their `ext` field.
 * Records whose extension is not listed end up in `other`.
 */
export function categoriseFiles(files) {
  // Category name -> extensions that belong to it (order defines the result's key order).
  const extensionGroups = [
    ['javascript', ['.js', '.mjs', '.cjs', '.jsx', '.ts', '.mts', '.cts', '.tsx', '.vue', '.svelte']],
    ['python', ['.py', '.pyi']],
    ['java', ['.java']],
    ['kotlin', ['.kt', '.kts']],
    ['csharp', ['.cs']],
    ['go', ['.go']],
    ['rust', ['.rs']],
    ['css', ['.css', '.scss', '.sass', '.less']],
    ['assets', ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.woff', '.woff2', '.ttf', '.eot']]
  ];

  const categories = {};
  const categoryByExt = new Map();
  for (const [name, extensions] of extensionGroups) {
    categories[name] = [];
    for (const extension of extensions) {
      categoryByExt.set(extension, name);
    }
  }
  categories.other = [];

  for (const file of files) {
    const bucket = categoryByExt.get(file.ext) ?? 'other';
    categories[bucket].push(file);
  }

  return categories;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Add up the `size` of every file record; a missing or falsy size counts as 0.
 */
export function getTotalSize(files) {
  let total = 0;
  files.forEach((entry) => {
    total = total + (entry.size || 0);
  });
  return total;
}
|
|
239
|
+
|
|
240
|
+
// Default export: the module's public functions bundled into one object.
const discoveryApi = { discoverFiles, categoriseFiles, getTotalSize };

export default discoveryApi;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// src/shared/scanner/parse-worker.mjs
|
|
2
|
+
// Worker thread for parallel file parsing
|
|
3
|
+
// Receives a chunk of files and a parser type, returns parsed results
|
|
4
|
+
|
|
5
|
+
import { parentPort, workerData } from 'worker_threads';
|
|
6
|
+
|
|
7
|
+
// Job description passed in through workerData: the files to parse and
// the name of the parser to use.
const files = workerData.files;
const parserType = workerData.parserType;

// Number of results per 'batch' message; batching reduces structured clone overhead.
const BATCH_SIZE = 200;
|
|
10
|
+
|
|
11
|
+
/**
 * Parse every file in `files` with the parser selected by `parserType` and
 * stream the results to the parent thread.
 *
 * Messages posted: 'progress' ({ done, total }) after every 100th file and
 * after the last one, 'batch' ({ results }) whenever BATCH_SIZE results have
 * accumulated, and a final 'done' ({ results }) with whatever remains.
 * Throws for an unknown parser type.
 */
async function run() {
  // Parser name -> lazy loader, so only the selected parser module is imported.
  const loaders = new Map([
    ['javascript', async () => (await import('./parsers/javascript.mjs')).parseJavaScript],
    ['css', async () => (await import('./parsers/css.mjs')).parseCSS],
    ['assets', async () => (await import('./parsers/assets.mjs')).analyseAssets],
    ['other', async () => (await import('./parsers/registry.mjs')).parseFile]
  ]);

  const load = loaders.get(parserType);
  if (!load) {
    throw new Error(`Unknown parser type: ${parserType}`);
  }
  const parseFn = await load();

  const pending = [];

  for (let index = 0; index < files.length; index += 1) {
    try {
      const parsed = await parseFn(files[index]);
      if (parsed) {
        // B1: drop content and function/class/method bodies before postMessage.
        // Content is only needed for DI/C# analysis in deadcode.mjs — those will re-read from disk
        parsed.content = null;
        for (const fn of parsed.functions || []) {
          fn.body = undefined;
        }
        for (const cls of parsed.classes || []) {
          cls.body = undefined;
          for (const method of cls.methods || []) {
            method.body = undefined;
          }
        }
        pending.push(parsed);
      }
    } catch {
      // A file that fails to parse is skipped
    }

    const done = index + 1;

    // Progress after every 100th file and after the final one
    if (done % 100 === 0 || done === files.length) {
      parentPort.postMessage({ type: 'progress', done, total: files.length });
    }

    // Flush once a full batch has accumulated to cap peak structured clone memory
    if (pending.length >= BATCH_SIZE) {
      parentPort.postMessage({ type: 'batch', results: pending.splice(0) });
    }
  }

  // Remaining results double as the completion signal
  parentPort.postMessage({ type: 'done', results: pending });
}
|
|
79
|
+
|
|
80
|
+
// Start the worker; a failure that escapes run() is reported to the parent
// thread as an 'error' message carrying the error's message text.
run().catch((failure) => {
  const { message } = failure;
  parentPort.postMessage({ type: 'error', message });
});
|