@kevinrabun/judges 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +102 -7
- package/dist/api.d.ts +1 -0
- package/dist/api.d.ts.map +1 -1
- package/dist/api.js +2 -0
- package/dist/api.js.map +1 -1
- package/dist/ast/index.d.ts +12 -1
- package/dist/ast/index.d.ts.map +1 -1
- package/dist/ast/index.js +72 -3
- package/dist/ast/index.js.map +1 -1
- package/dist/ast/tree-sitter-ast.d.ts +34 -0
- package/dist/ast/tree-sitter-ast.d.ts.map +1 -0
- package/dist/ast/tree-sitter-ast.js +747 -0
- package/dist/ast/tree-sitter-ast.js.map +1 -0
- package/dist/cli.d.ts +16 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +317 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +13 -15
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +44 -36
- package/dist/index.js.map +1 -1
- package/grammars/tree-sitter-c_sharp.wasm +0 -0
- package/grammars/tree-sitter-go.wasm +0 -0
- package/grammars/tree-sitter-java.wasm +0 -0
- package/grammars/tree-sitter-python.wasm +0 -0
- package/grammars/tree-sitter-rust.wasm +0 -0
- package/package.json +11 -2
- package/server.json +2 -2
|
@@ -0,0 +1,747 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
2
|
+
// Tree-sitter AST — Real syntax-tree analysis for Python, Go, Rust, Java, C#
|
|
3
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
4
|
+
// Uses web-tree-sitter (WASM-based, zero native deps) to parse source code
|
|
5
|
+
// into a full syntax tree, then walks the tree to extract function metrics,
|
|
6
|
+
// dead code, deep nesting, type-safety issues, and imports.
|
|
7
|
+
//
|
|
8
|
+
// This replaces the lightweight structural parser for Tier 2 languages,
|
|
9
|
+
// elevating them from regex-based heuristics to real AST analysis — the
|
|
10
|
+
// same depth the TypeScript Compiler API provides for JS/TS.
|
|
11
|
+
//
|
|
12
|
+
// Graceful degradation: if tree-sitter WASM grammars aren't available at
|
|
13
|
+
// runtime, the caller can fall back to the structural parser.
|
|
14
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
15
|
+
import { createRequire } from "node:module";
|
|
16
|
+
import { fileURLToPath } from "node:url";
|
|
17
|
+
import { dirname, join } from "node:path";
|
|
18
|
+
const require = createRequire(import.meta.url);
|
|
19
|
+
// ─── Lazy Initialization ────────────────────────────────────────────────────
// Memoized promise for the one-time runtime initialization performed by
// ensureInit(); stays null until ensureInit() is called for the first time.
let initPromise = null;
// The loaded "web-tree-sitter" module. Remains null until initialization has
// succeeded, which is what the synchronous readiness checks test for.
let parserModule = null;
// Grammar file name mapping
// Keys are the language identifiers accepted by this module; values are the
// WASM grammar file names looked up inside GRAMMAR_DIR.
const GRAMMAR_FILES = {
    python: "tree-sitter-python.wasm",
    go: "tree-sitter-go.wasm",
    rust: "tree-sitter-rust.wasm",
    java: "tree-sitter-java.wasm",
    csharp: "tree-sitter-c_sharp.wasm",
};
// Cached language instances
// Maps a language identifier to its loaded grammar so each file is loaded once.
const languageCache = new Map();
// Resolve grammar directory relative to this module
const __dirname = dirname(fileURLToPath(import.meta.url));
// In development: src/ast/ → ../../grammars/
// In dist: dist/ast/ → ../../grammars/
const GRAMMAR_DIR = join(__dirname, "..", "..", "grammars");
|
|
37
|
+
/**
 * Load and initialize the web-tree-sitter runtime at most once.
 *
 * The first call starts the initialization and memoizes its promise; every
 * later call returns that same promise. Any failure while requiring or
 * initializing the module is reported as `false` rather than thrown, so
 * callers can fall back to another parser.
 *
 * @returns {Promise<boolean>} true when the runtime is ready, false otherwise.
 */
async function ensureInit() {
    if (!initPromise) {
        const loadRuntime = async () => {
            try {
                const treeSitter = require("web-tree-sitter");
                await treeSitter.Parser.init();
                // Only publish the module once init() has resolved.
                parserModule = treeSitter;
            }
            catch {
                return false;
            }
            return true;
        };
        initPromise = loadRuntime();
    }
    return initPromise;
}
|
|
53
|
+
/**
 * Return the grammar for `lang`, loading it from GRAMMAR_DIR on first use.
 *
 * @param {string} lang - Language identifier (a key of GRAMMAR_FILES).
 * @returns {Promise<object|null>} The loaded grammar, or null when the
 *   language is unknown or loading fails for any reason.
 */
async function getLanguage(lang) {
    if (!languageCache.has(lang)) {
        const wasmName = GRAMMAR_FILES[lang];
        if (!wasmName) {
            return null;
        }
        try {
            const loaded = await parserModule.Language.load(join(GRAMMAR_DIR, wasmName));
            languageCache.set(lang, loaded);
        }
        catch {
            // Covers a missing/corrupt grammar file as well as an
            // uninitialized parserModule.
            return null;
        }
    }
    return languageCache.get(lang);
}
|
|
68
|
+
// ─── Public API ──────────────────────────────────────────────────────────────
|
|
69
|
+
/**
 * Report whether tree-sitter analysis can be used for `lang`.
 *
 * Initializes the runtime if needed and loads (and caches) the grammar, so
 * awaiting this function is what makes analyzeWithTreeSitter usable.
 *
 * @param {string} lang - Language identifier.
 * @returns {Promise<boolean>} true when both runtime and grammar are loaded.
 */
export async function isTreeSitterAvailable(lang) {
    if (!(await ensureInit())) {
        return false;
    }
    const loaded = await getLanguage(lang);
    return loaded !== null;
}
|
|
80
|
+
/**
 * Synchronous readiness probe.
 *
 * Returns true only when the tree-sitter module has already been initialized
 * AND the grammar for `lang` is already in the cache. It never triggers any
 * loading itself, so it is safe on synchronous code paths: when nothing has
 * been loaded yet it returns false and the caller can use the structural
 * parser instead.
 *
 * @param {string} lang - Language identifier.
 * @returns {boolean}
 */
export function isTreeSitterReadySync(lang) {
    if (parserModule === null) {
        return false;
    }
    return languageCache.has(lang);
}
|
|
90
|
+
/**
 * Synchronous tree-sitter analysis.
 *
 * Only valid once isTreeSitterReadySync(lang) returns true, i.e. the parser
 * module and the grammar are both already in memory. Produces the same
 * result shape as analyzeWithTreeSitter.
 *
 * @param {string} code - Source text to analyse.
 * @param {string} language - Language identifier.
 * @returns {object} The analysis result built by parseAndAnalyze.
 * @throws {Error} When the runtime is not initialized or the grammar for
 *   `language` has not been loaded.
 */
export function analyzeWithTreeSitterSync(code, language) {
    if (parserModule) {
        const loadedGrammar = languageCache.get(language);
        if (loadedGrammar) {
            return parseAndAnalyze(code, language, loadedGrammar);
        }
        throw new Error(`Tree-sitter grammar for ${language} not loaded`);
    }
    throw new Error("Tree-sitter not initialized");
}
|
|
107
|
+
/**
 * Analyse source code with tree-sitter's real syntax tree.
 *
 * Produces the same result shape as the other parsers in this package.
 *
 * IMPORTANT: await isTreeSitterAvailable(lang) first; this function does not
 * initialize the runtime itself. When that check returns false, fall back to
 * analyzeStructurally().
 *
 * @param {string} code - Source text to analyse.
 * @param {string} language - Language identifier.
 * @returns {Promise<object>} The analysis result built by parseAndAnalyze.
 * @throws {Error} When the runtime is not initialized or no grammar can be
 *   loaded for `language`.
 */
export async function analyzeWithTreeSitter(code, language) {
    if (parserModule) {
        const loadedGrammar = await getLanguage(language);
        if (loadedGrammar) {
            return parseAndAnalyze(code, language, loadedGrammar);
        }
        throw new Error(`Tree-sitter grammar for ${language} not available`);
    }
    throw new Error("Tree-sitter not initialized");
}
|
|
123
|
+
/**
 * Shared parsing + analysis logic used by both sync and async entry points.
 *
 * Parses `code` with a fresh parser bound to `grammar`, runs every extractor
 * over the resulting tree and returns plain data (strings, numbers, arrays).
 *
 * The parser and the tree are explicitly released afterwards: web-tree-sitter
 * objects are backed by WASM memory that is freed through their delete()
 * methods, so dropping the references alone would leak on every call. This is
 * safe because nothing in the returned object references a tree node.
 *
 * @param {string} code - Source text to analyse.
 * @param {string} language - Language identifier used to pick node-type tables.
 * @param {object} grammar - Loaded tree-sitter language for `language`.
 * @returns {object} File-level metrics plus per-function details.
 * @throws {Error} When the parser does not produce a tree.
 */
function parseAndAnalyze(code, language, grammar) {
    const parser = new parserModule.Parser();
    let tree = null;
    try {
        parser.setLanguage(grammar);
        tree = parser.parse(code);
        if (!tree) {
            throw new Error(`Tree-sitter failed to parse ${language} source`);
        }
        const root = tree.rootNode;
        const lines = code.split("\n");
        // Extract all analysis data from the tree
        const functions = extractFunctions(root, language);
        const deadCodeLines = detectDeadCode(root, language);
        const deepNestLines = detectDeepNesting(root, language);
        const typeAnyLines = detectWeakTypes(root, language);
        const imports = extractImports(root, language);
        const classes = extractClasses(root, language);
        // Compute file-level metrics. A file without functions still has one
        // execution path, hence the `|| 1`.
        const fileCyclomaticComplexity = functions.reduce((sum, f) => sum + f.cyclomaticComplexity, 0) || 1;
        const maxNestingDepth = functions.reduce((max, f) => Math.max(max, f.maxNestingDepth), 0);
        return {
            language,
            totalLines: lines.length,
            functions,
            fileCyclomaticComplexity,
            maxNestingDepth,
            deadCodeLines,
            deepNestLines,
            typeAnyLines,
            imports,
            classes: classes.length > 0 ? classes : undefined,
        };
    }
    finally {
        // Release WASM-side resources even when an extractor throws.
        if (tree) {
            tree.delete();
        }
        parser.delete();
    }
}
|
|
155
|
+
// ─── Function Extraction ────────────────────────────────────────────────────
/** Node types that represent function/method definitions per language */
const FUNCTION_NODE_TYPES = {
    python: ["function_definition"],
    go: ["function_declaration", "method_declaration"],
    rust: ["function_item"],
    java: ["method_declaration", "constructor_declaration"],
    csharp: ["method_declaration", "constructor_declaration", "local_function_statement"],
};
/**
 * Collect a metrics record for every function-like node under `root`.
 *
 * @param {object} root - Root syntax node of the parsed file.
 * @param {string} language - Language identifier; unknown languages yield [].
 * @returns {object[]} One record per function, in document (pre-order) order.
 */
function extractFunctions(root, language) {
    const wantedTypes = FUNCTION_NODE_TYPES[language] || [];
    // Computed once so every function can be matched against its class.
    const enclosingClasses = extractClassRanges(root, language);
    const collected = [];
    walkTree(root, (candidate) => {
        if (!wantedTypes.includes(candidate.type)) {
            return;
        }
        const summary = analyzeFunctionNode(candidate, language, enclosingClasses);
        if (summary) {
            collected.push(summary);
        }
    });
    return collected;
}
|
|
177
|
+
/**
 * Find every class-like node that has a `name` field and record its name
 * together with its 1-based, inclusive line span.
 *
 * @param {object} root - Root syntax node of the parsed file.
 * @param {string} language - Language identifier (key of CLASS_NODE_TYPES).
 * @returns {{name: string, startLine: number, endLine: number}[]}
 */
function extractClassRanges(root, language) {
    const wantedTypes = CLASS_NODE_TYPES[language] || [];
    const spans = [];
    walkTree(root, (candidate) => {
        if (!wantedTypes.includes(candidate.type)) {
            return;
        }
        const identifier = candidate.childForFieldName("name");
        if (!identifier) {
            return;
        }
        spans.push({
            name: identifier.text,
            // Tree-sitter rows are 0-based; reported lines are 1-based.
            startLine: candidate.startPosition.row + 1,
            endLine: candidate.endPosition.row + 1,
        });
    });
    return spans;
}
|
|
194
|
+
/**
 * Build the metrics record for a single function/method node.
 *
 * The reported name is qualified with the Go receiver type (for Go methods)
 * or with the enclosing class name (all other languages). When classes are
 * nested, the INNERMOST enclosing class is used, so a method of an inner
 * class is not attributed to the outer one.
 *
 * @param {object} node - Function-like syntax node.
 * @param {string} language - Language identifier.
 * @param {{name: string, startLine: number, endLine: number}[]} classRanges -
 *   Line spans of all class-like declarations in the file.
 * @returns {object} Record with name, startLine, endLine, lineCount,
 *   parameterCount, cyclomaticComplexity and maxNestingDepth, plus the
 *   optional decorators, className and isAsync properties.
 */
function analyzeFunctionNode(node, language, classRanges) {
    // Get function name
    const nameNode = node.childForFieldName("name");
    let name = nameNode?.text || "<anonymous>";
    // For Go method_declaration, prefix the receiver type
    if (language === "go" && node.type === "method_declaration") {
        const receiver = node.childForFieldName("receiver");
        const typeNode = receiver ? findFirstByType(receiver, "type_identifier") : null;
        if (typeNode) {
            name = `${typeNode.text}.${name}`;
        }
    }
    const startLine = node.startPosition.row + 1;
    const endLine = node.endPosition.row + 1;
    // Pick the tightest class span that fully contains the function; on equal
    // spans the earliest entry wins.
    const containingClass = classRanges.reduce((best, candidate) => {
        if (startLine < candidate.startLine || endLine > candidate.endLine) {
            return best;
        }
        if (!best) {
            return candidate;
        }
        const candidateSpan = candidate.endLine - candidate.startLine;
        const bestSpan = best.endLine - best.startLine;
        return candidateSpan < bestSpan ? candidate : best;
    }, undefined);
    if (containingClass && language !== "go") {
        name = `${containingClass.name}.${name}`;
    }
    const info = {
        name,
        startLine,
        endLine,
        lineCount: endLine - startLine + 1,
        parameterCount: countParameters(node, language),
        cyclomaticComplexity: computeCyclomaticComplexity(node, language),
        maxNestingDepth: computeMaxNesting(node, language, 0),
    };
    // Optional properties are only attached when they carry information.
    const decorators = extractDecorators(node, language);
    if (decorators.length > 0) {
        info.decorators = decorators;
    }
    if (containingClass) {
        info.className = containingClass.name;
    }
    if (checkIsAsync(node, language)) {
        info.isAsync = true;
    }
    return info;
}
|
|
243
|
+
// ─── Parameter Counting ─────────────────────────────────────────────────────
/** Python parameter node types that count as a declared parameter. */
const PYTHON_PARAMETER_TYPES = new Set([
    "identifier",
    "default_parameter",
    "typed_parameter",
    "typed_default_parameter",
    "list_splat_pattern",
    "dictionary_splat_pattern",
]);
/** True for a Python parameter node that should be counted (not self/cls). */
function isCountedPythonParameter(param) {
    if (!PYTHON_PARAMETER_TYPES.has(param.type)) {
        return false;
    }
    // The implicit receiver is not a real parameter, bare or annotated.
    const nameNode = param.type === "typed_parameter" ? param.namedChildren[0] : param;
    if (param.type === "identifier" || param.type === "typed_parameter") {
        if (nameNode && (nameNode.text === "self" || nameNode.text === "cls")) {
            return false;
        }
    }
    return true;
}
/** Number of parameters declared by one Go parameter-list entry. */
function countGoParameterNames(param) {
    if (param.type === "variadic_parameter_declaration") {
        return 1;
    }
    if (param.type !== "parameter_declaration") {
        return 0;
    }
    // `a, b int` is ONE declaration naming two parameters; an unnamed
    // parameter (`func(int)`) has no identifier but still counts once.
    const names = param.namedChildren.filter((c) => c.type === "identifier").length;
    return Math.max(names, 1);
}
/**
 * Count the parameters declared by a function-like node.
 *
 * Python excludes `self`/`cls`. Go counts every name of a grouped declaration
 * and counts variadic parameters. Rust includes `self`. Java includes spread
 * parameters.
 *
 * @param {object} funcNode - Function-like syntax node.
 * @param {string} language - Language identifier.
 * @returns {number} Parameter count; 0 for unknown languages or when the node
 *   has no `parameters` field.
 */
function countParameters(funcNode, language) {
    switch (language) {
        case "python":
        case "go":
        case "rust":
        case "java":
        case "csharp":
            break;
        default:
            return 0;
    }
    const paramsNode = funcNode.childForFieldName("parameters");
    if (!paramsNode) {
        return 0;
    }
    const params = paramsNode.namedChildren;
    switch (language) {
        case "python":
            return params.filter(isCountedPythonParameter).length;
        case "go":
            return params.reduce((total, p) => total + countGoParameterNames(p), 0);
        case "rust":
            return params.filter((c) => c.type === "parameter" || c.type === "self_parameter").length;
        case "java":
            return params.filter((c) => c.type === "formal_parameter" || c.type === "spread_parameter").length;
        default:
            // csharp
            return params.filter((c) => c.type === "parameter").length;
    }
}
|
|
297
|
+
// ─── Cyclomatic Complexity ──────────────────────────────────────────────────
// CC = 1 + number of decision points
const DECISION_NODE_TYPES = {
    python: new Set([
        "if_statement",
        "elif_clause",
        "for_statement",
        "while_statement",
        "except_clause",
        "conditional_expression",
        "for_in_clause",
        // Boolean operators — each 'and'/'or' is a decision point
        "boolean_operator",
    ]),
    go: new Set(["if_statement", "for_statement", "expression_case", "default_case", "type_case", "communication_case"]),
    rust: new Set(["if_expression", "for_expression", "while_expression", "loop_expression", "match_arm"]),
    java: new Set([
        "if_statement",
        "for_statement",
        "enhanced_for_statement",
        "while_statement",
        "do_statement",
        "catch_clause",
        "switch_block_statement_group",
        "ternary_expression",
    ]),
    csharp: new Set([
        "if_statement",
        "for_statement",
        "for_each_statement",
        "while_statement",
        "do_statement",
        "catch_clause",
        "switch_section",
        "conditional_expression",
    ]),
};
// Binary operators that add to complexity (&&, ||)
// NOTE(review): not referenced by any code visible in this file section.
const LOGICAL_OPS = new Set(["&&", "||", "and", "or"]);
/**
 * Cyclomatic complexity of one function: 1 for the base path plus one per
 * decision node and one per `binary_expression` using `&&` or `||`.
 *
 * The whole subtree of `funcNode` is visited, so decision points inside
 * nested functions or closures are included.
 *
 * @param {object} funcNode - Function-like syntax node.
 * @param {string} language - Language identifier.
 * @returns {number} Complexity, always >= 1.
 */
function computeCyclomaticComplexity(funcNode, language) {
    const decisionTypes = DECISION_NODE_TYPES[language] || new Set();
    // The operator token may expose "&&"/"||" as its type or as its text.
    const isShortCircuitToken = (token) => token.type === "&&" || token.type === "||" || token.text === "&&" || token.text === "||";
    let decisionPoints = 0;
    walkTree(funcNode, (node) => {
        if (decisionTypes.has(node.type)) {
            decisionPoints += 1;
        }
        if (node.type === "binary_expression" && node.children.some(isShortCircuitToken)) {
            decisionPoints += 1;
        }
    });
    return 1 + decisionPoints;
}
|
|
352
|
+
// ─── Nesting Depth ──────────────────────────────────────────────────────────
/** Node types that open one additional nesting level, per language. */
const NESTING_NODE_TYPES = {
    python: new Set([
        "if_statement",
        "for_statement",
        "while_statement",
        "with_statement",
        "try_statement",
        "except_clause",
        "for_in_clause",
        "function_definition",
        "class_definition",
    ]),
    go: new Set([
        "if_statement",
        "for_statement",
        "select_statement",
        "type_switch_statement",
        "expression_switch_statement",
        "func_literal",
    ]),
    rust: new Set([
        "if_expression",
        "for_expression",
        "while_expression",
        "loop_expression",
        "match_expression",
        "closure_expression",
    ]),
    java: new Set([
        "if_statement",
        "for_statement",
        "enhanced_for_statement",
        "while_statement",
        "do_statement",
        "try_statement",
        "switch_expression",
        "lambda_expression",
    ]),
    csharp: new Set([
        "if_statement",
        "for_statement",
        "for_each_statement",
        "while_statement",
        "do_statement",
        "try_statement",
        "switch_statement",
        "lambda_expression",
    ]),
};
/**
 * Deepest nesting level reached anywhere below `node`.
 *
 * `node` itself is never counted; only its named descendants whose type is
 * in NESTING_NODE_TYPES add a level.
 *
 * @param {object} node - Syntax node whose descendants are inspected.
 * @param {string} language - Language identifier.
 * @param {number} currentDepth - Nesting depth at which `node` sits.
 * @returns {number} Maximum depth found, never less than `currentDepth`.
 */
function computeMaxNesting(node, language, currentDepth) {
    const nestingTypes = NESTING_NODE_TYPES[language] || new Set();
    return node.namedChildren.reduce((deepest, child) => {
        const depthAtChild = nestingTypes.has(child.type) ? currentDepth + 1 : currentDepth;
        const deepestBelow = computeMaxNesting(child, language, depthAtChild);
        return Math.max(deepest, depthAtChild, deepestBelow);
    }, currentDepth);
}
|
|
418
|
+
// ─── Dead Code Detection ────────────────────────────────────────────────────
/** Node types that represent terminal statements (control flow never continues past them) */
const TERMINAL_TYPES = {
    python: new Set(["return_statement", "raise_statement", "break_statement", "continue_statement"]),
    go: new Set(["return_statement", "break_statement", "continue_statement"]),
    rust: new Set(["return_expression", "break_expression", "continue_expression"]),
    java: new Set(["return_statement", "throw_statement", "break_statement", "continue_statement"]),
    csharp: new Set(["return_statement", "throw_statement", "break_statement", "continue_statement"]),
};
/** Node types that represent blocks containing sequential statements */
const BLOCK_TYPES = {
    python: new Set(["block"]),
    go: new Set(["block"]),
    rust: new Set(["block"]),
    java: new Set(["block"]),
    csharp: new Set(["block"]),
};
/**
 * Report the lines of statements that follow a terminal statement
 * (return/raise/throw/break/continue) inside the same block.
 *
 * Only direct children of a block are considered, so a terminal nested in an
 * inner `if` does not mark the outer block's remaining statements. Comment
 * nodes are ignored: a comment after `return` is not executable code and
 * would otherwise be reported as dead.
 *
 * @param {object} root - Root syntax node of the parsed file.
 * @param {string} language - Language identifier.
 * @returns {number[]} Sorted, de-duplicated 1-based line numbers.
 */
function detectDeadCode(root, language) {
    const terminalTypes = TERMINAL_TYPES[language] || new Set();
    const blockTypes = BLOCK_TYPES[language] || new Set();
    const deadLines = new Set();
    walkTree(root, (node) => {
        if (!blockTypes.has(node.type)) {
            return;
        }
        let foundTerminal = false;
        for (const child of node.namedChildren) {
            // Matches "comment", "line_comment", "block_comment", ...
            if (child.type.endsWith("comment")) {
                continue;
            }
            if (foundTerminal) {
                // Everything after a terminal statement is dead code
                const lastLine = child.endPosition.row + 1;
                for (let line = child.startPosition.row + 1; line <= lastLine; line++) {
                    deadLines.add(line);
                }
                continue;
            }
            if (terminalTypes.has(child.type)) {
                foundTerminal = true;
            }
            else if (child.type === "expression_statement") {
                // An expression statement wrapping a terminal (e.g. Rust `return x;`)
                const expr = child.namedChildren[0];
                if (expr && terminalTypes.has(expr.type)) {
                    foundTerminal = true;
                }
            }
        }
    });
    return [...deadLines].sort((a, b) => a - b);
}
|
|
467
|
+
// ─── Deep Nesting Detection ─────────────────────────────────────────────────
/**
 * Report every line covered by a node that sits more than four nesting
 * levels deep, counting levels with NESTING_NODE_TYPES from the file root.
 *
 * @param {object} root - Root syntax node of the parsed file.
 * @param {string} language - Language identifier.
 * @returns {number[]} Sorted, de-duplicated 1-based line numbers.
 */
function detectDeepNesting(root, language) {
    const nestingTypes = NESTING_NODE_TYPES[language] || new Set();
    const threshold = 4; // Depth > 4 is "deep"
    const flagged = new Set();
    const visit = (parent, depth) => {
        parent.namedChildren.forEach((child) => {
            const depthHere = nestingTypes.has(child.type) ? depth + 1 : depth;
            if (depthHere > threshold) {
                // Mark the full line span of the deeply-nested node.
                const lastLine = child.endPosition.row + 1;
                for (let line = child.startPosition.row + 1; line <= lastLine; line += 1) {
                    flagged.add(line);
                }
            }
            visit(child, depthHere);
        });
    };
    visit(root, 0);
    return Array.from(flagged).sort((a, b) => a - b);
}
|
|
490
|
+
// ─── Weak Type Detection ────────────────────────────────────────────────────
/**
 * Per-language predicates deciding whether a syntax node denotes a "weak"
 * (loosely typed or unchecked) construct. Each takes a node and returns a
 * boolean.
 */
const WEAK_TYPE_PATTERNS = {
    python: (node) => {
        // typing.Any or just Any in type annotations. Match `Any` as a whole
        // word so names such as AnyStr or AnyUrl are not flagged.
        if (node.type === "type" || node.type === "annotation") {
            return /\bAny\b/.test(node.text);
        }
        return false;
    },
    go: (node) => {
        // interface{} or any keyword
        if (node.type === "interface_type") {
            // Empty interface
            return node.namedChildren.length === 0;
        }
        if (node.type === "type_identifier" && node.text === "any")
            return true;
        return false;
    },
    rust: (node) => {
        // unsafe blocks and raw pointer casts
        if (node.type === "unsafe_block")
            return true;
        if (node.type === "type_cast_expression") {
            return node.text.includes("*const") || node.text.includes("*mut");
        }
        return false;
    },
    java: (node) => {
        // Object type, Class<?>
        if (node.type === "type_identifier" && node.text === "Object")
            return true;
        if (node.type === "generic_type" && node.text.includes("Class<?>"))
            return true;
        return false;
    },
    csharp: (node) => {
        // dynamic, object
        if (node.type === "predefined_type" && (node.text === "dynamic" || node.text === "object")) {
            return true;
        }
        if (node.type === "identifier" && node.text === "dynamic")
            return true;
        return false;
    },
};
|
|
536
|
+
/**
 * Report the starting line of every node that the language's
 * WEAK_TYPE_PATTERNS predicate accepts.
 *
 * @param {object} root - Root syntax node of the parsed file.
 * @param {string} language - Language identifier; unknown languages yield [].
 * @returns {number[]} Sorted, de-duplicated 1-based line numbers.
 */
function detectWeakTypes(root, language) {
    const isWeak = WEAK_TYPE_PATTERNS[language];
    const flagged = new Set();
    if (isWeak) {
        walkTree(root, (node) => {
            if (isWeak(node)) {
                flagged.add(node.startPosition.row + 1);
            }
        });
    }
    return Array.from(flagged).sort((a, b) => a - b);
}
|
|
548
|
+
// ─── Import Extraction ──────────────────────────────────────────────────────
/** Node types that represent import/use/using statements per language. */
const IMPORT_NODE_TYPES = {
    python: ["import_statement", "import_from_statement"],
    go: ["import_declaration"],
    rust: ["use_declaration"],
    java: ["import_declaration"],
    csharp: ["using_directive"],
};
/**
 * Collect the modules/packages imported by the file, in source order.
 *
 * Go imports are taken from each `import_spec`'s path only, so every import
 * is reported exactly once, without its alias, and raw-string (backtick)
 * paths are supported.
 *
 * @param {object} root - Root syntax node of the parsed file.
 * @param {string} language - Language identifier; unknown languages yield [].
 * @returns {string[]} Imported names (Rust: only the leading path segment).
 */
function extractImports(root, language) {
    const imports = [];
    const importTypes = IMPORT_NODE_TYPES[language] || [];
    walkTree(root, (node) => {
        if (!importTypes.includes(node.type))
            return;
        switch (language) {
            case "python":
                if (node.type === "import_statement") {
                    // import os, import os.path, import numpy as np
                    for (const child of node.namedChildren) {
                        if (child.type === "dotted_name" || child.type === "aliased_import") {
                            const name = child.type === "aliased_import" ? child.childForFieldName("name")?.text || child.text : child.text;
                            if (name)
                                imports.push(name);
                        }
                    }
                }
                else if (node.type === "import_from_statement") {
                    // from flask import Flask
                    const moduleNode = node.childForFieldName("module_name");
                    if (moduleNode)
                        imports.push(moduleNode.text);
                }
                break;
            case "go":
                // import "fmt" or import ( "fmt" alias "net/http" )
                walkTree(node, (child) => {
                    if (child.type !== "import_spec")
                        return;
                    const pathNode = child.childForFieldName("path") ||
                        child.namedChildren.find((c) => c.type === "interpreted_string_literal" || c.type === "raw_string_literal");
                    // Strip the surrounding quotes or backticks.
                    const path = (pathNode ? pathNode.text : child.text).replace(/["`]/g, "");
                    if (path)
                        imports.push(path);
                });
                break;
            case "rust":
                // use std::io; use crate::module_name;
                {
                    const pathNode = node.namedChildren.find((c) => c.type === "scoped_identifier" ||
                        c.type === "identifier" ||
                        c.type === "use_wildcard" ||
                        c.type === "use_list" ||
                        c.type === "scoped_use_list");
                    if (pathNode) {
                        // Extract the root crate/module name
                        const rootModule = pathNode.text.split("::")[0];
                        if (rootModule)
                            imports.push(rootModule);
                    }
                }
                break;
            case "java":
                // import com.example.Foo;
                {
                    const nameNode = node.namedChildren.find((c) => c.type === "scoped_identifier" || c.type === "identifier");
                    if (nameNode)
                        imports.push(nameNode.text);
                }
                break;
            case "csharp":
                // using System.IO;
                {
                    const nameNode = node.namedChildren.find((c) => c.type === "qualified_name" || c.type === "identifier");
                    if (nameNode)
                        imports.push(nameNode.text);
                }
                break;
        }
    });
    return imports;
}
|
|
628
|
+
// ─── Class Extraction ───────────────────────────────────────────────────────
/** Node types that represent class-like declarations per language. */
const CLASS_NODE_TYPES = {
    python: ["class_definition"],
    go: ["type_declaration"],
    rust: ["struct_item", "enum_item"],
    java: ["class_declaration", "interface_declaration", "enum_declaration"],
    csharp: ["class_declaration", "struct_declaration", "interface_declaration", "enum_declaration"],
};
/**
 * Collect the names of class-like declarations, in source order.
 *
 * For Go only struct types count, and every `type_spec` of a declaration is
 * inspected so grouped declarations (`type ( A struct{}; B struct{} )`)
 * report all of their structs rather than just the first.
 *
 * @param {object} root - Root syntax node of the parsed file.
 * @param {string} language - Language identifier; unknown languages yield [].
 * @returns {string[]} Declared class/struct/interface/enum names.
 */
function extractClasses(root, language) {
    const classes = [];
    const classTypes = CLASS_NODE_TYPES[language] || [];
    walkTree(root, (node) => {
        if (!classTypes.includes(node.type))
            return;
        if (language === "go" && node.type === "type_declaration") {
            // Only count struct types: type Foo struct { ... }
            for (const spec of node.namedChildren) {
                if (spec.type !== "type_spec")
                    continue;
                const typeBody = spec.childForFieldName("type");
                if (!typeBody || typeBody.type !== "struct_type")
                    continue;
                const specName = spec.childForFieldName("name");
                if (specName)
                    classes.push(specName.text);
            }
            return;
        }
        const nameNode = node.childForFieldName("name");
        if (nameNode)
            classes.push(nameNode.text);
    });
    return classes;
}
|
|
661
|
+
// ─── Decorator / Annotation Extraction ──────────────────────────────────────
/**
 * Names of the decorators (Python), annotations (Java) or attribute lists
 * (C#) attached to a function, without the leading `@`, the enclosing
 * brackets or any argument list.
 *
 * Java: the `modifiers` node is looked up as a field first and then as a
 * plain child node of type "modifiers", because the method node may expose
 * it only as a child. C#: attribute lists are collected both from the
 * function node's own children and from earlier siblings in its parent.
 *
 * @param {object} funcNode - Function-like syntax node.
 * @param {string} language - Language identifier.
 * @returns {string[]} Decorator names; empty for other languages.
 */
function extractDecorators(funcNode, language) {
    const decorators = [];
    // "@app.route('/x')" -> "app.route"
    const annotationName = (text) => text.replace(/^@/, "").split("(")[0];
    switch (language) {
        case "python": {
            // Decorators are siblings before the function_definition, but in the
            // tree-sitter grammar they're children of a decorated_definition parent.
            const parent = funcNode.parent;
            if (parent && parent.type === "decorated_definition") {
                for (const child of parent.namedChildren) {
                    if (child.type === "decorator") {
                        decorators.push(annotationName(child.text));
                    }
                }
            }
            break;
        }
        case "java": {
            // Annotations live inside the modifiers node before the method header.
            const modifiers = funcNode.childForFieldName("modifiers") ||
                funcNode.childForFieldName("modifier") ||
                funcNode.namedChildren.find((c) => c.type === "modifiers");
            if (modifiers) {
                for (const child of modifiers.namedChildren) {
                    if (child.type === "marker_annotation" || child.type === "annotation") {
                        decorators.push(annotationName(child.text));
                    }
                }
            }
            break;
        }
        case "csharp": {
            const isAttributeList = (c) => c.type === "attribute_list";
            const parent = funcNode.parent;
            // Attribute lists that appear as earlier siblings of the method.
            const preceding = parent
                ? parent.namedChildren.filter((c) => isAttributeList(c) && c.endPosition.row < funcNode.startPosition.row)
                : [];
            // Attribute lists that are children of the method node itself.
            const own = funcNode.namedChildren.filter(isAttributeList);
            for (const list of [...preceding, ...own]) {
                decorators.push(list.text.replace(/[\[\]]/g, "").split("(")[0]);
            }
            break;
        }
    }
    return decorators;
}
|
|
707
|
+
// ─── Async Detection ────────────────────────────────────────────────────────
/**
 * Decide whether a function node is declared `async`.
 *
 * Python: the node text starts with "async ". Rust: the text starts with
 * "async " OR a `function_modifiers` child contains `async`, which covers
 * `pub async fn`, where the text starts with the visibility modifier.
 * Java/C#: an `async` token inside a `modifiers` field, or a direct child of
 * type `modifier` whose text is `async`.
 *
 * @param {object} funcNode - Function-like syntax node.
 * @param {string} language - Language identifier.
 * @returns {boolean} false for languages not listed above.
 */
function checkIsAsync(funcNode, language) {
    const hasAsyncToken = (parent) => parent.children.some((c) => c.text === "async");
    switch (language) {
        case "python":
            return funcNode.text.trimStart().startsWith("async ");
        case "rust": {
            if (funcNode.text.trimStart().startsWith("async ")) {
                return true;
            }
            const modifiers = funcNode.children.find((c) => c.type === "function_modifiers");
            return modifiers ? hasAsyncToken(modifiers) : false;
        }
        case "java":
        case "csharp": {
            const grouped = funcNode.childForFieldName("modifiers");
            if (grouped && hasAsyncToken(grouped)) {
                return true;
            }
            // Modifiers may also be direct children of the declaration.
            return funcNode.children.some((c) => c.type === "modifier" && c.text === "async");
        }
        default:
            return false;
    }
}
|
|
730
|
+
// ─── Tree Walking Helper ────────────────────────────────────────────────────
/**
 * Visit `node` and all of its descendants (named and anonymous) in pre-order,
 * calling `callback` once per node.
 *
 * @param {object} node - Subtree root; must expose a `children` array.
 * @param {(node: object) => void} callback - Invoked for every visited node.
 */
function walkTree(node, callback) {
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        callback(current);
        // Push children right-to-left so the leftmost child is visited next.
        const kids = current.children;
        for (let i = kids.length - 1; i >= 0; i -= 1) {
            pending.push(kids[i]);
        }
    }
}
|
|
737
|
+
/**
 * Return the first node of the given type in a pre-order walk of the subtree
 * rooted at `node` (the root itself included).
 *
 * @param {object} node - Subtree root; must expose `type` and `children`.
 * @param {string} type - Node type to look for.
 * @returns {object|null} The matching node, or null when none exists.
 */
function findFirstByType(node, type) {
    const pending = [node];
    while (pending.length > 0) {
        const current = pending.pop();
        if (current.type === type) {
            return current;
        }
        // Push children right-to-left so the leftmost subtree is searched first.
        const kids = current.children;
        for (let i = kids.length - 1; i >= 0; i -= 1) {
            pending.push(kids[i]);
        }
    }
    return null;
}
|
|
747
|
+
//# sourceMappingURL=tree-sitter-ast.js.map
|