skylos 1.0.10__py3-none-any.whl → 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skylos/__init__.py +9 -3
- skylos/analyzer.py +674 -168
- skylos/cfg_visitor.py +60 -0
- skylos/cli.py +719 -235
- skylos/codemods.py +277 -0
- skylos/config.py +50 -0
- skylos/constants.py +78 -0
- skylos/gatekeeper.py +147 -0
- skylos/linter.py +18 -0
- skylos/rules/base.py +20 -0
- skylos/rules/danger/calls.py +119 -0
- skylos/rules/danger/danger.py +157 -0
- skylos/rules/danger/danger_cmd/cmd_flow.py +75 -0
- skylos/rules/danger/danger_fs/__init__.py +0 -0
- skylos/rules/danger/danger_fs/path_flow.py +79 -0
- skylos/rules/danger/danger_net/__init__.py +0 -0
- skylos/rules/danger/danger_net/ssrf_flow.py +80 -0
- skylos/rules/danger/danger_sql/__init__.py +0 -0
- skylos/rules/danger/danger_sql/sql_flow.py +245 -0
- skylos/rules/danger/danger_sql/sql_raw_flow.py +96 -0
- skylos/rules/danger/danger_web/__init__.py +0 -0
- skylos/rules/danger/danger_web/xss_flow.py +170 -0
- skylos/rules/danger/taint.py +110 -0
- skylos/rules/quality/__init__.py +0 -0
- skylos/rules/quality/complexity.py +95 -0
- skylos/rules/quality/logic.py +96 -0
- skylos/rules/quality/nesting.py +101 -0
- skylos/rules/quality/structure.py +99 -0
- skylos/rules/secrets.py +325 -0
- skylos/server.py +554 -0
- skylos/visitor.py +502 -90
- skylos/visitors/__init__.py +0 -0
- skylos/visitors/framework_aware.py +437 -0
- skylos/visitors/test_aware.py +74 -0
- skylos-2.5.2.dist-info/METADATA +21 -0
- skylos-2.5.2.dist-info/RECORD +42 -0
- {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/WHEEL +1 -1
- {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/top_level.txt +0 -1
- skylos-1.0.10.dist-info/METADATA +0 -8
- skylos-1.0.10.dist-info/RECORD +0 -21
- test/compare_tools.py +0 -604
- test/diagnostics.py +0 -364
- test/sample_repo/app.py +0 -13
- test/sample_repo/sample_repo/commands.py +0 -81
- test/sample_repo/sample_repo/models.py +0 -122
- test/sample_repo/sample_repo/routes.py +0 -89
- test/sample_repo/sample_repo/utils.py +0 -36
- test/test_skylos.py +0 -456
- test/test_visitor.py +0 -220
- {test → skylos/rules}/__init__.py +0 -0
- {test/sample_repo → skylos/rules/danger}/__init__.py +0 -0
- {test/sample_repo/sample_repo → skylos/rules/danger/danger_cmd}/__init__.py +0 -0
- {skylos-1.0.10.dist-info → skylos-2.5.2.dist-info}/entry_points.txt +0 -0
test/compare_tools.py
DELETED
|
@@ -1,604 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
import subprocess
|
|
3
|
-
import sys
|
|
4
|
-
import os
|
|
5
|
-
import json
|
|
6
|
-
import time
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
import re
|
|
9
|
-
import shutil
|
|
10
|
-
import argparse
|
|
11
|
-
import pandas as pd
|
|
12
|
-
|
|
13
|
-
class Colors:
    """ANSI escape sequences used to colorize terminal output."""
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    END = '\033[0m'  # reset all attributes
|
|
20
|
-
|
|
21
|
-
def check_tool_exists(tool_name):
    """Return True when *tool_name* resolves to an executable on PATH."""
    return bool(shutil.which(tool_name))
|
|
23
|
-
|
|
24
|
-
def install_tool(tool_name):
    """Ensure *tool_name* is installed, pip-installing it when missing.

    Returns True when the tool is usable afterwards, False when the
    pip install failed.
    """
    if check_tool_exists(tool_name):
        print(f"✅ {tool_name} is already installed")
        return True

    print(f"Installing {tool_name}...")
    try:
        subprocess.run([sys.executable, "-m", "pip", "install", tool_name], check=True)
    except subprocess.SubprocessError as e:
        print(f"⚠️ Error installing {tool_name}: {e}")
        return False
    print(f"✅ {tool_name} installed successfully")
    return True
|
|
37
|
-
|
|
38
|
-
def load_ground_truth(test_dir):
    """Collect every dead-code item declared by ground_truth.json files.

    Walks *test_dir* recursively; each JSON file maps relative file names
    to a list of expected dead items. Returns a flat list of item dicts.
    """
    collected = []

    for gt_file in Path(test_dir).glob("**/ground_truth.json"):
        try:
            with open(gt_file, 'r') as fh:
                payload = json.load(fh)

            case_dir = gt_file.parent
            category = payload.get("category", "unknown")

            for rel_name, file_info in payload["files"].items():
                abs_path = case_dir / rel_name
                for dead in file_info.get("dead_items", []):
                    collected.append({
                        "type": dead["type"],
                        "name": dead["name"],
                        "simple_name": extract_simple_name(dead["name"]),
                        "file": str(abs_path),
                        "basename": rel_name,
                        "line": dead.get("line_start", 0),
                        "category": category,
                    })
        except Exception as e:
            # A malformed case file should not abort the whole benchmark.
            print(f"Error loading {gt_file}: {e}")

    return collected
|
|
64
|
-
|
|
65
|
-
def extract_simple_name(full_name):
    """Return the last dotted component of *full_name* (or the name itself)."""
    return full_name.rsplit('.', 1)[-1]
|
|
69
|
-
|
|
70
|
-
def run_vulture(test_dir, confidence=60):
    """Benchmark Vulture on *test_dir* at the given min-confidence level.

    Parses Vulture's plain-text report into the shared detected-item dict
    format and returns {"tool", "items", "time", "capabilities"}.
    """
    try:
        installed = install_tool("vulture")
        if not installed:
            # BUG FIX: this early return previously omitted "capabilities",
            # which run_benchmarks reads unconditionally (KeyError).
            return {"tool": f"Vulture ({confidence}%)", "items": [], "time": 0, "capabilities": []}

        print(f"Running Vulture with {confidence}% confidence...")
        start_time = time.time()
        output = subprocess.run(["vulture", "--min-confidence", str(confidence), test_dir],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)

        # Map Vulture's human-readable messages onto our canonical types.
        type_map = {
            "unused function": "function",
            "unused method": "method",
            "unused class": "class",
            "unused import": "import",
            "unused variable": "variable",
        }
        # e.g. "path.py:12: unused function 'foo' (60% confidence)"
        line_re = re.compile(r'(.+?):(\d+): (.+?) \'(.+?)\' (.+)')

        detected_items = []
        for line in output.stdout.splitlines():
            if ':' not in line:
                continue

            match = line_re.match(line)
            if not match:
                continue
            file_path, line_num, item_type, name, _ = match.groups()

            mapped = next((t for key, t in type_map.items() if key in item_type), None)
            if mapped is None:
                continue  # message kind we do not benchmark

            detected_items.append({
                "type": mapped,
                "name": name,
                "simple_name": extract_simple_name(name),
                "file": file_path,
                "basename": Path(file_path).name,
                "line": int(line_num)
            })

        elapsed_time = time.time() - start_time
        return {
            "tool": f"Vulture ({confidence}%)",
            "items": detected_items,
            "time": elapsed_time,
            "capabilities": ["function", "method", "class", "import", "variable"]
        }
    except Exception as e:
        print(f"Error running Vulture: {e}")
        return {"tool": f"Vulture ({confidence}%)", "items": [], "time": 0, "capabilities": []}
|
|
124
|
-
|
|
125
|
-
def run_skylos_local(test_dir):
    """Run the locally installed development version of Skylos.

    Calls skylos.analyze(test_dir), which returns a JSON string, and
    normalises its "unused_functions", "unused_imports" and
    "unused_classes" entries into the shared detected-item dict format.
    Returns {"tool", "items", "time", "capabilities"}; on any failure a
    result with an empty item list is returned instead of raising.
    """
    try:
        start_time = time.time()

        try:
            import skylos
        except ImportError as e:
            print(f"Error importing local skylos: {e}")
            return {"tool": "Skylos (Local Dev)", "items": [], "time": 0, "capabilities": []}

        try:
            # skylos.analyze returns its findings serialised as JSON.
            result_json = skylos.analyze(test_dir)
            data = json.loads(result_json)

            detected_items = []

            for item in data.get("unused_functions", []):
                name = item["name"]

                if "." in name:
                    parts = name.split(".")

                    # Heuristic: a capitalised penultimate component means
                    # "Class.method" — keep the last two parts for comparison.
                    if len(parts) >= 2 and parts[-2][0].isupper():
                        item_type = "method"
                        clean_name = f"{parts[-2]}.{parts[-1]}"
                    # A capitalised component earlier in the dotted path:
                    # presumably a function nested inside a class namespace,
                    # compared by its bare name. TODO confirm heuristic.
                    elif any(p[0].isupper() for p in parts[:-1]):
                        item_type = "function"
                        clean_name = parts[-1]
                    else:
                        item_type = "function"
                        clean_name = name
                else:
                    clean_name = name
                    item_type = "function"

                detected_items.append({
                    "type": item_type,
                    "name": clean_name,
                    "simple_name": extract_simple_name(clean_name),
                    "file": item["file"],
                    "basename": Path(item["file"]).name,
                    "line": item["line"]
                })

            for item in data.get("unused_imports", []):
                # Strip wrapping parentheses/whitespace and trailing "#"
                # comments from the reported import name.
                name = item["name"].strip()
                name = re.sub(r'^[\(\s]+|[\)\s]+$', '', name)
                name = re.sub(r'#.*$', '', name).strip()

                detected_items.append({
                    "type": "import",
                    "name": name,
                    "simple_name": extract_simple_name(name),
                    "file": item["file"],
                    "basename": Path(item["file"]).name,
                    "line": item["line"]
                })

            for item in data.get("unused_classes", []):
                detected_items.append({
                    "type": "class",
                    "name": item["name"],
                    "simple_name": extract_simple_name(item["name"]),
                    "file": item["file"],
                    "basename": Path(item["file"]).name,
                    "line": item["line"]
                })

            elapsed_time = time.time() - start_time
            return {
                "tool": "Skylos (Local Dev)",
                "items": detected_items,
                "time": elapsed_time,
                "capabilities": ["function", "method", "class", "import"]
            }

        except Exception as e:
            print(f"Analysis error with local Skylos: {e}")
            return {"tool": "Skylos (Local Dev)", "items": [], "time": 0, "capabilities": []}
    except Exception as e:
        print(f"Error running local Skylos: {e}")
        return {"tool": "Skylos (Local Dev)", "items": [], "time": 0, "capabilities": []}
|
|
210
|
-
|
|
211
|
-
def run_flake8(test_dir):
    """Benchmark Flake8's unused-import check (F401) on *test_dir*."""
    try:
        if not install_tool("flake8"):
            return {"tool": "Flake8", "items": [], "time": 0, "capabilities": []}

        print(f"Running Flake8...")
        started = time.time()
        proc = subprocess.run(["flake8", "--select=F401", test_dir],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              universal_newlines=True)

        pattern = re.compile(r'(.+?):(\d+):\d+: F401 \'(.+?)\' imported but unused')

        found = []
        for report_line in proc.stdout.splitlines():
            if ':' not in report_line:
                continue

            hit = pattern.match(report_line)
            if hit is None:
                continue
            file_path, line_num, name = hit.groups()

            found.append({
                "type": "import",
                "name": name,
                "simple_name": extract_simple_name(name),
                "file": file_path,
                "basename": Path(file_path).name,
                "line": int(line_num),
            })

        return {
            "tool": "Flake8",
            "items": found,
            "time": time.time() - started,
            "capabilities": ["import"],
        }
    except Exception as e:
        print(f"Error running Flake8: {e}")
        return {"tool": "Flake8", "items": [], "time": 0, "capabilities": []}
|
|
252
|
-
|
|
253
|
-
def run_pylint(test_dir):
    """Benchmark Pylint's unused-import/variable/argument checks on *test_dir*."""
    try:
        if not install_tool("pylint"):
            return {"tool": "Pylint", "items": [], "time": 0, "capabilities": []}

        print(f"Running Pylint...")
        started = time.time()
        proc = subprocess.run(
            ["pylint", "--disable=all", "--enable=unused-import,unused-variable,unused-argument", test_dir],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True)

        pattern = re.compile(r'(.+?):(\d+):.*: (Unused import|Unused variable|Unused argument) (.+)')
        # Unused arguments are scored in the "variable" bucket, like the
        # original comparison did.
        kind_for = {"Unused import": "import",
                    "Unused variable": "variable",
                    "Unused argument": "variable"}

        found = []
        for report_line in proc.stdout.splitlines():
            if ':' not in report_line:
                continue

            hit = pattern.match(report_line)
            if hit is None:
                continue
            file_path, line_num, warning_type, name = hit.groups()

            item_type = kind_for.get(warning_type)
            if item_type is None:
                continue

            name = name.strip()
            if "'" in name:
                # Pylint quotes the symbol, e.g. "Unused variable 'x'".
                name = name.split("'")[1]

            found.append({
                "type": item_type,
                "name": name,
                "simple_name": extract_simple_name(name),
                "file": file_path,
                "basename": Path(file_path).name,
                "line": int(line_num),
            })

        return {
            "tool": "Pylint",
            "items": found,
            "time": time.time() - started,
            "capabilities": ["import", "variable"],
        }
    except Exception as e:
        print(f"Error running Pylint: {e}")
        return {"tool": "Pylint", "items": [], "time": 0, "capabilities": []}
|
|
308
|
-
|
|
309
|
-
def run_ruff(test_dir):
    """Benchmark Ruff's pyflakes rules on *test_dir*.

    F401 -> unused import, F841 -> unused variable, F811 -> redefined
    (dead) function, F504/F505 -> unreachable format branches. Returns
    {"tool", "items", "time", "capabilities"}.
    """
    try:
        installed = install_tool("ruff")
        if not installed:
            return {"tool": "Ruff", "items": [], "time": 0, "capabilities": []}

        print("Running Ruff...")
        start_time = time.time()
        command = ["ruff", "check", "--select=F401,F811,F841,F504,F505", "--verbose", test_dir]
        print(f"Running command: {' '.join(command)}")

        output = subprocess.run(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)

        # The F401/F841/F811 branches were three identical copies; one
        # code->type map replaces them.
        code_to_type = {"F401": "import", "F841": "variable", "F811": "function"}
        line_re = re.compile(r'(.+?):(\d+):\d+: (F\d+)(?:\s+\[\*\])?\s+(.+)')
        backtick_re = re.compile(r'`(.+?)`')

        detected_items = []
        for line in output.stdout.splitlines():
            # Skip continuation/banner lines from --verbose output.
            if ':' not in line or line.startswith(' ') or line.startswith('='):
                continue

            match = line_re.match(line)
            if not match:
                continue
            file_path, line_num, code, message = match.groups()

            if code in code_to_type:
                # The offending name is rendered in backticks in the message.
                name_match = backtick_re.search(message)
                if name_match:
                    name = name_match.group(1)
                    detected_items.append({
                        "type": code_to_type[code],
                        "name": name,
                        "simple_name": extract_simple_name(name),
                        "file": file_path,
                        "basename": Path(file_path).name,
                        "line": int(line_num)
                    })
            elif code in ("F504", "F505"):
                detected_items.append({
                    "type": "unreachable",
                    "name": f"unreachable code at line {line_num}",
                    "simple_name": "unreachable",
                    "file": file_path,
                    "basename": Path(file_path).name,
                    "line": int(line_num)
                })

        print(f"Ruff found {len(detected_items)} items")

        elapsed_time = time.time() - start_time
        return {
            "tool": "Ruff",
            "items": detected_items,
            "time": elapsed_time,
            "capabilities": ["import", "function", "variable", "unreachable"]
        }
    except Exception as e:
        print(f"Error running Ruff: {e}")
        import traceback
        traceback.print_exc()
        return {"tool": "Ruff", "items": [], "time": 0, "capabilities": []}
|
|
394
|
-
|
|
395
|
-
def calculate_metrics(detected_items, ground_truth_items):
    """Compute precision/recall/F1 between detected and ground-truth items.

    Items are compared as (normalized name, type) pairs so dotted-path
    differences between tools do not cause spurious mismatches.
    """
    detected = {(normalize_item(it), it["type"]) for it in detected_items}
    expected = {(normalize_item(it), it["type"]) for it in ground_truth_items}

    tp = len(detected & expected)
    fp = len(detected - expected)
    fn = len(expected - detected)

    # max(..., 1) / the epsilon guard against division by zero on empty sets.
    precision = tp / max(len(detected), 1)
    recall = tp / max(len(expected), 1)
    f1 = 2 * precision * recall / max(precision + recall, 1e-10)

    return {
        "true_positives": tp,
        "false_positives": fp,
        "false_negatives": fn,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }
|
|
416
|
-
|
|
417
|
-
def calculate_metrics_by_type(detected_items, ground_truth_items, capabilities):
    """Compute metrics separately for each dead-code type the tool supports."""
    per_type = {}

    for item_type in capabilities:
        hits = [d for d in detected_items if d["type"] == item_type]
        truth = [g for g in ground_truth_items if g["type"] == item_type]

        if truth:
            per_type[item_type] = calculate_metrics(hits, truth)
        else:
            # No ground truth of this type: every detection is a false positive.
            per_type[item_type] = {
                "true_positives": 0,
                "false_positives": len(hits),
                "false_negatives": 0,
                "precision": 0.0,
                "recall": 0.0,
                "f1_score": 0.0,
            }

    return per_type
|
|
437
|
-
|
|
438
|
-
def normalize_item(item):
    """Reduce an item (dict or raw string) to a short, comparable name."""
    if isinstance(item, str):
        return item

    name = item.get("name", "")
    dotted = name.split(".")

    if item.get("type") == "method" and len(dotted) > 1:
        # Methods compare as "Class.method"; trim any leading module path.
        return ".".join(dotted[-2:]) if len(dotted) > 2 else name

    # Everything else compares by its bare trailing component.
    return dotted[-1]
|
|
455
|
-
|
|
456
|
-
def run_benchmarks(test_dir, output_dir=None):
    """Run every tool against *test_dir* and score it against ground truth.

    Prints a summary table, optionally writes chart data to *output_dir*,
    and returns the list of per-tool result dicts.
    """
    ground_truth_items = load_ground_truth(test_dir)

    tools = [
        run_skylos_local(test_dir),
        run_vulture(test_dir, 0),
        run_vulture(test_dir, 60),
        run_flake8(test_dir),
        run_pylint(test_dir),
        run_ruff(test_dir),
    ]

    results = []
    metrics_by_type = {}

    for tool_result in tools:
        # BUG FIX: the None guard previously ran *after* subscripting
        # tool_result, so it could never trigger; it must come first.
        if tool_result is None:
            print(f"Error: Tool returned None instead of results dictionary")
            continue

        tool_name = tool_result["tool"]
        items = tool_result["items"]
        time_taken = tool_result["time"]
        capabilities = tool_result["capabilities"]

        overall_metrics = calculate_metrics(items, ground_truth_items)
        type_metrics = calculate_metrics_by_type(items, ground_truth_items, capabilities)

        metrics_by_type[tool_name] = type_metrics

        results.append({
            "tool": tool_name,
            "time": time_taken,
            "item_count": len(items),
            "overall": overall_metrics,
            "by_type": type_metrics,
            "capabilities": capabilities,
        })

    print_results(results, ground_truth_items)

    if output_dir:
        output_dir_path = Path(output_dir)
        output_dir_path.mkdir(exist_ok=True)
        generate_charts(results, ground_truth_items, output_dir_path)

    return results
|
|
506
|
-
|
|
507
|
-
def print_results(results, ground_truth_items):
    """Print the benchmark summary: an overall table, then one table per
    dead-code type restricted to the tools that claim that capability."""
    print("\n")
    print(f"{Colors.BOLD}Benchmark Results Summary{Colors.END}")
    print("=" * 80)

    print(f"{Colors.BOLD}Overall Performance (All Dead Code Types Combined){Colors.END}")
    print("-" * 80)
    print(f"{'Tool':<20} {'Time (s)':<10} {'Items':<8} {'TP':<5} {'FP':<5} {'FN':<5} {'Precision':<10} {'Recall':<10} {'F1 Score':<10}")
    print("-" * 80)

    for result in results:
        tool = result["tool"]
        time_taken = result["time"]
        item_count = result["item_count"]
        overall = result["overall"]

        print(f"{tool:<20} {time_taken:<10.3f} {item_count:<8} {overall['true_positives']:<5} "
              f"{overall['false_positives']:<5} {overall['false_negatives']:<5} {overall['precision']:<10.4f} "
              f"{overall['recall']:<10.4f} {overall['f1_score']:<10.4f}")

    print("\n")
    print(f"{Colors.BOLD}Performance by Dead Code Type (Fair Comparison){Colors.END}")

    # Union of every type any tool claims to detect.
    types = set()
    for result in results:
        types.update(result["capabilities"])

    for dead_code_type in sorted(types):
        type_gt = [i for i in ground_truth_items if i["type"] == dead_code_type]
        if not type_gt:
            # Nothing of this type in ground truth — skip the table.
            continue

        print(f"\n{Colors.BOLD}Type: {dead_code_type} (Ground Truth: {len(type_gt)} items){Colors.END}")
        print("-" * 80)
        print(f"{'Tool':<20} {'TP':<5} {'FP':<5} {'FN':<5} {'Precision':<10} {'Recall':<10} {'F1 Score':<10}")
        print("-" * 80)

        for result in results:
            # Only score tools that claim support for this type.
            if dead_code_type not in result["capabilities"]:
                continue

            tool = result["tool"]
            by_type = result["by_type"]

            if dead_code_type in by_type:
                metrics = by_type[dead_code_type]
                print(f"{tool:<20} {metrics['true_positives']:<5} {metrics['false_positives']:<5} "
                      f"{metrics['false_negatives']:<5} {metrics['precision']:<10.4f} "
                      f"{metrics['recall']:<10.4f} {metrics['f1_score']:<10.4f}")
            else:
                print(f"{tool:<20} {'N/A':<5} {'N/A':<5} {'N/A':<5} {'N/A':<10} {'N/A':<10} {'N/A':<10}")
|
|
558
|
-
|
|
559
|
-
def generate_charts(results, ground_truth_items, output_dir):
    """Assemble per-type metric rows into a pandas DataFrame and return it.

    NOTE(review): despite the name, no chart files are produced and
    *output_dir* is currently unused; previously the DataFrame was also
    built and then discarded. It is now returned so callers can plot or
    persist it. The parameter is kept for interface compatibility.
    """
    types = set()
    for result in results:
        types.update(result["capabilities"])

    data = []
    for dead_code_type in sorted(types):
        # Skip types with no ground truth, mirroring print_results.
        if not any(i["type"] == dead_code_type for i in ground_truth_items):
            continue

        for result in results:
            if dead_code_type not in result["capabilities"]:
                continue

            metrics = result["by_type"].get(dead_code_type)
            if metrics is None:
                continue

            data.append({
                "Tool": result["tool"],
                "Type": dead_code_type,
                "Precision": metrics["precision"],
                "Recall": metrics["recall"],
                "F1": metrics["f1_score"],
                "True Positives": metrics["true_positives"],
                "False Positives": metrics["false_positives"],
                "Time": result["time"]
            })

    return pd.DataFrame(data)
|
|
591
|
-
|
|
592
|
-
def main():
    """CLI entry point: parse arguments and run the full benchmark suite."""
    parser = argparse.ArgumentParser(description="Fair Dead Code Detection Benchmark")
    parser.add_argument("test_dir", nargs="?", default="cases", help="Directory containing test cases")
    parser.add_argument("--output", "-o", help="Directory to output charts and reports")
    opts = parser.parse_args()

    print(f"{Colors.BOLD}Fair Dead Code Detection Benchmark{Colors.END}")
    print(f"{Colors.BOLD}==============================={Colors.END}")

    run_benchmarks(opts.test_dir, opts.output)
|
|
602
|
-
|
|
603
|
-
# Allow running this benchmark directly as a script.
if __name__ == "__main__":
    main()
|