mcpp 1.2.0__tar.gz → 1.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcpp-1.2.0/src/mcpp.egg-info → mcpp-1.3.1}/PKG-INFO +22 -6
- {mcpp-1.2.0 → mcpp-1.3.1}/README.md +17 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/pyproject.toml +2 -3
- {mcpp-1.2.0 → mcpp-1.3.1}/requirements.txt +3 -3
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp/__main__.py +25 -7
- mcpp-1.3.1/src/mcpp/additional.py +45 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp/complexity.py +26 -13
- mcpp-1.3.1/src/mcpp/memory.py +52 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp/queries.py +40 -4
- mcpp-1.3.1/src/mcpp/smell.py +46 -0
- mcpp-1.3.1/src/mcpp/tree.py +28 -0
- mcpp-1.3.1/src/mcpp/vulnerability.py +361 -0
- {mcpp-1.2.0 → mcpp-1.3.1/src/mcpp.egg-info}/PKG-INFO +22 -6
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp.egg-info/SOURCES.txt +4 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp.egg-info/requires.txt +3 -3
- mcpp-1.2.0/src/mcpp/vulnerability.py +0 -270
- {mcpp-1.2.0 → mcpp-1.3.1}/LICENSE +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/setup.cfg +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp/__init__.py +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp/assets/__init__.py +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp/assets/config.yaml +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp/config.py +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp/parse.py +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp.egg-info/dependency_links.txt +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp.egg-info/entry_points.txt +0 -0
- {mcpp-1.2.0 → mcpp-1.3.1}/src/mcpp.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mcpp
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.1
|
|
4
4
|
Summary: McCabe++ (mcpp): cyclomatic complexity and other vulnerability-related code metrics
|
|
5
5
|
Author-email: Lukas Pirch <lukas.pirch@tu-berlin.de>
|
|
6
6
|
License: MIT License
|
|
@@ -26,18 +26,17 @@ License: MIT License
|
|
|
26
26
|
SOFTWARE.
|
|
27
27
|
|
|
28
28
|
Keywords: vulnerability,code metric,static analysis
|
|
29
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
30
29
|
Classifier: Programming Language :: Python
|
|
31
30
|
Classifier: Programming Language :: Python :: 3
|
|
32
|
-
Requires-Python: >=3.9
|
|
31
|
+
Requires-Python: <3.14,>=3.9
|
|
33
32
|
Description-Content-Type: text/markdown
|
|
34
33
|
License-File: LICENSE
|
|
35
34
|
Requires-Dist: hydra-core>=1.3.2
|
|
36
|
-
Requires-Dist: tree-sitter>=0.
|
|
35
|
+
Requires-Dist: tree-sitter>=0.24.0
|
|
37
36
|
Requires-Dist: tree-sitter-c>=0.23.0
|
|
38
37
|
Requires-Dist: tree-sitter-cpp>=0.23.0
|
|
39
|
-
Requires-Dist: tqdm>=4.66.
|
|
40
|
-
Requires-Dist: loguru>=0.7.
|
|
38
|
+
Requires-Dist: tqdm>=4.66.0
|
|
39
|
+
Requires-Dist: loguru>=0.7.0
|
|
41
40
|
|
|
42
41
|
# McCabe++ (mcpp)
|
|
43
42
|
|
|
@@ -84,6 +83,23 @@ See also the corresponding [repo](https://github.com/wsbrg/crashminer).
|
|
|
84
83
|
| | V10 | number of if structures without else |
|
|
85
84
|
| | V11 | number of variables involved in control predicates |
|
|
86
85
|
|
|
86
|
+
## Additional Metrics
|
|
87
|
+
|
|
88
|
+
| Dimension | ID | Metric Description |
|
|
89
|
+
|-------------------|----|---------------------------------------------------------|
|
|
90
|
+
| XD: Extra | x1 | number of return statements |
|
|
91
|
+
| | x2 | number of cast expressions |
|
|
92
|
+
| | x3 | number of variable declarations |
|
|
93
|
+
| | x4 | maximum number of operands in an expression |
|
|
94
|
+
| TD: AST Structure | t1 | number of AST nodes (descendants) |
|
|
95
|
+
| | t2 | height of the AST |
|
|
96
|
+
| | t3 | average branching factor of the AST |
|
|
97
|
+
| SD: Code Smells | s1 | number of non-trivial numeric constants (magic numbers) |
|
|
98
|
+
| | s2 | number of goto statements |
|
|
99
|
+
| | s3 | number of function pointers |
|
|
100
|
+
| | s4 | number of function calls without return value usage |
|
|
101
|
+
| MD: Memory Ops | m1 | number of memory allocations (malloc, alloc, new, etc.) |
|
|
102
|
+
| | m2 | number of pointer dereferences (`*`, `[]`, `->`) |
|
|
87
103
|
|
|
88
104
|
|
|
89
105
|
## Setup
|
|
@@ -43,6 +43,23 @@ See also the corresponding [repo](https://github.com/wsbrg/crashminer).
|
|
|
43
43
|
| | V10 | number of if structures without else |
|
|
44
44
|
| | V11 | number of variables involved in control predicates |
|
|
45
45
|
|
|
46
|
+
## Additional Metrics
|
|
47
|
+
|
|
48
|
+
| Dimension | ID | Metric Description |
|
|
49
|
+
|-------------------|----|---------------------------------------------------------|
|
|
50
|
+
| XD: Extra | x1 | number of return statements |
|
|
51
|
+
| | x2 | number of cast expressions |
|
|
52
|
+
| | x3 | number of variable declarations |
|
|
53
|
+
| | x4 | maximum number of operands in an expression |
|
|
54
|
+
| TD: AST Structure | t1 | number of AST nodes (descendants) |
|
|
55
|
+
| | t2 | height of the AST |
|
|
56
|
+
| | t3 | average branching factor of the AST |
|
|
57
|
+
| SD: Code Smells | s1 | number of non-trivial numeric constants (magic numbers) |
|
|
58
|
+
| | s2 | number of goto statements |
|
|
59
|
+
| | s3 | number of function pointers |
|
|
60
|
+
| | s4 | number of function calls without return value usage |
|
|
61
|
+
| MD: Memory Ops | m1 | number of memory allocations (malloc, alloc, new, etc.) |
|
|
62
|
+
| | m2 | number of pointer dereferences (`*`, `[]`, `->`) |
|
|
46
63
|
|
|
47
64
|
|
|
48
65
|
## Setup
|
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "mcpp"
|
|
3
|
-
version = "1.2.0"
|
|
3
|
+
version = "1.3.1"
|
|
4
4
|
description = "McCabe++ (mcpp): cyclomatic complexity and other vulnerability-related code metrics"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [{name = "Lukas Pirch", email="lukas.pirch@tu-berlin.de"}]
|
|
7
7
|
license = {file = "LICENSE"}
|
|
8
8
|
classifiers = [
|
|
9
|
-
"License :: OSI Approved :: MIT License",
|
|
10
9
|
"Programming Language :: Python",
|
|
11
10
|
"Programming Language :: Python :: 3",
|
|
12
11
|
]
|
|
13
12
|
keywords = ["vulnerability", "code metric", "static analysis"]
|
|
14
|
-
requires-python = ">=3.9"
|
|
13
|
+
requires-python = ">=3.9,<3.14"
|
|
15
14
|
dynamic = ["dependencies"]
|
|
16
15
|
|
|
17
16
|
[tool.setuptools.dynamic]
|
|
@@ -2,7 +2,7 @@ import json
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import List
|
|
4
4
|
from collections import defaultdict
|
|
5
|
-
from importlib.resources import files
|
|
5
|
+
from importlib.resources import files, as_file
|
|
6
6
|
|
|
7
7
|
import hydra
|
|
8
8
|
from tqdm import tqdm
|
|
@@ -10,9 +10,14 @@ from tqdm import tqdm
|
|
|
10
10
|
from mcpp.config import Config
|
|
11
11
|
from mcpp.parse import Sitter, get_call_names
|
|
12
12
|
from mcpp.complexity import c1, c2, c3_c4
|
|
13
|
-
from mcpp.vulnerability import v1, v2,
|
|
13
|
+
from mcpp.vulnerability import v1, v2, v3_v4_v5, v6_v7, v8, v9, v10, v11
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
from mcpp.additional import x1, x2, x3, x4
|
|
16
|
+
from mcpp.tree import t1, t2, t3
|
|
17
|
+
from mcpp.smell import s1, s2, s3, s4
|
|
18
|
+
from mcpp.memory import m1, m2
|
|
19
|
+
|
|
20
|
+
with as_file(files("mcpp.assets") / "config.yaml") as p:
|
|
16
21
|
config_path = str(p.parent)
|
|
17
22
|
config_name = str(p.name)
|
|
18
23
|
|
|
@@ -24,15 +29,28 @@ METRICS = {
|
|
|
24
29
|
"C4": c3_c4,
|
|
25
30
|
"V1": v1,
|
|
26
31
|
"V2": v2,
|
|
27
|
-
"V3":
|
|
28
|
-
"V4":
|
|
29
|
-
"V5":
|
|
32
|
+
"V3": v3_v4_v5,
|
|
33
|
+
"V4": v3_v4_v5,
|
|
34
|
+
"V5": v3_v4_v5,
|
|
30
35
|
"V6": v6_v7,
|
|
31
36
|
"V7": v6_v7,
|
|
32
37
|
"V8": v8,
|
|
33
38
|
"V9": v9,
|
|
34
39
|
"V10": v10,
|
|
35
|
-
"V11": v11
|
|
40
|
+
"V11": v11,
|
|
41
|
+
"x1": x1,
|
|
42
|
+
"x2": x2,
|
|
43
|
+
"x3": x3,
|
|
44
|
+
"x4": x4,
|
|
45
|
+
"t1": t1,
|
|
46
|
+
"t2": t2,
|
|
47
|
+
"t3": t3,
|
|
48
|
+
"s1": s1,
|
|
49
|
+
"s2": s2,
|
|
50
|
+
"s3": s3,
|
|
51
|
+
"s4": s4,
|
|
52
|
+
"m1": m1,
|
|
53
|
+
"m2": m2,
|
|
36
54
|
}
|
|
37
55
|
|
|
38
56
|
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from mcpp.queries import Q_BINARY_EXPR, Q_IDENTIFIER, Q_NUMBER
|
|
2
|
+
|
|
3
|
+
def x1(root, sitter, lang, calls=None):
    """x1: number of return statements captured beneath ``root``.

    Registers the ``Q_RETURN_STMT`` query on ``sitter`` and counts the
    nodes it captures under the ``stmt`` tag. ``calls`` is unused.
    """
    query_name = "Q_RETURN_STMT"
    sitter.add_queries({query_name: "(return_statement) @stmt"})
    captured = sitter.captures(query_name, root, lang)
    return {"x1": len(captured.get("stmt", []))}
|
|
10
|
+
|
|
11
|
+
def x2(root, sitter, lang, calls=None):
    """x2: number of cast expressions captured beneath ``root``.

    Registers the ``Q_CAST_EXPR`` query on ``sitter`` and counts the
    nodes it captures under the ``expr`` tag. ``calls`` is unused.
    """
    query_name = "Q_CAST_EXPR"
    sitter.add_queries({query_name: "(cast_expression) @expr"})
    captured = sitter.captures(query_name, root, lang)
    return {"x2": len(captured.get("expr", []))}
|
|
18
|
+
|
|
19
|
+
def x3(root, sitter, lang, calls=None):
    """x3: number of ``declaration`` nodes captured beneath ``root``.

    Registers the ``Q_VAR_DECL`` query on ``sitter`` and counts the
    nodes it captures under the ``stmt`` tag. ``calls`` is unused.
    """
    query_name = "Q_VAR_DECL"
    sitter.add_queries({query_name: "(declaration) @stmt"})
    captured = sitter.captures(query_name, root, lang)
    return {"x3": len(captured.get("stmt", []))}
|
|
26
|
+
|
|
27
|
+
def x4(root, sitter, lang, calls=None):
    """x4: maximum number of operands in a binary expression.

    For every captured binary expression the operands are the identifiers
    and number literals found beneath it; the largest such count is
    reported (0 when no binary expression is captured).
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_IDENTIFIER": Q_IDENTIFIER,
        "Q_NUMBER": Q_NUMBER,
    })

    most_operands = 0
    for binary in sitter.captures("Q_BINARY_EXPR", root, lang).get("expr", []):
        variables = sitter.captures("Q_IDENTIFIER", binary, lang).get("variable", [])
        numbers = sitter.captures("Q_NUMBER", binary, lang).get("constant", [])
        most_operands = max(most_operands, len(variables) + len(numbers))

    return {"x4": most_operands}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from mcpp.parse import Sitter
|
|
2
2
|
from mcpp.queries import Q_FOR_STMT, Q_DO_STMT, Q_WHILE_STMT, \
|
|
3
|
-
Q_BINARY_EXPR, Q_CONDITION
|
|
3
|
+
Q_FOR_RANGE_STMT, Q_DO_STMT, Q_BINARY_EXPR, Q_CONDITION
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def c1(root, sitter, lang, calls=None):
|
|
@@ -12,15 +12,22 @@ def c1(root, sitter, lang, calls=None):
|
|
|
12
12
|
"Q_CONDITION": Q_CONDITION,
|
|
13
13
|
"Q_FOR_STMT": Q_FOR_STMT,
|
|
14
14
|
"Q_DO_STMT": Q_DO_STMT,
|
|
15
|
-
"Q_WHILE_STMT": Q_WHILE_STMT
|
|
15
|
+
"Q_WHILE_STMT": Q_WHILE_STMT,
|
|
16
16
|
})
|
|
17
17
|
logical_ops = [
|
|
18
|
-
"
|
|
19
|
-
|
|
18
|
+
"&&", "||", "and", "or",
|
|
19
|
+
]
|
|
20
|
+
loop_stmts = [
|
|
21
|
+
"for_statement", "while_statement",
|
|
20
22
|
]
|
|
21
23
|
|
|
22
24
|
complexity = c2(root, sitter, lang, calls)["C2"]
|
|
25
|
+
|
|
26
|
+
# if statements
|
|
23
27
|
for condition in sitter.captures("Q_CONDITION", root, lang).get("condition", []):
|
|
28
|
+
if condition.parent.type in loop_stmts:
|
|
29
|
+
continue
|
|
30
|
+
complexity += 1
|
|
24
31
|
for expr in sitter.captures("Q_BINARY_EXPR", condition, lang).get("expr", []):
|
|
25
32
|
if len(expr.children) != 3:
|
|
26
33
|
continue
|
|
@@ -35,12 +42,15 @@ def c1(root, sitter, lang, calls=None):
|
|
|
35
42
|
|
|
36
43
|
def c2(root, sitter, lang, calls=None):
|
|
37
44
|
"""number of for, while and do-while loops"""
|
|
38
|
-
|
|
45
|
+
loops = {
|
|
39
46
|
"Q_FOR_STMT": Q_FOR_STMT,
|
|
40
|
-
"
|
|
41
|
-
|
|
47
|
+
"Q_FOR_RANGE_STMT": Q_FOR_RANGE_STMT,
|
|
48
|
+
"Q_WHILE_STMT": Q_WHILE_STMT,
|
|
49
|
+
"Q_DO_STMT": Q_DO_STMT,
|
|
50
|
+
}
|
|
51
|
+
sitter.add_queries(loops)
|
|
42
52
|
complexity = 0
|
|
43
|
-
for query in (
|
|
53
|
+
for query in loops.keys():
|
|
44
54
|
complexity += len(sitter.captures(query, root, lang).get("stmt", []))
|
|
45
55
|
return {
|
|
46
56
|
"C2": complexity
|
|
@@ -55,14 +65,16 @@ def c3_c4(root, sitter, lang, calls=None):
|
|
|
55
65
|
- count all loops that have some loop ancestor
|
|
56
66
|
- count ancestors that are also loops
|
|
57
67
|
"""
|
|
58
|
-
|
|
68
|
+
loops = {
|
|
59
69
|
"Q_FOR_STMT": Q_FOR_STMT,
|
|
70
|
+
"Q_FOR_RANGE_STMT": Q_FOR_RANGE_STMT,
|
|
60
71
|
"Q_DO_STMT": Q_DO_STMT,
|
|
61
72
|
"Q_WHILE_STMT": Q_WHILE_STMT
|
|
62
|
-
}
|
|
73
|
+
}
|
|
74
|
+
sitter.add_queries(loops)
|
|
63
75
|
c3_val = 0
|
|
64
76
|
c4_val = 0
|
|
65
|
-
for query in (
|
|
77
|
+
for query in loops.keys():
|
|
66
78
|
for loop_node in sitter.captures(query, root, lang).get("stmt", []):
|
|
67
79
|
nesting_level = _loop_nesting_level(loop_node)
|
|
68
80
|
if nesting_level > 0:
|
|
@@ -78,7 +90,8 @@ def _loop_nesting_level(node):
|
|
|
78
90
|
loop_types = [
|
|
79
91
|
"do_statement",
|
|
80
92
|
"while_statement",
|
|
81
|
-
"for_statement"
|
|
93
|
+
"for_statement",
|
|
94
|
+
"for_range_loop",
|
|
82
95
|
]
|
|
83
96
|
parent = node.parent
|
|
84
97
|
num_loop_ancestors = 0
|
|
@@ -86,4 +99,4 @@ def _loop_nesting_level(node):
|
|
|
86
99
|
if parent.type in loop_types:
|
|
87
100
|
num_loop_ancestors += 1
|
|
88
101
|
parent = parent.parent
|
|
89
|
-
return num_loop_ancestors
|
|
102
|
+
return num_loop_ancestors
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from mcpp.queries import Q_CALL_NAME, Q_NEW_EXPRESSION, Q_SUBSCRIPT_EXPR, Q_FIELD_EXPR
|
|
2
|
+
|
|
3
|
+
def m1(root, sitter, lang, calls=None):
    """m1: number of memory allocations.

    Counts calls whose callee name contains ``alloc`` (case-insensitive),
    which covers the libc allocators as well as wrappers or custom
    allocators following that naming, plus all captured ``new``
    expressions. ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_CALL_NAME": Q_CALL_NAME,
        "Q_NEW_EXPRESSION": Q_NEW_EXPRESSION,
    })

    # Calls to allocation functions, matched by name.
    callee_names = sitter.captures("Q_CALL_NAME", root, lang).get("name", [])
    alloc_calls = sum(
        1 for callee in callee_names
        if "alloc" in callee.text.decode("utf-8").lower()
    )

    # Object instantiations via ``new``.
    new_exprs = sitter.captures("Q_NEW_EXPRESSION", root, lang).get("expr", [])

    return {"m1": alloc_calls + len(new_exprs)}
|
|
26
|
+
|
|
27
|
+
def m2(root, sitter, lang, calls=None):
    """m2: number of pointer dereferences (``*``, ``[]``, field access).

    The result is the sum of
      * pointer expressions whose text starts with ``*``,
      * subscript expressions, and
      * field expressions.

    Every query that is executed here is also registered here, so the
    metric does not depend on another metric having registered
    ``Q_POINTER_EXPR`` on the same ``sitter`` beforehand. ``calls`` is
    unused.
    """
    # Local import: this module's top-level import list does not bring
    # Q_POINTER_EXPR into scope.
    from mcpp.queries import Q_POINTER_EXPR

    sitter.add_queries({
        "Q_POINTER_EXPR": Q_POINTER_EXPR,
        "Q_SUBSCRIPT_EXPR": Q_SUBSCRIPT_EXPR,
        "Q_FIELD_EXPR": Q_FIELD_EXPR,
    })

    # Dereferences using the asterisk syntax (*ptr). The pointer-expression
    # query also captures other unary pointer operators, hence the text check.
    pointer_exprs = sitter.captures("Q_POINTER_EXPR", root, lang).get("pointer", [])
    num_ptr_expressions = sum(
        1 for ptr in pointer_exprs
        if ptr.text.decode("utf-8").startswith("*")
    )

    # Dereferences using the subscript syntax (ptr[i]).
    num_subscript_expressions = len(
        sitter.captures("Q_SUBSCRIPT_EXPR", root, lang).get("expr", [])
    )

    # Dereferences using the field expression syntax (ptr->field).
    # NOTE(review): Q_FIELD_EXPR captures every field_expression; this
    # presumably includes `obj.field` as well as `ptr->field` -- confirm
    # against the grammar whether that is intended.
    num_field_expressions = len(
        sitter.captures("Q_FIELD_EXPR", root, lang).get("expr", [])
    )

    return {
        "m2": num_ptr_expressions + num_subscript_expressions + num_field_expressions,
    }
|
|
@@ -6,6 +6,10 @@ Q_FOR_STMT = """
|
|
|
6
6
|
(for_statement) @stmt
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
+
Q_FOR_RANGE_STMT = """
|
|
10
|
+
(for_range_loop) @stmt
|
|
11
|
+
"""
|
|
12
|
+
|
|
9
13
|
Q_DO_STMT = """
|
|
10
14
|
(do_statement) @stmt
|
|
11
15
|
"""
|
|
@@ -15,14 +19,13 @@ Q_WHILE_STMT = """
|
|
|
15
19
|
"""
|
|
16
20
|
|
|
17
21
|
Q_IF_STMT = """
|
|
18
|
-
(if_statement) @
|
|
22
|
+
(if_statement) @stmt
|
|
19
23
|
"""
|
|
20
24
|
|
|
21
25
|
Q_SWITCH_STMT = """
|
|
22
26
|
(switch_statement) @stmt
|
|
23
27
|
"""
|
|
24
28
|
|
|
25
|
-
|
|
26
29
|
Q_CONDITION = """
|
|
27
30
|
(_
|
|
28
31
|
condition: ((_) @condition)
|
|
@@ -33,6 +36,18 @@ Q_BINARY_EXPR = """
|
|
|
33
36
|
(binary_expression) @expr
|
|
34
37
|
"""
|
|
35
38
|
|
|
39
|
+
Q_UPDATE_EXPR = """
|
|
40
|
+
(update_expression) @expr
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
Q_SUBSCRIPT_EXPR = """
|
|
44
|
+
(subscript_expression) @expr
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
Q_FIELD_EXPR = """
|
|
48
|
+
(field_expression) @expr
|
|
49
|
+
"""
|
|
50
|
+
|
|
36
51
|
Q_CALL_NAME = """
|
|
37
52
|
(call_expression
|
|
38
53
|
function: ((identifier) @name)
|
|
@@ -49,7 +64,19 @@ Q_IDENTIFIER = """
|
|
|
49
64
|
(identifier) @variable
|
|
50
65
|
"""
|
|
51
66
|
|
|
52
|
-
|
|
67
|
+
Q_NUMBER = """
|
|
68
|
+
(number_literal) @constant
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
#Q_FUNCTION_PARAMETER = """
|
|
72
|
+
#(parameter_declaration) @param
|
|
73
|
+
#"""
|
|
74
|
+
|
|
75
|
+
Q_FUNCTION = """
|
|
76
|
+
(function_definition) @function
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
Q_PARAMETER = """
|
|
53
80
|
(parameter_declaration) @param
|
|
54
81
|
"""
|
|
55
82
|
|
|
@@ -57,6 +84,11 @@ Q_POINTER_EXPR = """
|
|
|
57
84
|
(pointer_expression) @pointer
|
|
58
85
|
"""
|
|
59
86
|
|
|
87
|
+
Q_POINTER_IDENTIFIER = """
|
|
88
|
+
(pointer_declarator
|
|
89
|
+
(identifier) @identifier)
|
|
90
|
+
"""
|
|
91
|
+
|
|
60
92
|
Q_ASSIGNMENT_EXPR = """
|
|
61
93
|
(assignment_expression) @expr
|
|
62
94
|
"""
|
|
@@ -67,4 +99,8 @@ Q_IF_WITHOUT_ELSE = """
|
|
|
67
99
|
consequence: ((_) @then)
|
|
68
100
|
!alternative
|
|
69
101
|
) @stmt
|
|
70
|
-
"""
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
Q_NEW_EXPRESSION = """
|
|
105
|
+
(new_expression) @expr
|
|
106
|
+
"""
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
def s1(root, sitter, lang, calls=None):
    """s1: number of non-trivial numeric constants ("magic numbers").

    Every captured ``number_literal`` is converted to a number; literals
    whose value is -1, 0 or 1 are considered trivial and are not counted.

    The conversion understands the C/C++ literal forms that Python's
    ``int(text, 0)`` rejects: integer suffixes (``10u``, ``0xFFUL``),
    legacy octal (``0755``), digit separators (``1'000``) and floating
    point literals (``3.14``, ``1.0f``, ``0x1p3``). Literals that still
    cannot be parsed are skipped. ``calls`` is unused.
    """
    def parse_number(literal):
        """Return the numeric value of a C/C++ literal, or None."""
        text = literal.replace("'", "").lower()

        # Integer literals: drop u/l/z suffixes, then map legacy octal
        # ("0755") onto base 8 because int(..., 0) refuses a leading zero.
        digits = text.rstrip("ulz")
        try:
            if len(digits) > 1 and digits[0] == "0" and digits.isdigit():
                return int(digits, 8)
            return int(digits, 0)
        except ValueError:
            pass

        # Floating point literals: drop the f/l suffix. Hexadecimal floats
        # always carry a decimal exponent, so stripping "f" is safe there.
        mantissa = text.rstrip("fl")
        try:
            if text.startswith("0x"):
                return float.fromhex(mantissa)
            return float(mantissa)
        except ValueError:
            return None

    sitter.add_queries({"Q_NUMBER_LITERAL": "(number_literal) @num"})
    literal_nodes = sitter.captures("Q_NUMBER_LITERAL", root, lang).get("num", [])
    values = (parse_number(node.text.decode("utf8")) for node in literal_nodes)

    # only non-trivial constants
    magic_numbers = [
        value for value in values
        if value is not None and value not in (-1, 0, 1)
    ]
    return {"s1": len(magic_numbers)}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def s2(root, sitter, lang, calls=None):
    """s2: number of goto statements captured beneath ``root``.

    Registers the ``Q_GOTO_STMT`` query on ``sitter`` and counts the
    nodes it captures under the ``stmt`` tag. ``calls`` is unused.
    """
    query_name = "Q_GOTO_STMT"
    sitter.add_queries({query_name: "(goto_statement) @stmt"})
    captured = sitter.captures(query_name, root, lang)
    return {"s2": len(captured.get("stmt", []))}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def s3(root, sitter, lang, calls=None):
    """s3: number of function pointers.

    Counts function declarators that appear inside an initialised
    declaration plus function declarators that appear directly inside a
    parameter declaration.

    The two capture lists are only measured, never concatenated in place,
    so a list owned by the capture result is not modified. ``calls`` is
    unused.
    """
    sitter.add_queries({
        "Q_FUNCTION_POINTERS":
            "(declaration (init_declarator (function_declarator) @decl))",
    })
    sitter.add_queries({
        "Q_FUNCTION_POINTER_PARAMS":
            "(parameter_declaration (function_declarator) @decl)",
    })

    declared = sitter.captures("Q_FUNCTION_POINTERS", root, lang).get("decl", [])
    as_params = sitter.captures("Q_FUNCTION_POINTER_PARAMS", root, lang).get("decl", [])

    return {"s3": len(declared) + len(as_params)}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def s4(root, sitter, lang, calls=None):
    """s4: number of function calls whose return value is not used.

    Counts call expressions that are the direct child of an expression
    statement, as captured by the ``Q_CALLS_WO_RETURN`` query. ``calls``
    is unused.
    """
    query_name = "Q_CALLS_WO_RETURN"
    sitter.add_queries({query_name: "(expression_statement (call_expression) @expr)"})
    captured = sitter.captures(query_name, root, lang)
    return {"s4": len(captured.get("expr", []))}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
def t1(root, sitter, lang, calls=None):
    """t1: number of AST nodes in the tree rooted at ``root`` (root included).

    The tree is walked with an explicit stack instead of recursion so that
    very deep trees cannot exceed Python's recursion limit. ``sitter``,
    ``lang`` and ``calls`` are unused.
    """
    node_count = 0
    pending = [root]
    while pending:
        node = pending.pop()
        node_count += 1
        pending.extend(node.children)

    return {"t1": node_count}
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def t2(root, sitter, lang, calls=None):
    """t2: height of the AST rooted at ``root``.

    The height is the number of nodes on the longest root-to-leaf path, so
    a single node has height 1. The tree is walked with an explicit stack
    instead of recursion so that very deep trees cannot exceed Python's
    recursion limit. ``sitter``, ``lang`` and ``calls`` are unused.
    """
    tree_height = 0
    pending = [(root, 1)]
    while pending:
        node, depth = pending.pop()
        if depth > tree_height:
            tree_height = depth
        pending.extend((child, depth + 1) for child in node.children)

    return {"t2": tree_height}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def t3(root, sitter, lang, calls=None):
    """t3: average branching factor of the AST rooted at ``root``.

    The average is taken over the nodes that have at least one child;
    leaves do not contribute. A tree without any such node (``root`` is a
    leaf) yields 0.0 instead of dividing by zero.

    The tree is walked with an explicit stack instead of recursion so that
    very deep trees cannot exceed Python's recursion limit. ``sitter``,
    ``lang`` and ``calls`` are unused.
    """
    inner_nodes = 0
    total_children = 0
    pending = [root]
    while pending:
        children = pending.pop().children
        if children:
            inner_nodes += 1
            total_children += len(children)
            pending.extend(children)

    if inner_nodes == 0:
        return {"t3": 0.0}
    return {"t3": total_children / inner_nodes}
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
from collections import Counter
|
|
2
|
+
import threading
|
|
3
|
+
import itertools as it
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
|
|
6
|
+
from mcpp.parse import Sitter, get_identifiers
|
|
7
|
+
from mcpp.queries import Q_ARGLIST, Q_IDENTIFIER, Q_FUNCTION, Q_PARAMETER, \
|
|
8
|
+
Q_POINTER_EXPR, Q_ASSIGNMENT_EXPR, Q_BINARY_EXPR, Q_UPDATE_EXPR, Q_SUBSCRIPT_EXPR, \
|
|
9
|
+
Q_FIELD_EXPR, Q_CALL_NAME, Q_IF_STMT, Q_SWITCH_STMT, Q_DO_STMT, Q_WHILE_STMT, \
|
|
10
|
+
Q_FOR_STMT, Q_FOR_RANGE_STMT, Q_CONDITION, Q_IF_WITHOUT_ELSE, Q_POINTER_IDENTIFIER
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def v1(root, sitter, lang, calls=None):
    """
    V1: number of parameter variables

    Counts the parameter declarations captured inside the first function
    definition found beneath ``root``; 0 when no function definition is
    captured. ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_FUNCTION": Q_FUNCTION,
        "Q_PARAMETER": Q_PARAMETER,
    })

    definitions = sitter.captures("Q_FUNCTION", root, lang).get("function", [])
    if not definitions:
        return {"V1": 0}

    first_definition = definitions[0]
    parameters = sitter.captures("Q_PARAMETER", first_definition, lang).get("param", [])
    return {"V1": len(parameters)}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def v2(root, sitter, lang, calls=None):
    """
    V2: number of variables as parameters for callee functions

    Collects the identifiers of every captured argument list via
    ``get_identifiers`` (passing ``calls`` as its ``filter``) and reports
    the number of distinct ones.
    """
    sitter.add_queries({
        "Q_ARGLIST": Q_ARGLIST
    })

    distinct_variables = set()
    for arguments in sitter.captures("Q_ARGLIST", root, lang).get("args", []):
        distinct_variables.update(get_identifiers(sitter, arguments, lang, filter=calls))

    return {"V2": len(distinct_variables)}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def v3_v4_v5(root, sitter, lang, calls=None):
    """
    V3: number of pointer arithmetic
    V4: number of variables involved in pointer arithmetics
    V5: max pointer arithmetic a variable is involved in

    The following constructs are treated as pointer arithmetic:
      * update and binary expressions that mention an identifier declared
        through a pointer declarator,
      * subscript expressions,
      * compound assignments (``+=``, ``-=``, ...) whose left-hand side
        mentions such a pointer identifier,
      * pointer expressions using the ``*`` operator,
      * field expressions.

    V4 counts distinct variable *names*. De-duplicating the captured node
    objects instead would count occurrences of a variable rather than
    variables. ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_UPDATE_EXPR": Q_UPDATE_EXPR,
        "Q_SUBSCRIPT_EXPR": Q_SUBSCRIPT_EXPR,
        "Q_ASSIGNMENT_EXPR": Q_ASSIGNMENT_EXPR,
        "Q_POINTER_EXPR": Q_POINTER_EXPR,
        "Q_FIELD_EXPR": Q_FIELD_EXPR,
        "Q_IDENTIFIER": Q_IDENTIFIER,
        "Q_POINTER_IDENTIFIER": Q_POINTER_IDENTIFIER,
    })
    assignment_operators = {
        "+=", "-=", "*=", "/=", "|=", "&=", "^=", "<<=", ">>=", "%="
    }
    pointer_operators = {"*"}

    def identifier_names_in(node):
        """Return the set of identifier names found beneath ``node``."""
        found = sitter.captures("Q_IDENTIFIER", node, lang).get("variable", [])
        return {identifier.text.decode() for identifier in found}

    # Names of all identifiers declared through a pointer declarator.
    ptr_identifiers = sitter.captures("Q_POINTER_IDENTIFIER", root, lang).get("identifier", [])
    ptr_identifier_names = {node.text.decode() for node in ptr_identifiers}

    update_exprs = sitter.captures("Q_UPDATE_EXPR", root, lang).get("expr", [])
    binary_exprs = sitter.captures("Q_BINARY_EXPR", root, lang).get("expr", [])

    v3_pointer_arith = 0
    v4_pointer_arith_names = set()
    pointer_arith_per_identifier = defaultdict(int)

    def record(names, count_for_v4=True):
        """Attribute one pointer-arithmetic operation to every name."""
        if count_for_v4:
            v4_pointer_arith_names.update(names)
        for name in names:
            pointer_arith_per_identifier[name] += 1

    # No. of update and binary expressions with pointers involved
    for expr in it.chain(update_exprs, binary_exprs):
        names = identifier_names_in(expr)
        if names & ptr_identifier_names:
            v3_pointer_arith += 1
            record(names)

    # No. of subscript expressions
    subscript_exprs = sitter.captures("Q_SUBSCRIPT_EXPR", root, lang).get("expr", [])
    v3_pointer_arith += len(subscript_exprs)
    for expr in subscript_exprs:
        record(identifier_names_in(expr))

    # No. of compound assignments where the left hand side is a pointer
    assignment_exprs = sitter.captures("Q_ASSIGNMENT_EXPR", root, lang).get("expr", [])
    for expr in assignment_exprs:
        if expr.child_by_field_name("operator").text.decode() not in assignment_operators:
            continue
        left_names = identifier_names_in(expr.child_by_field_name("left"))
        if left_names & ptr_identifier_names:
            v3_pointer_arith += 1
            right_names = identifier_names_in(expr.child_by_field_name("right"))
            record(left_names | right_names)

    # No. of pointer dereferences with the *ptr syntax
    pointer_exprs = sitter.captures("Q_POINTER_EXPR", root, lang).get("pointer", [])
    for expr in pointer_exprs:
        if expr.child_by_field_name("operator").text.decode() not in pointer_operators:
            continue
        v3_pointer_arith += 1
        record(identifier_names_in(expr))

    # No. of field expressions (ptr->field)
    # NOTE(review): field expressions contribute to V3 and V5 but, unlike
    # every other category above, not to V4 -- confirm whether intended.
    field_exprs = sitter.captures("Q_FIELD_EXPR", root, lang).get("expr", [])
    v3_pointer_arith += len(field_exprs)
    for expr in field_exprs:
        record(identifier_names_in(expr), count_for_v4=False)

    # V5 is the largest per-variable operation count (0 without operations).
    v5_max_pointer_arith_var = max(pointer_arith_per_identifier.values(), default=0)

    return {
        "V3": v3_pointer_arith,
        "V4": len(v4_pointer_arith_names),
        "V5": v5_max_pointer_arith_var,
    }
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def v5(root, sitter, lang, calls=None):
    """
    V5: maximum number of pointer arithmetic operations a variable is involved in

    Looks at every captured binary and assignment expression that has
    exactly three children and whose middle child (the operator) contains
    one of the arithmetic operator symbols. The identifiers of such an
    expression, obtained via ``get_identifiers`` with ``calls`` as its
    ``filter``, are tallied; the highest tally is returned (0 if none).
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_ASSIGNMENT_EXPR": Q_ASSIGNMENT_EXPR,
        "Q_CALL_NAME": Q_CALL_NAME
    })
    arithmetic_symbols = (
        "+", "++", "+=",
        "-", "--", "-=",
        "*", "*=",
        "/", "/=",
    )

    usage = Counter()
    binary_exprs = sitter.captures("Q_BINARY_EXPR", root, lang).get("expr", [])
    assignment_exprs = sitter.captures("Q_ASSIGNMENT_EXPR", root, lang).get("expr", [])
    for candidate in binary_exprs + assignment_exprs:
        if len(candidate.children) != 3:
            continue
        operator_text = candidate.children[1].text.decode()
        if not any(symbol in operator_text for symbol in arithmetic_symbols):
            continue
        usage.update(get_identifiers(sitter, candidate, lang, filter=calls))

    most_used = usage.most_common(1)
    return {"V5": most_used[0][1] if most_used else 0}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def v6_v7(root, sitter, lang, calls=None):
    """
    V6: number of nested control structures
    V7: maximum level of control nesting

    A control structure (if, switch, do, while, for, range-for) is nested
    when it has at least one control structure among its ancestors; its
    nesting level is the number of such ancestors. ``calls`` is unused.
    """
    control_queries = {
        "Q_IF_STMT": Q_IF_STMT,
        "Q_SWITCH_STMT": Q_SWITCH_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_FOR_RANGE_STMT": Q_FOR_RANGE_STMT,
    }
    sitter.add_queries(control_queries)

    nested_count = 0
    deepest_level = 0
    for query_name in control_queries:
        for statement in sitter.captures(query_name, root, lang).get("stmt", []):
            level = _control_nesting_level(statement)
            if level <= 0:
                continue
            nested_count += 1
            if level > deepest_level:
                deepest_level = level

    return {
        "V6": nested_count,
        "V7": deepest_level
    }
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _control_nesting_level(node):
    """Return how many ancestors of ``node`` are control structures.

    ``node`` itself is not counted; the walk follows ``parent`` links
    until a node without a parent is reached.
    """
    control_types = (
        "if_statement",
        "switch_statement",
        "do_statement",
        "while_statement",
        "for_statement",
        "for_range_loop",
    )
    level = 0
    ancestor = node.parent
    while ancestor is not None:
        if ancestor.type in control_types:
            level += 1
        ancestor = ancestor.parent
    return level
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def v8(root, sitter, lang, calls=None):
    """
    V8: maximum number of control-dependent control structures

    Each control-structure query is evaluated in its own thread. Every
    control structure that has control ancestors is attributed to its
    outermost control ancestor (keyed by that ancestor's ``start_byte``).
    The reported value is the largest such group plus one, or 0 when no
    control structure has a control ancestor.
    """
    control_queries = {
        "Q_IF_STMT": Q_IF_STMT,
        "Q_SWITCH_STMT": Q_SWITCH_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_FOR_RANGE_STMT": Q_FOR_RANGE_STMT,
    }
    sitter.add_queries(control_queries)

    # Dependent controls per outermost control: key = start_byte of that control.
    dependents_per_control = Counter()
    counter_lock = threading.Lock()

    workers = []
    for query_name in control_queries:
        worker = threading.Thread(
            target=_v8_single_query,
            args=(root, sitter, lang, calls, query_name,
                  dependents_per_control, counter_lock),
        )
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()

    largest_group = max(dependents_per_control.values(), default=0)
    if largest_group == 0:
        return {"V8": 0}
    return {"V8": largest_group + 1}
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _v8_single_query(root, sitter, lang, calls, query, control_dependent_controls, thread_lock):
    """
    Tally the captures of one query under a control ancestor.

    For every node captured by ``query`` that has at least one control
    ancestor, the entry of ``control_dependent_controls`` keyed by the
    ``start_byte`` of the last ancestor returned by
    ``_traverse_parent_controls`` is incremented while ``thread_lock`` is held.
    ``calls`` is accepted but not used here.
    """
    capture_name = "stmt"
    if "Q_CONDITION" in query:
        capture_name = "condition"

    captured_nodes = sitter.captures(query, root, lang).get(capture_name, [])
    for captured in captured_nodes:
        control_ancestors = _traverse_parent_controls(captured)
        if not control_ancestors:
            continue
        key = control_ancestors[-1].start_byte
        with thread_lock:
            control_dependent_controls[key] += 1
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def _traverse_parent_controls(node):
    """ Walk up from ``node`` and return its control-statement ancestors, nearest first. """
    control_types = (
        "if_statement",
        "switch_statement",
        "do_statement",
        "while_statement",
        "for_statement",
        "for_range_loop",
    )

    def climb(start):
        # Yield every ancestor of ``start`` until the parent chain ends.
        step = start.parent
        while step is not None:
            yield step
            step = step.parent

    return [ancestor for ancestor in climb(node) if ancestor.type in control_types]
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def v9(root, sitter, lang, calls=None):
    """
    V9: maximum number of data-dependent control structures

    For each captured condition the distinct identifier names found inside
    it are collected, and every such name's tally grows by one. The largest
    tally is returned, or 0 when no name was tallied.
    ``calls`` is accepted but not used here.
    """
    sitter.add_queries({
        "Q_IDENTIFIER": Q_IDENTIFIER,
        "Q_CONDITION": Q_CONDITION,
    })

    # identifier name -> number of conditions mentioning it
    conditions_per_name = defaultdict(int)

    for predicate in sitter.captures("Q_CONDITION", root, lang).get("condition", []):
        variable_nodes = sitter.captures("Q_IDENTIFIER", predicate, lang).get("variable", [])
        # A set, so a name repeated within one condition counts only once.
        for name in {variable.text.decode() for variable in variable_nodes}:
            conditions_per_name[name] += 1

    return {
        "V9": max(conditions_per_name.values(), default=0),
    }
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def v10(root, sitter, lang, calls=None):
    """
    V10: number of if statements without else

    Registers the ``Q_IF_WITHOUT_ELSE`` query and returns how many ``stmt``
    captures it yields below ``root``. ``calls`` is accepted but not used.
    """
    query_name = "Q_IF_WITHOUT_ELSE"
    sitter.add_queries({query_name: Q_IF_WITHOUT_ELSE})

    captures = sitter.captures(query_name, root, lang)
    return {
        "V10": len(captures.get("stmt", []))
    }
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
def v11(root, sitter, lang, calls=None):
    """
    V11: number of variables in control structures (in each predicate)

    The identifiers returned by ``get_identifiers`` for every captured
    condition are merged into one set, so an identifier that appears in
    several predicates is counted once.

    :param root: node below which conditions are searched
    :param sitter: object providing ``add_queries`` and ``captures``
    :param lang: language handle forwarded to ``sitter`` and ``get_identifiers``
    :param calls: forwarded to ``get_identifiers`` as its ``filter`` argument
    :return: dict with the single key ``"V11"``
    """
    sitter.add_queries({
        "Q_CONDITION": Q_CONDITION
    })

    conditions = sitter.captures("Q_CONDITION", root, lang).get("condition", [])
    identifiers = set()
    for condition in conditions:
        identifiers.update(get_identifiers(sitter, condition, lang, filter=calls))

    return {
        "V11": len(identifiers),
    }
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mcpp
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.1
|
|
4
4
|
Summary: McCabe++ (mcpp): cyclomatic complexity and other vulnerability-related code metrics
|
|
5
5
|
Author-email: Lukas Pirch <lukas.pirch@tu-berlin.de>
|
|
6
6
|
License: MIT License
|
|
@@ -26,18 +26,17 @@ License: MIT License
|
|
|
26
26
|
SOFTWARE.
|
|
27
27
|
|
|
28
28
|
Keywords: vulnerability,code metric,static analysis
|
|
29
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
30
29
|
Classifier: Programming Language :: Python
|
|
31
30
|
Classifier: Programming Language :: Python :: 3
|
|
32
|
-
Requires-Python:
|
|
31
|
+
Requires-Python: <3.14,>=3.9
|
|
33
32
|
Description-Content-Type: text/markdown
|
|
34
33
|
License-File: LICENSE
|
|
35
34
|
Requires-Dist: hydra-core>=1.3.2
|
|
36
|
-
Requires-Dist: tree-sitter>=0.
|
|
35
|
+
Requires-Dist: tree-sitter>=0.24.0
|
|
37
36
|
Requires-Dist: tree-sitter-c>=0.23.0
|
|
38
37
|
Requires-Dist: tree-sitter-cpp>=0.23.0
|
|
39
|
-
Requires-Dist: tqdm>=4.66.
|
|
40
|
-
Requires-Dist: loguru>=0.7.
|
|
38
|
+
Requires-Dist: tqdm>=4.66.0
|
|
39
|
+
Requires-Dist: loguru>=0.7.0
|
|
41
40
|
|
|
42
41
|
# McCabe++ (mcpp)
|
|
43
42
|
|
|
@@ -84,6 +83,23 @@ See also the corresponding [repo](https://github.com/wsbrg/crashminer).
|
|
|
84
83
|
| | V10 | number of if structures without else |
|
|
85
84
|
| | V11 | number of variables involved in control predicates |
|
|
86
85
|
|
|
86
|
+
## Additional Metrics
|
|
87
|
+
|
|
88
|
+
| Dimension | ID | Metric Description |
|
|
89
|
+
|-------------------|----|---------------------------------------------------------|
|
|
90
|
+
| XD: Extra | x1 | number of return statements |
|
|
91
|
+
| | x2 | number of cast expressions |
|
|
92
|
+
| | x3 | number of variable declarations |
|
|
93
|
+
| | x4 | maximum number of operands in an expression |
|
|
94
|
+
| TD: AST Structure | t1 | number of AST nodes (descendants) |
|
|
95
|
+
| | t2 | height of the AST |
|
|
96
|
+
| | t3 | average branching factor of the AST |
|
|
97
|
+
| SD: Code Smells | s1 | number of non-trivial numeric constants (magic numbers) |
|
|
98
|
+
| | s2 | number of goto statements |
|
|
99
|
+
| | s3 | number of function pointers |
|
|
100
|
+
| | s4 | number of function calls without return value usage |
|
|
101
|
+
| MD: Memory Ops | m1 | number of memory allocations (malloc, alloc, new, etc.) |
|
|
102
|
+
| | m2 | number of pointer dereferences (`*`, `[]`, `->`) |
|
|
87
103
|
|
|
88
104
|
|
|
89
105
|
## Setup
|
|
@@ -4,10 +4,14 @@ pyproject.toml
|
|
|
4
4
|
requirements.txt
|
|
5
5
|
src/mcpp/__init__.py
|
|
6
6
|
src/mcpp/__main__.py
|
|
7
|
+
src/mcpp/additional.py
|
|
7
8
|
src/mcpp/complexity.py
|
|
8
9
|
src/mcpp/config.py
|
|
10
|
+
src/mcpp/memory.py
|
|
9
11
|
src/mcpp/parse.py
|
|
10
12
|
src/mcpp/queries.py
|
|
13
|
+
src/mcpp/smell.py
|
|
14
|
+
src/mcpp/tree.py
|
|
11
15
|
src/mcpp/vulnerability.py
|
|
12
16
|
src/mcpp.egg-info/PKG-INFO
|
|
13
17
|
src/mcpp.egg-info/SOURCES.txt
|
|
@@ -1,270 +0,0 @@
|
|
|
1
|
-
from collections import Counter
|
|
2
|
-
import threading
|
|
3
|
-
|
|
4
|
-
from mcpp.parse import Sitter, get_identifiers
|
|
5
|
-
from mcpp.queries import Q_ARGLIST, Q_IDENTIFIER, Q_FUNCTION_PARAMETER, \
|
|
6
|
-
Q_POINTER_EXPR, Q_ASSIGNMENT_EXPR, Q_BINARY_EXPR, Q_CALL_NAME, \
|
|
7
|
-
Q_IF_STMT, Q_SWITCH_STMT, Q_DO_STMT, Q_WHILE_STMT, Q_FOR_STMT, Q_CONDITION, \
|
|
8
|
-
Q_IF_WITHOUT_ELSE
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def v1(root, sitter, lang, calls=None):
    """
    V1: number of variables as parameters for callee functions

    Every ``args`` capture of ``Q_ARGLIST`` is passed to ``get_identifiers``
    (with ``calls`` as its ``filter``); the identifiers returned for all
    argument lists are counted, repeats included.
    """
    sitter.add_queries({"Q_ARGLIST": Q_ARGLIST})

    argument_lists = sitter.captures("Q_ARGLIST", root, lang).get("args", [])
    vars_in_calls = [
        variable
        for argument_list in argument_lists
        for variable in get_identifiers(sitter, argument_list, lang, filter=calls)
    ]

    return {
        "V1": len(vars_in_calls)
    }
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def v2(root, sitter, lang, calls=None):
    """
    V2: number of ``param`` captures of the ``Q_FUNCTION_PARAMETER`` query

    NOTE(review): the previous summary line repeated V1's wording; this
    presumably counts the parameters of the analysed function - confirm
    against the query definition. ``calls`` is accepted but not used.
    """
    query_name = "Q_FUNCTION_PARAMETER"
    sitter.add_queries({query_name: Q_FUNCTION_PARAMETER})

    captured = sitter.captures(query_name, root, lang)
    return {
        "V2": len(captured.get("param", []))
    }
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def v3_v4(root, sitter, lang, calls=None):
    """
    V3: number of pointer arithmetic operations
    V4: number of variables involved in pointer arithmetics

    A ``pointer`` capture counts towards V3 when the text of its parent node
    contains any of the operator strings below. V4 adds up the identifiers
    that ``get_identifiers`` returns for each such parent, repeats included.
    """
    sitter.add_queries({"Q_POINTER_EXPR": Q_POINTER_EXPR})

    arith_ops = [
        "+", "++", "+=",
        "-", "--", "-=",
        "*=",  # * excluded (same as pointer reference)
        "/", "/=",
        "^", "^=",
        "&=",  # & excluded (same as pointer dereference)
        "|", "|="
    ]

    num_pointer_arith = 0
    num_pointer_arith_vars = 0
    for pointer in sitter.captures("Q_POINTER_EXPR", root, lang).get("pointer", []):
        enclosing = pointer.parent
        enclosing_text = enclosing.text.decode()
        # Plain substring test against the whole parent expression text.
        if not any(op in enclosing_text for op in arith_ops):
            continue
        num_pointer_arith += 1
        num_pointer_arith_vars += sum(
            1 for _ in get_identifiers(sitter, enclosing, lang, filter=calls)
        )

    return {
        "V3": num_pointer_arith,
        "V4": num_pointer_arith_vars
    }
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def v5(root, sitter, lang, calls=None):
    """
    V5: maximum number of pointer arithmetic operations a variable is involved in

    Looks at every ``expr`` capture of the binary and assignment expression
    queries that has exactly three children. When the text of the middle
    child contains one of the operator strings below, each identifier that
    ``get_identifiers`` returns for the expression is tallied. The highest
    tally is returned, or 0 when nothing was tallied.
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_ASSIGNMENT_EXPR": Q_ASSIGNMENT_EXPR,
        "Q_CALL_NAME": Q_CALL_NAME
    })
    arith_ops = [
        "+", "++", "+=",
        "-", "--", "-=",
        "*", "*=",
        "/", "/="
    ]

    binary_exprs = sitter.captures("Q_BINARY_EXPR", root, lang).get("expr", [])
    assignment_exprs = sitter.captures("Q_ASSIGNMENT_EXPR", root, lang).get("expr", [])

    uses_per_variable = Counter()
    for expression in binary_exprs + assignment_exprs:
        parts = expression.children
        if len(parts) != 3:
            continue
        operator_text = parts[1].text.decode()
        if not any(op in operator_text for op in arith_ops):
            continue
        uses_per_variable.update(get_identifiers(sitter, expression, lang, filter=calls))

    return {
        "V5": max(uses_per_variable.values(), default=0)
    }
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def v6_v7(root, sitter, lang, calls=None):
    """
    V6: number of nested control structures
    V7: maximum level of control nesting

    The nesting level of every ``stmt`` capture of the control-statement
    queries is computed with ``_control_nesting_level``. V6 counts the
    captures whose level is above zero; V7 is the highest level seen
    (0 when there are no captures). ``calls`` is accepted but not used.
    """
    control_queries = {
        "Q_IF_STMT": Q_IF_STMT,
        "Q_SWITCH_STMT": Q_SWITCH_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
        "Q_FOR_STMT": Q_FOR_STMT
    }
    sitter.add_queries(control_queries)

    levels = [
        _control_nesting_level(statement)
        for query_name in control_queries
        for statement in sitter.captures(query_name, root, lang).get("stmt", [])
    ]

    return {
        "V6": sum(1 for level in levels if level > 0),
        "V7": max([0, *levels])
    }
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def _control_nesting_level(node):
    """
    Count the control-statement ancestors of ``node``.

    Starting at ``node.parent`` and moving up until the parent is ``None``,
    each ancestor whose ``type`` is listed below adds one to the count.
    ``node`` itself is not examined.
    """
    control_types = (
        "if_statement",
        "switch_statement",
        "do_statement",
        "while_statement",
        "for_statement",
    )
    level = 0
    ancestor = node.parent
    while ancestor is not None:
        if ancestor.type in control_types:
            level = level + 1
        ancestor = ancestor.parent
    return level
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def v8(root, sitter, lang, calls=None):
    """
    V8: maximum number of control-dependent control structures

    One worker thread is started per query below. The workers share a
    ``Counter`` guarded by a lock and increment the entry keyed by the start
    byte of a control ancestor of each node they capture. The largest entry
    is returned, or 0 when nothing was counted.
    """
    queries = {
        "Q_IF_STMT": Q_IF_STMT,
        "Q_SWITCH_STMT": Q_SWITCH_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_CONDITION": Q_CONDITION
    }
    sitter.add_queries(queries)

    # count dependent controls under another control: key = start_byte of parent in function
    tally = Counter()
    tally_lock = threading.Lock()
    workers = []
    for query_name in queries:
        worker = threading.Thread(
            target=_v8_single_query,
            args=(root, sitter, lang, calls, query_name, tally, tally_lock),
        )
        worker.start()
        workers.append(worker)
    for worker in workers:
        worker.join()

    return {
        "V8": max(tally.values(), default=0)
    }
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
def _v8_single_query(root, sitter, lang, calls, query, control_dependent_controls, thread_lock):
    """
    Run one query and record its captures under a control ancestor.

    Reads the ``condition`` captures when the query name contains
    ``"Q_CONDITION"`` and the ``stmt`` captures otherwise. For each captured
    node with control ancestors, the counter entry keyed by the
    ``start_byte`` of the last ancestor returned by
    ``_traverse_parent_controls`` is incremented under ``thread_lock``.
    ``calls`` is accepted but not used here.
    """
    is_condition_query = "Q_CONDITION" in query
    capture_name = "condition" if is_condition_query else "stmt"

    for captured in sitter.captures(query, root, lang).get(capture_name, []):
        ancestors = _traverse_parent_controls(captured)
        if ancestors:
            last_ancestor = ancestors[-1]
            with thread_lock:
                control_dependent_controls[last_ancestor.start_byte] += 1
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
def _traverse_parent_controls(node):
    """ Climb the parent chain of ``node`` and return its control-statement ancestors, nearest first. """
    control_types = (
        "if_statement",
        "switch_statement",
        "do_statement",
        "while_statement",
        "for_statement",
    )
    collected = []
    ancestor = node.parent
    while ancestor is not None:
        is_control = ancestor.type in control_types
        if is_control:
            collected += [ancestor]
        ancestor = ancestor.parent
    return collected
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
def v9(root, sitter, lang, calls=None):
    """
    V9: maximum number of data-dependent control structures

    Inside each captured condition, every binary expression with exactly
    three children whose middle child's text is one of ``&``, ``&&``, ``|``
    or ``||`` contributes the identifiers that ``get_identifiers`` returns
    for it to a tally. The highest tally is returned, or 0 when nothing was
    tallied.
    """
    sitter.add_queries({
        "Q_CONDITION": Q_CONDITION,
        "Q_BINARY_EXPR": Q_BINARY_EXPR
    })
    logical_ops = [
        "&", "&&",
        "|", "||"
    ]

    predicates = sitter.captures("Q_CONDITION", root, lang).get("condition", [])
    uses_per_variable = Counter()
    for predicate in predicates:
        for expression in sitter.captures("Q_BINARY_EXPR", predicate, lang).get("expr", []):
            parts = expression.children
            if len(parts) != 3:
                continue
            # Only the operator (the middle child) decides whether to tally.
            if parts[1].text.decode() not in logical_ops:
                continue
            uses_per_variable.update(get_identifiers(sitter, expression, lang, filter=calls))

    return {
        "V9": max(uses_per_variable.values(), default=0)
    }
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def v10(root, sitter, lang, calls=None):
    """
    V10: number of if statements without else

    Returns the number of ``stmt`` captures that the ``Q_IF_WITHOUT_ELSE``
    query produces below ``root``. ``calls`` is accepted but not used.
    """
    sitter.add_queries({"Q_IF_WITHOUT_ELSE": Q_IF_WITHOUT_ELSE})

    matches = sitter.captures("Q_IF_WITHOUT_ELSE", root, lang)
    lone_ifs = matches.get("stmt", [])
    return {"V10": len(lone_ifs)}
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def v11(root, sitter, lang, calls=None):
    """
    V11: number of variables in control structures (in each predicate)

    Adds up, over all captured conditions, the number of identifiers that
    ``get_identifiers`` returns for the condition (with ``calls`` as its
    ``filter``). Identifiers shared between conditions are counted each time.
    """
    sitter.add_queries({"Q_CONDITION": Q_CONDITION})

    predicates = sitter.captures("Q_CONDITION", root, lang).get("condition", [])
    total = sum(
        len(get_identifiers(sitter, predicate, lang, filter=calls))
        for predicate in predicates
    )
    return {
        "V11": total
    }
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|