mcpp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcpp/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ import os
2
+ from importlib import resources
3
+
4
+ from mcpp.__main__ import extract, extract_single, METRICS
5
+
6
+
7
# Locate this package on disk and derive two directories from it.
with resources.path("mcpp", "__init__.py") as root_path:
    # root_path points at this file: parents[0] is the package directory,
    # parents[1] is the directory that contains the package.
    PKG_ROOT = root_path.parents[1].resolve()
    REPO_ROOT = PKG_ROOT.parents[0].resolve()

# Publish both locations through the process environment; the packaged
# config.yaml reads REPO_ROOT via ${oc.env:REPO_ROOT}.
os.environ.update({
    'PKG_ROOT': str(PKG_ROOT),
    'REPO_ROOT': str(REPO_ROOT),
})
mcpp/__main__.py ADDED
@@ -0,0 +1,75 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import List
4
+ from collections import defaultdict
5
+ from importlib.resources import files
6
+
7
+ import hydra
8
+ from tqdm import tqdm
9
+
10
+ from mcpp.config import Config
11
+ from mcpp.parse import Sitter, get_call_names
12
+ from mcpp.complexity import c1, c2, c3_c4
13
+ from mcpp.vulnerability import v1, v2, v3_v4, v5, v6_v7, v8, v9, v10, v11
14
+
15
# Resolve the packaged default Hydra configuration (mcpp/assets/config.yaml).
# The resource used to be entered directly as a context manager, which only
# worked because it happened to be a pathlib.Path; Python 3.13 removed
# context-manager support from pathlib.Path. importlib.resources.as_file is
# the supported way to obtain a real filesystem path for a resource.
from importlib.resources import as_file

with as_file(files("mcpp.assets") / "config.yaml") as p:
    # Hydra takes the directory and the file name as separate arguments.
    config_path = str(p.parent)
    config_name = str(p.name)
18
+
19
+
20
# Maps every metric name to the function that computes it. Some functions
# produce two metrics at once (c3_c4, v3_v4, v6_v7) and are therefore
# registered under both names.
METRICS = {
    # complexity metrics
    "C1": c1,
    "C2": c2,
    "C3": c3_c4,
    "C4": c3_c4,
    # vulnerability metrics
    "V1": v1,
    "V2": v2,
    "V3": v3_v4,
    "V4": v3_v4,
    "V5": v5,
    "V6": v6_v7,
    "V7": v6_v7,
    "V8": v8,
    "V9": v9,
    "V10": v10,
    "V11": v11,
}
37
+
38
+
39
@hydra.main(version_base=None, config_path=config_path, config_name=config_name)
def main(cfg: Config):
    """Command line entry point: compute metrics and write them as JSON.

    If ``cfg.in_path`` is a directory, every path below it that matches
    ``**/source`` is analysed (with a progress bar); otherwise ``cfg.in_path``
    itself is treated as the single input file. The result of ``extract`` is
    written to ``cfg.out_path`` with an indent of 4.
    """
    source = cfg.in_path
    if source.is_dir():
        candidates = list(source.glob("**/source"))
        in_files = tqdm(candidates)
    else:
        in_files = [source]

    results = extract(in_files, cfg.metrics)

    with open(cfg.out_path, "w") as f:
        json.dump(results, f, indent=4)
53
+
54
+
55
def extract(in_files: List[Path], metrics: List[str] = None):
    """Compute the selected metrics for every file in ``in_files``.

    Args:
        in_files: iterable of paths to C/C++ source files.
        metrics: names of the metrics to compute (keys of ``METRICS``).
            ``None`` (the default) selects all of them. Names that are not
            in ``METRICS`` are ignored.

    Returns:
        A mapping ``str(path) -> {metric name: value}``. Because some metric
        functions compute two metrics at once, the per-file dict can contain
        the sibling of a requested metric as well (e.g. C4 alongside C3).
    """
    if metrics is None:
        metrics = list(METRICS.keys())

    # Several names map to the same function (C3/C4, V3/V4, V6/V7). Keep each
    # function once, in METRICS order, so it is not evaluated twice per file.
    selected = list(dict.fromkeys(
        fun for name, fun in METRICS.items() if name in metrics
    ))

    sitter = Sitter("c", "cpp")
    results = defaultdict(dict)
    for path in in_files:
        tree, lang = sitter.parse_file(path)
        root = tree.root_node
        # Names of called functions; the metric functions use them to tell
        # function names apart from variables.
        calls = set(get_call_names(sitter, root, lang))
        res = {}
        for fun in selected:
            res.update(fun(root, sitter, lang, calls))
        results[str(path)] = res
    return results
68
+
69
+
70
def extract_single(in_file: Path, metrics: List[str]):
    """Run ``extract`` on a single file.

    Returns the same mapping as ``extract``, containing one entry keyed by
    ``str(in_file)``.
    """
    single_file_batch = [in_file]
    return extract(single_file_batch, metrics)
72
+
73
+
74
# Run the Hydra CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,21 @@
1
+ defaults:
2
+ - /mcpp.config
3
+ - _self_
4
+
5
+
6
+ in_path: ${paths.data_root}/CrashMiner/functions
7
+ out_path: ${paths.out_root}/complexity.json
8
+ metrics: [C1, C2, C3, C4, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11]
9
+
10
+
11
+ paths:
12
+ repo_root: ${oc.env:REPO_ROOT}
13
+ lib_root: ${paths.repo_root}/lib
14
+ data_root: ./data
15
+ out_root: ./out-data
16
+ log_root: ${paths.out_root}/logs
17
+
18
+
19
+ hydra:
20
+ run:
21
+ dir: ${paths.log_root}/mcpp-${now:%Y-%m-%d-%H-%M-%S}
mcpp/complexity.py ADDED
@@ -0,0 +1,92 @@
1
+ from mcpp.parse import Sitter
2
+ from mcpp.queries import Q_FOR_STMT, Q_DO_STMT, Q_WHILE_STMT, \
3
+ Q_BINARY_EXPR, Q_CONDITION
4
+
5
+
6
def c1(root, sitter, lang, calls=None):
    """C1: cyclomatic complexity (McCabe).

    Computed as the loop count reported by ``c2`` plus, for every node
    captured as ``condition``, one for the predicate itself and one for each
    three-child binary expression inside it whose operator is ``&``, ``&&``,
    ``|`` or ``||``.

    NOTE(review): the usual definition adds a constant 1; this implementation
    does not add it -- confirm whether that is intended.
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_CONDITION": Q_CONDITION,
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
    })
    logical_ops = ("&", "&&", "|", "||")

    complexity = c2(root, sitter, lang, calls)["C2"]
    for condition, tag in sitter.captures("Q_CONDITION", root, lang):
        # Q_CONDITION also captures the enclosing statement; only the
        # predicate itself is of interest here.
        if tag != "condition":
            continue
        complexity += 1
        for expr, _ in sitter.captures("Q_BINARY_EXPR", condition, lang):
            parts = expr.children
            # A plain binary expression is (left, operator, right).
            if len(parts) == 3 and parts[1].text.decode() in logical_ops:
                complexity += 1
    return {"C1": complexity}
37
+
38
+
39
def c2(root, sitter, lang, calls=None):
    """C2: number of for, while and do-while loops.

    Args:
        root: syntax tree node to search below.
        sitter: ``Sitter`` instance used to run the queries.
        lang: language key the tree was parsed with.
        calls: unused; accepted so all metric functions share one signature.

    Returns:
        ``{"C2": <number of loop statements>}``.
    """
    # do-while loops were previously left out although the metric is defined
    # over all three loop kinds (and c3_c4 already counts them).
    loop_queries = {
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
    }
    sitter.add_queries(loop_queries)
    complexity = sum(
        len(sitter.captures(name, root, lang)) for name in loop_queries
    )
    return {"C2": complexity}
51
+
52
+
53
def c3_c4(root, sitter, lang, calls=None):
    """C3 and C4: nested loops and loop nesting depth.

    C3: number of for, while and do-while loops that have at least one loop
        among their ancestors.
    C4: the largest number of loop ancestors found for any loop.
    """
    loop_queries = {
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
    }
    sitter.add_queries(loop_queries)

    depths = [
        _loop_nesting_level(loop_node)
        for query in loop_queries
        for loop_node, _ in sitter.captures(query, root, lang)
    ]
    nested_depths = [depth for depth in depths if depth > 0]
    return {
        "C3": len(nested_depths),
        "C4": max(nested_depths, default=0),
    }
78
+
79
+
80
+ def _loop_nesting_level(node):
81
+ loop_types = [
82
+ "do_statement",
83
+ "while_statement",
84
+ "for_statement"
85
+ ]
86
+ parent = node.parent
87
+ num_loop_ancestors = 0
88
+ while parent is not None:
89
+ if parent.type in loop_types:
90
+ num_loop_ancestors += 1
91
+ parent = parent.parent
92
+ return num_loop_ancestors
mcpp/config.py ADDED
@@ -0,0 +1,27 @@
1
+ from typing import List
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+ from hydra.core.config_store import ConfigStore
6
+
7
+
8
@dataclass
class PathConfig:
    """Schema of the ``paths`` section of the configuration."""

    # Field names match the keys under `paths:` in the packaged config.yaml.
    repo_root: Path
    lib_root: Path
    data_root: Path
    out_root: Path
    log_root: Path
15
+
16
+
17
+
18
@dataclass
class Config:
    """Schema of the top-level configuration consumed by the CLI."""

    # Input file or directory, and the JSON file the results are written to.
    in_path: Path
    out_path: Path
    # Names of the metrics to compute.
    metrics: List[str]
    # Nested `paths` section.
    paths: PathConfig
24
+
25
+
26
# Register the schema under the name referenced by the `defaults` list of
# the packaged config.yaml (`/mcpp.config`).
cs = ConfigStore.instance()
cs.store(node=Config, name="mcpp.config")
mcpp/parse.py ADDED
@@ -0,0 +1,81 @@
1
+ from dataclasses import dataclass
2
+ from pathlib import Path
3
+ from importlib.resources import files
4
+
5
+ from tree_sitter import Language, Parser
6
+ import tree_sitter_c as ts_c
7
+ import tree_sitter_cpp as ts_cpp
8
+
9
+ from mcpp.queries import Q_ERROR_NODE, Q_CALL_NAME, Q_IDENTIFIER
10
+
11
+
12
# Grammars available to Sitter, keyed by the language name callers pass in.
LANGS = {
    "c": Language(ts_c.language()),
    "cpp": Language(ts_cpp.language()),
}
16
+
17
+
18
class Sitter(object):
    """Thin wrapper around tree-sitter parsers and named queries.

    Holds one parser per requested language and a registry of query sources
    (name -> query string) that can be run against any node.
    """

    def __init__(self, lib_path: Path = None, *languages):
        """Create parsers for the given languages.

        Args:
            lib_path: not used by this class. Callers in this package invoke
                ``Sitter("c", "cpp")``, which binds ``"c"`` to this parameter;
                a string naming a known language is therefore treated as a
                language so that such calls load every requested grammar.
            *languages: keys of ``LANGS`` to enable.

        Raises:
            KeyError: if a requested language is not in ``LANGS``.
        """
        if isinstance(lib_path, str) and lib_path in LANGS:
            languages = (lib_path,) + languages
        self.langs = {k: v for k, v in LANGS.items() if k in languages}
        self.parser = {lang: self._init_parser(lang) for lang in languages}
        self.queries = {"Q_ERROR_NODE": Q_ERROR_NODE}

    def _init_parser(self, language: str):
        """Return a parser bound to the grammar registered for ``language``."""
        # Passing the language to the constructor replaces the deprecated
        # Parser.set_language() call.
        return Parser(self.langs[language])

    def parse_lang(self, source: str, lang: str):
        """Parse ``source`` with the parser of ``lang`` and return the tree."""
        return self.parser[lang].parse(source.encode("utf-8"))

    def parse(self, source: str):
        """Parse ``source`` with every enabled language and keep the best tree.

        The tree with the fewest ERROR nodes wins; on a tie the language that
        comes first in ``self.langs`` is kept.

        Returns:
            ``(tree, language name)``, or ``(None, None)`` when no language
            is enabled.
        """
        min_errors = None
        best_tree = None
        best_lang = None
        for lang in self.langs:
            tree = self.parse_lang(source, lang)
            num_errors = self._count_error_nodes(tree, lang)
            if min_errors is None or num_errors < min_errors:
                best_tree, best_lang, min_errors = tree, lang, num_errors
        return best_tree, best_lang

    def parse_file(self, path: Path):
        """Read ``path`` as UTF-8 text and parse it (see ``parse``)."""
        # The text is re-encoded as UTF-8 for the parser, so decode it as
        # UTF-8 regardless of the platform default. Undecodable bytes are
        # replaced so that one odd file does not abort a whole batch.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            return self.parse(f.read())

    def _count_error_nodes(self, tree, lang):
        """Return the number of ERROR nodes in ``tree``."""
        return len(self.captures("Q_ERROR_NODE", tree.root_node, lang))

    def add_queries(self, queries):
        """Register query sources; existing names are overwritten."""
        self.queries.update(queries)

    def captures(self, query, node, lang):
        """Run the registered query ``query`` on ``node``.

        Returns:
            A list of ``(node, capture name)`` tuples.
        """
        language = self.langs[lang]
        result = language.query(self.queries[query]).captures(node)
        # Some py-tree-sitter versions return {capture name: [nodes]} instead
        # of a list of tuples; normalise so callers can always unpack pairs.
        if isinstance(result, dict):
            return [(n, tag) for tag, nodes in result.items() for n in nodes]
        return result
61
+
62
+
63
def get_call_names(sitter, root, lang):
    """Return the names of all functions called below ``root``.

    Only calls whose callee is a plain identifier are matched by the query;
    a name appears once per call site.
    """
    sitter.add_queries({"Q_CALL_NAME": Q_CALL_NAME})
    return [
        node.text.decode()
        for node, tag in sitter.captures("Q_CALL_NAME", root, lang)
        if tag == "name"  # skip the capture of the whole call expression
    ]
71
+
72
+
73
def get_identifiers(sitter, root, lang, filter=None):
    """Return the text of every identifier node below ``root``.

    Args:
        filter: optional collection of names to leave out (the callers pass
            the set of called function names); ``None`` keeps everything.
    """
    sitter.add_queries({"Q_IDENTIFIER": Q_IDENTIFIER})
    names = (
        node.text.decode()
        for node, _ in sitter.captures("Q_IDENTIFIER", root, lang)
    )
    if filter is None:
        return list(names)
    return [name for name in names if name not in filter]
mcpp/queries.py ADDED
@@ -0,0 +1,70 @@
1
# Tree-sitter query sources shared by the metric modules.
# The name after each `@` is the capture tag reported for a matched node.

# --- syntax errors -------------------------------------------------------
Q_ERROR_NODE = """
(ERROR) @error_node
"""

# --- loops ---------------------------------------------------------------
Q_FOR_STMT = """
(for_statement) @for_stmt
"""

Q_DO_STMT = """
(do_statement) @do_stmt
"""

Q_WHILE_STMT = """
(while_statement) @while_stmt
"""

# --- branches ------------------------------------------------------------
Q_IF_STMT = """
(if_statement) @if_stmt
"""

Q_SWITCH_STMT = """
(switch_statement) @switch_stmt
"""


# Any node with a `condition` field: captures the condition itself and the
# node that owns it.
Q_CONDITION = """
(_
condition: ((_) @condition)
) @control_stmnt
"""

# --- expressions ---------------------------------------------------------
Q_BINARY_EXPR = """
(binary_expression) @binary_expression
"""

# Calls whose callee is a plain identifier: captures the name and the call.
Q_CALL_NAME = """
(call_expression
function: ((identifier) @name)
) @call
"""

# Argument list of a call: captures the list and the call.
Q_ARGLIST = """
(call_expression
arguments: ((argument_list) @args)
) @call
"""

Q_IDENTIFIER = """
(identifier) @variable
"""

Q_FUNCTION_PARAMETER = """
(parameter_declaration) @param
"""

Q_POINTER_EXPR = """
(pointer_expression) @pointer
"""

Q_ASSIGNMENT_EXPR = """
(assignment_expression) @assignment
"""

# if statements with no `alternative` field: captures the condition, the
# consequence and the statement (three captures per match).
Q_IF_WITHOUT_ELSE = """
(if_statement
condition: ((_) @if)
consequence: ((_) @then)
!alternative
) @if_stmt
"""
mcpp/vulnerability.py ADDED
@@ -0,0 +1,272 @@
1
+ from collections import Counter
2
+ import threading
3
+
4
+ from mcpp.parse import Sitter, get_identifiers
5
+ from mcpp.queries import Q_ARGLIST, Q_IDENTIFIER, Q_FUNCTION_PARAMETER, \
6
+ Q_POINTER_EXPR, Q_ASSIGNMENT_EXPR, Q_BINARY_EXPR, Q_CALL_NAME, \
7
+ Q_IF_STMT, Q_SWITCH_STMT, Q_DO_STMT, Q_WHILE_STMT, Q_FOR_STMT, Q_CONDITION, \
8
+ Q_IF_WITHOUT_ELSE
9
+
10
+
11
def v1(root, sitter, lang, calls=None):
    """V1: number of variables passed as arguments to called functions.

    Counts every identifier inside the argument list of every call below
    ``root``, leaving out names contained in ``calls``.

    NOTE(review): the package README labels V1 as "number of parameter
    variables" and V2 as the callee-argument count -- confirm which
    labelling is intended.
    """
    sitter.add_queries({"Q_ARGLIST": Q_ARGLIST})

    num_vars = 0
    for node, tag in sitter.captures("Q_ARGLIST", root, lang):
        if tag == "args":  # skip the capture of the whole call expression
            num_vars += len(get_identifiers(sitter, node, lang, filter=calls))
    return {"V1": num_vars}
28
+
29
+
30
def v2(root, sitter, lang, calls=None):
    """V2: number of parameter declarations below ``root``.

    NOTE(review): the package README labels this quantity V1 ("number of
    parameter variables") -- confirm which labelling is intended.
    """
    sitter.add_queries({"Q_FUNCTION_PARAMETER": Q_FUNCTION_PARAMETER})
    num_params = len(sitter.captures("Q_FUNCTION_PARAMETER", root, lang))
    return {"V2": num_params}
42
+
43
+
44
def v3_v4(root, sitter, lang, calls=None):
    """V3 and V4: pointer arithmetic.

    V3: number of pointer expressions whose parent node's text contains an
        arithmetic operator.
    V4: number of identifiers (excluding names in ``calls``) found in the
        parents of those pointer expressions.
    """
    sitter.add_queries({"Q_POINTER_EXPR": Q_POINTER_EXPR})
    arith_ops = (
        "+", "++", "+=",
        "-", "--", "-=",
        "*=",  # plain * is left out: it is also the dereference operator
        "/", "/=",
        "^", "^=",
        "&=",  # plain & is left out: it is also the address-of operator
        "|", "|=",
    )

    num_ops = 0
    num_vars = 0
    for pointer, _ in sitter.captures("Q_POINTER_EXPR", root, lang):
        context = pointer.parent
        context_text = context.text.decode()
        # Plain substring test on the source text of the parent node.
        if not any(op in context_text for op in arith_ops):
            continue
        num_ops += 1
        num_vars += len(get_identifiers(sitter, context, lang, filter=calls))

    return {"V3": num_ops, "V4": num_vars}
74
+
75
+
76
def v5(root, sitter, lang, calls=None):
    """V5: maximum number of arithmetic operations a variable is involved in.

    Every three-child binary or assignment expression whose operator text
    contains an arithmetic operator contributes one count to each identifier
    occurrence inside it (names in ``calls`` are left out). The result is the
    highest count reached by any identifier, or 0 if there is none.
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_ASSIGNMENT_EXPR": Q_ASSIGNMENT_EXPR,
        "Q_CALL_NAME": Q_CALL_NAME,
    })
    arith_ops = (
        "+", "++", "+=",
        "-", "--", "-=",
        "*", "*=",
        "/", "/=",
    )

    var_count = Counter()
    for query in ("Q_BINARY_EXPR", "Q_ASSIGNMENT_EXPR"):
        for node, _ in sitter.captures(query, root, lang):
            parts = node.children
            if len(parts) != 3:
                continue
            operator = parts[1].text.decode()
            if any(op in operator for op in arith_ops):
                var_count.update(
                    get_identifiers(sitter, node, lang, filter=calls))

    return {"V5": max(var_count.values(), default=0)}
108
+
109
+
110
def v6_v7(root, sitter, lang, calls=None):
    """V6 and V7: nesting of control structures.

    V6: number of if/switch/do/while/for statements that have at least one
        control statement among their ancestors.
    V7: the largest number of control-statement ancestors of any of them.
    """
    queries = {
        "Q_IF_STMT": Q_IF_STMT,
        "Q_SWITCH_STMT": Q_SWITCH_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
        "Q_FOR_STMT": Q_FOR_STMT,
    }
    sitter.add_queries(queries)

    depths = [
        _control_nesting_level(node)
        for name in queries
        for node, _ in sitter.captures(name, root, lang)
    ]
    nested_depths = [depth for depth in depths if depth > 0]
    return {
        "V6": len(nested_depths),
        "V7": max(nested_depths, default=0),
    }
137
+
138
+
139
+ def _control_nesting_level(node):
140
+ control_types = [
141
+ "if_statement",
142
+ "switch_statement",
143
+ "do_statement",
144
+ "while_statement",
145
+ "for_statement"
146
+ ]
147
+ parent = node.parent
148
+ num_control_ancestors = 0
149
+ while parent is not None:
150
+ if parent.type in control_types:
151
+ num_control_ancestors += 1
152
+ parent = parent.parent
153
+ return num_control_ancestors
154
+
155
+
156
def v8(root, sitter, lang, calls=None):
    """V8: maximum number of control-dependent control structures.

    For every node captured by the control and condition queries, the
    outermost control statement among its ancestors is credited with one
    dependent. The result is the highest number of dependents credited to
    any single outermost control statement, or 0 if there is none.
    """
    queries = {
        "Q_IF_STMT": Q_IF_STMT,
        "Q_SWITCH_STMT": Q_SWITCH_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_CONDITION": Q_CONDITION
    }
    sitter.add_queries(queries)

    # key = start_byte of the outermost enclosing control statement
    control_dependent_controls = Counter()
    # The queries used to run in one thread each. The work is pure counting
    # on a single shared syntax tree, so running them one after another
    # gives the same totals without any thread start-up or locking traffic.
    # The lock is only created because the helper's signature expects one.
    uncontended_lock = threading.Lock()
    for q in queries:
        _v8_single_query(root, sitter, lang, calls, q,
                         control_dependent_controls, uncontended_lock)

    return {
        "V8": max(control_dependent_controls.values(), default=0)
    }


def _v8_single_query(root, sitter, lang, calls, query, control_dependent_controls, thread_lock):
    """Credit the outermost control ancestor of every node matched by ``query``.

    Args:
        query: name of a query already registered on ``sitter``.
        control_dependent_controls: Counter updated in place, keyed by the
            ``start_byte`` of the outermost control ancestor.
        thread_lock: lock held while the counter is updated.
    """
    for node, _ in sitter.captures(query, root, lang):
        parents = _traverse_parent_controls(node)
        if parents:
            # parents is ordered innermost -> outermost
            with thread_lock:
                control_dependent_controls[parents[-1].start_byte] += 1
194
+
195
+
196
+ def _traverse_parent_controls(node):
197
+ """ Climb up the AST and emit all control nodes. """
198
+ control_types = [
199
+ "if_statement",
200
+ "switch_statement",
201
+ "do_statement",
202
+ "while_statement",
203
+ "for_statement"
204
+ ]
205
+ parent_controls = []
206
+ parent = node.parent
207
+ while parent is not None:
208
+ if parent.type in control_types:
209
+ parent_controls.append(parent)
210
+ parent = parent.parent
211
+ return parent_controls
212
+
213
+
214
def v9(root, sitter, lang, calls=None):
    """V9: maximum number of data-dependent control structures.

    Inside every node captured as ``condition``, each three-child binary
    expression whose operator is ``&``, ``&&``, ``|`` or ``||`` contributes
    one count to every identifier occurrence it contains (names in ``calls``
    are left out). The result is the highest count of any identifier, or 0.
    """
    sitter.add_queries({
        "Q_CONDITION": Q_CONDITION,
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
    })
    logical_ops = ("&", "&&", "|", "||")

    var_count = Counter()
    for condition, tag in sitter.captures("Q_CONDITION", root, lang):
        if tag != "condition":
            continue
        for expr, _ in sitter.captures("Q_BINARY_EXPR", condition, lang):
            parts = expr.children
            if len(parts) != 3:
                continue
            if parts[1].text.decode() in logical_ops:
                var_count.update(
                    get_identifiers(sitter, expr, lang, filter=calls))

    return {"V9": max(var_count.values(), default=0)}
242
+
243
+
244
def v10(root, sitter, lang, calls=None):
    """V10: number of if statements without else.

    Args:
        root: syntax tree node to search below.
        sitter: ``Sitter`` instance used to run the query.
        lang: language key the tree was parsed with.
        calls: unused; accepted so all metric functions share one signature.

    Returns:
        ``{"V10": <number of if statements that have no else branch>}``.
    """
    sitter.add_queries({
        "Q_IF_WITHOUT_ELSE": Q_IF_WITHOUT_ELSE
    })

    # The query yields three captures per matching statement (condition,
    # consequence and the statement itself); count only the statement so
    # each if is counted once instead of three times.
    num_ifs = sum(
        1
        for _, tag in sitter.captures("Q_IF_WITHOUT_ELSE", root, lang)
        if tag == "if_stmt"
    )
    return {
        "V10": num_ifs
    }
256
+
257
+
258
def v11(root, sitter, lang, calls=None):
    """V11: number of variables involved in control predicates.

    Args:
        root: syntax tree node to search below.
        sitter: ``Sitter`` instance used to run the query.
        lang: language key the tree was parsed with.
        calls: names to leave out of the count (called function names).

    Returns:
        ``{"V11": <identifier occurrences summed over all predicates>}``.
    """
    sitter.add_queries({
        "Q_CONDITION": Q_CONDITION
    })

    num_controlled_vars = 0
    for condition, tag in sitter.captures("Q_CONDITION", root, lang):
        # The query also captures the whole statement that owns the
        # condition; counting its identifiers as well would add every
        # variable of the statement body on top of the predicate's own.
        if tag != "condition":
            continue
        num_controlled_vars += len(
            get_identifiers(sitter, condition, lang, filter=calls))
    return {
        "V11": num_controlled_vars
    }
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Lukas Pirch
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.1
2
+ Name: mcpp
3
+ Version: 1.0.0
4
+ Summary: McCabe++ (mcpp): cyclomatic complexity and other vulnerability-related code metrics
5
+ Author-email: Lukas Pirch <lukas.pirch@tu-berlin.de>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2023 Lukas Pirch
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Keywords: vulnerability,code metric,static analysis
29
+ Classifier: License :: OSI Approved :: MIT License
30
+ Classifier: Programming Language :: Python
31
+ Classifier: Programming Language :: Python :: 3
32
+ Requires-Python: >=3.9
33
+ Description-Content-Type: text/markdown
34
+ License-File: LICENSE
35
+ Requires-Dist: hydra-core >=1.3.2
36
+ Requires-Dist: tree-sitter >=0.22.3
37
+ Requires-Dist: tree-sitter-c >=0.21.4
38
+ Requires-Dist: tree-sitter-cpp >=0.22.3
39
+ Requires-Dist: tqdm >=4.66.4
40
+ Requires-Dist: loguru >=0.7.2
41
+
42
+ # McCabe++ (mcpp)
43
+
44
+ <img src="https://github.com/LPirch/mcpp/blob/master/media/mcpp.jpeg?raw=true" height=400/>
45
+
46
+ `mcpp` measures typical code complexity metrics like McCabe's cyclomatic
47
+ complexity.
48
+
49
+ The goal of this project is to provide a re-usable script to analyze C/C++
50
+ source code and extract complexity metrics from it. The implemented metrics
51
+ are taken from the [paper](https://xiaoningdu.github.io/assets/pdf/leopard.pdf)
52
+
53
+ > LEOPARD: Identifying Vulnerable Code for Vulnerability Assessment through Program Metrics
54
+
55
+ This tool is released as part of our research in vulnerability discovery and
56
+ has been used in our paper
57
+
58
+ > SoK: Where to Fuzz? Assessing Target Selection Methods in Directed Fuzzing
59
+
60
+ See also the corresponding [repo](https://github.com/wsbrg/crashminer).
61
+
62
+ ## Complexity Metrics
63
+
64
+ | Dimension | ID | Metric Description |
65
+ |----------------------|----|--------------------------------|
66
+ | CD1: Function | C1 | cyclomatic complexity |
67
+ | CD2: Loop Structures | C2 | number of loops |
68
+ | | C3 | number of nested loops |
69
+ | | C4 | maximum nesting level of loops |
70
+
71
+ ## Vulnerability Metrics
72
+
73
+ | Dimension | ID | Metric Description |
74
+ |-------------------------|-----|---------------------------------------------------------------------------|
75
+ | VD1: Dependency | V1 | number of parameter variables |
76
+ | | V2 | number of variables as parameters for callee function |
77
+ | VD2: Pointers | V3 | number of pointer arithmetic operations |
78
+ | | V4 | number of variables involved in pointer arithmetic |
79
+ | | V5 | maximum number of pointer arithmetic operations a variable is involved in |
80
+ | VD3: Control Structures | V6 | number of nested control structures |
81
+ | | V7 | maximum nesting level of control structures |
82
+ | | V8 | maximum number of control-dependent control structures |
83
+ | | V9 | maximum number of data-dependent control structures |
84
+ | | V10 | number of if structures without else |
85
+ | | V11 | number of variables involved in control predicates |
86
+
87
+
88
+
89
+ ## Setup
90
+
91
+ Build a docker container which performs the setup automatically or run the
92
+ installation on your local machine:
93
+
94
+ ```sh
95
+ pip install .
96
+ ```
97
+
98
+ > Note: It is recommended to install packages in virtual environments.
99
+
100
+
101
+ ## Usage
102
+
103
+ ### From Python
104
+
105
+ Simply import `mcpp` and then use the extract function (or one of its variants).
106
+
107
+ ```python
108
+ from pathlib import Path
109
+ from mcpp import extract
110
+
111
+ input_dir = Path("some/dir")
112
+ in_files = list(input_dir.glob("**/*.c"))
113
+ result = extract(in_files)
114
+
115
+ # to extract only a subset of the metrics
116
+ result = extract(in_files, ["V1", "C3"])
117
+
118
+ # full list of metrics:
119
+ from mcpp import METRICS
120
+ print(list(METRICS.keys()))
121
+ ```
122
+
123
+
124
+ ### CLI
125
+
126
+ Configuration parameters can be changed in `config.yaml` or directly on the CLI
127
+ with e.g. `mcpp paths.out_root=some/dir`.
128
+
129
+ Using all defaults:
130
+ ```sh
131
+ mcpp # with default params like input directory, see config.yaml
132
+ ```
133
+
134
+ Changing params from command line:
135
+ ```sh
136
+ mcpp in_path=/some/dir/single_source out_path=single_source_metrics.json
137
+ mcpp metrics=\[C1,C2,V4\]
138
+ ```
139
+
140
+ Or by passing a changed `config.yaml`:
141
+ - `-cp` (config_path) specifies the absolute path to the directory where the config file is located
142
+ - `-cn` (config_name) specifies the name of the config file
143
+ ```sh
144
+ mcpp -cp /some/other/dir -cn myconfig.yaml
145
+ ```
146
+
147
+ Try out the example:
148
+
149
+ ```sh
150
+ mcpp in_path=examples/data/source paths.out_root=examples/data-out
151
+ cat examples/data-out/complexity.json
152
+ ```
@@ -0,0 +1,15 @@
1
+ mcpp/__init__.py,sha256=3wLtGOae7m5Yxu6-onB3v8Hx74tOZXrmBWYepwN_1LM,341
2
+ mcpp/__main__.py,sha256=qo02QTBFGpS63vakDHnB2qXX_gkqzpY5YxbGffblNos,1762
3
+ mcpp/complexity.py,sha256=0QtZGJIZoTNhGNoHTCWv6T1bIiwzPQY5hxasr_MPfI0,2605
4
+ mcpp/config.py,sha256=ml350T5wGlEFS_CykX5TRpXjYLTr_hxwoTVtHLP-A-E,442
5
+ mcpp/parse.py,sha256=YpnillA9HBr5CU5ggf6l4mLoi7azVm9pxEdCb9dCiMo,2646
6
+ mcpp/queries.py,sha256=pY7Guam1ocgoccYplw2FHoIOyJllwX4v4q8tBn2drFg,955
7
+ mcpp/vulnerability.py,sha256=Q6LB3Z_GX9iTvnyspXlmiGoJ6sgCeqq9KH_oT58Ihoo,7788
8
+ mcpp/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ mcpp/assets/config.yaml,sha256=gqYqh3yfAPy7vXNc0aoiaNym0v5_LX73wQ6z13-GJWY,433
10
+ mcpp-1.0.0.dist-info/LICENSE,sha256=8ppmrmIeJlRZ43DIFGZYkpfdy6UirmLoQKNCqa8C1Ec,1068
11
+ mcpp-1.0.0.dist-info/METADATA,sha256=wkDyUGUbqCbaIMFr6Qk9WIenhtn-HDHIEp8hyUVsyQg,6006
12
+ mcpp-1.0.0.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
13
+ mcpp-1.0.0.dist-info/entry_points.txt,sha256=JyfogqzjL4sT_1tHVRosM6KvxdH4LQ8GOOJOEUArfl8,44
14
+ mcpp-1.0.0.dist-info/top_level.txt,sha256=aD5UDPMLDt7Za3YJijr0PEFYSYgSr8Rto45I2clVRqA,5
15
+ mcpp-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (71.1.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ mcpp = mcpp.__main__:main
@@ -0,0 +1 @@
1
+ mcpp