mcpp 1.2.0__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mcpp
3
- Version: 1.2.0
3
+ Version: 1.3.1
4
4
  Summary: McCabe++ (mcpp): cyclomatic complexity and other vulnerability-related code metrics
5
5
  Author-email: Lukas Pirch <lukas.pirch@tu-berlin.de>
6
6
  License: MIT License
@@ -26,18 +26,17 @@ License: MIT License
26
26
  SOFTWARE.
27
27
 
28
28
  Keywords: vulnerability,code metric,static analysis
29
- Classifier: License :: OSI Approved :: MIT License
30
29
  Classifier: Programming Language :: Python
31
30
  Classifier: Programming Language :: Python :: 3
32
- Requires-Python: >=3.9
31
+ Requires-Python: <3.14,>=3.9
33
32
  Description-Content-Type: text/markdown
34
33
  License-File: LICENSE
35
34
  Requires-Dist: hydra-core>=1.3.2
36
- Requires-Dist: tree-sitter>=0.23.0
35
+ Requires-Dist: tree-sitter>=0.24.0
37
36
  Requires-Dist: tree-sitter-c>=0.23.0
38
37
  Requires-Dist: tree-sitter-cpp>=0.23.0
39
- Requires-Dist: tqdm>=4.66.4
40
- Requires-Dist: loguru>=0.7.2
38
+ Requires-Dist: tqdm>=4.66.0
39
+ Requires-Dist: loguru>=0.7.0
41
40
 
42
41
  # McCabe++ (mcpp)
43
42
 
@@ -84,6 +83,23 @@ See also the corresponding [repo](https://github.com/wsbrg/crashminer).
84
83
  | | V10 | number of if structures without else |
85
84
  | | V11 | number of variables involved in control predicates |
86
85
 
86
+ ## Additional Metrics
87
+
88
+ | Dimension | ID | Metric Description |
89
+ |-------------------|----|---------------------------------------------------------|
90
+ | XD: Extra | x1 | number of return statements |
91
+ | | x2 | number of cast expressions |
92
+ | | x3 | number of variable declarations |
93
+ | | x4 | maximum number of operands in an expression |
94
+ | TD: AST Structure | t1 | number of AST nodes (descendants) |
95
+ | | t2 | height of the AST |
96
+ | | t3 | average branching factor of the AST |
97
+ | SD: Code Smells | s1 | number of non-trivial numeric constants (magic numbers) |
98
+ | | s2 | number of goto statements |
99
+ | | s3 | number of function pointers |
100
+ | | s4 | number of function calls without return value usage |
101
+ | MD: Memory Ops | m1 | number of memory allocations (malloc, alloc, new, etc.) |
102
+ | | m2 | number of pointer dereferences (`*`, `[]`, `->`) |
87
103
 
88
104
 
89
105
  ## Setup
@@ -43,6 +43,23 @@ See also the corresponding [repo](https://github.com/wsbrg/crashminer).
43
43
  | | V10 | number of if structures without else |
44
44
  | | V11 | number of variables involved in control predicates |
45
45
 
46
+ ## Additional Metrics
47
+
48
+ | Dimension | ID | Metric Description |
49
+ |-------------------|----|---------------------------------------------------------|
50
+ | XD: Extra | x1 | number of return statements |
51
+ | | x2 | number of cast expressions |
52
+ | | x3 | number of variable declarations |
53
+ | | x4 | maximum number of operands in an expression |
54
+ | TD: AST Structure | t1 | number of AST nodes (descendants) |
55
+ | | t2 | height of the AST |
56
+ | | t3 | average branching factor of the AST |
57
+ | SD: Code Smells | s1 | number of non-trivial numeric constants (magic numbers) |
58
+ | | s2 | number of goto statements |
59
+ | | s3 | number of function pointers |
60
+ | | s4 | number of function calls without return value usage |
61
+ | MD: Memory Ops | m1 | number of memory allocations (malloc, alloc, new, etc.) |
62
+ | | m2 | number of pointer dereferences (`*`, `[]`, `->`) |
46
63
 
47
64
 
48
65
  ## Setup
@@ -1,17 +1,16 @@
1
1
  [project]
2
2
  name = "mcpp"
3
- version = "1.2.0"
3
+ version = "1.3.1"
4
4
  description = "McCabe++ (mcpp): cyclomatic complexity and other vulnerability-related code metrics"
5
5
  readme = "README.md"
6
6
  authors = [{name = "Lukas Pirch", email="lukas.pirch@tu-berlin.de"}]
7
7
  license = {file = "LICENSE"}
8
8
  classifiers = [
9
- "License :: OSI Approved :: MIT License",
10
9
  "Programming Language :: Python",
11
10
  "Programming Language :: Python :: 3",
12
11
  ]
13
12
  keywords = ["vulnerability", "code metric", "static analysis"]
14
- requires-python = ">=3.9"
13
+ requires-python = ">=3.9,<3.14"
15
14
  dynamic = ["dependencies"]
16
15
 
17
16
  [tool.setuptools.dynamic]
@@ -1,6 +1,6 @@
1
1
  hydra-core>=1.3.2
2
- tree-sitter>=0.23.0
2
+ tree-sitter>=0.24.0
3
3
  tree-sitter-c>=0.23.0
4
4
  tree-sitter-cpp>=0.23.0
5
- tqdm>=4.66.4
6
- loguru>=0.7.2
5
+ tqdm>=4.66.0
6
+ loguru>=0.7.0
@@ -2,7 +2,7 @@ import json
2
2
  from pathlib import Path
3
3
  from typing import List
4
4
  from collections import defaultdict
5
- from importlib.resources import files
5
+ from importlib.resources import files, as_file
6
6
 
7
7
  import hydra
8
8
  from tqdm import tqdm
@@ -10,9 +10,14 @@ from tqdm import tqdm
10
10
  from mcpp.config import Config
11
11
  from mcpp.parse import Sitter, get_call_names
12
12
  from mcpp.complexity import c1, c2, c3_c4
13
- from mcpp.vulnerability import v1, v2, v3_v4, v5, v6_v7, v8, v9, v10, v11
13
+ from mcpp.vulnerability import v1, v2, v3_v4_v5, v6_v7, v8, v9, v10, v11
14
14
 
15
- with files("mcpp.assets") / "config.yaml" as p:
15
+ from mcpp.additional import x1, x2, x3, x4
16
+ from mcpp.tree import t1, t2, t3
17
+ from mcpp.smell import s1, s2, s3, s4
18
+ from mcpp.memory import m1, m2
19
+
20
+ with as_file(files("mcpp.assets") / "config.yaml") as p:
16
21
  config_path = str(p.parent)
17
22
  config_name = str(p.name)
18
23
 
@@ -24,15 +29,28 @@ METRICS = {
24
29
  "C4": c3_c4,
25
30
  "V1": v1,
26
31
  "V2": v2,
27
- "V3": v3_v4,
28
- "V4": v3_v4,
29
- "V5": v5,
32
+ "V3": v3_v4_v5,
33
+ "V4": v3_v4_v5,
34
+ "V5": v3_v4_v5,
30
35
  "V6": v6_v7,
31
36
  "V7": v6_v7,
32
37
  "V8": v8,
33
38
  "V9": v9,
34
39
  "V10": v10,
35
- "V11": v11
40
+ "V11": v11,
41
+ "x1": x1,
42
+ "x2": x2,
43
+ "x3": x3,
44
+ "x4": x4,
45
+ "t1": t1,
46
+ "t2": t2,
47
+ "t3": t3,
48
+ "s1": s1,
49
+ "s2": s2,
50
+ "s3": s3,
51
+ "s4": s4,
52
+ "m1": m1,
53
+ "m2": m2,
36
54
  }
37
55
 
38
56
 
@@ -0,0 +1,45 @@
1
+ from mcpp.queries import Q_BINARY_EXPR, Q_IDENTIFIER, Q_NUMBER
2
+
3
def x1(root, sitter, lang, calls=None):
    """x1: number of return statements under ``root``.

    Registers the ``Q_RETURN_STMT`` query on ``sitter``, runs it on ``root``
    and returns ``{"x1": <number of "stmt" captures>}``. ``calls`` is accepted
    for signature parity with the other metrics and is not used.
    """
    sitter.add_queries({"Q_RETURN_STMT": "(return_statement) @stmt"})
    captured = sitter.captures("Q_RETURN_STMT", root, lang)
    return {"x1": len(captured.get("stmt", []))}
10
+
11
def x2(root, sitter, lang, calls=None):
    """x2: number of cast expressions under ``root``.

    Registers the ``Q_CAST_EXPR`` query on ``sitter``, runs it on ``root``
    and returns ``{"x2": <number of "expr" captures>}``. ``calls`` is unused.
    """
    sitter.add_queries({"Q_CAST_EXPR": "(cast_expression) @expr"})
    captured = sitter.captures("Q_CAST_EXPR", root, lang)
    return {"x2": len(captured.get("expr", []))}
18
+
19
def x3(root, sitter, lang, calls=None):
    """x3: number of declarations under ``root``.

    Registers the ``Q_VAR_DECL`` query (it matches ``declaration`` nodes) on
    ``sitter``, runs it on ``root`` and returns
    ``{"x3": <number of "stmt" captures>}``. ``calls`` is unused.
    """
    sitter.add_queries({"Q_VAR_DECL": "(declaration) @stmt"})
    captured = sitter.captures("Q_VAR_DECL", root, lang)
    return {"x3": len(captured.get("stmt", []))}
26
+
27
def x4(root, sitter, lang, calls=None):
    """x4: maximum number of operands in a binary expression.

    For every binary expression captured under ``root`` the operand count is
    the number of identifiers plus the number of number literals captured
    inside that expression. Returns ``{"x4": <largest count>}``, which is 0
    when no binary expression is captured. ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_IDENTIFIER": Q_IDENTIFIER,
        "Q_NUMBER": Q_NUMBER,
    })

    def operand_count(expr):
        names = sitter.captures("Q_IDENTIFIER", expr, lang).get("variable", [])
        literals = sitter.captures("Q_NUMBER", expr, lang).get("constant", [])
        return len(names) + len(literals)

    binary_exprs = sitter.captures("Q_BINARY_EXPR", root, lang).get("expr", [])
    return {
        "x4": max(map(operand_count, binary_exprs), default=0),
    }
@@ -1,6 +1,6 @@
1
1
  from mcpp.parse import Sitter
2
2
  from mcpp.queries import Q_FOR_STMT, Q_DO_STMT, Q_WHILE_STMT, \
3
- Q_BINARY_EXPR, Q_CONDITION
3
+ Q_FOR_RANGE_STMT, Q_DO_STMT, Q_BINARY_EXPR, Q_CONDITION
4
4
 
5
5
 
6
6
  def c1(root, sitter, lang, calls=None):
@@ -12,15 +12,22 @@ def c1(root, sitter, lang, calls=None):
12
12
  "Q_CONDITION": Q_CONDITION,
13
13
  "Q_FOR_STMT": Q_FOR_STMT,
14
14
  "Q_DO_STMT": Q_DO_STMT,
15
- "Q_WHILE_STMT": Q_WHILE_STMT
15
+ "Q_WHILE_STMT": Q_WHILE_STMT,
16
16
  })
17
17
  logical_ops = [
18
- "&", "&&",
19
- "|", "||"
18
+ "&&", "||", "and", "or",
19
+ ]
20
+ loop_stmts = [
21
+ "for_statement", "while_statement",
20
22
  ]
21
23
 
22
24
  complexity = c2(root, sitter, lang, calls)["C2"]
25
+
26
+ # if statements
23
27
  for condition in sitter.captures("Q_CONDITION", root, lang).get("condition", []):
28
+ if condition.parent.type in loop_stmts:
29
+ continue
30
+ complexity += 1
24
31
  for expr in sitter.captures("Q_BINARY_EXPR", condition, lang).get("expr", []):
25
32
  if len(expr.children) != 3:
26
33
  continue
@@ -35,12 +42,15 @@ def c1(root, sitter, lang, calls=None):
35
42
 
36
43
  def c2(root, sitter, lang, calls=None):
37
44
  """number of for, while and do-while loops"""
38
- sitter.add_queries({
45
+ loops = {
39
46
  "Q_FOR_STMT": Q_FOR_STMT,
40
- "Q_WHILE_STMT": Q_WHILE_STMT
41
- })
47
+ "Q_FOR_RANGE_STMT": Q_FOR_RANGE_STMT,
48
+ "Q_WHILE_STMT": Q_WHILE_STMT,
49
+ "Q_DO_STMT": Q_DO_STMT,
50
+ }
51
+ sitter.add_queries(loops)
42
52
  complexity = 0
43
- for query in ("Q_FOR_STMT", "Q_WHILE_STMT"):
53
+ for query in loops.keys():
44
54
  complexity += len(sitter.captures(query, root, lang).get("stmt", []))
45
55
  return {
46
56
  "C2": complexity
@@ -55,14 +65,16 @@ def c3_c4(root, sitter, lang, calls=None):
55
65
  - count all loops that have some loop ancestor
56
66
  - count ancestors that are also loops
57
67
  """
58
- sitter.add_queries({
68
+ loops = {
59
69
  "Q_FOR_STMT": Q_FOR_STMT,
70
+ "Q_FOR_RANGE_STMT": Q_FOR_RANGE_STMT,
60
71
  "Q_DO_STMT": Q_DO_STMT,
61
72
  "Q_WHILE_STMT": Q_WHILE_STMT
62
- })
73
+ }
74
+ sitter.add_queries(loops)
63
75
  c3_val = 0
64
76
  c4_val = 0
65
- for query in ("Q_FOR_STMT", "Q_DO_STMT", "Q_WHILE_STMT"):
77
+ for query in loops.keys():
66
78
  for loop_node in sitter.captures(query, root, lang).get("stmt", []):
67
79
  nesting_level = _loop_nesting_level(loop_node)
68
80
  if nesting_level > 0:
@@ -78,7 +90,8 @@ def _loop_nesting_level(node):
78
90
  loop_types = [
79
91
  "do_statement",
80
92
  "while_statement",
81
- "for_statement"
93
+ "for_statement",
94
+ "for_range_loop",
82
95
  ]
83
96
  parent = node.parent
84
97
  num_loop_ancestors = 0
@@ -86,4 +99,4 @@ def _loop_nesting_level(node):
86
99
  if parent.type in loop_types:
87
100
  num_loop_ancestors += 1
88
101
  parent = parent.parent
89
- return num_loop_ancestors
102
+ return num_loop_ancestors
@@ -0,0 +1,52 @@
1
+ from mcpp.queries import Q_CALL_NAME, Q_NEW_EXPRESSION, Q_SUBSCRIPT_EXPR, Q_FIELD_EXPR
2
+
3
def m1(root, sitter, lang, calls=None):
    """m1: number of memory allocations.

    Counts the calls whose callee name contains "alloc" (case-insensitive),
    which captures the libc allocation functions as well as potential
    wrappers or individual allocators, and adds the number of ``new``
    expressions. Returns ``{"m1": <total>}``. ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_CALL_NAME": Q_CALL_NAME,
        "Q_NEW_EXPRESSION": Q_NEW_EXPRESSION,
    })

    # Calls to allocation-like functions, matched by name.
    callee_names = sitter.captures("Q_CALL_NAME", root, lang).get("name", [])
    alloc_calls = sum(
        1 for callee in callee_names
        if "alloc" in callee.text.decode("utf-8").lower()
    )

    # Object instantiations via `new`.
    new_exprs = sitter.captures("Q_NEW_EXPRESSION", root, lang).get("expr", [])

    return {
        "m1": alloc_calls + len(new_exprs),
    }
26
+
27
def m2(root, sitter, lang, calls=None):
    """m2: number of pointer dereferences.

    Adds up three kinds of expressions captured under ``root``:

    * pointer expressions whose source text starts with ``*``,
    * subscript expressions (``[]``),
    * field expressions.

    Returns ``{"m2": <total>}``. ``calls`` is unused.
    """
    # Imported here because this function runs the Q_POINTER_EXPR query but the
    # module-level import list does not provide it.
    from mcpp.queries import Q_POINTER_EXPR

    # Register exactly the queries used below. Previously Q_POINTER_EXPR was
    # queried without being registered by this function, so m2 only worked if
    # some other metric had registered it on the same sitter beforehand.
    sitter.add_queries({
        "Q_POINTER_EXPR": Q_POINTER_EXPR,
        "Q_SUBSCRIPT_EXPR": Q_SUBSCRIPT_EXPR,
        "Q_FIELD_EXPR": Q_FIELD_EXPR,
    })

    # Dereferences using the asterisk syntax (*): keep only the pointer
    # expressions that begin with `*`.
    pointer_exprs = sitter.captures("Q_POINTER_EXPR", root, lang).get("pointer", [])
    num_ptr_expressions = sum(
        1 for ptr in pointer_exprs
        if ptr.text.decode("utf-8").startswith("*")
    )

    # Dereferences using the subscript syntax ([]).
    num_subscript_expressions = len(
        sitter.captures("Q_SUBSCRIPT_EXPR", root, lang).get("expr", [])
    )

    # Dereferences using the field expression syntax (ptr->field).
    # NOTE(review): every field_expression capture is counted; if the grammar
    # also reports `obj.field` as a field_expression those are included too -
    # confirm whether that is intended.
    num_field_expressions = len(
        sitter.captures("Q_FIELD_EXPR", root, lang).get("expr", [])
    )

    return {
        "m2": num_ptr_expressions + num_subscript_expressions + num_field_expressions,
    }
@@ -6,6 +6,10 @@ Q_FOR_STMT = """
6
6
  (for_statement) @stmt
7
7
  """
8
8
 
9
+ Q_FOR_RANGE_STMT = """
10
+ (for_range_loop) @stmt
11
+ """
12
+
9
13
  Q_DO_STMT = """
10
14
  (do_statement) @stmt
11
15
  """
@@ -15,14 +19,13 @@ Q_WHILE_STMT = """
15
19
  """
16
20
 
17
21
  Q_IF_STMT = """
18
- (if_statement) @if_stmt
22
+ (if_statement) @stmt
19
23
  """
20
24
 
21
25
  Q_SWITCH_STMT = """
22
26
  (switch_statement) @stmt
23
27
  """
24
28
 
25
-
26
29
  Q_CONDITION = """
27
30
  (_
28
31
  condition: ((_) @condition)
@@ -33,6 +36,18 @@ Q_BINARY_EXPR = """
33
36
  (binary_expression) @expr
34
37
  """
35
38
 
39
+ Q_UPDATE_EXPR = """
40
+ (update_expression) @expr
41
+ """
42
+
43
+ Q_SUBSCRIPT_EXPR = """
44
+ (subscript_expression) @expr
45
+ """
46
+
47
+ Q_FIELD_EXPR = """
48
+ (field_expression) @expr
49
+ """
50
+
36
51
  Q_CALL_NAME = """
37
52
  (call_expression
38
53
  function: ((identifier) @name)
@@ -49,7 +64,19 @@ Q_IDENTIFIER = """
49
64
  (identifier) @variable
50
65
  """
51
66
 
52
- Q_FUNCTION_PARAMETER = """
67
+ Q_NUMBER = """
68
+ (number_literal) @constant
69
+ """
70
+
71
+ #Q_FUNCTION_PARAMETER = """
72
+ #(parameter_declaration) @param
73
+ #"""
74
+
75
+ Q_FUNCTION = """
76
+ (function_definition) @function
77
+ """
78
+
79
+ Q_PARAMETER = """
53
80
  (parameter_declaration) @param
54
81
  """
55
82
 
@@ -57,6 +84,11 @@ Q_POINTER_EXPR = """
57
84
  (pointer_expression) @pointer
58
85
  """
59
86
 
87
+ Q_POINTER_IDENTIFIER = """
88
+ (pointer_declarator
89
+ (identifier) @identifier)
90
+ """
91
+
60
92
  Q_ASSIGNMENT_EXPR = """
61
93
  (assignment_expression) @expr
62
94
  """
@@ -67,4 +99,8 @@ Q_IF_WITHOUT_ELSE = """
67
99
  consequence: ((_) @then)
68
100
  !alternative
69
101
  ) @stmt
70
- """
102
+ """
103
+
104
+ Q_NEW_EXPRESSION = """
105
+ (new_expression) @expr
106
+ """
@@ -0,0 +1,46 @@
1
def s1(root, sitter, lang, calls=None):
    """s1: number of non-trivial integer constants ("magic numbers").

    Every ``number_literal`` captured under ``root`` is parsed as an integer;
    literals that are not integers (e.g. floating point) are ignored, and the
    trivial values -1, 0 and 1 are not counted.

    Returns ``{"s1": <count>}``. ``calls`` is unused.
    """
    sitter.add_queries({"Q_NUMBER_LITERAL": "(number_literal) @num"})
    literals = sitter.captures("Q_NUMBER_LITERAL", root, lang).get("num", [])

    def parse_int(text):
        """Return the integer value of a C/C++ integer literal, else None."""
        # Drop C++ digit separators and integer suffixes (u, l, ul, ll, z...),
        # which int() does not understand. None of these letters is a hex
        # digit, so hexadecimal values are left intact.
        digits = text.replace("'", "").rstrip("uUlLzZ")
        try:
            # Base 0 handles decimal, 0x... and 0b... literals.
            return int(digits, 0)
        except ValueError:
            pass
        # C-style octal ("0755") is rejected by base 0; retry in base 8.
        if digits.startswith("0"):
            try:
                return int(digits, 8)
            except ValueError:
                return None
        return None

    values = (parse_int(node.text.decode("utf8")) for node in literals)
    # only non-trivial constants
    magic_numbers = [v for v in values if v is not None and v not in (-1, 0, 1)]
    return {
        "s1": len(magic_numbers)
    }
17
+
18
+
19
def s2(root, sitter, lang, calls=None):
    """s2: number of goto statements under ``root``.

    Registers the ``Q_GOTO_STMT`` query on ``sitter``, runs it on ``root``
    and returns ``{"s2": <number of "stmt" captures>}``. ``calls`` is unused.
    """
    sitter.add_queries({"Q_GOTO_STMT": "(goto_statement) @stmt"})
    captured = sitter.captures("Q_GOTO_STMT", root, lang)
    return {"s2": len(captured.get("stmt", []))}
26
+
27
+
28
def s3(root, sitter, lang, calls=None):
    """s3: number of function pointers under ``root``.

    Counts function declarators that appear inside an initialised declaration
    plus function declarators that appear directly inside a parameter
    declaration. Returns ``{"s3": <total>}``. ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_FUNCTION_POINTERS":
            "(declaration (init_declarator (function_declarator) @decl))",
        "Q_FUNCTION_POINTER_PARAMS":
            "(parameter_declaration (function_declarator) @decl)",
    })
    declared = sitter.captures("Q_FUNCTION_POINTERS", root, lang).get("decl", [])
    params = sitter.captures("Q_FUNCTION_POINTER_PARAMS", root, lang).get("decl", [])
    # Add the two lengths rather than extending one capture list with the
    # other: `+=` would modify the list object handed out by the sitter, which
    # skews later results if the sitter hands out the same list again.
    return {
        "s3": len(declared) + len(params)
    }
38
+
39
+
40
def s4(root, sitter, lang, calls=None):
    """s4: number of function calls whose return value is not used.

    Matches call expressions that are the direct child of an expression
    statement. Registers the ``Q_CALLS_WO_RETURN`` query on ``sitter`` and
    returns ``{"s4": <number of "expr" captures>}``. ``calls`` is unused.
    """
    sitter.add_queries(
        {"Q_CALLS_WO_RETURN": "(expression_statement (call_expression) @expr)"}
    )
    captured = sitter.captures("Q_CALLS_WO_RETURN", root, lang)
    return {"s4": len(captured.get("expr", []))}
@@ -0,0 +1,28 @@
1
def t1(root, sitter, lang, calls=None):
    """t1: number of AST nodes in the tree rooted at ``root``.

    ``root`` itself is counted, so a leaf yields 1. Only ``root`` is used;
    ``sitter``, ``lang`` and ``calls`` exist for signature parity with the
    other metrics. Returns ``{"t1": <node count>}``.
    """
    # Walk the tree with an explicit stack instead of recursion so that very
    # deep ASTs cannot exceed the interpreter's recursion limit.
    node_count = 0
    pending = [root]
    while pending:
        node = pending.pop()
        node_count += 1
        pending.extend(node.children)

    return {
        "t1": node_count
    }
8
+
9
+
10
def t2(root, sitter, lang, calls=None):
    """t2: height of the AST rooted at ``root``.

    The height is the number of nodes on the longest path from ``root`` down
    to a leaf, so a leaf has height 1. Only ``root`` is used; ``sitter``,
    ``lang`` and ``calls`` exist for signature parity with the other metrics.
    Returns ``{"t2": <height>}``.
    """
    # Explicit stack of (node, depth) pairs instead of recursion so that very
    # deep ASTs cannot exceed the interpreter's recursion limit.
    tallest = 0
    pending = [(root, 1)]
    while pending:
        node, depth = pending.pop()
        if depth > tallest:
            tallest = depth
        pending.extend((child, depth + 1) for child in node.children)

    return {
        "t2": tallest
    }
18
+
19
+
20
def t3(root, sitter, lang, calls=None):
    """t3: average branching factor of the AST rooted at ``root``.

    The average is taken over the nodes that have at least one child: the sum
    of their child counts divided by how many such nodes there are. When
    ``root`` has no children at all the result is 0.0 (previously this case
    raised ZeroDivisionError).

    Only ``root`` is used; ``sitter``, ``lang`` and ``calls`` exist for
    signature parity with the other metrics. Returns ``{"t3": <float>}``.
    """
    # Keep two running totals while walking the tree with an explicit stack:
    # this avoids both deep recursion and repeated list concatenation.
    inner_nodes = 0
    child_total = 0
    pending = [root]
    while pending:
        node = pending.pop()
        fanout = len(node.children)
        if fanout:
            inner_nodes += 1
            child_total += fanout
            pending.extend(node.children)

    return {
        "t3": child_total / inner_nodes if inner_nodes else 0.0
    }
@@ -0,0 +1,361 @@
1
+ from collections import Counter
2
+ import threading
3
+ import itertools as it
4
+ from collections import defaultdict
5
+
6
+ from mcpp.parse import Sitter, get_identifiers
7
+ from mcpp.queries import Q_ARGLIST, Q_IDENTIFIER, Q_FUNCTION, Q_PARAMETER, \
8
+ Q_POINTER_EXPR, Q_ASSIGNMENT_EXPR, Q_BINARY_EXPR, Q_UPDATE_EXPR, Q_SUBSCRIPT_EXPR, \
9
+ Q_FIELD_EXPR, Q_CALL_NAME, Q_IF_STMT, Q_SWITCH_STMT, Q_DO_STMT, Q_WHILE_STMT, \
10
+ Q_FOR_STMT, Q_FOR_RANGE_STMT, Q_CONDITION, Q_IF_WITHOUT_ELSE, Q_POINTER_IDENTIFIER
11
+
12
+
13
def v1(root, sitter, lang, calls=None):
    """
    V1: number of parameter variables

    Only the first function definition captured under ``root`` is inspected;
    its parameter declarations are counted. Returns ``{"V1": 0}`` when no
    function definition is captured. ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_FUNCTION": Q_FUNCTION,
        "Q_PARAMETER": Q_PARAMETER,
    })
    definitions = sitter.captures("Q_FUNCTION", root, lang).get("function", [])
    if not definitions:
        return {"V1": 0}

    first_definition = definitions[0]
    parameters = sitter.captures("Q_PARAMETER", first_definition, lang).get("param", [])
    return {"V1": len(parameters)}
29
+
30
+
31
def v2(root, sitter, lang, calls=None):
    """
    V2: number of variables as parameters for callee functions

    Collects the identifiers that ``get_identifiers`` reports for every
    captured argument list (passing ``calls`` as its ``filter``) and returns
    the number of distinct ones as ``{"V2": <count>}``.
    """
    sitter.add_queries({"Q_ARGLIST": Q_ARGLIST})

    distinct_vars = set()
    for arg_list in sitter.captures("Q_ARGLIST", root, lang).get("args", []):
        distinct_vars.update(get_identifiers(sitter, arg_list, lang, filter=calls))

    return {"V2": len(distinct_vars)}
47
+
48
+
49
def v3_v4_v5(root, sitter, lang, calls=None):
    """
    V3: number of pointer arithmetic
    V4: number of variables involved in pointer arithmetics
    V5: max pointer arithmetic a variable is involved in

    The following constructs are counted as pointer arithmetic:

    * update and binary expressions that mention an identifier introduced
      through a pointer declarator,
    * every subscript expression,
    * compound assignments (``+=``, ``-=``, ...) whose left-hand side
      mentions such a pointer identifier,
    * pointer expressions using the ``*`` operator,
    * every field expression.

    Returns a dict with the keys "V3", "V4" and "V5". ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_UPDATE_EXPR": Q_UPDATE_EXPR,
        "Q_SUBSCRIPT_EXPR": Q_SUBSCRIPT_EXPR,
        "Q_ASSIGNMENT_EXPR": Q_ASSIGNMENT_EXPR,
        "Q_POINTER_EXPR": Q_POINTER_EXPR,
        "Q_FIELD_EXPR": Q_FIELD_EXPR,
        "Q_IDENTIFIER": Q_IDENTIFIER,
        "Q_POINTER_IDENTIFIER": Q_POINTER_IDENTIFIER,
    })
    assignment_operators = [
        "+=", "-=", "*=", "/=", "|=", "&=", "^=", "<<=", ">>=", "%="
    ]
    pointer_operators = ["*"]

    def captured(query, node, tag):
        """Nodes captured under ``tag`` when running ``query`` on ``node``."""
        return sitter.captures(query, node, lang).get(tag, [])

    def identifiers_in(node):
        """Identifier nodes found inside ``node``."""
        return captured("Q_IDENTIFIER", node, "variable")

    def names_of(nodes):
        """Set of the source texts of ``nodes``."""
        return {node.text.decode() for node in nodes}

    # Names of all identifiers declared through a pointer declarator
    ptr_identifier_names = names_of(
        captured("Q_POINTER_IDENTIFIER", root, "identifier")
    )

    update_exprs = captured("Q_UPDATE_EXPR", root, "expr")
    binary_exprs = captured("Q_BINARY_EXPR", root, "expr")

    v3_pointer_arith = 0
    v4_pointer_arith_identifiers = []
    # identifier name -> number of pointer arithmetic constructs it occurs in
    pointer_arith_per_identifier = Counter()

    # No. of update and binary expressions with pointers involved
    for expr in it.chain(update_exprs, binary_exprs):
        identifiers = identifiers_in(expr)
        identifier_names = names_of(identifiers)
        if identifier_names & ptr_identifier_names:
            v3_pointer_arith += 1
            v4_pointer_arith_identifiers += identifiers
            pointer_arith_per_identifier.update(identifier_names)

    # No. of subscript expressions
    subscript_exprs = captured("Q_SUBSCRIPT_EXPR", root, "expr")
    v3_pointer_arith += len(subscript_exprs)
    for expr in subscript_exprs:
        identifiers = identifiers_in(expr)
        pointer_arith_per_identifier.update(names_of(identifiers))
        v4_pointer_arith_identifiers += identifiers

    # No. of compound assignments where the left hand side is a pointer
    for expr in captured("Q_ASSIGNMENT_EXPR", root, "expr"):
        if expr.child_by_field_name("operator").text.decode() not in assignment_operators:
            continue
        identifiers = identifiers_in(expr.child_by_field_name("left"))
        identifier_names = names_of(identifiers)
        if identifier_names & ptr_identifier_names:
            v3_pointer_arith += 1
            right_hand_side_identifiers = identifiers_in(expr.child_by_field_name("right"))
            v4_pointer_arith_identifiers += identifiers
            v4_pointer_arith_identifiers += right_hand_side_identifiers
            pointer_arith_per_identifier.update(
                identifier_names | names_of(right_hand_side_identifiers)
            )

    # No. of pointer dereferences with the *ptr syntax
    for expr in captured("Q_POINTER_EXPR", root, "pointer"):
        if expr.child_by_field_name("operator").text.decode() not in pointer_operators:
            continue
        identifiers = identifiers_in(expr)
        v3_pointer_arith += 1
        v4_pointer_arith_identifiers += identifiers
        pointer_arith_per_identifier.update(names_of(identifiers))

    # No. of field expressions (ptr->field); these feed V3 and V5 but are not
    # added to the V4 identifier list.
    field_exprs = captured("Q_FIELD_EXPR", root, "expr")
    v3_pointer_arith += len(field_exprs)
    for expr in field_exprs:
        pointer_arith_per_identifier.update(names_of(identifiers_in(expr)))

    # V5 is the highest per-identifier count, or 0 if nothing was counted.
    v5_max_pointer_arith_var = max(pointer_arith_per_identifier.values(), default=0)

    return {
        "V3": v3_pointer_arith,
        # NOTE(review): this deduplicates the captured identifier nodes, not
        # their names - confirm that is the intended meaning of V4.
        "V4": len(set(v4_pointer_arith_identifiers)),
        "V5": v5_max_pointer_arith_var,
    }
153
+
154
+
155
def v5(root, sitter, lang, calls=None):
    """
    V5: maximum number of pointer arithmetic operations a variable is involved in

    Looks at every binary and assignment expression that has exactly three
    children and whose middle child (the operator) contains one of the
    arithmetic operator strings. The identifiers that ``get_identifiers``
    reports for such an expression (with ``calls`` as its ``filter``) are
    tallied; the result is the highest tally, or 0 if nothing was tallied.
    """
    sitter.add_queries({
        "Q_BINARY_EXPR": Q_BINARY_EXPR,
        "Q_ASSIGNMENT_EXPR": Q_ASSIGNMENT_EXPR,
        "Q_CALL_NAME": Q_CALL_NAME
    })
    arith_ops = [
        "+", "++", "+=",
        "-", "--", "-=",
        "*", "*=",
        "/", "/="
    ]

    binary_exprs = sitter.captures("Q_BINARY_EXPR", root, lang).get("expr", [])
    assignment_exprs = sitter.captures("Q_ASSIGNMENT_EXPR", root, lang).get("expr", [])

    usage = Counter()
    for node in binary_exprs + assignment_exprs:
        if len(node.children) != 3:
            continue
        operator_text = node.children[1].text.decode()
        if any(op in operator_text for op in arith_ops):
            usage.update(get_identifiers(sitter, node, lang, filter=calls))

    return {
        "V5": max(usage.values(), default=0)
    }
187
+
188
+
189
def v6_v7(root, sitter, lang, calls=None):
    """
    V6: number of nested control structures
    V7: maximum level of control nesting

    A control structure (if, switch, do, while, for, range-based for) is
    "nested" when it has at least one control structure among its ancestors;
    its nesting level is the number of such ancestors. ``calls`` is unused.
    """
    queries = {
        "Q_IF_STMT": Q_IF_STMT,
        "Q_SWITCH_STMT": Q_SWITCH_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_FOR_RANGE_STMT": Q_FOR_RANGE_STMT,
    }
    sitter.add_queries(queries)

    levels = [
        _control_nesting_level(node)
        for query in queries
        for node in sitter.captures(query, root, lang).get("stmt", [])
    ]
    nested_levels = [level for level in levels if level > 0]

    return {
        "V6": len(nested_levels),
        "V7": max(nested_levels, default=0)
    }
217
+
218
+
219
def _control_nesting_level(node):
    """Return how many ancestors of ``node`` are control structures.

    ``node`` itself is not counted; the walk follows ``.parent`` links until
    it reaches a node whose parent is ``None``.
    """
    control_types = (
        "if_statement",
        "switch_statement",
        "do_statement",
        "while_statement",
        "for_statement",
        "for_range_loop",
    )

    def ancestors(start):
        current = start.parent
        while current is not None:
            yield current
            current = current.parent

    return sum(1 for ancestor in ancestors(node) if ancestor.type in control_types)
235
+
236
+
237
def v8(root, sitter, lang, calls=None):
    """
    V8: maximum number of control-dependent control structures

    Each control-structure query is evaluated in its own thread by
    ``_v8_single_query``; all threads add to one shared counter under a
    common lock. The metric is 0 when the counter stays empty, otherwise the
    largest counter value plus one.
    """
    queries = {
        "Q_IF_STMT": Q_IF_STMT,
        "Q_SWITCH_STMT": Q_SWITCH_STMT,
        "Q_DO_STMT": Q_DO_STMT,
        "Q_WHILE_STMT": Q_WHILE_STMT,
        "Q_FOR_STMT": Q_FOR_STMT,
        "Q_FOR_RANGE_STMT": Q_FOR_RANGE_STMT,
    }
    sitter.add_queries(queries)

    # Shared tally of dependent controls, filled in by the worker threads.
    control_dependent_controls = Counter()
    thread_lock = threading.Lock()
    workers = [
        threading.Thread(
            target=_v8_single_query,
            args=(root, sitter, lang, calls, query,
                  control_dependent_controls, thread_lock),
        )
        for query in queries
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    busiest = max(control_dependent_controls.values(), default=0)

    return {
        "V8": busiest + 1 if busiest else 0,
    }
270
+
271
+
272
def _v8_single_query(root, sitter, lang, calls, query, control_dependent_controls, thread_lock):
    """Tally, for one query, the controls nested under another control.

    For every node captured by ``query`` that has at least one control
    ancestor, the counter entry keyed by the ``start_byte`` of the last
    ancestor returned by ``_traverse_parent_controls`` is incremented while
    holding ``thread_lock``. ``calls`` is unused.
    """
    capture_name = "condition" if "Q_CONDITION" in query else "stmt"
    for node in sitter.captures(query, root, lang).get(capture_name, []):
        enclosing = _traverse_parent_controls(node)
        if not enclosing:
            continue
        top_control = enclosing[-1]
        with thread_lock:
            control_dependent_controls[top_control.start_byte] += 1
279
+
280
+
281
def _traverse_parent_controls(node):
    """Climb up the AST from ``node`` and collect every control ancestor.

    The returned list is ordered from the nearest ancestor to the farthest
    one; ``node`` itself is never included.
    """
    control_types = (
        "if_statement",
        "switch_statement",
        "do_statement",
        "while_statement",
        "for_statement",
        "for_range_loop",
    )
    found = []
    current = node.parent
    while current is not None:
        if current.type in control_types:
            found.append(current)
        current = current.parent
    return found
298
+
299
+
300
def v9(root, sitter, lang, calls=None):
    """
    V9: maximum number of data-dependent control structures

    For every captured condition the distinct identifier names inside it are
    collected; each name's tally goes up by one per condition it appears in.
    The metric is the highest tally, or 0 when no identifier was seen.
    ``calls`` is unused.
    """
    sitter.add_queries({
        "Q_IDENTIFIER": Q_IDENTIFIER,
        "Q_CONDITION": Q_CONDITION,
    })

    # identifier name -> number of conditions that mention it
    conditions_per_identifier = Counter()
    for condition in sitter.captures("Q_CONDITION", root, lang).get("condition", []):
        identifiers = sitter.captures("Q_IDENTIFIER", condition, lang).get("variable", [])
        conditions_per_identifier.update(
            {identifier.text.decode() for identifier in identifiers}
        )

    return {
        "V9": max(conditions_per_identifier.values(), default=0),
    }
329
+
330
+
331
def v10(root, sitter, lang, calls=None):
    """
    V10: number of if statements without else

    Registers the ``Q_IF_WITHOUT_ELSE`` query on ``sitter`` and returns
    ``{"V10": <number of "stmt" captures>}``. ``calls`` is unused.
    """
    sitter.add_queries({"Q_IF_WITHOUT_ELSE": Q_IF_WITHOUT_ELSE})
    captured = sitter.captures("Q_IF_WITHOUT_ELSE", root, lang)
    return {"V10": len(captured.get("stmt", []))}
343
+
344
+
345
def v11(root, sitter, lang, calls=None):
    """
    V11: number of variables in control structures (in each predicate)

    Gathers the identifiers that ``get_identifiers`` reports for every
    captured condition (passing ``calls`` as its ``filter``) and returns the
    number of distinct ones as ``{"V11": <count>}``.
    """
    sitter.add_queries({"Q_CONDITION": Q_CONDITION})

    controlled_vars = set()
    for condition in sitter.captures("Q_CONDITION", root, lang).get("condition", []):
        controlled_vars.update(get_identifiers(sitter, condition, lang, filter=calls))

    return {"V11": len(controlled_vars)}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mcpp
3
- Version: 1.2.0
3
+ Version: 1.3.1
4
4
  Summary: McCabe++ (mcpp): cyclomatic complexity and other vulnerability-related code metrics
5
5
  Author-email: Lukas Pirch <lukas.pirch@tu-berlin.de>
6
6
  License: MIT License
@@ -26,18 +26,17 @@ License: MIT License
26
26
  SOFTWARE.
27
27
 
28
28
  Keywords: vulnerability,code metric,static analysis
29
- Classifier: License :: OSI Approved :: MIT License
30
29
  Classifier: Programming Language :: Python
31
30
  Classifier: Programming Language :: Python :: 3
32
- Requires-Python: >=3.9
31
+ Requires-Python: <3.14,>=3.9
33
32
  Description-Content-Type: text/markdown
34
33
  License-File: LICENSE
35
34
  Requires-Dist: hydra-core>=1.3.2
36
- Requires-Dist: tree-sitter>=0.23.0
35
+ Requires-Dist: tree-sitter>=0.24.0
37
36
  Requires-Dist: tree-sitter-c>=0.23.0
38
37
  Requires-Dist: tree-sitter-cpp>=0.23.0
39
- Requires-Dist: tqdm>=4.66.4
40
- Requires-Dist: loguru>=0.7.2
38
+ Requires-Dist: tqdm>=4.66.0
39
+ Requires-Dist: loguru>=0.7.0
41
40
 
42
41
  # McCabe++ (mcpp)
43
42
 
@@ -84,6 +83,23 @@ See also the corresponding [repo](https://github.com/wsbrg/crashminer).
84
83
  | | V10 | number of if structures without else |
85
84
  | | V11 | number of variables involved in control predicates |
86
85
 
86
+ ## Additional Metrics
87
+
88
+ | Dimension | ID | Metric Description |
89
+ |-------------------|----|---------------------------------------------------------|
90
+ | XD: Extra | x1 | number of return statements |
91
+ | | x2 | number of cast expressions |
92
+ | | x3 | number of variable declarations |
93
+ | | x4 | maximum number of operands in an expression |
94
+ | TD: AST Structure | t1 | number of AST nodes (descendants) |
95
+ | | t2 | height of the AST |
96
+ | | t3 | average branching factor of the AST |
97
+ | SD: Code Smells | s1 | number of non-trivial numeric constants (magic numbers) |
98
+ | | s2 | number of goto statements |
99
+ | | s3 | number of function pointers |
100
+ | | s4 | number of function calls without return value usage |
101
+ | MD: Memory Ops | m1 | number of memory allocations (malloc, alloc, new, etc.) |
102
+ | | m2 | number of pointer dereferences (`*`, `[]`, `->`) |
87
103
 
88
104
 
89
105
  ## Setup
@@ -4,10 +4,14 @@ pyproject.toml
4
4
  requirements.txt
5
5
  src/mcpp/__init__.py
6
6
  src/mcpp/__main__.py
7
+ src/mcpp/additional.py
7
8
  src/mcpp/complexity.py
8
9
  src/mcpp/config.py
10
+ src/mcpp/memory.py
9
11
  src/mcpp/parse.py
10
12
  src/mcpp/queries.py
13
+ src/mcpp/smell.py
14
+ src/mcpp/tree.py
11
15
  src/mcpp/vulnerability.py
12
16
  src/mcpp.egg-info/PKG-INFO
13
17
  src/mcpp.egg-info/SOURCES.txt
@@ -1,6 +1,6 @@
1
1
  hydra-core>=1.3.2
2
- tree-sitter>=0.23.0
2
+ tree-sitter>=0.24.0
3
3
  tree-sitter-c>=0.23.0
4
4
  tree-sitter-cpp>=0.23.0
5
- tqdm>=4.66.4
6
- loguru>=0.7.2
5
+ tqdm>=4.66.0
6
+ loguru>=0.7.0
@@ -1,270 +0,0 @@
1
- from collections import Counter
2
- import threading
3
-
4
- from mcpp.parse import Sitter, get_identifiers
5
- from mcpp.queries import Q_ARGLIST, Q_IDENTIFIER, Q_FUNCTION_PARAMETER, \
6
- Q_POINTER_EXPR, Q_ASSIGNMENT_EXPR, Q_BINARY_EXPR, Q_CALL_NAME, \
7
- Q_IF_STMT, Q_SWITCH_STMT, Q_DO_STMT, Q_WHILE_STMT, Q_FOR_STMT, Q_CONDITION, \
8
- Q_IF_WITHOUT_ELSE
9
-
10
-
11
- def v1(root, sitter, lang, calls=None):
12
- """
13
- V1: number of variables as parameters for callee functions
14
- """
15
- sitter.add_queries({
16
- "Q_ARGLIST": Q_ARGLIST
17
- })
18
-
19
- vars_in_calls = []
20
- for arg_list in sitter.captures("Q_ARGLIST", root, lang).get("args", []):
21
- variables = get_identifiers(sitter, arg_list, lang, filter=calls)
22
- vars_in_calls.extend(variables)
23
-
24
- return {
25
- "V1": len(vars_in_calls)
26
- }
27
-
28
-
29
- def v2(root, sitter, lang, calls=None):
30
- """
31
- V2: number of parameters declared by the function (Q_FUNCTION_PARAMETER matches)
32
- """
33
- sitter.add_queries({
34
- "Q_FUNCTION_PARAMETER": Q_FUNCTION_PARAMETER
35
- })
36
- params = sitter.captures("Q_FUNCTION_PARAMETER", root, lang).get("param", [])
37
- return {
38
- "V2": len(params)
39
- }
40
-
41
-
42
- def v3_v4(root, sitter, lang, calls=None):
43
- """
44
- V3: number of pointer arithmetic operations
45
- V4: number of variables involved in pointer arithmetic
46
- """
47
- sitter.add_queries({
48
- "Q_POINTER_EXPR": Q_POINTER_EXPR
49
- })
50
- arith_ops = [
51
- "+", "++", "+=",
52
- "-", "--", "-=",
53
- "*=", # plain * excluded (indistinguishable from the pointer dereference operator)
54
- "/", "/=",
55
- "^", "^=",
56
- "&=", # plain & excluded (indistinguishable from the address-of operator)
57
- "|", "|="
58
- ]
59
-
60
- pointer_arith = []
61
- pointer_arith_vars = []
62
- for pointer in sitter.captures("Q_POINTER_EXPR", root, lang).get("pointer", []):
63
- if any(arith in pointer.parent.text.decode() for arith in arith_ops):
64
- pointer_arith.append(pointer)
65
- variables = get_identifiers(sitter, pointer.parent, lang, filter=calls)
66
- pointer_arith_vars.extend(variables)
67
-
68
- return {
69
- "V3": len(pointer_arith),
70
- "V4": len(pointer_arith_vars)
71
- }
72
-
73
-
74
- def v5(root, sitter, lang, calls=None):
75
- """
76
- V5: maximum number of pointer arithmetic operations a variable is involved in
77
- """
78
- sitter.add_queries({
79
- "Q_BINARY_EXPR": Q_BINARY_EXPR,
80
- "Q_ASSIGNMENT_EXPR": Q_ASSIGNMENT_EXPR,
81
- "Q_CALL_NAME": Q_CALL_NAME
82
- })
83
- arith_ops = [
84
- "+", "++", "+=",
85
- "-", "--", "-=",
86
- "*", "*=",
87
- "/", "/="
88
- ]
89
-
90
- var_count = Counter()
91
- candidates = sitter.captures("Q_BINARY_EXPR", root, lang).get("expr", []) + sitter.captures("Q_ASSIGNMENT_EXPR", root, lang).get("expr", [])
92
- for node in candidates:
93
- if len(node.children) != 3:
94
- continue
95
- op_text = node.children[1].text.decode()
96
- if any(arith in op_text for arith in arith_ops):
97
- variables = get_identifiers(sitter, node, lang, filter=calls)
98
- var_count.update(variables)
99
- if len(var_count) > 0:
100
- max_count = var_count.most_common(1)[0][1]
101
- else:
102
- max_count = 0
103
- return {
104
- "V5": max_count
105
- }
106
-
107
-
108
- def v6_v7(root, sitter, lang, calls=None):
109
- """
110
- V6: number of nested control structures
111
- V7: maximum level of control nesting
112
- """
113
- queries = {
114
- "Q_IF_STMT": Q_IF_STMT,
115
- "Q_SWITCH_STMT": Q_SWITCH_STMT,
116
- "Q_DO_STMT": Q_DO_STMT,
117
- "Q_WHILE_STMT": Q_WHILE_STMT,
118
- "Q_FOR_STMT": Q_FOR_STMT
119
- }
120
- sitter.add_queries(queries)
121
-
122
- nested_controls = []
123
- max_nesting_level = 0
124
- for q in queries.keys():
125
- for node in sitter.captures(q, root, lang).get("stmt", []):
126
- nesting_level = _control_nesting_level(node)
127
- if nesting_level > 0:
128
- nested_controls.append(node)
129
- max_nesting_level = max(max_nesting_level, nesting_level)
130
-
131
- return {
132
- "V6": len(nested_controls),
133
- "V7": max_nesting_level
134
- }
135
-
136
-
137
- def _control_nesting_level(node):
138
- control_types = [
139
- "if_statement",
140
- "switch_statement",
141
- "do_statement",
142
- "while_statement",
143
- "for_statement"
144
- ]
145
- parent = node.parent
146
- num_control_ancestors = 0
147
- while parent is not None:
148
- if parent.type in control_types:
149
- num_control_ancestors += 1
150
- parent = parent.parent
151
- return num_control_ancestors
152
-
153
-
154
- def v8(root, sitter, lang, calls=None):
155
- """
156
- V8: maximum number of control-dependent control structures
157
- """
158
- queries = {
159
- "Q_IF_STMT": Q_IF_STMT,
160
- "Q_SWITCH_STMT": Q_SWITCH_STMT,
161
- "Q_DO_STMT": Q_DO_STMT,
162
- "Q_WHILE_STMT": Q_WHILE_STMT,
163
- "Q_FOR_STMT": Q_FOR_STMT,
164
- "Q_CONDITION": Q_CONDITION
165
- }
166
- sitter.add_queries(queries)
167
-
168
- # count controls/conditions nested under another control: key = start_byte of the outermost enclosing control structure
169
- control_dependent_controls = Counter()
170
- threads = []
171
- thread_lock = threading.Lock()
172
- for q in queries.keys():
173
- t = threading.Thread(target=_v8_single_query,
174
- args=(root, sitter, lang, calls, q,
175
- control_dependent_controls, thread_lock))
176
- t.start()
177
- threads.append(t)
178
- for t in threads:
179
- t.join()
180
-
181
- return {
182
- "V8": max([0] + list(control_dependent_controls.values()))
183
- }
184
-
185
-
186
- def _v8_single_query(root, sitter, lang, calls, query, control_dependent_controls, thread_lock):
187
- tag = "condition" if "Q_CONDITION" in query else "stmt"
188
- for node in sitter.captures(query, root, lang).get(tag, []):
189
- parents = _traverse_parent_controls(node)
190
- if len(parents) > 0:
191
- with thread_lock:
192
- control_dependent_controls[parents[-1].start_byte] += 1
193
-
194
-
195
- def _traverse_parent_controls(node):
196
- """ Climb up the AST and emit all control nodes. """
197
- control_types = [
198
- "if_statement",
199
- "switch_statement",
200
- "do_statement",
201
- "while_statement",
202
- "for_statement"
203
- ]
204
- parent_controls = []
205
- parent = node.parent
206
- while parent is not None:
207
- if parent.type in control_types:
208
- parent_controls.append(parent)
209
- parent = parent.parent
210
- return parent_controls
211
-
212
-
213
- def v9(root, sitter, lang, calls=None):
214
- """
215
- V9: maximum number of data-dependent control structures
216
- """
217
- sitter.add_queries({
218
- "Q_CONDITION": Q_CONDITION,
219
- "Q_BINARY_EXPR": Q_BINARY_EXPR
220
- })
221
- logical_ops = [
222
- "&", "&&",
223
- "|", "||"
224
- ]
225
-
226
- conditions = sitter.captures("Q_CONDITION", root, lang).get("condition", [])
227
- var_count = Counter()
228
- for condition in conditions:
229
- bin_expr = sitter.captures("Q_BINARY_EXPR", condition, lang).get("expr", [])
230
- for expr in bin_expr:
231
- if len(expr.children) != 3:
232
- continue
233
- left, op, right = expr.children
234
- if op.text.decode() in logical_ops:
235
- var_count.update(get_identifiers(sitter, expr, lang, filter=calls))
236
-
237
- return {
238
- "V9": max([0] + list(var_count.values()))
239
- }
240
-
241
-
242
- def v10(root, sitter, lang, calls=None):
243
- """
244
- V10: number of if statements without else
245
- """
246
- sitter.add_queries({
247
- "Q_IF_WITHOUT_ELSE": Q_IF_WITHOUT_ELSE
248
- })
249
-
250
- if_without_else = sitter.captures("Q_IF_WITHOUT_ELSE", root, lang).get("stmt", [])
251
- return {
252
- "V10": len(if_without_else)
253
- }
254
-
255
-
256
- def v11(root, sitter, lang, calls=None):
257
- """
258
- V11: number of variables in control structures (in each predicate)
259
- """
260
- sitter.add_queries({
261
- "Q_CONDITION": Q_CONDITION
262
- })
263
-
264
- num_controlled_vars = 0
265
- conditions = sitter.captures("Q_CONDITION", root, lang).get("condition", [])
266
- for condition in conditions:
267
- num_controlled_vars += len(get_identifiers(sitter, condition, lang, filter=calls))
268
- return {
269
- "V11": num_controlled_vars
270
- }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes