lizard 1.17.30__py2.py3-none-any.whl → 1.18.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/METADATA +6 -1
- {lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/RECORD +25 -24
- {lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/entry_points.txt +1 -0
- lizard.py +43 -22
- lizard_ext/__init__.py +14 -0
- lizard_ext/checkstyleoutput.py +33 -0
- lizard_ext/version.py +1 -1
- lizard_languages/__init__.py +4 -2
- lizard_languages/clike.py +60 -1
- lizard_languages/code_reader.py +19 -0
- lizard_languages/go.py +1 -1
- lizard_languages/java.py +6 -3
- lizard_languages/javascript.py +1 -1
- lizard_languages/perl.py +20 -21
- lizard_languages/php.py +14 -14
- lizard_languages/python.py +37 -1
- lizard_languages/r.py +290 -0
- lizard_languages/rust.py +7 -2
- lizard_languages/script_language.py +2 -2
- lizard_languages/st.py +139 -0
- lizard_languages/tsx.py +445 -11
- lizard_languages/typescript.py +215 -15
- lizard_languages/js_style_language_states.py +0 -184
- lizard_languages/jsx.py +0 -337
- {lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/LICENSE.txt +0 -0
- {lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/WHEEL +0 -0
- {lizard-1.17.30.dist-info → lizard-1.18.0.dist-info}/top_level.txt +0 -0
lizard_languages/php.py
CHANGED
|
@@ -12,7 +12,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
12
12
|
PHP-specific state machine that properly handles modern PHP syntax
|
|
13
13
|
including classes, visibility modifiers and return types.
|
|
14
14
|
"""
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
def __init__(self, context):
|
|
17
17
|
super(PHPLanguageStates, self).__init__(context)
|
|
18
18
|
self.function_name = ''
|
|
@@ -29,7 +29,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
29
29
|
self.assignments = []
|
|
30
30
|
self.in_match = False
|
|
31
31
|
self.match_case_count = 0
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
def _state_global(self, token):
|
|
34
34
|
if token == 'class':
|
|
35
35
|
self._state = self._class_declaration
|
|
@@ -76,7 +76,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
76
76
|
if self.in_trait:
|
|
77
77
|
self.in_trait = False
|
|
78
78
|
self.trait_name = None
|
|
79
|
-
|
|
79
|
+
|
|
80
80
|
# Update tokens
|
|
81
81
|
self.last_token = token
|
|
82
82
|
if token not in [' ', '\t', '\n']:
|
|
@@ -87,7 +87,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
87
87
|
pass
|
|
88
88
|
else:
|
|
89
89
|
self.last_tokens = ''
|
|
90
|
-
|
|
90
|
+
|
|
91
91
|
def _trait_declaration(self, token):
|
|
92
92
|
if token and not token.isspace() and token not in ['{', '(']:
|
|
93
93
|
self.trait_name = token
|
|
@@ -96,7 +96,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
96
96
|
elif token == '{':
|
|
97
97
|
self.brace_level += 1
|
|
98
98
|
self._state = self._state_global
|
|
99
|
-
|
|
99
|
+
|
|
100
100
|
def _class_declaration(self, token):
|
|
101
101
|
if token and not token.isspace() and token not in ['{', '(', 'extends', 'implements']:
|
|
102
102
|
self.class_name = token
|
|
@@ -105,7 +105,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
105
105
|
elif token == '{':
|
|
106
106
|
self.brace_level += 1
|
|
107
107
|
self._state = self._state_global
|
|
108
|
-
|
|
108
|
+
|
|
109
109
|
def _function_name(self, token):
|
|
110
110
|
if token and not token.isspace() and token != '(':
|
|
111
111
|
method_name = token
|
|
@@ -140,7 +140,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
140
140
|
self._state = self._function_args_continue
|
|
141
141
|
self.context.push_new_function(self.function_name)
|
|
142
142
|
self.started_function = True
|
|
143
|
-
|
|
143
|
+
|
|
144
144
|
def _function_args(self, token):
|
|
145
145
|
if token == '(':
|
|
146
146
|
self.bracket_level = 1
|
|
@@ -151,7 +151,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
151
151
|
self.context.push_new_function(self.function_name)
|
|
152
152
|
self.started_function = True
|
|
153
153
|
self._state = self._function_args_continue
|
|
154
|
-
|
|
154
|
+
|
|
155
155
|
def _function_args_continue(self, token):
|
|
156
156
|
if token == '(':
|
|
157
157
|
self.bracket_level += 1
|
|
@@ -165,7 +165,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
165
165
|
# Make sure we count each parameter uniquely
|
|
166
166
|
self.context.add_to_long_function_name(" " + token)
|
|
167
167
|
self.context.parameter(token)
|
|
168
|
-
|
|
168
|
+
|
|
169
169
|
def _function_return_type_or_body(self, token):
|
|
170
170
|
if token == ':':
|
|
171
171
|
# Skip return type declaration
|
|
@@ -180,13 +180,13 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
180
180
|
self.context.end_of_function()
|
|
181
181
|
self.started_function = False
|
|
182
182
|
self._state = self._state_global
|
|
183
|
-
|
|
183
|
+
|
|
184
184
|
def _function_body_or_return_type(self, token):
|
|
185
185
|
if token == '{':
|
|
186
186
|
# Found the function body opening after return type
|
|
187
187
|
self.brace_level += 1
|
|
188
188
|
self._state = self._function_body
|
|
189
|
-
|
|
189
|
+
|
|
190
190
|
def _function_body(self, token):
|
|
191
191
|
if token == '{':
|
|
192
192
|
self.brace_level += 1
|
|
@@ -198,12 +198,12 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
198
198
|
self.context.end_of_function()
|
|
199
199
|
self.started_function = False
|
|
200
200
|
self._state = self._state_global
|
|
201
|
-
|
|
201
|
+
|
|
202
202
|
def _condition_expected(self, token):
|
|
203
203
|
if token == '(':
|
|
204
204
|
self.bracket_level = 1
|
|
205
205
|
self._state = self._condition_continue
|
|
206
|
-
|
|
206
|
+
|
|
207
207
|
def _condition_continue(self, token):
|
|
208
208
|
if token == '(':
|
|
209
209
|
self.bracket_level += 1
|
|
@@ -216,7 +216,7 @@ class PHPLanguageStates(CodeStateMachine):
|
|
|
216
216
|
if token == '(':
|
|
217
217
|
self.bracket_level = 1
|
|
218
218
|
self._state = self._match_expression_continue
|
|
219
|
-
|
|
219
|
+
|
|
220
220
|
def _match_expression_continue(self, token):
|
|
221
221
|
if token == '(':
|
|
222
222
|
self.bracket_level += 1
|
lizard_languages/python.py
CHANGED
|
@@ -37,15 +37,51 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
|
|
|
37
37
|
def __init__(self, context):
    """Create the reader and its single parallel state machine.

    Args:
        context: the file-info builder handed to the base reader and to
            the ``PythonStates`` machine.
    """
    super(PythonReader, self).__init__(context)
    # No token has been seen yet; process_token() keeps this up to date
    # with the most recent non-whitespace token.
    self._last_meaningful_token = None
    python_states = PythonStates(context, self)
    self.parallel_states = [python_states]
|
|
40
41
|
|
|
41
42
|
@staticmethod
def generate_tokens(source_code, addition='', token_class=None):
    """Tokenize Python source, keeping triple-quoted strings whole.

    Args:
        source_code: the text to tokenize.
        addition: accepted for signature compatibility; this method does
            not forward it to the common tokenizer.
        token_class: passed through unchanged to the common tokenizer.

    Returns:
        Whatever ``ScriptLanguageMixIn.generate_common_tokens`` returns.
    """
    # One alternative per quote style so that a whole """...""" or
    # '''...''' literal becomes a single token.
    triple_double = r"|(?:\"\"\"(?:\\.|[^\"]|\"(?!\"\")|\"\"(?!\"))*\"\"\")"
    triple_single = r"|(?:\'\'\'(?:\\.|[^\']|\'(?!\'\')|\'\'(?!\'))*\'\'\')"
    return ScriptLanguageMixIn.generate_common_tokens(
        source_code,
        triple_double + triple_single,
        token_class)
|
|
48
49
|
|
|
50
|
+
def process_token(self, token):
    """Discount triple-quoted strings that are used as comments.

    A triple-quoted string token seen while the first state machine is
    not in its ``_state_first_line`` state, and whose preceding
    meaningful token is not one of the expression-leading tokens below,
    has its line count subtracted from NLOC via ``context.add_nloc``.

    Every non-whitespace token (including the string itself) is then
    remembered as the last meaningful token.

    Returns:
        bool: always False, so normal processing continues.
    """
    looks_triple_quoted = (
        token.startswith(('"""', "'''")) and len(token) >= 6
    )
    if looks_triple_quoted:
        state = self.parallel_states[0]._state
        # Strings seen in the first-line state are docstrings and are
        # handled by the state machine itself.
        in_first_line = state == state.__self__._state_first_line
        if not in_first_line:
            # A string right after one of these is part of an expression
            # (assignment, call argument, return value, ...).
            expression_leaders = (
                '=', '+=', '-=', '*=', '/=', '%=', '//=', '**=', '&=',
                '|=', '^=', '<<=', '>>=', '(', 'return', ',', '[', '+',
                '-', '*', '/', '%',
            )
            if self._last_meaningful_token not in expression_leaders:
                line_span = token.count('\n') + 1
                self.context.add_nloc(-line_span)

    # Whitespace and newlines never count as "meaningful".
    if not token.isspace() and token not in ('\n', ' ', '\t'):
        self._last_meaningful_token = token

    return False  # Continue with normal processing
|
|
84
|
+
|
|
49
85
|
def preprocess(self, tokens):
|
|
50
86
|
indents = PythonIndents(self.context)
|
|
51
87
|
current_leading_spaces = 0
|
lizard_languages/r.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Language parser for R
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
from .code_reader import CodeReader, CodeStateMachine
|
|
6
|
+
from .script_language import ScriptLanguageMixIn
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RReader(CodeReader, ScriptLanguageMixIn):
    """R language reader for parsing R code and calculating complexity metrics."""

    ext = ['r', 'R']
    language_names = ['r', 'R']

    # R-specific conditions that increase cyclomatic complexity
    _conditions = {
        'if', 'else if', 'for', 'while', 'repeat', 'switch',
        '&&', '||', '&', '|', 'ifelse',
        'tryCatch', 'try'
    }

    def __init__(self, context):
        """Create the reader with a single ``RStates`` state machine."""
        super(RReader, self).__init__(context)
        self.parallel_states = [RStates(context)]

    def preprocess(self, tokens):
        """Preprocess tokens - for now just pass them through."""
        for token in tokens:
            yield token

    @staticmethod
    def generate_tokens(source_code, addition='', token_class=None):
        """Generate tokens for R code with R-specific patterns.

        Args:
            source_code: the R source text.
            addition: extra regex alternatives (each starting with ``|``)
                appended after the R-specific ones.
            token_class: passed through to the common tokenizer.

        Returns:
            Whatever ``ScriptLanguageMixIn.generate_common_tokens`` returns.
        """
        # R-specific token patterns
        r_patterns = (
            r"|<-"           # Assignment operator <-
            r"|->"           # Assignment operator ->
            # Special operators are any run of non-space, non-% characters
            # between two percent signs. That covers %in%, %*%, %>%, %/%
            # and also %% (modulo), %||%, %<>%, %+%, which the narrower
            # character class used before did not match.
            r"|%[^%\s]*%"
            r"|\.\.\."       # Ellipsis for variable arguments
            r"|:::"          # Internal namespace operator (must come before ::)
            r"|::"           # Namespace operator
        )

        return ScriptLanguageMixIn.generate_common_tokens(
            source_code,
            r_patterns + addition,
            token_class
        )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class RStates(CodeStateMachine):
    """State machine for parsing R function definitions and complexity.

    Functions are recognised by the ``function`` keyword. The name is
    recovered by looking back through a short window of recent tokens for
    ``name <- function`` / ``name = function`` (including chained
    assignments), or by looking ahead for ``-> name`` after the body.
    """

    # Operators that may bind a name to a following ``function`` keyword.
    _ASSIGNMENT_OPS = ('<-', '=')
    # Tokens that can never be part of an assigned name.
    _NAME_STOPPERS = ('function', '(', ')', '{', '}', '\n')
    # Size of the look-behind window kept in ``recent_tokens``.
    _MAX_RECENT_TOKENS = 10

    def __init__(self, context):
        super(RStates, self).__init__(context)
        self.recent_tokens = []  # Track recent tokens to find function names
        self.brace_count = 0  # Track brace nesting for function bodies
        self.in_braced_function = False  # Track if current function uses braces
        self.additional_function_names = []  # Extra names from chained assignment
        self.paren_depth = 0  # Open parentheses while reading a parameter list

    # -- small helpers -----------------------------------------------------

    def _remember_token(self, token):
        """Append a non-whitespace token to the bounded look-behind window."""
        if token.isspace():
            return
        self.recent_tokens.append(token)
        if len(self.recent_tokens) > self._MAX_RECENT_TOKENS:
            self.recent_tokens.pop(0)

    def _function_is_assigned(self):
        """Return True if the token before the current one is an assignment."""
        return (len(self.recent_tokens) >= 2
                and self.recent_tokens[-2] in self._ASSIGNMENT_OPS)

    @staticmethod
    def _is_name_token(token):
        """Return True if token consists of alphanumerics, ``_`` and ``.``."""
        return token.replace('_', 'a').replace('.', 'a').isalnum() or token == '.'

    # -- states ------------------------------------------------------------

    def _state_global(self, token):
        """Global state - looking for function definitions."""
        self._remember_token(token)

        if token != 'function':
            return

        if self._function_is_assigned():
            # recent_tokens ends with [..., name, assignment_op, 'function']
            func_names = self._extract_function_names()
            # The first name owns the body; the others are replayed when
            # the function is finalized.
            self._start_function(func_names[0])
            self._state = self._function_params
            self.additional_function_names = func_names[1:]
            return

        # Anonymous function or not a proper assignment
        self._start_function("(anonymous)")
        self._state = self._function_params

    def _extract_function_names(self):
        """Extract all function names from recent tokens, handling multiple assignments.

        For ``a <- b <- c <- function(...)`` the window ends with
        ``['a', '<-', 'b', '<-', 'c', '<-', 'function']`` and the result is
        ``['a', 'b', 'c']``.

        Returns:
            list: names in source order, or ``["(anonymous)"]`` if none found.
        """
        if len(self.recent_tokens) < 3:
            return ["(anonymous)"]

        # Walk backwards starting at the token just before the assignment
        # operator that precedes 'function'.
        index = len(self.recent_tokens) - 3
        names_right_to_left = []
        name_parts = []

        while index >= 0:
            token = self.recent_tokens[index]

            if token in self._ASSIGNMENT_OPS:
                # An assignment operator closes the name collected so far.
                if name_parts:
                    names_right_to_left.append(''.join(reversed(name_parts)))
                    name_parts = []
                index -= 1
                continue

            if token in self._NAME_STOPPERS or not self._is_name_token(token):
                break

            name_parts.append(token)
            index -= 1

        # Add the last name if we have one
        if name_parts:
            names_right_to_left.append(''.join(reversed(name_parts)))

        if not names_right_to_left:
            return ["(anonymous)"]
        # Return names in the order they appear in the code
        return list(reversed(names_right_to_left))

    def _extract_function_name(self):
        """Extract the first function name (for backward compatibility)."""
        names = self._extract_function_names()
        return names[0] if names else "(anonymous)"

    def _start_function(self, name):
        """Start tracking a new function."""
        self.context.restart_new_function(name)

    def _function_params(self, token):
        """Expecting function parameters."""
        if token == '(':
            self.paren_depth = 1
            self.context.add_to_long_function_name("(")
            self._state = self._read_params
        else:
            # Single expression function without parentheses - rare in R
            self._state = self._function_body
            self._function_body(token)

    def _read_params(self, token):
        """Read function parameters until the matching closing parenthesis.

        Parentheses inside default values (``function(x = c(1, 2), y)``)
        are counted so that only the parenthesis closing the parameter
        list ends this state.
        """
        if token == ')':
            self.paren_depth -= 1
            if self.paren_depth <= 0:
                self.paren_depth = 0
                self.context.add_to_long_function_name(")")
                self._state = self._function_body
                return
        elif token == '(':
            self.paren_depth += 1

        if not token.isspace():
            self.context.parameter(token)
            if token != '(':
                self.context.add_to_long_function_name(" " + token)

    def _function_body(self, token):
        """In function body - track braces and nested functions."""
        # Keep the look-behind window current for nested function detection.
        self._remember_token(token)

        # Track braces
        if token == '{':
            if self.brace_count == 0:
                self.in_braced_function = True
            self.brace_count += 1
        elif token == '}':
            self.brace_count -= 1
            if self.brace_count == 0 and self.in_braced_function:
                # End of braced function
                self._end_current_function()
                return

        if token == 'function':
            # A nested ``name <- function`` is reported as a separate
            # function: close the current one and start over.
            if self._function_is_assigned():
                self.context.end_of_function()
                func_names = self._extract_function_names()
                self._start_function(func_names[0])
                self._state = self._function_params
                # Reset brace counting for the new function
                self.brace_count = 0
                self.in_braced_function = False
                self.additional_function_names = func_names[1:]
        elif token == '\n' and not self.in_braced_function:
            # Single-line functions without braces end at the newline
            self._end_current_function()

    def _end_current_function(self):
        """Leave the body and wait to see whether ``-> name`` follows."""
        # The function is not closed yet: a right assignment may still
        # supply its name.
        self._state = self._check_right_assignment
        self.brace_count = 0
        self.in_braced_function = False

    def _check_right_assignment(self, token):
        """Check if there's a right assignment after function end."""
        # Skip whitespace and comments
        if token.isspace() or token.startswith('#'):
            return

        if token == '->':
            self._state = self._read_right_assignment_name
            return

        # Anything else: not a right assignment. Close the function and
        # let the global state handle this token.
        self._finalize_function_with_multiple_assignments()
        self._state = self._state_global
        self._state_global(token)

    def _finalize_function_with_multiple_assignments(self):
        """End the current function and create additional functions for multiple assignments."""
        # Keep a handle on the function before the context moves on.
        finished = self.context.current_function
        self.context.end_of_function()

        if self.additional_function_names and finished:
            for func_name in self.additional_function_names:
                # Each extra name gets an entry mirroring the original.
                self.context.restart_new_function(func_name)
                clone = self.context.current_function
                if hasattr(finished, 'cyclomatic_complexity'):
                    clone.cyclomatic_complexity = finished.cyclomatic_complexity
                clone.start_line = finished.start_line
                clone.end_line = finished.end_line
                self.context.end_of_function()

        self.additional_function_names = []

    def _read_right_assignment_name(self, token):
        """Read the function name after right assignment operator."""
        if token.isspace():
            return

        if self._is_name_token(token):
            # Rename the still-open function, then close it.
            if self.context.current_function:
                self.context.current_function.name = token
            self._finalize_function_with_multiple_assignments()
            self._state = self._state_global
            return

        # Unexpected token: keep the existing name and reprocess the token
        # in the global state.
        self._finalize_function_with_multiple_assignments()
        self._state = self._state_global
        self._state_global(token)

    def statemachine_before_return(self):
        """Called when processing is complete - end any open functions."""
        open_states = [
            self._function_body,
            self._check_right_assignment,
            self._read_right_assignment_name,
        ]
        if self._state in open_states:
            if hasattr(self.context, 'current_function') and self.context.current_function:
                self._finalize_function_with_multiple_assignments()
|
lizard_languages/rust.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
'''
|
|
2
|
-
Language parser for
|
|
2
|
+
Language parser for Rust lang
|
|
3
3
|
'''
|
|
4
4
|
|
|
5
5
|
from .code_reader import CodeReader
|
|
@@ -16,9 +16,14 @@ class RustReader(CodeReader, CCppCommentsMixin):
|
|
|
16
16
|
'case', 'match', 'where'])
|
|
17
17
|
|
|
18
18
|
def __init__(self, context):
    """Create the reader with a single ``RustStates`` state machine."""
    super().__init__(context)
    rust_states = RustStates(context)
    self.parallel_states = [rust_states]
|
|
21
21
|
|
|
22
|
+
@staticmethod
def generate_tokens(source_code, addition='', token_class=None):
    """Tokenize Rust source, keeping lifetimes and labels as one token.

    Args:
        source_code: the Rust source text.
        addition: extra regex alternatives (each starting with ``|``).
            They are appended after the Rust-specific pattern rather than
            being discarded.
        token_class: passed through to ``CodeReader.generate_tokens``.

    Returns:
        Whatever ``CodeReader.generate_tokens`` returns.
    """
    # A quote followed by word characters: lifetimes and loop labels.
    rust_addition = r"|(?:'\w+\b)" + addition
    return CodeReader.generate_tokens(source_code, rust_addition, token_class)
|
|
26
|
+
|
|
22
27
|
|
|
23
28
|
class RustStates(GoLikeStates): # pylint: disable=R0903
|
|
24
29
|
FUNC_KEYWORD = 'fn'
|
|
@@ -12,13 +12,13 @@ class ScriptLanguageMixIn:
|
|
|
12
12
|
if token.startswith("#"):
|
|
13
13
|
# For forgiveness comments, return the entire comment with directive intact
|
|
14
14
|
stripped = token.lstrip('#').strip()
|
|
15
|
-
|
|
15
|
+
|
|
16
16
|
# Handle forgiveness directives with proper formatting
|
|
17
17
|
if stripped.startswith('lizard forgive global') or stripped.startswith('#lizard forgive global'):
|
|
18
18
|
return '#lizard forgive global' # Preserve global directive
|
|
19
19
|
elif stripped.startswith('lizard forgive') or stripped.startswith('#lizard forgive'):
|
|
20
20
|
return '#lizard forgive' # Return standardized forgiveness comment
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
return stripped # Return the stripped comment for other cases
|
|
23
23
|
return None
|
|
24
24
|
|
lizard_languages/st.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Language parser for Structured Text.
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
# import itertools
|
|
7
|
+
from .code_reader import CodeStateMachine, CodeReader
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class StCommentsMixin(object):  # pylint: disable=R0903
    """Mixin that recognises Structured Text comment tokens."""

    @staticmethod
    def get_comment_from_token(token):
        """Return the comment text of *token*, or None if it is not a comment.

        A token starting with ``(*`` or ``//`` is a comment; the two-character
        opener is removed and the rest is returned unchanged.
        """
        comment_openers = ("(*", "//")
        if not token.startswith(comment_openers):
            return None
        return token[2:]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class StReader(CodeReader, StCommentsMixin):
    ''' This is the reader for Structured Text. '''

    ext = ["st"]
    language_names = ['st']
    macro_pattern = re.compile(r"#\s*(\w+)\s*(.*)", re.M | re.S)

    # track block starters
    _conditions = set([
        'if', 'elsif', 'case', 'for', 'while', 'repeat',
        'IF', 'ELSIF', 'CASE', 'FOR', 'WHILE', 'REPEAT'
    ])

    _functions = set([
        'FUNCTION_BLOCK', 'FUNCTION', 'ACTION'
    ])

    _blocks = set([
        'IF', 'FOR', 'WHILE', 'CASE', 'REPEAT',
    ])

    _ends = set([
        'END',
    ])

    # Nesting Depth
    loops = [
        'if', 'case', 'for', 'while', 'repeat',
        'IF', 'CASE', 'FOR', 'WHILE', 'REPEAT'
    ]
    bracket = 'END'

    def __init__(self, context):
        super(StReader, self).__init__(context)
        self.parallel_states = (
            StStates(context, self),
        )

    @staticmethod
    def generate_tokens(source_code, addition='', token_class=None):
        """Tokenize Structured Text source.

        Args:
            source_code: the ST source text.
            addition: extra regex text appended after the ST-specific
                alternatives.
            token_class: passed through to ``CodeReader.generate_tokens``.

        Returns:
            Whatever ``CodeReader.generate_tokens`` returns.
        """
        # Capture everything until end of logical line, where lines may be
        # continued with \ at the end.
        _until_end = r'(?:\\\n|[^\n])*'
        # Sorted so the generated pattern does not depend on set ordering.
        block_endings = '|'.join(
            r'\bEND_' + name + r'\b' for name in sorted(StReader._blocks))
        addition = (
            r'(?i)'                             # case-insensitive
            r'//' + _until_end + r'|'           # line comment
            # Block comment (* ... *): stops at the first closing marker so
            # code after the comment on the same line is kept, and may span
            # several lines. An unterminated comment runs to end of input.
            r'\(\*[\s\S]*?(?:\*\)|\Z)|'
            # Word boundaries keep identifiers such as ORDER or NOTIFY
            # from being split at an embedded operator keyword.
            r'\bOR\b|'
            r'\bAND\b|'
            r'\bXOR\b|'
            r'\bNOT\b|'
            r'\bELSE\s+IF\b|'
            + block_endings + addition
        )

        return CodeReader.generate_tokens(source_code, addition, token_class)

    def preprocess(self, tokens):
        """Handle compiler pragmas like #IF, #INCLUDE, etc.

        Pragma tokens are consumed: conditional directives add a condition
        to the context and ``include`` is re-emitted as two tokens. All
        other tokens are passed on, with ``END_*`` collapsed to ``END`` and
        non-newline whitespace dropped.
        """
        for token in tokens:
            macro = self.macro_pattern.match(token)
            if macro:
                directive = macro.group(1).lower()
                if directive in ("if", "ifdef", "ifndef", "elif"):
                    self.context.add_condition()
                elif directive == "include":
                    yield "#include"
                    yield macro.group(2) or "\"\""
                # Keep line numbering intact for multi-line directives.
                for _ in macro.group(2).splitlines()[1:]:
                    yield "\n"
            else:
                # ST normalization: collapse END_* into END
                upper_tok = token.upper()
                if upper_tok.startswith("END_"):
                    yield "END"
                    continue

                # Eliminate whitespace, keep line breaks
                if not token.isspace() or token == '\n':
                    yield token
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class StStates(CodeStateMachine):
    """Track Structured Text State."""

    def __init__(self, context, reader):
        super().__init__(context)
        self.last_token = None
        self.reader = reader

    def __call__(self, token, reader=None):
        """Feed one token to the current state.

        Returns True once ``to_exit`` is set, otherwise None.
        """
        state_finished = self._state(token)
        if state_finished:
            self.next(self.saved_state)
            if self.callback:
                self.callback()
        self.last_token = token
        if self.to_exit:
            return True
        return None

    def _state_global(self, token):
        """Dispatch on function keywords, block starters and END."""
        keyword = token.upper()
        function_context = self.context.current_function

        # A function keyword only starts a function when the current
        # function's top nesting level is negative.
        if keyword in StReader._functions and function_context.top_nesting_level < 0:
            self._state = self._function_name
            return
        if keyword in StReader._blocks:
            self.context.add_bare_nesting()
            return
        if token in StReader._ends:
            self.context.pop_nesting()

    def reset_state(self, token=None):
        """Return to the global state, optionally replaying *token* there."""
        self._state = self._state_global
        if token is None:
            return
        self._state_global(token)

    def _function_name(self, token):
        """Treat *token* as the name of a new function."""
        self.context.restart_new_function(token)
        self._state = self._function

    def _function(self, token):
        """Open the function's nesting level, then reprocess *token* globally."""
        self.context.add_bare_nesting()
        self.reset_state(token)
|