lizard 1.17.31__py2.py3-none-any.whl → 1.19.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lizard_languages/r.py ADDED
@@ -0,0 +1,290 @@
1
+ '''
2
+ Language parser for R
3
+ '''
4
+
5
+ from .code_reader import CodeReader, CodeStateMachine
6
+ from .script_language import ScriptLanguageMixIn
7
+
8
+
9
class RReader(CodeReader, ScriptLanguageMixIn):
    """R language reader for parsing R code and calculating complexity metrics.

    Declares the file extensions and language names handled by this reader,
    the tokens that count towards cyclomatic complexity, and installs one
    ``RStates`` state machine that locates function definitions.
    """

    ext = ['r', 'R']
    language_names = ['r', 'R']

    # R-specific conditions that increase cyclomatic complexity
    _conditions = {
        'if', 'else if', 'for', 'while', 'repeat', 'switch',
        '&&', '||', '&', '|', 'ifelse',
        'tryCatch', 'try'
    }

    def __init__(self, context):
        super(RReader, self).__init__(context)
        self.parallel_states = [RStates(context)]

    def preprocess(self, tokens):
        """Yield every token unchanged.

        Whitespace tokens are deliberately kept: ``RStates`` relies on
        seeing newlines to end brace-less one-line functions.
        """
        for token in tokens:
            yield token

    @staticmethod
    def generate_tokens(source_code, addition='', token_class=None):
        """Generate tokens for R code with R-specific patterns.

        Args:
            source_code: the R source text to tokenize.
            addition: extra regex alternatives (each starting with ``|``)
                appended after the R-specific ones.
            token_class: forwarded unchanged to
                ``ScriptLanguageMixIn.generate_common_tokens``.

        Returns:
            Whatever ``ScriptLanguageMixIn.generate_common_tokens`` returns
            for the combined pattern.
        """
        # Longer operators are listed before their prefixes so the regex
        # alternation does not stop at the shorter form.
        r_patterns = (
            r"|<<-"         # Super-assignment operator <<-
            r"|<-"          # Assignment operator <-
            r"|->>"         # Right super-assignment operator ->>
            r"|->"          # Right assignment operator ->
            # Special operators of the form %xyz%: any run of non-blank,
            # non-% characters between two % signs.  This keeps %in%, %*%,
            # %>%, %/% as before and also covers %||%, %<>%, %+%, %$% and
            # the modulo operator %%.  Splitting %||% could expose a bare
            # '||' token, which is listed in _conditions.
            r"|%[^%\s]*%"
            r"|\.\.\."      # Ellipsis for variable arguments
            r"|:::"         # Internal namespace operator (must come before ::)
            r"|::"          # Namespace operator
        )

        return ScriptLanguageMixIn.generate_common_tokens(
            source_code,
            r_patterns + addition,
            token_class
        )
49
+
50
+
51
class RStates(CodeStateMachine):
    """State machine for parsing R function definitions and complexity.

    The machine watches the token stream for the ``function`` keyword.  When
    the keyword directly follows an assignment operator the name(s) on the
    left-hand side are used as the function name(s); otherwise the function
    is reported as ``(anonymous)``.  A function ends at the brace matching
    its opening ``{`` or, for a brace-less body, at the first newline.  A
    right assignment (``function(x) {...} -> name``) seen after the body
    renames the function.

    Whitespace tokens reach this machine (the reader's ``preprocess``
    passes everything through), so each state decides itself what to do
    with them.
    """

    # Operators that bind the function to the name(s) on their left.
    _ASSIGNMENT_OPS = ('<-', '<<-', '=')
    # Operators that bind the preceding function to the name on their right.
    _RIGHT_ASSIGNMENT_OPS = ('->', '->>')
    # Tokens that can never be part of an assigned name; the newline entry
    # is the statement-boundary marker recorded by _remember_token.
    _NAME_BOUNDARIES = ('function', '(', ')', '{', '}', '\n')
    # Size of the look-behind window used to recover function names.
    _MAX_RECENT_TOKENS = 10

    def __init__(self, context):
        super(RStates, self).__init__(context)
        self.recent_tokens = []  # Look-behind window used to find function names
        self.brace_count = 0  # Brace nesting inside the current function body
        self.in_braced_function = False  # True once the body opened with '{'
        self.additional_function_names = []  # Extra names of a chained assignment
        self.paren_depth = 0  # Nesting of '(' inside a parameter list

    @staticmethod
    def _is_name_token(token):
        """Return True when *token* consists only of letters, digits, '_' and '.'."""
        return token.replace('_', 'a').replace('.', 'a').isalnum()

    def _remember_token(self, token):
        """Record *token* in the bounded look-behind window.

        Blank tokens are dropped.  A newline is recorded as a single '\\n'
        marker so that name extraction cannot run into the previous
        statement (``x <- 5`` followed by ``f <- function`` used to yield
        the name ``5f``).  The marker is skipped right after an assignment
        operator, because ``f <-`` followed by a line break continues on
        the next line, and consecutive markers are collapsed to save window
        slots.
        """
        if token.isspace():
            if '\n' not in token:
                return
            if (not self.recent_tokens
                    or self.recent_tokens[-1] == '\n'
                    or self.recent_tokens[-1] in self._ASSIGNMENT_OPS):
                return
            token = '\n'
        self.recent_tokens.append(token)
        # Trim from the front; a no-op while the window is not yet full.
        del self.recent_tokens[:-self._MAX_RECENT_TOKENS]

    def _preceded_by_assignment(self):
        """Return True when the token before the just-recorded one assigns."""
        return (len(self.recent_tokens) >= 2
                and self.recent_tokens[-2] in self._ASSIGNMENT_OPS)

    def _begin_assigned_function(self):
        """Start a function named after the assignment target(s) before it."""
        names = self._extract_function_names()
        # The first name carries the real body; the remaining names of a
        # chained assignment are emitted as copies once the body has ended.
        self._start_function(names[0])
        self._state = self._function_params
        self.brace_count = 0
        self.in_braced_function = False
        self.additional_function_names = names[1:]

    def _state_global(self, token):
        """Global state - looking for function definitions."""
        self._remember_token(token)
        if token != 'function':
            return
        if self._preceded_by_assignment():
            self._begin_assigned_function()
        else:
            # Not the right-hand side of an assignment: anonymous function.
            self._start_function("(anonymous)")
            self._state = self._function_params

    def _extract_function_names(self):
        """Extract all function names from recent tokens, handling multiple assignments.

        Walks backwards from the assignment operator that precedes
        ``function``.  For ``a <- b <- c <- function(...)`` the window ends
        with ``['a', '<-', 'b', '<-', 'c', '<-', 'function']`` and the result
        is ``['a', 'b', 'c']``.  Dotted names arrive as several tokens and
        are joined again.

        Returns:
            The names in source order, or ``["(anonymous)"]`` when no name
            could be recovered.
        """
        if len(self.recent_tokens) < 3:
            return ["(anonymous)"]

        names = []
        parts = []  # Tokens of the name being collected, right to left
        # Skip the trailing [assignment_op, 'function'] pair.
        for token in reversed(self.recent_tokens[:-2]):
            if token in self._ASSIGNMENT_OPS:
                # An assignment operator closes the name to its right.
                if parts:
                    names.append(''.join(reversed(parts)))
                    parts = []
                continue
            if token in self._NAME_BOUNDARIES or not self._is_name_token(token):
                break
            # Two words with no '.' between them belong to different
            # expressions (e.g. ``else f <- function``), not to one name.
            if parts and parts[-1] != '.' and token != '.':
                break
            parts.append(token)

        if parts:
            names.append(''.join(reversed(parts)))

        # Collected right to left; report left to right as written.
        names.reverse()
        return names or ["(anonymous)"]

    def _extract_function_name(self):
        """Extract the first function name (for backward compatibility)."""
        names = self._extract_function_names()
        return names[0] if names else "(anonymous)"

    def _start_function(self, name):
        """Start tracking a new function."""
        self.context.restart_new_function(name)

    def _function_params(self, token):
        """Expecting function parameters."""
        # ``function (x)`` is common R style: skip blanks before the '('.
        # A newline is not skipped, so a dangling ``function`` keyword still
        # falls through to the body state as before.
        if token.isspace() and token != '\n':
            return
        if token == '(':
            self.paren_depth = 0
            self.context.add_to_long_function_name("(")
            self._state = self._read_params
        else:
            # No parameter list follows: treat the token as start of the body.
            self._state = self._function_body
            self._function_body(token)

    def _read_params(self, token):
        """Read function parameters until the matching closing parenthesis.

        Parentheses inside default values (``function(x = c(1, 2))``) are
        tracked so that only the ')' matching the opening '(' ends the list.
        Tokens inside such nested parentheses are echoed into the long
        function name but are not reported as parameters.
        """
        if token == ')' and self.paren_depth == 0:
            self.context.add_to_long_function_name(")")
            self._state = self._function_body
            return
        if token.isspace():
            return
        if token == '(':
            self.paren_depth += 1
        if self.paren_depth == 0:
            self.context.parameter(token)
        self.context.add_to_long_function_name(" " + token)
        if token == ')':
            self.paren_depth -= 1

    def _function_body(self, token):
        """In function body - track braces, nested functions and the body end."""
        # Complexity conditions are not counted here; this state only tracks
        # structure.  Tokens keep being recorded so that a nested
        # ``name <- function`` can be named.
        self._remember_token(token)

        if token == '{':
            if self.brace_count == 0:
                self.in_braced_function = True
            self.brace_count += 1
        elif token == '}':
            self.brace_count -= 1
            if self.brace_count == 0 and self.in_braced_function:
                self._end_current_function()
        elif token == 'function':
            # A nested *assigned* function is reported as a function of its
            # own: the enclosing function is closed first.
            if self._preceded_by_assignment():
                self.context.end_of_function()
                self._begin_assigned_function()
        elif token == '\n' and not self.in_braced_function:
            # Brace-less one-line function ends with its line.
            self._end_current_function()

    def _end_current_function(self):
        """Leave the body and wait to see whether a right assignment follows."""
        # The function is not closed yet: a following '->' may still rename it.
        self._state = self._check_right_assignment
        self.brace_count = 0
        self.in_braced_function = False

    def _check_right_assignment(self, token):
        """Check if there's a right assignment after function end."""
        # Skip whitespace and comments
        if token.isspace() or token.startswith('#'):
            return

        if token in self._RIGHT_ASSIGNMENT_OPS:
            self._state = self._read_right_assignment_name
            return

        # Anything else: close the function and let the global state see
        # the token.
        self._finalize_function_with_multiple_assignments()
        self._state = self._state_global
        self._state_global(token)

    def _finalize_function_with_multiple_assignments(self):
        """End the current function and create additional functions for multiple assignments."""
        finished = self.context.current_function
        self.context.end_of_function()

        if finished:
            for alias in self.additional_function_names:
                # Each extra name becomes a function with the same
                # complexity and line range as the one just ended.
                self.context.restart_new_function(alias)
                clone = self.context.current_function
                if hasattr(finished, 'cyclomatic_complexity'):
                    clone.cyclomatic_complexity = finished.cyclomatic_complexity
                clone.start_line = finished.start_line
                clone.end_line = finished.end_line
                self.context.end_of_function()

        self.additional_function_names = []

    def _read_right_assignment_name(self, token):
        """Read the function name after right assignment operator."""
        if token.isspace():
            return

        named = self._is_name_token(token)
        if named and self.context.current_function:
            self.context.current_function.name = token

        self._finalize_function_with_multiple_assignments()
        self._state = self._state_global
        if not named:
            # Unexpected token: the function keeps its name and the token is
            # processed as ordinary global code.
            self._state_global(token)

    def statemachine_before_return(self):
        """Called when processing is complete - end any open functions."""
        open_states = (
            self._function_body,
            self._check_right_assignment,
            self._read_right_assignment_name,
        )
        if (self._state in open_states
                and getattr(self.context, 'current_function', None)):
            self._finalize_function_with_multiple_assignments()
lizard_languages/rust.py CHANGED
@@ -19,6 +19,11 @@ class RustReader(CodeReader, CCppCommentsMixin):
19
19
  super().__init__(context)
20
20
  self.parallel_states = [RustStates(context)]
21
21
 
22
@staticmethod
def generate_tokens(source_code, addition='', token_class=None):
    """Tokenize Rust source, keeping lifetimes and loop labels as one token.

    Args:
        source_code: the Rust source text to tokenize.
        addition: extra regex alternatives (each starting with ``|``)
            supplied by the caller; they are appended after the lifetime
            pattern instead of being discarded.
        token_class: forwarded unchanged to ``CodeReader.generate_tokens``.

    Returns:
        Whatever ``CodeReader.generate_tokens`` returns for the combined
        pattern.
    """
    # Lifetimes and labels: 'a, 'static, 'outer.  The negative lookahead
    # refuses a match that is immediately followed by another quote, so a
    # one-word char literal such as 'a' is not claimed here.
    # NOTE(review): assumes CodeReader has its own quoted-literal pattern
    # that then picks up the char literal - confirm.
    lifetime_pattern = r"|(?:'\w+\b(?!'))"
    return CodeReader.generate_tokens(
        source_code, lifetime_pattern + addition, token_class)
26
+
22
27
 
23
28
  class RustStates(GoLikeStates): # pylint: disable=R0903
24
29
  FUNC_KEYWORD = 'fn'
lizard_languages/st.py ADDED
@@ -0,0 +1,139 @@
1
+ '''
2
+ Language parser for Structured Text.
3
+ '''
4
+
5
+ import re
6
+ # import itertools
7
+ from .code_reader import CodeStateMachine, CodeReader
8
+
9
+
10
class StCommentsMixin(object):  # pylint: disable=R0903
    """Mixin that recognises Structured Text comment tokens."""

    @staticmethod
    def get_comment_from_token(token):
        """Return the text of a comment token without its two-character opener.

        A token counts as a comment when it begins with ``(*`` or ``//``.
        Only the opener is removed; a trailing ``*)`` stays in the result.
        Any other token gives ``None``.
        """
        opener = token[:2]
        if opener in ("(*", "//"):
            return token[2:]
        return None
16
+
17
+
18
class StReader(CodeReader, StCommentsMixin):
    ''' This is the reader for Structured Text. '''

    ext = ["st"]
    language_names = ['st']
    # Tokens of the form "#directive rest" (compiler pragmas).
    macro_pattern = re.compile(r"#\s*(\w+)\s*(.*)", re.M | re.S)

    # track block starters
    _conditions = set([
        'if', 'elsif', 'case', 'for', 'while', 'repeat',
        'IF', 'ELSIF', 'CASE', 'FOR', 'WHILE', 'REPEAT'
    ])

    # Keywords that open a function-like unit.
    _functions = set([
        'FUNCTION_BLOCK', 'FUNCTION', 'ACTION'
    ])

    # Keywords that open a nested block (closed by END_<keyword>).
    _blocks = set([
        'IF', 'FOR', 'WHILE', 'CASE', 'REPEAT',
    ])

    # Normalized block terminator produced by preprocess().
    _ends = set([
        'END',
    ])

    # Nesting Depth
    loops = [
        'if', 'case', 'for', 'while', 'repeat',
        'IF', 'CASE', 'FOR', 'WHILE', 'REPEAT'
    ]
    bracket = 'END'

    def __init__(self, context):
        super(StReader, self).__init__(context)
        self.parallel_states = (
            StStates(context, self),
        )

    @staticmethod
    def generate_tokens(source_code, addition='', token_class=None):
        """Tokenize Structured Text source.

        Args:
            source_code: the ST source text to tokenize.
            addition: extra regex alternatives (each starting with ``|``)
                appended after the ST-specific ones.
            token_class: forwarded unchanged to ``CodeReader.generate_tokens``.

        Returns:
            Whatever ``CodeReader.generate_tokens`` returns for the combined
            pattern.
        """
        # Capture everything until end of logical line, where lines may be
        # continued with \ at the end.
        _until_end = r'(?:\\\n|[^\n])*'
        # Keyword alternatives are wrapped in \b so they only match whole
        # words; without the anchors an identifier such as ``Order`` or
        # ``Notify`` could be split after its ``OR``/``NOT`` prefix.
        # sorted() keeps the generated pattern deterministic.
        block_endings = '|'.join(
            r'\bEND_' + block + r'\b' for block in sorted(StReader._blocks))
        # Every alternative starts with '|', matching the other readers that
        # hand an addition to CodeReader.generate_tokens.
        addition = (
            r'(?i)'                         # case-insensitive
            + r'|//' + _until_end           # line comment
            + r'|\(\*[\s\S]*?\*\)'          # block comment (* ... *), may span lines
            + r'|\(\*' + _until_end         # unterminated block comment: rest of line
            + r'|\bOR\b'
            + r'|\bAND\b'
            + r'|\bXOR\b'
            + r'|\bNOT\b'
            + r'|\bELSE\s+IF\b'
            + r'|' + block_endings
            + addition
        )

        return CodeReader.generate_tokens(source_code, addition, token_class)

    def preprocess(self, tokens):
        """Handle compiler pragmas like #IF, #INCLUDE, etc.

        Pragma tokens are consumed: conditional directives add a condition
        to the context, ``#include`` is re-emitted as two tokens, and one
        newline is emitted per extra line the pragma spans.  Among the
        remaining tokens every ``END_*`` token is collapsed to ``END`` and
        whitespace other than newlines is dropped.
        """
        for token in tokens:
            macro = self.macro_pattern.match(token)
            if macro:
                directive = macro.group(1).lower()
                if directive in ("if", "ifdef", "ifndef", "elif"):
                    self.context.add_condition()
                elif directive == "include":
                    yield "#include"
                    yield macro.group(2) or "\"\""
                # Keep line numbering intact for multi-line pragmas.
                for _ in macro.group(2).splitlines()[1:]:
                    yield "\n"
            else:
                # ST normalization: collapse END_* into END
                upper_tok = token.upper()
                if upper_tok.startswith("END_"):
                    yield "END"
                    continue

                # Eliminate whitespace, keep line breaks
                if not token.isspace() or token == '\n':
                    yield token
99
+
100
+
101
class StStates(CodeStateMachine):
    """Track Structured Text State."""

    def __init__(self, context, reader):
        super().__init__(context)
        self.reader = reader
        self.last_token = None

    def __call__(self, token, reader=None):
        """Feed one token to the current state and remember it.

        Returns True when the machine has been flagged to exit, otherwise
        None.
        """
        finished = self._state(token)
        if finished:
            self.next(self.saved_state)
            if self.callback:
                self.callback()
        self.last_token = token
        return True if self.to_exit else None

    def _state_global(self, token):
        """Dispatch on function openers, block openers and END tokens."""
        keyword = token.upper()

        # A function keyword only starts a function when the current
        # function's top nesting level is negative.
        if (keyword in StReader._functions
                and self.context.current_function.top_nesting_level < 0):
            self._state = self._function_name
            return
        if keyword in StReader._blocks:
            self.context.add_bare_nesting()
            return
        # END is compared case-sensitively against the raw token.
        if token in StReader._ends:
            self.context.pop_nesting()

    def reset_state(self, token=None):
        """Return to the global state, optionally replaying *token* there."""
        self._state = self._state_global
        if token is None:
            return
        self._state_global(token)

    def _function_name(self, token):
        """Take *token* as the name of the function that just started."""
        self.context.restart_new_function(token)
        self._state = self._function

    def _function(self, token):
        """Open the function's nesting level, then handle *token* globally."""
        self.context.add_bare_nesting()
        self.reset_state(token)