lizard 1.17.31__py2.py3-none-any.whl → 1.19.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,419 @@
1
+ """
2
+ Language parser for PL/SQL (Oracle's Procedural Language extension to SQL)
3
+
4
+ This module implements complexity analysis for PL/SQL code, supporting:
5
+ - Procedures, Functions, and Triggers
6
+ - Package Bodies (not specifications - they only contain signatures)
7
+ - Nested procedures and functions
8
+ - Anonymous blocks with nested functions (blocks themselves aren't counted)
9
+ - Control structures: IF/ELSIF/ELSE, CASE/WHEN, LOOP/WHILE/FOR
10
+ - Exception handlers
11
+ - Cursor declarations and cursor FOR loops
12
+
13
+ Design Decisions:
14
+ - EXIT WHEN: The WHEN keyword is filtered out by the preprocessor because
15
+ "EXIT WHEN condition" is not a branching construct - it's a conditional
16
+ exit that doesn't create alternate execution paths. The LOOP itself adds
17
+ complexity.
18
+
19
+ - CONTINUE WHEN: Unlike EXIT WHEN, the WHEN is counted because it does create
20
+ a branch in the loop execution.
21
+
22
+ - GOTO: Does not add to cyclomatic complexity as it's just an unconditional
23
+ jump, not a decision point.
24
+
25
+ - Standalone LOOP: Adds +1 complexity as it creates a repeating path.
26
+
27
+ - FOR/WHILE LOOP: The FOR/WHILE keyword adds complexity; the following LOOP
28
+ keyword is part of the same construct and doesn't add additional complexity.
29
+
30
+ - Parameter Counting: Currently counts all non-whitespace tokens and commas
31
+ in parameter lists. This approach works but differs from some other language
32
+ implementations.
33
+ """
34
+
35
+ from .code_reader import CodeReader, CodeStateMachine
36
+ from .clike import CCppCommentsMixin
37
+
38
+
39
+ class PLSQLReader(CodeReader, CCppCommentsMixin):
40
+ """
41
+ Reader for PL/SQL language supporting procedures, functions, packages,
42
+ and core control structures.
43
+ """
44
+
45
+ ext = ["sql", "pks", "pkb", "pls", "plb", "pck"]
46
+ language_names = ["plsql", "pl/sql"]
47
+
48
+ # PL/SQL conditions for cyclomatic complexity
49
+ # Note: 'loop' is NOT in this set because LOOP has special handling:
50
+ # - standalone LOOP adds +1
51
+ # - LOOP after WHILE/FOR should not add (it's part of the compound statement)
52
+ _conditions = {"if", "elsif", "when", "while", "for", "and", "or"}
53
+
54
def __init__(self, context):
    """Create the reader and register case-insensitive condition keywords.

    Args:
        context: forwarded unchanged to the base reader and to the
            PLSQLStates state machine.
    """
    from itertools import product

    super(PLSQLReader, self).__init__(context)
    self.parallel_states = [PLSQLStates(context)]
    # PL/SQL keywords are case-insensitive, while the condition set holds
    # literal spellings. Registering only the lower- and upper-case forms
    # would miss spellings such as "If", "Elsif" or "While", so every
    # upper/lower combination of each keyword is added. The keywords are
    # short (at most five letters), so the resulting set stays small.
    self.conditions = self.conditions | {
        "".join(letters)
        for keyword in self.conditions
        for letters in product(*((ch.lower(), ch.upper()) for ch in keyword))
    }
60
+
61
def preprocess(self, tokens):
    """Rewrite the token stream so compound PL/SQL keywords are not miscounted.

    Two rewrites are applied:

    - "END IF", "END LOOP", "END CASE", "END WHILE" and "END FOR" are merged
      into a single upper-case token ("END_IF", ...). The whitespace between
      the two words is dropped.
    - The WHEN of "EXIT WHEN ..." and of "EXIT <label> WHEN ..." is removed,
      so a conditional exit is not counted as an extra branch. The WHEN of
      CONTINUE WHEN and of CASE ... WHEN is left in place.

    A newline token counts as a significant token, so keywords separated by
    a line break are neither merged nor filtered. All other whitespace is
    passed through in its original order.

    Args:
        tokens: iterable of token strings.

    Yields:
        The rewritten token strings.
    """
    mergeable_after_end = ("IF", "LOOP", "CASE", "WHILE", "FOR")
    held = None  # last significant token, held back until we see what follows
    held_is_exit_label = False  # `held` is an identifier right after EXIT
    blanks = []  # whitespace seen since `held`

    for token in tokens:
        if token.isspace() and token != "\n":
            blanks.append(token)
            continue

        upper = token.upper()
        held_upper = held.upper() if held else None

        # "END" + IF/LOOP/... -> one merged token.
        if held_upper == "END" and upper in mergeable_after_end:
            yield "END_" + upper
            held = None
            held_is_exit_label = False
            blanks = []
            continue

        # "EXIT WHEN" and "EXIT label WHEN": drop the WHEN, keep EXIT/label.
        if upper == "WHEN" and (held_upper == "EXIT" or held_is_exit_label):
            held_is_exit_label = False
            blanks = []
            continue

        follows_exit = held_upper == "EXIT"

        # Nothing special: release the held token and its trailing blanks.
        if held:
            yield held
        for blank in blanks:
            yield blank
        blanks = []

        held = token
        # Only a plain word can be a loop label; ";" or a newline cannot.
        held_is_exit_label = follows_exit and token.replace("_", "").isalnum()

    # Flush whatever is still buffered at end of input.
    if held:
        yield held
    for blank in blanks:
        yield blank
115
+
116
@staticmethod
def generate_tokens(source_code, addition="", token_class=None):
    """Tokenize PL/SQL source by extending the base reader's token patterns.

    Two PL/SQL-specific patterns are placed in front of ``addition``:

    - ``--`` single-line comments, running to the end of the line;
    - single-quoted string literals in which a doubled quote (``''``) is
      the escape for a quote character. PL/SQL has no backslash escapes,
      so a literal such as ``'C:\\'`` ends at its second quote.

    Block comments (``/* */``) and ``:=`` are left to the base tokenizer.

    Args:
        source_code: the source text to tokenize.
        addition: extra regex alternatives (each starting with ``|``)
            supplied by the caller; appended after the PL/SQL patterns.
        token_class: passed through to ``CodeReader.generate_tokens``.

    Returns:
        Whatever ``CodeReader.generate_tokens`` returns for these arguments.
    """
    # NOTE(review): the string pattern only helps if these alternatives are
    # tried before the base reader's generic quoted-string rule - confirm
    # against CodeReader.generate_tokens. Non-capturing groups only.
    plsql_patterns = (
        r"|--[^\n]*"  # single-line comment starting with --
        r"|'(?:''|[^'])*'"  # string literal with '' as the quote escape
    )
    return CodeReader.generate_tokens(
        source_code, plsql_patterns + addition, token_class
    )
129
+
130
def get_comment_from_token(self, token):
    """Return the comment carried by ``token``.

    Tokens starting with ``--`` are PL/SQL single-line comments and are
    returned in full, including the leading dashes. Every other token is
    passed to the next ``get_comment_from_token`` implementation in the
    class hierarchy.

    Args:
        token: a single token string.

    Returns:
        The whole token for ``--`` comments; otherwise the result of the
        inherited implementation.
    """
    if token.startswith("--"):
        return token
    # Explicit two-argument super(), matching __init__ in this class and
    # keeping the module importable where zero-argument super() is not
    # available.
    return super(PLSQLReader, self).get_comment_from_token(token)
142
+
143
+
144
+ class PLSQLStates(CodeStateMachine):
145
+ """
146
+ State machine for parsing PL/SQL code structure.
147
+ """
148
+
149
def __init__(self, context):
    """Initialise the parser flags on top of the base state machine.

    Args:
        context: forwarded unchanged to the base-class initialiser.
    """
    super(PLSQLStates, self).__init__(context)
    # True while the tokens between "(" and ")" of a declaration are read.
    self.in_parameter_list = False
    # "FOR" or "WHILE" while the LOOP keyword belonging to it is pending;
    # lets the body state skip counting that LOOP a second time.
    self.last_control_keyword = None
    # True between a nested PROCEDURE/FUNCTION keyword and its name.
    self.declaring_nested_function = False
    # True once a non-whitespace token has followed a trigger name; defined
    # here so the attribute exists before any trigger has been seen.
    self.seen_trigger_name_token = False
156
+
157
def _state_global(self, token):
    """Top-level state: wait for PROCEDURE, FUNCTION or TRIGGER.

    The keyword match is case-insensitive. Any other token leaves the
    machine in this state.
    """
    name_states = {
        "procedure": self._procedure_name,
        "function": self._function_name,
        "trigger": self._trigger_name,
    }
    following_state = name_states.get(token.lower())
    if following_state is not None:
        self.next(following_state)
167
+
168
def _procedure_name(self, token):
    """Consume the token that follows the PROCEDURE keyword.

    Whitespace is ignored. "(" starts the parameter list, IS/AS confirms
    the function and enters the declaration section, and anything else is
    taken as the procedure name.
    """
    if token.isspace():
        return

    if token == "(":
        self.in_parameter_list = True
        self.next(self._parameters, "(")
        return

    if token.lower() in ("is", "as"):
        self.context.confirm_new_function()
        self.next(self._state_before_begin)
        return

    # The token is a name. A declaration met inside another routine's
    # declaration section is pushed; otherwise it is a tentative function.
    if not self.declaring_nested_function:
        self.context.try_new_function(token)
    else:
        self.context.push_new_function(token)
        self.declaring_nested_function = False
    self.next(self._procedure_after_name)
186
+
187
def _procedure_after_name(self, token):
    """Decide what follows a procedure name.

    "." means the name read so far was a schema qualifier, "(" opens the
    parameter list, and IS/AS confirms the function. Every other token,
    whitespace included, is skipped.
    """
    if token == ".":
        # The real name is the next non-whitespace token.
        self.next(self._procedure_name_after_dot)
        return

    if token == "(":
        self.in_parameter_list = True
        self.next(self._parameters, "(")
        return

    if token.lower() in ("is", "as"):
        self.context.confirm_new_function()
        self.next(self._state_before_begin)
200
+
201
def _procedure_name_after_dot(self, token):
    """Take the token after "schema." as the procedure's actual name."""
    if not token.isspace():
        # Overwrite the schema part that was stored as the name earlier.
        self.context.current_function.name = token
        self.next(self._procedure_after_name)
208
+
209
def _function_name(self, token):
    """Consume the token that follows the FUNCTION keyword.

    Whitespace is ignored. "(" starts the parameter list, RETURN starts
    the return type, IS/AS confirms the function, and anything else is
    taken as the function name.
    """
    if token.isspace():
        return

    if token == "(":
        self.in_parameter_list = True
        self.next(self._parameters, "(")
        return

    keyword = token.lower()
    if keyword == "return":
        self.next(self._return_type)
        return
    if keyword in ("is", "as"):
        self.context.confirm_new_function()
        self.next(self._state_before_begin)
        return

    # The token is a name. A declaration met inside another routine's
    # declaration section is pushed; otherwise it is a tentative function.
    if not self.declaring_nested_function:
        self.context.try_new_function(token)
    else:
        self.context.push_new_function(token)
        self.declaring_nested_function = False
    self.next(self._function_after_name)
229
+
230
def _function_after_name(self, token):
    """Decide what follows a function name.

    "." means the name read so far was a schema qualifier, "(" opens the
    parameter list, RETURN starts the return type, and IS/AS confirms the
    function. Every other token, whitespace included, is skipped.
    """
    if token == ".":
        # The real name is the next non-whitespace token.
        self.next(self._function_name_after_dot)
        return

    if token == "(":
        self.in_parameter_list = True
        self.next(self._parameters, "(")
        return

    keyword = token.lower()
    if keyword == "return":
        self.next(self._return_type)
    elif keyword in ("is", "as"):
        self.context.confirm_new_function()
        self.next(self._state_before_begin)
245
+
246
def _function_name_after_dot(self, token):
    """Take the token after "schema." as the function's actual name."""
    if not token.isspace():
        # Overwrite the schema part that was stored as the name earlier.
        self.context.current_function.name = token
        self.next(self._function_after_name)
253
+
254
def _return_type(self, token):
    """Skip the return type; IS/AS confirms the function."""
    if token.lower() not in ("is", "as"):
        return  # still inside the return type
    self.context.confirm_new_function()
    self.next(self._state_before_begin)
260
+
261
def _parameters(self, token):
    """Read the tokens of a parameter list.

    ")" closes the list. Every other non-whitespace token, commas
    included, is reported to the context as parameter text.
    """
    if token == ")":
        self.in_parameter_list = False
        self.next(self._after_parameters)
        return

    # A comma is itself non-whitespace, so one check covers separators
    # and ordinary parameter tokens alike.
    if not token.isspace():
        self.context.parameter(token)
272
+
273
def _after_parameters(self, token):
    """After ")": RETURN starts the return type, IS/AS confirms the function.

    All other tokens are skipped.
    """
    keyword = token.lower()
    if keyword in ("is", "as"):
        self.context.confirm_new_function()
        self.next(self._state_before_begin)
    elif keyword == "return":
        self.next(self._return_type)
281
+
282
def _trigger_name(self, token):
    """Take the first non-whitespace token after TRIGGER as its name."""
    if not token.isspace():
        self.context.try_new_function(token)
        # Nothing has followed the name yet; a "." seen immediately after
        # it is treated as a schema separator by the next state.
        self.seen_trigger_name_token = False
        self.next(self._trigger_after_name)
290
+
291
def _trigger_after_name(self, token):
    """Skip the trigger header until DECLARE or BEGIN.

    A "." that comes before any other non-whitespace token marks the name
    read so far as a schema qualifier. DECLARE confirms the function and
    enters the declaration section; BEGIN confirms it and enters the body
    directly. Everything else in the header is ignored.
    """
    if token == "." and not self.seen_trigger_name_token:
        self.next(self._trigger_name_after_dot)
        return

    # From here on a "." can no longer belong to the trigger's name.
    if not token.isspace():
        self.seen_trigger_name_token = True

    keyword = token.lower()
    if keyword not in ("declare", "begin"):
        return

    self.context.confirm_new_function()
    if keyword == "declare":
        self.next(self._state_before_begin)
    else:
        # This BEGIN opens the body, so block depth starts at one.
        self.br_count = 1
        self.next(self._state_body)
314
+
315
def _trigger_name_after_dot(self, token):
    """Take the token after "schema." as the trigger's actual name."""
    if not token.isspace():
        # Overwrite the schema part that was stored as the name earlier.
        self.context.current_function.name = token
        # Start tracking afresh for the real name.
        self.seen_trigger_name_token = False
        self.next(self._trigger_after_name)
323
+
324
def _state_before_begin(self, token):
    """Declaration section: everything between IS/AS (or DECLARE) and BEGIN.

    PROCEDURE or FUNCTION here starts a nested declaration; BEGIN starts
    the body. All other tokens are ignored.
    """
    keyword = token.lower()

    if keyword in ("procedure", "function"):
        self.declaring_nested_function = True
        # Create the marker only if it is not already present.
        if not hasattr(self, "nested_br_level"):
            self.nested_br_level = 0
        if keyword == "procedure":
            self.next(self._procedure_name)
        else:
            self.next(self._function_name)
        return

    if keyword == "begin":
        # Consume the marker if one exists; depth is the marker plus one,
        # or simply one when no nested declaration was seen.
        if hasattr(self, "nested_br_level"):
            base_level = self.nested_br_level
            del self.nested_br_level
        else:
            base_level = 0
        self.br_count = base_level + 1
        self.next(self._state_body)
355
+
356
def _state_body(self, token):
    """Process one token of a procedure/function/trigger body.

    Responsibilities:

    - Count a standalone LOOP as a condition. A LOOP that belongs to a
      preceding FOR/WHILE is not counted here. FOR/WHILE tracking is
      dropped at ";" so a FOR that is not a loop header (for example
      ``SELECT ... FOR UPDATE;``) cannot hide a later standalone LOOP.
    - Track BEGIN/END nesting in ``self.br_count`` and finish the function
      when the END matching the body's BEGIN is reached.
    - Tell a CASE expression's bare END apart from a block END, so that
      ``x := CASE ... END;`` does not change the block depth.
    - Hand over to the name states on PROCEDURE/FUNCTION.

    Merged tokens such as "END_IF" are recognised by exact match on the
    upper-case spellings the preprocessor emits. Ordinary identifiers that
    merely start with "end_" (``end_date`` ...) are treated as plain tokens.

    Args:
        token: the current token string.
    """
    merged_end_tokens = ("END_IF", "END_LOOP", "END_CASE", "END_WHILE", "END_FOR")
    token_lower = token.lower()

    # Block depth at which each still-open CASE started. Created lazily so
    # this method does not depend on the initialiser.
    open_cases = getattr(self, "_open_case_levels", None)
    if open_cases is None:
        open_cases = self._open_case_levels = []

    if token in merged_end_tokens:
        self.last_control_keyword = None
        if token == "END_CASE" and open_cases:
            open_cases.pop()  # the CASE statement is closed
        return

    # Nested procedure/function declarations.
    if token_lower == "procedure":
        self.next(self._procedure_name)
        return
    if token_lower == "function":
        self.next(self._function_name)
        return

    if token_lower in ("for", "while"):
        # Remember the keyword so the LOOP that follows is not counted.
        self.last_control_keyword = token.upper()
    elif token_lower == "loop":
        if self.last_control_keyword not in ("FOR", "WHILE"):
            # Standalone LOOP: adds one to the complexity.
            self.context.add_condition()
        self.last_control_keyword = None
    elif token == ";":
        # A statement ended; a FOR/WHILE seen in it was not a loop header.
        self.last_control_keyword = None

    if token_lower == "case":
        open_cases.append(self.br_count)
    elif token_lower == "begin":
        self.br_count += 1
        self.context.add_bare_nesting()
    elif token_lower == "end":
        if open_cases and open_cases[-1] == self.br_count:
            # No BEGIN has been opened since the innermost CASE, so this
            # END terminates that CASE expression, not a block.
            open_cases.pop()
            return

        self.br_count -= 1
        if self.br_count != 0:
            self.context.pop_nesting()
            return

        # This END closes the routine itself.
        del open_cases[:]
        # Look at the stack before end_of_function() runs.
        has_parent = len(self.context.stacked_functions) > 0
        self.context.end_of_function()
        if has_parent:
            # Back to the enclosing routine's declaration section.
            self.next(self._state_before_begin)
        else:
            self.next(self._state_global)
@@ -37,6 +37,7 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
37
37
def __init__(self, context):
    """Create the reader and its single Python state machine."""
    super(PythonReader, self).__init__(context)
    # Most recent non-whitespace token passed to process_token(); None
    # until the first such token arrives.
    self._last_meaningful_token = None
    self.parallel_states = [PythonStates(context, self)]
40
41
 
41
42
  @staticmethod
42
43
  def generate_tokens(source_code, addition='', token_class=None):
@@ -46,6 +47,41 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
46
47
  r"|(?:\'\'\'(?:\\.|[^\']|\'(?!\'\')|\'\'(?!\'))*\'\'\')",
47
48
  token_class)
48
49
 
50
def process_token(self, token):
    """Discount triple-quoted strings that stand alone as statements.

    When ``token`` is a triple-quoted string, the first state machine is
    not in its ``_state_first_line`` state, and the previous non-whitespace
    token does not suggest the string is part of an expression, the lines
    covered by the string (its newline count plus one) are subtracted from
    the NLOC via ``self.context.add_nloc``.

    Every non-whitespace token is then remembered as the previous token
    for the next call.

    Returns:
        bool: always False.
    """
    triple_quoted = len(token) >= 6 and token.startswith(('"""', "'''"))
    if triple_quoted:
        state = self.parallel_states[0]._state
        # Strings seen in _state_first_line are left alone here.
        if state != state.__self__._state_first_line:
            # A string right after one of these is part of an expression.
            expression_lead_ins = (
                '=', '+=', '-=', '*=', '/=', '%=', '//=', '**=', '&=', '|=',
                '^=', '<<=', '>>=', '(', 'return', ',', '[', '+', '-', '*',
                '/', '%',
            )
            if self._last_meaningful_token not in expression_lead_ins:
                self.context.add_nloc(-(token.count('\n') + 1))

    # Whitespace and newlines never become the "previous" token.
    if not token.isspace():
        self._last_meaningful_token = token

    return False
84
+
49
85
  def preprocess(self, tokens):
50
86
  indents = PythonIndents(self.context)
51
87
  current_leading_spaces = 0