lizard 1.17.31__py2.py3-none-any.whl → 1.19.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lizard-1.17.31.dist-info → lizard-1.19.0.dist-info}/METADATA +31 -17
- {lizard-1.17.31.dist-info → lizard-1.19.0.dist-info}/RECORD +21 -20
- {lizard-1.17.31.dist-info → lizard-1.19.0.dist-info}/entry_points.txt +1 -0
- lizard.py +7 -7
- lizard_ext/checkstyleoutput.py +4 -2
- lizard_ext/htmloutput.py +86 -17
- lizard_ext/lizardnd.py +64 -1
- lizard_ext/version.py +1 -1
- lizard_languages/__init__.py +6 -2
- lizard_languages/clike.py +61 -2
- lizard_languages/code_reader.py +1 -1
- lizard_languages/plsql.py +419 -0
- lizard_languages/python.py +36 -0
- lizard_languages/r.py +290 -0
- lizard_languages/rust.py +5 -0
- lizard_languages/st.py +139 -0
- lizard_languages/tsx.py +445 -11
- lizard_languages/typescript.py +264 -14
- lizard_languages/js_style_language_states.py +0 -185
- lizard_languages/jsx.py +0 -337
- {lizard-1.17.31.dist-info → lizard-1.19.0.dist-info}/LICENSE.txt +0 -0
- {lizard-1.17.31.dist-info → lizard-1.19.0.dist-info}/WHEEL +0 -0
- {lizard-1.17.31.dist-info → lizard-1.19.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Language parser for PL/SQL (Oracle's Procedural Language extension to SQL)
|
|
3
|
+
|
|
4
|
+
This module implements complexity analysis for PL/SQL code, supporting:
|
|
5
|
+
- Procedures, Functions, and Triggers
|
|
6
|
+
- Package Bodies (not specifications - they only contain signatures)
|
|
7
|
+
- Nested procedures and functions
|
|
8
|
+
- Anonymous blocks with nested functions (blocks themselves aren't counted)
|
|
9
|
+
- Control structures: IF/ELSIF/ELSE, CASE/WHEN, LOOP/WHILE/FOR
|
|
10
|
+
- Exception handlers
|
|
11
|
+
- Cursor declarations and cursor FOR loops
|
|
12
|
+
|
|
13
|
+
Design Decisions:
|
|
14
|
+
- EXIT WHEN: The WHEN keyword is filtered out by the preprocessor because
|
|
15
|
+
"EXIT WHEN condition" is not a branching construct - it's a conditional
|
|
16
|
+
exit that doesn't create alternate execution paths. The LOOP itself adds
|
|
17
|
+
complexity.
|
|
18
|
+
|
|
19
|
+
- CONTINUE WHEN: Unlike EXIT WHEN, the WHEN is counted as it does create
|
|
20
|
+
a branch in the loop execution.
|
|
21
|
+
|
|
22
|
+
- GOTO: Does not add to cyclomatic complexity as it's just an unconditional
|
|
23
|
+
jump, not a decision point.
|
|
24
|
+
|
|
25
|
+
- Standalone LOOP: Adds +1 complexity as it creates a repeating path.
|
|
26
|
+
|
|
27
|
+
- FOR/WHILE LOOP: The FOR/WHILE keyword adds complexity; the following LOOP
|
|
28
|
+
keyword is part of the same construct and doesn't add additional complexity.
|
|
29
|
+
|
|
30
|
+
- Parameter Counting: Currently counts all non-whitespace tokens and commas
|
|
31
|
+
in parameter lists. This approach works but differs from some other language
|
|
32
|
+
implementations.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from .code_reader import CodeReader, CodeStateMachine
|
|
36
|
+
from .clike import CCppCommentsMixin
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class PLSQLReader(CodeReader, CCppCommentsMixin):
    """
    Reader for PL/SQL language supporting procedures, functions, packages,
    and core control structures.
    """

    ext = ["sql", "pks", "pkb", "pls", "plb", "pck"]
    language_names = ["plsql", "pl/sql"]

    # PL/SQL conditions for cyclomatic complexity
    # Note: 'loop' is NOT in this set because LOOP has special handling:
    #   - standalone LOOP adds +1
    #   - LOOP after WHILE/FOR should not add (it's part of the compound statement)
    _conditions = {"if", "elsif", "when", "while", "for", "and", "or"}

    def __init__(self, context):
        """Set up the PL/SQL state machine and the case variants of the keywords."""
        super(PLSQLReader, self).__init__(context)
        self.parallel_states = [PLSQLStates(context)]
        # PL/SQL is case-insensitive, so besides the lowercase keywords also
        # register their UPPERCASE and Capitalized spellings for the automatic
        # condition counter.  Other mixed-case spellings (e.g. "iF") are still
        # not covered by this set.
        case_variants = set()
        for keyword in self.conditions:
            case_variants.add(keyword.upper())
            case_variants.add(keyword.capitalize())
        self.conditions = self.conditions | case_variants

    def preprocess(self, tokens):
        """
        Preprocess tokens to handle PL/SQL-specific constructs.

        Merge compound keywords to prevent the condition counter from
        double-counting:
        - "END IF", "END LOOP", "END CASE", "END WHILE", "END FOR" -> single
          tokens "END_IF", "END_LOOP", ... (always upper-case)
        - "EXIT WHEN" -> remove the WHEN keyword (EXIT doesn't create a branch)

        Each significant token is held back by one step so that it can be
        merged with (or can swallow) the token that follows it.  Whitespace
        seen in between is buffered and emitted after the held-back token;
        it is discarded when a merge or a WHEN removal happens.  A newline
        counts as a significant token, so keywords split across lines are
        not merged.
        """
        last_nonwhitespace_token = None
        pending_tokens = []

        for token in tokens:
            if not token.isspace() or token == "\n":
                token_upper = token.upper()

                # Handle "END IF", "END LOOP", etc.
                if (
                    last_nonwhitespace_token
                    and last_nonwhitespace_token.upper() == "END"
                ):
                    if token_upper in ("IF", "LOOP", "CASE", "WHILE", "FOR"):
                        # Merge into "END_IF", "END_LOOP", etc.
                        yield "END_" + token_upper
                        last_nonwhitespace_token = None
                        pending_tokens = []
                        continue

                # Handle "EXIT WHEN" - skip the WHEN keyword.  EXIT itself
                # stays held back and is emitted with the next token.
                if (
                    last_nonwhitespace_token
                    and last_nonwhitespace_token.upper() == "EXIT"
                    and token_upper == "WHEN"
                ):
                    pending_tokens = []
                    continue

                # Emit the held-back token followed by the buffered whitespace
                if last_nonwhitespace_token:
                    yield last_nonwhitespace_token
                    for pending in pending_tokens:
                        yield pending
                    pending_tokens = []

                # Hold back the current token for the next iteration
                last_nonwhitespace_token = token
            else:
                # Accumulate whitespace
                pending_tokens.append(token)

        # Don't forget the last tokens
        if last_nonwhitespace_token:
            yield last_nonwhitespace_token
            for pending in pending_tokens:
                yield pending

    @staticmethod
    def generate_tokens(source_code, addition="", token_class=None):
        """
        Generate tokens for PL/SQL code.

        PL/SQL uses:
        - Single-line comments: --
        - Multi-line comments: /* */
        - String literals: 'text' (with '' for escaping)
        - Assignment operator: :=

        Only the ``--`` comment pattern is added here; everything else is
        left to CodeReader.generate_tokens.
        """
        # Add PL/SQL-specific patterns
        addition = r"|--[^\n]*" + addition  # Single-line comment starting with --
        return CodeReader.generate_tokens(source_code, addition, token_class)

    def get_comment_from_token(self, token):
        """
        Override to recognize PL/SQL's -- line comments in addition to the
        comment styles handled by the parent classes.

        Note: This method correctly identifies -- comments, but due to a
        limitation in the NLOC calculation, these comments may still be
        counted in NLOC.
        """
        if token.startswith("--"):
            return token  # Return the full comment token, including the "--"
        # Delegate to parent for /* */ and // comments.  The explicit
        # two-argument super() matches __init__ and also works on Python 2.
        return super(PLSQLReader, self).get_comment_from_token(token)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class PLSQLStates(CodeStateMachine):
    """
    State machine for parsing PL/SQL code structure.

    It looks for PROCEDURE / FUNCTION / TRIGGER declarations, reads their
    name and parameter list, and then follows BEGIN/END pairs with
    ``br_count`` to find the END that closes the routine.
    """

    # Exact spellings produced by PLSQLReader.preprocess when it merges
    # "END IF", "END LOOP", ... into one token (always upper-case).
    _MERGED_END_TOKENS = frozenset(
        ("END_IF", "END_LOOP", "END_CASE", "END_WHILE", "END_FOR")
    )

    def __init__(self, context):
        super(PLSQLStates, self).__init__(context)
        self.in_parameter_list = False
        self.last_control_keyword = None  # Track FOR/WHILE to avoid counting their LOOP
        self.declaring_nested_function = (
            False  # Track if we're declaring a nested function
        )
        # True once a non-whitespace token has followed the trigger name
        self.seen_trigger_name_token = False

    # ------------------------------------------------------------------
    # Shared transitions
    # ------------------------------------------------------------------

    def _enter_parameter_list(self):
        """Switch to the parameter state, feeding it the opening parenthesis."""
        self.in_parameter_list = True
        self.next(self._parameters, "(")

    def _enter_declaration_section(self):
        """Confirm the pending function and wait for its BEGIN."""
        self.context.confirm_new_function()
        self.next(self._state_before_begin)

    def _start_function(self, name):
        """Register ``name`` as a nested or a top-level function."""
        if self.declaring_nested_function:
            self.context.push_new_function(name)
            self.declaring_nested_function = False
        else:
            self.context.try_new_function(name)

    # ------------------------------------------------------------------
    # States
    # ------------------------------------------------------------------

    def _state_global(self, token):
        """Global state - looking for function/procedure/trigger declarations."""
        token_lower = token.lower()

        if token_lower == "procedure":
            self.next(self._procedure_name)
        elif token_lower == "function":
            self.next(self._function_name)
        elif token_lower == "trigger":
            self.next(self._trigger_name)

    def _procedure_name(self, token):
        """Read procedure name."""
        if token.isspace() or token == "\n":
            return
        if token == "(":
            self._enter_parameter_list()
        elif token.lower() in ("is", "as"):
            self._enter_declaration_section()
        else:
            self._start_function(token)
            self.next(self._procedure_after_name)

    def _procedure_after_name(self, token):
        """After procedure name, look for parameters or IS/AS."""
        if token == ".":
            # Schema-qualified name: the previous token was the schema,
            # next non-whitespace token will be the actual procedure name
            self.next(self._procedure_name_after_dot)
        elif token == "(":
            self._enter_parameter_list()
        elif token.lower() in ("is", "as"):
            self._enter_declaration_section()
        # Skip whitespace and other tokens

    def _procedure_name_after_dot(self, token):
        """Read the actual procedure name after schema.dot."""
        if token.isspace() or token == "\n":
            return
        # Replace the previous (schema) name with the actual procedure name
        self.context.current_function.name = token
        self.next(self._procedure_after_name)

    def _function_name(self, token):
        """Read function name."""
        if token.isspace() or token == "\n":
            return
        if token == "(":
            self._enter_parameter_list()
        elif token.lower() == "return":
            self.next(self._return_type)
        elif token.lower() in ("is", "as"):
            self._enter_declaration_section()
        else:
            self._start_function(token)
            self.next(self._function_after_name)

    def _function_after_name(self, token):
        """After function name, look for parameters, RETURN, or IS/AS."""
        if token == ".":
            # Schema-qualified name: the previous token was the schema,
            # next non-whitespace token will be the actual function name
            self.next(self._function_name_after_dot)
        elif token == "(":
            self._enter_parameter_list()
        elif token.lower() == "return":
            self.next(self._return_type)
        elif token.lower() in ("is", "as"):
            self._enter_declaration_section()
        # Skip whitespace and other tokens

    def _function_name_after_dot(self, token):
        """Read the actual function name after schema.dot."""
        if token.isspace() or token == "\n":
            return
        # Replace the previous (schema) name with the actual function name
        self.context.current_function.name = token
        self.next(self._function_after_name)

    def _return_type(self, token):
        """Skip return type declaration."""
        if token.lower() in ("is", "as"):
            self._enter_declaration_section()
        # Skip everything else (return type tokens)

    def _parameters(self, token):
        """Read parameters until the closing parenthesis."""
        if token == ")":
            self.in_parameter_list = False
            self.next(self._after_parameters)
        elif token == ",":
            # Each comma separates parameters
            self.context.parameter(token)
        elif not token.isspace() and token != "\n":
            # Track non-whitespace tokens as potential parameters (this
            # includes the "(" that is fed in when the state is entered)
            self.context.parameter(token)

    def _after_parameters(self, token):
        """After parameters, look for IS/AS or RETURN."""
        if token.lower() == "return":
            self.next(self._return_type)
        elif token.lower() in ("is", "as"):
            self._enter_declaration_section()
        # Skip whitespace and other tokens

    def _trigger_name(self, token):
        """Read trigger name."""
        if token.isspace() or token == "\n":
            return
        # Trigger name found
        self.context.try_new_function(token)
        self.seen_trigger_name_token = False  # Nothing seen after the name yet
        self.next(self._trigger_after_name)

    def _trigger_after_name(self, token):
        """After trigger name, skip until DECLARE or BEGIN."""
        token_lower = token.lower()

        # Only check for dot immediately after trigger name (before any other tokens)
        if token == "." and not self.seen_trigger_name_token:
            # Schema-qualified name: the previous token was the schema,
            # next non-whitespace token will be the actual trigger name
            self.next(self._trigger_name_after_dot)
            return

        # Mark that we've seen a non-whitespace token after the trigger name
        if not token.isspace() and token != "\n":
            self.seen_trigger_name_token = True

        if token_lower == "declare":
            self._enter_declaration_section()
        elif token_lower == "begin":
            self.context.confirm_new_function()
            self.br_count = 1
            self.next(self._state_body)
        # Skip everything else (BEFORE/AFTER, INSERT/UPDATE/DELETE,
        # ON table_name, FOR EACH ROW, etc.)

    def _trigger_name_after_dot(self, token):
        """Read the actual trigger name after schema.dot."""
        if token.isspace() or token == "\n":
            return
        # Replace the previous (schema) name with the actual trigger name
        self.context.current_function.name = token
        self.seen_trigger_name_token = False  # Reset for the real trigger name
        self.next(self._trigger_after_name)

    def _state_before_begin(self, token):
        """
        State between IS/AS and BEGIN - this is the declaration section.
        Watch for nested procedures/functions and the BEGIN keyword.
        """
        token_lower = token.lower()

        # Check for nested procedure/function declarations
        if token_lower == "procedure":
            self.declaring_nested_function = True
            self.next(self._procedure_name)
        elif token_lower == "function":
            self.declaring_nested_function = True
            self.next(self._function_name)
        elif token_lower == "begin":
            # Start of the implementation body: this BEGIN is the first open
            # block of the routine, whether or not nested routines preceded it.
            self.br_count = 1
            self.next(self._state_body)

    def _state_body(self, token):
        """
        Process function/procedure body.
        Track control structures for cyclomatic complexity.
        Manually track BEGIN/END blocks.
        """
        # Merged compound keywords ("END_IF", "END_LOOP", ...) created by the
        # preprocessor close a control structure, not a BEGIN block.  Match
        # the exact merged spellings so that ordinary identifiers such as
        # "end_date" are not mistaken for them (which would reset the
        # FOR/WHILE tracking and double-count the following LOOP).
        if token in self._MERGED_END_TOKENS:
            self.last_control_keyword = None
            return

        token_lower = token.lower()
        token_upper = token.upper()

        # Handle nested procedure/function declarations
        if token_lower == "procedure":
            self.next(self._procedure_name)
            return
        if token_lower == "function":
            self.next(self._function_name)
            return

        # Track FOR and WHILE to know when LOOP follows them
        if token_upper in ("FOR", "WHILE"):
            self.last_control_keyword = token_upper
        # Handle LOOP keyword manually
        # - Standalone LOOP adds +1 complexity
        # - LOOP after FOR/WHILE does not add complexity (already counted for FOR/WHILE)
        elif token_upper == "LOOP":
            if self.last_control_keyword not in ("FOR", "WHILE"):
                # This is a standalone LOOP, add complexity
                self.context.add_condition()
            # Reset tracking after processing LOOP
            self.last_control_keyword = None

        # PL/SQL uses BEGIN/END instead of {}
        if token_lower == "begin":
            self.br_count += 1
            self.context.add_bare_nesting()
        elif token_lower == "end":
            # This is a standalone END (for BEGIN/END block)
            # NOTE(review): every bare END decrements the counter, so an END
            # that closes a CASE *expression* would be counted as a block end
            # too - confirm whether that case needs dedicated handling.
            self.br_count -= 1
            if self.br_count == 0:
                # This END closes the function/procedure
                # Check if we have a parent function BEFORE ending (stack gets popped)
                has_parent = len(self.context.stacked_functions) > 0
                self.context.end_of_function()
                # Return to appropriate state based on whether this was nested
                if has_parent:
                    # Return to parent function's declaration section
                    self.next(self._state_before_begin)
                else:
                    # No parent function, return to global
                    self.next(self._state_global)
                return
            self.context.pop_nesting()

        # Note: Basic conditions (if, elsif, when, while, for, and, or)
        # are automatically counted by the condition_counter processor
        # based on the _conditions set in the Reader class.
|
lizard_languages/python.py
CHANGED
|
@@ -37,6 +37,7 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
|
|
|
37
37
|
def __init__(self, context):
    """Initialise the reader with its Python state machine and token history."""
    super(PythonReader, self).__init__(context)
    python_states = PythonStates(context, self)
    self.parallel_states = [python_states]
    # Last meaningful token seen so far; None until one has been recorded.
    self._last_meaningful_token = None
|
|
40
41
|
|
|
41
42
|
@staticmethod
|
|
42
43
|
def generate_tokens(source_code, addition='', token_class=None):
|
|
@@ -46,6 +47,41 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
|
|
|
46
47
|
r"|(?:\'\'\'(?:\\.|[^\']|\'(?!\'\')|\'\'(?!\'))*\'\'\')",
|
|
47
48
|
token_class)
|
|
48
49
|
|
|
50
|
+
def process_token(self, token):
    """Discount triple-quoted strings that are used as comments from NLOC.

    A triple-quoted string is treated like a comment (its lines are
    subtracted from NLOC) when both of the following hold:

    - the state machine is not in its ``_state_first_line`` state
      (docstrings are handled separately there), and
    - the previous meaningful token does not show that the string is an
      operand of an expression (assignment, call argument, ``return`` /
      ``yield`` value, comparison, container element, ...).

    Every non-whitespace token is then remembered as the last meaningful
    token for the next call.

    Returns:
        bool: always False, so the caller continues with normal processing.
    """
    if token.startswith(('"""', "'''")) and len(token) >= 6:
        # Docstrings are handled separately in _state_first_line
        current_state = self.parallel_states[0]._state

        if current_state != current_state.__self__._state_first_line:
            # Tokens after which a string literal is necessarily part of
            # an expression rather than a standalone statement.
            expression_context = (
                '=', '+=', '-=', '*=', '/=', '%=', '//=', '**=',
                '&=', '|=', '^=', '<<=', '>>=',
                '(', '[', '{', ',',
                '+', '-', '*', '/', '%',
                '==', '!=', '<', '>', '<=', '>=',
                'return', 'yield',
                'in', 'is', 'not', 'and', 'or',
                'if', 'elif', 'else', 'while',
            )

            # Only treat as comment if it's NOT part of an expression
            if self._last_meaningful_token not in expression_context:
                # Subtract the NLOC contribution of this triple-quoted string
                self.context.add_nloc(-(token.count('\n') + 1))

    # Update last meaningful token (ignore whitespace and newlines)
    if not token.isspace():
        self._last_meaningful_token = token

    return False  # Continue with normal processing
|
|
84
|
+
|
|
49
85
|
def preprocess(self, tokens):
|
|
50
86
|
indents = PythonIndents(self.context)
|
|
51
87
|
current_leading_spaces = 0
|