lizard 1.17.31__py2.py3-none-any.whl → 1.18.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lizard_languages/st.py ADDED
@@ -0,0 +1,139 @@
1
+ '''
2
+ Language parser for Structured Text.
3
+ '''
4
+
5
+ import re
6
+ # import itertools
7
+ from .code_reader import CodeStateMachine, CodeReader
8
+
9
+
10
class StCommentsMixin(object):  # pylint: disable=R0903
    ''' Comment-text extraction for Structured Text tokens. '''

    @staticmethod
    def get_comment_from_token(token):
        '''
        Return everything after the two-character opener when the token
        begins with "(*" or "//"; return None for any other token.
        A trailing "*)" is not removed.
        '''
        opener = token[:2]
        return token[2:] if opener in ("(*", "//") else None
16
+
17
+
18
class StReader(CodeReader, StCommentsMixin):
    '''
    This is the reader for Structured Text.

    Supplies the keyword tables, the tokenizer additions and the token
    preprocessing for ".st" files, and installs StStates as its only
    parallel state machine.
    '''

    ext = ["st"]
    language_names = ['st']
    # A token that starts with '#': group(1) is the directive word,
    # group(2) is everything after it (re.S lets it span newlines).
    macro_pattern = re.compile(r"#\s*(\w+)\s*(.*)", re.M | re.S)

    # track block starters
    # Listed in lower and upper case only; other casings are not in the set.
    _conditions = set([
        'if', 'elsif', 'case', 'for', 'while', 'repeat',
        'IF', 'ELSIF', 'CASE', 'FOR', 'WHILE', 'REPEAT'
    ])

    # Keywords that StStates treats as the start of a function.
    _functions = set([
        'FUNCTION_BLOCK', 'FUNCTION', 'ACTION'
    ])

    # Keywords that open a nesting level in StStates; also the source of
    # the END_<keyword> alternatives built in generate_tokens.
    _blocks = set([
        'IF', 'FOR', 'WHILE', 'CASE', 'REPEAT',
    ])

    # Tokens that close a nesting level in StStates (preprocess rewrites
    # every END_* token to 'END').
    _ends = set([
        'END',
    ])

    # Nesting Depth
    loops = [
        'if', 'case', 'for', 'while', 'repeat',
        'IF', 'CASE', 'FOR', 'WHILE', 'REPEAT'
    ]
    bracket = 'END'

    def __init__(self, context):
        '''Create the reader and register one StStates machine on it.'''
        super(StReader, self).__init__(context)
        self.parallel_states = (
            StStates(context, self),
        )

    @staticmethod
    def generate_tokens(source_code, addition='', token_class=None):
        '''
        Tokenize Structured Text source.

        Builds a Structured Text specific pattern, appends the caller's
        `addition` to it unchanged, and passes the result to
        CodeReader.generate_tokens as its `addition` argument.
        '''

        # Capture everything until end of logical line, where lines may be continued with \ at the end.
        _until_end = r'(?:\\\n|[^\n])*'
        # One END_<keyword> alternative per entry of _blocks (set order).
        block_endings = '|'.join(f'END_{_}' for _ in StReader._blocks)
        addition = (
            r'(?i)'  # case-insensitive
            r'//' + _until_end + r'|'  # line comment
            # NOTE(review): this alternative stops at the end of the logical
            # line, so a "(* ... *)" comment spanning several lines is not
            # matched as a single token here - confirm that is intended.
            r'\(\*' + _until_end + r'|'  # block comment (* ... *)
            r'OR|'
            r'AND|'
            r'XOR|'
            r'NOT|'
            r'ELSE\s+IF|'
            + block_endings + addition
        )

        return CodeReader.generate_tokens(source_code, addition, token_class)

    def preprocess(self, tokens):
        """Handle compiler pragmas like #IF, #INCLUDE, etc.

        Generator over `tokens`:
        - for a token matching macro_pattern, "if"/"ifdef"/"ifndef"/"elif"
          (compared in lower case) add a condition to the context, and
          "include" yields "#include" followed by the directive's argument
          (or '""' when it is empty); other directives yield no token of
          their own;
        - any other token starting with END_ (any case) is yielded as "END";
        - remaining tokens are yielded unchanged, except whitespace-only
          tokens other than "\\n", which are dropped.
        """
        for token in tokens:
            macro = self.macro_pattern.match(token)
            if macro:
                directive = macro.group(1).lower()
                if directive in ("if", "ifdef", "ifndef", "elif"):
                    self.context.add_condition()
                elif directive == "include":
                    yield "#include"
                    yield macro.group(2) or "\"\""
                # Emit one "\n" per extra line contained in the macro text.
                # NOTE(review): nesting reconstructed from a flattened
                # listing - confirm this loop runs for every directive and
                # not only for "include".
                for _ in macro.group(2).splitlines()[1:]:
                    yield "\n"
            else:
                # ST normalization: collapse END_* into END
                upper_tok = token.upper()
                if upper_tok.startswith("END_"):
                    yield "END"
                    continue

                # Eliminate whitespace, keep line breaks
                if not token.isspace() or token == '\n':
                    yield token
99
+
100
+
101
class StStates(CodeStateMachine):
    """State machine that follows functions and block nesting in Structured Text."""

    def __init__(self, context, reader):
        super().__init__(context)
        self.last_token = None
        self.reader = reader

    def __call__(self, token, reader=None):
        """Feed one token to the current state; return True once to_exit is set."""
        state_done = self._state(token)
        if state_done:
            self.next(self.saved_state)
            if self.callback:
                self.callback()
        self.last_token = token
        return True if self.to_exit else None

    def _state_global(self, token):
        """Dispatch on function keywords, block openers and the END token."""
        keyword = token.upper()

        # A function keyword only starts a function while the current
        # function's top nesting level is negative.
        starts_function = (
            keyword in StReader._functions
            and self.context.current_function.top_nesting_level < 0
        )
        if starts_function:
            self._state = self._function_name
            return

        if keyword in StReader._blocks:
            self.context.add_bare_nesting()
            return

        # Compared without upper-casing, unlike the two checks above.
        if token in StReader._ends:
            self.context.pop_nesting()

    def reset_state(self, token=None):
        """Switch back to the global state, re-dispatching `token` if one is given."""
        self._state = self._state_global
        if token is None:
            return
        self._state_global(token)

    def _function_name(self, token):
        """Use the token after a function keyword as the new function's name."""
        self.context.restart_new_function(token)
        self._state = self._function

    def _function(self, token):
        """Open the function's nesting level, then handle `token` globally."""
        self.context.add_bare_nesting()
        self.reset_state(token)
lizard_languages/tsx.py CHANGED
@@ -2,16 +2,18 @@
2
2
  Language parser for TSX
3
3
  '''
4
4
 
5
- from .typescript import TypeScriptReader
6
- from .jsx import JSXMixin, JSXTypeScriptStates
5
+ from .code_reader import CodeStateMachine
6
+ from .code_reader import CodeReader
7
7
  from .js_style_regex_expression import js_style_regex_expression
8
+ from .typescript import TypeScriptReader
9
+ from .typescript import JSTokenizer, Tokenizer, TypeScriptStates
8
10
 
9
11
 
10
- class TSXReader(TypeScriptReader, JSXMixin):
12
+ class TSXReader(TypeScriptReader):
11
13
  # pylint: disable=R0903
12
14
 
13
- ext = ['tsx']
14
- language_names = ['tsx']
15
+ ext = ['tsx', 'jsx']
16
+ language_names = ['tsx', 'jsx']
15
17
 
16
18
  @staticmethod
17
19
  @js_style_regex_expression
@@ -19,17 +21,449 @@ class TSXReader(TypeScriptReader, JSXMixin):
19
21
  # Add support for TypeScript type annotations in JSX
20
22
  addition = addition + \
21
23
  r"|(?:<[A-Za-z][A-Za-z0-9]*(?:\.[A-Za-z][A-Za-z0-9]*)*>)" + \
22
- r"|(?:<\/[A-Za-z][A-Za-z0-9]*(?:\.[A-Za-z][A-Za-z0-9]*)*>)"
23
- return JSXMixin.generate_tokens(source_code, addition, token_class)
24
+ r"|(?:<\/[A-Za-z][A-Za-z0-9]*(?:\.[A-Za-z][A-Za-z0-9]*)*>)" + \
25
+ r"|(?:\$\w+)" + \
26
+ r"|(?:<\/\w+>)" + \
27
+ r"|(?:=>)" + \
28
+ r"|`.*?`"
29
+ js_tokenizer = TSXTokenizer()
30
+ for token in CodeReader.generate_tokens(
31
+ source_code, addition, token_class):
32
+ for tok in js_tokenizer(token):
33
+ yield tok
24
34
 
25
35
  def __init__(self, context):
26
36
  super(TSXReader, self).__init__(context)
27
37
  # Use JSXTypeScriptStates for better handling of TSX specific features
28
38
  self.parallel_states = [JSXTypeScriptStates(context)]
29
39
 
30
- def _expecting_func_opening_bracket(self, token):
31
- # Handle TypeScript arrow functions with type annotations in JSX attributes
32
- if token == ':':
40
+
41
class JSXTypeScriptStates(CodeStateMachine):
    """State machine for JSX/TSX files.

    Recognises `function` declarations and `=>` arrow functions, reports
    them to the context (push_new_function / end_of_function), adds a
    condition for control-flow tokens seen while a function is open, and
    creates a new instance of its own class for each nested `{...}` or
    `(...)` group.
    """

    def __init__(self, context, inside_function=False):
        """
        context: object receiving function/condition/parameter events.
        inside_function: True when this instance was created for a group
            that is already inside a function.
        """
        super().__init__(context)
        self.last_token = ''
        self.function_name = ''
        # Truthy while a function started by this instance is still open.
        self.started_function = None
        # Candidate name remembered at '=' or ':' for a later arrow function.
        self.pending_function_name = ''
        # True right after a 'declare' token.
        self._ts_declare = False
        # NOTE(review): not read anywhere in this class - presumably consumed
        # elsewhere or left over; confirm.
        self._conditions = set(['if', 'elseif', 'for', 'while', '&&', '||', '?',
                                'catch', 'case'])
        self.inside_function = inside_function  # Track if we're already inside a function

    def statemachine_before_return(self):
        """Close the function this instance started, if any, before returning."""
        # Ensure any pending function is closed - this should make the main function appear first
        if self.started_function:
            self._pop_function_from_stack()

    def _state_global(self, token):
        """Default state: dispatch on the current token.

        Most branches return early; self.last_token is only updated here
        when no branch returned.
        """
        # Handle TypeScript declare keyword
        if token == 'declare':
            self._ts_declare = True
            return
        if token == 'function' and self._ts_declare:
            # Skip declared functions
            self._ts_declare = False
            self.next(self._skip_until_semicolon)
            return
        self._ts_declare = False

        # Handle function keyword
        if token == 'function':
            self.next(self._function_name_state)
            return

        # Handle arrow functions: look for =>
        if token == '=>':
            # Start arrow function with pending name or anonymous
            self._start_arrow_function()
            return

        # Handle control flow for cyclomatic complexity
        # Conditions are only added while a function is open or enclosing.
        if token in ('if', 'switch', 'for', 'while', 'catch'):
            if self.started_function or self.inside_function:
                self.context.add_condition()
            self.next(self._expecting_condition_and_statement)
            return
        elif token in ('else', 'do', 'try', 'finally'):
            self.next(self._expecting_statement_or_block)
            return
        elif token in ('&&', '||', '?'):
            if self.started_function or self.inside_function:
                self.context.add_condition()
            return
        elif token == 'case':
            if self.started_function or self.inside_function:
                self.context.add_condition()
            return

        # Handle assignment for function names - only if not inside a function
        if token == '=' and not self.inside_function:
            # If we don't have a pending name yet, use the last token
            if not self.pending_function_name:
                self.pending_function_name = self.last_token
            return

        # Handle type annotations
        if token == ':' and not self.inside_function:
            # This could be a type annotation before assignment (e.g., const name: Type = ...)
            # Store the current token as potential function name and consume the type
            if not self.pending_function_name:
                self.pending_function_name = self.last_token
            self._consume_type_annotation()
            return

        # Handle braces
        if token == '{':
            if self.started_function or self.inside_function:
                # Function body - stay in current function
                nested_state = self.__class__(self.context, inside_function=True)
                self.sub_state(nested_state)
            else:
                # Object or block
                self.sub_state(self.__class__(self.context))
            return
        elif token == '}':
            self.statemachine_return()
            return

        # Handle parentheses - be careful with arrow function parameters
        if token == '(':
            if self.pending_function_name:
                # This might be arrow function parameters, handle in-line
                self.next(self._arrow_function_params)
            else:
                # Regular parentheses grouping or function call
                nested_state = self.__class__(self.context, inside_function=self.inside_function)
                self.sub_state(nested_state)
            return
        elif token == ')':
            self.statemachine_return()
            return

        # Handle end of statement
        if token == ';' or self.context.newline:
            if not self.inside_function:
                self.pending_function_name = ''
            # NOTE(review): nesting reconstructed from a flattened listing -
            # confirm this call is not meant to sit under the
            # `not self.inside_function` check above.
            self._pop_function_from_stack()

        self.last_token = token

    def _function_name_state(self, token):
        """Handle function name after 'function' keyword"""
        if token == '(':
            # Anonymous function
            self._start_function('(anonymous)')
            self.next(self._function_parameters, token)
        else:
            # Named function
            self._start_function(token)
            self.next(self._expecting_function_params)

    def _expecting_function_params(self, token):
        """Expect opening parenthesis for function parameters"""
        if token == '(':
            self.next(self._function_parameters, token)
        else:
            # Not a function, return to global state
            self.next(self._state_global, token)

    def _function_parameters(self, token):
        """Handle function parameters"""
        if token == ')':
            self.next(self._expecting_function_body)
        elif token == '(':
            # Nested parentheses in parameters
            self.sub_state(self.__class__(self.context))
        else:
            # Parameter token
            # Only purely alphanumeric tokens are reported as parameters.
            if token not in (',', ':', '=', '?', '...') and token.isalnum():
                self.context.parameter(token)

    def _expecting_function_body(self, token):
        """Expect function body (could be block or expression for arrow functions)"""
        if token == '{':
            # Block body - create new scope but stay in current function
            def callback():
                self.next(self._state_global)
            nested_state = self.__class__(self.context, inside_function=True)
            # Don't start a new function in the nested state
            self.sub_state(nested_state, callback)
        elif token == ':':
            # Type annotation for return type
            self._consume_type_annotation()
        else:
            # Expression body or other tokens - continue in global state
            self.next(self._state_global, token)

    def _start_arrow_function(self):
        """Start an arrow function with pending name or anonymous"""
        name = self.pending_function_name or '(anonymous)'

        if self.inside_function:
            # For nested functions, create and immediately close them
            # This ensures they're detected but don't interfere with the main function
            self.context.push_new_function(name)
            self.context.end_of_function()
        else:
            # For top-level functions, use normal processing
            self._start_function(name)

        self.pending_function_name = ''

    def _start_function(self, name):
        """Start a new function"""
        # Always start a new function - nested functions are separate functions
        self.context.push_new_function(name)
        # Track that we started a function for proper cleanup
        self.started_function = True
        self.function_name = name

    def _pop_function_from_stack(self):
        """End current function"""
        # No-op unless this instance has an open function.
        if self.started_function:
            self.context.end_of_function()
            self.started_function = None
            self.function_name = ''

    def _expecting_condition_and_statement(self, token):
        """Handle conditional statements with conditions"""
        if token == '(':
            # Condition in parentheses
            def callback():
                self.next(self._expecting_statement_or_block)
            nested_state = self.__class__(self.context, inside_function=self.inside_function)
            self.sub_state(nested_state, callback)
        else:
            # No parentheses, go directly to statement
            self.next(self._state_global, token)

    def _expecting_statement_or_block(self, token):
        """Handle statement or block after control flow"""
        if token == '{':
            # Block statement
            def callback():
                self.next(self._state_global)
            nested_state = self.__class__(self.context, inside_function=self.inside_function)
            self.sub_state(nested_state, callback)
        else:
            # Single statement
            self.next(self._state_global, token)

    def _consume_type_annotation(self):
        """Handle TypeScript type annotations

        Hands the following tokens to a JSXTypeAnnotationHandler; when it
        finishes, the callback returns to the global state and replays the
        handler's saved_token there if one was recorded.
        """
        def callback():
            # Continue with any saved token from the type annotation handler
            type_handler = self.sub_state_instance
            if hasattr(type_handler, 'saved_token') and type_handler.saved_token:
                self.next(self._state_global, type_handler.saved_token)
            else:
                self.next(self._state_global)

        type_handler = JSXTypeAnnotationHandler(self.context)
        self.sub_state_instance = type_handler  # Store reference to access saved_token
        self.sub_state(type_handler, callback)

    def _skip_until_semicolon(self, token):
        """Skip tokens until semicolon or newline (for declare statements)"""
        if token == ';' or self.context.newline:
            self.next(self._state_global)

    def _arrow_function_params(self, token):
        """Handle arrow function parameters without losing function name"""
        if token == ')':
            # End of parameters, expect => next
            self.next(self._expecting_arrow)
        elif token == '(':
            # Nested parentheses in parameters
            nested_state = self.__class__(self.context, inside_function=self.inside_function)
            self.sub_state(nested_state)
        else:
            # Parameter token - add to context if it's a valid parameter
            if token not in (',', ':', '=', '?', '...') and token.isalnum():
                # Don't add parameters yet, wait for arrow to confirm it's a function
                # (nothing is recorded here, so these parameters are never
                # reported to the context by this state)
                pass

    def _expecting_arrow(self, token):
        """Expect => after arrow function parameters"""
        if token == '=>':
            # Confirmed arrow function, start it now
            self._start_arrow_function()
        elif token == ':':
            # Type annotation for return type
            self._consume_type_annotation()
        else:
            # Not an arrow function, return to global state
            self.pending_function_name = ''
            self.next(self._state_global, token)
300
+
301
+
302
class JSXTypeAnnotationHandler(CodeStateMachine):
    """Handle TypeScript type annotations in JSX/TSX.

    Consumes tokens until the annotation ends, then records the
    terminating token in `saved_token` and returns to the parent state
    machine. Generic brackets are tracked in `depth`; object types
    `{ ... }` and function types `( ... )` are skipped up to their
    *matching* closer, so nested types such as `{ a: { b: string } }` or
    `(cb: (x: T) => void) => void` no longer end at the first `}` / `)`.
    """

    def __init__(self, context):
        super().__init__(context)
        self.depth = 0             # currently open '<' generic brackets
        self.saved_token = None    # token that terminated the annotation
        self._group_depth = 0      # open '{' or '(' inside an object/function type

    def _state_global(self, token):
        """Top level of the annotation: find its end or enter a nested group."""
        if token == '<':
            # Generic type
            self.depth += 1
        elif token == '>':
            self.depth -= 1
            if self.depth < 0:
                # A '>' with no matching '<' ends the type annotation
                self.saved_token = token
                self.statemachine_return()
        elif token in ('=', ';', ')', '}', '=>') and self.depth == 0:
            # End of type annotation
            self.saved_token = token
            self.statemachine_return()
        elif token == '{':
            # Object type annotation
            self._group_depth = 1
            self.next(self._object_type)
        elif token == '(':
            # Function type annotation
            self._group_depth = 1
            self.next(self._function_type)

    def _object_type(self, token):
        """Handle object type annotations like { width: string }"""
        if token == '{':
            self._group_depth += 1
        elif token == '}':
            self._group_depth -= 1
            if self._group_depth == 0:
                # Matching closing brace: back to the top level
                self.next(self._state_global)

    def _function_type(self, token):
        """Handle function type annotations like (param: Type) => ReturnType"""
        if token == '(':
            self._group_depth += 1
        elif token == ')':
            self._group_depth -= 1
            if self._group_depth == 0:
                # Matching closing parenthesis: back to the top level
                self.next(self._state_global)
+
341
+
342
class TSXTokenizer(JSTokenizer):
    """JSTokenizer variant that hands '<' to an XML tag tokenizer and
    passes '=>' through directly."""

    def __init__(self):
        super().__init__()

    def process_token(self, token):
        """Yield the tokens produced for `token`."""
        if token == "<":
            # Following tokens go to the tag tokenizer; '<' itself yields nothing here.
            self.sub_tokenizer = XMLTagWithAttrTokenizer()
        elif token == "=>":
            # Arrow tokens bypass the inherited handling.
            yield token
        else:
            yield from super().process_token(token)
358
+
359
+
360
class XMLTagWithAttrTokenizer(Tokenizer):
    """Tokenizer for an XML-style tag (with attributes) inside JSX/TSX.

    Every incoming token is appended to `cache`. `state` holds the handler
    for the next non-whitespace token. A handler returns None to keep
    collecting, or a list of tokens to emit: `abort()` returns the cached
    tokens one by one, `flush()` returns them joined into a single token.
    """

    def __init__(self):
        super(XMLTagWithAttrTokenizer, self).__init__()
        self.tag = None
        self.state = self._global_state
        self.cache = ['<']  # the '<' that triggered this tokenizer
        self.brace_count = 0  # Track nested braces for complex expressions
        self.arrow_function_detected = False  # Track if we've detected an arrow function

    def process_token(self, token):
        """Cache `token`, run the current state on it, and yield its output.

        Whitespace tokens are cached but never passed to a state. This is
        a generator, so state results are yielded rather than returned;
        returning them would silently discard them.
        """
        self.cache.append(token)
        if token.isspace():
            return
        result = self.state(token)
        if result is not None:
            for tok in result:
                yield tok

    def abort(self):
        """Stop this tokenizer and return the cached tokens individually."""
        self.stop()
        return self.cache

    def flush(self):
        """Empty the cache and return its content joined as one token."""
        tmp, self.cache = self.cache, []
        return [''.join(tmp)]

    def _global_state(self, token):
        """First token after '<': must be an identifier (the tag name)."""
        if not isidentifier(token):
            return self.abort()
        self.tag = token
        self.state = self._after_tag

    def _after_tag(self, token):
        """After the tag name or a complete attribute."""
        if token == '>':
            self.state = self._body
        elif token == "/":
            self.state = self._expecting_self_closing
        elif isidentifier(token):
            self.state = self._expecting_equal_sign
        else:
            return self.abort()

    def _expecting_self_closing(self, token):
        """After '/': only '>' completes the tag."""
        if token == ">":
            self.stop()
            return self.flush()
        return self.abort()

    def _expecting_equal_sign(self, token):
        """After an attribute name: '=' is required."""
        if token == '=':
            self.state = self._expecting_value
        else:
            return self.abort()

    def _expecting_value(self, token):
        """After '=': a quoted string or a '{' expression; other tokens are ignored."""
        if token[0] in "'\"":
            self.state = self._after_tag
        elif token == "{":
            self.brace_count = 1  # Start counting braces
            self.state = self._jsx_expression
            # The closing brace is deliberately not appended to the cache here.
            self.sub_tokenizer = TSXTokenizer()

    def _jsx_expression(self, token):
        """Inside an attribute's '{...}' expression."""
        # Handle nested braces in expressions
        if token == "{":
            self.brace_count += 1
        elif token == "}":
            self.brace_count -= 1
            if self.brace_count == 0:
                # We've found the matching closing brace
                self.state = self._after_tag
                return

        # Handle arrow functions in JSX attributes
        if token == "=>":
            self.arrow_function_detected = True
            # Explicitly yield the arrow token to ensure it's processed
            return ["=>"]

        # Any other token (including '<' of a generic type) needs no action here.

    def _body(self, token):
        """Between the opening tag's '>' and the closing tag."""
        if token == "<":
            # Nested tag: drop the '<' just cached (the nested tokenizer
            # starts with its own) and emit what was collected so far.
            self.sub_tokenizer = XMLTagWithAttrTokenizer()
            self.cache.pop()
            return self.flush()

        if token.startswith("</"):
            self.stop()
            return self.flush()

        if token == '{':
            self.sub_tokenizer = TSXTokenizer()
            return self.flush()
463
+
464
+
465
def isidentifier(token):
    """Return whether `token` is an identifier.

    Uses the token's own isidentifier() when it has one; otherwise falls
    back to checking that the first element of its UTF-8 encoding is
    alphabetic.
    """
    try:
        check = token.isidentifier
    except AttributeError:
        return token.encode(encoding='UTF-8')[0].isalpha()
    return check()