lizard 1.17.13__py2.py3-none-any.whl → 1.17.15__py2.py3-none-any.whl

This diff shows the content changes between publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
@@ -6,152 +6,14 @@ from .code_reader import CodeReader
6
6
  from .clike import CCppCommentsMixin
7
7
  from .js_style_regex_expression import js_style_regex_expression
8
8
  from .js_style_language_states import JavaScriptStyleLanguageStates
9
+ from .typescript import TypeScriptReader, JSTokenizer
9
10
 
10
11
 
11
class JavaScriptReader(TypeScriptReader):
    '''Reader for plain JavaScript sources (*.js).

    JavaScript is treated as a subset of TypeScript: tokenisation and
    the language state machine are inherited from TypeScriptReader.
    The previous ``__init__`` only delegated to ``super().__init__``
    (it no longer installed its own parallel states), so it was
    redundant and has been removed.
    '''
    # pylint: disable=R0903

    ext = ['js']
    language_names = ['javascript', 'js']
34
-
35
- class Tokenizer(object):
36
- def __init__(self):
37
- self.sub_tokenizer = None
38
- self._ended = False
39
-
40
- def __call__(self, token):
41
- if self.sub_tokenizer:
42
- for tok in self.sub_tokenizer(token):
43
- yield tok
44
- if self.sub_tokenizer._ended:
45
- self.sub_tokenizer = None
46
- return
47
- for tok in self.process_token(token):
48
- yield tok
49
-
50
- def stop(self):
51
- self._ended = True
52
-
53
- def process_token(self, token):
54
- pass
55
-
56
-
57
- class JSTokenizer(Tokenizer):
58
- def __init__(self):
59
- super(JSTokenizer, self).__init__()
60
- self.depth = 1
61
-
62
- def process_token(self, token):
63
- if token == "<":
64
- self.sub_tokenizer = XMLTagWithAttrTokenizer()
65
- return
66
- if token == "{":
67
- self.depth += 1
68
- elif token == "}":
69
- self.depth -= 1
70
- if self.depth == 0:
71
- self.stop()
72
- # {} in JSX is not listed as token
73
- # otherwise it will be regarded
74
- # as JS object
75
- return
76
- yield token
77
-
78
-
79
- class XMLTagWithAttrTokenizer(Tokenizer):
80
- def __init__(self):
81
- super(XMLTagWithAttrTokenizer, self).__init__()
82
- self.tag = None
83
- self.state = self._global_state
84
- self.cache = ['<']
85
-
86
- def process_token(self, token):
87
- self.cache.append(token)
88
- if not token.isspace():
89
- result = self.state(token)
90
- if result is not None:
91
- return result
92
- return ()
93
-
94
- def abort(self):
95
- self.stop()
96
- return self.cache
97
-
98
- def flush(self):
99
- tmp, self.cache = self.cache, []
100
- return [''.join(tmp)]
101
-
102
- def _global_state(self, token):
103
- if not isidentifier(token):
104
- return self.abort()
105
- self.tag = token
106
- self.state = self._after_tag
107
-
108
- def _after_tag(self, token):
109
- if token == '>':
110
- self.state = self._body
111
- elif token == "/":
112
- self.state = self._expecting_self_closing
113
- elif isidentifier(token):
114
- self.state = self._expecting_equal_sign
115
- else:
116
- return self.abort()
117
-
118
- def _expecting_self_closing(self, token):
119
- if token == ">":
120
- self.stop()
121
- return self.flush()
122
- return self.abort()
123
-
124
- def _expecting_equal_sign(self, token):
125
- if token == '=':
126
- self.state = self._expecting_value
127
- else:
128
- return self.abort()
129
-
130
- def _expecting_value(self, token):
131
- if token[0] in "'\"":
132
- self.state = self._after_tag
133
- elif token == "{":
134
- self.cache.append("}")
135
- self.sub_tokenizer = JSTokenizer()
136
- self.state = self._after_tag
137
-
138
- def _body(self, token):
139
- if token == "<":
140
- self.sub_tokenizer = XMLTagWithAttrTokenizer()
141
- self.cache.pop()
142
- return self.flush()
143
-
144
- if token.startswith("</"):
145
- self.stop()
146
- return self.flush()
147
-
148
- if token == '{':
149
- self.sub_tokenizer = JSTokenizer()
150
- return self.flush()
151
-
152
-
153
- def isidentifier(token):
154
- try:
155
- return token.isidentifier()
156
- except AttributeError:
157
- return token.encode(encoding='UTF-8')[0].isalpha()
@@ -11,8 +11,18 @@ class JavaScriptStyleLanguageStates(CodeStateMachine): # pylint: disable=R0903
11
11
  self.last_tokens = ''
12
12
  self.function_name = ''
13
13
  self.started_function = None
14
+ self.as_object = False
14
15
 
15
16
  def _state_global(self, token):
17
+ if self.as_object:
18
+ if token == ':':
19
+ self.function_name = self.last_tokens
20
+ return
21
+ elif token == '(':
22
+ self._function(self.last_tokens)
23
+ self.next(self._function, token)
24
+ return
25
+
16
26
  if token in '.':
17
27
  self._state = self._field
18
28
  self.last_tokens += token
@@ -29,11 +39,11 @@ class JavaScriptStyleLanguageStates(CodeStateMachine): # pylint: disable=R0903
29
39
  self.function_name = self.last_tokens
30
40
  elif token == "(":
31
41
  self.sub_state(
32
- JavaScriptStyleLanguageStates(self.context))
42
+ self.__class__(self.context))
33
43
  elif token in '{':
34
44
  if self.started_function:
35
45
  self.sub_state(
36
- JavaScriptStyleLanguageStates(self.context),
46
+ self.__class__(self.context),
37
47
  self._pop_function_from_stack)
38
48
  else:
39
49
  self.read_object()
@@ -46,7 +56,12 @@ class JavaScriptStyleLanguageStates(CodeStateMachine): # pylint: disable=R0903
46
56
  self.last_tokens = token
47
57
 
48
58
  def read_object(self):
49
- self.sub_state(ES6ObjectStates(self.context))
59
+ def callback():
60
+ self.next(self._state_global)
61
+
62
+ object_reader = self.__class__(self.context)
63
+ object_reader.as_object = True
64
+ self.sub_state(object_reader, callback)
50
65
 
51
66
  def statemachine_before_return(self):
52
67
  self._pop_function_from_stack()
@@ -63,16 +78,14 @@ class JavaScriptStyleLanguageStates(CodeStateMachine): # pylint: disable=R0903
63
78
  return
64
79
 
65
80
  self.sub_state(
66
- JavaScriptStyleLanguageStates(self.context),
67
- callback)
81
+ self.__class__(self.context), callback)
68
82
 
69
83
  def _expecting_statement_or_block(self, token):
70
84
  def callback():
71
85
  self.next(self._state_global)
72
86
  if token == "{":
73
87
  self.sub_state(
74
- JavaScriptStyleLanguageStates(self.context),
75
- callback)
88
+ self.__class__(self.context), callback)
76
89
  else:
77
90
  self.next(self._state_global, token)
78
91
 
@@ -116,17 +129,3 @@ class JavaScriptStyleLanguageStates(CodeStateMachine): # pylint: disable=R0903
116
129
  if token != '{':
117
130
  self.started_function = None
118
131
  self.next(self._state_global, token)
119
-
120
-
121
- class ES6ObjectStates(JavaScriptStyleLanguageStates): # pylint: disable=R0903
122
- def __init__(self, context):
123
- super(ES6ObjectStates, self).__init__(context)
124
-
125
- def _state_global(self, token):
126
- if token == ':':
127
- self.function_name = self.last_tokens
128
- elif token == '(':
129
- self._function(self.last_tokens)
130
- self.next(self._function, token)
131
- else:
132
- super(ES6ObjectStates, self)._state_global(token)
@@ -6,18 +6,48 @@ import re
6
6
 
7
7
 
8
8
def js_style_regex_expression(func):
    '''Decorator for token generators: merge JS regex literals.

    The wrapped generator's tokens are post-processed so that a regular
    expression literal such as ``/abc/i`` -- which the base tokenizer
    splits around the ``/`` characters -- is re-joined into a single
    token.  A ``/`` is treated as a regex opener only at the start of
    the stream or right after a token whose last character is one of
    ``=,({[?:!&|;``; otherwise it is kept as a division operator.
    '''
    def generate_tokens_with_regex(source_code, addition='', token_class=None):
        regx_regx = r"\/(\S*?[^\s\\]\/)+?(igm)*"
        regx_pattern = re.compile(regx_regx)
        flag_pattern = re.compile(r'^[igm]+$')
        tokens = list(func(source_code, addition, token_class))
        result = []
        i = 0
        total = len(tokens)
        while i < total:
            token = tokens[i]
            if token == '/':
                # Decide whether this '/' opens a regex literal or is a
                # division operator, based on the previous token.
                is_regex = i == 0
                if i > 0:
                    prev_token = tokens[i - 1].strip()
                    if prev_token and prev_token[-1] in '=,({[?:!&|;':
                        is_regex = True
                if is_regex:
                    # Collect tokens up to (and including) the one that
                    # closes the literal with a trailing '/'.
                    regex_tokens = [token]
                    i += 1
                    while i < total and not tokens[i].endswith('/'):
                        regex_tokens.append(tokens[i])
                        i += 1
                    if i < total:
                        regex_tokens.append(tokens[i])
                        i += 1
                    # Optional regex flags directly after the literal.
                    if i < total and flag_pattern.match(tokens[i]):
                        regex_tokens.append(tokens[i])
                        i += 1
                    combined = ''.join(regex_tokens)
                    if regx_pattern.match(combined):
                        result.append(combined)
                    else:
                        # Not a valid literal after all; keep the pieces.
                        result.extend(regex_tokens)
                    # BUG FIX: 'i' already points at the next unread
                    # token here; falling through to the shared 'i += 1'
                    # below silently dropped that token.
                    continue
                result.append(token)  # division operator
            else:
                result.append(token)
            i += 1
        return result
    return generate_tokens_with_regex
@@ -0,0 +1,127 @@
1
+ '''
2
+ Language parser for JSX
3
+ '''
4
+
5
+ from .javascript import JavaScriptReader
6
+ from .typescript import JSTokenizer, Tokenizer
7
+ from .code_reader import CodeReader
8
+ from .js_style_regex_expression import js_style_regex_expression
9
+
10
+
11
class JSXMixin:
    '''Base mixin class for JSX/TSX shared functionality.

    Supplies token generation that understands JSX syntax and parser
    states that skip a '<...>' fragment before a function body.
    NOTE(review): the zero-argument super() below is Python-3-only even
    though the wheel is tagged py2.py3 -- confirm Python 2 support is
    really dropped.
    '''
    @staticmethod
    @js_style_regex_expression
    def generate_tokens(source_code, addition='', token_class=None):
        # Extra token patterns: '$name' identifiers, closing JSX tags
        # such as '</div>', and backtick template literals.
        addition = addition +\
            r"|(?:\$\w+)" + \
            r"|(?:\<\/\w+\>)" + \
            r"|`.*?`"
        js_tokenizer = JSTokenizer()
        for token in CodeReader.generate_tokens(
                source_code, addition, token_class):
            for tok in js_tokenizer(token):
                yield tok

    def _expecting_func_opening_bracket(self, token):
        # A '<' here starts a JSX/generic fragment instead of the
        # function body; consume tokens until the matching '>'.
        if token == '<':
            self.next(self._expecting_jsx)
            return
        super()._expecting_func_opening_bracket(token)

    def _expecting_jsx(self, token):
        # Skip everything until '>' closes the fragment, then resume
        # waiting for the function's opening bracket.
        if token == '>':
            self.next(self._expecting_func_opening_bracket)
36
+
37
class JSXReader(JavaScriptReader, JSXMixin):
    '''Reader for JSX source files (*.jsx).

    The explicit ``generate_tokens`` override is needed because the MRO
    would otherwise pick TypeScriptReader's tokenizer instead of the
    JSX-aware one from JSXMixin.
    '''
    # pylint: disable=R0903

    ext = ['jsx']
    language_names = ['jsx']

    @staticmethod
    def generate_tokens(source_code, addition='', token_class=None):
        # JSXMixin.generate_tokens is already wrapped by
        # js_style_regex_expression; re-applying the decorator here (as
        # before) ran the regex-literal token merge twice.
        return JSXMixin.generate_tokens(source_code, addition, token_class)
47
+
48
+
49
class XMLTagWithAttrTokenizer(Tokenizer):
    # State machine that re-assembles a JSX/XML tag (e.g. '<div a="1">')
    # from individual tokens into a single token.  It is installed as a
    # sub-tokenizer when a '<' is seen and un-installs itself (stop())
    # once the tag is complete or turns out not to be XML after all.
    def __init__(self):
        super(XMLTagWithAttrTokenizer, self).__init__()
        self.tag = None                  # tag name, once recognised
        self.state = self._global_state  # current state handler
        self.cache = ['<']               # raw tokens collected so far

    def process_token(self, token):
        # Whitespace is cached (so it can be replayed verbatim on
        # abort) but never fed to the state handlers.
        self.cache.append(token)
        if not token.isspace():
            result = self.state(token)
            if result is not None:
                return result
        return ()

    def abort(self):
        # Not an XML tag after all: emit the cached tokens unchanged.
        self.stop()
        return self.cache

    def flush(self):
        # Emit everything cached so far as one combined token.
        tmp, self.cache = self.cache, []
        return [''.join(tmp)]

    def _global_state(self, token):
        # Expecting the tag name right after '<'.
        if not isidentifier(token):
            return self.abort()
        self.tag = token
        self.state = self._after_tag

    def _after_tag(self, token):
        # After the tag name: '>' opens the body, '/' may start a
        # self-closing tag, an identifier starts an attribute.
        if token == '>':
            self.state = self._body
        elif token == "/":
            self.state = self._expecting_self_closing
        elif isidentifier(token):
            self.state = self._expecting_equal_sign
        else:
            return self.abort()

    def _expecting_self_closing(self, token):
        # '/' seen; only '>' legitimately completes a self-closing tag.
        if token == ">":
            self.stop()
            return self.flush()
        return self.abort()

    def _expecting_equal_sign(self, token):
        # Attribute name seen; expecting '='.
        if token == '=':
            self.state = self._expecting_value
        else:
            return self.abort()

    def _expecting_value(self, token):
        # Attribute value: a quoted string, or a '{...}' JS expression
        # handled by a nested JSTokenizer.
        if token[0] in "'\"":
            self.state = self._after_tag
        elif token == "{":
            self.cache.append("}")
            self.sub_tokenizer = JSTokenizer()
            self.state = self._after_tag

    def _body(self, token):
        # Inside element content: nested tags, '{...}' expressions, or
        # the closing '</tag>' token end the current accumulation.
        if token == "<":
            self.sub_tokenizer = XMLTagWithAttrTokenizer()
            self.cache.pop()
            return self.flush()

        if token.startswith("</"):
            self.stop()
            return self.flush()

        if token == '{':
            self.sub_tokenizer = JSTokenizer()
            return self.flush()
121
+
122
+
123
def isidentifier(token):
    '''Return True when *token* looks like an identifier.

    Uses str.isidentifier() on Python 3; on Python 2, where that method
    does not exist, falls back to checking the first byte is a letter.
    '''
    if hasattr(token, 'isidentifier'):
        return token.isidentifier()
    # Python 2 fallback: inspect the first encoded character.
    return token.encode(encoding='UTF-8')[0].isalpha()
@@ -13,7 +13,7 @@ class PythonIndents: # pylint: disable=R0902
13
13
  self.indents = [0]
14
14
  self.context = context
15
15
 
16
- def set_nesting(self, spaces, token = ""):
16
+ def set_nesting(self, spaces, token=""):
17
17
  while self.indents[-1] > spaces and (not token.startswith(")")):
18
18
  self.indents.pop()
19
19
  self.context.pop_nesting()
@@ -29,8 +29,10 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
29
29
 
30
30
  ext = ['py']
31
31
  language_names = ['python']
32
- _conditions = set(['if', 'for', 'while', 'and', 'or',
33
- 'elif', 'except', 'finally'])
32
+ _conditions = set([
33
+ 'if', 'for', 'while', 'and', 'or',
34
+ 'elif', 'except', 'finally'
35
+ ])
34
36
 
35
37
  def __init__(self, context):
36
38
  super(PythonReader, self).__init__(context)
@@ -39,8 +41,9 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
39
41
  @staticmethod
40
42
  def generate_tokens(source_code, addition='', token_class=None):
41
43
  return ScriptLanguageMixIn.generate_common_tokens(
42
- source_code,
43
- r"|\'\'\'.*?\'\'\'" + r'|\"\"\".*?\"\"\"', token_class)
44
+ source_code,
45
+ r"|\'\'\'.*?\'\'\'" + r'|\"\"\".*?\"\"\"',
46
+ token_class)
44
47
 
45
48
  def preprocess(self, tokens):
46
49
  indents = PythonIndents(self.context)
@@ -54,7 +57,8 @@ class PythonReader(CodeReader, ScriptLanguageMixIn):
54
57
  else:
55
58
  if not token.startswith('#'):
56
59
  current_function = self.context.current_function
57
- if current_function.name == '*global*' or current_function.long_name.endswith(')'):
60
+ if (current_function.name == '*global*' or
61
+ current_function.long_name.endswith(')')):
58
62
  indents.set_nesting(current_leading_spaces, token)
59
63
  reading_leading_space = False
60
64
  else:
@@ -4,7 +4,7 @@ Common behaviours of script languages
4
4
  from .code_reader import CodeReader
5
5
 
6
6
 
7
- class ScriptLanguageMixIn(object):
7
+ class ScriptLanguageMixIn:
8
8
  # pylint: disable=R0903
9
9
 
10
10
  @staticmethod
@@ -0,0 +1,24 @@
1
+ '''
2
+ Language parser for TSX
3
+ '''
4
+
5
+ from .typescript import TypeScriptReader, TypeScriptStates, JSTokenizer
6
+ from .jsx import JSXMixin
7
+ from .code_reader import CodeReader
8
+ from .js_style_regex_expression import js_style_regex_expression
9
+
10
+
11
class TSXReader(TypeScriptReader, JSXMixin):
    '''Reader for TSX (TypeScript + JSX) source files (*.tsx).

    JSX handling comes entirely from JSXMixin, so no extra parallel
    states (and hence no ``__init__`` override) are needed.
    '''
    # pylint: disable=R0903

    ext = ['tsx']
    language_names = ['tsx']

    @staticmethod
    def generate_tokens(source_code, addition='', token_class=None):
        # JSXMixin.generate_tokens already carries the
        # js_style_regex_expression decorator; re-applying it here (as
        # before) ran the regex-literal token merge twice.
        return JSXMixin.generate_tokens(source_code, addition, token_class)
@@ -3,12 +3,54 @@ Language parser for JavaScript
3
3
  '''
4
4
 
5
5
  import re
6
- from .code_reader import CodeReader
6
+ from .code_reader import CodeReader, CodeStateMachine
7
7
  from .clike import CCppCommentsMixin
8
8
  from .js_style_language_states import JavaScriptStyleLanguageStates
9
9
  from .js_style_regex_expression import js_style_regex_expression
10
10
 
11
11
 
12
class Tokenizer(object):
    '''Base class for streaming token post-processors.

    An instance is called once per incoming token and yields zero or
    more outgoing tokens.  While ``sub_tokenizer`` is set, every token
    is delegated to it; once the sub-tokenizer marks itself finished
    (via stop()), it is dropped.
    '''

    def __init__(self):
        self.sub_tokenizer = None
        self._ended = False

    def __call__(self, token):
        delegate = self.sub_tokenizer
        if delegate:
            for produced in delegate(token):
                yield produced
            if delegate._ended:
                self.sub_tokenizer = None
        else:
            for produced in self.process_token(token):
                yield produced

    def stop(self):
        # Mark this tokenizer finished; its owner drops it afterwards.
        self._ended = True

    def process_token(self, token):
        # Subclasses override this generator to emit tokens.
        pass
32
+
33
+
34
class JSTokenizer(Tokenizer):
    '''Tracks JS curly-brace nesting and hands JSX tags to a sub-tokenizer.'''

    def __init__(self):
        super().__init__()
        # Depth starts at 1; reaching 0 means the enclosing block closed.
        self.depth = 1

    def process_token(self, token):
        if token == "<":
            # Imported lazily to avoid a circular import with .jsx
            from .jsx import XMLTagWithAttrTokenizer
            self.sub_tokenizer = XMLTagWithAttrTokenizer()
            return
        if token == "{":
            self.depth += 1
        elif token == "}":
            self.depth -= 1
            if not self.depth:
                # Closing brace of the enclosing block: finish without
                # emitting it.
                self.stop()
                return
        yield token
52
+
53
+
12
54
  class TypeScriptReader(CodeReader, CCppCommentsMixin):
13
55
  # pylint: disable=R0903
14
56
 
@@ -18,32 +60,65 @@ class TypeScriptReader(CodeReader, CCppCommentsMixin):
18
60
  'catch', 'case'])
19
61
 
20
62
  def __init__(self, context):
21
- super(TypeScriptReader, self).__init__(context)
63
+ super().__init__(context)
22
64
  self.parallel_states = [TypeScriptStates(context)]
23
65
 
24
66
  @staticmethod
25
67
  @js_style_regex_expression
26
68
  def generate_tokens(source_code, addition='', token_class=None):
27
69
  addition = addition +\
28
- r"|(?:\w+\?)"
29
- return CodeReader.generate_tokens(source_code, addition, token_class)
70
+ r"|(?:\$\w+)" + \
71
+ r"|(?:\w+\?)" + \
72
+ r"|`.*?`"
73
+ js_tokenizer = JSTokenizer()
74
+ for token in CodeReader.generate_tokens(
75
+ source_code, addition, token_class):
76
+ for tok in js_tokenizer(token):
77
+ yield tok
30
78
 
31
79
 
32
80
class TypeScriptStates(JavaScriptStyleLanguageStates):
    '''JavaScript state machine extended with TypeScript type annotations.

    The previous no-op ``__init__`` (pure super() delegation) has been
    removed, and the local ``typeStates`` renamed to snake_case.
    '''

    def _state_global(self, token):
        # Outside object literals a ':' introduces a type annotation,
        # which is consumed by a dedicated sub state machine.
        if not self.as_object and token == ':':
            self._consume_type_annotation()
            return
        super()._state_global(token)

    def _expecting_func_opening_bracket(self, token):
        # 'fn(...): ReturnType {' -- swallow the return-type annotation.
        if token == ':':
            self._consume_type_annotation()
        else:
            super()._expecting_func_opening_bracket(token)

    def _consume_type_annotation(self):
        type_states = TypeScriptTypeAnnotationStates(self.context)

        def callback():
            # Replay the token that terminated the annotation, if any.
            if type_states.saved_token:
                self(type_states.saved_token)
        self.sub_state(type_states, callback)
39
104
 
40
- def _expecting_default(self, token):
41
- self.next(self._function_return_type)
105
+
106
class TypeScriptTypeAnnotationStates(CodeStateMachine):
    # Consumes one TypeScript type annotation, then returns control to
    # the parent state machine.  If the annotation ended because of a
    # structurally significant token ('{', '=' or ';'), that token is
    # stored in ``saved_token`` so the parent can replay it.

    def __init__(self, context):
        super().__init__(context)
        # Terminator token to hand back to the parent, if any.
        self.saved_token = None

    def _state_global(self, token):
        if token == '{':
            # '{' directly after ':' -- an inline (object-shaped) type.
            self.next(self._inline_type_annotation, token)
        else:
            self.next(self._state_simple_type, token)

    def _state_simple_type(self, token):
        # A simple type name ends at '{', '=' or ';'; remember the
        # terminator for the parent state machine.
        if token in '{=;':
            self.saved_token = token
            self.statemachine_return()

    @CodeStateMachine.read_inside_brackets_then("{}")
    def _inline_type_annotation(self, _):
        # The whole '{...}' block is consumed by the decorator.
        self.statemachine_return()
@@ -0,0 +1,34 @@
1
+ '''
2
+ Language parser for Vue.js files
3
+ '''
4
+
5
+ from .code_reader import CodeReader, CodeStateMachine
6
+ from .typescript import TypeScriptReader
7
+
8
+
9
class VueReader(TypeScriptReader):
    '''Reader for Vue.js single-file components (*.vue).

    Only the content of <script>...</script> blocks is analysed; the
    template and style sections are filtered out in ``preprocess``.
    The previous ``__init__`` only delegated to super() and has been
    removed as redundant.
    '''
    # pylint: disable=R0903

    ext = ['vue']
    language_names = ['vue', 'vuejs']

    @staticmethod
    def generate_tokens(source_code, addition='', token_class=None):
        # Also capture Vue block tags (<script ...>, </script>, ...) as
        # single tokens so preprocess() can switch sections on them.
        addition = addition + r"|(?:\<\/?\w+.*?\>)"
        return TypeScriptReader.generate_tokens(
            source_code, addition, token_class)

    def preprocess(self, tokens):
        '''Yield only the tokens inside <script> blocks.

        The <script>/</script> tag tokens themselves are swallowed;
        whitespace other than bare newlines is dropped as well.
        '''
        current_block = None

        for token in tokens:
            if token.startswith('<script'):
                current_block = 'script'
            elif token.startswith('</script'):
                current_block = None
            elif current_block == 'script':
                if not token.isspace() or token == '\n':
                    yield token