jaclang 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jaclang might be problematic.
- jaclang/__init__.py +4 -0
- jaclang/cli/__init__.py +7 -0
- jaclang/cli/cli.jac +46 -0
- jaclang/cli/cmds.jac +14 -0
- jaclang/cli/impl/__init__.py +1 -0
- jaclang/cli/impl/cli_impl.jac +93 -0
- jaclang/cli/impl/cmds_impl.jac +26 -0
- jaclang/core/__init__.py +12 -0
- jaclang/core/impl/__init__.py +1 -0
- jaclang/core/impl/arch_impl.jac +112 -0
- jaclang/core/impl/element_impl.jac +95 -0
- jaclang/core/impl/exec_ctx_impl.jac +17 -0
- jaclang/core/impl/memory_impl.jac +57 -0
- jaclang/core/primitives.jac +104 -0
- jaclang/jac/__init__.py +1 -0
- jaclang/jac/absyntree.py +1787 -0
- jaclang/jac/constant.py +46 -0
- jaclang/jac/importer.py +130 -0
- jaclang/jac/lexer.py +538 -0
- jaclang/jac/parser.py +1474 -0
- jaclang/jac/passes/__init__.py +5 -0
- jaclang/jac/passes/blue/__init__.py +25 -0
- jaclang/jac/passes/blue/ast_build_pass.py +3190 -0
- jaclang/jac/passes/blue/blue_pygen_pass.py +1335 -0
- jaclang/jac/passes/blue/decl_def_match_pass.py +278 -0
- jaclang/jac/passes/blue/import_pass.py +75 -0
- jaclang/jac/passes/blue/sub_node_tab_pass.py +30 -0
- jaclang/jac/passes/blue/tests/__init__.py +1 -0
- jaclang/jac/passes/blue/tests/test_ast_build_pass.py +61 -0
- jaclang/jac/passes/blue/tests/test_blue_pygen_pass.py +117 -0
- jaclang/jac/passes/blue/tests/test_decl_def_match_pass.py +43 -0
- jaclang/jac/passes/blue/tests/test_import_pass.py +18 -0
- jaclang/jac/passes/blue/tests/test_sub_node_pass.py +26 -0
- jaclang/jac/passes/blue/tests/test_type_analyze_pass.py +53 -0
- jaclang/jac/passes/blue/type_analyze_pass.py +731 -0
- jaclang/jac/passes/ir_pass.py +154 -0
- jaclang/jac/passes/purple/__init__.py +17 -0
- jaclang/jac/passes/purple/impl/__init__.py +1 -0
- jaclang/jac/passes/purple/impl/purple_pygen_pass_impl.jac +289 -0
- jaclang/jac/passes/purple/purple_pygen_pass.jac +35 -0
- jaclang/jac/sym_table.py +127 -0
- jaclang/jac/tests/__init__.py +1 -0
- jaclang/jac/tests/fixtures/__init__.py +1 -0
- jaclang/jac/tests/fixtures/activity.py +10 -0
- jaclang/jac/tests/fixtures/fam.jac +68 -0
- jaclang/jac/tests/fixtures/hello_world.jac +5 -0
- jaclang/jac/tests/fixtures/lexer_fam.jac +61 -0
- jaclang/jac/tests/fixtures/stuff.jac +6 -0
- jaclang/jac/tests/test_importer.py +24 -0
- jaclang/jac/tests/test_lexer.py +57 -0
- jaclang/jac/tests/test_parser.py +50 -0
- jaclang/jac/tests/test_utils.py +12 -0
- jaclang/jac/transform.py +63 -0
- jaclang/jac/transpiler.py +69 -0
- jaclang/jac/utils.py +120 -0
- jaclang/utils/__init__.py +1 -0
- jaclang/utils/fstring_parser.py +73 -0
- jaclang/utils/log.py +9 -0
- jaclang/utils/sly/__init__.py +6 -0
- jaclang/utils/sly/docparse.py +62 -0
- jaclang/utils/sly/lex.py +510 -0
- jaclang/utils/sly/yacc.py +2398 -0
- jaclang/utils/test.py +81 -0
- jaclang/utils/tests/__init__.py +1 -0
- jaclang/utils/tests/test_fstring_parser.py +55 -0
- jaclang-0.0.3.dist-info/METADATA +12 -0
- jaclang-0.0.3.dist-info/RECORD +70 -0
- {jaclang-0.0.1.dist-info → jaclang-0.0.3.dist-info}/WHEEL +1 -1
- jaclang-0.0.3.dist-info/entry_points.txt +3 -0
- jaclang-0.0.3.dist-info/top_level.txt +1 -0
- jaclang-0.0.1.dist-info/METADATA +0 -7
- jaclang-0.0.1.dist-info/RECORD +0 -4
- jaclang-0.0.1.dist-info/top_level.txt +0 -1
jaclang/utils/sly/docparse.py
ADDED
@@ -0,0 +1,62 @@
# docparse.py
#
# Support doc-string parsing classes
# flake8: noqa

__all__ = ["DocParseMeta"]


class DocParseMeta(type):
    '''
    Metaclass that processes the class docstring through a parser and
    incorporates the result into the resulting class definition. This
    allows Python classes to be defined with alternative syntax.
    To use this class, you first need to define a lexer and parser:

        from sly import Lexer, Parser
        class MyLexer(Lexer):
            ...

        class MyParser(Parser):
            ...

    You then need to define a metaclass that inherits from DocParseMeta.
    This class must specify the associated lexer and parser classes.
    For example:

        class MyDocParseMeta(DocParseMeta):
            lexer = MyLexer
            parser = MyParser

    This metaclass is then used as a base for processing user-defined
    classes:

        class Base(metaclass=MyDocParseMeta):
            pass

        class Spam(Base):
            """
            doc string is parsed
            ...
            """

    It is expected that the MyParser() class would return a dictionary.
    This dictionary is used to create the final class Spam in this example.
    '''

    @staticmethod
    def __new__(meta, clsname, bases, clsdict):
        if "__doc__" in clsdict:
            lexer = meta.lexer()
            parser = meta.parser()
            lexer.cls_name = parser.cls_name = clsname
            lexer.cls_qualname = parser.cls_qualname = clsdict["__qualname__"]
            lexer.cls_module = parser.cls_module = clsdict["__module__"]
            parsedict = parser.parse(lexer.tokenize(clsdict["__doc__"]))
            assert isinstance(parsedict, dict), "Parser must return a dictionary"
            clsdict.update(parsedict)
        return super().__new__(meta, clsname, bases, clsdict)

    @classmethod
    def __init_subclass__(cls):
        assert hasattr(cls, "parser") and hasattr(cls, "lexer")
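A quick orientation, separate from the diff itself: DocParseMeta only assumes that meta.lexer() yields an object with a tokenize() method and that meta.parser().parse() returns a dict, which is then merged into the namespace of the class being defined. A minimal sketch under those assumptions, using hypothetical ToyLexer/ToyParser stand-ins rather than anything shipped in jaclang:

class ToyLexer:
    def tokenize(self, text):
        # Yield (name, value) pairs from "name = value" lines in the docstring.
        for line in text.splitlines():
            if "=" in line:
                name, _, value = line.partition("=")
                yield name.strip(), value.strip()


class ToyParser:
    def parse(self, tokens):
        # Whatever dict this returns is spliced into the class being defined.
        return dict(tokens)


class ToyDocParseMeta(DocParseMeta):
    lexer = ToyLexer
    parser = ToyParser


class Base(metaclass=ToyDocParseMeta):
    pass


class Spam(Base):
    """
    greeting = hello
    target = world
    """


print(Spam.greeting, Spam.target)  # hello world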
jaclang/utils/sly/lex.py
ADDED
@@ -0,0 +1,510 @@
# type: ignore
# -----------------------------------------------------------------------------
# sly: lex.py
#
# Copyright (C) 2016 - 2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# flake8: noqa
__all__ = ["Lexer", "LexerStateChange"]

import re
import copy


class LexError(Exception):
    """
    Exception raised if an invalid character is encountered and no default
    error handler function is defined. The .text attribute of the exception
    contains all remaining untokenized text. The .error_index is the index
    location of the error.
    """

    def __init__(self, message, text, error_index):
        self.args = (message,)
        self.text = text
        self.error_index = error_index


class PatternError(Exception):
    """
    Exception raised if there's some kind of problem with the specified
    regex patterns in the lexer.
    """

    pass


class LexerBuildError(Exception):
    """
    Exception raised if there's some sort of problem building the lexer.
    """

    pass


class LexerStateChange(Exception):
    """
    Exception raised to force a lexing state change
    """

    def __init__(self, newstate, tok=None):
        self.newstate = newstate
        self.tok = tok


class Token(object):
    """
    Representation of a single token.
    """

    __slots__ = ("type", "value", "lineno", "lineidx", "index", "end")

    def __repr__(self):
        return f"Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, lineidx={self.lineidx}, index={self.index}, end={self.end})"


class TokenStr(str):
    @staticmethod
    def __new__(cls, value, key=None, remap=None):
        self = super().__new__(cls, value)
        self.key = key
        self.remap = remap
        return self

    # Implementation of TOKEN[value] = NEWTOKEN
    def __setitem__(self, key, value):
        if self.remap is not None:
            self.remap[self.key, key] = value

    # Implementation of del TOKEN[value]
    def __delitem__(self, key):
        if self.remap is not None:
            self.remap[self.key, key] = self.key


class _Before:
    def __init__(self, tok, pattern):
        self.tok = tok
        self.pattern = pattern


class LexerMetaDict(dict):
    """
    Special dictionary that prohibits duplicate definitions in lexer specifications.
    """

    def __init__(self):
        self.before = {}
        self.delete = []
        self.remap = {}

    def __setitem__(self, key, value):
        if isinstance(value, str):
            value = TokenStr(value, key, self.remap)

        if isinstance(value, _Before):
            self.before[key] = value.tok
            value = TokenStr(value.pattern, key, self.remap)

        if key in self and not isinstance(value, property):
            prior = self[key]
            if isinstance(prior, str):
                if callable(value):
                    value.pattern = prior
                else:
                    raise AttributeError(f"Name {key} redefined")

        super().__setitem__(key, value)

    def __delitem__(self, key):
        self.delete.append(key)
        if key not in self and key.isupper():
            pass
        else:
            return super().__delitem__(key)

    def __getitem__(self, key):
        if key not in self and key.split("ignore_")[-1].isupper() and key[:1] != "_":
            return TokenStr(key, key, self.remap)
        else:
            return super().__getitem__(key)


class LexerMeta(type):
    """
    Metaclass for collecting lexing rules
    """

    @classmethod
    def __prepare__(meta, name, bases):
        d = LexerMetaDict()

        def _(pattern, *extra):
            patterns = [pattern, *extra]

            def decorate(func):
                pattern = "|".join(f"({pat})" for pat in patterns)
                if hasattr(func, "pattern"):
                    func.pattern = pattern + "|" + func.pattern
                else:
                    func.pattern = pattern
                return func

            return decorate

        d["_"] = _
        d["before"] = _Before
        return d

    def __new__(meta, clsname, bases, attributes):
        del attributes["_"]
        del attributes["before"]

        # Create attributes for use in the actual class body
        cls_attributes = {
            str(key): str(val) if isinstance(val, TokenStr) else val
            for key, val in attributes.items()
        }
        cls = super().__new__(meta, clsname, bases, cls_attributes)

        # Attach various metadata to the class
        cls._attributes = dict(attributes)
        cls._remap = attributes.remap
        cls._before = attributes.before
        cls._delete = attributes.delete
        cls._build()
        return cls


class Lexer(metaclass=LexerMeta):
    # These attributes may be defined in subclasses
    tokens = set()
    literals = set()
    ignore = ""
    reflags = 0
    regex_module = re

    _token_names = set()
    _token_funcs = {}
    _ignored_tokens = set()
    _remapping = {}
    _delete = {}
    _remap = {}

    # Internal attributes
    __state_stack = None
    __set_state = None

    @classmethod
    def _collect_rules(cls):
        # Collect all of the rules from class definitions that look like token
        # information. There are a few things that govern this:
        #
        # 1. Any definition of the form NAME = str is a token if NAME is
        #    is defined in the tokens set.
        #
        # 2. Any definition of the form ignore_NAME = str is a rule for an ignored
        #    token.
        #
        # 3. Any function defined with a 'pattern' attribute is treated as a rule.
        #    Such functions can be created with the @_ decorator or by defining
        #    function with the same name as a previously defined string.
        #
        # This function is responsible for keeping rules in order.

        # Collect all previous rules from base classes
        rules = []

        for base in cls.__bases__:
            if isinstance(base, LexerMeta):
                rules.extend(base._rules)

        # Dictionary of previous rules
        existing = dict(rules)

        for key, value in cls._attributes.items():
            if (
                (key in cls._token_names)
                or key.startswith("ignore_")
                or hasattr(value, "pattern")
            ):
                if callable(value) and not hasattr(value, "pattern"):
                    raise LexerBuildError(
                        f"function {value} doesn't have a regex pattern"
                    )

                if key in existing:
                    # The definition matches something that already existed in the base class.
                    # We replace it, but keep the original ordering
                    n = rules.index((key, existing[key]))
                    rules[n] = (key, value)
                    existing[key] = value

                elif isinstance(value, TokenStr) and key in cls._before:
                    before = cls._before[key]
                    if before in existing:
                        # Position the token before another specified token
                        n = rules.index((before, existing[before]))
                        rules.insert(n, (key, value))
                    else:
                        # Put at the end of the rule list
                        rules.append((key, value))
                    existing[key] = value
                else:
                    rules.append((key, value))
                    existing[key] = value

            elif (
                isinstance(value, str)
                and not key.startswith("_")
                and key not in {"ignore", "literals"}
            ):
                raise LexerBuildError(f"{key} does not match a name in tokens")

        # Apply deletion rules
        rules = [(key, value) for key, value in rules if key not in cls._delete]
        cls._rules = rules

    @classmethod
    def _build(cls):
        """
        Build the lexer object from the collected tokens and regular expressions.
        Validate the rules to make sure they look sane.
        """
        if "tokens" not in vars(cls):
            raise LexerBuildError(
                f"{cls.__qualname__} class does not define a tokens attribute"
            )

        # Pull definitions created for any parent classes
        cls._token_names = cls._token_names | set(cls.tokens)
        cls._ignored_tokens = set(cls._ignored_tokens)
        cls._token_funcs = dict(cls._token_funcs)
        cls._remapping = dict(cls._remapping)

        for (key, val), newtok in cls._remap.items():
            if key not in cls._remapping:
                cls._remapping[key] = {}
            cls._remapping[key][val] = newtok

        remapped_toks = set()
        for d in cls._remapping.values():
            remapped_toks.update(d.values())

        undefined = remapped_toks - set(cls._token_names)
        if undefined:
            missing = ", ".join(undefined)
            raise LexerBuildError(f"{missing} not included in token(s)")

        cls._collect_rules()

        parts = []
        for tokname, value in cls._rules:
            if tokname.startswith("ignore_"):
                tokname = tokname[7:]
                cls._ignored_tokens.add(tokname)

            if isinstance(value, str):
                pattern = value

            elif callable(value):
                cls._token_funcs[tokname] = value
                pattern = getattr(value, "pattern")

            # Form the regular expression component
            part = f"(?P<{tokname}>{pattern})"

            # Make sure the individual regex compiles properly
            try:
                cpat = cls.regex_module.compile(part, cls.reflags)
            except Exception as e:
                raise PatternError(f"Invalid regex for token {tokname}") from e

            # Verify that the pattern doesn't match the empty string
            if cpat.match(""):
                raise PatternError(f"Regex for token {tokname} matches empty input")

            parts.append(part)

        if not parts:
            return

        # Form the master regular expression
        # previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
        # cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags)
        cls._master_re = cls.regex_module.compile("|".join(parts), cls.reflags)

        # Verify that that ignore and literals specifiers match the input type
        if not isinstance(cls.ignore, str):
            raise LexerBuildError("ignore specifier must be a string")

        if not all(isinstance(lit, str) for lit in cls.literals):
            raise LexerBuildError("literals must be specified as strings")

    def begin(self, cls):
        """
        Begin a new lexer state
        """
        assert isinstance(cls, LexerMeta), "state must be a subclass of Lexer"
        if self.__set_state:
            self.__set_state(cls)
        self.__class__ = cls

    def push_state(self, cls):
        """
        Push a new lexer state onto the stack
        """
        if self.__state_stack is None:
            self.__state_stack = []
        self.__state_stack.append(type(self))
        self.begin(cls)

    def pop_state(self):
        """
        Pop a lexer state from the stack
        """
        self.begin(self.__state_stack.pop())

    def tokenize(self, text, lineno=1, index=0):
        _ignored_tokens = (
            _master_re
        ) = _ignore = _token_funcs = _literals = _remapping = None

        # --- Support for state changes
        def _set_state(cls):
            nonlocal _ignored_tokens, _master_re, _ignore, _token_funcs, _literals, _remapping
            _ignored_tokens = cls._ignored_tokens
            _master_re = cls._master_re
            _ignore = cls.ignore
            _token_funcs = cls._token_funcs
            _literals = cls.literals
            _remapping = cls._remapping

        self.__set_state = _set_state
        _set_state(type(self))

        # --- Support for backtracking
        _mark_stack = []

        def _mark():
            _mark_stack.append((type(self), index, lineno))

        self.mark = _mark

        def _accept():
            _mark_stack.pop()

        self.accept = _accept

        def _reject():
            nonlocal index, lineno
            cls, index, lineno = _mark_stack[-1]
            _set_state(cls)

        self.reject = _reject

        # --- Main tokenization function
        self.text = text
        self.line_lengths = [len(line) for line in text.splitlines(True)]
        try:
            while True:
                try:
                    if text[index] in _ignore:
                        index += 1
                        continue
                except IndexError:
                    return

                tok = Token()
                tok.lineno = lineno
                tok.index = index
                line_index = sum = 0
                while index >= sum + self.line_lengths[line_index]:
                    sum += self.line_lengths[line_index]
                    line_index += 1
                tok.lineidx = sum
                m = _master_re.match(text, index)
                if m:
                    tok.end = index = m.end()
                    tok.value = m.group()
                    tok.type = m.lastgroup

                    if tok.type in _remapping:
                        tok.type = _remapping[tok.type].get(tok.value, tok.type)

                    if tok.type in _token_funcs:
                        self.index = index
                        self.lineno = lineno
                        tok = _token_funcs[tok.type](self, tok)
                        index = self.index
                        lineno = self.lineno
                        if not tok:
                            continue

                    if tok.type in _ignored_tokens:
                        continue

                    yield tok

                else:
                    # No match, see if the character is in literals
                    if text[index] in _literals:
                        tok.value = text[index]
                        tok.end = index + 1
                        tok.type = tok.value
                        index += 1
                        yield tok
                    else:
                        # A lexing error
                        self.index = index
                        self.lineno = lineno
                        tok.type = "ERROR"
                        tok.value = text[index:]
                        tok = self.error(tok)
                        if tok is not None:
                            tok.end = self.index
                            yield tok

                        index = self.index
                        lineno = self.lineno

        # Set the final state of the lexer before exiting (even if exception)
        finally:
            self.text = text
            self.index = index
            self.lineno = lineno

    # Default implementations of the error handler. May be changed in subclasses
    def error(self, t):
        raise LexError(
            f"Illegal character {t.value[0]!r} at index {self.index}",
            t.value,
            self.index,
        )
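For orientation, here is an illustrative sketch (hypothetical, not part of the jaclang diff) of how the Lexer machinery above is typically driven: uppercase string attributes named in tokens become regex rules, the ignore string skips whitespace, and functions decorated with @_ can post-process the matched Token before tokenize() yields it.

class CalcLexer(Lexer):
    tokens = {"NAME", "NUMBER", "PLUS", "ASSIGN"}
    ignore = " \t"

    NAME = r"[a-zA-Z_][a-zA-Z0-9_]*"
    PLUS = r"\+"
    ASSIGN = r"="

    @_(r"\d+")
    def NUMBER(self, t):
        # Convert the matched text to an int before the token is yielded.
        t.value = int(t.value)
        return t


for tok in CalcLexer().tokenize("x = 1 + 2"):
    print(tok.type, tok.value)
# NAME x / ASSIGN = / NUMBER 1 / PLUS + / NUMBER 2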