jaclang 0.5.11__py3-none-any.whl → 0.5.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jaclang might be problematic. Click here for more details.
- jaclang/cli/cli.py +20 -0
- jaclang/compiler/__init__.py +35 -19
- jaclang/compiler/absyntree.py +106 -97
- jaclang/compiler/generated/jac_parser.py +4069 -0
- jaclang/compiler/jac.lark +655 -0
- jaclang/compiler/parser.py +44 -31
- jaclang/compiler/passes/main/fuse_typeinfo_pass.py +92 -37
- jaclang/compiler/passes/main/import_pass.py +8 -5
- jaclang/compiler/passes/main/pyast_gen_pass.py +512 -352
- jaclang/compiler/passes/main/pyast_load_pass.py +271 -64
- jaclang/compiler/passes/main/registry_pass.py +3 -7
- jaclang/compiler/passes/main/tests/test_pyast_gen_pass.py +2 -0
- jaclang/compiler/passes/main/type_check_pass.py +4 -1
- jaclang/compiler/passes/tool/jac_formatter_pass.py +7 -0
- jaclang/compiler/passes/tool/tests/test_unparse_validate.py +16 -0
- jaclang/compiler/passes/utils/mypy_ast_build.py +93 -0
- jaclang/compiler/tests/test_importer.py +15 -0
- jaclang/core/aott.py +4 -3
- jaclang/core/construct.py +1 -1
- jaclang/core/importer.py +109 -51
- jaclang/core/llms.py +29 -0
- jaclang/core/registry.py +22 -0
- jaclang/core/utils.py +72 -0
- jaclang/plugin/default.py +127 -8
- jaclang/plugin/feature.py +29 -2
- jaclang/plugin/spec.py +25 -2
- jaclang/utils/helpers.py +7 -9
- jaclang/utils/lang_tools.py +37 -13
- jaclang/utils/test.py +1 -3
- jaclang/utils/tests/test_lang_tools.py +6 -0
- jaclang/vendor/lark/grammars/common.lark +59 -0
- jaclang/vendor/lark/grammars/lark.lark +62 -0
- jaclang/vendor/lark/grammars/python.lark +302 -0
- jaclang/vendor/lark/grammars/unicode.lark +7 -0
- {jaclang-0.5.11.dist-info → jaclang-0.5.16.dist-info}/METADATA +1 -1
- {jaclang-0.5.11.dist-info → jaclang-0.5.16.dist-info}/RECORD +40 -34
- jaclang/compiler/__jac_gen__/jac_parser.py +0 -4069
- /jaclang/compiler/{__jac_gen__ → generated}/__init__.py +0 -0
- {jaclang-0.5.11.dist-info → jaclang-0.5.16.dist-info}/WHEEL +0 -0
- {jaclang-0.5.11.dist-info → jaclang-0.5.16.dist-info}/entry_points.txt +0 -0
- {jaclang-0.5.11.dist-info → jaclang-0.5.16.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,4069 @@
|
|
|
1
|
+
# The file was automatically generated by Lark v1.1.9
|
|
2
|
+
__version__ = "1.1.9"
|
|
3
|
+
|
|
4
|
+
#
|
|
5
|
+
#
|
|
6
|
+
# Lark Stand-alone Generator Tool
|
|
7
|
+
# ----------------------------------
|
|
8
|
+
# Generates a stand-alone LALR(1) parser
|
|
9
|
+
#
|
|
10
|
+
# Git: https://github.com/erezsh/lark
|
|
11
|
+
# Author: Erez Shinan (erezshin@gmail.com)
|
|
12
|
+
#
|
|
13
|
+
#
|
|
14
|
+
# >>> LICENSE
|
|
15
|
+
#
|
|
16
|
+
# This tool and its generated code use a separate license from Lark,
|
|
17
|
+
# and are subject to the terms of the Mozilla Public License, v. 2.0.
|
|
18
|
+
# If a copy of the MPL was not distributed with this
|
|
19
|
+
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
20
|
+
#
|
|
21
|
+
# If you wish to purchase a commercial license for this tool and its
|
|
22
|
+
# generated code, you may contact me via email or otherwise.
|
|
23
|
+
#
|
|
24
|
+
# If MPL2 is incompatible with your free or open-source project,
|
|
25
|
+
# contact me and we'll work it out.
|
|
26
|
+
#
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
from copy import deepcopy
|
|
30
|
+
from abc import ABC, abstractmethod
|
|
31
|
+
from types import ModuleType
|
|
32
|
+
from typing import (
|
|
33
|
+
TypeVar,
|
|
34
|
+
Generic,
|
|
35
|
+
Type,
|
|
36
|
+
Tuple,
|
|
37
|
+
List,
|
|
38
|
+
Dict,
|
|
39
|
+
Iterator,
|
|
40
|
+
Collection,
|
|
41
|
+
Callable,
|
|
42
|
+
Optional,
|
|
43
|
+
FrozenSet,
|
|
44
|
+
Any,
|
|
45
|
+
Union,
|
|
46
|
+
Iterable,
|
|
47
|
+
IO,
|
|
48
|
+
TYPE_CHECKING,
|
|
49
|
+
overload,
|
|
50
|
+
Sequence,
|
|
51
|
+
Pattern as REPattern,
|
|
52
|
+
ClassVar,
|
|
53
|
+
Set,
|
|
54
|
+
Mapping,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class LarkError(Exception):
    """Root of the exception hierarchy defined in this module."""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ConfigurationError(LarkError, ValueError):
    """Raised by ``assert_config`` when a value is not among the allowed options."""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def assert_config(value, options: Collection, msg="Got %r, expected one of %s"):
    """Check that ``value`` is a member of ``options``.

    Args:
        value: The value to validate.
        options: The collection of accepted values.
        msg: A %-format template that receives ``(value, options)``.

    Raises:
        ConfigurationError: If ``value`` is not in ``options``.
    """
    if value in options:
        return
    raise ConfigurationError(msg % (value, options))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GrammarError(LarkError):
    """Grammar-related error; transformers re-raise it without wrapping."""
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class ParseError(LarkError):
    """Base class of the parse-time errors (UnexpectedEOF, UnexpectedToken)."""
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class LexError(LarkError):
    """Base class of the lexing errors (UnexpectedCharacters)."""
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
T = TypeVar("T")
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class UnexpectedInput(LarkError):
    """Common base for errors raised on input that cannot be lexed or parsed.

    Subclasses fill in the position attributes below. The helpers here build
    a readable excerpt of the offending input and match an error against a
    set of known-bad examples.
    """

    line: int
    column: int
    pos_in_stream = None
    state: Any
    _terminals_by_name = None
    interactive_parser: "InteractiveParser"

    def get_context(self, text: str, span: int = 40) -> str:
        """Return the input around the error with a ``^`` marker line below it.

        Args:
            text: The input that was being processed (``str`` or ``bytes``).
            span: How many characters to show on each side of the error.

        Returns:
            The excerpt (cut at the nearest line breaks) followed by a line
            holding a caret under the error position. Bytes input is decoded
            as ASCII with ``backslashreplace``.
        """
        assert self.pos_in_stream is not None, self
        pos = self.pos_in_stream
        start = max(pos - span, 0)
        end = pos + span
        if not isinstance(text, bytes):
            before = text[start:pos].rsplit("\n", 1)[-1]
            after = text[pos:end].split("\n", 1)[0]
            return before + after + "\n" + " " * len(before.expandtabs()) + "^\n"

        before = text[start:pos].rsplit(b"\n", 1)[-1]
        after = text[pos:end].split(b"\n", 1)[0]
        return (
            before + after + b"\n" + b" " * len(before.expandtabs()) + b"^\n"
        ).decode("ascii", "backslashreplace")

    def match_examples(
        self,
        parse_fn: "Callable[[str], Tree]",
        examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
        token_type_match_fallback: bool = False,
        use_accepts: bool = True,
    ) -> Optional[T]:
        """Find the label of the example whose parse error best matches this one.

        Each malformed example is passed to ``parse_fn``; the resulting
        ``UnexpectedInput`` is compared with ``self`` by parser state.

        Args:
            parse_fn: Callable that parses a string and raises on bad input.
            examples: Mapping (or iterable of pairs) from label to a
                collection of malformed input strings.
            token_type_match_fallback: Also accept a match on token type
                when no example produces the exact same token.
            use_accepts: Skip examples whose ``accepts`` set differs from
                this error's (only applies between UnexpectedToken errors).

        Returns:
            The label of an exact-token match if one exists, otherwise the
            best weaker candidate, otherwise ``None``.
        """
        assert self.state is not None, "Not supported for this exception"

        if isinstance(examples, Mapping):
            examples = examples.items()

        # (label, matched_by_token_type)
        candidate = (None, False)
        for i, (label, example) in enumerate(examples):
            assert not isinstance(example, str), "Expecting a list"

            for j, malformed in enumerate(example):
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        if (
                            use_accepts
                            and isinstance(self, UnexpectedToken)
                            and isinstance(ut, UnexpectedToken)
                            and ut.accepts != self.accepts
                        ):
                            # Arguments are passed lazily so no formatting
                            # happens while debug logging is disabled.
                            logger.debug(
                                "Different accepts with same state[%d]: %s != %s at example [%s][%s]",
                                self.state,
                                self.accepts,
                                ut.accepts,
                                i,
                                j,
                            )
                            continue
                        if isinstance(
                            self, (UnexpectedToken, UnexpectedEOF)
                        ) and isinstance(ut, (UnexpectedToken, UnexpectedEOF)):
                            if ut.token == self.token:
                                # Exact token match wins immediately.
                                logger.debug("Exact Match at example [%s][%s]", i, j)
                                return label

                            if token_type_match_fallback:
                                # Keep only the first token-type match.
                                if (ut.token.type == self.token.type) and not candidate[
                                    -1
                                ]:
                                    logger.debug(
                                        "Token Type Fallback at example [%s][%s]",
                                        i,
                                        j,
                                    )
                                    candidate = label, True

                        if candidate[0] is None:
                            # Weakest match: same parser state only.
                            logger.debug(
                                "Same State match at example [%s][%s]", i, j
                            )
                            candidate = label, False

        return candidate[0]

    def _format_expected(self, expected):
        """Render ``expected`` terminal names as a bulleted "Expected one of" text.

        Names present in ``_terminals_by_name`` are replaced by that
        terminal's ``user_repr()``.
        """
        if self._terminals_by_name:
            d = self._terminals_by_name
            expected = [
                d[t_name].user_repr() if t_name in d else t_name for t_name in expected
            ]
        return "Expected one of: \n\t* %s\n" % "\n\t* ".join(expected)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class UnexpectedEOF(ParseError, UnexpectedInput):
    """Raised when the input ends while more tokens were expected.

    ``pos_in_stream``, ``line`` and ``column`` are set to -1 and ``token``
    is a synthetic ``<EOF>`` token.
    """

    expected: "List[Token]"

    def __init__(self, expected, state=None, terminals_by_name=None):
        """Store the expected terminals and the parser state.

        Args:
            expected: The terminals that would have been accepted.
            state: The parser state at the point of failure, if known.
            terminals_by_name: Optional mapping used to pretty-print the
                expected terminals.
        """
        super(UnexpectedEOF, self).__init__()

        self.expected = expected
        self.state = state
        try:
            from .lexer import Token
        except ImportError:
            # NOTE(review): in a single-file (stand-alone) parser there is no
            # sibling ``lexer`` module; this presumes ``Token`` is defined at
            # module level in this file -- confirm. If it is not, the
            # original ImportError is re-raised unchanged.
            Token = globals().get("Token")
            if Token is None:
                raise

        self.token = Token("<EOF>", "")

        self.pos_in_stream = -1
        self.line = -1
        self.column = -1
        self._terminals_by_name = terminals_by_name

    def __str__(self):
        """Return the message followed by the list of expected terminals."""
        message = "Unexpected end-of-input. "
        message += self._format_expected(self.expected)
        return message
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class UnexpectedCharacters(LexError, UnexpectedInput):
    """Raised when no terminal matches the input at the current position."""

    allowed: Set[str]
    considered_tokens: Set[Any]

    def __init__(
        self,
        seq,
        lex_pos,
        line,
        column,
        allowed=None,
        considered_tokens=None,
        state=None,
        token_history=None,
        terminals_by_name=None,
        considered_rules=None,
    ):
        """Record where lexing failed and capture the offending character.

        Args:
            seq: The input being lexed (``str`` or ``bytes``).
            lex_pos: Index into ``seq`` where no terminal matched.
            line: Line number of the failure.
            column: Column number of the failure.
            allowed: Terminal names that would have been accepted.
            considered_tokens: Stored as given.
            state: Parser state at the time of failure.
            token_history: Tokens seen before the failure.
            terminals_by_name: Mapping used to pretty-print ``allowed``.
            considered_rules: Stored as given.
        """
        super().__init__()

        # Position and parser state.
        self.line = line
        self.column = column
        self.pos_in_stream = lex_pos
        self.state = state
        self._terminals_by_name = terminals_by_name

        # Diagnostic detail.
        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.considered_rules = considered_rules
        self.token_history = token_history

        if not isinstance(seq, bytes):
            self.char = seq[lex_pos]
        else:
            # Slice (not index) so the result is bytes and can be decoded.
            self.char = seq[lex_pos : lex_pos + 1].decode("ascii", "backslashreplace")
        self._context = self.get_context(seq)

    def __str__(self):
        """Return the message, input excerpt, allowed terminals and history."""
        parts = [
            "No terminal matches '%s' in the current parser context, at line %d col %d"
            % (self.char, self.line, self.column),
            "\n\n" + self._context,
        ]
        if self.allowed:
            parts.append(self._format_expected(self.allowed))
        if self.token_history:
            history = ", ".join(repr(t) for t in self.token_history)
            parts.append("\nPrevious tokens: %s\n" % history)
        return "".join(parts)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class UnexpectedToken(ParseError, UnexpectedInput):
    """Raised when the parser receives a token it cannot accept."""

    expected: Set[str]
    considered_rules: Set[str]

    def __init__(
        self,
        token,
        expected,
        considered_rules=None,
        state=None,
        interactive_parser=None,
        terminals_by_name=None,
        token_history=None,
    ):
        """Record the offending token and the parser context.

        Args:
            token: The unexpected token; its ``line``, ``column`` and
                ``start_pos`` attributes are read when present.
            expected: Terminal names the parser expected.
            considered_rules: Stored as given.
            state: Parser state at the time of failure.
            interactive_parser: Used lazily by ``accepts``.
            terminals_by_name: Mapping used to pretty-print terminals.
            token_history: Tokens seen before the failure.
        """
        super().__init__()

        # Position falls back to "?" / None when the token lacks the attribute.
        self.line = getattr(token, "line", "?")
        self.column = getattr(token, "column", "?")
        self.pos_in_stream = getattr(token, "start_pos", None)
        self.state = state

        self.token = token
        self.expected = expected

        # NO_VALUE marks "not computed yet"; see the ``accepts`` property.
        self._accepts = NO_VALUE
        self.considered_rules = considered_rules
        self.interactive_parser = interactive_parser
        self._terminals_by_name = terminals_by_name
        self.token_history = token_history

    @property
    def accepts(self) -> Set[str]:
        """Result of ``interactive_parser.accepts()``, computed once and cached.

        When ``interactive_parser`` is falsy, that falsy value is cached
        and returned instead.
        """
        if self._accepts is NO_VALUE:
            parser = self.interactive_parser
            self._accepts = parser and parser.accepts()
        return self._accepts

    def __str__(self):
        """Return the message, the expected terminals and the token history."""
        expected_text = self._format_expected(self.accepts or self.expected)
        message = "Unexpected token %r at line %s, column %s.\n%s" % (
            self.token,
            self.line,
            self.column,
            expected_text,
        )
        if self.token_history:
            message += "Previous tokens: %r\n" % self.token_history

        return message
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
class VisitError(LarkError):
    """Wraps an exception raised by a user callback while processing a rule.

    Attributes set by ``__init__``: ``rule``, ``obj`` and ``orig_exc``.
    """

    obj: "Union[Tree, Token]"
    orig_exc: Exception

    def __init__(self, rule, obj, orig_exc):
        """Build the message from ``rule`` and ``orig_exc`` and keep all three."""
        super().__init__(
            'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
        )

        self.rule = rule
        self.obj = obj
        self.orig_exc = orig_exc
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
class MissingVariableError(LarkError):
    """LarkError subclass; nothing visible in this part of the file raises it."""
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
import sys, re
|
|
341
|
+
import logging
|
|
342
|
+
|
|
343
|
+
logger: logging.Logger = logging.getLogger("lark")
|
|
344
|
+
logger.addHandler(logging.StreamHandler())
|
|
345
|
+
##
|
|
346
|
+
|
|
347
|
+
##
|
|
348
|
+
|
|
349
|
+
logger.setLevel(logging.CRITICAL)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
NO_VALUE = object()
|
|
353
|
+
|
|
354
|
+
T = TypeVar("T")
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def classify(
    seq: Iterable, key: Optional[Callable] = None, value: Optional[Callable] = None
) -> Dict:
    """Group the items of ``seq`` into lists keyed by ``key(item)``.

    Args:
        seq: Items to group.
        key: Maps an item to its group key; the item itself when ``None``.
        value: Maps an item to the stored value; the item itself when ``None``.

    Returns:
        A dict mapping each key to the list of values, in input order.
    """
    groups: Dict[Any, Any] = {}
    for element in seq:
        group_key = element if key is None else key(element)
        stored = element if value is None else value(element)
        groups.setdefault(group_key, []).append(stored)
    return groups
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any:
|
|
372
|
+
if isinstance(data, dict):
|
|
373
|
+
if "__type__" in data: ##
|
|
374
|
+
|
|
375
|
+
class_ = namespace[data["__type__"]]
|
|
376
|
+
return class_.deserialize(data, memo)
|
|
377
|
+
elif "@" in data:
|
|
378
|
+
return memo[data["@"]]
|
|
379
|
+
return {
|
|
380
|
+
key: _deserialize(value, namespace, memo) for key, value in data.items()
|
|
381
|
+
}
|
|
382
|
+
elif isinstance(data, list):
|
|
383
|
+
return [_deserialize(value, namespace, memo) for value in data]
|
|
384
|
+
return data
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
_T = TypeVar("_T", bound="Serialize")
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
class Serialize:
    """Mixin that converts objects to and from plain dictionaries.

    Subclasses list the attributes to store in ``__serialize_fields__`` and
    may list the classes needed on load in ``__serialize_namespace__``.
    Optional ``_serialize`` / ``_deserialize`` hooks are called if present.
    """

    def memo_serialize(self, types_to_memoize: List) -> Any:
        """Serialize ``self`` with memoization of the given types.

        Returns:
            A pair ``(serialized_self, serialized_memo)``.
        """
        memo = SerializeMemoizer(types_to_memoize)
        serialized = self.serialize(memo)
        return serialized, memo.serialize()

    def serialize(self, memo=None) -> Dict[str, Any]:
        """Return a dict with the serialized fields plus a ``"__type__"`` entry.

        When ``memo`` tracks this object's type, a ``{"@": id}`` reference
        is returned instead.
        """
        if memo and memo.in_types(self):
            return {"@": memo.memoized.get(self)}

        result = {
            name: _serialize(getattr(self, name), memo)
            for name in self.__serialize_fields__
        }
        result["__type__"] = type(self).__name__
        if hasattr(self, "_serialize"):
            # Subclass hook; receives the dict being built.
            self._serialize(result, memo)

        return result

    @classmethod
    def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T:
        """Rebuild an instance of ``cls`` from ``data``.

        The instance is created with ``__new__`` (``__init__`` is not
        called) and each declared field is set from ``data``.

        Raises:
            KeyError: If a declared field is missing from ``data``.
        """
        namespace = {
            c.__name__: c for c in getattr(cls, "__serialize_namespace__", [])
        }
        fields = cls.__serialize_fields__

        if "@" in data:
            return memo[data["@"]]

        inst = cls.__new__(cls)
        for name in fields:
            try:
                setattr(inst, name, _deserialize(data[name], namespace, memo))
            except KeyError as e:
                raise KeyError("Cannot find key for class", cls, e)

        if hasattr(inst, "_deserialize"):
            # Subclass hook; runs after all fields are set.
            inst._deserialize()

        return inst
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
class SerializeMemoizer(Serialize):
    """Tracks objects of selected types so they are serialized by reference."""

    __serialize_fields__ = ("memoized",)

    def __init__(self, types_to_memoize: List) -> None:
        """Remember the types to memoize and start with an empty Enumerator."""
        self.memoized = Enumerator()
        self.types_to_memoize = tuple(types_to_memoize)

    def in_types(self, value: Serialize) -> bool:
        """Return whether ``value`` is an instance of a memoized type."""
        return isinstance(value, self.types_to_memoize)

    def serialize(self) -> Dict[int, Any]:
        """Serialize the reversed ``memoized`` enumerator, without a memo."""
        reversed_memo = self.memoized.reversed()
        return _serialize(reversed_memo, None)

    @classmethod
    def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]:
        """Delegate to the module-level ``_deserialize``."""
        return _deserialize(data, namespace, memo)
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
try:
|
|
456
|
+
import regex
|
|
457
|
+
|
|
458
|
+
_has_regex = True
|
|
459
|
+
except ImportError:
|
|
460
|
+
_has_regex = False
|
|
461
|
+
|
|
462
|
+
if sys.version_info >= (3, 11):
|
|
463
|
+
import re._parser as sre_parse
|
|
464
|
+
import re._constants as sre_constants
|
|
465
|
+
else:
|
|
466
|
+
import sre_parse
|
|
467
|
+
import sre_constants
|
|
468
|
+
|
|
469
|
+
categ_pattern = re.compile(r"\\p{[A-Za-z_]+}")
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]:
    """Return the minimum and maximum match width of the pattern ``expr``.

    With the optional ``regex`` module available, ``\\p{...}`` categories
    are replaced by a one-character placeholder before measuring.

    Raises:
        ImportError: ``expr`` uses ``\\p{...}`` but ``regex`` is not installed.
        ValueError: ``expr`` cannot be parsed and ``regex`` is not installed.
    """
    if not _has_regex:
        if re.search(categ_pattern, expr):
            raise ImportError(
                "`regex` module must be installed in order to use Unicode categories.",
                expr,
            )
        regexp_final = expr
    else:
        regexp_final = re.sub(categ_pattern, "A", expr)

    try:
        return [int(x) for x in sre_parse.parse(regexp_final).getwidth()]
    except sre_constants.error:
        if not _has_regex:
            raise ValueError(expr)
        # The pattern does not parse with the standard parser; compile it
        # with ``regex`` and only tell apart "may match empty" from "cannot".
        compiled = regex.compile(regexp_final)
        MAXWIDTH = getattr(sre_parse, "MAXWIDTH", sre_constants.MAXREPEAT)
        min_width = 1 if compiled.match("") is None else 0
        return min_width, int(MAXWIDTH)
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
from collections import OrderedDict
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
class Meta:
    """Position metadata attached to a Tree.

    Only ``empty`` is assigned on construction (to ``True``); the other
    attributes are declared for typing and are not set here.
    """

    empty: bool
    line: int
    column: int
    start_pos: int
    end_line: int
    end_column: int
    end_pos: int
    orig_expansion: "List[TerminalDef]"
    match_tree: bool

    def __init__(self):
        """Create a Meta marked as empty."""
        self.empty = True
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
_Leaf_T = TypeVar("_Leaf_T")
|
|
536
|
+
Branch = Union[_Leaf_T, "Tree[_Leaf_T]"]
|
|
537
|
+
|
|
538
|
+
|
|
539
|
+
class Tree(Generic[_Leaf_T]):
    """A parse-tree node: a ``data`` label plus a list of ``children``.

    Children are either other Tree instances or leaf values.
    """

    data: str
    children: "List[Branch[_Leaf_T]]"

    def __init__(
        self, data: str, children: "List[Branch[_Leaf_T]]", meta: Optional[Meta] = None
    ) -> None:
        """Store the label, the children and an optional Meta."""
        self.data = data
        self.children = children
        self._meta = meta

    @property
    def meta(self) -> Meta:
        """The node's Meta, created on first access when none was given."""
        current = self._meta
        if current is None:
            current = self._meta = Meta()
        return current

    def __repr__(self):
        return "Tree(%r, %r)" % (self.data, self.children)

    def _pretty_label(self):
        """Label used by ``pretty``; the ``data`` attribute here."""
        return self.data

    def _pretty(self, level, indent_str):
        """Yield the pieces of the indented text form of this subtree."""
        yield f"{indent_str*level}{self._pretty_label()}"
        if len(self.children) == 1 and not isinstance(self.children[0], Tree):
            # A single leaf child is printed on the same line as its parent.
            yield f"\t{self.children[0]}\n"
            return

        yield "\n"
        for child in self.children:
            if not isinstance(child, Tree):
                yield f"{indent_str*(level+1)}{child}\n"
            else:
                yield from child._pretty(level + 1, indent_str)

    # NOTE(review): the default indent is reproduced exactly as it appears in
    # the reviewed text (one space); confirm whitespace was not collapsed.
    def pretty(self, indent_str: str = " ") -> str:
        """Return an indented, multi-line text form of the tree."""
        pieces = self._pretty(0, indent_str)
        return "".join(pieces)

    def __rich__(self, parent: Optional["rich.tree.Tree"] = None) -> "rich.tree.Tree":
        """Return a ``rich.tree.Tree`` for this tree (requires ``rich``)."""
        return self._rich(parent)

    def _rich(self, parent):
        """Add this node under ``parent``, or start a new rich tree."""
        if not parent:
            import rich.tree

            tree = rich.tree.Tree(self.data)
        else:
            tree = parent.add(f"[bold]{self.data}[/bold]")

        for child in self.children:
            if not isinstance(child, Tree):
                tree.add(f"[green]{child}[/green]")
            else:
                child._rich(tree)

        return tree

    def __eq__(self, other):
        """Equal when ``data`` and ``children`` match; False for foreign objects."""
        try:
            return self.data == other.data and self.children == other.children
        except AttributeError:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self) -> int:
        return hash((self.data, tuple(self.children)))

    def iter_subtrees(self) -> "Iterator[Tree[_Leaf_T]]":
        """Iterate over all subtrees, each object once, with the root last."""
        pending = [self]
        seen = OrderedDict()
        # ``pending`` grows while it is being iterated (breadth-first walk).
        for node in pending:
            seen[id(node)] = node
            pending.extend(
                child
                for child in reversed(node.children)
                if isinstance(child, Tree) and id(child) not in seen
            )

        return reversed(list(seen.values()))

    def iter_subtrees_topdown(self):
        """Yield subtrees depth-first, each parent before its children."""
        stack = [self]
        while stack:
            node = stack.pop()
            if not isinstance(node, Tree):
                continue
            yield node
            # Pushed reversed so the leftmost child is popped first.
            stack.extend(reversed(node.children))

    def find_pred(
        self, pred: "Callable[[Tree[_Leaf_T]], bool]"
    ) -> "Iterator[Tree[_Leaf_T]]":
        """Return the subtrees for which ``pred`` is true."""
        return filter(pred, self.iter_subtrees())

    def find_data(self, data: str) -> "Iterator[Tree[_Leaf_T]]":
        """Return the subtrees whose ``data`` equals the given value."""

        def has_data(subtree):
            return subtree.data == data

        return self.find_pred(has_data)
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
from functools import wraps, update_wrapper
|
|
655
|
+
from inspect import getmembers, getmro
|
|
656
|
+
|
|
657
|
+
_Return_T = TypeVar("_Return_T")
|
|
658
|
+
_Return_V = TypeVar("_Return_V")
|
|
659
|
+
_Leaf_T = TypeVar("_Leaf_T")
|
|
660
|
+
_Leaf_U = TypeVar("_Leaf_U")
|
|
661
|
+
_R = TypeVar("_R")
|
|
662
|
+
_FUNC = Callable[..., _Return_T]
|
|
663
|
+
_DECORATED = Union[_FUNC, type]
|
|
664
|
+
|
|
665
|
+
|
|
666
|
+
class _DiscardType:
|
|
667
|
+
#--
|
|
668
|
+
|
|
669
|
+
def __repr__(self):
|
|
670
|
+
return "lark.visitors.Discard"
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
Discard = _DiscardType()
|
|
674
|
+
|
|
675
|
+
##
|
|
676
|
+
|
|
677
|
+
|
|
678
|
+
|
|
679
|
+
class _Decoratable:
|
|
680
|
+
#--
|
|
681
|
+
|
|
682
|
+
@classmethod
|
|
683
|
+
def _apply_v_args(cls, visit_wrapper):
|
|
684
|
+
mro = getmro(cls)
|
|
685
|
+
assert mro[0] is cls
|
|
686
|
+
libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
|
|
687
|
+
for name, value in getmembers(cls):
|
|
688
|
+
|
|
689
|
+
##
|
|
690
|
+
|
|
691
|
+
if name.startswith("_") or (
|
|
692
|
+
name in libmembers and name not in cls.__dict__
|
|
693
|
+
):
|
|
694
|
+
continue
|
|
695
|
+
if not callable(value):
|
|
696
|
+
continue
|
|
697
|
+
|
|
698
|
+
##
|
|
699
|
+
|
|
700
|
+
if isinstance(cls.__dict__[name], _VArgsWrapper):
|
|
701
|
+
continue
|
|
702
|
+
|
|
703
|
+
setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper))
|
|
704
|
+
return cls
|
|
705
|
+
|
|
706
|
+
def __class_getitem__(cls, _):
|
|
707
|
+
return cls
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
    """Rebuilds a tree bottom-up by calling a method named after each node.

    For each Tree the method named by ``tree.data`` is called with the
    already-transformed children; ``__default__`` is used when there is no
    such method. Tokens are handled the same way, by ``token.type``, when
    ``visit_tokens`` is true.
    """

    __visit_tokens__ = True

    def __init__(self, visit_tokens: bool = True) -> None:
        self.__visit_tokens__ = visit_tokens

    def _call_userfunc(self, tree, new_children=None):
        """Call the callback for ``tree``, or ``__default__`` if none exists.

        Raises:
            GrammarError: Propagated unchanged from the callback.
            VisitError: Wraps any other exception raised by the callback.
        """
        children = tree.children if new_children is None else new_children
        try:
            handler = getattr(self, tree.data)
        except AttributeError:
            return self.__default__(tree.data, children, tree.meta)

        try:
            if getattr(handler, "visit_wrapper", None) is None:
                return handler(children)
            return handler.visit_wrapper(handler, tree.data, children, tree.meta)
        except GrammarError:
            raise
        except Exception as e:
            raise VisitError(tree.data, tree, e)

    def _call_userfunc_token(self, token):
        """Call the callback for ``token``, or ``__default_token__`` if none exists.

        Raises:
            GrammarError: Propagated unchanged from the callback.
            VisitError: Wraps any other exception raised by the callback.
        """
        try:
            handler = getattr(self, token.type)
        except AttributeError:
            return self.__default_token__(token)

        try:
            return handler(token)
        except GrammarError:
            raise
        except Exception as e:
            raise VisitError(token.type, token, e)

    def _transform_children(self, children):
        """Yield the transformed children, skipping ``Discard`` results."""
        for child in children:
            if isinstance(child, Tree):
                result = self._transform_tree(child)
            elif self.__visit_tokens__ and isinstance(child, Token):
                result = self._call_userfunc_token(child)
            else:
                result = child

            if result is Discard:
                continue
            yield result

    def _transform_tree(self, tree):
        """Transform the children first, then the node itself."""
        transformed = list(self._transform_children(tree.children))
        return self._call_userfunc(tree, transformed)

    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
        """Transform ``tree`` and return the result."""
        return self._transform_tree(tree)

    def __mul__(
        self: "Transformer[_Leaf_T, Tree[_Leaf_U]]",
        other: "Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]",
    ) -> "TransformerChain[_Leaf_T, _Return_V]":
        """Return a TransformerChain that runs ``self`` and then ``other``."""
        return TransformerChain(self, other)

    def __default__(self, data, children, meta):
        """Fallback for nodes without a callback: build a new Tree."""
        return Tree(data, children, meta)

    def __default_token__(self, token):
        """Fallback for tokens without a callback: return the token as is."""
        return token
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
def merge_transformers(base_transformer=None, **transformers_to_merge):
    """Copy the public callables of several transformers onto one object.

    Each callable is stored on ``base_transformer`` under the name
    ``<prefix>__<method_name>``, where the prefix is the keyword the
    transformer was passed as. Names starting with ``_`` and the name
    ``transform`` are not copied.

    Args:
        base_transformer: Object receiving the attributes; a new
            ``Transformer`` is created when ``None``.
        **transformers_to_merge: Prefix -> transformer.

    Returns:
        The (mutated) base transformer.

    Raises:
        AttributeError: If a prefixed name already exists on the base.
    """
    if base_transformer is None:
        base_transformer = Transformer()

    for prefix, source in transformers_to_merge.items():
        for method_name in dir(source):
            method = getattr(source, method_name)
            if (
                not callable(method)
                or method_name.startswith("_")
                or method_name == "transform"
            ):
                continue

            target_name = prefix + "__" + method_name
            if hasattr(base_transformer, target_name):
                raise AttributeError(
                    "Cannot merge: method '%s' appears more than once" % target_name
                )
            setattr(base_transformer, target_name, method)

    return base_transformer
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
class InlineTransformer(Transformer):
    """Transformer whose callbacks receive the children as positional arguments."""

    def _call_userfunc(self, tree, new_children=None):
        """Call the callback with ``*children``; no exception wrapping is done."""
        children = tree.children if new_children is None else new_children
        try:
            handler = getattr(self, tree.data)
        except AttributeError:
            return self.__default__(tree.data, children, tree.meta)
        return handler(*children)
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
class TransformerChain(Generic[_Leaf_T, _Return_T]):
    """Applies several transformers one after another.

    The output of each transformer is the input of the next.
    """

    transformers: "Tuple[Union[Transformer, TransformerChain], ...]"

    def __init__(self, *transformers: "Union[Transformer, TransformerChain]") -> None:
        self.transformers = transformers

    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
        """Run ``tree`` through every transformer in order and return the result."""
        # ``cast`` is not in the ``typing`` import list at the top of this
        # file, so it is imported here to avoid a NameError.
        from typing import cast

        for t in self.transformers:
            tree = t.transform(tree)
        return cast(_Return_T, tree)

    def __mul__(
        self: "TransformerChain[_Leaf_T, Tree[_Leaf_U]]",
        other: "Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]",
    ) -> "TransformerChain[_Leaf_T, _Return_V]":
        """Return a new chain with ``other`` appended."""
        return TransformerChain(*self.transformers + (other,))
|
|
841
|
+
|
|
842
|
+
|
|
843
|
+
class Transformer_InPlace(Transformer[_Leaf_T, _Return_T]):
    """Transformer that replaces each subtree's children in place, without recursion."""

    def _transform_tree(self, tree):
        # Children are not transformed here; see ``transform``.
        return self._call_userfunc(tree)

    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
        """Rewrite every subtree's children, then transform the root itself."""
        for node in tree.iter_subtrees():
            node.children = list(self._transform_children(node.children))

        return self._transform_tree(tree)
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
class Transformer_NonRecursive(Transformer[_Leaf_T, _Return_T]):
    """Transformer that walks the tree with explicit stacks instead of recursion."""

    def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
        """Transform ``tree`` bottom-up and return the single remaining result."""
        # ``cast`` is not in the ``typing`` import list at the top of this
        # file, so it is imported here to avoid a NameError.
        from typing import cast

        # Pass 1: collect every node; reversed below so that children are
        # processed before their parent.
        rev_postfix = []
        q: List[Branch[_Leaf_T]] = [tree]
        while q:
            t = q.pop()
            rev_postfix.append(t)
            if isinstance(t, Tree):
                q += t.children

        # Pass 2: evaluate; each tree consumes as many results from the
        # stack as it has children.
        stack: List = []
        for x in reversed(rev_postfix):
            if isinstance(x, Tree):
                size = len(x.children)
                if size:
                    args = stack[-size:]
                    del stack[-size:]
                else:
                    # ``stack[-0:]`` would be the whole stack, so handle 0 apart.
                    args = []

                res = self._call_userfunc(x, args)
                if res is not Discard:
                    stack.append(res)

            elif self.__visit_tokens__ and isinstance(x, Token):
                res = self._call_userfunc_token(x)
                if res is not Discard:
                    stack.append(res)
            else:
                stack.append(x)

        # Exactly one value must remain; otherwise the unpacking raises.
        (result,) = stack

        return cast(_Return_T, result)
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
class Transformer_InPlaceRecursive(Transformer):
    """Recursive transformer that replaces each tree's children in place."""

    def _transform_tree(self, tree):
        """Store the transformed children on ``tree``, then call its callback."""
        new_children = list(self._transform_children(tree.children))
        tree.children = new_children
        return self._call_userfunc(tree)
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
##
|
|
914
|
+
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
class VisitorBase:
    """Shared dispatch logic for the visitor classes."""

    def _call_userfunc(self, tree):
        """Call the method named ``tree.data``, or ``__default__`` if absent."""
        handler = getattr(self, tree.data, self.__default__)
        return handler(tree)

    def __default__(self, tree):
        """Fallback for nodes without a callback: return the tree unchanged."""
        return tree

    def __class_getitem__(cls, _):
        """Allow ``Cls[...]`` subscription; the class is returned unchanged."""
        return cls
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
class Visitor(VisitorBase, ABC, Generic[_Leaf_T]):
    """Tree visitor that calls the matching user method on every subtree.

    Return values of the user methods are ignored; each ``visit*`` method
    returns the tree it was given.
    """

    def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
        """Call the user method for each subtree, in ``iter_subtrees()`` order."""
        dispatch = self._call_userfunc
        for node in tree.iter_subtrees():
            dispatch(node)
        return tree

    def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
        """Call the user method for each subtree, in ``iter_subtrees_topdown()`` order."""
        dispatch = self._call_userfunc
        for node in tree.iter_subtrees_topdown():
            dispatch(node)
        return tree
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]):
    """Tree visitor implemented with plain recursion over ``tree.children``.

    Non-``Tree`` children are skipped. Both methods return the tree they
    were given.
    """

    def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
        """Visit the child subtrees first, then ``tree`` itself."""
        for child in tree.children:
            if not isinstance(child, Tree):
                continue
            self.visit(child)

        self._call_userfunc(tree)
        return tree

    def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
        """Visit ``tree`` first, then descend into its child subtrees."""
        self._call_userfunc(tree)

        for child in tree.children:
            if not isinstance(child, Tree):
                continue
            self.visit_topdown(child)

        return tree
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
    """Visitor that descends into subtrees only when a handler asks for it.

    ``visit`` dispatches on ``tree.data``. A name without a user-defined
    method resolves to ``__default__`` (through ``__getattr__``), which
    visits the children.
    """

    def visit(self, tree: Tree[_Leaf_T]) -> _Return_T:
        """Visit ``tree`` and return whatever its handler returns."""
        return self._visit_tree(tree)

    def _visit_tree(self, tree: Tree[_Leaf_T]):
        """Look up the handler named ``tree.data`` and invoke it for ``tree``."""
        handler = getattr(self, tree.data)
        if getattr(handler, "visit_wrapper", None) is None:
            return handler(tree)
        # Handlers carrying a ``visit_wrapper`` attribute are called through
        # it with the unpacked parts of the tree.
        return handler.visit_wrapper(handler, tree.data, tree.children, tree.meta)

    def visit_children(self, tree: Tree[_Leaf_T]) -> List:
        """Return the children of ``tree`` with every subtree replaced by its visit result."""
        visited = []
        for child in tree.children:
            if isinstance(child, Tree):
                child = self._visit_tree(child)
            visited.append(child)
        return visited

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails, so any unknown
        # name resolves to the default handler.
        return self.__default__

    def __default__(self, tree):
        """Default handler: visit the children and return the results."""
        return self.visit_children(tree)
|
|
999
|
+
|
|
1000
|
+
|
|
1001
|
+
# Type alias used to annotate the argument and return value of visit_children_decor.
_InterMethod = Callable[[Type[Interpreter], _Return_T], _R]
|
|
1002
|
+
|
|
1003
|
+
|
|
1004
|
+
def visit_children_decor(func: _InterMethod) -> _InterMethod:
    """Decorate an interpreter method so it receives the visited children.

    The wrapped method is called with the list returned by
    ``visit_children(tree)`` instead of the tree itself.
    """

    @wraps(func)
    def inner(cls, tree):
        return func(cls, cls.visit_children(tree))

    return inner
|
|
1013
|
+
|
|
1014
|
+
|
|
1015
|
+
##
|
|
1016
|
+
|
|
1017
|
+
|
|
1018
|
+
|
|
1019
|
+
def _apply_v_args(obj, visit_wrapper):
    """Apply ``visit_wrapper`` to ``obj``.

    Objects exposing an ``_apply_v_args`` attribute handle the request
    themselves; anything else is wrapped in a ``_VArgsWrapper``.
    """
    missing = object()
    hook = getattr(obj, "_apply_v_args", missing)
    if hook is missing:
        return _VArgsWrapper(obj, visit_wrapper)
    return hook(visit_wrapper)
|
|
1026
|
+
|
|
1027
|
+
|
|
1028
|
+
class _VArgsWrapper:
    """Callable that pairs a function with the ``visit_wrapper`` to call it through.

    Calling the wrapper calls ``base_func`` directly. The object also
    forwards the descriptor protocol (``__get__`` / ``__set_name__``) to
    ``base_func`` when its type supports it.
    """

    base_func: Callable

    def __init__(
        self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]
    ):
        # Never stack wrappers: re-wrap the innermost function instead.
        target = func.base_func if isinstance(func, _VArgsWrapper) else func

        self.base_func = target
        self.visit_wrapper = visit_wrapper
        update_wrapper(self, target)

    def __call__(self, *args, **kwargs):
        target = self.base_func
        return target(*args, **kwargs)

    def __get__(self, instance, owner=None):
        # Bind the underlying function (if its type is a descriptor) and
        # re-wrap the bound result so ``visit_wrapper`` is preserved.
        missing = object()
        binder = getattr(type(self.base_func), "__get__", missing)
        if binder is missing:
            return self
        bound = binder(self.base_func, instance, owner)
        return _VArgsWrapper(bound, self.visit_wrapper)

    def __set_name__(self, owner, name):
        # Forward the notification when the wrapped object's type defines it.
        missing = object()
        notify = getattr(type(self.base_func), "__set_name__", missing)
        if notify is not missing:
            notify(self.base_func, owner, name)
|
|
1067
|
+
|
|
1068
|
+
|
|
1069
|
+
def _vargs_inline(f, _data, children, _meta):
    """Call ``f`` with the children spread out as positional arguments."""
    result = f(*children)
    return result
|
|
1071
|
+
|
|
1072
|
+
|
|
1073
|
+
def _vargs_meta_inline(f, _data, children, meta):
    """Call ``f`` with ``meta`` first, followed by the children as positional arguments."""
    result = f(meta, *children)
    return result
|
|
1075
|
+
|
|
1076
|
+
|
|
1077
|
+
def _vargs_meta(f, _data, children, meta):
    """Call ``f`` with ``meta`` and the children passed as one sequence."""
    result = f(meta, children)
    return result
|
|
1079
|
+
|
|
1080
|
+
|
|
1081
|
+
def _vargs_tree(f, data, children, meta):
    """Rebuild a ``Tree`` from its parts and call ``f`` with it."""
    rebuilt = Tree(data, children, meta)
    return f(rebuilt)
|
|
1083
|
+
|
|
1084
|
+
|
|
1085
|
+
def v_args(
    inline: bool = False,
    meta: bool = False,
    tree: bool = False,
    wrapper: Optional[Callable] = None,
) -> Callable[[_DECORATED], _DECORATED]:
    """Build a decorator that selects how a callback receives its arguments.

    Args:
        inline: pass the children as positional arguments.
        meta: pass the tree's ``meta`` as the first argument (may be combined
            with ``inline``).
        tree: pass a rebuilt ``Tree`` as the only argument.
        wrapper: custom wrapper, called as ``wrapper(f, data, children, meta)``;
            mutually exclusive with the three flags above.

    Returns:
        A decorator that applies the selected wrapper through ``_apply_v_args``.

    Raises:
        ValueError: if ``tree`` is combined with ``meta``/``inline``, or if
            ``wrapper`` is combined with any of the flags.
    """
    if tree and (meta or inline):
        raise ValueError(
            "Visitor functions cannot combine 'tree' with 'meta' or 'inline'."
        )

    if meta:
        chosen = _vargs_meta_inline if inline else _vargs_meta
    elif inline:
        chosen = _vargs_inline
    elif tree:
        chosen = _vargs_tree
    else:
        chosen = None

    if wrapper is not None:
        if chosen is not None:
            raise ValueError(
                "Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'."
            )
        chosen = wrapper

    def _visitor_args_dec(obj):
        return _apply_v_args(obj, chosen)

    return _visitor_args_dec
|
|
1119
|
+
|
|
1120
|
+
|
|
1121
|
+
|
|
1122
|
+
# Priority given to a TerminalDef when its constructor is not passed one.
TOKEN_DEFAULT_PRIORITY = 0
|
|
1123
|
+
|
|
1124
|
+
|
|
1125
|
+
class Symbol(Serialize):
    """A named grammar symbol; subclasses set ``is_term``."""

    __slots__ = ("name",)

    name: str
    is_term: ClassVar[bool] = NotImplemented

    def __init__(self, name: str) -> None:
        self.name = name

    def __eq__(self, other):
        # Comparing against anything that is not a Symbol is treated as a
        # programming error.
        assert isinstance(other, Symbol), other
        if self.is_term != other.is_term:
            return False
        return self.name == other.name

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        # Only the name is hashed; ``is_term`` is checked in ``__eq__``.
        return hash(self.name)

    def __repr__(self):
        return f"{type(self).__name__!s}({self.name!r})"

    fullrepr = property(__repr__)

    def renamed(self, f):
        """Return a new symbol of the same class named ``f(self.name)``."""
        new_name = f(self.name)
        return type(self)(new_name)
|
|
1151
|
+
|
|
1152
|
+
|
|
1153
|
+
class Terminal(Symbol):
    """Terminal symbol, with a ``filter_out`` flag stored alongside the name."""

    __serialize_fields__ = "name", "filter_out"

    is_term: ClassVar[bool] = True

    def __init__(self, name, filter_out=False):
        self.name = name
        self.filter_out = filter_out

    @property
    def fullrepr(self):
        """Representation that also shows ``filter_out``."""
        return f"{type(self).__name__!s}({self.name!r}, {self.filter_out!r})"

    def renamed(self, f):
        """Return a new terminal named ``f(self.name)`` with the same ``filter_out``."""
        new_name = f(self.name)
        return type(self)(new_name, self.filter_out)
|
|
1168
|
+
|
|
1169
|
+
|
|
1170
|
+
class NonTerminal(Symbol):
    """Non-terminal symbol: a ``Symbol`` whose ``is_term`` is ``False``."""

    is_term: ClassVar[bool] = False

    __serialize_fields__ = ("name",)
|
|
1174
|
+
|
|
1175
|
+
|
|
1176
|
+
class RuleOptions(Serialize):
    """Plain container for the per-rule options listed in ``__serialize_fields__``."""

    __serialize_fields__ = (
        "keep_all_tokens",
        "expand1",
        "priority",
        "template_source",
        "empty_indices",
    )

    keep_all_tokens: bool
    expand1: bool
    priority: Optional[int]
    template_source: Optional[str]
    empty_indices: Tuple[bool, ...]

    def __init__(
        self,
        keep_all_tokens: bool = False,
        expand1: bool = False,
        priority: Optional[int] = None,
        template_source: Optional[str] = None,
        empty_indices: Tuple[bool, ...] = (),
    ) -> None:
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.priority = priority
        self.template_source = template_source
        self.empty_indices = empty_indices

    def __repr__(self):
        # ``empty_indices`` is not part of the representation.
        shown = (
            self.keep_all_tokens,
            self.expand1,
            self.priority,
            self.template_source,
        )
        return "RuleOptions(" + ", ".join(repr(field) for field in shown) + ")"
|
|
1212
|
+
|
|
1213
|
+
|
|
1214
|
+
class Rule(Serialize):
    """A grammar rule: ``origin`` expands to the symbols in ``expansion``.

    Equality and hashing consider only ``origin`` and ``expansion``.
    """

    __slots__ = ("origin", "expansion", "alias", "options", "order", "_hash")

    __serialize_fields__ = "origin", "expansion", "order", "alias", "options"
    __serialize_namespace__ = Terminal, NonTerminal, RuleOptions

    origin: NonTerminal
    expansion: Sequence[Symbol]
    order: int
    alias: Optional[str]
    options: RuleOptions
    _hash: int

    def __init__(
        self,
        origin: NonTerminal,
        expansion: Sequence[Symbol],
        order: int = 0,
        alias: Optional[str] = None,
        options: Optional[RuleOptions] = None,
    ):
        self.origin = origin
        self.expansion = expansion
        self.alias = alias
        self.order = order
        self.options = options if options else RuleOptions()
        # Cache the hash once; it is returned as-is by __hash__.
        self._hash = hash((self.origin, tuple(self.expansion)))

    def _deserialize(self):
        # ``_hash`` is not in __serialize_fields__, so it is recomputed here.
        self._hash = hash((self.origin, tuple(self.expansion)))

    def __str__(self):
        rhs = " ".join(sym.name for sym in self.expansion)
        return f"<{self.origin.name!s} : {rhs!s}>"

    def __repr__(self):
        # ``order`` is not part of the representation.
        shown = (self.origin, self.expansion, self.alias, self.options)
        return "Rule(" + ", ".join(repr(field) for field in shown) + ")"

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        return (
            isinstance(other, Rule)
            and self.origin == other.origin
            and self.expansion == other.expansion
        )
|
|
1268
|
+
|
|
1269
|
+
|
|
1270
|
+
|
|
1271
|
+
from copy import copy
|
|
1272
|
+
|
|
1273
|
+
# Record whether the name ``interegular`` is defined (and truthy) in this module.
try:
    interegular
except NameError:
    has_interegular = False
else:
    has_interegular = bool(interegular)
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
class Pattern(Serialize, ABC):
    """Abstract base for terminal patterns.

    Holds the pattern ``value``, a frozen set of ``flags`` and an optional
    ``raw`` string. Subclasses provide ``to_regexp``, ``min_width`` and
    ``max_width``.
    """

    value: str
    flags: Collection[str]
    raw: Optional[str]
    type: ClassVar[str]

    def __init__(
        self, value: str, flags: Collection[str] = (), raw: Optional[str] = None
    ) -> None:
        self.value = value
        self.flags = frozenset(flags)
        self.raw = raw

    def __repr__(self):
        regexp = self.to_regexp()
        return repr(regexp)

    def __hash__(self):
        # Consistent with __eq__: exact class, value and flags.
        return hash((type(self), self.value, self.flags))

    def __eq__(self, other):
        if type(self) != type(other):
            return False
        return self.value == other.value and self.flags == other.flags

    @abstractmethod
    def to_regexp(self) -> str:
        """Return the pattern as a regular-expression string."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def min_width(self) -> int:
        raise NotImplementedError()

    @property
    @abstractmethod
    def max_width(self) -> int:
        raise NotImplementedError()

    def _get_flags(self, value):
        """Wrap ``value`` in one ``(?flag:...)`` group per flag in ``self.flags``."""
        wrapped = value
        for flag in self.flags:
            wrapped = f"(?{flag!s}:{wrapped!s})"
        return wrapped
|
|
1328
|
+
|
|
1329
|
+
|
|
1330
|
+
class PatternStr(Pattern):
    """Pattern that matches the literal string ``value``."""

    __serialize_fields__ = "value", "flags", "raw"

    type: ClassVar[str] = "str"

    def to_regexp(self) -> str:
        """Return the escaped literal, wrapped in this pattern's flag groups."""
        escaped = re.escape(self.value)
        return self._get_flags(escaped)

    @property
    def min_width(self) -> int:
        # A literal always matches exactly its own length.
        return len(self.value)

    @property
    def max_width(self) -> int:
        return len(self.value)
|
|
1345
|
+
|
|
1346
|
+
|
|
1347
|
+
class PatternRE(Pattern):
    """Pattern whose ``value`` is already a regular expression."""

    __serialize_fields__ = "value", "flags", "raw", "_width"

    type: ClassVar[str] = "re"

    # Cached result of get_regexp_width(); filled in on first use.
    _width = None

    def to_regexp(self) -> str:
        """Return ``value`` wrapped in this pattern's flag groups."""
        return self._get_flags(self.value)

    def _get_width(self):
        """Return the cached width pair, computing it on first use."""
        width = self._width
        if width is None:
            width = self._width = get_regexp_width(self.to_regexp())
        return width

    @property
    def min_width(self) -> int:
        return self._get_width()[0]

    @property
    def max_width(self) -> int:
        return self._get_width()[1]
|
|
1369
|
+
|
|
1370
|
+
|
|
1371
|
+
class TerminalDef(Serialize):
    """Definition of a terminal: its name, its pattern and its priority."""

    __serialize_fields__ = "name", "pattern", "priority"
    __serialize_namespace__ = PatternStr, PatternRE

    name: str
    pattern: Pattern
    priority: int

    def __init__(
        self, name: str, pattern: Pattern, priority: int = TOKEN_DEFAULT_PRIORITY
    ) -> None:
        assert isinstance(pattern, Pattern), pattern
        self.name = name
        self.pattern = pattern
        self.priority = priority

    def __repr__(self):
        return f"{type(self).__name__!s}({self.name!r}, {self.pattern!r})"

    def user_repr(self) -> str:
        """Return a display name for the terminal.

        Names starting with ``__`` are shown as the pattern's ``raw`` text
        when it is set (and truthy); every other terminal is shown by name.
        """
        if not self.name.startswith("__"):
            return self.name
        return self.pattern.raw or self.name
|
|
1397
|
+
|
|
1398
|
+
|
|
1399
|
+
# Type variable bound to Token; used to annotate Token.new_borrow_pos.
_T = TypeVar("_T", bound="Token")
|
|
1400
|
+
|
|
1401
|
+
|
|
1402
|
+
class Token(str):
    """A ``str`` subclass carrying a token's type, value and source position.

    The string content is ``value``. ``type`` plus the positional fields
    (``start_pos``, ``line``, ``column``, ``end_line``, ``end_column``,
    ``end_pos``) are stored in slots.
    """

    __slots__ = (
        "type",
        "start_pos",
        "value",
        "line",
        "column",
        "end_line",
        "end_column",
        "end_pos",
    )

    __match_args__ = ("type", "value")

    type: str
    start_pos: Optional[int]
    value: Any
    line: Optional[int]
    column: Optional[int]
    end_line: Optional[int]
    end_column: Optional[int]
    end_pos: Optional[int]

    @overload
    def __new__(
        cls,
        type: str,
        value: Any,
        start_pos: Optional[int] = None,
        line: Optional[int] = None,
        column: Optional[int] = None,
        end_line: Optional[int] = None,
        end_column: Optional[int] = None,
        end_pos: Optional[int] = None,
    ) -> "Token": ...

    @overload
    def __new__(
        cls,
        type_: str,
        value: Any,
        start_pos: Optional[int] = None,
        line: Optional[int] = None,
        column: Optional[int] = None,
        end_line: Optional[int] = None,
        end_column: Optional[int] = None,
        end_pos: Optional[int] = None,
    ) -> "Token": ...

    def __new__(cls, *args, **kwargs):
        """Create a token; the deprecated ``type_`` keyword is mapped to ``type``.

        Raises:
            TypeError: if both ``type`` and ``type_`` are passed as keywords.
        """
        if "type_" in kwargs:
            warnings.warn(
                "`type_` is deprecated use `type` instead", DeprecationWarning
            )

            if "type" in kwargs:
                raise TypeError(
                    "Error: using both 'type' and the deprecated 'type_' as arguments."
                )
            kwargs["type"] = kwargs.pop("type_")

        return cls._future_new(*args, **kwargs)

    @classmethod
    def _future_new(
        cls,
        type,
        value,
        start_pos=None,
        line=None,
        column=None,
        end_line=None,
        end_column=None,
        end_pos=None,
    ):
        """Build the instance: the string content is ``value``, the rest go to slots."""
        inst = super(Token, cls).__new__(cls, value)

        inst.type = type
        inst.start_pos = start_pos
        inst.value = value
        inst.line = line
        inst.column = column
        inst.end_line = end_line
        inst.end_column = end_column
        inst.end_pos = end_pos
        return inst

    @overload
    def update(
        self, type: Optional[str] = None, value: Optional[Any] = None
    ) -> "Token": ...

    @overload
    def update(
        self, type_: Optional[str] = None, value: Optional[Any] = None
    ) -> "Token": ...

    def update(self, *args, **kwargs):
        """Return a new token with ``type`` and/or ``value`` replaced.

        The position fields are copied from this token. The deprecated
        ``type_`` keyword is mapped to ``type``.

        Raises:
            TypeError: if both ``type`` and ``type_`` are passed as keywords.
        """
        if "type_" in kwargs:
            warnings.warn(
                "`type_` is deprecated use `type` instead", DeprecationWarning
            )

            if "type" in kwargs:
                raise TypeError(
                    "Error: using both 'type' and the deprecated 'type_' as arguments."
                )
            kwargs["type"] = kwargs.pop("type_")

        return self._future_update(*args, **kwargs)

    def _future_update(
        self, type: Optional[str] = None, value: Optional[Any] = None
    ) -> "Token":
        # ``None`` means "keep the current field".
        return Token.new_borrow_pos(
            type if type is not None else self.type,
            value if value is not None else self.value,
            self,
        )

    @classmethod
    def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: "Token") -> _T:
        """Create a token of ``cls`` that takes every position field from ``borrow_t``."""
        return cls(
            type_,
            value,
            borrow_t.start_pos,
            borrow_t.line,
            borrow_t.column,
            borrow_t.end_line,
            borrow_t.end_column,
            borrow_t.end_pos,
        )

    def _position_args(self):
        """Return the constructor arguments that reproduce this token in full."""
        return (
            self.type,
            self.value,
            self.start_pos,
            self.line,
            self.column,
            self.end_line,
            self.end_column,
            self.end_pos,
        )

    def __reduce__(self):
        # Include the end positions so pickling round-trips every slot.
        return (self.__class__, self._position_args())

    def __repr__(self):
        return "Token(%r, %r)" % (self.type, self.value)

    def __deepcopy__(self, memo):
        # Include the end positions so the copy keeps every slot.
        return Token(*self._position_args())

    def __eq__(self, other):
        # Two tokens of different types never compare equal; otherwise the
        # comparison is plain string equality.
        if isinstance(other, Token) and self.type != other.type:
            return False

        return str.__eq__(self, other)

    # Defining __eq__ would otherwise disable hashing; keep str's hash.
    __hash__ = str.__hash__
|
|
1556
|
+
|
|
1557
|
+
|
|
1558
|
+
class LineCounter:
    """Tracks character position, line and column while text is consumed."""

    __slots__ = "char_pos", "line", "column", "line_start_pos", "newline_char"

    def __init__(self, newline_char):
        self.newline_char = newline_char
        self.char_pos = 0
        self.line = 1
        self.column = 1
        self.line_start_pos = 0

    def __eq__(self, other):
        # Only the absolute position and the newline marker are compared.
        if isinstance(other, LineCounter):
            return (
                self.char_pos == other.char_pos
                and self.newline_char == other.newline_char
            )
        return NotImplemented

    def feed(self, token: Token, test_newline=True):
        """Advance the counters past ``token``.

        With ``test_newline`` false the token is not scanned for newlines,
        so only ``char_pos`` and ``column`` change.
        """
        start = self.char_pos
        if test_newline:
            marker = self.newline_char
            found = token.count(marker)
            if found:
                self.line += found
                # The new line begins just after the last newline in the token.
                self.line_start_pos = start + token.rindex(marker) + 1

        self.char_pos = start + len(token)
        self.column = self.char_pos - self.line_start_pos + 1
|
|
1590
|
+
|
|
1591
|
+
|
|
1592
|
+
class UnlessCallback:
    """Token callback that re-types a token when ``scanner`` matches its value."""

    def __init__(self, scanner):
        self.scanner = scanner

    def __call__(self, t):
        """Set ``t.type`` to the type reported by the scanner, if any; return ``t``."""
        found = self.scanner.match(t.value, 0)
        if found:
            _matched, new_type = found
            t.type = new_type
        return t
|
|
1601
|
+
|
|
1602
|
+
|
|
1603
|
+
class CallChain:
    """Runs ``callback1``, then ``callback2`` only when ``cond`` accepts the result."""

    def __init__(self, callback1, callback2, cond):
        self.callback1 = callback1
        self.callback2 = callback2
        self.cond = cond

    def __call__(self, t):
        first = self.callback1(t)
        if self.cond(first):
            # Note: the second callback receives the original argument ``t``.
            return self.callback2(t)
        return first
|
|
1612
|
+
|
|
1613
|
+
|
|
1614
|
+
def _get_match(re_, regexp, s, flags):
    """Return the text ``regexp`` matches at the start of ``s``, or ``None``."""
    found = re_.match(regexp, s, flags)
    return found.group(0) if found else None
|
|
1618
|
+
|
|
1619
|
+
|
|
1620
|
+
def _create_unless(terminals, g_regex_flags, re_, use_bytes):
    """Find string terminals that a regexp terminal of equal priority matches in full.

    For every regexp terminal with at least one such string terminal, an
    ``UnlessCallback`` (backed by a whole-match ``Scanner`` over those
    strings) is registered under the regexp terminal's name. String
    terminals whose flags are a subset of the regexp's flags are dropped
    from the returned terminal list.

    Returns:
        ``(remaining_terminals, callbacks_by_terminal_name)``.
    """
    by_pattern_kind = classify(terminals, lambda term: type(term.pattern))
    assert len(by_pattern_kind) <= 2, by_pattern_kind.keys()

    str_terminals = by_pattern_kind.get(PatternStr, [])
    shadowed = set()
    callbacks = {}
    for re_term in by_pattern_kind.get(PatternRE, []):
        matching = []
        for str_term in str_terminals:
            if str_term.priority != re_term.priority:
                continue
            literal = str_term.pattern.value
            if literal != _get_match(
                re_, re_term.pattern.to_regexp(), literal, g_regex_flags
            ):
                continue
            matching.append(str_term)
            if str_term.pattern.flags <= re_term.pattern.flags:
                shadowed.add(str_term)

        if not matching:
            continue
        scanner = Scanner(
            matching, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes
        )
        callbacks[re_term.name] = UnlessCallback(scanner)

    remaining = [term for term in terminals if term not in shadowed]
    return remaining, callbacks
|
|
1644
|
+
|
|
1645
|
+
|
|
1646
|
+
class Scanner:
    """Matches text against a list of terminals using combined regexps.

    The terminals are joined into one or more alternations of named groups
    (one group per terminal, named after it); ``match`` tries them in order.
    """

    def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
        self.terminals = terminals
        self.g_regex_flags = g_regex_flags
        self.re_ = re_
        self.use_bytes = use_bytes
        self.match_whole = match_whole

        self.allowed_types = {t.name for t in self.terminals}

        self._mres = self._build_mres(terminals, len(terminals))

    def _build_mres(self, terminals, max_size):
        """Compile ``terminals`` into patterns of at most ``max_size`` terminals each.

        When compiling a chunk raises ``AssertionError`` (presumably the
        regex module's way of rejecting an oversized pattern - TODO confirm),
        the remaining terminals are retried with half the chunk size.
        Patterns compiled before the failure are kept.

        Raises:
            AssertionError: if even a single-terminal pattern cannot be
                compiled, instead of retrying forever with a chunk size of 0.
        """
        postfix = "$" if self.match_whole else ""
        mres = []
        while terminals:
            pattern = "|".join(
                "(?P<%s>%s)" % (t.name, t.pattern.to_regexp() + postfix)
                for t in terminals[:max_size]
            )
            if self.use_bytes:
                pattern = pattern.encode("latin-1")
            try:
                mre = self.re_.compile(pattern, self.g_regex_flags)
            except AssertionError:
                if max_size <= 1:
                    # Halving again would give a chunk size of 0, which
                    # never consumes a terminal and so never terminates.
                    raise
                # ``terminals`` now holds only the not-yet-compiled tail, so
                # the patterns collected so far must be prepended.
                return mres + self._build_mres(terminals, max_size // 2)

            mres.append(mre)
            terminals = terminals[max_size:]
        return mres

    def match(self, text, pos):
        """Return ``(matched_text, terminal_name)`` for the first pattern matching at ``pos``.

        Returns ``None`` when no pattern matches.
        """
        for mre in self._mres:
            m = mre.match(text, pos)
            if m:
                return m.group(0), m.lastgroup
        return None
|
|
1689
|
+
|
|
1690
|
+
|
|
1691
|
+
def _regexp_has_newline(r: str):
    """Heuristically tell whether regexp source ``r`` may match a newline.

    True when ``r`` contains a literal newline, an escaped ``\\n``, ``\\s``
    or a negated character class (``[^``), or when it contains both ``(?s``
    and a ``.``.
    """
    if any(marker in r for marker in ("\n", "\\n", "\\s", "[^")):
        return True
    return "(?s" in r and "." in r
|
|
1696
|
+
|
|
1697
|
+
|
|
1698
|
+
class LexerState:
    """Mutable lexing state: the text, a line counter and the last token seen."""

    __slots__ = "text", "line_ctr", "last_token"

    text: str
    line_ctr: LineCounter
    last_token: Optional[Token]

    def __init__(
        self,
        text: str,
        line_ctr: Optional[LineCounter] = None,
        last_token: Optional[Token] = None,
    ):
        self.text = text
        if not line_ctr:
            # The newline marker must have the same type as the text.
            newline = b"\n" if isinstance(text, bytes) else "\n"
            line_ctr = LineCounter(newline)
        self.line_ctr = line_ctr
        self.last_token = last_token

    def __eq__(self, other):
        if not isinstance(other, LexerState):
            return NotImplemented

        # The text is compared by identity, not by content.
        if self.text is not other.text:
            return False
        return self.line_ctr == other.line_ctr and self.last_token == other.last_token

    def __copy__(self):
        # The line counter is copied; text and last token are shared.
        counter_copy = copy(self.line_ctr)
        return type(self)(self.text, counter_copy, self.last_token)
|
|
1731
|
+
|
|
1732
|
+
|
|
1733
|
+
class LexerThread:
    """Pairs a lexer with the lexer state it operates on."""

    def __init__(self, lexer: "Lexer", lexer_state: LexerState):
        self.lexer = lexer
        self.state = lexer_state

    @classmethod
    def from_text(cls, lexer: "Lexer", text: str) -> "LexerThread":
        """Create a thread for ``lexer`` with a fresh ``LexerState`` over ``text``."""
        initial_state = LexerState(text)
        return cls(lexer, initial_state)

    def lex(self, parser_state):
        """Delegate to ``lexer.lex`` with this thread's state."""
        lexer, state = self.lexer, self.state
        return lexer.lex(state, parser_state)

    def __copy__(self):
        # The state is copied; the lexer itself is shared.
        state_copy = copy(self.state)
        return type(self)(self.lexer, state_copy)
|
|
1749
|
+
|
|
1750
|
+
# Module-private alias bound to the Token class.
_Token = Token
|
|
1751
|
+
|
|
1752
|
+
|
|
1753
|
+
# Type alias for token callbacks: they receive a Token and return a Token.
_Callback = Callable[[Token], Token]
|
|
1754
|
+
|
|
1755
|
+
|
|
1756
|
+
class Lexer(ABC):
    """Abstract lexer interface: subclasses must implement ``lex``."""

    @abstractmethod
    def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
        """Produce tokens for ``lexer_state``; the base body returns ``NotImplemented``."""
        return NotImplemented

    def make_lexer_state(self, text):
        """Return a new ``LexerState`` for ``text``."""
        state = LexerState(text)
        return state
|
|
1766
|
+
|
|
1767
|
+
|
|
1768
|
+
def _check_regex_collisions(
    terminal_to_regexp: Dict[TerminalDef, str],
    comparator,
    strict_mode,
    max_collisions_to_show=8,
):
    """Report regexp terminals of equal priority that the comparator flags as colliding.

    Args:
        terminal_to_regexp: regexp source for each terminal to check.
        comparator: comparator object to use; when falsy, one is built with
            ``interegular.Comparator.from_regexes``.
        strict_mode: raise on the first collision instead of logging a warning.
        max_collisions_to_show: stop checking once this many pairs are marked.

    Raises:
        LexError: in strict mode, on the first collision found.
    """
    if not comparator:
        comparator = interegular.Comparator.from_regexes(terminal_to_regexp)

    # Time budget passed to get_example_overlap; larger in strict mode.
    max_time = 2 if strict_mode else 0.2

    # Enough collisions were already reported (e.g. with a shared comparator).
    if comparator.count_marked_pairs() >= max_collisions_to_show:
        return
    for group in classify(terminal_to_regexp, lambda t: t.priority).values():
        for a, b in comparator.check(group, skip_marked=True):
            assert a.priority == b.priority
            # Mark the pair so that it is skipped by later checks.
            comparator.mark(a, b)

            message = f"Collision between Terminals {a.name} and {b.name}. "
            try:
                example = comparator.get_example_overlap(
                    a, b, max_time
                ).format_multiline()
            except ValueError:
                example = "No example could be found fast enough. However, the collision does still exists"
            if strict_mode:
                raise LexError(f"{message}\n{example}")
            logger.warning(
                "%s The lexer will choose between them arbitrarily.\n%s",
                message,
                example,
            )
            if comparator.count_marked_pairs() >= max_collisions_to_show:
                # Report the configured limit rather than a hard-coded "8".
                logger.warning(
                    "Found %s regex collisions, will not check for more.",
                    max_collisions_to_show,
                )
                return
|
|
1815
|
+
|
|
1816
|
+
|
|
1817
|
+
class AbstractBasicLexer(Lexer):
    """Lexer base that implements ``lex`` on top of an abstract ``next_token``."""

    terminals_by_name: Dict[str, TerminalDef]

    @abstractmethod
    def __init__(self, conf: "LexerConf", comparator=None) -> None: ...

    @abstractmethod
    def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token: ...

    def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
        """Yield tokens from ``next_token`` until it raises ``EOFError``."""
        try:
            while True:
                yield self.next_token(state, parser_state)
        except EOFError:
            # EOFError ends the stream and is not propagated.
            return
|
|
1830
|
+
|
|
1831
|
+
|
|
1832
|
+
class BasicLexer(AbstractBasicLexer):
|
|
1833
|
+
terminals: Collection[TerminalDef]
|
|
1834
|
+
ignore_types: FrozenSet[str]
|
|
1835
|
+
newline_types: FrozenSet[str]
|
|
1836
|
+
user_callbacks: Dict[str, _Callback]
|
|
1837
|
+
callback: Dict[str, _Callback]
|
|
1838
|
+
re: ModuleType
|
|
1839
|
+
|
|
1840
|
+
def __init__(self, conf: "LexerConf", comparator=None) -> None:
    """Validate the configured terminals and store the lexer settings.

    Unless ``conf.skip_validation`` is set, every terminal must compile with
    ``conf.re_module``, must not be zero-width, and every name in
    ``conf.ignore`` must be a defined terminal. Regexp collisions are checked
    when ``has_interegular`` is true. The scanner itself is built later.

    Raises:
        LexError: if any of the validations above fails, or if strict mode
            is requested while ``has_interegular`` is false.
    """
    terminals = list(conf.terminals)
    assert all(isinstance(t, TerminalDef) for t in terminals), terminals

    self.re = conf.re_module

    if not conf.skip_validation:
        regexp_by_terminal = {}
        for term in terminals:
            source = term.pattern.to_regexp()
            try:
                self.re.compile(source, conf.g_regex_flags)
            except self.re.error:
                raise LexError("Cannot compile token %s: %s" % (term.name, term.pattern))

            if term.pattern.min_width == 0:
                raise LexError(
                    "Lexer does not allow zero-width terminals. (%s: %s)"
                    % (term.name, term.pattern)
                )
            # Only regexp patterns take part in the collision check.
            if term.pattern.type == "re":
                regexp_by_terminal[term] = source

        undefined = set(conf.ignore) - {term.name for term in terminals}
        if undefined:
            raise LexError("Ignore terminals are not defined: %s" % undefined)

        if has_interegular:
            _check_regex_collisions(regexp_by_terminal, comparator, conf.strict)
        elif conf.strict:
            raise LexError(
                "interegular must be installed for strict mode. Use `pip install 'lark[interegular]'`."
            )

    newline_names = [
        term.name
        for term in terminals
        if _regexp_has_newline(term.pattern.to_regexp())
    ]
    self.newline_types = frozenset(newline_names)
    self.ignore_types = frozenset(conf.ignore)

    def sort_key(term):
        # Higher priority first, then wider patterns, then longer pattern
        # source, with the name as the final tie-breaker.
        return (
            -term.priority,
            -term.pattern.max_width,
            -len(term.pattern.value),
            term.name,
        )

    terminals.sort(key=sort_key)
    self.terminals = terminals
    self.user_callbacks = conf.callbacks
    self.g_regex_flags = conf.g_regex_flags
    self.use_bytes = conf.use_bytes
    self.terminals_by_name = conf.terminals_by_name

    # Built lazily by _build_scanner().
    self._scanner = None
|
|
1900
|
+
|
|
1901
|
+
def _build_scanner(self):
    """Create the scanner and the terminal-name -> callback table.

    ``_create_unless`` returns the terminals to scan for together with an
    initial callback table. User callbacks from the lexer configuration are
    then merged in: when a terminal already has a callback, the two are
    combined through ``CallChain``; otherwise the user callback is installed
    directly. The finished ``Scanner`` is stored on ``self._scanner``.
    """

    def _has_type(expected):
        # Bind ``expected`` once per terminal. A lambda written directly in
        # the loop below would look ``type_`` up lazily, so every predicate
        # would end up comparing against the last key of ``user_callbacks``.
        return lambda t: t.type == expected

    terminals, self.callback = _create_unless(
        self.terminals, self.g_regex_flags, self.re, self.use_bytes
    )
    assert all(self.callback.values())

    for type_, f in self.user_callbacks.items():
        if type_ in self.callback:
            # A callback is already registered for this terminal: chain the
            # user callback behind it instead of overwriting it.
            self.callback[type_] = CallChain(
                self.callback[type_], f, _has_type(type_)
            )
        else:
            self.callback[type_] = f

    self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
|
|
1918
|
+
|
|
1919
|
+
@property
def scanner(self):
    """Return the scanner, building it on first access."""
    built = self._scanner
    if built is None:
        self._build_scanner()
        built = self._scanner
    return built
|
|
1924
|
+
|
|
1925
|
+
def match(self, text, pos):
    """Delegate matching of ``text`` at offset ``pos`` to the (lazily built) scanner."""
    active_scanner = self.scanner
    return active_scanner.match(text, pos)
|
|
1927
|
+
|
|
1928
|
+
def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token:
    """Return the next non-ignored token starting at the current position.

    Ignored terminals are consumed silently (their callback, if any, is still
    invoked). ``lex_state.line_ctr`` is advanced past everything consumed and
    ``lex_state.last_token`` is updated with the returned token.

    Raises:
        UnexpectedCharacters: nothing matches at the current position.
        LexError: a callback for a non-ignored terminal did not return a Token.
        EOFError: the position has reached the end of the text.
    """
    line_ctr = lex_state.line_ctr
    while line_ctr.char_pos < len(lex_state.text):
        res = self.match(lex_state.text, line_ctr.char_pos)
        if not res:
            # Report what could have matched here, leaving out ignored terminals.
            allowed = self.scanner.allowed_types - self.ignore_types
            if not allowed:
                allowed = {"<END-OF-FILE>"}
            raise UnexpectedCharacters(
                lex_state.text,
                line_ctr.char_pos,
                line_ctr.line,
                line_ctr.column,
                allowed=allowed,
                token_history=lex_state.last_token and [lex_state.last_token],
                state=parser_state,
                terminals_by_name=self.terminals_by_name,
            )

        value, type_ = res

        ignored = type_ in self.ignore_types
        t = None
        # A token object is only needed when it is returned or handed to a callback.
        if not ignored or type_ in self.callback:
            t = Token(
                type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column
            )
        # Advance the position first so the end_* fields below describe the token's end.
        line_ctr.feed(value, type_ in self.newline_types)
        if t is not None:
            t.end_line = line_ctr.line
            t.end_column = line_ctr.column
            t.end_pos = line_ctr.char_pos
            if t.type in self.callback:
                t = self.callback[t.type](t)
            if not ignored:
                if not isinstance(t, Token):
                    raise LexError(
                        "Callbacks must return a token (returned %r)" % t
                    )
                lex_state.last_token = t
                return t

    # End of input: nothing left to tokenize.
    raise EOFError(self)
|
|
1973
|
+
|
|
1974
|
+
|
|
1975
|
+
class ContextualLexer(Lexer):
    """Lexer that selects a basic lexer according to the current parser state.

    One basic lexer is built per distinct set of accepted terminals and shared
    between all parser states that accept that set. ``root_lexer`` knows every
    terminal and is only used to produce a better error when the
    state-specific lexer fails.
    """

    lexers: Dict[int, AbstractBasicLexer]
    root_lexer: AbstractBasicLexer

    BasicLexer: Type[AbstractBasicLexer] = BasicLexer

    def __init__(
        self,
        conf: "LexerConf",
        states: Dict[int, Collection[str]],
        always_accept: Collection[str] = (),
    ) -> None:
        """Build the per-state lexers and the root lexer.

        Args:
            conf: base lexer configuration; it is shallow-copied, not mutated.
            states: maps each parser state to the terminal names it accepts.
            always_accept: terminal names added to every state's accepted set.
        """
        all_terminals = list(conf.terminals)
        by_name = conf.terminals_by_name

        root_conf = copy(conf)
        root_conf.terminals = all_terminals

        comparator = (
            interegular.Comparator.from_regexes(
                {t: t.pattern.to_regexp() for t in all_terminals}
            )
            if has_interegular and not conf.skip_validation
            else None
        )

        shared: Dict[FrozenSet[str], AbstractBasicLexer] = {}
        self.lexers = {}
        for state, accepts in states.items():
            key = frozenset(accepts)
            if key not in shared:
                # First state with this accepted set: build its lexer once.
                names = set(accepts) | set(conf.ignore) | set(always_accept)
                state_conf = copy(root_conf)
                state_conf.terminals = [by_name[n] for n in names if n in by_name]
                shared[key] = self.BasicLexer(state_conf, comparator)

            self.lexers[state] = shared[key]

        assert root_conf.terminals is all_terminals
        # Validation is switched off for the root lexer's configuration.
        root_conf.skip_validation = True
        self.root_lexer = self.BasicLexer(root_conf, comparator)

    def lex(
        self, lexer_state: LexerState, parser_state: "ParserState"
    ) -> Iterator[Token]:
        """Yield tokens, choosing the lexer for ``parser_state.position`` before each one.

        Raises:
            UnexpectedToken: the state's lexer failed, but the root lexer
                could tokenize the input at that position.
            UnexpectedCharacters: neither lexer could match; the original
                error from the state's lexer is re-raised.
        """
        try:
            while True:
                current = self.lexers[parser_state.position]
                yield current.next_token(lexer_state, parser_state)
        except EOFError:
            return
        except UnexpectedCharacters as e:
            # Retry with the root lexer, which knows all terminals, to report
            # an unexpected token instead of unexpected characters.
            try:
                # Captured before the root lexer runs, since next_token
                # overwrites lexer_state.last_token.
                previous = lexer_state.last_token
                token = self.root_lexer.next_token(lexer_state, parser_state)
                raise UnexpectedToken(
                    token,
                    e.allowed,
                    state=parser_state,
                    token_history=[previous],
                    terminals_by_name=self.root_lexer.terminals_by_name,
                )
            except UnexpectedCharacters:
                # The root lexer failed too: surface the original error.
                raise e
|
|
2050
|
+
|
|
2051
|
+
|
|
2052
|
+
|
|
2053
|
+
|
|
2054
|
+
|
|
2055
|
+
# Accepted values for the parser-selection argument (kept as a string annotation).
_ParserArgType: "TypeAlias" = 'Literal["earley", "lalr", "cyk", "auto"]'
# Accepted values for the lexer-selection argument: a known name or a Lexer subclass.
_LexerArgType: "TypeAlias" = (
    'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
)
# A lexer callback receives a Token and returns a Token.
_LexerCallback = Callable[[Token], Token]
# Parser callbacks, keyed by string.
ParserCallbacks = Dict[str, Callable]
|
|
2061
|
+
|
|
2062
|
+
|
|
2063
|
+
class LexerConf(Serialize):
    """Configuration bundle handed to the lexers.

    Only the fields named in ``__serialize_fields__`` are serialized;
    ``terminals_by_name`` is derived from ``terminals`` and is rebuilt in
    ``_deserialize``.
    """

    __serialize_fields__ = (
        "terminals",
        "ignore",
        "g_regex_flags",
        "use_bytes",
        "lexer_type",
    )
    __serialize_namespace__ = (TerminalDef,)

    terminals: Collection[TerminalDef]
    re_module: ModuleType
    ignore: Collection[str]
    postlex: "Optional[PostLex]"
    callbacks: Dict[str, _LexerCallback]
    g_regex_flags: int
    skip_validation: bool
    use_bytes: bool
    lexer_type: Optional[_LexerArgType]
    strict: bool

    def __init__(
        self,
        terminals: Collection[TerminalDef],
        re_module: ModuleType,
        ignore: Collection[str] = (),
        postlex: "Optional[PostLex]" = None,
        callbacks: Optional[Dict[str, _LexerCallback]] = None,
        g_regex_flags: int = 0,
        skip_validation: bool = False,
        use_bytes: bool = False,
        strict: bool = False,
    ):
        """Store the configuration and index the terminals by name.

        Terminal names must be unique (checked with an assertion).
        ``callbacks`` defaults to a fresh empty dict and ``lexer_type``
        always starts out as ``None``.
        """
        self.terminals = terminals
        self.terminals_by_name = {t.name: t for t in self.terminals}
        assert len(self.terminals) == len(self.terminals_by_name)
        self.ignore = ignore
        self.postlex = postlex
        self.callbacks = callbacks or {}
        self.g_regex_flags = g_regex_flags
        self.re_module = re_module
        self.skip_validation = skip_validation
        self.use_bytes = use_bytes
        self.strict = strict
        self.lexer_type = None

    def _deserialize(self):
        # The name index is not a serialized field, so rebuild it here.
        self.terminals_by_name = {t.name: t for t in self.terminals}

    def __deepcopy__(self, memo=None):
        """Return a deep copy built through the constructor.

        ``re_module`` is shared rather than copied. ``strict`` is carried
        over as well (previously the copy silently fell back to the
        constructor default). It is read with ``getattr`` so that an instance
        created without ``__init__`` and lacking the attribute still copies
        with the default ``False``. ``lexer_type`` is not carried over: the
        constructor resets it to ``None``, as before.
        """
        return type(self)(
            deepcopy(self.terminals, memo),
            self.re_module,
            deepcopy(self.ignore, memo),
            deepcopy(self.postlex, memo),
            deepcopy(self.callbacks, memo),
            deepcopy(self.g_regex_flags, memo),
            deepcopy(self.skip_validation, memo),
            deepcopy(self.use_bytes, memo),
            deepcopy(getattr(self, "strict", False), memo),
        )
|
|
2123
|
+
|
|
2124
|
+
|
|
2125
|
+
class ParserConf(Serialize):
    """Parser configuration: the grammar rules, their callbacks and the start symbols."""

    __serialize_fields__ = ("rules", "start", "parser_type")

    rules: List["Rule"]
    callbacks: ParserCallbacks
    start: List[str]
    parser_type: _ParserArgType

    def __init__(
        self, rules: List["Rule"], callbacks: ParserCallbacks, start: List[str]
    ):
        """Store the configuration; ``start`` must be a list (asserted)."""
        assert isinstance(start, list)
        self.start = start
        self.callbacks = callbacks
        self.rules = rules
|
|
2140
|
+
|
|
2141
|
+
|
|
2142
|
+
|
|
2143
|
+
from functools import partial, wraps
|
|
2144
|
+
from itertools import product
|
|
2145
|
+
|
|
2146
|
+
|
|
2147
|
+
class ExpandSingleChild:
    """Node-builder wrapper that returns a lone child directly instead of building a node."""

    def __init__(self, node_builder):
        self.node_builder = node_builder

    def __call__(self, children):
        """Return the only child if there is exactly one, else ``node_builder(children)``."""
        return children[0] if len(children) == 1 else self.node_builder(children)
|
|
2156
|
+
|
|
2157
|
+
|
|
2158
|
+
class PropagatePositions:
    """Node-builder wrapper that copies position info from the children onto a built Tree.

    Start fields (``line``, ``column``, ``start_pos``) come from the first
    child that carries position info, end fields (``end_line``,
    ``end_column``, ``end_pos``) from the last one. The matching
    ``container_*`` fields are always refreshed; the plain fields are only
    written when the tree's meta does not have them yet.
    """

    def __init__(self, node_builder, node_filter=None):
        self.node_builder = node_builder
        self.node_filter = node_filter

    def __call__(self, children):
        res = self.node_builder(children)
        if not isinstance(res, Tree):
            return res

        res_meta = res.meta

        def inherit(edge, guard, fields):
            # Copy ``fields`` from ``edge`` onto the tree meta, preferring the
            # edge's ``container_*`` value when it has one.
            if edge is None:
                return
            if not hasattr(res_meta, guard):
                for field in fields:
                    setattr(
                        res_meta,
                        field,
                        getattr(edge, "container_" + field, getattr(edge, field)),
                    )
                res_meta.empty = False
            for field in fields:
                setattr(
                    res_meta,
                    "container_" + field,
                    getattr(edge, "container_" + field, getattr(edge, field)),
                )

        inherit(
            self._pp_get_meta(children),
            "line",
            ("line", "column", "start_pos"),
        )
        inherit(
            self._pp_get_meta(reversed(children)),
            "end_line",
            ("end_line", "end_column", "end_pos"),
        )
        return res

    def _pp_get_meta(self, children):
        """Return position info from the first usable child, or ``None`` if there is none.

        Children rejected by ``node_filter`` are skipped. A Tree contributes
        its meta only when that meta is not empty, a Token contributes itself,
        and any other object contributes the result of its ``__lark_meta__()``
        if it has one.
        """
        accept = self.node_filter
        for child in children:
            if accept is not None and not accept(child):
                continue
            if isinstance(child, Tree):
                if not child.meta.empty:
                    return child.meta
            elif isinstance(child, Token):
                return child
            elif hasattr(child, "__lark_meta__"):
                return child.__lark_meta__()
        return None
|
|
2241
|
+
|
|
2242
|
+
|
|
2243
|
+
def make_propagate_positions(option):
    """Translate the ``propagate_positions`` option into a node-builder wrapper factory.

    Returns ``None`` for ``False``, ``PropagatePositions`` for ``True``, and
    ``PropagatePositions`` pre-bound with ``node_filter=option`` when the
    option is callable.

    Raises:
        ConfigurationError: for any other value.
    """
    if callable(option):
        return partial(PropagatePositions, node_filter=option)
    if option is True:
        return PropagatePositions
    if option is False:
        return None
    raise ConfigurationError("Invalid option for propagate_positions: %r" % option)
|
|
2252
|
+
|
|
2253
|
+
|
|
2254
|
+
class ChildFilter:
    """Node-builder wrapper that selects, inlines and pads children before building.

    ``to_include`` holds ``(index, expand, nones_before)`` triples: the child
    at ``index`` is kept (or replaced by its own ``children`` when ``expand``
    is true) after ``nones_before`` ``None`` placeholders. ``append_none``
    placeholders are added at the end.
    """

    def __init__(self, to_include, append_none, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include
        self.append_none = append_none

    def __call__(self, children):
        kept = []
        for index, expand, nones_before in self.to_include:
            if nones_before:
                kept.extend([None] * nones_before)
            child = children[index]
            if expand:
                kept.extend(child.children)
            else:
                kept.append(child)

        if self.append_none:
            kept.extend([None] * self.append_none)

        return self.node_builder(kept)
|
|
2275
|
+
|
|
2276
|
+
|
|
2277
|
+
class ChildFilterLALR(ChildFilter):
    """``ChildFilter`` variant that may reuse a child's own ``children`` list as the result.

    When the first thing to be added is an expanded child, its ``children``
    list is adopted directly rather than copied, so that list is extended in
    place by whatever follows.
    """

    def __call__(self, children):
        kept = []
        for index, expand, nones_before in self.to_include:
            if nones_before:
                kept += [None] * nones_before
            child = children[index]
            if not expand:
                kept.append(child)
            elif kept:
                kept += child.children
            else:
                # Nothing collected yet: adopt the child's list instead of copying it.
                kept = child.children

        if self.append_none:
            kept += [None] * self.append_none

        return self.node_builder(kept)
|
|
2298
|
+
|
|
2299
|
+
|
|
2300
|
+
class ChildFilterLALR_NoPlaceholders(ChildFilter):
    """``ChildFilter`` variant without ``None`` placeholders.

    ``to_include`` holds ``(index, expand)`` pairs. As in ``ChildFilterLALR``,
    the ``children`` list of a leading expanded child is adopted directly
    rather than copied.
    """

    def __init__(self, to_include, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include

    def __call__(self, children):
        kept = []
        for index, expand in self.to_include:
            child = children[index]
            if not expand:
                kept.append(child)
            elif kept:
                kept += child.children
            else:
                # Nothing collected yet: adopt the child's list instead of copying it.
                kept = child.children
        return self.node_builder(kept)
|
|
2319
|
+
|
|
2320
|
+
|
|
2321
|
+
def _should_expand(sym):
    """Tell whether ``sym`` is a non-terminal whose name starts with an underscore."""
    if sym.is_term:
        return False
    return sym.name.startswith("_")
|
|
2323
|
+
|
|
2324
|
+
|
|
2325
|
+
def maybe_create_child_filter(
    expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]
):
    """Return a child-filter factory for ``expansion``, or ``None`` if no filtering is needed.

    ``_empty_indices`` (when non-empty) marks placeholder positions with
    ``True`` and real symbols with ``False``. It is turned into the number of
    ``None`` values to insert before each symbol and after the last one.
    """
    n_symbols = len(expansion)

    # How many Nones go in front of each symbol (plus one trailing slot).
    if _empty_indices:
        assert _empty_indices.count(False) == n_symbols
        gaps = [0]
        for is_placeholder in _empty_indices:
            if is_placeholder:
                gaps[-1] += 1
            else:
                gaps.append(0)
        assert len(gaps) == n_symbols + 1, (gaps, n_symbols)
    else:
        gaps = [0] * (n_symbols + 1)

    to_include = []
    pending_nones = 0
    for i, sym in enumerate(expansion):
        pending_nones += gaps[i]
        if keep_all_tokens or not (sym.is_term and sym.filter_out):
            to_include.append((i, _should_expand(sym), pending_nones))
            pending_nones = 0

    pending_nones += gaps[n_symbols]

    needs_filter = (
        _empty_indices
        or len(to_include) < n_symbols
        or any(entry[1] for entry in to_include)
    )
    if not needs_filter:
        return None

    if _empty_indices or ambiguous:
        return partial(
            ChildFilter if ambiguous else ChildFilterLALR, to_include, pending_nones
        )

    # No placeholders and not ambiguous: use the variant without None padding.
    return partial(
        ChildFilterLALR_NoPlaceholders, [(i, expand) for i, expand, _ in to_include]
    )
|
|
2363
|
+
|
|
2364
|
+
|
|
2365
|
+
class AmbiguousExpander:
    """Node-builder wrapper that lifts ambiguity from expandable children to the parent.

    When a child at one of the ``to_expand`` positions is an ``_ambig`` tree,
    the parent is built once per combination of alternatives and the results
    are wrapped in a new ``_ambig`` node.
    """

    def __init__(self, to_expand, tree_class, node_builder):
        self.node_builder = node_builder
        self.tree_class = tree_class
        self.to_expand = to_expand

    def __call__(self, children):
        ambiguous_positions = []
        for position, child in enumerate(children):
            if not (hasattr(child, "data") and child.data == "_ambig"):
                continue
            if position in self.to_expand:
                ambiguous_positions.append(position)
            # Every _ambig child has expand_kids_by_data("_ambig") applied,
            # whether or not its position is in to_expand.
            child.expand_kids_by_data("_ambig")

        if not ambiguous_positions:
            return self.node_builder(children)

        # One sequence of alternatives per child; unambiguous children
        # contribute a single option.
        options = []
        for position, child in enumerate(children):
            if position in ambiguous_positions:
                options.append(child.children)
            else:
                options.append((child,))

        alternatives = [self.node_builder(list(combo)) for combo in product(*options)]
        return self.tree_class("_ambig", alternatives)
|
|
2403
|
+
|
|
2404
|
+
|
|
2405
|
+
def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
    """Return an ``AmbiguousExpander`` factory for ``expansion``, or ``None`` if nothing qualifies.

    A position qualifies when ``keep_all_tokens`` is set, or when its symbol
    is not a filtered-out terminal and ``_should_expand`` accepts it.
    """
    to_expand = []
    for i, sym in enumerate(expansion):
        if keep_all_tokens:
            to_expand.append(i)
            continue
        if sym.is_term and sym.filter_out:
            continue
        if _should_expand(sym):
            to_expand.append(i)

    if to_expand:
        return partial(AmbiguousExpander, to_expand, tree_class)
    return None
|
|
2414
|
+
|
|
2415
|
+
|
|
2416
|
+
class AmbiguousIntermediateExpander:
    """Node-builder wrapper that turns a leading ``_iambig`` child into an ``_ambig`` node.

    If the first child is an ``_iambig`` tree, its derivations are flattened
    (recursively), each one is completed with the remaining children, and the
    parent is built once per derivation.
    """

    def __init__(self, tree_class, node_builder):
        self.node_builder = node_builder
        self.tree_class = tree_class

    def __call__(self, children):
        def flatten(nodes):
            # Returns a list of trees, one per derivation, each with the
            # trailing siblings appended; returns None when ``nodes`` does not
            # start with an '_iambig' tree. Only the first element is examined.
            if not nodes:
                return None
            head = nodes[0]
            if not (hasattr(head, "data") and head.data == "_iambig"):
                return None

            tail = nodes[1:]
            derivations = []
            for grandchild in head.children:
                nested = flatten(grandchild.children)
                if nested:
                    for tree in nested:
                        tree.children += tail
                    derivations += nested
                else:
                    derivations.append(
                        self.tree_class("_inter", grandchild.children + tail)
                    )
            return derivations

        derivations = flatten(children)
        if not derivations:
            return self.node_builder(children)

        built = [self.node_builder(tree.children) for tree in derivations]
        return self.tree_class("_ambig", built)
|
|
2456
|
+
|
|
2457
|
+
|
|
2458
|
+
def inplace_transformer(func):
    """Adapt a tree-taking callback so it can be called with a list of children.

    The children are wrapped in a ``Tree`` named after ``func`` before
    ``func`` is called.
    """

    @wraps(func)
    def f(children):
        return func(Tree(func.__name__, children))

    return f
|
|
2467
|
+
|
|
2468
|
+
|
|
2469
|
+
def apply_visit_wrapper(func, name, wrapper):
    """Adapt ``func`` so it is invoked through ``wrapper`` with a list of children.

    The returned callable calls ``wrapper(func, name, children, None)``.

    Raises:
        NotImplementedError: if ``wrapper`` is one of the meta-args wrappers.
    """
    unsupported = wrapper is _vargs_meta or wrapper is _vargs_meta_inline
    if unsupported:
        raise NotImplementedError("Meta args not supported for internal transformer")

    @wraps(func)
    def f(children):
        meta = None
        return wrapper(func, name, children, meta)

    return f
|
|
2478
|
+
|
|
2479
|
+
|
|
2480
|
+
class ParseTreeBuilder:
    """Prepare, per grammar rule, the chain of node-builder wrappers and the final callbacks."""

    def __init__(
        self,
        rules,
        tree_class,
        propagate_positions=False,
        ambiguous=False,
        maybe_placeholders=False,
    ):
        self.tree_class = tree_class
        self.propagate_positions = propagate_positions
        self.ambiguous = ambiguous
        self.maybe_placeholders = maybe_placeholders

        self.rule_builders = list(self._init_builders(rules))

    def _init_builders(self, rules):
        """Yield ``(rule, wrapper_chain)`` pairs; wrappers that do not apply are left out."""
        position_wrapper = make_propagate_positions(self.propagate_positions)

        for rule in rules:
            options = rule.options
            keep_all_tokens = options.keep_all_tokens
            expand_single_child = options.expand1

            # Order matters: create_callback applies the wrappers first to last.
            candidates = (
                (expand_single_child and not rule.alias) and ExpandSingleChild,
                maybe_create_child_filter(
                    rule.expansion,
                    keep_all_tokens,
                    self.ambiguous,
                    options.empty_indices if self.maybe_placeholders else None,
                ),
                position_wrapper,
                self.ambiguous
                and maybe_create_ambiguous_expander(
                    self.tree_class, rule.expansion, keep_all_tokens
                ),
                self.ambiguous
                and partial(AmbiguousIntermediateExpander, self.tree_class),
            )
            wrapper_chain = [wrapper for wrapper in candidates if wrapper]

            yield rule, wrapper_chain

    def create_callback(self, transformer=None):
        """Return a dict mapping each rule to its fully wrapped callback.

        The callback is looked up on ``transformer`` by the rule's alias,
        template source or origin name. When the lookup fails, a default is
        used: the transformer's ``__default__`` if it has one, otherwise
        ``tree_class``.

        Raises:
            GrammarError: if the same rule appears twice.
        """
        callbacks = {}

        default_handler = getattr(transformer, "__default__", None)
        if default_handler:

            def default_callback(data, children):
                return default_handler(data, children, None)

        else:
            default_callback = self.tree_class

        for rule, wrapper_chain in self.rule_builders:
            callback_name = (
                rule.alias or rule.options.template_source or rule.origin.name
            )
            try:
                callback = getattr(transformer, callback_name)
                visit_wrapper = getattr(callback, "visit_wrapper", None)
                if visit_wrapper is not None:
                    callback = apply_visit_wrapper(callback, callback_name, visit_wrapper)
                elif isinstance(transformer, Transformer_InPlace):
                    callback = inplace_transformer(callback)
            except AttributeError:
                callback = partial(default_callback, callback_name)

            for wrap in wrapper_chain:
                callback = wrap(callback)

            if rule in callbacks:
                raise GrammarError("Rule '%s' already exists" % (rule,))

            callbacks[rule] = callback

        return callbacks
|
|
2564
|
+
|
|
2565
|
+
|
|
2566
|
+
|
|
2567
|
+
|
|
2568
|
+
|
|
2569
|
+
class Action:
    """Named marker object; its string form and repr are both the name."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        label = self.name
        return label

    def __repr__(self):
        # Same text as str(), so subclasses overriding __str__ are honoured.
        return str(self)
|
|
2578
|
+
|
|
2579
|
+
|
|
2580
|
+
# Markers for the two parse-table actions; the tables and the parser compare
# them by identity (``is Shift`` / ``is Reduce``).
Shift = Action("Shift")
Reduce = Action("Reduce")

# Type variable for the state type used by the generic table and parser classes.
StateT = TypeVar("StateT")
|
|
2584
|
+
|
|
2585
|
+
|
|
2586
|
+
class ParseTableBase(Generic[StateT]):
    """Parse table: per state, a mapping from token name to an ``(action, argument)`` pair."""

    states: Dict[StateT, Dict[str, Tuple]]
    start_states: Dict[str, StateT]
    end_states: Dict[str, StateT]

    def __init__(self, states, start_states, end_states):
        self.states = states
        self.start_states = start_states
        self.end_states = end_states

    def serialize(self, memo):
        """Return a plain-dict form of the table.

        Token names are replaced by ids from an ``Enumerator``. Reduce
        entries become ``(1, serialized_rule)`` and all others ``(0, arg)``.
        """
        tokens = Enumerator()

        encoded_states = {}
        for state, actions in self.states.items():
            row = {}
            for token, (action, arg) in actions.items():
                token_id = tokens.get(token)
                if action is Reduce:
                    row[token_id] = (1, arg.serialize(memo))
                else:
                    row[token_id] = (0, arg)
            encoded_states[state] = row

        return {
            "tokens": tokens.reversed(),
            "states": encoded_states,
            "start_states": self.start_states,
            "end_states": self.end_states,
        }

    @classmethod
    def deserialize(cls, data, memo):
        """Rebuild a table from the dict produced by ``serialize``."""
        tokens = data["tokens"]

        decoded_states = {}
        for state, actions in data["states"].items():
            row = {}
            for token, (action, arg) in actions.items():
                token_name = tokens[token]
                if action == 1:
                    row[token_name] = (Reduce, Rule.deserialize(arg, memo))
                else:
                    row[token_name] = (Shift, arg)
            decoded_states[state] = row

        return cls(decoded_states, data["start_states"], data["end_states"])
|
|
2631
|
+
|
|
2632
|
+
|
|
2633
|
+
class ParseTable(ParseTableBase["State"]):
    """Parse table whose states are ``State`` objects; adds nothing to ``ParseTableBase``."""

    pass
|
|
2637
|
+
|
|
2638
|
+
|
|
2639
|
+
class IntParseTable(ParseTableBase[int]):
    """Parse table whose states are integers."""

    @classmethod
    def from_ParseTable(cls, parse_table: ParseTable):
        """Build an integer-keyed table from ``parse_table``.

        States are numbered in the iteration order of ``parse_table.states``.
        Shift targets and the start/end states are translated to those
        numbers; other entries are kept unchanged.
        """
        index_of: Dict["State", int] = {
            state: idx for idx, state in enumerate(list(parse_table.states))
        }

        def renumber(entry):
            # Only Shift entries carry a state as their argument.
            if entry[0] is Shift:
                return (entry[0], index_of[entry[1]])
            return entry

        int_states = {}
        for state, lookahead in parse_table.states.items():
            translated = {token: renumber(entry) for token, entry in lookahead.items()}
            int_states[index_of[state]] = translated

        start_states = {
            name: index_of[state] for name, state in parse_table.start_states.items()
        }
        end_states = {
            name: index_of[state] for name, state in parse_table.end_states.items()
        }
        return cls(int_states, start_states, end_states)
|
|
2662
|
+
|
|
2663
|
+
|
|
2664
|
+
|
|
2665
|
+
|
|
2666
|
+
|
|
2667
|
+
class ParseConf(Generic[StateT]):
    """Per-parse settings: the table, the callbacks and the start/end states for one start symbol."""

    __slots__ = (
        "parse_table",
        "callbacks",
        "start",
        "start_state",
        "end_state",
        "states",
    )

    parse_table: ParseTableBase[StateT]
    callbacks: ParserCallbacks
    start: str

    start_state: StateT
    end_state: StateT
    states: Dict[StateT, Dict[str, tuple]]

    def __init__(
        self,
        parse_table: ParseTableBase[StateT],
        callbacks: ParserCallbacks,
        start: str,
    ):
        """Look up the start and end states for ``start`` in ``parse_table``.

        Raises ``KeyError`` if ``start`` is not a key of the table's
        ``start_states`` / ``end_states``.
        """
        self.parse_table = parse_table

        self.start_state = parse_table.start_states[start]
        self.end_state = parse_table.end_states[start]
        self.states = parse_table.states

        self.start = start
        self.callbacks = callbacks
|
|
2699
|
+
|
|
2700
|
+
|
|
2701
|
+
class ParserState(Generic[StateT]):
    """Mutable state of one parse: the state stack and the value stack."""

    __slots__ = "parse_conf", "lexer", "state_stack", "value_stack"

    parse_conf: ParseConf[StateT]
    lexer: LexerThread
    state_stack: List[StateT]
    value_stack: list

    def __init__(
        self,
        parse_conf: ParseConf[StateT],
        lexer: LexerThread,
        state_stack=None,
        value_stack=None,
    ):
        """Start from the given stacks, or from the start state and an empty value stack.

        A falsy ``state_stack`` / ``value_stack`` (``None`` or empty) is
        replaced by a fresh list.
        """
        self.parse_conf = parse_conf
        self.lexer = lexer
        self.state_stack = state_stack or [self.parse_conf.start_state]
        self.value_stack = value_stack or []

    @property
    def position(self) -> StateT:
        """The current parser state (top of the state stack)."""
        return self.state_stack[-1]

    # Equality only looks at the stack depth and the current state, not at
    # the stack contents or the values.

    def __eq__(self, other) -> bool:
        if not isinstance(other, ParserState):
            return NotImplemented
        return (
            len(self.state_stack) == len(other.state_stack)
            and self.position == other.position
        )

    def __copy__(self):
        """Copy with a shallow-copied state stack and a deep-copied value stack."""
        return type(self)(
            self.parse_conf,
            self.lexer,  # passed through by reference, not copied

            copy(self.state_stack),
            deepcopy(self.value_stack),
        )

    def copy(self) -> "ParserState[StateT]":
        """Convenience wrapper around ``copy.copy(self)``."""
        return copy(self)

    def feed_token(self, token: Token, is_end=False) -> Any:
        """Advance the parser by one token.

        Performs as many reductions as the table dictates, then one shift.
        Returns ``None`` after a shift. When ``is_end`` is true and a
        reduction reaches the end state, returns the value on top of the
        value stack.

        Raises:
            UnexpectedToken: the current state has no action for ``token.type``.
        """
        state_stack = self.state_stack
        value_stack = self.value_stack
        states = self.parse_conf.states
        end_state = self.parse_conf.end_state
        callbacks = self.parse_conf.callbacks

        while True:
            state = state_stack[-1]
            try:
                action, arg = states[state][token.type]
            except KeyError:
                # Only upper-case keys of the state's table are reported as expected.
                expected = {s for s in states[state].keys() if s.isupper()}
                raise UnexpectedToken(
                    token, expected, state=self, interactive_parser=None
                )

            assert arg != end_state

            if action is Shift:
                # Shift once and return.

                assert not is_end
                state_stack.append(arg)
                value_stack.append(
                    token
                    if token.type not in callbacks
                    else callbacks[token.type](token)
                )
                return
            else:
                # Reduce, then loop again with the same token.

                rule = arg
                size = len(rule.expansion)
                if size:
                    s = value_stack[-size:]
                    del state_stack[-size:]
                    del value_stack[-size:]
                else:
                    # Empty expansion: nothing to pop (a [-0:] slice would take everything).
                    s = []

                value = callbacks[rule](s) if callbacks else s

                # Follow the transition on the rule's origin from the uncovered state.
                _action, new_state = states[state_stack[-1]][rule.origin.name]
                assert _action is Shift
                state_stack.append(new_state)
                value_stack.append(value)

                if is_end and state_stack[-1] == end_state:
                    return value_stack[-1]
|
|
2798
|
+
|
|
2799
|
+
|
|
2800
|
+
|
|
2801
|
+
|
|
2802
|
+
|
|
2803
|
+
class LALR_Parser(Serialize):
    """LALR parser front object: owns the parse table and the ``_Parser`` that runs it."""

    def __init__(
        self, parser_conf: ParserConf, debug: bool = False, strict: bool = False
    ):
        """Run the LALR analysis for ``parser_conf`` and build the parser from its table."""
        analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
        analysis.compute_lalr()
        callbacks = parser_conf.callbacks

        self._parse_table = analysis.parse_table
        self.parser_conf = parser_conf
        self.parser = _Parser(analysis.parse_table, callbacks, debug)

    @classmethod
    def deserialize(cls, data, memo, callbacks, debug=False):
        """Rebuild a parser from serialized table data, bypassing ``__init__``.

        Note: ``parser_conf`` is not set on instances created this way.
        """
        inst = cls.__new__(cls)
        inst._parse_table = IntParseTable.deserialize(data, memo)
        inst.parser = _Parser(inst._parse_table, callbacks, debug)
        return inst

    def serialize(self, memo: Any = None) -> Dict[str, Any]:
        """Serialize the parse table."""
        return self._parse_table.serialize(memo)

    def parse_interactive(self, lexer: LexerThread, start: str):
        """Call the underlying parser with ``start_interactive=True``."""
        return self.parser.parse(lexer, start, start_interactive=True)

    def parse(self, lexer, start, on_error=None):
        """Parse, optionally recovering from errors through ``on_error``.

        Without ``on_error`` any ``UnexpectedInput`` propagates. Otherwise
        ``on_error(e)`` is called for each error; a falsy result re-raises
        it, a truthy result resumes parsing from the error's interactive
        parser.
        """
        try:
            return self.parser.parse(lexer, start)
        except UnexpectedInput as e:
            if on_error is None:
                raise

            while True:
                if isinstance(e, UnexpectedCharacters):
                    # Remember the position so we can tell whether on_error moved it.
                    s = e.interactive_parser.lexer_thread.state
                    p = s.line_ctr.char_pos

                if not on_error(e):
                    raise e

                if isinstance(e, UnexpectedCharacters):
                    # The handler did not move the position: skip one
                    # character so that the parse can make progress.

                    if p == s.line_ctr.char_pos:
                        s.line_ctr.feed(s.text[p : p + 1])

                try:
                    return e.interactive_parser.resume_parse()
                except UnexpectedToken as e2:
                    if (
                        isinstance(e, UnexpectedToken)
                        and e.token.type == e2.token.type == "$END"
                        and e.interactive_parser == e2.interactive_parser
                    ):
                        # Same end-of-input error in the same parser state
                        # again: give up instead of looping forever.

                        raise e2
                    e = e2
                except UnexpectedCharacters as e2:
                    e = e2
|
|
2863
|
+
|
|
2864
|
+
|
|
2865
|
+
class _Parser:
    """Runs a parse table against a token stream, invoking rule callbacks."""

    parse_table: ParseTableBase
    callbacks: ParserCallbacks
    debug: bool

    def __init__(
        self,
        parse_table: ParseTableBase,
        callbacks: ParserCallbacks,
        debug: bool = False,
    ):
        self.parse_table = parse_table
        self.callbacks = callbacks
        self.debug = debug

    def parse(
        self,
        lexer: LexerThread,
        start: str,
        value_stack=None,
        state_stack=None,
        start_interactive=False,
    ):
        """Create a fresh parser state for *start* and either run it or wrap it.

        With ``start_interactive`` set, an ``InteractiveParser`` bound to the
        new state is returned instead of the parse result.
        """
        conf = ParseConf(self.parse_table, self.callbacks, start)
        state = ParserState(conf, lexer, state_stack, value_stack)
        if not start_interactive:
            return self.parse_from_state(state)
        return InteractiveParser(self, state, state.lexer)

    def parse_from_state(self, state: ParserState, last_token: Optional[Token] = None):
        """Feed every remaining token of ``state.lexer`` and then the ``$END`` token.

        *last_token* only serves as the position donor for ``$END`` when the
        lexer produces no further tokens.
        """
        try:
            token = last_token
            for token in state.lexer.lex(state):
                assert token is not None
                state.feed_token(token)

            if token:
                end_token = Token.new_borrow_pos("$END", "", token)
            else:
                end_token = Token("$END", "", 0, 1, 1)
            return state.feed_token(end_token, True)
        except UnexpectedInput as e:
            # Attach an interactive parser so callers can inspect/resume;
            # InteractiveParser may be unavailable, hence the NameError guard.
            try:
                e.interactive_parser = InteractiveParser(self, state, state.lexer)
            except NameError:
                pass
            raise e
        except Exception:
            if self.debug:
                print("")
                print("STATE STACK DUMP")
                print("----------------")
                for index, entry in enumerate(state.state_stack):
                    print("%d)" % index, entry)
                print("")

            raise
|
|
2924
|
+
|
|
2925
|
+
|
|
2926
|
+
|
|
2927
|
+
|
|
2928
|
+
|
|
2929
|
+
class InteractiveParser:
    """Step-by-step handle on a parser state and the lexer thread feeding it."""

    def __init__(self, parser, parser_state, lexer_thread: LexerThread):
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_thread = lexer_thread
        self.result = None

    @property
    def lexer_state(self) -> LexerThread:
        """Deprecated alias of ``lexer_thread`` (emits a ``DeprecationWarning``)."""
        warnings.warn(
            "lexer_state will be removed in subsequent releases. Use lexer_thread instead.",
            DeprecationWarning,
        )
        return self.lexer_thread

    def feed_token(self, token: Token):
        """Feed one token to the parser state; a ``$END`` token marks the end of input."""
        is_end = token.type == "$END"
        return self.parser_state.feed_token(token, is_end)

    def iter_parse(self) -> Iterator[Token]:
        """Yield each lexed token, feeding it to the parser after the consumer resumes.

        The value returned by the latest feed is stored in ``self.result``.
        """
        for token in self.lexer_thread.lex(self.parser_state):
            yield token
            self.result = self.feed_token(token)

    def exhaust_lexer(self) -> List[Token]:
        """Run ``iter_parse`` to completion and return the tokens it produced."""
        return list(self.iter_parse())

    def feed_eof(self, last_token=None):
        """Feed a ``$END`` token, borrowing its position from *last_token* when given."""
        if last_token is not None:
            eof = Token.new_borrow_pos("$END", "", last_token)
        else:
            eof = self.lexer_thread._Token("$END", "", 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Return a new cursor of the same type over copies of the state and lexer thread."""
        state_copy = copy(self.parser_state)
        lexer_copy = copy(self.lexer_thread)
        return type(self)(self.parser, state_copy, lexer_copy)

    def copy(self):
        return copy(self)

    def __eq__(self, other):
        if isinstance(other, InteractiveParser):
            return (
                self.parser_state == other.parser_state
                and self.lexer_thread == other.lexer_thread
            )
        return False

    def as_immutable(self):
        """Return an ``ImmutableInteractiveParser`` built from a copy of this cursor."""
        snapshot = copy(self)
        return ImmutableInteractiveParser(
            snapshot.parser, snapshot.parser_state, snapshot.lexer_thread
        )

    def pretty(self):
        """Return a multi-line description of the current choices and the stack size."""
        lines = ["Parser choices:"]
        lines.extend(
            "\t- %s -> %r" % (symbol, action)
            for symbol, action in self.choices().items()
        )
        lines.append("stack size: %s" % len(self.parser_state.state_stack))
        return "\n".join(lines)

    def choices(self):
        """Return the parse-table row for the parser state's current position."""
        state = self.parser_state
        return state.parse_conf.parse_table.states[state.position]

    def accepts(self):
        """Return the upper-case choice names that can be fed without ``UnexpectedToken``.

        Each candidate is tried on a copy of this cursor whose parse
        configuration has its callbacks replaced by an empty dict.
        """
        accepted = set()
        silent_conf = copy(self.parser_state.parse_conf)
        silent_conf.callbacks = {}
        for name in self.choices():
            if not name.isupper():
                continue
            probe = copy(self)
            probe.parser_state.parse_conf = silent_conf
            try:
                probe.feed_token(self.lexer_thread._Token(name, ""))
            except UnexpectedToken:
                pass
            else:
                accepted.add(name)
        return accepted

    def resume_parse(self):
        """Continue parsing from the current state, passing the lexer's last token along."""
        last_seen = self.lexer_thread.state.last_token
        return self.parser.parse_from_state(self.parser_state, last_token=last_seen)
|
|
3035
|
+
|
|
3036
|
+
|
|
3037
|
+
class ImmutableInteractiveParser(InteractiveParser):
    """Hashable variant whose operations return new cursors instead of mutating."""

    result = None

    def __hash__(self):
        key = (self.parser_state, self.lexer_thread)
        return hash(key)

    def feed_token(self, token):
        """Feed *token* to a copy of this cursor and return that copy.

        The value produced by the feed is stored on the copy's ``result``.
        """
        successor = copy(self)
        successor.result = InteractiveParser.feed_token(successor, token)
        return successor

    def exhaust_lexer(self):
        """Exhaust the lexer on a mutable copy and return an immutable view of it."""
        working = self.as_mutable()
        working.exhaust_lexer()
        return working.as_immutable()

    def as_mutable(self):
        """Return a plain ``InteractiveParser`` built from a copy of this cursor."""
        snapshot = copy(self)
        return InteractiveParser(
            snapshot.parser, snapshot.parser_state, snapshot.lexer_thread
        )
|
|
3060
|
+
|
|
3061
|
+
|
|
3062
|
+
|
|
3063
|
+
|
|
3064
|
+
|
|
3065
|
+
def _wrap_lexer(lexer_class):
|
|
3066
|
+
future_interface = getattr(lexer_class, "__future_interface__", False)
|
|
3067
|
+
if future_interface:
|
|
3068
|
+
return lexer_class
|
|
3069
|
+
else:
|
|
3070
|
+
|
|
3071
|
+
class CustomLexerWrapper(Lexer):
|
|
3072
|
+
def __init__(self, lexer_conf):
|
|
3073
|
+
self.lexer = lexer_class(lexer_conf)
|
|
3074
|
+
|
|
3075
|
+
def lex(self, lexer_state, parser_state):
|
|
3076
|
+
return self.lexer.lex(lexer_state.text)
|
|
3077
|
+
|
|
3078
|
+
return CustomLexerWrapper
|
|
3079
|
+
|
|
3080
|
+
|
|
3081
|
+
def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
    """Rebuild a ``ParsingFrontend`` from serialized *data*.

    ``data["parser_conf"]`` and ``data["parser"]`` are deserialized with
    *memo*; *callbacks* is installed both on the parser and on the restored
    parser configuration. A ``"LALR_Parser"`` entry in ``options._plugins``
    overrides the parser class.
    """
    parser_conf = ParserConf.deserialize(data["parser_conf"], memo)
    cls = (options and options._plugins.get("LALR_Parser")) or LALR_Parser
    # The plugin lookup above tolerates a falsy ``options``; read ``debug``
    # with the same guard (as create_lalr_parser does) instead of failing
    # with AttributeError.
    debug = options.debug if options else False
    parser = cls.deserialize(data["parser"], memo, callbacks, debug)
    parser_conf.callbacks = callbacks
    return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
|
|
3087
|
+
|
|
3088
|
+
|
|
3089
|
+
# Registry of parser factories keyed by parser-type name. ParsingFrontend
# looks an entry up by ``parser_conf.parser_type`` and calls it as
# ``factory(lexer_conf, parser_conf, options)`` when no ready parser is given.
_parser_creators: "Dict[str, Callable[[LexerConf, Any, Any], Any]]" = {}
|
|
3090
|
+
|
|
3091
|
+
|
|
3092
|
+
class ParsingFrontend(Serialize):
    """Pairs a parser with the lexer that feeds it and exposes ``parse`` entry points."""

    __serialize_fields__ = "lexer_conf", "parser_conf", "parser"

    lexer_conf: LexerConf
    parser_conf: ParserConf
    options: Any

    def __init__(
        self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None
    ):
        """Set up the parser and, unless a dynamic lexer is requested, the lexer.

        Args:
            lexer_conf: lexer configuration; its ``lexer_type`` is either a
                ``Lexer`` subclass or one of the strings handled below.
            parser_conf: parser configuration; ``parser_type`` selects the
                factory from ``_parser_creators`` when *parser* is not given.
            options: passed through to the parser/lexer factories.
            parser: an already-built parser to use as is.

        Raises:
            TypeError: if ``lexer_conf.lexer_type`` is neither a class nor a string.
        """
        self.parser_conf = parser_conf
        self.lexer_conf = lexer_conf
        self.options = options

        # Parser: use the supplied one, otherwise build it from the registry.
        if parser:
            self.parser = parser
        else:
            create_parser = _parser_creators.get(parser_conf.parser_type)
            assert (
                create_parser is not None
            ), "{} is not supported in standalone mode".format(parser_conf.parser_type)
            self.parser = create_parser(lexer_conf, parser_conf, options)

        # Lexer: dynamic lexers are not built here; raw text is handed to the
        # parser instead (see _make_lexer_thread).
        lexer_type = lexer_conf.lexer_type
        self.skip_lexer = False
        if lexer_type in ("dynamic", "dynamic_complete"):
            assert lexer_conf.postlex is None
            self.skip_lexer = True
            return

        if isinstance(lexer_type, type):
            assert issubclass(lexer_type, Lexer)
            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
        elif isinstance(lexer_type, str):
            create_lexer = {
                "basic": create_basic_lexer,
                "contextual": create_contextual_lexer,
            }[lexer_type]
            self.lexer = create_lexer(
                lexer_conf, self.parser, lexer_conf.postlex, options
            )
        else:
            # The original message contained an unformatted "{lexer_type}"
            # placeholder; interpolate the offending value.
            raise TypeError("Bad value for lexer_type: {}".format(lexer_type))

        if lexer_conf.postlex:
            self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)

    def _verify_start(self, start=None):
        """Return the start rule to use, validating it against ``parser_conf.start``.

        Raises:
            ConfigurationError: if *start* is omitted while several start
                rules exist, or if *start* is not one of them.
        """
        if start is None:
            start_decls = self.parser_conf.start
            if len(start_decls) > 1:
                raise ConfigurationError(
                    "Lark initialized with more than 1 possible start rule. Must specify which start rule to parse",
                    start_decls,
                )
            (start,) = start_decls
        elif start not in self.parser_conf.start:
            raise ConfigurationError(
                "Unknown start rule %s. Must be one of %r"
                % (start, self.parser_conf.start)
            )
        return start

    def _make_lexer_thread(self, text: str) -> Union[str, LexerThread]:
        """Return *text* itself when the lexer is skipped, else a lexer thread over it.

        A ``"LexerThread"`` entry in ``options._plugins`` overrides the class used.
        """
        cls = (self.options and self.options._plugins.get("LexerThread")) or LexerThread
        return text if self.skip_lexer else cls.from_text(self.lexer, text)

    def parse(self, text: str, start=None, on_error=None):
        """Parse *text* from *start*; *on_error* is forwarded only when provided."""
        chosen_start = self._verify_start(start)
        kw = {} if on_error is None else {"on_error": on_error}
        stream = self._make_lexer_thread(text)
        return self.parser.parse(stream, chosen_start, **kw)

    def parse_interactive(self, text: Optional[str] = None, start=None):
        """Start an interactive parse of *text*.

        Raises:
            ConfigurationError: if the configured parser type is not ``"lalr"``.
        """
        chosen_start = self._verify_start(start)
        if self.parser_conf.parser_type != "lalr":
            raise ConfigurationError(
                "parse_interactive() currently only works with parser='lalr' "
            )
        stream = self._make_lexer_thread(text)
        return self.parser.parse_interactive(stream, chosen_start)
|
|
3183
|
+
|
|
3184
|
+
|
|
3185
|
+
def _validate_frontend_args(parser, lexer) -> None:
    """Check that *parser* is a known engine and that a named *lexer* suits it.

    A lexer given as a class is not checked here.
    """
    assert_config(parser, ("lalr", "earley", "cyk"))
    if isinstance(lexer, type):
        return

    lexers_by_parser = {
        "lalr": ("basic", "contextual"),
        "earley": ("basic", "dynamic", "dynamic_complete"),
        "cyk": ("basic",),
    }
    assert_config(
        lexer,
        lexers_by_parser[parser],
        "Parser %r does not support lexer %%r, expected one of %%s" % parser,
    )
|
|
3199
|
+
|
|
3200
|
+
|
|
3201
|
+
def _get_lexer_callbacks(transformer, terminals):
|
|
3202
|
+
result = {}
|
|
3203
|
+
for terminal in terminals:
|
|
3204
|
+
callback = getattr(transformer, terminal.name, None)
|
|
3205
|
+
if callback is not None:
|
|
3206
|
+
result[terminal.name] = callback
|
|
3207
|
+
return result
|
|
3208
|
+
|
|
3209
|
+
|
|
3210
|
+
class PostLexConnector:
    """Chains a lexer with a post-lexer: the lexer's output is passed to ``postlexer.process``."""

    def __init__(self, lexer, postlexer):
        self.lexer, self.postlexer = lexer, postlexer

    def lex(self, lexer_state, parser_state):
        token_stream = self.lexer.lex(lexer_state, parser_state)
        return self.postlexer.process(token_stream)
|
|
3218
|
+
|
|
3219
|
+
|
|
3220
|
+
def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
    """Build a basic lexer from *lexer_conf*.

    A ``"BasicLexer"`` entry in ``options._plugins`` replaces the default
    class; *parser* and *postlex* are accepted but not used.
    """
    plugin_cls = options._plugins.get("BasicLexer") if options else None
    lexer_cls = plugin_cls or BasicLexer
    return lexer_cls(lexer_conf)
|
|
3223
|
+
|
|
3224
|
+
|
|
3225
|
+
def create_contextual_lexer(
    lexer_conf: LexerConf, parser, postlex, options
) -> ContextualLexer:
    """Build a contextual lexer keyed on the parser's table states.

    Each state index of ``parser._parse_table`` is mapped to the list of keys
    of that state's table row. Terminals in ``postlex.always_accept`` (if a
    post-lexer is given) are passed on as ``always_accept``. A
    ``"ContextualLexer"`` entry in ``options._plugins`` replaces the default class.
    """
    plugin_cls = options._plugins.get("ContextualLexer") if options else None
    lexer_cls = plugin_cls or ContextualLexer

    table = parser._parse_table
    accepted_by_state = {}
    for state_idx, row in table.states.items():
        accepted_by_state[state_idx] = list(row.keys())

    if postlex:
        always_accept = postlex.always_accept
    else:
        always_accept = ()
    return lexer_cls(lexer_conf, accepted_by_state, always_accept=always_accept)
|
|
3235
|
+
|
|
3236
|
+
|
|
3237
|
+
def create_lalr_parser(
    lexer_conf: LexerConf, parser_conf: ParserConf, options=None
) -> LALR_Parser:
    """Build an LALR parser for *parser_conf*.

    ``debug`` and ``strict`` are read from *options* and default to ``False``
    when *options* is falsy. A ``"LALR_Parser"`` entry in ``options._plugins``
    replaces the default class. *lexer_conf* is accepted but not used.
    """
    if options:
        debug = options.debug
        strict = options.strict
        parser_cls = options._plugins.get("LALR_Parser") or LALR_Parser
    else:
        debug = False
        strict = False
        parser_cls = LALR_Parser
    return parser_cls(parser_conf, debug=debug, strict=strict)


# Make the factory available to ParsingFrontend under the "lalr" parser type.
_parser_creators["lalr"] = create_lalr_parser
|
|
3247
|
+
|
|
3248
|
+
|
|
3249
|
+
|
|
3250
|
+
|
|
3251
|
+
class PostLex(ABC):
    """Abstract base for lexer post-processors."""

    # Terminal names read by the contextual-lexer factory and by Lark when
    # deciding which terminals to keep; empty by default.
    always_accept: Iterable[str] = ()

    @abstractmethod
    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
        """Transform the token *stream*; this base implementation returns it unchanged."""
        return stream
|
|
3257
|
+
|
|
3258
|
+
|
|
3259
|
+
class LarkOptions(Serialize):
    """Validated container for the keyword options accepted by ``Lark``.

    Option values live in a single ``options`` dict stored on the instance;
    attribute reads and writes are redirected to that dict.
    """

    start: List[str]
    debug: bool
    strict: bool
    transformer: "Optional[Transformer]"
    propagate_positions: Union[bool, str]
    maybe_placeholders: bool
    cache: Union[bool, str]
    regex: bool
    g_regex_flags: int
    keep_all_tokens: bool
    tree_class: Optional[Callable[[str, List], Any]]
    parser: _ParserArgType
    lexer: _LexerArgType
    ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
    postlex: Optional[PostLex]
    priority: 'Optional[Literal["auto", "normal", "invert"]]'
    lexer_callbacks: Dict[str, Callable[[Token], Token]]
    use_bytes: bool
    ordered_sets: bool
    edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
    import_paths: "List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]"
    source_path: Optional[str]

    OPTIONS_DOC = r"""
    **=== General Options ===**

    start
            The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
    debug
            Display debug information and extra warnings. Use only when debugging (Default: ``False``)
            When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
    strict
            Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions.
    transformer
            Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
    propagate_positions
            Propagates positional attributes into the 'meta' attribute of all tree branches.
            Sets attributes: (line, column, end_line, end_column, start_pos, end_pos,
                              container_line, container_column, container_end_line, container_end_column)
            Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
    maybe_placeholders
            When ``True``, the ``[]`` operator returns ``None`` when not matched.
            When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
            (default= ``True``)
    cache
            Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.

            - When ``False``, does nothing (default)
            - When ``True``, caches to a temporary file in the local directory
            - When given a string, caches to the path pointed by the string
    regex
            When True, uses the ``regex`` module instead of the stdlib ``re``.
    g_regex_flags
            Flags that are applied to all terminals (both regex and strings)
    keep_all_tokens
            Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
    tree_class
            Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.

    **=== Algorithm Options ===**

    parser
            Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
            (there is also a "cyk" option for legacy)
    lexer
            Decides whether or not to use a lexer stage

            - "auto" (default): Choose for me based on the parser
            - "basic": Use a basic lexer
            - "contextual": Stronger lexer (only works with parser="lalr")
            - "dynamic": Flexible and powerful (only with parser="earley")
            - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
    ambiguity
            Decides how to handle ambiguity in the parse. Only relevant if parser="earley"

            - "resolve": The parser will automatically choose the simplest derivation
              (it chooses consistently: greedy for tokens, non-greedy for rules)
            - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
            - "forest": The parser will return the root of the shared packed parse forest.

    **=== Misc. / Domain Specific Options ===**

    postlex
            Lexer post-processing (Default: ``None``) Only works with the basic and contextual lexers.
    priority
            How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto")
    lexer_callbacks
            Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
    use_bytes
            Accept an input of type ``bytes`` instead of ``str``.
    ordered_sets
            Should Earley use ordered-sets to achieve stable output (~10% slower than regular sets. Default: True)
    edit_terminals
            A callback for editing the terminals before parse.
    import_paths
            A List of either paths or loader functions to specify from where grammars are imported
    source_path
            Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading
    **=== End of Options ===**
    """
    if __doc__:
        __doc__ += OPTIONS_DOC

    # Default value for every recognised option. Only keys listed here are
    # accepted by __init__ and __setattr__.
    _defaults: Dict[str, Any] = {
        "debug": False,
        "strict": False,
        "keep_all_tokens": False,
        "tree_class": None,
        "cache": False,
        "postlex": None,
        "parser": "earley",
        "lexer": "auto",
        "transformer": None,
        "start": "start",
        "priority": "auto",
        "ambiguity": "auto",
        "regex": False,
        "propagate_positions": False,
        "lexer_callbacks": {},
        "maybe_placeholders": True,
        "edit_terminals": None,
        "g_regex_flags": 0,
        "use_bytes": False,
        "ordered_sets": True,
        "import_paths": [],
        "source_path": None,
        "_plugins": {},
    }

    def __init__(self, options_dict: Dict[str, Any]) -> None:
        """Merge *options_dict* over the defaults and validate the result.

        Values supplied for options whose default is a ``bool`` are coerced
        with ``bool()``, except ``cache``, ``use_bytes`` and
        ``propagate_positions``. A string ``start`` is wrapped in a list.

        Raises:
            ConfigurationError: for an embedded transformer combined with the
                Earley parser, or for option names that are not recognised.
        """
        o = dict(options_dict)

        options = {}
        for name, default in self._defaults.items():
            if name in o:
                value = o.pop(name)
                if isinstance(default, bool) and name not in (
                    "cache",
                    "use_bytes",
                    "propagate_positions",
                ):
                    value = bool(value)
            elif isinstance(default, (dict, list)):
                # Give each instance its own container so that mutating one
                # instance's option never alters the shared class defaults.
                value = default.copy()
            else:
                value = default

            options[name] = value

        if isinstance(options["start"], str):
            options["start"] = [options["start"]]

        # Written through __dict__ because __setattr__ only accepts option names.
        self.__dict__["options"] = options

        assert_config(self.parser, ("earley", "lalr", "cyk", None))

        if self.parser == "earley" and self.transformer:
            raise ConfigurationError(
                "Cannot specify an embedded transformer when using the Earley algorithm. "
                "Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)"
            )

        # Anything left over was not a recognised option name.
        if o:
            raise ConfigurationError("Unknown options: %s" % o.keys())

    def __getattr__(self, name: str) -> Any:
        """Look *name* up in the options dict; unknown names raise ``AttributeError``."""
        try:
            return self.__dict__["options"][name]
        except KeyError as e:
            raise AttributeError(e)

    def __setattr__(self, name: str, value: str) -> None:
        """Store *value* under *name* after checking that *name* is an existing option."""
        assert_config(
            name, self.options.keys(), "%r isn't a valid option. Expected one of: %s"
        )
        self.options[name] = value

    def serialize(self, memo=None) -> Dict[str, Any]:
        """Return the options dict itself (not a copy)."""
        return self.options

    @classmethod
    def deserialize(
        cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]
    ) -> "LarkOptions":
        """Build an instance from a serialized options dict; *memo* is unused."""
        return cls(data)
|
|
3457
|
+
|
|
3458
|
+
|
|
3459
|
+
##
|
|
3460
|
+
|
|
3461
|
+
##
|
|
3462
|
+
|
|
3463
|
+
# Option names that survive when Lark restores itself from a cache file:
# Lark.__init__ deletes every other keyword option before calling ``_load``,
# and passes this same set to ``save`` as ``exclude_options`` when writing.
_LOAD_ALLOWED_OPTIONS = {
    "postlex",
    "transformer",
    "lexer_callbacks",
    "use_bytes",
    "debug",
    "g_regex_flags",
    "regex",
    "propagate_positions",
    "tree_class",
    "_plugins",
}

# Values accepted for the ``priority`` and ``ambiguity`` options; anything
# else makes Lark.__init__ raise ConfigurationError.
_VALID_PRIORITY_OPTIONS = ("auto", "normal", "invert", None)
_VALID_AMBIGUITY_OPTIONS = ("auto", "resolve", "explicit", "forest")


# Type variable bound to ``Lark`` (forward reference, the class follows).
_T = TypeVar("_T", bound="Lark")
|
|
3481
|
+
|
|
3482
|
+
|
|
3483
|
+
class Lark(Serialize):
|
|
3484
|
+
#--
|
|
3485
|
+
|
|
3486
|
+
source_path: str
|
|
3487
|
+
source_grammar: str
|
|
3488
|
+
grammar: "Grammar"
|
|
3489
|
+
options: LarkOptions
|
|
3490
|
+
lexer: Lexer
|
|
3491
|
+
parser: "ParsingFrontend"
|
|
3492
|
+
terminals: Collection[TerminalDef]
|
|
3493
|
+
|
|
3494
|
+
def __init__(self, grammar: "Union[Grammar, str, IO[str]]", **options) -> None:
    """Load *grammar*, resolve the options and build the parser and/or lexer.

    Args:
        grammar: grammar text, an object exposing ``read()`` (its ``name``
            attribute, if any, becomes the source path when the
            ``source_path`` option is unset), or a ``Grammar`` instance.
        **options: keyword options validated by ``LarkOptions``.

    When the ``cache`` option is set and a matching cache file is found, the
    instance is restored through ``_load`` and construction stops there.

    Raises:
        ImportError: ``regex=True`` was requested but the module is unavailable.
        ConfigurationError: for inconsistent or invalid option combinations.
    """
    self.options = LarkOptions(options)
    re_module: types.ModuleType

    # Select the regular-expression backend.
    use_regex = self.options.regex
    if use_regex:
        if _has_regex:
            re_module = regex
        else:
            raise ImportError(
                "`regex` module must be installed if calling `Lark(regex=True)`."
            )
    else:
        re_module = re

    # Work out where the grammar came from (used for loading/imports below).
    if self.options.source_path is None:
        try:
            self.source_path = grammar.name
        except AttributeError:
            self.source_path = "<string>"
    else:
        self.source_path = self.options.source_path

    # File-like grammars are read into text; anything else is left untouched.
    try:
        read = grammar.read
    except AttributeError:
        pass
    else:
        grammar = read()

    cache_fn = None
    cache_sha256 = None
    if isinstance(grammar, str):
        self.source_grammar = grammar
        if self.options.use_bytes:
            if not isascii(grammar):
                raise ConfigurationError(
                    "Grammar must be ascii only, when use_bytes=True"
                )

        if self.options.cache:
            if self.options.parser != "lalr":
                raise ConfigurationError(
                    "cache only works with parser='lalr' for now"
                )

            # The cache key covers the grammar text, the options that can be
            # rendered with str(), the package version and the Python version.
            unhashable = (
                "transformer",
                "postlex",
                "lexer_callbacks",
                "edit_terminals",
                "_plugins",
            )
            options_str = "".join(
                k + str(v) for k, v in options.items() if k not in unhashable
            )
            from . import __version__

            s = grammar + options_str + __version__ + str(sys.version_info[:2])
            cache_sha256 = sha256_digest(s)

            if isinstance(self.options.cache, str):
                cache_fn = self.options.cache
            else:
                if self.options.cache is not True:
                    raise ConfigurationError("cache argument must be bool or str")

                try:
                    username = getpass.getuser()
                except Exception:
                    # Any failure is acceptable here: the name only makes the
                    # temporary file name more specific.
                    username = "unknown"

                cache_fn = (
                    tempfile.gettempdir()
                    + "/.lark_cache_%s_%s_%s_%s.tmp"
                    % (username, cache_sha256, *sys.version_info[:2])
                )

            old_options = self.options
            try:
                with FS.open(cache_fn, "rb") as f:
                    logger.debug("Loading grammar from cache: %s", cache_fn)
                    # Only the options in _LOAD_ALLOWED_OPTIONS are passed on
                    # to _load; drop the rest.
                    for name in set(options) - _LOAD_ALLOWED_OPTIONS:
                        del options[name]
                    file_sha256 = f.readline().rstrip(b"\n")
                    # NOTE(security): pickle.load executes whatever the cache
                    # file contains - only point ``cache`` at trusted locations.
                    cached_used_files = pickle.load(f)
                    if file_sha256 == cache_sha256.encode(
                        "utf8"
                    ) and verify_used_files(cached_used_files):
                        cached_parser_data = pickle.load(f)
                        self._load(cached_parser_data, **options)
                        return
            except FileNotFoundError:
                # No cache yet: build from the grammar as usual.
                pass
            except Exception:
                logger.exception(
                    "Failed to load Lark from cache: %r. We will try to carry on.",
                    cache_fn,
                )

                # A partially executed _load may have replaced the options;
                # put the original ones back before building from scratch.
                self.options = old_options

        self.grammar, used_files = load_grammar(
            grammar,
            self.source_path,
            self.options.import_paths,
            self.options.keep_all_tokens,
        )
    else:
        assert isinstance(grammar, Grammar)
        self.grammar = grammar

    # Resolve lexer="auto" from the chosen parser.
    if self.options.lexer == "auto":
        if self.options.parser == "lalr":
            self.options.lexer = "contextual"
        elif self.options.parser == "earley":
            if self.options.postlex is not None:
                logger.info(
                    "postlex can't be used with the dynamic lexer, so we use 'basic' instead. "
                    "Consider using lalr with contextual instead of earley"
                )
                self.options.lexer = "basic"
            else:
                self.options.lexer = "dynamic"
        elif self.options.parser == "cyk":
            self.options.lexer = "basic"
        else:
            assert False, self.options.parser
    lexer = self.options.lexer
    if isinstance(lexer, type):
        assert issubclass(lexer, Lexer)
    else:
        assert_config(lexer, ("basic", "contextual", "dynamic", "dynamic_complete"))
        if self.options.postlex is not None and "dynamic" in lexer:
            raise ConfigurationError(
                "Can't use postlex with a dynamic lexer. Use basic or contextual instead"
            )

    # Resolve ambiguity="auto"; an explicit value needs a parser that supports it.
    if self.options.ambiguity == "auto":
        if self.options.parser == "earley":
            self.options.ambiguity = "resolve"
    else:
        assert_config(
            self.options.parser,
            ("earley", "cyk"),
            "%r doesn't support disambiguation. Use one of these parsers instead: %s",
        )

    if self.options.priority == "auto":
        self.options.priority = "normal"

    if self.options.priority not in _VALID_PRIORITY_OPTIONS:
        raise ConfigurationError(
            "invalid priority option: %r. Must be one of %r"
            % (self.options.priority, _VALID_PRIORITY_OPTIONS)
        )
    if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
        raise ConfigurationError(
            "invalid ambiguity option: %r. Must be one of %r"
            % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)
        )

    if self.options.parser is None:
        terminals_to_keep = "*"
    elif self.options.postlex is not None:
        terminals_to_keep = set(self.options.postlex.always_accept)
    else:
        terminals_to_keep = set()

    # Compile the grammar into terminals, rules and ignored tokens.
    self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(
        self.options.start, terminals_to_keep
    )

    if self.options.edit_terminals:
        for t in self.terminals:
            self.options.edit_terminals(t)

    self._terminals_dict = {t.name: t for t in self.terminals}

    # priority="invert" negates every priority; priority=None clears rule
    # priorities and resets terminal priorities to 0.
    if self.options.priority == "invert":
        for rule in self.rules:
            if rule.options.priority is not None:
                rule.options.priority = -rule.options.priority
        for term in self.terminals:
            term.priority = -term.priority
    elif self.options.priority is None:
        for rule in self.rules:
            if rule.options.priority is not None:
                rule.options.priority = None
        for term in self.terminals:
            term.priority = 0

    self.lexer_conf = LexerConf(
        self.terminals,
        re_module,
        self.ignore_tokens,
        self.options.postlex,
        self.options.lexer_callbacks,
        self.options.g_regex_flags,
        use_bytes=self.options.use_bytes,
        strict=self.options.strict,
    )

    if self.options.parser:
        self.parser = self._build_parser()
    elif lexer:
        self.lexer = self._build_lexer()

    if cache_fn:
        logger.debug("Saving grammar to cache: %s", cache_fn)
        try:
            with FS.open(cache_fn, "wb") as f:
                assert cache_sha256 is not None
                f.write(cache_sha256.encode("utf8") + b"\n")
                pickle.dump(used_files, f)
                self.save(f, _LOAD_ALLOWED_OPTIONS)
        except IOError:
            # logger.exception records the active exception itself; passing
            # it as an extra argument (as before) did not match the single
            # %r placeholder and broke the log record's formatting.
            logger.exception("Failed to save Lark to cache: %r.", cache_fn)
|
|
3754
|
+
|
|
3755
|
+
if __doc__:
|
|
3756
|
+
__doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
|
|
3757
|
+
|
|
3758
|
+
__serialize_fields__ = "parser", "rules", "options"
|
|
3759
|
+
|
|
3760
|
+
def _build_lexer(self, dont_ignore: bool = False) -> BasicLexer:
    """Return a ``BasicLexer`` built from ``self.lexer_conf``.

    When ``dont_ignore`` is true the lexer is built from a shallow copy of the
    configuration whose ``ignore`` attribute is an empty tuple, so the stored
    configuration object itself is not modified.
    """
    if not dont_ignore:
        return BasicLexer(self.lexer_conf)

    from copy import copy as shallow_copy

    conf = shallow_copy(self.lexer_conf)
    conf.ignore = ()
    return BasicLexer(conf)
|
|
3768
|
+
|
|
3769
|
+
def _prepare_callbacks(self) -> None:
    """Populate ``self._callbacks`` for the parser.

    Unless the ``ambiguity`` option is ``"forest"``, a ``ParseTreeBuilder`` is
    created (and kept on ``self._parse_tree_builder``) and asked for the rule
    callbacks.  The lexer callbacks derived from the transformer and the
    terminals are then merged in.
    """
    opts = self.options
    self._callbacks = {}

    if opts.ambiguity != "forest":
        # Fourth builder argument: only set for a non-LALR parser that is
        # configured with explicit ambiguity.
        explicit_ambiguity = opts.parser != "lalr" and opts.ambiguity == "explicit"
        self._parse_tree_builder = ParseTreeBuilder(
            self.rules,
            opts.tree_class or Tree,
            opts.propagate_positions,
            explicit_ambiguity,
            opts.maybe_placeholders,
        )
        self._callbacks = self._parse_tree_builder.create_callback(opts.transformer)

    lexer_callbacks = _get_lexer_callbacks(opts.transformer, self.terminals)
    self._callbacks.update(lexer_callbacks)
|
|
3787
|
+
|
|
3788
|
+
def _build_parser(self) -> "ParsingFrontend":
    """Prepare the callbacks, validate the parser/lexer pair and build the frontend.

    Returns whatever ``_construct_parsing_frontend`` produces for the current
    options, lexer configuration and a freshly created ``ParserConf``.
    """
    self._prepare_callbacks()

    opts = self.options
    _validate_frontend_args(opts.parser, opts.lexer)

    conf = ParserConf(self.rules, self._callbacks, opts.start)
    frontend = _construct_parsing_frontend(
        opts.parser, opts.lexer, self.lexer_conf, conf, options=opts
    )
    return frontend
|
|
3799
|
+
|
|
3800
|
+
def save(self, f, exclude_options: Collection[str] = ()) -> None:
    """Pickle this instance into the file-like object ``f``.

    The instance is serialized with ``memo_serialize`` and written as a dict
    with the keys ``"data"`` and ``"memo"`` using the highest pickle protocol.
    Option names listed in ``exclude_options`` are left out of the stored
    options.

    Raises:
        NotImplementedError: if the configured parser is not ``"lalr"``.
    """
    if self.options.parser != "lalr":
        raise NotImplementedError(
            "Lark.save() is only implemented for the LALR(1) parser."
        )

    data, memo = self.memo_serialize([TerminalDef, Rule])

    if exclude_options:
        kept_options = {}
        for option_name, option_value in data["options"].items():
            if option_name not in exclude_options:
                kept_options[option_name] = option_value
        data["options"] = kept_options

    payload = {"data": data, "memo": memo}
    pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)
|
|
3812
|
+
|
|
3813
|
+
@classmethod
def load(cls: Type[_T], f) -> _T:
    """Create an instance from ``f`` without running ``__init__``.

    A bare instance is allocated with ``cls.__new__`` and filled in by its
    ``_load`` method, whose result is returned.
    """
    return cls.__new__(cls)._load(f)
|
|
3818
|
+
|
|
3819
|
+
def _deserialize_lexer_conf(
    self,
    data: Dict[str, Any],
    memo: Dict[int, Union[TerminalDef, Rule]],
    options: LarkOptions,
) -> LexerConf:
    """Rebuild the lexer configuration from serialized parser data.

    ``data["lexer_conf"]`` is deserialized with ``memo``; the result then has
    its runtime settings overwritten from ``options`` and its
    ``skip_validation`` flag set to ``True`` before it is returned.
    """
    conf = LexerConf.deserialize(data["lexer_conf"], memo)

    # Settings that are taken from the supplied options rather than from the
    # serialized data; applied in the listed order.
    runtime_settings = {
        "callbacks": options.lexer_callbacks or {},
        "re_module": regex if options.regex else re,
        "use_bytes": options.use_bytes,
        "g_regex_flags": options.g_regex_flags,
        "skip_validation": True,
        "postlex": options.postlex,
    }
    for attr_name, value in runtime_settings.items():
        setattr(conf, attr_name, value)
    return conf
|
|
3833
|
+
|
|
3834
|
+
def _load(self: _T, f: Any, **kwargs) -> _T:
    """Fill in this instance from serialized state and return it.

    ``f`` is either a dict with the keys ``"memo"`` and ``"data"`` or an object
    that ``pickle.load`` can read such a dict from.  Keyword arguments override
    the stored options.

    Raises:
        ConfigurationError: if a keyword argument names a known option that is
            not in ``_LOAD_ALLOWED_OPTIONS``.
    """
    # NOTE(review): pickle.load can run arbitrary code while unpickling, so
    # only trusted input should reach this branch.
    payload = f if isinstance(f, dict) else pickle.load(f)
    memo_json = payload["memo"]
    data = payload["data"]

    assert memo_json
    type_map = {"Rule": Rule, "TerminalDef": TerminalDef}
    memo = SerializeMemoizer.deserialize(memo_json, type_map, {})

    merged_options = dict(data["options"])
    rejected = set(kwargs) - _LOAD_ALLOWED_OPTIONS
    if rejected & set(LarkOptions._defaults):
        raise ConfigurationError(
            "Some options are not allowed when loading a Parser: {}".format(rejected)
        )
    merged_options.update(kwargs)

    self.options = LarkOptions.deserialize(merged_options, memo)
    self.rules = [Rule.deserialize(rule_data, memo) for rule_data in data["rules"]]
    self.source_path = "<deserialized>"
    _validate_frontend_args(self.options.parser, self.options.lexer)

    # The lexer configuration and the parsing frontend are both restored from
    # the same "parser" entry of the serialized data.
    parser_data = data["parser"]
    self.lexer_conf = self._deserialize_lexer_conf(parser_data, memo, self.options)
    self.terminals = self.lexer_conf.terminals
    self._prepare_callbacks()
    self._terminals_dict = {term.name: term for term in self.terminals}
    self.parser = _deserialize_parsing_frontend(
        parser_data,
        memo,
        self.lexer_conf,
        self._callbacks,
        self.options,
    )
    return self
|
|
3873
|
+
|
|
3874
|
+
@classmethod
def _load_from_dict(cls, data, memo, **kwargs):
    """Create an instance from already-decoded ``data`` and ``memo`` objects.

    The two objects are wrapped in the dict layout that ``_load`` expects and
    handed, together with ``kwargs``, to a bare instance made with ``__new__``.
    """
    serialized = {"data": data, "memo": memo}
    return cls.__new__(cls)._load(serialized, **kwargs)
|
|
3878
|
+
|
|
3879
|
+
@classmethod
def open(
    cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options
) -> _T:
    """Create an instance from the grammar file at ``grammar_filename``.

    When ``rel_to`` is given, the file name is joined onto the directory part
    of ``rel_to``.  The file is opened as UTF-8 text and the open file object
    is passed to the constructor together with ``options``.
    """
    if rel_to:
        grammar_filename = os.path.join(os.path.dirname(rel_to), grammar_filename)

    with open(grammar_filename, encoding="utf8") as grammar_file:
        instance = cls(grammar_file, **options)
    return instance
|
|
3889
|
+
|
|
3890
|
+
@classmethod
def open_from_package(
    cls: Type[_T],
    package: str,
    grammar_path: str,
    search_paths: "Sequence[str]" = [""],
    **options
) -> _T:
    """Create an instance from a grammar that is loaded out of a package.

    A ``FromPackageLoader`` is built from ``package`` and ``search_paths`` and
    used to read ``grammar_path``.  The path it reports becomes the default
    ``source_path`` option, and the loader itself is placed at the end of the
    ``import_paths`` option before the constructor is called with the grammar
    text.

    Args:
        package: Passed to ``FromPackageLoader`` as its first argument.
        grammar_path: Path of the grammar handed to the loader.
        search_paths: Passed to ``FromPackageLoader``; not modified here.
        **options: Forwarded to the constructor.  ``import_paths`` may be any
            iterable (or ``None``); it is copied, never modified in place.
    """
    package_loader = FromPackageLoader(package, search_paths)
    full_path, text = package_loader(None, grammar_path)
    options.setdefault("source_path", full_path)
    # Build a fresh list instead of appending to the caller's object: the
    # value inside **options is the very list the caller passed in, so an
    # in-place append would leak the loader into the caller's data (and fail
    # for tuples).
    caller_paths = options.get("import_paths") or ()
    options["import_paths"] = [*caller_paths, package_loader]
    return cls(text, **options)
|
|
3905
|
+
|
|
3906
|
+
def __repr__(self):
    """Return a short description showing the source path, parser and lexer options."""
    opts = self.options
    shown = (self.source_path, opts.parser, opts.lexer)
    return "Lark(open(%r), parser=%r, lexer=%r, ...)" % shown
|
|
3912
|
+
|
|
3913
|
+
def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
    """Tokenize ``text`` without parsing it and return the token stream.

    The instance's ``lexer`` attribute is reused when it exists and
    ``dont_ignore`` is false; otherwise a lexer is built with
    ``_build_lexer(dont_ignore)``.  If a ``postlex`` option is set, the stream
    is passed through its ``process`` method first.
    """
    if hasattr(self, "lexer") and not dont_ignore:
        active_lexer = self.lexer
    else:
        active_lexer = self._build_lexer(dont_ignore)

    stream = LexerThread.from_text(active_lexer, text).lex(None)

    if not self.options.postlex:
        return stream
    return self.options.postlex.process(stream)
|
|
3925
|
+
|
|
3926
|
+
def get_terminal(self, name: str) -> TerminalDef:
    """Look up the terminal definition registered under ``name``.

    Raises:
        KeyError: if ``name`` is not a key of ``self._terminals_dict``.
    """
    terminals_by_name = self._terminals_dict
    return terminals_by_name[name]
|
|
3929
|
+
|
|
3930
|
+
def parse_interactive(
    self, text: Optional[str] = None, start: Optional[str] = None
) -> "InteractiveParser":
    """Delegate to ``self.parser.parse_interactive`` and return its result.

    ``text`` is forwarded positionally and ``start`` as a keyword argument.
    """
    frontend = self.parser
    return frontend.parse_interactive(text, start=start)
|
|
3935
|
+
|
|
3936
|
+
def parse(
    self,
    text: str,
    start: Optional[str] = None,
    on_error: "Optional[Callable[[UnexpectedInput], bool]]" = None,
) -> "ParseTree":
    """Parse ``text`` by delegating to ``self.parser.parse`` and return its result.

    ``text`` is forwarded positionally; ``start`` and ``on_error`` are
    forwarded as keyword arguments.
    """
    frontend = self.parser
    result = frontend.parse(text, start=start, on_error=on_error)
    return result
|
|
3944
|
+
|
|
3945
|
+
|
|
3946
|
+
|
|
3947
|
+
|
|
3948
|
+
|
|
3949
|
+
class DedentError(LarkError):
    """Error type for a dedent that matches no enclosing indentation level."""
|
|
3951
|
+
|
|
3952
|
+
|
|
3953
|
+
class Indenter(PostLex, ABC):
    """Post-lexer that derives indent and dedent tokens from newline tokens.

    Subclasses supply the token type names (``NL_type``, ``INDENT_type``,
    ``DEDENT_type``, ``OPEN_PAREN_types``, ``CLOSE_PAREN_types``) and the
    width counted for a tab character (``tab_len``).
    """

    # Number of currently open parentheses; newlines are dropped while > 0.
    paren_level: int
    # Stack of active indentation widths; the bottom entry is always 0.
    indent_level: List[int]

    def __init__(self) -> None:
        self.paren_level = 0
        self.indent_level = [0]
        assert self.tab_len > 0

    def handle_NL(self, token: Token) -> Iterator[Token]:
        """Yield ``token`` followed by the indent/dedent tokens it implies.

        Nothing is yielded while inside parentheses.

        Raises:
            DedentError: if the new width matches no level left on the stack.
        """
        if self.paren_level > 0:
            return

        yield token

        # Whatever follows the last line break in the token is the indentation.
        whitespace = token.rsplit("\n", 1)[1]
        width = whitespace.count(" ") + whitespace.count("\t") * self.tab_len

        if width > self.indent_level[-1]:
            self.indent_level.append(width)
            yield Token.new_borrow_pos(self.INDENT_type, whitespace, token)
            return

        # One dedent token per level that is deeper than the new width.
        while width < self.indent_level[-1]:
            self.indent_level.pop()
            yield Token.new_borrow_pos(self.DEDENT_type, whitespace, token)

        if width != self.indent_level[-1]:
            raise DedentError(
                "Unexpected dedent to column %s. Expected dedent to %s"
                % (width, self.indent_level[-1])
            )

    def _process(self, stream):
        """Generator behind ``process``: rewrite ``stream`` token by token."""
        for tok in stream:
            if tok.type != self.NL_type:
                yield tok
            else:
                yield from self.handle_NL(tok)

            # Track bracket nesting after the token has been emitted.
            if tok.type in self.OPEN_PAREN_types:
                self.paren_level += 1
            elif tok.type in self.CLOSE_PAREN_types:
                self.paren_level -= 1
                assert self.paren_level >= 0

        # End of input: close every indentation level that is still open.
        while len(self.indent_level) > 1:
            self.indent_level.pop()
            yield Token(self.DEDENT_type, "")

        assert self.indent_level == [0], self.indent_level

    def process(self, stream):
        """Reset the nesting and indentation state, then process ``stream``."""
        self.paren_level = 0
        self.indent_level = [0]
        return self._process(stream)

    @property
    def always_accept(self):
        """One-element tuple holding the newline token type."""
        return (self.NL_type,)

    @property
    @abstractmethod
    def NL_type(self) -> str:
        """Type name of the newline token."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def OPEN_PAREN_types(self) -> List[str]:
        """Token types that increase the parenthesis nesting level."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def CLOSE_PAREN_types(self) -> List[str]:
        """Token types that decrease the parenthesis nesting level."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def INDENT_type(self) -> str:
        """Type name given to generated indent tokens."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def DEDENT_type(self) -> str:
        """Type name given to generated dedent tokens."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def tab_len(self) -> int:
        """Width counted for each tab character in an indentation string."""
        raise NotImplementedError()
|
|
4045
|
+
|
|
4046
|
+
|
|
4047
|
+
class PythonIndenter(Indenter):
    """``Indenter`` configured with fixed token type names and a tab width of 8."""

    # Token type names for newline, indent and dedent tokens.
    NL_type = "_NEWLINE"
    INDENT_type = "_INDENT"
    DEDENT_type = "_DEDENT"

    # Bracket token types; newlines between an opener and its closer are dropped.
    OPEN_PAREN_types = ["LPAR", "LSQB", "LBRACE"]
    CLOSE_PAREN_types = ["RPAR", "RSQB", "RBRACE"]

    # Each tab in an indentation string counts as this many columns.
    tab_len = 8
|
|
4054
|
+
|
|
4055
|
+
|
|
4056
|
+
|
|
4057
|
+
import pickle, zlib, base64
|
|
4058
|
+
# Embedded payloads: base64 text wrapping a zlib-compressed pickle.  The byte
# literals are empty in this copy of the file.
# NOTE(review): unpickling is acceptable here only because the payload is
# shipped inside this module rather than read from external input.
DATA = pickle.loads(
    zlib.decompress(
        base64.b64decode(
            b''
        )
    )
)
MEMO = pickle.loads(
    zlib.decompress(
        base64.b64decode(
            b''
        )
    )
)
# NOTE(review): presumably the action codes referenced by the serialized
# parse table -- confirm against the parser code.
Shift, Reduce = 0, 1
|
|
4068
|
+
def Lark_StandAlone(**kwargs):
    """Build a ``Lark`` instance from the module-level ``DATA`` and ``MEMO`` objects.

    Keyword arguments are forwarded unchanged to ``Lark._load_from_dict``.
    """
    build = Lark._load_from_dict
    return build(DATA, MEMO, **kwargs)
|