jaclang-0.5.7-py3-none-any.whl → jaclang-0.5.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jaclang might be problematic. Click here for more details.

Files changed (49)
  1. jaclang/cli/cli.py +113 -7
  2. jaclang/cli/cmdreg.py +12 -0
  3. jaclang/compiler/__init__.py +58 -2
  4. jaclang/compiler/absyntree.py +1775 -61
  5. jaclang/compiler/codeloc.py +7 -0
  6. jaclang/compiler/compile.py +1 -1
  7. jaclang/compiler/constant.py +17 -0
  8. jaclang/compiler/parser.py +134 -112
  9. jaclang/compiler/passes/ir_pass.py +18 -0
  10. jaclang/compiler/passes/main/__init__.py +2 -0
  11. jaclang/compiler/passes/main/def_impl_match_pass.py +19 -3
  12. jaclang/compiler/passes/main/def_use_pass.py +1 -1
  13. jaclang/compiler/passes/main/fuse_typeinfo_pass.py +357 -0
  14. jaclang/compiler/passes/main/import_pass.py +7 -3
  15. jaclang/compiler/passes/main/pyast_gen_pass.py +350 -109
  16. jaclang/compiler/passes/main/pyast_load_pass.py +1779 -206
  17. jaclang/compiler/passes/main/registry_pass.py +126 -0
  18. jaclang/compiler/passes/main/schedules.py +4 -1
  19. jaclang/compiler/passes/main/sym_tab_build_pass.py +20 -28
  20. jaclang/compiler/passes/main/tests/test_pyast_build_pass.py +14 -5
  21. jaclang/compiler/passes/main/tests/test_registry_pass.py +39 -0
  22. jaclang/compiler/passes/main/tests/test_sym_tab_build_pass.py +8 -8
  23. jaclang/compiler/passes/main/tests/test_typeinfo_pass.py +7 -0
  24. jaclang/compiler/passes/main/type_check_pass.py +0 -1
  25. jaclang/compiler/passes/tool/jac_formatter_pass.py +8 -17
  26. jaclang/compiler/passes/tool/tests/test_unparse_validate.py +65 -0
  27. jaclang/compiler/passes/utils/mypy_ast_build.py +28 -14
  28. jaclang/compiler/symtable.py +23 -2
  29. jaclang/compiler/tests/test_parser.py +53 -0
  30. jaclang/compiler/workspace.py +52 -26
  31. jaclang/core/aott.py +193 -28
  32. jaclang/core/construct.py +59 -2
  33. jaclang/core/registry.py +115 -0
  34. jaclang/core/utils.py +25 -0
  35. jaclang/plugin/default.py +108 -26
  36. jaclang/plugin/feature.py +22 -4
  37. jaclang/plugin/spec.py +13 -7
  38. jaclang/utils/helpers.py +66 -3
  39. jaclang/utils/lang_tools.py +6 -38
  40. jaclang/utils/test.py +1 -0
  41. jaclang/utils/tests/test_lang_tools.py +11 -14
  42. jaclang/utils/treeprinter.py +10 -2
  43. {jaclang-0.5.7.dist-info → jaclang-0.5.9.dist-info}/METADATA +1 -1
  44. {jaclang-0.5.7.dist-info → jaclang-0.5.9.dist-info}/RECORD +47 -43
  45. {jaclang-0.5.7.dist-info → jaclang-0.5.9.dist-info}/WHEEL +1 -1
  46. jaclang/compiler/__jac_gen__/__init__.py +0 -0
  47. jaclang/compiler/__jac_gen__/jac_parser.py +0 -4069
  48. {jaclang-0.5.7.dist-info → jaclang-0.5.9.dist-info}/entry_points.txt +0 -0
  49. {jaclang-0.5.7.dist-info → jaclang-0.5.9.dist-info}/top_level.txt +0 -0
@@ -1,4069 +0,0 @@
1
- # The file was automatically generated by Lark v1.1.9
2
- __version__ = "1.1.9"
3
-
4
- #
5
- #
6
- # Lark Stand-alone Generator Tool
7
- # ----------------------------------
8
- # Generates a stand-alone LALR(1) parser
9
- #
10
- # Git: https://github.com/erezsh/lark
11
- # Author: Erez Shinan (erezshin@gmail.com)
12
- #
13
- #
14
- # >>> LICENSE
15
- #
16
- # This tool and its generated code use a separate license from Lark,
17
- # and are subject to the terms of the Mozilla Public License, v. 2.0.
18
- # If a copy of the MPL was not distributed with this
19
- # file, You can obtain one at https://mozilla.org/MPL/2.0/.
20
- #
21
- # If you wish to purchase a commercial license for this tool and its
22
- # generated code, you may contact me via email or otherwise.
23
- #
24
- # If MPL2 is incompatible with your free or open-source project,
25
- # contact me and we'll work it out.
26
- #
27
- #
28
-
29
- from copy import deepcopy
30
- from abc import ABC, abstractmethod
31
- from types import ModuleType
32
- from typing import (
33
- TypeVar,
34
- Generic,
35
- Type,
36
- Tuple,
37
- List,
38
- Dict,
39
- Iterator,
40
- Collection,
41
- Callable,
42
- Optional,
43
- FrozenSet,
44
- Any,
45
- Union,
46
- Iterable,
47
- IO,
48
- TYPE_CHECKING,
49
- overload,
50
- Sequence,
51
- Pattern as REPattern,
52
- ClassVar,
53
- Set,
54
- Mapping,
55
- )
56
-
57
-
58
-
59
-
60
- class LarkError(Exception):
61
- pass
62
-
63
-
64
- class ConfigurationError(LarkError, ValueError):
65
- pass
66
-
67
-
68
- def assert_config(value, options: Collection, msg="Got %r, expected one of %s"):
69
- if value not in options:
70
- raise ConfigurationError(msg % (value, options))
71
-
72
-
73
- class GrammarError(LarkError):
74
- pass
75
-
76
-
77
- class ParseError(LarkError):
78
- pass
79
-
80
-
81
- class LexError(LarkError):
82
- pass
83
-
84
-
85
- T = TypeVar("T")
86
-
87
-
88
- class UnexpectedInput(LarkError):
89
- #--
90
-
91
- line: int
92
- column: int
93
- pos_in_stream = None
94
- state: Any
95
- _terminals_by_name = None
96
- interactive_parser: "InteractiveParser"
97
-
98
- def get_context(self, text: str, span: int = 40) -> str:
99
- #--
100
- assert self.pos_in_stream is not None, self
101
- pos = self.pos_in_stream
102
- start = max(pos - span, 0)
103
- end = pos + span
104
- if not isinstance(text, bytes):
105
- before = text[start:pos].rsplit("\n", 1)[-1]
106
- after = text[pos:end].split("\n", 1)[0]
107
- return before + after + "\n" + " " * len(before.expandtabs()) + "^\n"
108
- else:
109
- before = text[start:pos].rsplit(b"\n", 1)[-1]
110
- after = text[pos:end].split(b"\n", 1)[0]
111
- return (
112
- before + after + b"\n" + b" " * len(before.expandtabs()) + b"^\n"
113
- ).decode("ascii", "backslashreplace")
114
-
115
- def match_examples(
116
- self,
117
- parse_fn: "Callable[[str], Tree]",
118
- examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
119
- token_type_match_fallback: bool = False,
120
- use_accepts: bool = True,
121
- ) -> Optional[T]:
122
- #--
123
- assert self.state is not None, "Not supported for this exception"
124
-
125
- if isinstance(examples, Mapping):
126
- examples = examples.items()
127
-
128
- candidate = (None, False)
129
- for i, (label, example) in enumerate(examples):
130
- assert not isinstance(example, str), "Expecting a list"
131
-
132
- for j, malformed in enumerate(example):
133
- try:
134
- parse_fn(malformed)
135
- except UnexpectedInput as ut:
136
- if ut.state == self.state:
137
- if (
138
- use_accepts
139
- and isinstance(self, UnexpectedToken)
140
- and isinstance(ut, UnexpectedToken)
141
- and ut.accepts != self.accepts
142
- ):
143
- logger.debug(
144
- "Different accepts with same state[%d]: %s != %s at example [%s][%s]"
145
- % (self.state, self.accepts, ut.accepts, i, j)
146
- )
147
- continue
148
- if isinstance(
149
- self, (UnexpectedToken, UnexpectedEOF)
150
- ) and isinstance(ut, (UnexpectedToken, UnexpectedEOF)):
151
- if ut.token == self.token: ##
152
-
153
- logger.debug("Exact Match at example [%s][%s]" % (i, j))
154
- return label
155
-
156
- if token_type_match_fallback:
157
- ##
158
-
159
- if (ut.token.type == self.token.type) and not candidate[
160
- -1
161
- ]:
162
- logger.debug(
163
- "Token Type Fallback at example [%s][%s]"
164
- % (i, j)
165
- )
166
- candidate = label, True
167
-
168
- if candidate[0] is None:
169
- logger.debug(
170
- "Same State match at example [%s][%s]" % (i, j)
171
- )
172
- candidate = label, False
173
-
174
- return candidate[0]
175
-
176
- def _format_expected(self, expected):
177
- if self._terminals_by_name:
178
- d = self._terminals_by_name
179
- expected = [
180
- d[t_name].user_repr() if t_name in d else t_name for t_name in expected
181
- ]
182
- return "Expected one of: \n\t* %s\n" % "\n\t* ".join(expected)
183
-
184
-
185
- class UnexpectedEOF(ParseError, UnexpectedInput):
186
- #--
187
-
188
- expected: "List[Token]"
189
-
190
- def __init__(self, expected, state=None, terminals_by_name=None):
191
- super(UnexpectedEOF, self).__init__()
192
-
193
- self.expected = expected
194
- self.state = state
195
- from .lexer import Token
196
-
197
- self.token = Token("<EOF>", "") ##
198
-
199
- self.pos_in_stream = -1
200
- self.line = -1
201
- self.column = -1
202
- self._terminals_by_name = terminals_by_name
203
-
204
- def __str__(self):
205
- message = "Unexpected end-of-input. "
206
- message += self._format_expected(self.expected)
207
- return message
208
-
209
-
210
- class UnexpectedCharacters(LexError, UnexpectedInput):
211
- #--
212
-
213
- allowed: Set[str]
214
- considered_tokens: Set[Any]
215
-
216
- def __init__(
217
- self,
218
- seq,
219
- lex_pos,
220
- line,
221
- column,
222
- allowed=None,
223
- considered_tokens=None,
224
- state=None,
225
- token_history=None,
226
- terminals_by_name=None,
227
- considered_rules=None,
228
- ):
229
- super(UnexpectedCharacters, self).__init__()
230
-
231
- ##
232
-
233
- self.line = line
234
- self.column = column
235
- self.pos_in_stream = lex_pos
236
- self.state = state
237
- self._terminals_by_name = terminals_by_name
238
-
239
- self.allowed = allowed
240
- self.considered_tokens = considered_tokens
241
- self.considered_rules = considered_rules
242
- self.token_history = token_history
243
-
244
- if isinstance(seq, bytes):
245
- self.char = seq[lex_pos : lex_pos + 1].decode("ascii", "backslashreplace")
246
- else:
247
- self.char = seq[lex_pos]
248
- self._context = self.get_context(seq)
249
-
250
- def __str__(self):
251
- message = (
252
- "No terminal matches '%s' in the current parser context, at line %d col %d"
253
- % (self.char, self.line, self.column)
254
- )
255
- message += "\n\n" + self._context
256
- if self.allowed:
257
- message += self._format_expected(self.allowed)
258
- if self.token_history:
259
- message += "\nPrevious tokens: %s\n" % ", ".join(
260
- repr(t) for t in self.token_history
261
- )
262
- return message
263
-
264
-
265
- class UnexpectedToken(ParseError, UnexpectedInput):
266
- #--
267
-
268
- expected: Set[str]
269
- considered_rules: Set[str]
270
-
271
- def __init__(
272
- self,
273
- token,
274
- expected,
275
- considered_rules=None,
276
- state=None,
277
- interactive_parser=None,
278
- terminals_by_name=None,
279
- token_history=None,
280
- ):
281
- super(UnexpectedToken, self).__init__()
282
-
283
- ##
284
-
285
- self.line = getattr(token, "line", "?")
286
- self.column = getattr(token, "column", "?")
287
- self.pos_in_stream = getattr(token, "start_pos", None)
288
- self.state = state
289
-
290
- self.token = token
291
- self.expected = expected ##
292
-
293
- self._accepts = NO_VALUE
294
- self.considered_rules = considered_rules
295
- self.interactive_parser = interactive_parser
296
- self._terminals_by_name = terminals_by_name
297
- self.token_history = token_history
298
-
299
- @property
300
- def accepts(self) -> Set[str]:
301
- if self._accepts is NO_VALUE:
302
- self._accepts = (
303
- self.interactive_parser and self.interactive_parser.accepts()
304
- )
305
- return self._accepts
306
-
307
- def __str__(self):
308
- message = "Unexpected token %r at line %s, column %s.\n%s" % (
309
- self.token,
310
- self.line,
311
- self.column,
312
- self._format_expected(self.accepts or self.expected),
313
- )
314
- if self.token_history:
315
- message += "Previous tokens: %r\n" % self.token_history
316
-
317
- return message
318
-
319
-
320
- class VisitError(LarkError):
321
- #--
322
-
323
- obj: "Union[Tree, Token]"
324
- orig_exc: Exception
325
-
326
- def __init__(self, rule, obj, orig_exc):
327
- message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
328
- super(VisitError, self).__init__(message)
329
-
330
- self.rule = rule
331
- self.obj = obj
332
- self.orig_exc = orig_exc
333
-
334
-
335
- class MissingVariableError(LarkError):
336
- pass
337
-
338
-
339
-
340
- import sys, re
341
- import logging
342
-
343
- logger: logging.Logger = logging.getLogger("lark")
344
- logger.addHandler(logging.StreamHandler())
345
- ##
346
-
347
- ##
348
-
349
- logger.setLevel(logging.CRITICAL)
350
-
351
-
352
- NO_VALUE = object()
353
-
354
- T = TypeVar("T")
355
-
356
-
357
- def classify(
358
- seq: Iterable, key: Optional[Callable] = None, value: Optional[Callable] = None
359
- ) -> Dict:
360
- d: Dict[Any, Any] = {}
361
- for item in seq:
362
- k = key(item) if (key is not None) else item
363
- v = value(item) if (value is not None) else item
364
- try:
365
- d[k].append(v)
366
- except KeyError:
367
- d[k] = [v]
368
- return d
369
-
370
-
371
- def _deserialize(data: Any, namespace: Dict[str, Any], memo: Dict) -> Any:
372
- if isinstance(data, dict):
373
- if "__type__" in data: ##
374
-
375
- class_ = namespace[data["__type__"]]
376
- return class_.deserialize(data, memo)
377
- elif "@" in data:
378
- return memo[data["@"]]
379
- return {
380
- key: _deserialize(value, namespace, memo) for key, value in data.items()
381
- }
382
- elif isinstance(data, list):
383
- return [_deserialize(value, namespace, memo) for value in data]
384
- return data
385
-
386
-
387
- _T = TypeVar("_T", bound="Serialize")
388
-
389
-
390
- class Serialize:
391
- #--
392
-
393
- def memo_serialize(self, types_to_memoize: List) -> Any:
394
- memo = SerializeMemoizer(types_to_memoize)
395
- return self.serialize(memo), memo.serialize()
396
-
397
- def serialize(self, memo=None) -> Dict[str, Any]:
398
- if memo and memo.in_types(self):
399
- return {"@": memo.memoized.get(self)}
400
-
401
- fields = getattr(self, "__serialize_fields__")
402
- res = {f: _serialize(getattr(self, f), memo) for f in fields}
403
- res["__type__"] = type(self).__name__
404
- if hasattr(self, "_serialize"):
405
- self._serialize(res, memo) ##
406
-
407
- return res
408
-
409
- @classmethod
410
- def deserialize(cls: Type[_T], data: Dict[str, Any], memo: Dict[int, Any]) -> _T:
411
- namespace = getattr(cls, "__serialize_namespace__", [])
412
- namespace = {c.__name__: c for c in namespace}
413
-
414
- fields = getattr(cls, "__serialize_fields__")
415
-
416
- if "@" in data:
417
- return memo[data["@"]]
418
-
419
- inst = cls.__new__(cls)
420
- for f in fields:
421
- try:
422
- setattr(inst, f, _deserialize(data[f], namespace, memo))
423
- except KeyError as e:
424
- raise KeyError("Cannot find key for class", cls, e)
425
-
426
- if hasattr(inst, "_deserialize"):
427
- inst._deserialize() ##
428
-
429
-
430
- return inst
431
-
432
-
433
- class SerializeMemoizer(Serialize):
434
- #--
435
-
436
- __serialize_fields__ = ("memoized",)
437
-
438
- def __init__(self, types_to_memoize: List) -> None:
439
- self.types_to_memoize = tuple(types_to_memoize)
440
- self.memoized = Enumerator()
441
-
442
- def in_types(self, value: Serialize) -> bool:
443
- return isinstance(value, self.types_to_memoize)
444
-
445
- def serialize(self) -> Dict[int, Any]: ##
446
-
447
- return _serialize(self.memoized.reversed(), None)
448
-
449
- @classmethod
450
- def deserialize(cls, data: Dict[int, Any], namespace: Dict[str, Any], memo: Dict[Any, Any]) -> Dict[int, Any]: ##
451
-
452
- return _deserialize(data, namespace, memo)
453
-
454
-
455
- try:
456
- import regex
457
-
458
- _has_regex = True
459
- except ImportError:
460
- _has_regex = False
461
-
462
- if sys.version_info >= (3, 11):
463
- import re._parser as sre_parse
464
- import re._constants as sre_constants
465
- else:
466
- import sre_parse
467
- import sre_constants
468
-
469
- categ_pattern = re.compile(r"\\p{[A-Za-z_]+}")
470
-
471
-
472
- def get_regexp_width(expr: str) -> Union[Tuple[int, int], List[int]]:
473
- if _has_regex:
474
- ##
475
-
476
- ##
477
-
478
- ##
479
-
480
- regexp_final = re.sub(categ_pattern, "A", expr)
481
- else:
482
- if re.search(categ_pattern, expr):
483
- raise ImportError(
484
- "`regex` module must be installed in order to use Unicode categories.",
485
- expr,
486
- )
487
- regexp_final = expr
488
- try:
489
- ##
490
-
491
- return [int(x) for x in sre_parse.parse(regexp_final).getwidth()] ##
492
-
493
- except sre_constants.error:
494
- if not _has_regex:
495
- raise ValueError(expr)
496
- else:
497
- ##
498
-
499
- ##
500
-
501
- c = regex.compile(regexp_final)
502
- ##
503
-
504
- ##
505
-
506
- MAXWIDTH = getattr(sre_parse, "MAXWIDTH", sre_constants.MAXREPEAT)
507
- if c.match("") is None:
508
- ##
509
-
510
- return 1, int(MAXWIDTH)
511
- else:
512
- return 0, int(MAXWIDTH)
513
-
514
-
515
-
516
- from collections import OrderedDict
517
-
518
-
519
- class Meta:
520
-
521
- empty: bool
522
- line: int
523
- column: int
524
- start_pos: int
525
- end_line: int
526
- end_column: int
527
- end_pos: int
528
- orig_expansion: "List[TerminalDef]"
529
- match_tree: bool
530
-
531
- def __init__(self):
532
- self.empty = True
533
-
534
-
535
- _Leaf_T = TypeVar("_Leaf_T")
536
- Branch = Union[_Leaf_T, "Tree[_Leaf_T]"]
537
-
538
-
539
- class Tree(Generic[_Leaf_T]):
540
- #--
541
-
542
- data: str
543
- children: "List[Branch[_Leaf_T]]"
544
-
545
- def __init__(
546
- self, data: str, children: "List[Branch[_Leaf_T]]", meta: Optional[Meta] = None
547
- ) -> None:
548
- self.data = data
549
- self.children = children
550
- self._meta = meta
551
-
552
- @property
553
- def meta(self) -> Meta:
554
- if self._meta is None:
555
- self._meta = Meta()
556
- return self._meta
557
-
558
- def __repr__(self):
559
- return "Tree(%r, %r)" % (self.data, self.children)
560
-
561
- def _pretty_label(self):
562
- return self.data
563
-
564
- def _pretty(self, level, indent_str):
565
- yield f"{indent_str*level}{self._pretty_label()}"
566
- if len(self.children) == 1 and not isinstance(self.children[0], Tree):
567
- yield f"\t{self.children[0]}\n"
568
- else:
569
- yield "\n"
570
- for n in self.children:
571
- if isinstance(n, Tree):
572
- yield from n._pretty(level + 1, indent_str)
573
- else:
574
- yield f"{indent_str*(level+1)}{n}\n"
575
-
576
- def pretty(self, indent_str: str = " ") -> str:
577
- #--
578
- return "".join(self._pretty(0, indent_str))
579
-
580
- def __rich__(self, parent: Optional["rich.tree.Tree"] = None) -> "rich.tree.Tree":
581
- #--
582
- return self._rich(parent)
583
-
584
- def _rich(self, parent):
585
- if parent:
586
- tree = parent.add(f"[bold]{self.data}[/bold]")
587
- else:
588
- import rich.tree
589
-
590
- tree = rich.tree.Tree(self.data)
591
-
592
- for c in self.children:
593
- if isinstance(c, Tree):
594
- c._rich(tree)
595
- else:
596
- tree.add(f"[green]{c}[/green]")
597
-
598
- return tree
599
-
600
- def __eq__(self, other):
601
- try:
602
- return self.data == other.data and self.children == other.children
603
- except AttributeError:
604
- return False
605
-
606
- def __ne__(self, other):
607
- return not (self == other)
608
-
609
- def __hash__(self) -> int:
610
- return hash((self.data, tuple(self.children)))
611
-
612
- def iter_subtrees(self) -> "Iterator[Tree[_Leaf_T]]":
613
- #--
614
- queue = [self]
615
- subtrees = OrderedDict()
616
- for subtree in queue:
617
- subtrees[id(subtree)] = subtree
618
- ##
619
-
620
- queue += [
621
- c
622
- for c in reversed(subtree.children) ##
623
-
624
- if isinstance(c, Tree) and id(c) not in subtrees
625
- ]
626
-
627
- del queue
628
- return reversed(list(subtrees.values()))
629
-
630
- def iter_subtrees_topdown(self):
631
- #--
632
- stack = [self]
633
- stack_append = stack.append
634
- stack_pop = stack.pop
635
- while stack:
636
- node = stack_pop()
637
- if not isinstance(node, Tree):
638
- continue
639
- yield node
640
- for child in reversed(node.children):
641
- stack_append(child)
642
-
643
- def find_pred(
644
- self, pred: "Callable[[Tree[_Leaf_T]], bool]"
645
- ) -> "Iterator[Tree[_Leaf_T]]":
646
- #--
647
- return filter(pred, self.iter_subtrees())
648
-
649
- def find_data(self, data: str) -> "Iterator[Tree[_Leaf_T]]":
650
- #--
651
- return self.find_pred(lambda t: t.data == data)
652
-
653
-
654
- from functools import wraps, update_wrapper
655
- from inspect import getmembers, getmro
656
-
657
- _Return_T = TypeVar("_Return_T")
658
- _Return_V = TypeVar("_Return_V")
659
- _Leaf_T = TypeVar("_Leaf_T")
660
- _Leaf_U = TypeVar("_Leaf_U")
661
- _R = TypeVar("_R")
662
- _FUNC = Callable[..., _Return_T]
663
- _DECORATED = Union[_FUNC, type]
664
-
665
-
666
- class _DiscardType:
667
- #--
668
-
669
- def __repr__(self):
670
- return "lark.visitors.Discard"
671
-
672
-
673
- Discard = _DiscardType()
674
-
675
- ##
676
-
677
-
678
-
679
- class _Decoratable:
680
- #--
681
-
682
- @classmethod
683
- def _apply_v_args(cls, visit_wrapper):
684
- mro = getmro(cls)
685
- assert mro[0] is cls
686
- libmembers = {name for _cls in mro[1:] for name, _ in getmembers(_cls)}
687
- for name, value in getmembers(cls):
688
-
689
- ##
690
-
691
- if name.startswith("_") or (
692
- name in libmembers and name not in cls.__dict__
693
- ):
694
- continue
695
- if not callable(value):
696
- continue
697
-
698
- ##
699
-
700
- if isinstance(cls.__dict__[name], _VArgsWrapper):
701
- continue
702
-
703
- setattr(cls, name, _VArgsWrapper(cls.__dict__[name], visit_wrapper))
704
- return cls
705
-
706
- def __class_getitem__(cls, _):
707
- return cls
708
-
709
-
710
- class Transformer(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
711
- #--
712
-
713
- __visit_tokens__ = True ##
714
-
715
-
716
- def __init__(self, visit_tokens: bool = True) -> None:
717
- self.__visit_tokens__ = visit_tokens
718
-
719
- def _call_userfunc(self, tree, new_children=None):
720
- ##
721
-
722
- children = new_children if new_children is not None else tree.children
723
- try:
724
- f = getattr(self, tree.data)
725
- except AttributeError:
726
- return self.__default__(tree.data, children, tree.meta)
727
- else:
728
- try:
729
- wrapper = getattr(f, "visit_wrapper", None)
730
- if wrapper is not None:
731
- return f.visit_wrapper(f, tree.data, children, tree.meta)
732
- else:
733
- return f(children)
734
- except GrammarError:
735
- raise
736
- except Exception as e:
737
- raise VisitError(tree.data, tree, e)
738
-
739
- def _call_userfunc_token(self, token):
740
- try:
741
- f = getattr(self, token.type)
742
- except AttributeError:
743
- return self.__default_token__(token)
744
- else:
745
- try:
746
- return f(token)
747
- except GrammarError:
748
- raise
749
- except Exception as e:
750
- raise VisitError(token.type, token, e)
751
-
752
- def _transform_children(self, children):
753
- for c in children:
754
- if isinstance(c, Tree):
755
- res = self._transform_tree(c)
756
- elif self.__visit_tokens__ and isinstance(c, Token):
757
- res = self._call_userfunc_token(c)
758
- else:
759
- res = c
760
-
761
- if res is not Discard:
762
- yield res
763
-
764
- def _transform_tree(self, tree):
765
- children = list(self._transform_children(tree.children))
766
- return self._call_userfunc(tree, children)
767
-
768
- def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
769
- #--
770
- return self._transform_tree(tree)
771
-
772
- def __mul__(
773
- self: "Transformer[_Leaf_T, Tree[_Leaf_U]]",
774
- other: "Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V,]]",
775
- ) -> "TransformerChain[_Leaf_T, _Return_V]":
776
- #--
777
- return TransformerChain(self, other)
778
-
779
- def __default__(self, data, children, meta):
780
- #--
781
- return Tree(data, children, meta)
782
-
783
- def __default_token__(self, token):
784
- #--
785
- return token
786
-
787
-
788
- def merge_transformers(base_transformer=None, **transformers_to_merge):
789
- #--
790
- if base_transformer is None:
791
- base_transformer = Transformer()
792
- for prefix, transformer in transformers_to_merge.items():
793
- for method_name in dir(transformer):
794
- method = getattr(transformer, method_name)
795
- if not callable(method):
796
- continue
797
- if method_name.startswith("_") or method_name == "transform":
798
- continue
799
- prefixed_method = prefix + "__" + method_name
800
- if hasattr(base_transformer, prefixed_method):
801
- raise AttributeError(
802
- "Cannot merge: method '%s' appears more than once" % prefixed_method
803
- )
804
-
805
- setattr(base_transformer, prefixed_method, method)
806
-
807
- return base_transformer
808
-
809
-
810
- class InlineTransformer(Transformer): ##
811
-
812
- def _call_userfunc(self, tree, new_children=None):
813
- ##
814
-
815
- children = new_children if new_children is not None else tree.children
816
- try:
817
- f = getattr(self, tree.data)
818
- except AttributeError:
819
- return self.__default__(tree.data, children, tree.meta)
820
- else:
821
- return f(*children)
822
-
823
-
824
- class TransformerChain(Generic[_Leaf_T, _Return_T]):
825
-
826
- transformers: "Tuple[Union[Transformer, TransformerChain], ...]"
827
-
828
- def __init__(self, *transformers: "Union[Transformer, TransformerChain]") -> None:
829
- self.transformers = transformers
830
-
831
- def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
832
- for t in self.transformers:
833
- tree = t.transform(tree)
834
- return cast(_Return_T, tree)
835
-
836
- def __mul__(
837
- self: "TransformerChain[_Leaf_T, Tree[_Leaf_U]]",
838
- other: "Union[Transformer[_Leaf_U, _Return_V], TransformerChain[_Leaf_U, _Return_V]]",
839
- ) -> "TransformerChain[_Leaf_T, _Return_V]":
840
- return TransformerChain(*self.transformers + (other,))
841
-
842
-
843
- class Transformer_InPlace(Transformer[_Leaf_T, _Return_T]):
844
- #--
845
-
846
- def _transform_tree(self, tree): ##
847
-
848
- return self._call_userfunc(tree)
849
-
850
- def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
851
- for subtree in tree.iter_subtrees():
852
- subtree.children = list(self._transform_children(subtree.children))
853
-
854
- return self._transform_tree(tree)
855
-
856
-
857
- class Transformer_NonRecursive(Transformer[_Leaf_T, _Return_T]):
858
- #--
859
-
860
- def transform(self, tree: Tree[_Leaf_T]) -> _Return_T:
861
- ##
862
-
863
- rev_postfix = []
864
- q: List[Branch[_Leaf_T]] = [tree]
865
- while q:
866
- t = q.pop()
867
- rev_postfix.append(t)
868
- if isinstance(t, Tree):
869
- q += t.children
870
-
871
- ##
872
-
873
- stack: List = []
874
- for x in reversed(rev_postfix):
875
- if isinstance(x, Tree):
876
- size = len(x.children)
877
- if size:
878
- args = stack[-size:]
879
- del stack[-size:]
880
- else:
881
- args = []
882
-
883
- res = self._call_userfunc(x, args)
884
- if res is not Discard:
885
- stack.append(res)
886
-
887
- elif self.__visit_tokens__ and isinstance(x, Token):
888
- res = self._call_userfunc_token(x)
889
- if res is not Discard:
890
- stack.append(res)
891
- else:
892
- stack.append(x)
893
-
894
- (result,) = stack ##
895
-
896
- ##
897
-
898
- ##
899
-
900
- ##
901
-
902
- return cast(_Return_T, result)
903
-
904
-
905
- class Transformer_InPlaceRecursive(Transformer):
906
- #--
907
-
908
- def _transform_tree(self, tree):
909
- tree.children = list(self._transform_children(tree.children))
910
- return self._call_userfunc(tree)
911
-
912
-
913
- ##
914
-
915
-
916
-
917
- class VisitorBase:
918
- def _call_userfunc(self, tree):
919
- return getattr(self, tree.data, self.__default__)(tree)
920
-
921
- def __default__(self, tree):
922
- #--
923
- return tree
924
-
925
- def __class_getitem__(cls, _):
926
- return cls
927
-
928
-
929
- class Visitor(VisitorBase, ABC, Generic[_Leaf_T]):
930
- #--
931
-
932
- def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
933
- #--
934
- for subtree in tree.iter_subtrees():
935
- self._call_userfunc(subtree)
936
- return tree
937
-
938
- def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
939
- #--
940
- for subtree in tree.iter_subtrees_topdown():
941
- self._call_userfunc(subtree)
942
- return tree
943
-
944
-
945
- class Visitor_Recursive(VisitorBase, Generic[_Leaf_T]):
946
- #--
947
-
948
- def visit(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
949
- #--
950
- for child in tree.children:
951
- if isinstance(child, Tree):
952
- self.visit(child)
953
-
954
- self._call_userfunc(tree)
955
- return tree
956
-
957
- def visit_topdown(self, tree: Tree[_Leaf_T]) -> Tree[_Leaf_T]:
958
- #--
959
- self._call_userfunc(tree)
960
-
961
- for child in tree.children:
962
- if isinstance(child, Tree):
963
- self.visit_topdown(child)
964
-
965
- return tree
966
-
967
-
968
- class Interpreter(_Decoratable, ABC, Generic[_Leaf_T, _Return_T]):
969
- #--
970
-
971
- def visit(self, tree: Tree[_Leaf_T]) -> _Return_T:
972
- ##
973
-
974
- ##
975
-
976
- ##
977
-
978
- return self._visit_tree(tree)
979
-
980
- def _visit_tree(self, tree: Tree[_Leaf_T]):
981
- f = getattr(self, tree.data)
982
- wrapper = getattr(f, "visit_wrapper", None)
983
- if wrapper is not None:
984
- return f.visit_wrapper(f, tree.data, tree.children, tree.meta)
985
- else:
986
- return f(tree)
987
-
988
- def visit_children(self, tree: Tree[_Leaf_T]) -> List:
989
- return [
990
- self._visit_tree(child) if isinstance(child, Tree) else child
991
- for child in tree.children
992
- ]
993
-
994
- def __getattr__(self, name):
995
- return self.__default__
996
-
997
- def __default__(self, tree):
998
- return self.visit_children(tree)
999
-
1000
-
1001
- _InterMethod = Callable[[Type[Interpreter], _Return_T], _R]
1002
-
1003
-
1004
- def visit_children_decor(func: _InterMethod) -> _InterMethod:
1005
- #--
1006
-
1007
- @wraps(func)
1008
- def inner(cls, tree):
1009
- values = cls.visit_children(tree)
1010
- return func(cls, values)
1011
-
1012
- return inner
1013
-
1014
-
1015
- ##
1016
-
1017
-
1018
-
1019
- def _apply_v_args(obj, visit_wrapper):
1020
- try:
1021
- _apply = obj._apply_v_args
1022
- except AttributeError:
1023
- return _VArgsWrapper(obj, visit_wrapper)
1024
- else:
1025
- return _apply(visit_wrapper)
1026
-
1027
-
1028
- class _VArgsWrapper:
1029
- #--
1030
-
1031
- base_func: Callable
1032
-
1033
- def __init__(
1034
- self, func: Callable, visit_wrapper: Callable[[Callable, str, list, Any], Any]
1035
- ):
1036
- if isinstance(func, _VArgsWrapper):
1037
- func = func.base_func
1038
- ##
1039
-
1040
- self.base_func = func ##
1041
-
1042
- self.visit_wrapper = visit_wrapper
1043
- update_wrapper(self, func)
1044
-
1045
- def __call__(self, *args, **kwargs):
1046
- return self.base_func(*args, **kwargs)
1047
-
1048
- def __get__(self, instance, owner=None):
1049
- try:
1050
- ##
1051
-
1052
- ##
1053
-
1054
- g = type(self.base_func).__get__
1055
- except AttributeError:
1056
- return self
1057
- else:
1058
- return _VArgsWrapper(g(self.base_func, instance, owner), self.visit_wrapper)
1059
-
1060
- def __set_name__(self, owner, name):
1061
- try:
1062
- f = type(self.base_func).__set_name__
1063
- except AttributeError:
1064
- return
1065
- else:
1066
- f(self.base_func, owner, name)
1067
-
1068
-
1069
- def _vargs_inline(f, _data, children, _meta):
1070
- return f(*children)
1071
-
1072
-
1073
- def _vargs_meta_inline(f, _data, children, meta):
1074
- return f(meta, *children)
1075
-
1076
-
1077
- def _vargs_meta(f, _data, children, meta):
1078
- return f(meta, children)
1079
-
1080
-
1081
- def _vargs_tree(f, data, children, meta):
1082
- return f(Tree(data, children, meta))
1083
-
1084
-
1085
- def v_args(
1086
- inline: bool = False,
1087
- meta: bool = False,
1088
- tree: bool = False,
1089
- wrapper: Optional[Callable] = None,
1090
- ) -> Callable[[_DECORATED], _DECORATED]:
1091
- #--
1092
- if tree and (meta or inline):
1093
- raise ValueError(
1094
- "Visitor functions cannot combine 'tree' with 'meta' or 'inline'."
1095
- )
1096
-
1097
- func = None
1098
- if meta:
1099
- if inline:
1100
- func = _vargs_meta_inline
1101
- else:
1102
- func = _vargs_meta
1103
- elif inline:
1104
- func = _vargs_inline
1105
- elif tree:
1106
- func = _vargs_tree
1107
-
1108
- if wrapper is not None:
1109
- if func is not None:
1110
- raise ValueError(
1111
- "Cannot use 'wrapper' along with 'tree', 'meta' or 'inline'."
1112
- )
1113
- func = wrapper
1114
-
1115
- def _visitor_args_dec(obj):
1116
- return _apply_v_args(obj, func)
1117
-
1118
- return _visitor_args_dec
1119
-
1120
-
1121
-
1122
- TOKEN_DEFAULT_PRIORITY = 0
1123
-
1124
-
1125
- class Symbol(Serialize):
1126
- __slots__ = ("name",)
1127
-
1128
- name: str
1129
- is_term: ClassVar[bool] = NotImplemented
1130
-
1131
- def __init__(self, name: str) -> None:
1132
- self.name = name
1133
-
1134
- def __eq__(self, other):
1135
- assert isinstance(other, Symbol), other
1136
- return self.is_term == other.is_term and self.name == other.name
1137
-
1138
- def __ne__(self, other):
1139
- return not (self == other)
1140
-
1141
- def __hash__(self):
1142
- return hash(self.name)
1143
-
1144
- def __repr__(self):
1145
- return "%s(%r)" % (type(self).__name__, self.name)
1146
-
1147
- fullrepr = property(__repr__)
1148
-
1149
- def renamed(self, f):
1150
- return type(self)(f(self.name))
1151
-
1152
-
1153
- class Terminal(Symbol):
1154
- __serialize_fields__ = "name", "filter_out"
1155
-
1156
- is_term: ClassVar[bool] = True
1157
-
1158
- def __init__(self, name, filter_out=False):
1159
- self.name = name
1160
- self.filter_out = filter_out
1161
-
1162
- @property
1163
- def fullrepr(self):
1164
- return "%s(%r, %r)" % (type(self).__name__, self.name, self.filter_out)
1165
-
1166
- def renamed(self, f):
1167
- return type(self)(f(self.name), self.filter_out)
1168
-
1169
-
1170
- class NonTerminal(Symbol):
1171
- __serialize_fields__ = ("name",)
1172
-
1173
- is_term: ClassVar[bool] = False
1174
-
1175
-
1176
- class RuleOptions(Serialize):
1177
- __serialize_fields__ = (
1178
- "keep_all_tokens",
1179
- "expand1",
1180
- "priority",
1181
- "template_source",
1182
- "empty_indices",
1183
- )
1184
-
1185
- keep_all_tokens: bool
1186
- expand1: bool
1187
- priority: Optional[int]
1188
- template_source: Optional[str]
1189
- empty_indices: Tuple[bool, ...]
1190
-
1191
- def __init__(
1192
- self,
1193
- keep_all_tokens: bool = False,
1194
- expand1: bool = False,
1195
- priority: Optional[int] = None,
1196
- template_source: Optional[str] = None,
1197
- empty_indices: Tuple[bool, ...] = (),
1198
- ) -> None:
1199
- self.keep_all_tokens = keep_all_tokens
1200
- self.expand1 = expand1
1201
- self.priority = priority
1202
- self.template_source = template_source
1203
- self.empty_indices = empty_indices
1204
-
1205
- def __repr__(self):
1206
- return "RuleOptions(%r, %r, %r, %r)" % (
1207
- self.keep_all_tokens,
1208
- self.expand1,
1209
- self.priority,
1210
- self.template_source,
1211
- )
1212
-
1213
-
1214
- class Rule(Serialize):
1215
- #--
1216
-
1217
- __slots__ = ("origin", "expansion", "alias", "options", "order", "_hash")
1218
-
1219
- __serialize_fields__ = "origin", "expansion", "order", "alias", "options"
1220
- __serialize_namespace__ = Terminal, NonTerminal, RuleOptions
1221
-
1222
- origin: NonTerminal
1223
- expansion: Sequence[Symbol]
1224
- order: int
1225
- alias: Optional[str]
1226
- options: RuleOptions
1227
- _hash: int
1228
-
1229
- def __init__(
1230
- self,
1231
- origin: NonTerminal,
1232
- expansion: Sequence[Symbol],
1233
- order: int = 0,
1234
- alias: Optional[str] = None,
1235
- options: Optional[RuleOptions] = None,
1236
- ):
1237
- self.origin = origin
1238
- self.expansion = expansion
1239
- self.alias = alias
1240
- self.order = order
1241
- self.options = options or RuleOptions()
1242
- self._hash = hash((self.origin, tuple(self.expansion)))
1243
-
1244
- def _deserialize(self):
1245
- self._hash = hash((self.origin, tuple(self.expansion)))
1246
-
1247
- def __str__(self):
1248
- return "<%s : %s>" % (
1249
- self.origin.name,
1250
- " ".join(x.name for x in self.expansion),
1251
- )
1252
-
1253
- def __repr__(self):
1254
- return "Rule(%r, %r, %r, %r)" % (
1255
- self.origin,
1256
- self.expansion,
1257
- self.alias,
1258
- self.options,
1259
- )
1260
-
1261
- def __hash__(self):
1262
- return self._hash
1263
-
1264
- def __eq__(self, other):
1265
- if not isinstance(other, Rule):
1266
- return False
1267
- return self.origin == other.origin and self.expansion == other.expansion
1268
-
1269
-
1270
-
1271
- from copy import copy
1272
-
1273
- try: ##
1274
-
1275
- has_interegular = bool(interegular)
1276
- except NameError:
1277
- has_interegular = False
1278
-
1279
-
1280
- class Pattern(Serialize, ABC):
1281
- #--
1282
-
1283
- value: str
1284
- flags: Collection[str]
1285
- raw: Optional[str]
1286
- type: ClassVar[str]
1287
-
1288
- def __init__(
1289
- self, value: str, flags: Collection[str] = (), raw: Optional[str] = None
1290
- ) -> None:
1291
- self.value = value
1292
- self.flags = frozenset(flags)
1293
- self.raw = raw
1294
-
1295
- def __repr__(self):
1296
- return repr(self.to_regexp())
1297
-
1298
- ##
1299
-
1300
- def __hash__(self):
1301
- return hash((type(self), self.value, self.flags))
1302
-
1303
- def __eq__(self, other):
1304
- return (
1305
- type(self) == type(other)
1306
- and self.value == other.value
1307
- and self.flags == other.flags
1308
- )
1309
-
1310
- @abstractmethod
1311
- def to_regexp(self) -> str:
1312
- raise NotImplementedError()
1313
-
1314
- @property
1315
- @abstractmethod
1316
- def min_width(self) -> int:
1317
- raise NotImplementedError()
1318
-
1319
- @property
1320
- @abstractmethod
1321
- def max_width(self) -> int:
1322
- raise NotImplementedError()
1323
-
1324
- def _get_flags(self, value):
1325
- for f in self.flags:
1326
- value = "(?%s:%s)" % (f, value)
1327
- return value
1328
-
1329
-
1330
- class PatternStr(Pattern):
1331
- __serialize_fields__ = "value", "flags", "raw"
1332
-
1333
- type: ClassVar[str] = "str"
1334
-
1335
- def to_regexp(self) -> str:
1336
- return self._get_flags(re.escape(self.value))
1337
-
1338
- @property
1339
- def min_width(self) -> int:
1340
- return len(self.value)
1341
-
1342
- @property
1343
- def max_width(self) -> int:
1344
- return len(self.value)
1345
-
1346
-
1347
- class PatternRE(Pattern):
1348
- __serialize_fields__ = "value", "flags", "raw", "_width"
1349
-
1350
- type: ClassVar[str] = "re"
1351
-
1352
- def to_regexp(self) -> str:
1353
- return self._get_flags(self.value)
1354
-
1355
- _width = None
1356
-
1357
- def _get_width(self):
1358
- if self._width is None:
1359
- self._width = get_regexp_width(self.to_regexp())
1360
- return self._width
1361
-
1362
- @property
1363
- def min_width(self) -> int:
1364
- return self._get_width()[0]
1365
-
1366
- @property
1367
- def max_width(self) -> int:
1368
- return self._get_width()[1]
1369
-
1370
-
1371
- class TerminalDef(Serialize):
1372
- #--
1373
- __serialize_fields__ = "name", "pattern", "priority"
1374
- __serialize_namespace__ = PatternStr, PatternRE
1375
-
1376
- name: str
1377
- pattern: Pattern
1378
- priority: int
1379
-
1380
- def __init__(
1381
- self, name: str, pattern: Pattern, priority: int = TOKEN_DEFAULT_PRIORITY
1382
- ) -> None:
1383
- assert isinstance(pattern, Pattern), pattern
1384
- self.name = name
1385
- self.pattern = pattern
1386
- self.priority = priority
1387
-
1388
- def __repr__(self):
1389
- return "%s(%r, %r)" % (type(self).__name__, self.name, self.pattern)
1390
-
1391
- def user_repr(self) -> str:
1392
- if self.name.startswith("__"): ##
1393
-
1394
- return self.pattern.raw or self.name
1395
- else:
1396
- return self.name
1397
-
1398
-
1399
- _T = TypeVar("_T", bound="Token")
1400
-
1401
-
1402
- class Token(str):
1403
- #--
1404
-
1405
- __slots__ = (
1406
- "type",
1407
- "start_pos",
1408
- "value",
1409
- "line",
1410
- "column",
1411
- "end_line",
1412
- "end_column",
1413
- "end_pos",
1414
- )
1415
-
1416
- __match_args__ = ("type", "value")
1417
-
1418
- type: str
1419
- start_pos: Optional[int]
1420
- value: Any
1421
- line: Optional[int]
1422
- column: Optional[int]
1423
- end_line: Optional[int]
1424
- end_column: Optional[int]
1425
- end_pos: Optional[int]
1426
-
1427
- @overload
1428
- def __new__(
1429
- cls,
1430
- type: str,
1431
- value: Any,
1432
- start_pos: Optional[int] = None,
1433
- line: Optional[int] = None,
1434
- column: Optional[int] = None,
1435
- end_line: Optional[int] = None,
1436
- end_column: Optional[int] = None,
1437
- end_pos: Optional[int] = None,
1438
- ) -> "Token": ...
1439
-
1440
- @overload
1441
- def __new__(
1442
- cls,
1443
- type_: str,
1444
- value: Any,
1445
- start_pos: Optional[int] = None,
1446
- line: Optional[int] = None,
1447
- column: Optional[int] = None,
1448
- end_line: Optional[int] = None,
1449
- end_column: Optional[int] = None,
1450
- end_pos: Optional[int] = None,
1451
- ) -> "Token": ...
1452
-
1453
- def __new__(cls, *args, **kwargs):
1454
- if "type_" in kwargs:
1455
- warnings.warn(
1456
- "`type_` is deprecated use `type` instead", DeprecationWarning
1457
- )
1458
-
1459
- if "type" in kwargs:
1460
- raise TypeError(
1461
- "Error: using both 'type' and the deprecated 'type_' as arguments."
1462
- )
1463
- kwargs["type"] = kwargs.pop("type_")
1464
-
1465
- return cls._future_new(*args, **kwargs)
1466
-
1467
- @classmethod
1468
- def _future_new(
1469
- cls,
1470
- type,
1471
- value,
1472
- start_pos=None,
1473
- line=None,
1474
- column=None,
1475
- end_line=None,
1476
- end_column=None,
1477
- end_pos=None,
1478
- ):
1479
- inst = super(Token, cls).__new__(cls, value)
1480
-
1481
- inst.type = type
1482
- inst.start_pos = start_pos
1483
- inst.value = value
1484
- inst.line = line
1485
- inst.column = column
1486
- inst.end_line = end_line
1487
- inst.end_column = end_column
1488
- inst.end_pos = end_pos
1489
- return inst
1490
-
1491
- @overload
1492
- def update(
1493
- self, type: Optional[str] = None, value: Optional[Any] = None
1494
- ) -> "Token": ...
1495
-
1496
- @overload
1497
- def update(
1498
- self, type_: Optional[str] = None, value: Optional[Any] = None
1499
- ) -> "Token": ...
1500
-
1501
- def update(self, *args, **kwargs):
1502
- if "type_" in kwargs:
1503
- warnings.warn(
1504
- "`type_` is deprecated use `type` instead", DeprecationWarning
1505
- )
1506
-
1507
- if "type" in kwargs:
1508
- raise TypeError(
1509
- "Error: using both 'type' and the deprecated 'type_' as arguments."
1510
- )
1511
- kwargs["type"] = kwargs.pop("type_")
1512
-
1513
- return self._future_update(*args, **kwargs)
1514
-
1515
- def _future_update(
1516
- self, type: Optional[str] = None, value: Optional[Any] = None
1517
- ) -> "Token":
1518
- return Token.new_borrow_pos(
1519
- type if type is not None else self.type,
1520
- value if value is not None else self.value,
1521
- self,
1522
- )
1523
-
1524
- @classmethod
1525
- def new_borrow_pos(cls: Type[_T], type_: str, value: Any, borrow_t: "Token") -> _T:
1526
- return cls(
1527
- type_,
1528
- value,
1529
- borrow_t.start_pos,
1530
- borrow_t.line,
1531
- borrow_t.column,
1532
- borrow_t.end_line,
1533
- borrow_t.end_column,
1534
- borrow_t.end_pos,
1535
- )
1536
-
1537
- def __reduce__(self):
1538
- return (
1539
- self.__class__,
1540
- (self.type, self.value, self.start_pos, self.line, self.column),
1541
- )
1542
-
1543
- def __repr__(self):
1544
- return "Token(%r, %r)" % (self.type, self.value)
1545
-
1546
- def __deepcopy__(self, memo):
1547
- return Token(self.type, self.value, self.start_pos, self.line, self.column)
1548
-
1549
- def __eq__(self, other):
1550
- if isinstance(other, Token) and self.type != other.type:
1551
- return False
1552
-
1553
- return str.__eq__(self, other)
1554
-
1555
- __hash__ = str.__hash__
1556
-
1557
-
1558
- class LineCounter:
1559
- #--
1560
-
1561
- __slots__ = "char_pos", "line", "column", "line_start_pos", "newline_char"
1562
-
1563
- def __init__(self, newline_char):
1564
- self.newline_char = newline_char
1565
- self.char_pos = 0
1566
- self.line = 1
1567
- self.column = 1
1568
- self.line_start_pos = 0
1569
-
1570
- def __eq__(self, other):
1571
- if not isinstance(other, LineCounter):
1572
- return NotImplemented
1573
-
1574
- return (
1575
- self.char_pos == other.char_pos and self.newline_char == other.newline_char
1576
- )
1577
-
1578
- def feed(self, token: Token, test_newline=True):
1579
- #--
1580
- if test_newline:
1581
- newlines = token.count(self.newline_char)
1582
- if newlines:
1583
- self.line += newlines
1584
- self.line_start_pos = (
1585
- self.char_pos + token.rindex(self.newline_char) + 1
1586
- )
1587
-
1588
- self.char_pos += len(token)
1589
- self.column = self.char_pos - self.line_start_pos + 1
1590
-
1591
-
1592
- class UnlessCallback:
1593
- def __init__(self, scanner):
1594
- self.scanner = scanner
1595
-
1596
- def __call__(self, t):
1597
- res = self.scanner.match(t.value, 0)
1598
- if res:
1599
- _value, t.type = res
1600
- return t
1601
-
1602
-
1603
- class CallChain:
1604
- def __init__(self, callback1, callback2, cond):
1605
- self.callback1 = callback1
1606
- self.callback2 = callback2
1607
- self.cond = cond
1608
-
1609
- def __call__(self, t):
1610
- t2 = self.callback1(t)
1611
- return self.callback2(t) if self.cond(t2) else t2
1612
-
1613
-
1614
- def _get_match(re_, regexp, s, flags):
1615
- m = re_.match(regexp, s, flags)
1616
- if m:
1617
- return m.group(0)
1618
-
1619
-
1620
- def _create_unless(terminals, g_regex_flags, re_, use_bytes):
1621
- tokens_by_type = classify(terminals, lambda t: type(t.pattern))
1622
- assert len(tokens_by_type) <= 2, tokens_by_type.keys()
1623
- embedded_strs = set()
1624
- callback = {}
1625
- for retok in tokens_by_type.get(PatternRE, []):
1626
- unless = []
1627
- for strtok in tokens_by_type.get(PatternStr, []):
1628
- if strtok.priority != retok.priority:
1629
- continue
1630
- s = strtok.pattern.value
1631
- if s == _get_match(re_, retok.pattern.to_regexp(), s, g_regex_flags):
1632
- unless.append(strtok)
1633
- if strtok.pattern.flags <= retok.pattern.flags:
1634
- embedded_strs.add(strtok)
1635
- if unless:
1636
- callback[retok.name] = UnlessCallback(
1637
- Scanner(
1638
- unless, g_regex_flags, re_, match_whole=True, use_bytes=use_bytes
1639
- )
1640
- )
1641
-
1642
- new_terminals = [t for t in terminals if t not in embedded_strs]
1643
- return new_terminals, callback
1644
-
1645
-
1646
- class Scanner:
1647
- def __init__(self, terminals, g_regex_flags, re_, use_bytes, match_whole=False):
1648
- self.terminals = terminals
1649
- self.g_regex_flags = g_regex_flags
1650
- self.re_ = re_
1651
- self.use_bytes = use_bytes
1652
- self.match_whole = match_whole
1653
-
1654
- self.allowed_types = {t.name for t in self.terminals}
1655
-
1656
- self._mres = self._build_mres(terminals, len(terminals))
1657
-
1658
- def _build_mres(self, terminals, max_size):
1659
- ##
1660
-
1661
- ##
1662
-
1663
- ##
1664
-
1665
- postfix = "$" if self.match_whole else ""
1666
- mres = []
1667
- while terminals:
1668
- pattern = "|".join(
1669
- "(?P<%s>%s)" % (t.name, t.pattern.to_regexp() + postfix)
1670
- for t in terminals[:max_size]
1671
- )
1672
- if self.use_bytes:
1673
- pattern = pattern.encode("latin-1")
1674
- try:
1675
- mre = self.re_.compile(pattern, self.g_regex_flags)
1676
- except AssertionError: ##
1677
-
1678
- return self._build_mres(terminals, max_size // 2)
1679
-
1680
- mres.append(mre)
1681
- terminals = terminals[max_size:]
1682
- return mres
1683
-
1684
- def match(self, text, pos):
1685
- for mre in self._mres:
1686
- m = mre.match(text, pos)
1687
- if m:
1688
- return m.group(0), m.lastgroup
1689
-
1690
-
1691
- def _regexp_has_newline(r: str):
1692
- #--
1693
- return (
1694
- "\n" in r or "\\n" in r or "\\s" in r or "[^" in r or ("(?s" in r and "." in r)
1695
- )
1696
-
1697
-
1698
- class LexerState:
1699
- #--
1700
-
1701
- __slots__ = "text", "line_ctr", "last_token"
1702
-
1703
- text: str
1704
- line_ctr: LineCounter
1705
- last_token: Optional[Token]
1706
-
1707
- def __init__(
1708
- self,
1709
- text: str,
1710
- line_ctr: Optional[LineCounter] = None,
1711
- last_token: Optional[Token] = None,
1712
- ):
1713
- self.text = text
1714
- self.line_ctr = line_ctr or LineCounter(
1715
- b"\n" if isinstance(text, bytes) else "\n"
1716
- )
1717
- self.last_token = last_token
1718
-
1719
- def __eq__(self, other):
1720
- if not isinstance(other, LexerState):
1721
- return NotImplemented
1722
-
1723
- return (
1724
- self.text is other.text
1725
- and self.line_ctr == other.line_ctr
1726
- and self.last_token == other.last_token
1727
- )
1728
-
1729
- def __copy__(self):
1730
- return type(self)(self.text, copy(self.line_ctr), self.last_token)
1731
-
1732
-
1733
- class LexerThread:
1734
- #--
1735
-
1736
- def __init__(self, lexer: "Lexer", lexer_state: LexerState):
1737
- self.lexer = lexer
1738
- self.state = lexer_state
1739
-
1740
- @classmethod
1741
- def from_text(cls, lexer: "Lexer", text: str) -> "LexerThread":
1742
- return cls(lexer, LexerState(text))
1743
-
1744
- def lex(self, parser_state):
1745
- return self.lexer.lex(self.state, parser_state)
1746
-
1747
- def __copy__(self):
1748
- return type(self)(self.lexer, copy(self.state))
1749
-
1750
- _Token = Token
1751
-
1752
-
1753
- _Callback = Callable[[Token], Token]
1754
-
1755
-
1756
- class Lexer(ABC):
1757
- #--
1758
-
1759
- @abstractmethod
1760
- def lex(self, lexer_state: LexerState, parser_state: Any) -> Iterator[Token]:
1761
- return NotImplemented
1762
-
1763
- def make_lexer_state(self, text):
1764
- #--
1765
- return LexerState(text)
1766
-
1767
-
1768
- def _check_regex_collisions(
1769
- terminal_to_regexp: Dict[TerminalDef, str],
1770
- comparator,
1771
- strict_mode,
1772
- max_collisions_to_show=8,
1773
- ):
1774
- if not comparator:
1775
- comparator = interegular.Comparator.from_regexes(terminal_to_regexp)
1776
-
1777
- ##
1778
-
1779
- ##
1780
-
1781
- max_time = 2 if strict_mode else 0.2
1782
-
1783
- ##
1784
-
1785
- if comparator.count_marked_pairs() >= max_collisions_to_show:
1786
- return
1787
- for group in classify(terminal_to_regexp, lambda t: t.priority).values():
1788
- for a, b in comparator.check(group, skip_marked=True):
1789
- assert a.priority == b.priority
1790
- ##
1791
-
1792
- comparator.mark(a, b)
1793
-
1794
- ##
1795
-
1796
- message = f"Collision between Terminals {a.name} and {b.name}. "
1797
- try:
1798
- example = comparator.get_example_overlap(
1799
- a, b, max_time
1800
- ).format_multiline()
1801
- except ValueError:
1802
- ##
1803
-
1804
- example = "No example could be found fast enough. However, the collision does still exists"
1805
- if strict_mode:
1806
- raise LexError(f"{message}\n{example}")
1807
- logger.warning(
1808
- "%s The lexer will choose between them arbitrarily.\n%s",
1809
- message,
1810
- example,
1811
- )
1812
- if comparator.count_marked_pairs() >= max_collisions_to_show:
1813
- logger.warning("Found 8 regex collisions, will not check for more.")
1814
- return
1815
-
1816
-
1817
- class AbstractBasicLexer(Lexer):
1818
- terminals_by_name: Dict[str, TerminalDef]
1819
-
1820
- @abstractmethod
1821
- def __init__(self, conf: "LexerConf", comparator=None) -> None: ...
1822
-
1823
- @abstractmethod
1824
- def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token: ...
1825
-
1826
- def lex(self, state: LexerState, parser_state: Any) -> Iterator[Token]:
1827
- with suppress(EOFError):
1828
- while True:
1829
- yield self.next_token(state, parser_state)
1830
-
1831
-
1832
- class BasicLexer(AbstractBasicLexer):
1833
- terminals: Collection[TerminalDef]
1834
- ignore_types: FrozenSet[str]
1835
- newline_types: FrozenSet[str]
1836
- user_callbacks: Dict[str, _Callback]
1837
- callback: Dict[str, _Callback]
1838
- re: ModuleType
1839
-
1840
- def __init__(self, conf: "LexerConf", comparator=None) -> None:
1841
- terminals = list(conf.terminals)
1842
- assert all(isinstance(t, TerminalDef) for t in terminals), terminals
1843
-
1844
- self.re = conf.re_module
1845
-
1846
- if not conf.skip_validation:
1847
- ##
1848
-
1849
- terminal_to_regexp = {}
1850
- for t in terminals:
1851
- regexp = t.pattern.to_regexp()
1852
- try:
1853
- self.re.compile(regexp, conf.g_regex_flags)
1854
- except self.re.error:
1855
- raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern))
1856
-
1857
- if t.pattern.min_width == 0:
1858
- raise LexError(
1859
- "Lexer does not allow zero-width terminals. (%s: %s)"
1860
- % (t.name, t.pattern)
1861
- )
1862
- if t.pattern.type == "re":
1863
- terminal_to_regexp[t] = regexp
1864
-
1865
- if not (set(conf.ignore) <= {t.name for t in terminals}):
1866
- raise LexError(
1867
- "Ignore terminals are not defined: %s"
1868
- % (set(conf.ignore) - {t.name for t in terminals})
1869
- )
1870
-
1871
- if has_interegular:
1872
- _check_regex_collisions(terminal_to_regexp, comparator, conf.strict)
1873
- elif conf.strict:
1874
- raise LexError(
1875
- "interegular must be installed for strict mode. Use `pip install 'lark[interegular]'`."
1876
- )
1877
-
1878
- ##
1879
-
1880
- self.newline_types = frozenset(
1881
- t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())
1882
- )
1883
- self.ignore_types = frozenset(conf.ignore)
1884
-
1885
- terminals.sort(
1886
- key=lambda x: (
1887
- -x.priority,
1888
- -x.pattern.max_width,
1889
- -len(x.pattern.value),
1890
- x.name,
1891
- )
1892
- )
1893
- self.terminals = terminals
1894
- self.user_callbacks = conf.callbacks
1895
- self.g_regex_flags = conf.g_regex_flags
1896
- self.use_bytes = conf.use_bytes
1897
- self.terminals_by_name = conf.terminals_by_name
1898
-
1899
- self._scanner = None
1900
-
1901
- def _build_scanner(self):
1902
- terminals, self.callback = _create_unless(
1903
- self.terminals, self.g_regex_flags, self.re, self.use_bytes
1904
- )
1905
- assert all(self.callback.values())
1906
-
1907
- for type_, f in self.user_callbacks.items():
1908
- if type_ in self.callback:
1909
- ##
1910
-
1911
- self.callback[type_] = CallChain(
1912
- self.callback[type_], f, lambda t: t.type == type_
1913
- )
1914
- else:
1915
- self.callback[type_] = f
1916
-
1917
- self._scanner = Scanner(terminals, self.g_regex_flags, self.re, self.use_bytes)
1918
-
1919
- @property
1920
- def scanner(self):
1921
- if self._scanner is None:
1922
- self._build_scanner()
1923
- return self._scanner
1924
-
1925
- def match(self, text, pos):
1926
- return self.scanner.match(text, pos)
1927
-
1928
- def next_token(self, lex_state: LexerState, parser_state: Any = None) -> Token:
1929
- line_ctr = lex_state.line_ctr
1930
- while line_ctr.char_pos < len(lex_state.text):
1931
- res = self.match(lex_state.text, line_ctr.char_pos)
1932
- if not res:
1933
- allowed = self.scanner.allowed_types - self.ignore_types
1934
- if not allowed:
1935
- allowed = {"<END-OF-FILE>"}
1936
- raise UnexpectedCharacters(
1937
- lex_state.text,
1938
- line_ctr.char_pos,
1939
- line_ctr.line,
1940
- line_ctr.column,
1941
- allowed=allowed,
1942
- token_history=lex_state.last_token and [lex_state.last_token],
1943
- state=parser_state,
1944
- terminals_by_name=self.terminals_by_name,
1945
- )
1946
-
1947
- value, type_ = res
1948
-
1949
- ignored = type_ in self.ignore_types
1950
- t = None
1951
- if not ignored or type_ in self.callback:
1952
- t = Token(
1953
- type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column
1954
- )
1955
- line_ctr.feed(value, type_ in self.newline_types)
1956
- if t is not None:
1957
- t.end_line = line_ctr.line
1958
- t.end_column = line_ctr.column
1959
- t.end_pos = line_ctr.char_pos
1960
- if t.type in self.callback:
1961
- t = self.callback[t.type](t)
1962
- if not ignored:
1963
- if not isinstance(t, Token):
1964
- raise LexError(
1965
- "Callbacks must return a token (returned %r)" % t
1966
- )
1967
- lex_state.last_token = t
1968
- return t
1969
-
1970
- ##
1971
-
1972
- raise EOFError(self)
1973
-
1974
-
1975
- class ContextualLexer(Lexer):
1976
- lexers: Dict[int, AbstractBasicLexer]
1977
- root_lexer: AbstractBasicLexer
1978
-
1979
- BasicLexer: Type[AbstractBasicLexer] = BasicLexer
1980
-
1981
- def __init__(
1982
- self,
1983
- conf: "LexerConf",
1984
- states: Dict[int, Collection[str]],
1985
- always_accept: Collection[str] = (),
1986
- ) -> None:
1987
- terminals = list(conf.terminals)
1988
- terminals_by_name = conf.terminals_by_name
1989
-
1990
- trad_conf = copy(conf)
1991
- trad_conf.terminals = terminals
1992
-
1993
- if has_interegular and not conf.skip_validation:
1994
- comparator = interegular.Comparator.from_regexes(
1995
- {t: t.pattern.to_regexp() for t in terminals}
1996
- )
1997
- else:
1998
- comparator = None
1999
- lexer_by_tokens: Dict[FrozenSet[str], AbstractBasicLexer] = {}
2000
- self.lexers = {}
2001
- for state, accepts in states.items():
2002
- key = frozenset(accepts)
2003
- try:
2004
- lexer = lexer_by_tokens[key]
2005
- except KeyError:
2006
- accepts = set(accepts) | set(conf.ignore) | set(always_accept)
2007
- lexer_conf = copy(trad_conf)
2008
- lexer_conf.terminals = [
2009
- terminals_by_name[n] for n in accepts if n in terminals_by_name
2010
- ]
2011
- lexer = self.BasicLexer(lexer_conf, comparator)
2012
- lexer_by_tokens[key] = lexer
2013
-
2014
- self.lexers[state] = lexer
2015
-
2016
- assert trad_conf.terminals is terminals
2017
- trad_conf.skip_validation = True ##
2018
-
2019
- self.root_lexer = self.BasicLexer(trad_conf, comparator)
2020
-
2021
- def lex(
2022
- self, lexer_state: LexerState, parser_state: "ParserState"
2023
- ) -> Iterator[Token]:
2024
- try:
2025
- while True:
2026
- lexer = self.lexers[parser_state.position]
2027
- yield lexer.next_token(lexer_state, parser_state)
2028
- except EOFError:
2029
- pass
2030
- except UnexpectedCharacters as e:
2031
- ##
2032
-
2033
- ##
2034
-
2035
- try:
2036
- last_token = (
2037
- lexer_state.last_token
2038
- ) ##
2039
-
2040
- token = self.root_lexer.next_token(lexer_state, parser_state)
2041
- raise UnexpectedToken(
2042
- token,
2043
- e.allowed,
2044
- state=parser_state,
2045
- token_history=[last_token],
2046
- terminals_by_name=self.root_lexer.terminals_by_name,
2047
- )
2048
- except UnexpectedCharacters:
2049
- raise e ##
2050
-
2051
-
2052
-
2053
-
2054
-
2055
- _ParserArgType: "TypeAlias" = 'Literal["earley", "lalr", "cyk", "auto"]'
2056
- _LexerArgType: "TypeAlias" = (
2057
- 'Union[Literal["auto", "basic", "contextual", "dynamic", "dynamic_complete"], Type[Lexer]]'
2058
- )
2059
- _LexerCallback = Callable[[Token], Token]
2060
- ParserCallbacks = Dict[str, Callable]
2061
-
2062
-
2063
- class LexerConf(Serialize):
2064
- __serialize_fields__ = (
2065
- "terminals",
2066
- "ignore",
2067
- "g_regex_flags",
2068
- "use_bytes",
2069
- "lexer_type",
2070
- )
2071
- __serialize_namespace__ = (TerminalDef,)
2072
-
2073
- terminals: Collection[TerminalDef]
2074
- re_module: ModuleType
2075
- ignore: Collection[str]
2076
- postlex: "Optional[PostLex]"
2077
- callbacks: Dict[str, _LexerCallback]
2078
- g_regex_flags: int
2079
- skip_validation: bool
2080
- use_bytes: bool
2081
- lexer_type: Optional[_LexerArgType]
2082
- strict: bool
2083
-
2084
- def __init__(
2085
- self,
2086
- terminals: Collection[TerminalDef],
2087
- re_module: ModuleType,
2088
- ignore: Collection[str] = (),
2089
- postlex: "Optional[PostLex]" = None,
2090
- callbacks: Optional[Dict[str, _LexerCallback]] = None,
2091
- g_regex_flags: int = 0,
2092
- skip_validation: bool = False,
2093
- use_bytes: bool = False,
2094
- strict: bool = False,
2095
- ):
2096
- self.terminals = terminals
2097
- self.terminals_by_name = {t.name: t for t in self.terminals}
2098
- assert len(self.terminals) == len(self.terminals_by_name)
2099
- self.ignore = ignore
2100
- self.postlex = postlex
2101
- self.callbacks = callbacks or {}
2102
- self.g_regex_flags = g_regex_flags
2103
- self.re_module = re_module
2104
- self.skip_validation = skip_validation
2105
- self.use_bytes = use_bytes
2106
- self.strict = strict
2107
- self.lexer_type = None
2108
-
2109
- def _deserialize(self):
2110
- self.terminals_by_name = {t.name: t for t in self.terminals}
2111
-
2112
- def __deepcopy__(self, memo=None):
2113
- return type(self)(
2114
- deepcopy(self.terminals, memo),
2115
- self.re_module,
2116
- deepcopy(self.ignore, memo),
2117
- deepcopy(self.postlex, memo),
2118
- deepcopy(self.callbacks, memo),
2119
- deepcopy(self.g_regex_flags, memo),
2120
- deepcopy(self.skip_validation, memo),
2121
- deepcopy(self.use_bytes, memo),
2122
- )
2123
-
2124
-
2125
- class ParserConf(Serialize):
2126
- __serialize_fields__ = "rules", "start", "parser_type"
2127
-
2128
- rules: List["Rule"]
2129
- callbacks: ParserCallbacks
2130
- start: List[str]
2131
- parser_type: _ParserArgType
2132
-
2133
- def __init__(
2134
- self, rules: List["Rule"], callbacks: ParserCallbacks, start: List[str]
2135
- ):
2136
- assert isinstance(start, list)
2137
- self.rules = rules
2138
- self.callbacks = callbacks
2139
- self.start = start
2140
-
2141
-
2142
-
2143
- from functools import partial, wraps
2144
- from itertools import product
2145
-
2146
-
2147
- class ExpandSingleChild:
2148
- def __init__(self, node_builder):
2149
- self.node_builder = node_builder
2150
-
2151
- def __call__(self, children):
2152
- if len(children) == 1:
2153
- return children[0]
2154
- else:
2155
- return self.node_builder(children)
2156
-
2157
-
2158
- class PropagatePositions:
2159
- def __init__(self, node_builder, node_filter=None):
2160
- self.node_builder = node_builder
2161
- self.node_filter = node_filter
2162
-
2163
- def __call__(self, children):
2164
- res = self.node_builder(children)
2165
-
2166
- if isinstance(res, Tree):
2167
- ##
2168
-
2169
- ##
2170
-
2171
- ##
2172
-
2173
- ##
2174
-
2175
-
2176
- res_meta = res.meta
2177
-
2178
- first_meta = self._pp_get_meta(children)
2179
- if first_meta is not None:
2180
- if not hasattr(res_meta, "line"):
2181
- ##
2182
-
2183
- res_meta.line = getattr(
2184
- first_meta, "container_line", first_meta.line
2185
- )
2186
- res_meta.column = getattr(
2187
- first_meta, "container_column", first_meta.column
2188
- )
2189
- res_meta.start_pos = getattr(
2190
- first_meta, "container_start_pos", first_meta.start_pos
2191
- )
2192
- res_meta.empty = False
2193
-
2194
- res_meta.container_line = getattr(
2195
- first_meta, "container_line", first_meta.line
2196
- )
2197
- res_meta.container_column = getattr(
2198
- first_meta, "container_column", first_meta.column
2199
- )
2200
- res_meta.container_start_pos = getattr(
2201
- first_meta, "container_start_pos", first_meta.start_pos
2202
- )
2203
-
2204
- last_meta = self._pp_get_meta(reversed(children))
2205
- if last_meta is not None:
2206
- if not hasattr(res_meta, "end_line"):
2207
- res_meta.end_line = getattr(
2208
- last_meta, "container_end_line", last_meta.end_line
2209
- )
2210
- res_meta.end_column = getattr(
2211
- last_meta, "container_end_column", last_meta.end_column
2212
- )
2213
- res_meta.end_pos = getattr(
2214
- last_meta, "container_end_pos", last_meta.end_pos
2215
- )
2216
- res_meta.empty = False
2217
-
2218
- res_meta.container_end_line = getattr(
2219
- last_meta, "container_end_line", last_meta.end_line
2220
- )
2221
- res_meta.container_end_column = getattr(
2222
- last_meta, "container_end_column", last_meta.end_column
2223
- )
2224
- res_meta.container_end_pos = getattr(
2225
- last_meta, "container_end_pos", last_meta.end_pos
2226
- )
2227
-
2228
- return res
2229
-
2230
- def _pp_get_meta(self, children):
2231
- for c in children:
2232
- if self.node_filter is not None and not self.node_filter(c):
2233
- continue
2234
- if isinstance(c, Tree):
2235
- if not c.meta.empty:
2236
- return c.meta
2237
- elif isinstance(c, Token):
2238
- return c
2239
- elif hasattr(c, "__lark_meta__"):
2240
- return c.__lark_meta__()
2241
-
2242
-
2243
- def make_propagate_positions(option):
2244
- if callable(option):
2245
- return partial(PropagatePositions, node_filter=option)
2246
- elif option is True:
2247
- return PropagatePositions
2248
- elif option is False:
2249
- return None
2250
-
2251
- raise ConfigurationError("Invalid option for propagate_positions: %r" % option)
2252
-
2253
-
2254
class ChildFilter:
    """Callable that selects/expands children before building a node.

    ``to_include`` is a sequence of ``(index, to_expand, add_none)`` triples:
    ``add_none`` ``None`` placeholders are emitted first, then either the
    child at ``index`` itself or (when ``to_expand`` is true) that child's
    ``children``.  ``append_none`` trailing ``None`` placeholders are added
    at the end, and the resulting list is passed to ``node_builder``.
    """

    def __init__(self, to_include, append_none, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include
        self.append_none = append_none

    def __call__(self, children):
        kept = []

        for index, expand, leading_nones in self.to_include:
            if leading_nones:
                kept.extend([None] * leading_nones)
            child = children[index]
            if expand:
                # Splice the grandchildren in place of the child.
                kept.extend(child.children)
            else:
                kept.append(child)

        trailing_nones = self.append_none
        if trailing_nones:
            kept.extend([None] * trailing_nones)

        return self.node_builder(kept)
2275
-
2276
-
2277
class ChildFilterLALR(ChildFilter):
    """``ChildFilter`` variant that reuses the first expanded child's list.

    When a child must be expanded and nothing has been collected yet, the
    child's own ``children`` object becomes the accumulator instead of being
    copied.  Later additions therefore mutate that child's ``children`` in
    place.
    """

    def __call__(self, children):
        kept = []
        for index, expand, leading_nones in self.to_include:
            if leading_nones:
                kept += [None] * leading_nones
            child = children[index]
            if not expand:
                kept.append(child)
            elif kept:
                kept += child.children
            else:
                # Nothing collected so far: adopt the child's list directly
                # (no copy is made).
                kept = child.children

        if self.append_none:
            kept += [None] * self.append_none

        return self.node_builder(kept)
2298
-
2299
-
2300
class ChildFilterLALR_NoPlaceholders(ChildFilter):
    """Child filter that never inserts ``None`` placeholders.

    ``to_include`` holds ``(index, to_expand)`` pairs.  As with the
    list-reusing variant, the first expanded child's ``children`` object is
    adopted as the accumulator when nothing has been collected yet, so it may
    be mutated by subsequent additions.
    """

    def __init__(self, to_include, node_builder):
        self.node_builder = node_builder
        self.to_include = to_include

    def __call__(self, children):
        kept = []
        for index, expand in self.to_include:
            child = children[index]
            if not expand:
                kept.append(child)
            elif kept:
                kept += child.children
            else:
                # Adopt the child's own list rather than copying it.
                kept = child.children
        return self.node_builder(kept)
2319
-
2320
-
2321
- def _should_expand(sym):
2322
- return not sym.is_term and sym.name.startswith("_")
2323
-
2324
-
2325
def maybe_create_child_filter(
    expansion, keep_all_tokens, ambiguous, _empty_indices: List[bool]
):
    """Build a child-filter factory for ``expansion``, or return ``None``.

    Args:
        expansion: sequence of symbols of the rule.
        keep_all_tokens: when true, filtered-out terminals are kept.
        ambiguous: selects ``ChildFilter`` over the LALR variant.
        _empty_indices: optional flags; each ``False`` stands for one symbol
            of ``expansion`` and each run of ``True`` values gives the number
            of ``None`` placeholders to insert at that position.

    Returns:
        A ``partial`` that still expects the ``node_builder`` argument, or
        ``None`` when no filtering, expansion or placeholder is required.
    """
    symbol_count = len(expansion)

    if _empty_indices:
        assert _empty_indices.count(False) == symbol_count
        # Encode the flags as a "0"/"1" string; the lengths of the "1" runs
        # between the "0"s are the placeholder counts per position.
        bits = "".join(str(int(flag)) for flag in _empty_indices)
        empty_indices = [len(run) for run in bits.split("0")]
        assert len(empty_indices) == symbol_count + 1, (empty_indices, symbol_count)
    else:
        empty_indices = [0] * (symbol_count + 1)

    to_include = []
    pending_nones = 0
    for i, sym in enumerate(expansion):
        pending_nones += empty_indices[i]
        if not keep_all_tokens and sym.is_term and sym.filter_out:
            # Dropped symbol: its placeholders carry over to the next kept one.
            continue
        to_include.append((i, _should_expand(sym), pending_nones))
        pending_nones = 0

    pending_nones += empty_indices[symbol_count]

    filter_needed = (
        _empty_indices
        or len(to_include) < symbol_count
        or any(expand for _, expand, _ in to_include)
    )
    if not filter_needed:
        return None

    if _empty_indices or ambiguous:
        filter_cls = ChildFilter if ambiguous else ChildFilterLALR
        return partial(filter_cls, to_include, pending_nones)

    # No placeholders involved: use the lighter (index, expand) form.
    return partial(
        ChildFilterLALR_NoPlaceholders, [(i, expand) for i, expand, _ in to_include]
    )
2363
-
2364
-
2365
class AmbiguousExpander:
    """Callable that distributes a node over its ``_ambig`` children.

    For every child whose ``data`` equals ``"_ambig"``,
    ``expand_kids_by_data("_ambig")`` is invoked on it; those located at an
    index listed in ``to_expand`` are additionally expanded: one node is built
    (via ``node_builder``) for each combination of their alternatives, and
    all of them are wrapped in a ``tree_class("_ambig", ...)`` node.  When no
    child qualifies, ``node_builder(children)`` is returned directly.
    """

    def __init__(self, to_expand, tree_class, node_builder):
        self.node_builder = node_builder
        self.tree_class = tree_class
        self.to_expand = to_expand

    def __call__(self, children):
        ambiguous = []
        for position, child in enumerate(children):
            if getattr(child, "data", None) == "_ambig":
                if position in self.to_expand:
                    ambiguous.append(position)
                # Done for every _ambig child, expanded or not.
                child.expand_kids_by_data("_ambig")

        if not ambiguous:
            return self.node_builder(children)

        alternatives = []
        for position, child in enumerate(children):
            if position in ambiguous:
                alternatives.append(child.children)
            else:
                alternatives.append((child,))

        built = [self.node_builder(list(combo)) for combo in product(*alternatives)]
        return self.tree_class("_ambig", built)
2403
-
2404
-
2405
def maybe_create_ambiguous_expander(tree_class, expansion, keep_all_tokens):
    """Return an ``AmbiguousExpander`` factory for ``expansion``, or ``None``.

    An index is selected when ``keep_all_tokens`` is true, or when its symbol
    is not a filtered-out terminal and ``_should_expand`` accepts it.  If no
    index is selected, ``None`` is returned; otherwise a ``partial`` that
    still expects the ``node_builder`` argument.
    """
    to_expand = []
    for i, sym in enumerate(expansion):
        if keep_all_tokens or (
            not (sym.is_term and sym.filter_out) and _should_expand(sym)
        ):
            to_expand.append(i)

    if not to_expand:
        return None
    return partial(AmbiguousExpander, to_expand, tree_class)
2414
-
2415
-
2416
class AmbiguousIntermediateExpander:
    """Callable that flattens leading ``_iambig`` nodes into an ``_ambig`` node.

    If the first child is an ``_iambig`` tree, each of its alternatives is
    (recursively) collapsed and combined with the remaining children; every
    resulting child list is passed to ``node_builder`` and the results are
    wrapped in ``tree_class("_ambig", ...)``.  Otherwise
    ``node_builder(children)`` is returned unchanged.
    """

    def __init__(self, tree_class, node_builder):
        self.node_builder = node_builder
        self.tree_class = tree_class

    def _collapse_iambig(self, children):
        """Return the collapsed ``_inter`` trees, or ``None`` if not applicable.

        ``None`` is returned when ``children`` is empty or its first element
        is not an ``_iambig`` tree.
        """
        if not children:
            return None
        head = children[0]
        if not (hasattr(head, "data") and head.data == "_iambig"):
            return None

        tail = children[1:]
        flattened = []
        for alternative in head.children:
            nested = self._collapse_iambig(alternative.children)
            if nested:
                # Nested alternatives already exist: append our tail to each.
                for tree in nested:
                    tree.children += tail
                flattened += nested
            else:
                flattened.append(
                    self.tree_class("_inter", alternative.children + tail)
                )
        return flattened

    def __call__(self, children):
        collapsed = self._collapse_iambig(children)
        if not collapsed:
            return self.node_builder(children)

        processed_nodes = [self.node_builder(tree.children) for tree in collapsed]
        return self.tree_class("_ambig", processed_nodes)
2456
-
2457
-
2458
def inplace_transformer(func):
    """Adapt a tree-taking callback to the children-taking callback protocol.

    The returned function wraps the received children in a ``Tree`` named
    after ``func`` and passes that tree to ``func``.
    """

    @wraps(func)
    def call_with_tree(children):
        return func(Tree(func.__name__, children))

    return call_with_tree
2467
-
2468
-
2469
def apply_visit_wrapper(func, name, wrapper):
    """Bind ``wrapper`` around ``func`` for use as a children callback.

    The returned function calls ``wrapper(func, name, children, None)``.

    Raises:
        NotImplementedError: if ``wrapper`` is one of the meta-args wrappers
            (``_vargs_meta`` / ``_vargs_meta_inline``).
    """
    needs_meta = wrapper is _vargs_meta or wrapper is _vargs_meta_inline
    if needs_meta:
        raise NotImplementedError("Meta args not supported for internal transformer")

    @wraps(func)
    def wrapped_callback(children):
        # The last argument (the meta slot) is always passed as None here.
        return wrapper(func, name, children, None)

    return wrapped_callback
2478
-
2479
-
2480
class ParseTreeBuilder:
    """Prepares, per grammar rule, the chain of wrappers used to build nodes.

    ``rule_builders`` is a list of ``(rule, wrapper_chain)`` pairs computed
    at construction time; ``create_callback`` turns it into a
    ``{rule: callback}`` mapping for a given transformer.
    """

    def __init__(
        self,
        rules,
        tree_class,
        propagate_positions=False,
        ambiguous=False,
        maybe_placeholders=False,
    ):
        self.tree_class = tree_class
        self.propagate_positions = propagate_positions
        self.ambiguous = ambiguous
        self.maybe_placeholders = maybe_placeholders

        self.rule_builders = list(self._init_builders(rules))

    def _init_builders(self, rules):
        """Yield ``(rule, wrapper_chain)`` for every rule.

        Each chain contains only the wrappers that apply to the rule; entries
        that evaluate to a false value are dropped.
        """
        propagate_positions = make_propagate_positions(self.propagate_positions)

        for rule in rules:
            options = rule.options
            keep_all_tokens = options.keep_all_tokens
            expand_single_child = options.expand1

            candidates = [
                ExpandSingleChild if (expand_single_child and not rule.alias) else None,
                maybe_create_child_filter(
                    rule.expansion,
                    keep_all_tokens,
                    self.ambiguous,
                    options.empty_indices if self.maybe_placeholders else None,
                ),
                propagate_positions,
                self.ambiguous
                and maybe_create_ambiguous_expander(
                    self.tree_class, rule.expansion, keep_all_tokens
                ),
                self.ambiguous
                and partial(AmbiguousIntermediateExpander, self.tree_class),
            ]
            wrapper_chain = [wrapper for wrapper in candidates if wrapper]

            yield rule, wrapper_chain

    def create_callback(self, transformer=None):
        """Return a ``{rule: callback}`` dict for ``transformer``.

        For each rule the callback name is the rule alias, else its
        ``template_source``, else its origin name.  If the transformer has an
        attribute of that name it is used (adapted through its
        ``visit_wrapper`` or, for ``Transformer_InPlace``, through
        ``inplace_transformer``); otherwise the default callback is used:
        the transformer's ``__default__`` when it has one, else
        ``tree_class``.  The rule's wrapper chain is then applied in order.

        Raises:
            GrammarError: if the same rule appears twice.
        """
        default_handler = getattr(transformer, "__default__", None)
        if default_handler:

            def default_callback(data, children):
                return default_handler(data, children, None)

        else:
            default_callback = self.tree_class

        callbacks = {}
        for rule, wrapper_chain in self.rule_builders:
            user_callback_name = (
                rule.alias or rule.options.template_source or rule.origin.name
            )
            try:
                callback = getattr(transformer, user_callback_name)
                wrapper = getattr(callback, "visit_wrapper", None)
                if wrapper is not None:
                    callback = apply_visit_wrapper(callback, user_callback_name, wrapper)
                elif isinstance(transformer, Transformer_InPlace):
                    callback = inplace_transformer(callback)
            except AttributeError:
                # No user callback of that name: fall back to the default.
                callback = partial(default_callback, user_callback_name)

            for wrap in wrapper_chain:
                callback = wrap(callback)

            if rule in callbacks:
                raise GrammarError("Rule '%s' already exists" % (rule,))

            callbacks[rule] = callback

        return callbacks
2564
-
2565
-
2566
-
2567
-
2568
-
2569
class Action:
    """Named marker object; ``str()`` and ``repr()`` both give its name."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return self.name

    def __repr__(self):
        return str(self)


# The two marker instances stored in parse-table entries; they are compared
# by identity (``is Shift`` / ``is Reduce``) elsewhere in this file.
Shift = Action("Shift")
Reduce = Action("Reduce")

# Type variable for the parser-state key type used by the generic classes
# below.
StateT = TypeVar("StateT")
2584
-
2585
-
2586
class ParseTableBase(Generic[StateT]):
    """Parse table: per-state action rows plus start/end states per start rule.

    ``states[state][token]`` is an ``(action, arg)`` pair where ``action`` is
    ``Shift`` or ``Reduce``.
    """

    states: Dict[StateT, Dict[str, Tuple]]
    start_states: Dict[str, StateT]
    end_states: Dict[str, StateT]

    def __init__(self, states, start_states, end_states):
        self.states = states
        self.start_states = start_states
        self.end_states = end_states

    def serialize(self, memo):
        """Return a plain-dict form of the table.

        Token names are replaced by ids handed out by an ``Enumerator``;
        reduce entries become ``(1, <serialized arg>)`` and all other entries
        ``(0, arg)``.
        """
        tokens = Enumerator()

        states = {}
        for state, actions in self.states.items():
            row = {}
            for token, (action, arg) in actions.items():
                token_id = tokens.get(token)
                if action is Reduce:
                    row[token_id] = (1, arg.serialize(memo))
                else:
                    row[token_id] = (0, arg)
            states[state] = row

        return {
            "tokens": tokens.reversed(),
            "states": states,
            "start_states": self.start_states,
            "end_states": self.end_states,
        }

    @classmethod
    def deserialize(cls, data, memo):
        """Rebuild a table from the dict produced by ``serialize``."""
        tokens = data["tokens"]

        states = {}
        for state, actions in data["states"].items():
            row = {}
            for token, (action, arg) in actions.items():
                token_name = tokens[token]
                if action == 1:
                    row[token_name] = (Reduce, Rule.deserialize(arg, memo))
                else:
                    row[token_name] = (Shift, arg)
            states[state] = row

        return cls(states, data["start_states"], data["end_states"])
2631
-
2632
-
2633
class ParseTable(ParseTableBase["State"]):
    """Parse table parameterised with ``"State"`` as its state type.

    Adds nothing to ``ParseTableBase``.
    """
2637
-
2638
-
2639
class IntParseTable(ParseTableBase[int]):
    """Parse table whose states are integer indices."""

    @classmethod
    def from_ParseTable(cls, parse_table: ParseTable):
        """Build an integer-indexed copy of ``parse_table``.

        Every state is replaced by its position in the iteration order of
        ``parse_table.states``; shift targets and the start/end states are
        renumbered accordingly, while non-shift entries are kept as they are.
        """
        state_to_idx: Dict["State", int] = {
            state: idx for idx, state in enumerate(parse_table.states)
        }

        int_states = {}
        for state, row in parse_table.states.items():
            renumbered = {}
            for key, entry in row.items():
                if entry[0] is Shift:
                    renumbered[key] = (entry[0], state_to_idx[entry[1]])
                else:
                    renumbered[key] = entry
            int_states[state_to_idx[state]] = renumbered

        start_states = {}
        for start, state in parse_table.start_states.items():
            start_states[start] = state_to_idx[state]

        end_states = {}
        for start, state in parse_table.end_states.items():
            end_states[start] = state_to_idx[state]

        return cls(int_states, start_states, end_states)
2662
-
2663
-
2664
-
2665
-
2666
-
2667
class ParseConf(Generic[StateT]):
    """Bundle of parse table, callbacks and start rule for a single parse.

    ``start_state``, ``end_state`` and ``states`` are looked up once from the
    table at construction time.

    Raises:
        KeyError: from ``__init__`` when ``start`` is not a key of the
            table's start/end states.
    """

    __slots__ = (
        "parse_table",
        "callbacks",
        "start",
        "start_state",
        "end_state",
        "states",
    )

    parse_table: ParseTableBase[StateT]
    callbacks: ParserCallbacks
    start: str

    start_state: StateT
    end_state: StateT
    states: Dict[StateT, Dict[str, tuple]]

    def __init__(
        self,
        parse_table: ParseTableBase[StateT],
        callbacks: ParserCallbacks,
        start: str,
    ):
        self.parse_table = parse_table

        # Cache the table lookups for the chosen start rule.
        self.start_state = parse_table.start_states[start]
        self.end_state = parse_table.end_states[start]
        self.states = parse_table.states

        self.callbacks = callbacks
        self.start = start
2700
-
2701
class ParserState(Generic[StateT]):
    """Mutable state of a running parse: a state stack and a value stack.

    NOTE(review): ``__eq__`` is defined without ``__hash__``, so instances are
    unhashable by Python's default rules.
    """

    __slots__ = "parse_conf", "lexer", "state_stack", "value_stack"

    parse_conf: ParseConf[StateT]
    lexer: LexerThread
    state_stack: List[StateT]
    value_stack: list

    def __init__(
        self,
        parse_conf: ParseConf[StateT],
        lexer: LexerThread,
        state_stack=None,
        value_stack=None,
    ):
        self.parse_conf = parse_conf
        self.lexer = lexer
        # A missing (or empty) stack is replaced by a fresh one.
        self.state_stack = state_stack or [self.parse_conf.start_state]
        self.value_stack = value_stack or []

    @property
    def position(self) -> StateT:
        """The state on top of the state stack."""
        return self.state_stack[-1]

    def __eq__(self, other) -> bool:
        """Equal when both stacks have the same depth and the same top state."""
        if not isinstance(other, ParserState):
            return NotImplemented
        same_depth = len(self.state_stack) == len(other.state_stack)
        return same_depth and self.position == other.position

    def __copy__(self):
        """Copy with a shallow-copied state stack and deep-copied value stack.

        ``parse_conf`` and ``lexer`` are shared with the original.
        """
        return type(self)(
            self.parse_conf,
            self.lexer,
            copy(self.state_stack),
            deepcopy(self.value_stack),
        )

    def copy(self) -> "ParserState[StateT]":
        return copy(self)

    def feed_token(self, token: Token, is_end=False) -> Any:
        """Advance the parse by one token.

        Performs reductions until the token can be shifted.  When ``is_end``
        is true the token is never shifted; instead the method returns the
        top of the value stack once the end state is reached.

        Returns:
            ``None`` after a shift, or the final value when ``is_end`` is
            true and the end state has been reached.

        Raises:
            UnexpectedToken: when the current state has no entry for
                ``token.type``.
        """
        state_stack = self.state_stack
        value_stack = self.value_stack
        conf = self.parse_conf
        states = conf.states
        end_state = conf.end_state
        callbacks = conf.callbacks

        while True:
            current = state_stack[-1]
            try:
                action, arg = states[current][token.type]
            except KeyError:
                # Only upper-case keys are reported as expected.
                expected = {s for s in states[current].keys() if s.isupper()}
                raise UnexpectedToken(
                    token, expected, state=self, interactive_parser=None
                )

            assert arg != end_state

            if action is Shift:
                assert not is_end
                state_stack.append(arg)
                if token.type not in callbacks:
                    shifted = token
                else:
                    shifted = callbacks[token.type](token)
                value_stack.append(shifted)
                return

            # Otherwise: reduce by the rule stored in the table entry.
            rule = arg
            size = len(rule.expansion)
            if size:
                popped = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                popped = []

            value = callbacks[rule](popped) if callbacks else popped

            _action, new_state = states[state_stack[-1]][rule.origin.name]
            assert _action is Shift
            state_stack.append(new_state)
            value_stack.append(value)

            if is_end and state_stack[-1] == end_state:
                return value_stack[-1]
2798
-
2799
-
2800
-
2801
-
2802
-
2803
class LALR_Parser(Serialize):
    """Front object for LALR parsing: owns the parse table and a ``_Parser``."""

    def __init__(
        self, parser_conf: ParserConf, debug: bool = False, strict: bool = False
    ):
        analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
        analysis.compute_lalr()
        callbacks = parser_conf.callbacks

        parse_table = analysis.parse_table
        self._parse_table = parse_table
        self.parser_conf = parser_conf
        self.parser = _Parser(parse_table, callbacks, debug)

    @classmethod
    def deserialize(cls, data, memo, callbacks, debug=False):
        """Rebuild a parser from serialized table data, bypassing ``__init__``.

        Note that ``parser_conf`` is not set on the returned instance.
        """
        inst = cls.__new__(cls)
        inst._parse_table = IntParseTable.deserialize(data, memo)
        inst.parser = _Parser(inst._parse_table, callbacks, debug)
        return inst

    def serialize(self, memo: Any = None) -> Dict[str, Any]:
        return self._parse_table.serialize(memo)

    def parse_interactive(self, lexer: LexerThread, start: str):
        return self.parser.parse(lexer, start, start_interactive=True)

    def parse(self, lexer, start, on_error=None):
        """Parse, optionally recovering from errors through ``on_error``.

        Without ``on_error`` any ``UnexpectedInput`` propagates.  Otherwise
        ``on_error(e)`` is called for each error; a false result re-raises
        the error, a true result resumes parsing from the error's
        ``interactive_parser``.
        """
        try:
            return self.parser.parse(lexer, start)
        except UnexpectedInput as e:
            if on_error is None:
                raise

            while True:
                char_error = isinstance(e, UnexpectedCharacters)
                if char_error:
                    lex_state = e.interactive_parser.lexer_thread.state
                    pos_before = lex_state.line_ctr.char_pos

                if not on_error(e):
                    raise e

                if char_error and pos_before == lex_state.line_ctr.char_pos:
                    # The handler left the position untouched: advance the
                    # line counter over one character of the text.
                    lex_state.line_ctr.feed(
                        lex_state.text[pos_before : pos_before + 1]
                    )

                try:
                    return e.interactive_parser.resume_parse()
                except UnexpectedToken as e2:
                    same_end_error = (
                        isinstance(e, UnexpectedToken)
                        and e.token.type == e2.token.type == "$END"
                        and e.interactive_parser == e2.interactive_parser
                    )
                    if same_end_error:
                        # Same $END failure from an equal parser: give up
                        # rather than retrying again.
                        raise e2
                    e = e2
                except UnexpectedCharacters as e2:
                    e = e2
2863
-
2864
-
2865
class _Parser:
    """Runs a parse table against a lexer, feeding tokens to a ``ParserState``."""

    parse_table: ParseTableBase
    callbacks: ParserCallbacks
    debug: bool

    def __init__(
        self,
        parse_table: ParseTableBase,
        callbacks: ParserCallbacks,
        debug: bool = False,
    ):
        self.parse_table = parse_table
        self.callbacks = callbacks
        self.debug = debug

    def parse(
        self,
        lexer: LexerThread,
        start: str,
        value_stack=None,
        state_stack=None,
        start_interactive=False,
    ):
        """Parse from ``start``; return an ``InteractiveParser`` instead of a
        result when ``start_interactive`` is true."""
        parse_conf = ParseConf(self.parse_table, self.callbacks, start)
        parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
        if not start_interactive:
            return self.parse_from_state(parser_state)
        return InteractiveParser(self, parser_state, parser_state.lexer)

    def parse_from_state(self, state: ParserState, last_token: Optional[Token] = None):
        """Feed every remaining token, then an ``$END`` token, to ``state``.

        ``last_token`` is used to position the ``$END`` token when the lexer
        yields nothing.  On ``UnexpectedInput`` an ``InteractiveParser`` for
        the current state is attached to the exception before re-raising.
        With ``debug`` enabled, any other exception triggers a dump of the
        state stack to stdout before it propagates.
        """
        try:
            token = last_token
            for token in state.lexer.lex(state):
                assert token is not None
                state.feed_token(token)

            if token:
                end_token = Token.new_borrow_pos("$END", "", token)
            else:
                end_token = Token("$END", "", 0, 1, 1)
            return state.feed_token(end_token, True)
        except UnexpectedInput as e:
            try:
                e.interactive_parser = InteractiveParser(self, state, state.lexer)
            except NameError:
                # InteractiveParser is not defined in this module's namespace.
                pass
            raise e
        except Exception:
            if self.debug:
                print("")
                print("STATE STACK DUMP")
                print("----------------")
                for depth, entry in enumerate(state.state_stack):
                    print("%d)" % depth, entry)
                print("")

            raise
2924
-
2925
-
2926
-
2927
-
2928
-
2929
class InteractiveParser:
    """Step-by-step handle on a parse: a parser, its state and a lexer thread.

    ``result`` holds the value returned by the most recent ``feed_token``
    call made through ``iter_parse``.
    """

    def __init__(self, parser, parser_state, lexer_thread: LexerThread):
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_thread = lexer_thread
        self.result = None

    @property
    def lexer_state(self) -> LexerThread:
        """Deprecated alias of ``lexer_thread``; emits a DeprecationWarning."""
        warnings.warn(
            "lexer_state will be removed in subsequent releases. Use lexer_thread instead.",
            DeprecationWarning,
        )
        return self.lexer_thread

    def feed_token(self, token: Token):
        """Feed one token to the parser state.

        A token of type ``$END`` is fed as the end-of-input token.
        """
        is_end = token.type == "$END"
        return self.parser_state.feed_token(token, is_end)

    def iter_parse(self) -> Iterator[Token]:
        """Yield each token from the lexer, feeding it after it is yielded."""
        for token in self.lexer_thread.lex(self.parser_state):
            yield token
            self.result = self.feed_token(token)

    def exhaust_lexer(self) -> List[Token]:
        """Run ``iter_parse`` to completion and return the tokens seen."""
        return list(self.iter_parse())

    def feed_eof(self, last_token=None):
        """Feed an ``$END`` token, positioned after ``last_token`` if given."""
        if last_token is None:
            eof = self.lexer_thread._Token("$END", "", 0, 1, 1)
        else:
            eof = Token.new_borrow_pos("$END", "", last_token)
        return self.feed_token(eof)

    def __copy__(self):
        """New instance with copied parser state and lexer thread.

        The ``parser`` object itself is shared.
        """
        return type(self)(
            self.parser,
            copy(self.parser_state),
            copy(self.lexer_thread),
        )

    def copy(self):
        return copy(self)

    def __eq__(self, other):
        if not isinstance(other, InteractiveParser):
            return False

        same_state = self.parser_state == other.parser_state
        return same_state and self.lexer_thread == other.lexer_thread

    def as_immutable(self):
        """Return an ``ImmutableInteractiveParser`` built from a copy of self."""
        clone = copy(self)
        return ImmutableInteractiveParser(
            clone.parser, clone.parser_state, clone.lexer_thread
        )

    def pretty(self):
        """Return a multi-line description of the choices and stack size."""
        lines = ["Parser choices:"]
        for key, value in self.choices().items():
            lines.append("\t- %s -> %r" % (key, value))
        lines.append("stack size: %s" % len(self.parser_state.state_stack))
        return "\n".join(lines)

    def choices(self):
        """Return the parse-table row of the current state."""
        table = self.parser_state.parse_conf.parse_table
        return table.states[self.parser_state.position]

    def accepts(self):
        """Return the set of upper-case choice names that can be fed now.

        Each candidate is tried on a copy of this parser whose configuration
        has an empty callbacks mapping, so no callback runs during the probe.
        """
        accepted = set()
        conf_no_callbacks = copy(self.parser_state.parse_conf)
        conf_no_callbacks.callbacks = {}

        for name in self.choices():
            if not name.isupper():
                continue
            probe = copy(self)
            probe.parser_state.parse_conf = conf_no_callbacks
            try:
                probe.feed_token(self.lexer_thread._Token(name, ""))
            except UnexpectedToken:
                continue
            accepted.add(name)

        return accepted

    def resume_parse(self):
        """Continue parsing from the current state to the end of the input."""
        return self.parser.parse_from_state(
            self.parser_state, last_token=self.lexer_thread.state.last_token
        )
3035
-
3036
-
3037
class ImmutableInteractiveParser(InteractiveParser):
    """``InteractiveParser`` variant whose operations return new instances.

    ``feed_token`` and ``exhaust_lexer`` work on a copy and leave this
    instance untouched.
    """

    result = None

    def __hash__(self):
        # NOTE(review): requires both members to be hashable; confirm for
        # the parser-state and lexer-thread types in use.
        return hash((self.parser_state, self.lexer_thread))

    def feed_token(self, token):
        """Return a copy of self advanced by ``token``.

        The value produced by feeding the token is stored in the copy's
        ``result``.
        """
        advanced = copy(self)
        advanced.result = InteractiveParser.feed_token(advanced, token)
        return advanced

    def exhaust_lexer(self):
        """Return a new immutable parser that has consumed all lexer tokens."""
        mutable = self.as_mutable()
        mutable.exhaust_lexer()
        return mutable.as_immutable()

    def as_mutable(self):
        """Return a plain ``InteractiveParser`` built from a copy of self."""
        clone = copy(self)
        return InteractiveParser(clone.parser, clone.parser_state, clone.lexer_thread)
3060
-
3061
-
3062
-
3063
-
3064
-
3065
- def _wrap_lexer(lexer_class):
3066
- future_interface = getattr(lexer_class, "__future_interface__", False)
3067
- if future_interface:
3068
- return lexer_class
3069
- else:
3070
-
3071
- class CustomLexerWrapper(Lexer):
3072
- def __init__(self, lexer_conf):
3073
- self.lexer = lexer_class(lexer_conf)
3074
-
3075
- def lex(self, lexer_state, parser_state):
3076
- return self.lexer.lex(lexer_state.text)
3077
-
3078
- return CustomLexerWrapper
3079
-
3080
-
3081
def _deserialize_parsing_frontend(data, memo, lexer_conf, callbacks, options):
    """Rebuild a ``ParsingFrontend`` from serialized ``data``.

    Args:
        data: mapping with ``"parser_conf"`` and ``"parser"`` entries.
        memo: deserialization memo passed through to the ``deserialize`` calls.
        lexer_conf: lexer configuration handed to the frontend.
        callbacks: parser callbacks; also stored on the parser configuration.
        options: options object providing ``_plugins`` and ``debug``; may be
            ``None``.

    Returns:
        A ``ParsingFrontend`` wrapping the deserialized parser.
    """
    parser_conf = ParserConf.deserialize(data["parser_conf"], memo)
    cls = (options and options._plugins.get("LALR_Parser")) or LALR_Parser
    # ``options`` is already treated as optional when picking the class, so
    # read ``debug`` with the same guard instead of failing on ``None``.
    debug = options.debug if options else False
    parser = cls.deserialize(data["parser"], memo, callbacks, debug)
    parser_conf.callbacks = callbacks
    return ParsingFrontend(lexer_conf, parser_conf, options, parser=parser)
3087
-
3088
-
3089
- _parser_creators: "Dict[str, Callable[[LexerConf, Any, Any], Any]]" = {}
3090
-
3091
-
3092
class ParsingFrontend(Serialize):
    """Couples a parser with the lexer it should read from.

    The parser is either supplied or created through ``_parser_creators``;
    the lexer is chosen from ``lexer_conf.lexer_type`` (a ``Lexer`` subclass,
    ``"basic"`` or ``"contextual"``).  For ``"dynamic"`` and
    ``"dynamic_complete"`` no lexer is created and the raw text is passed to
    the parser.
    """

    __serialize_fields__ = "lexer_conf", "parser_conf", "parser"

    lexer_conf: LexerConf
    parser_conf: ParserConf
    options: Any

    def __init__(
        self, lexer_conf: LexerConf, parser_conf: ParserConf, options, parser=None
    ):
        """Set up the parser and the lexer.

        Raises:
            TypeError: if ``lexer_conf.lexer_type`` is neither a type nor a
                string.
            KeyError: if it is a string other than the supported names.
        """
        self.parser_conf = parser_conf
        self.lexer_conf = lexer_conf
        self.options = options

        # Set up the parser
        if parser:  # a ready-made parser was supplied
            self.parser = parser
        else:
            create_parser = _parser_creators.get(parser_conf.parser_type)
            assert (
                create_parser is not None
            ), "{} is not supported in standalone mode".format(parser_conf.parser_type)
            self.parser = create_parser(lexer_conf, parser_conf, options)

        # Set up the lexer
        lexer_type = lexer_conf.lexer_type
        self.skip_lexer = False
        if lexer_type in ("dynamic", "dynamic_complete"):
            assert lexer_conf.postlex is None
            self.skip_lexer = True
            return

        if isinstance(lexer_type, type):
            assert issubclass(lexer_type, Lexer)
            self.lexer = _wrap_lexer(lexer_type)(lexer_conf)
        elif isinstance(lexer_type, str):
            create_lexer = {
                "basic": create_basic_lexer,
                "contextual": create_contextual_lexer,
            }[lexer_type]
            self.lexer = create_lexer(
                lexer_conf, self.parser, lexer_conf.postlex, options
            )
        else:
            # The placeholder used to be emitted literally because the string
            # was never formatted; substitute the offending value.
            raise TypeError(
                "Bad value for lexer_type: {lexer_type}".format(lexer_type=lexer_type)
            )

        if lexer_conf.postlex:
            self.lexer = PostLexConnector(self.lexer, lexer_conf.postlex)

    def _verify_start(self, start=None):
        """Return the start rule to use.

        With ``start=None`` the single configured start rule is returned.

        Raises:
            ConfigurationError: if ``start`` is ``None`` and several start
                rules are configured, or if ``start`` is not one of them.
        """
        if start is None:
            start_decls = self.parser_conf.start
            if len(start_decls) > 1:
                raise ConfigurationError(
                    "Lark initialized with more than 1 possible start rule. Must specify which start rule to parse",
                    start_decls,
                )
            (start,) = start_decls
        elif start not in self.parser_conf.start:
            raise ConfigurationError(
                "Unknown start rule %s. Must be one of %r"
                % (start, self.parser_conf.start)
            )
        return start

    def _make_lexer_thread(self, text: str) -> Union[str, LexerThread]:
        """Return ``text`` itself when lexing is skipped, else a lexer thread."""
        cls = (self.options and self.options._plugins.get("LexerThread")) or LexerThread
        return text if self.skip_lexer else cls.from_text(self.lexer, text)

    def parse(self, text: str, start=None, on_error=None):
        """Parse ``text`` from ``start``.

        ``on_error`` is forwarded to the parser only when it is not ``None``.
        """
        chosen_start = self._verify_start(start)
        kw = {} if on_error is None else {"on_error": on_error}
        stream = self._make_lexer_thread(text)
        return self.parser.parse(stream, chosen_start, **kw)

    def parse_interactive(self, text: Optional[str] = None, start=None):
        """Start an interactive parse of ``text``.

        Raises:
            ConfigurationError: if the configured parser type is not
                ``"lalr"``.
        """
        chosen_start = self._verify_start(start)
        if self.parser_conf.parser_type != "lalr":
            raise ConfigurationError(
                "parse_interactive() currently only works with parser='lalr' "
            )
        stream = self._make_lexer_thread(text)
        return self.parser.parse_interactive(stream, chosen_start)
3183
-
3184
-
3185
def _validate_frontend_args(parser, lexer) -> None:
    """Check that ``parser`` is known and that ``lexer`` is compatible with it.

    Validation is delegated to ``assert_config``.  When ``lexer`` is a class
    rather than a name, only ``parser`` is checked.
    """
    assert_config(parser, ("lalr", "earley", "cyk"))
    if isinstance(lexer, type):
        return

    supported_lexers = {
        "lalr": ("basic", "contextual"),
        "earley": ("basic", "dynamic", "dynamic_complete"),
        "cyk": ("basic",),
    }
    # Only the parser name is substituted here; the escaped %%r / %%s are
    # left as %r / %s in the message passed to assert_config.
    message = "Parser %r does not support lexer %%r, expected one of %%s" % parser
    assert_config(lexer, supported_lexers[parser], message)
3199
-
3200
-
3201
- def _get_lexer_callbacks(transformer, terminals):
3202
- result = {}
3203
- for terminal in terminals:
3204
- callback = getattr(transformer, terminal.name, None)
3205
- if callback is not None:
3206
- result[terminal.name] = callback
3207
- return result
3208
-
3209
-
3210
class PostLexConnector:
    """Lexer adapter that pipes a lexer's output through a post-lexer."""

    def __init__(self, lexer, postlexer):
        self.lexer = lexer
        self.postlexer = postlexer

    def lex(self, lexer_state, parser_state):
        """Lex with the wrapped lexer and return ``postlexer.process`` of it."""
        raw_stream = self.lexer.lex(lexer_state, parser_state)
        return self.postlexer.process(raw_stream)
3218
-
3219
-
3220
def create_basic_lexer(lexer_conf, parser, postlex, options) -> BasicLexer:
    """Instantiate the basic lexer for ``lexer_conf``.

    A ``"BasicLexer"`` entry in ``options._plugins`` overrides the default
    class.  ``parser`` and ``postlex`` are accepted but not used.
    """
    plugin_cls = options and options._plugins.get("BasicLexer")
    lexer_cls = plugin_cls or BasicLexer
    return lexer_cls(lexer_conf)
3223
-
3224
-
3225
def create_contextual_lexer(
    lexer_conf: LexerConf, parser, postlex, options
) -> ContextualLexer:
    """Instantiate the contextual lexer from the parser's parse table.

    For every state of ``parser._parse_table`` the list of keys of its row is
    passed to the lexer, together with ``postlex.always_accept`` (or an empty
    tuple when there is no post-lexer).  A ``"ContextualLexer"`` entry in
    ``options._plugins`` overrides the default class.
    """
    cls = (options and options._plugins.get("ContextualLexer")) or ContextualLexer
    parse_table: ParseTableBase[int] = parser._parse_table

    states: Dict[int, Collection[str]] = {}
    for idx, row in parse_table.states.items():
        states[idx] = list(row.keys())

    if postlex:
        always_accept: Collection[str] = postlex.always_accept
    else:
        always_accept = ()
    return cls(lexer_conf, states, always_accept=always_accept)
3235
-
3236
-
3237
def create_lalr_parser(
    lexer_conf: LexerConf, parser_conf: ParserConf, options=None
) -> LALR_Parser:
    """Build the LALR parser for ``parser_conf``.

    ``debug`` and ``strict`` are taken from ``options`` and default to
    ``False`` without it.  A ``"LALR_Parser"`` entry in ``options._plugins``
    overrides the default class.  ``lexer_conf`` is accepted but not used.
    """
    if options:
        debug = options.debug
        strict = options.strict
    else:
        debug = False
        strict = False
    cls = (options and options._plugins.get("LALR_Parser")) or LALR_Parser
    return cls(parser_conf, debug=debug, strict=strict)


# Register the factory under the "lalr" parser type.
_parser_creators["lalr"] = create_lalr_parser
3247
-
3248
-
3249
-
3250
-
3251
class PostLex(ABC):
    """Abstract base for post-lexers, which transform a token stream."""

    # Names passed as ``always_accept`` when a contextual lexer is created
    # with this post-lexer; empty by default.
    always_accept: Iterable[str] = ()

    @abstractmethod
    def process(self, stream: Iterator[Token]) -> Iterator[Token]:
        """Transform ``stream``; this base implementation returns it unchanged."""
        return stream
3257
-
3258
-
3259
- class LarkOptions(Serialize):
3260
- #--
3261
-
3262
- start: List[str]
3263
- debug: bool
3264
- strict: bool
3265
- transformer: "Optional[Transformer]"
3266
- propagate_positions: Union[bool, str]
3267
- maybe_placeholders: bool
3268
- cache: Union[bool, str]
3269
- regex: bool
3270
- g_regex_flags: int
3271
- keep_all_tokens: bool
3272
- tree_class: Optional[Callable[[str, List], Any]]
3273
- parser: _ParserArgType
3274
- lexer: _LexerArgType
3275
- ambiguity: 'Literal["auto", "resolve", "explicit", "forest"]'
3276
- postlex: Optional[PostLex]
3277
- priority: 'Optional[Literal["auto", "normal", "invert"]]'
3278
- lexer_callbacks: Dict[str, Callable[[Token], Token]]
3279
- use_bytes: bool
3280
- ordered_sets: bool
3281
- edit_terminals: Optional[Callable[[TerminalDef], TerminalDef]]
3282
- import_paths: "List[Union[str, Callable[[Union[None, str, PackageResource], str], Tuple[str, str]]]]"
3283
- source_path: Optional[str]
3284
-
3285
- OPTIONS_DOC = r"""
3286
- **=== General Options ===**
3287
-
3288
- start
3289
- The start symbol. Either a string, or a list of strings for multiple possible starts (Default: "start")
3290
- debug
3291
- Display debug information and extra warnings. Use only when debugging (Default: ``False``)
3292
- When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
3293
- strict
3294
- Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions.
3295
- transformer
3296
- Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
3297
- propagate_positions
3298
- Propagates positional attributes into the 'meta' attribute of all tree branches.
3299
- Sets attributes: (line, column, end_line, end_column, start_pos, end_pos,
3300
- container_line, container_column, container_end_line, container_end_column)
3301
- Accepts ``False``, ``True``, or a callable, which will filter which nodes to ignore when propagating.
3302
- maybe_placeholders
3303
- When ``True``, the ``[]`` operator returns ``None`` when not matched.
3304
- When ``False``, ``[]`` behaves like the ``?`` operator, and returns no value at all.
3305
- (default= ``True``)
3306
- cache
3307
- Cache the results of the Lark grammar analysis, for x2 to x3 faster loading. LALR only for now.
3308
-
3309
- - When ``False``, does nothing (default)
3310
- - When ``True``, caches to a temporary file in the local directory
3311
- - When given a string, caches to the path pointed by the string
3312
- regex
3313
- When True, uses the ``regex`` module instead of the stdlib ``re``.
3314
- g_regex_flags
3315
- Flags that are applied to all terminals (both regex and strings)
3316
- keep_all_tokens
3317
- Prevent the tree builder from automagically removing "punctuation" tokens (Default: ``False``)
3318
- tree_class
3319
- Lark will produce trees comprised of instances of this class instead of the default ``lark.Tree``.
3320
-
3321
- **=== Algorithm Options ===**
3322
-
3323
- parser
3324
- Decides which parser engine to use. Accepts "earley" or "lalr". (Default: "earley").
3325
- (there is also a "cyk" option for legacy)
3326
- lexer
3327
- Decides whether or not to use a lexer stage
3328
-
3329
- - "auto" (default): Choose for me based on the parser
3330
- - "basic": Use a basic lexer
3331
- - "contextual": Stronger lexer (only works with parser="lalr")
3332
- - "dynamic": Flexible and powerful (only with parser="earley")
3333
- - "dynamic_complete": Same as dynamic, but tries *every* variation of tokenizing possible.
3334
- ambiguity
3335
- Decides how to handle ambiguity in the parse. Only relevant if parser="earley"
3336
-
3337
- - "resolve": The parser will automatically choose the simplest derivation
3338
- (it chooses consistently: greedy for tokens, non-greedy for rules)
3339
- - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
3340
- - "forest": The parser will return the root of the shared packed parse forest.
3341
-
3342
- **=== Misc. / Domain Specific Options ===**
3343
-
3344
- postlex
3345
- Lexer post-processing (Default: ``None``) Only works with the basic and contextual lexers.
3346
- priority
3347
- How priorities should be evaluated - "auto", ``None``, "normal", "invert" (Default: "auto")
3348
- lexer_callbacks
3349
- Dictionary of callbacks for the lexer. May alter tokens during lexing. Use with caution.
3350
- use_bytes
3351
- Accept an input of type ``bytes`` instead of ``str``.
3352
- ordered_sets
3353
- Should Earley use ordered-sets to achieve stable output (~10% slower than regular sets. Default: True)
3354
- edit_terminals
3355
- A callback for editing the terminals before parse.
3356
- import_paths
3357
- A List of either paths or loader functions to specify from where grammars are imported
3358
- source_path
3359
- Override the source of from where the grammar was loaded. Useful for relative imports and unconventional grammar loading
3360
- **=== End of Options ===**
3361
- """
3362
- if __doc__:
3363
- __doc__ += OPTIONS_DOC
3364
-
3365
- ##
3366
-
3367
- ##
3368
-
3369
- ##
3370
-
3371
- ##
3372
-
3373
- ##
3374
-
3375
- ##
3376
-
3377
- _defaults: Dict[str, Any] = {
3378
- "debug": False,
3379
- "strict": False,
3380
- "keep_all_tokens": False,
3381
- "tree_class": None,
3382
- "cache": False,
3383
- "postlex": None,
3384
- "parser": "earley",
3385
- "lexer": "auto",
3386
- "transformer": None,
3387
- "start": "start",
3388
- "priority": "auto",
3389
- "ambiguity": "auto",
3390
- "regex": False,
3391
- "propagate_positions": False,
3392
- "lexer_callbacks": {},
3393
- "maybe_placeholders": True,
3394
- "edit_terminals": None,
3395
- "g_regex_flags": 0,
3396
- "use_bytes": False,
3397
- "ordered_sets": True,
3398
- "import_paths": [],
3399
- "source_path": None,
3400
- "_plugins": {},
3401
- }
3402
-
3403
- def __init__(self, options_dict: Dict[str, Any]) -> None:
3404
- o = dict(options_dict)
3405
-
3406
- options = {}
3407
- for name, default in self._defaults.items():
3408
- if name in o:
3409
- value = o.pop(name)
3410
- if isinstance(default, bool) and name not in (
3411
- "cache",
3412
- "use_bytes",
3413
- "propagate_positions",
3414
- ):
3415
- value = bool(value)
3416
- else:
3417
- value = default
3418
-
3419
- options[name] = value
3420
-
3421
- if isinstance(options["start"], str):
3422
- options["start"] = [options["start"]]
3423
-
3424
- self.__dict__["options"] = options
3425
-
3426
- assert_config(self.parser, ("earley", "lalr", "cyk", None))
3427
-
3428
- if self.parser == "earley" and self.transformer:
3429
- raise ConfigurationError(
3430
- "Cannot specify an embedded transformer when using the Earley algorithm. "
3431
- "Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)"
3432
- )
3433
-
3434
- if o:
3435
- raise ConfigurationError("Unknown options: %s" % o.keys())
3436
-
3437
- def __getattr__(self, name: str) -> Any:
3438
- try:
3439
- return self.__dict__["options"][name]
3440
- except KeyError as e:
3441
- raise AttributeError(e)
3442
-
3443
- def __setattr__(self, name: str, value: str) -> None:
3444
- assert_config(
3445
- name, self.options.keys(), "%r isn't a valid option. Expected one of: %s"
3446
- )
3447
- self.options[name] = value
3448
-
3449
- def serialize(self, memo=None) -> Dict[str, Any]:
3450
- return self.options
3451
-
3452
- @classmethod
3453
- def deserialize(
3454
- cls, data: Dict[str, Any], memo: Dict[int, Union[TerminalDef, Rule]]
3455
- ) -> "LarkOptions":
3456
- return cls(data)
3457
-
3458
-
3459
- ##
3460
-
3461
- ##
3462
-
3463
- _LOAD_ALLOWED_OPTIONS = {
3464
- "postlex",
3465
- "transformer",
3466
- "lexer_callbacks",
3467
- "use_bytes",
3468
- "debug",
3469
- "g_regex_flags",
3470
- "regex",
3471
- "propagate_positions",
3472
- "tree_class",
3473
- "_plugins",
3474
- }
3475
-
3476
- _VALID_PRIORITY_OPTIONS = ("auto", "normal", "invert", None)
3477
- _VALID_AMBIGUITY_OPTIONS = ("auto", "resolve", "explicit", "forest")
3478
-
3479
-
3480
- _T = TypeVar("_T", bound="Lark")
3481
-
3482
-
3483
- class Lark(Serialize):
3484
- #--
3485
-
3486
- source_path: str
3487
- source_grammar: str
3488
- grammar: "Grammar"
3489
- options: LarkOptions
3490
- lexer: Lexer
3491
- parser: "ParsingFrontend"
3492
- terminals: Collection[TerminalDef]
3493
-
3494
- def __init__(self, grammar: "Union[Grammar, str, IO[str]]", **options) -> None:
3495
- self.options = LarkOptions(options)
3496
- re_module: types.ModuleType
3497
-
3498
- ##
3499
-
3500
- use_regex = self.options.regex
3501
- if use_regex:
3502
- if _has_regex:
3503
- re_module = regex
3504
- else:
3505
- raise ImportError(
3506
- "`regex` module must be installed if calling `Lark(regex=True)`."
3507
- )
3508
- else:
3509
- re_module = re
3510
-
3511
- ##
3512
-
3513
- if self.options.source_path is None:
3514
- try:
3515
- self.source_path = grammar.name ##
3516
-
3517
- except AttributeError:
3518
- self.source_path = "<string>"
3519
- else:
3520
- self.source_path = self.options.source_path
3521
-
3522
- ##
3523
-
3524
- try:
3525
- read = grammar.read ##
3526
-
3527
- except AttributeError:
3528
- pass
3529
- else:
3530
- grammar = read()
3531
-
3532
- cache_fn = None
3533
- cache_sha256 = None
3534
- if isinstance(grammar, str):
3535
- self.source_grammar = grammar
3536
- if self.options.use_bytes:
3537
- if not isascii(grammar):
3538
- raise ConfigurationError(
3539
- "Grammar must be ascii only, when use_bytes=True"
3540
- )
3541
-
3542
- if self.options.cache:
3543
- if self.options.parser != "lalr":
3544
- raise ConfigurationError(
3545
- "cache only works with parser='lalr' for now"
3546
- )
3547
-
3548
- unhashable = (
3549
- "transformer",
3550
- "postlex",
3551
- "lexer_callbacks",
3552
- "edit_terminals",
3553
- "_plugins",
3554
- )
3555
- options_str = "".join(
3556
- k + str(v) for k, v in options.items() if k not in unhashable
3557
- )
3558
- from . import __version__
3559
-
3560
- s = grammar + options_str + __version__ + str(sys.version_info[:2])
3561
- cache_sha256 = sha256_digest(s)
3562
-
3563
- if isinstance(self.options.cache, str):
3564
- cache_fn = self.options.cache
3565
- else:
3566
- if self.options.cache is not True:
3567
- raise ConfigurationError("cache argument must be bool or str")
3568
-
3569
- try:
3570
- username = getpass.getuser()
3571
- except Exception:
3572
- ##
3573
-
3574
- ##
3575
-
3576
- ##
3577
-
3578
- username = "unknown"
3579
-
3580
- cache_fn = (
3581
- tempfile.gettempdir()
3582
- + "/.lark_cache_%s_%s_%s_%s.tmp"
3583
- % (username, cache_sha256, *sys.version_info[:2])
3584
- )
3585
-
3586
- old_options = self.options
3587
- try:
3588
- with FS.open(cache_fn, "rb") as f:
3589
- logger.debug("Loading grammar from cache: %s", cache_fn)
3590
- ##
3591
-
3592
- for name in set(options) - _LOAD_ALLOWED_OPTIONS:
3593
- del options[name]
3594
- file_sha256 = f.readline().rstrip(b"\n")
3595
- cached_used_files = pickle.load(f)
3596
- if file_sha256 == cache_sha256.encode(
3597
- "utf8"
3598
- ) and verify_used_files(cached_used_files):
3599
- cached_parser_data = pickle.load(f)
3600
- self._load(cached_parser_data, **options)
3601
- return
3602
- except FileNotFoundError:
3603
- ##
3604
-
3605
- pass
3606
- except (
3607
- Exception
3608
- ): ##
3609
-
3610
- logger.exception(
3611
- "Failed to load Lark from cache: %r. We will try to carry on.",
3612
- cache_fn,
3613
- )
3614
-
3615
- ##
3616
-
3617
- ##
3618
-
3619
- self.options = old_options
3620
-
3621
- ##
3622
-
3623
- self.grammar, used_files = load_grammar(
3624
- grammar,
3625
- self.source_path,
3626
- self.options.import_paths,
3627
- self.options.keep_all_tokens,
3628
- )
3629
- else:
3630
- assert isinstance(grammar, Grammar)
3631
- self.grammar = grammar
3632
-
3633
- if self.options.lexer == "auto":
3634
- if self.options.parser == "lalr":
3635
- self.options.lexer = "contextual"
3636
- elif self.options.parser == "earley":
3637
- if self.options.postlex is not None:
3638
- logger.info(
3639
- "postlex can't be used with the dynamic lexer, so we use 'basic' instead. "
3640
- "Consider using lalr with contextual instead of earley"
3641
- )
3642
- self.options.lexer = "basic"
3643
- else:
3644
- self.options.lexer = "dynamic"
3645
- elif self.options.parser == "cyk":
3646
- self.options.lexer = "basic"
3647
- else:
3648
- assert False, self.options.parser
3649
- lexer = self.options.lexer
3650
- if isinstance(lexer, type):
3651
- assert issubclass(
3652
- lexer, Lexer
3653
- ) ##
3654
-
3655
- else:
3656
- assert_config(lexer, ("basic", "contextual", "dynamic", "dynamic_complete"))
3657
- if self.options.postlex is not None and "dynamic" in lexer:
3658
- raise ConfigurationError(
3659
- "Can't use postlex with a dynamic lexer. Use basic or contextual instead"
3660
- )
3661
-
3662
- if self.options.ambiguity == "auto":
3663
- if self.options.parser == "earley":
3664
- self.options.ambiguity = "resolve"
3665
- else:
3666
- assert_config(
3667
- self.options.parser,
3668
- ("earley", "cyk"),
3669
- "%r doesn't support disambiguation. Use one of these parsers instead: %s",
3670
- )
3671
-
3672
- if self.options.priority == "auto":
3673
- self.options.priority = "normal"
3674
-
3675
- if self.options.priority not in _VALID_PRIORITY_OPTIONS:
3676
- raise ConfigurationError(
3677
- "invalid priority option: %r. Must be one of %r"
3678
- % (self.options.priority, _VALID_PRIORITY_OPTIONS)
3679
- )
3680
- if self.options.ambiguity not in _VALID_AMBIGUITY_OPTIONS:
3681
- raise ConfigurationError(
3682
- "invalid ambiguity option: %r. Must be one of %r"
3683
- % (self.options.ambiguity, _VALID_AMBIGUITY_OPTIONS)
3684
- )
3685
-
3686
- if self.options.parser is None:
3687
- terminals_to_keep = "*"
3688
- elif self.options.postlex is not None:
3689
- terminals_to_keep = set(self.options.postlex.always_accept)
3690
- else:
3691
- terminals_to_keep = set()
3692
-
3693
- ##
3694
-
3695
- self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(
3696
- self.options.start, terminals_to_keep
3697
- )
3698
-
3699
- if self.options.edit_terminals:
3700
- for t in self.terminals:
3701
- self.options.edit_terminals(t)
3702
-
3703
- self._terminals_dict = {t.name: t for t in self.terminals}
3704
-
3705
- ##
3706
-
3707
- if self.options.priority == "invert":
3708
- for rule in self.rules:
3709
- if rule.options.priority is not None:
3710
- rule.options.priority = -rule.options.priority
3711
- for term in self.terminals:
3712
- term.priority = -term.priority
3713
- ##
3714
-
3715
- ##
3716
-
3717
- ##
3718
-
3719
- elif self.options.priority is None:
3720
- for rule in self.rules:
3721
- if rule.options.priority is not None:
3722
- rule.options.priority = None
3723
- for term in self.terminals:
3724
- term.priority = 0
3725
-
3726
- ##
3727
-
3728
- self.lexer_conf = LexerConf(
3729
- self.terminals,
3730
- re_module,
3731
- self.ignore_tokens,
3732
- self.options.postlex,
3733
- self.options.lexer_callbacks,
3734
- self.options.g_regex_flags,
3735
- use_bytes=self.options.use_bytes,
3736
- strict=self.options.strict,
3737
- )
3738
-
3739
- if self.options.parser:
3740
- self.parser = self._build_parser()
3741
- elif lexer:
3742
- self.lexer = self._build_lexer()
3743
-
3744
- if cache_fn:
3745
- logger.debug("Saving grammar to cache: %s", cache_fn)
3746
- try:
3747
- with FS.open(cache_fn, "wb") as f:
3748
- assert cache_sha256 is not None
3749
- f.write(cache_sha256.encode("utf8") + b"\n")
3750
- pickle.dump(used_files, f)
3751
- self.save(f, _LOAD_ALLOWED_OPTIONS)
3752
- except IOError as e:
3753
- logger.exception("Failed to save Lark to cache: %r.", cache_fn, e)
3754
-
3755
- if __doc__:
3756
- __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
3757
-
3758
- __serialize_fields__ = "parser", "rules", "options"
3759
-
3760
- def _build_lexer(self, dont_ignore: bool = False) -> BasicLexer:
3761
- lexer_conf = self.lexer_conf
3762
- if dont_ignore:
3763
- from copy import copy
3764
-
3765
- lexer_conf = copy(lexer_conf)
3766
- lexer_conf.ignore = ()
3767
- return BasicLexer(lexer_conf)
3768
-
3769
- def _prepare_callbacks(self) -> None:
3770
- self._callbacks = {}
3771
- ##
3772
-
3773
- if self.options.ambiguity != "forest":
3774
- self._parse_tree_builder = ParseTreeBuilder(
3775
- self.rules,
3776
- self.options.tree_class or Tree,
3777
- self.options.propagate_positions,
3778
- self.options.parser != "lalr" and self.options.ambiguity == "explicit",
3779
- self.options.maybe_placeholders,
3780
- )
3781
- self._callbacks = self._parse_tree_builder.create_callback(
3782
- self.options.transformer
3783
- )
3784
- self._callbacks.update(
3785
- _get_lexer_callbacks(self.options.transformer, self.terminals)
3786
- )
3787
-
3788
- def _build_parser(self) -> "ParsingFrontend":
3789
- self._prepare_callbacks()
3790
- _validate_frontend_args(self.options.parser, self.options.lexer)
3791
- parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
3792
- return _construct_parsing_frontend(
3793
- self.options.parser,
3794
- self.options.lexer,
3795
- self.lexer_conf,
3796
- parser_conf,
3797
- options=self.options,
3798
- )
3799
-
3800
- def save(self, f, exclude_options: Collection[str] = ()) -> None:
3801
- #--
3802
- if self.options.parser != "lalr":
3803
- raise NotImplementedError(
3804
- "Lark.save() is only implemented for the LALR(1) parser."
3805
- )
3806
- data, m = self.memo_serialize([TerminalDef, Rule])
3807
- if exclude_options:
3808
- data["options"] = {
3809
- n: v for n, v in data["options"].items() if n not in exclude_options
3810
- }
3811
- pickle.dump({"data": data, "memo": m}, f, protocol=pickle.HIGHEST_PROTOCOL)
3812
-
3813
- @classmethod
3814
- def load(cls: Type[_T], f) -> _T:
3815
- #--
3816
- inst = cls.__new__(cls)
3817
- return inst._load(f)
3818
-
3819
- def _deserialize_lexer_conf(
3820
- self,
3821
- data: Dict[str, Any],
3822
- memo: Dict[int, Union[TerminalDef, Rule]],
3823
- options: LarkOptions,
3824
- ) -> LexerConf:
3825
- lexer_conf = LexerConf.deserialize(data["lexer_conf"], memo)
3826
- lexer_conf.callbacks = options.lexer_callbacks or {}
3827
- lexer_conf.re_module = regex if options.regex else re
3828
- lexer_conf.use_bytes = options.use_bytes
3829
- lexer_conf.g_regex_flags = options.g_regex_flags
3830
- lexer_conf.skip_validation = True
3831
- lexer_conf.postlex = options.postlex
3832
- return lexer_conf
3833
-
3834
- def _load(self: _T, f: Any, **kwargs) -> _T:
3835
- if isinstance(f, dict):
3836
- d = f
3837
- else:
3838
- d = pickle.load(f)
3839
- memo_json = d["memo"]
3840
- data = d["data"]
3841
-
3842
- assert memo_json
3843
- memo = SerializeMemoizer.deserialize(
3844
- memo_json, {"Rule": Rule, "TerminalDef": TerminalDef}, {}
3845
- )
3846
- options = dict(data["options"])
3847
- if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
3848
- raise ConfigurationError(
3849
- "Some options are not allowed when loading a Parser: {}".format(
3850
- set(kwargs) - _LOAD_ALLOWED_OPTIONS
3851
- )
3852
- )
3853
- options.update(kwargs)
3854
- self.options = LarkOptions.deserialize(options, memo)
3855
- self.rules = [Rule.deserialize(r, memo) for r in data["rules"]]
3856
- self.source_path = "<deserialized>"
3857
- _validate_frontend_args(self.options.parser, self.options.lexer)
3858
- self.lexer_conf = self._deserialize_lexer_conf(
3859
- data["parser"], memo, self.options
3860
- )
3861
- self.terminals = self.lexer_conf.terminals
3862
- self._prepare_callbacks()
3863
- self._terminals_dict = {t.name: t for t in self.terminals}
3864
- self.parser = _deserialize_parsing_frontend(
3865
- data["parser"],
3866
- memo,
3867
- self.lexer_conf,
3868
- self._callbacks,
3869
- self.options, ##
3870
-
3871
- )
3872
- return self
3873
-
3874
- @classmethod
3875
- def _load_from_dict(cls, data, memo, **kwargs):
3876
- inst = cls.__new__(cls)
3877
- return inst._load({"data": data, "memo": memo}, **kwargs)
3878
-
3879
- @classmethod
3880
- def open(
3881
- cls: Type[_T], grammar_filename: str, rel_to: Optional[str] = None, **options
3882
- ) -> _T:
3883
- #--
3884
- if rel_to:
3885
- basepath = os.path.dirname(rel_to)
3886
- grammar_filename = os.path.join(basepath, grammar_filename)
3887
- with open(grammar_filename, encoding="utf8") as f:
3888
- return cls(f, **options)
3889
-
3890
- @classmethod
3891
- def open_from_package(
3892
- cls: Type[_T],
3893
- package: str,
3894
- grammar_path: str,
3895
- search_paths: "Sequence[str]" = [""],
3896
- **options
3897
- ) -> _T:
3898
- #--
3899
- package_loader = FromPackageLoader(package, search_paths)
3900
- full_path, text = package_loader(None, grammar_path)
3901
- options.setdefault("source_path", full_path)
3902
- options.setdefault("import_paths", [])
3903
- options["import_paths"].append(package_loader)
3904
- return cls(text, **options)
3905
-
3906
- def __repr__(self):
3907
- return "Lark(open(%r), parser=%r, lexer=%r, ...)" % (
3908
- self.source_path,
3909
- self.options.parser,
3910
- self.options.lexer,
3911
- )
3912
-
3913
- def lex(self, text: str, dont_ignore: bool = False) -> Iterator[Token]:
3914
- #--
3915
- lexer: Lexer
3916
- if not hasattr(self, "lexer") or dont_ignore:
3917
- lexer = self._build_lexer(dont_ignore)
3918
- else:
3919
- lexer = self.lexer
3920
- lexer_thread = LexerThread.from_text(lexer, text)
3921
- stream = lexer_thread.lex(None)
3922
- if self.options.postlex:
3923
- return self.options.postlex.process(stream)
3924
- return stream
3925
-
3926
- def get_terminal(self, name: str) -> TerminalDef:
3927
- #--
3928
- return self._terminals_dict[name]
3929
-
3930
- def parse_interactive(
3931
- self, text: Optional[str] = None, start: Optional[str] = None
3932
- ) -> "InteractiveParser":
3933
- #--
3934
- return self.parser.parse_interactive(text, start=start)
3935
-
3936
- def parse(
3937
- self,
3938
- text: str,
3939
- start: Optional[str] = None,
3940
- on_error: "Optional[Callable[[UnexpectedInput], bool]]" = None,
3941
- ) -> "ParseTree":
3942
- #--
3943
- return self.parser.parse(text, start=start, on_error=on_error)
3944
-
3945
-
3946
-
3947
-
3948
-
3949
- class DedentError(LarkError):
3950
- pass
3951
-
3952
-
3953
- class Indenter(PostLex, ABC):
3954
- paren_level: int
3955
- indent_level: List[int]
3956
-
3957
- def __init__(self) -> None:
3958
- self.paren_level = 0
3959
- self.indent_level = [0]
3960
- assert self.tab_len > 0
3961
-
3962
- def handle_NL(self, token: Token) -> Iterator[Token]:
3963
- if self.paren_level > 0:
3964
- return
3965
-
3966
- yield token
3967
-
3968
- indent_str = token.rsplit("\n", 1)[1] ##
3969
-
3970
- indent = indent_str.count(" ") + indent_str.count("\t") * self.tab_len
3971
-
3972
- if indent > self.indent_level[-1]:
3973
- self.indent_level.append(indent)
3974
- yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
3975
- else:
3976
- while indent < self.indent_level[-1]:
3977
- self.indent_level.pop()
3978
- yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)
3979
-
3980
- if indent != self.indent_level[-1]:
3981
- raise DedentError(
3982
- "Unexpected dedent to column %s. Expected dedent to %s"
3983
- % (indent, self.indent_level[-1])
3984
- )
3985
-
3986
- def _process(self, stream):
3987
- for token in stream:
3988
- if token.type == self.NL_type:
3989
- yield from self.handle_NL(token)
3990
- else:
3991
- yield token
3992
-
3993
- if token.type in self.OPEN_PAREN_types:
3994
- self.paren_level += 1
3995
- elif token.type in self.CLOSE_PAREN_types:
3996
- self.paren_level -= 1
3997
- assert self.paren_level >= 0
3998
-
3999
- while len(self.indent_level) > 1:
4000
- self.indent_level.pop()
4001
- yield Token(self.DEDENT_type, "")
4002
-
4003
- assert self.indent_level == [0], self.indent_level
4004
-
4005
- def process(self, stream):
4006
- self.paren_level = 0
4007
- self.indent_level = [0]
4008
- return self._process(stream)
4009
-
4010
- ##
4011
-
4012
- @property
4013
- def always_accept(self):
4014
- return (self.NL_type,)
4015
-
4016
- @property
4017
- @abstractmethod
4018
- def NL_type(self) -> str:
4019
- raise NotImplementedError()
4020
-
4021
- @property
4022
- @abstractmethod
4023
- def OPEN_PAREN_types(self) -> List[str]:
4024
- raise NotImplementedError()
4025
-
4026
- @property
4027
- @abstractmethod
4028
- def CLOSE_PAREN_types(self) -> List[str]:
4029
- raise NotImplementedError()
4030
-
4031
- @property
4032
- @abstractmethod
4033
- def INDENT_type(self) -> str:
4034
- raise NotImplementedError()
4035
-
4036
- @property
4037
- @abstractmethod
4038
- def DEDENT_type(self) -> str:
4039
- raise NotImplementedError()
4040
-
4041
- @property
4042
- @abstractmethod
4043
- def tab_len(self) -> int:
4044
- raise NotImplementedError()
4045
-
4046
-
4047
- class PythonIndenter(Indenter):
4048
- NL_type = "_NEWLINE"
4049
- OPEN_PAREN_types = ["LPAR", "LSQB", "LBRACE"]
4050
- CLOSE_PAREN_types = ["RPAR", "RSQB", "RBRACE"]
4051
- INDENT_type = "_INDENT"
4052
- DEDENT_type = "_DEDENT"
4053
- tab_len = 8
4054
-
4055
-
4056
-
4057
- import pickle, zlib, base64
4058
- DATA = (
4059
- b''
4060
- )
4061
- DATA = pickle.loads(zlib.decompress(base64.b64decode(DATA)))
4062
- MEMO = (
4063
- b''
4064
- )
4065
- MEMO = pickle.loads(zlib.decompress(base64.b64decode(MEMO)))
4066
- Shift = 0
4067
- Reduce = 1
4068
- def Lark_StandAlone(**kwargs):
4069
- return Lark._load_from_dict(DATA, MEMO, **kwargs)