jaclang 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jaclang might be problematic.

Files changed (73)
  1. jaclang/__init__.py +4 -0
  2. jaclang/cli/__init__.py +7 -0
  3. jaclang/cli/cli.jac +46 -0
  4. jaclang/cli/cmds.jac +14 -0
  5. jaclang/cli/impl/__init__.py +1 -0
  6. jaclang/cli/impl/cli_impl.jac +93 -0
  7. jaclang/cli/impl/cmds_impl.jac +26 -0
  8. jaclang/core/__init__.py +12 -0
  9. jaclang/core/impl/__init__.py +1 -0
  10. jaclang/core/impl/arch_impl.jac +112 -0
  11. jaclang/core/impl/element_impl.jac +95 -0
  12. jaclang/core/impl/exec_ctx_impl.jac +17 -0
  13. jaclang/core/impl/memory_impl.jac +57 -0
  14. jaclang/core/primitives.jac +104 -0
  15. jaclang/jac/__init__.py +1 -0
  16. jaclang/jac/absyntree.py +1787 -0
  17. jaclang/jac/constant.py +46 -0
  18. jaclang/jac/importer.py +130 -0
  19. jaclang/jac/lexer.py +538 -0
  20. jaclang/jac/parser.py +1474 -0
  21. jaclang/jac/passes/__init__.py +5 -0
  22. jaclang/jac/passes/blue/__init__.py +25 -0
  23. jaclang/jac/passes/blue/ast_build_pass.py +3190 -0
  24. jaclang/jac/passes/blue/blue_pygen_pass.py +1335 -0
  25. jaclang/jac/passes/blue/decl_def_match_pass.py +278 -0
  26. jaclang/jac/passes/blue/import_pass.py +75 -0
  27. jaclang/jac/passes/blue/sub_node_tab_pass.py +30 -0
  28. jaclang/jac/passes/blue/tests/__init__.py +1 -0
  29. jaclang/jac/passes/blue/tests/test_ast_build_pass.py +61 -0
  30. jaclang/jac/passes/blue/tests/test_blue_pygen_pass.py +117 -0
  31. jaclang/jac/passes/blue/tests/test_decl_def_match_pass.py +43 -0
  32. jaclang/jac/passes/blue/tests/test_import_pass.py +18 -0
  33. jaclang/jac/passes/blue/tests/test_sub_node_pass.py +26 -0
  34. jaclang/jac/passes/blue/tests/test_type_analyze_pass.py +53 -0
  35. jaclang/jac/passes/blue/type_analyze_pass.py +731 -0
  36. jaclang/jac/passes/ir_pass.py +154 -0
  37. jaclang/jac/passes/purple/__init__.py +17 -0
  38. jaclang/jac/passes/purple/impl/__init__.py +1 -0
  39. jaclang/jac/passes/purple/impl/purple_pygen_pass_impl.jac +289 -0
  40. jaclang/jac/passes/purple/purple_pygen_pass.jac +35 -0
  41. jaclang/jac/sym_table.py +127 -0
  42. jaclang/jac/tests/__init__.py +1 -0
  43. jaclang/jac/tests/fixtures/__init__.py +1 -0
  44. jaclang/jac/tests/fixtures/activity.py +10 -0
  45. jaclang/jac/tests/fixtures/fam.jac +68 -0
  46. jaclang/jac/tests/fixtures/hello_world.jac +5 -0
  47. jaclang/jac/tests/fixtures/lexer_fam.jac +61 -0
  48. jaclang/jac/tests/fixtures/stuff.jac +6 -0
  49. jaclang/jac/tests/test_importer.py +24 -0
  50. jaclang/jac/tests/test_lexer.py +57 -0
  51. jaclang/jac/tests/test_parser.py +50 -0
  52. jaclang/jac/tests/test_utils.py +12 -0
  53. jaclang/jac/transform.py +63 -0
  54. jaclang/jac/transpiler.py +69 -0
  55. jaclang/jac/utils.py +120 -0
  56. jaclang/utils/__init__.py +1 -0
  57. jaclang/utils/fstring_parser.py +73 -0
  58. jaclang/utils/log.py +9 -0
  59. jaclang/utils/sly/__init__.py +6 -0
  60. jaclang/utils/sly/docparse.py +62 -0
  61. jaclang/utils/sly/lex.py +510 -0
  62. jaclang/utils/sly/yacc.py +2398 -0
  63. jaclang/utils/test.py +81 -0
  64. jaclang/utils/tests/__init__.py +1 -0
  65. jaclang/utils/tests/test_fstring_parser.py +55 -0
  66. jaclang-0.0.3.dist-info/METADATA +12 -0
  67. jaclang-0.0.3.dist-info/RECORD +70 -0
  68. {jaclang-0.0.1.dist-info → jaclang-0.0.3.dist-info}/WHEEL +1 -1
  69. jaclang-0.0.3.dist-info/entry_points.txt +3 -0
  70. jaclang-0.0.3.dist-info/top_level.txt +1 -0
  71. jaclang-0.0.1.dist-info/METADATA +0 -7
  72. jaclang-0.0.1.dist-info/RECORD +0 -4
  73. jaclang-0.0.1.dist-info/top_level.txt +0 -1
jaclang/utils/sly/yacc.py
@@ -0,0 +1,2398 @@
+ # -----------------------------------------------------------------------------
+ # sly: yacc.py
+ #
+ # Copyright (C) 2016-2018
+ # David M. Beazley (Dabeaz LLC)
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions are
+ # met:
+ #
+ # * Redistributions of source code must retain the above copyright notice,
+ #   this list of conditions and the following disclaimer.
+ # * Redistributions in binary form must reproduce the above copyright notice,
+ #   this list of conditions and the following disclaimer in the documentation
+ #   and/or other materials provided with the distribution.
+ # * Neither the name of the David Beazley or Dabeaz LLC may be used to
+ #   endorse or promote products derived from this software without
+ #   specific prior written permission.
+ #
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ # -----------------------------------------------------------------------------
+ # flake8: noqa
+ import sys
+ import inspect
+ from collections import OrderedDict, defaultdict, Counter
+
+ __all__ = ["Parser"]
+
+
+ class YaccError(Exception):
+     """
+     Exception raised for yacc-related build errors.
+     """
+
+     pass
+
+
+ # -----------------------------------------------------------------------------
+ # === User configurable parameters ===
+ #
+ # Change these to modify the default behavior of yacc (if you wish).
+ # Move these parameters to the Yacc class itself.
+ # -----------------------------------------------------------------------------
+
+ ERROR_COUNT = 3  # Number of symbols that must be shifted to leave recovery mode
+ MAXINT = sys.maxsize
+
+ # This object is a stand-in for a logging object created by the
+ # logging module. SLY will use this by default to create things
+ # such as the parser.out file. If a user wants more detailed
+ # information, they can create their own logging object and pass
+ # it into SLY.
+
+
+ class SlyLogger(object):
+     def __init__(self, f):
+         self.f = f
+
+     def debug(self, msg, *args, **kwargs):
+         self.f.write((msg % args) + "\n")
+
+     info = debug
+
+     def warning(self, msg, *args, **kwargs):
+         self.f.write("WARNING: " + (msg % args) + "\n")
+
+     def error(self, msg, *args, **kwargs):
+         self.f.write("ERROR: " + (msg % args) + "\n")
+
+     critical = debug
+
+
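For orientation, a minimal sketch of how the logger above is meant to be used (illustrative only, not part of the released package):

```python
# SlyLogger wraps any writable stream and mimics the subset of the stdlib
# logging API that SLY calls (debug/info/warning/error/critical).
import sys

log = SlyLogger(sys.stderr)
log.warning("%d unused tokens", 3)   # writes "WARNING: 3 unused tokens"
log.error("bad rule %r", "expr")     # writes "ERROR: bad rule 'expr'"
```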
+ # ----------------------------------------------------------------------
+ # This class is used to hold non-terminal grammar symbols during parsing.
+ # It normally has the following attributes set:
+ #        .type       = Grammar symbol type
+ #        .value      = Symbol value
+ #        .lineno     = Starting line number
+ #        .index      = Starting lex position
+ # ----------------------------------------------------------------------
+
+
+ class YaccSymbol:
+     def __str__(self):
+         return self.type
+
+     def __repr__(self):
+         return str(self)
+
+
+ # ----------------------------------------------------------------------
+ # This class is a wrapper around the objects actually passed to each
+ # grammar rule. Index lookup and assignment actually assign the
+ # .value attribute of the underlying YaccSymbol object.
+ # The lineno() method returns the line number of a given
+ # item (or 0 if not defined).
+ # ----------------------------------------------------------------------
+
+
+ class YaccProduction:
+     __slots__ = ("_slice", "_namemap", "_stack")
+
+     def __init__(self, s, stack=None):
+         self._slice = s
+         self._namemap = {}
+         self._stack = stack
+
+     def __getitem__(self, n):
+         if n >= 0:
+             return self._slice[n].value
+         else:
+             return self._stack[n].value
+
+     def __setitem__(self, n, v):
+         if n >= 0:
+             self._slice[n].value = v
+         else:
+             self._stack[n].value = v
+
+     def __len__(self):
+         return len(self._slice)
+
+     @property
+     def lineno(self):
+         for tok in self._slice:
+             lineno = getattr(tok, "lineno", None)
+             if lineno:
+                 return lineno
+         raise AttributeError("No line number found")
+
+     @property
+     def index(self):
+         for tok in self._slice:
+             index = getattr(tok, "index", None)
+             if index is not None:
+                 return index
+         raise AttributeError("No index attribute found")
+
+     @property
+     def end(self):
+         result = None
+         for tok in self._slice:
+             r = getattr(tok, "end", None)
+             if r:
+                 result = r
+         return result
+
+     def __getattr__(self, name):
+         if name in self._namemap:
+             return self._namemap[name](self._slice)
+         else:
+             nameset = "{" + ", ".join(self._namemap) + "}"
+             raise AttributeError(f"No symbol {name}. Must be one of {nameset}.")
+
+     def __setattr__(self, name, value):
+         if name[:1] == "_":
+             super().__setattr__(name, value)
+         else:
+             raise AttributeError(f"Can't reassign the value of attribute {name!r}")
+
+
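The two access styles YaccProduction supports are easiest to see from a user-side rule. A sketch assuming the published sly API, which this vendored copy mirrors; the CalcParser class, rule, and token names are hypothetical:

```python
from sly import Parser   # assumes the published sly package

class CalcParser(Parser):
    tokens = {'NUM', 'PLUS'}
    precedence = (('left', 'PLUS'),)   # resolves the shift/reduce ambiguity

    @_('expr PLUS expr')
    def expr(self, p):
        # Named access goes through _namemap; the repeated symbol 'expr'
        # is disambiguated as expr0/expr1 (see Production.namemap below).
        return p.expr0 + p.expr1       # positional form: p[0] + p[2]

    @_('NUM')
    def expr(self, p):
        return p.NUM
```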
+ # -----------------------------------------------------------------------------
+ # === Grammar Representation ===
+ #
+ # The following functions, classes, and variables are used to represent and
+ # manipulate the rules that make up a grammar.
+ # -----------------------------------------------------------------------------
+
+ # -----------------------------------------------------------------------------
+ # class Production:
+ #
+ # This class stores the raw information about a single production or grammar rule.
+ # A grammar rule refers to a specification such as this:
+ #
+ #       expr : expr PLUS term
+ #
+ # Here are the basic attributes defined on all productions
+ #
+ #       name     - Name of the production. For example 'expr'
+ #       prod     - A list of symbols on the right side ['expr','PLUS','term']
+ #       prec     - Production precedence level
+ #       number   - Production number.
+ #       func     - Function that executes on reduce
+ #       file     - File where production function is defined
+ #       lineno   - Line number where production function is defined
+ #
+ # The following attributes are defined or optional.
+ #
+ #       len      - Length of the production (number of symbols on right hand side)
+ #       usyms    - Set of unique symbols found in the production
+ # -----------------------------------------------------------------------------
+
+
+ class Production(object):
+     reduced = 0
+
+     def __init__(
+         self, number, name, prod, precedence=("right", 0), func=None, file="", line=0
+     ):
+         self.name = name
+         self.prod = tuple(prod)
+         self.number = number
+         self.func = func
+         self.file = file
+         self.line = line
+         self.prec = precedence
+
+         # Internal settings used during table construction
+         self.len = len(self.prod)  # Length of the production
+
+         # Create a list of unique production symbols used in the production
+         self.usyms = []
+         symmap = defaultdict(list)
+         for n, s in enumerate(self.prod):
+             symmap[s].append(n)
+             if s not in self.usyms:
+                 self.usyms.append(s)
+
+         # Create a name mapping
+         # First determine (in advance) if there are duplicate names
+         namecount = defaultdict(int)
+         for key in self.prod:
+             namecount[key] += 1
+             if key in _name_aliases:
+                 for key in _name_aliases[key]:
+                     namecount[key] += 1
+
+         # Now, walk through the names and generate accessor functions
+         nameuse = defaultdict(int)
+         namemap = {}
+         for index, key in enumerate(self.prod):
+             if namecount[key] > 1:
+                 k = f"{key}{nameuse[key]}"
+                 nameuse[key] += 1
+             else:
+                 k = key
+             namemap[k] = lambda s, i=index: s[i].value
+             if key in _name_aliases:
+                 for n, alias in enumerate(_name_aliases[key]):
+                     if namecount[alias] > 1:
+                         k = f"{alias}{nameuse[alias]}"
+                         nameuse[alias] += 1
+                     else:
+                         k = alias
+                     # The value is either a list (for repetition) or a tuple for optional
+                     namemap[k] = (
+                         lambda s, i=index, n=n: ([x[n] for x in s[i].value])
+                         if isinstance(s[i].value, list)
+                         else s[i].value[n]
+                     )
+
+         self.namemap = namemap
+
+         # List of all LR items for the production
+         self.lr_items = []
+         self.lr_next = None
+
+     def __str__(self):
+         if self.prod:
+             s = "%s -> %s" % (self.name, " ".join(self.prod))
+         else:
+             s = f"{self.name} -> <empty>"
+
+         if self.prec[1]:
+             s += "  [precedence=%s, level=%d]" % self.prec
+
+         return s
+
+     def __repr__(self):
+         return f"Production({self})"
+
+     def __len__(self):
+         return len(self.prod)
+
+     def __nonzero__(self):
+         raise RuntimeError("Used")
+         return 1
+
+     def __getitem__(self, index):
+         return self.prod[index]
+
+     # Return the nth lr_item from the production (or None if at the end)
+     def lr_item(self, n):
+         if n > len(self.prod):
+             return None
+         p = LRItem(self, n)
+         # Precompute the list of productions immediately following.
+         try:
+             p.lr_after = Prodnames[p.prod[n + 1]]
+         except (IndexError, KeyError):
+             p.lr_after = []
+         try:
+             p.lr_before = p.prod[n - 1]
+         except IndexError:
+             p.lr_before = None
+         return p
+
+
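A standalone sketch of the name mapping built above. Note that `_name_aliases` (used in `__init__`) and `Prodnames` (used in `lr_item`) are module-level names defined later in yacc.py, outside this excerpt; an empty dict stands in for `_name_aliases` here:

```python
_name_aliases = {}   # stub for the table defined later in yacc.py

p = Production(1, 'expr', ['expr', 'PLUS', 'expr'])
print(sorted(p.namemap))   # ['PLUS', 'expr0', 'expr1'] -- duplicates get numbered
print(p)                   # expr -> expr PLUS expr
```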
+ # -----------------------------------------------------------------------------
+ # class LRItem
+ #
+ # This class represents a specific stage of parsing a production rule. For
+ # example:
+ #
+ #       expr : expr . PLUS term
+ #
+ # In the above, the "." represents the current location of the parse. Here are
+ # the basic attributes:
+ #
+ #       name       - Name of the production. For example 'expr'
+ #       prod       - A list of symbols on the right side ['expr', '.', 'PLUS', 'term']
+ #       number     - Production number.
+ #
+ #       lr_next    - Next LR item. For example, if we are 'expr -> expr . PLUS term'
+ #                    then lr_next refers to 'expr -> expr PLUS . term'
+ #       lr_index   - LR item index (location of the ".") in the prod list.
+ #       lookaheads - LALR lookahead symbols for this item
+ #       len        - Length of the production (number of symbols on right hand side)
+ #       lr_after   - List of all productions that immediately follow
+ #       lr_before  - Grammar symbol immediately before
+ # -----------------------------------------------------------------------------
+
+
+ class LRItem(object):
+     def __init__(self, p, n):
+         self.name = p.name
+         self.prod = list(p.prod)
+         self.number = p.number
+         self.lr_index = n
+         self.lookaheads = {}
+         self.prod.insert(n, ".")
+         self.prod = tuple(self.prod)
+         self.len = len(self.prod)
+         self.usyms = p.usyms
+
+     def __str__(self):
+         if self.prod:
+             s = "%s -> %s" % (self.name, " ".join(self.prod))
+         else:
+             s = f"{self.name} -> <empty>"
+         return s
+
+     def __repr__(self):
+         return f"LRItem({self})"
+
+
+ # -----------------------------------------------------------------------------
+ # rightmost_terminal()
+ #
+ # Return the rightmost terminal from a list of symbols. Used in add_production()
+ # -----------------------------------------------------------------------------
+ def rightmost_terminal(symbols, terminals):
+     i = len(symbols) - 1
+     while i >= 0:
+         if symbols[i] in terminals:
+             return symbols[i]
+         i -= 1
+     return None
+
+
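For example, verifiable directly against the function above:

```python
# Scans from the right and returns the first terminal; this is what drives
# the default precedence assignment in add_production().
terminals = {'PLUS': [], 'NUM': []}
rightmost_terminal(['expr', 'PLUS', 'expr'], terminals)   # -> 'PLUS'
rightmost_terminal(['expr', 'expr'], terminals)           # -> None
```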
+ # -----------------------------------------------------------------------------
+ # === GRAMMAR CLASS ===
+ #
+ # The following class represents the contents of the specified grammar along
+ # with various computed properties such as first sets, follow sets, LR items, etc.
+ # This data is used for critical parts of the table generation process later.
+ # -----------------------------------------------------------------------------
+
+
+ class GrammarError(YaccError):
+     pass
+
+
+ class Grammar(object):
+     def __init__(self, terminals):
+         self.Productions = [None]  # A list of all of the productions. The first
+                                    # entry is always reserved for the purpose of
+                                    # building an augmented grammar
+
+         self.Prodnames = {}  # A dictionary mapping the names of nonterminals to a list
+                              # of all productions of that nonterminal.
+
+         self.Prodmap = {}  # A dictionary that is only used to detect duplicate
+                            # productions.
+
+         self.Terminals = {}  # A dictionary mapping the names of terminal symbols to a
+                              # list of the rules where they are used.
+
+         for term in terminals:
+             self.Terminals[term] = []
+
+         self.Terminals["error"] = []
+
+         self.Nonterminals = {}  # A dictionary mapping names of nonterminals to a list
+                                 # of rule numbers where they are used.
+
+         self.First = {}  # A dictionary of precomputed FIRST(x) symbols
+
+         self.Follow = {}  # A dictionary of precomputed FOLLOW(x) symbols
+
+         self.Precedence = {}  # Precedence rules for each terminal. Contains tuples of
+                               # the form ('right',level), ('nonassoc', level), or ('left',level)
+
+         self.UsedPrecedence = set()  # Precedence rules that were actually used by the grammar.
+                                      # This is only used to provide error checking and to generate
+                                      # a warning about unused precedence rules.
+
+         self.Start = None  # Starting symbol for the grammar
+
+     def __len__(self):
+         return len(self.Productions)
+
+     def __getitem__(self, index):
+         return self.Productions[index]
+
+     # -----------------------------------------------------------------------------
+     # set_precedence()
+     #
+     # Sets the precedence for a given terminal. assoc is the associativity such as
+     # 'left', 'right', or 'nonassoc'. level is a numeric level.
+     # -----------------------------------------------------------------------------
+
+     def set_precedence(self, term, assoc, level):
+         assert self.Productions == [
+             None
+         ], "Must call set_precedence() before add_production()"
+         if term in self.Precedence:
+             raise GrammarError(f"Precedence already specified for terminal {term!r}")
+         if assoc not in ["left", "right", "nonassoc"]:
+             raise GrammarError(
+                 f"Associativity of {term!r} must be one of 'left', 'right', or 'nonassoc'"
+             )
+         self.Precedence[term] = (assoc, level)
+
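A sketch of the usual calculator precedence table expressed through set_precedence(); only the relative order of the levels matters, and higher levels bind tighter:

```python
g = Grammar(['PLUS', 'MINUS', 'TIMES', 'DIVIDE'])
g.set_precedence('PLUS', 'left', 1)
g.set_precedence('MINUS', 'left', 1)
g.set_precedence('TIMES', 'left', 2)    # binds tighter than PLUS/MINUS
g.set_precedence('DIVIDE', 'left', 2)
```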
+     # -----------------------------------------------------------------------------
+     # add_production()
+     #
+     # Given an action function, this function assembles a production rule and
+     # computes its precedence level.
+     #
+     # The production rule is supplied as a list of symbols. For example,
+     # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
+     # symbols ['expr','PLUS','term'].
+     #
+     # Precedence is determined by the precedence of the right-most non-terminal
+     # or the precedence of a terminal specified by %prec.
+     #
+     # A variety of error checks are performed to make sure production symbols
+     # are valid and that %prec is used correctly.
+     # -----------------------------------------------------------------------------
+
+     def add_production(self, prodname, syms, func=None, file="", line=0):
+         if prodname in self.Terminals:
+             raise GrammarError(
+                 f"{file}:{line}: Illegal rule name {prodname!r}. Already defined as a token"
+             )
+         if prodname == "error":
+             raise GrammarError(
+                 f"{file}:{line}: Illegal rule name {prodname!r}. error is a reserved word"
+             )
+
+         # Look for literal tokens
+         for n, s in enumerate(syms):
+             if s[0] in "'\"" and s[0] == s[-1]:
+                 c = s[1:-1]
+                 if len(c) != 1:
+                     raise GrammarError(
+                         f"{file}:{line}: Literal token {s} in rule {prodname!r} may only be a single character"
+                     )
+                 if c not in self.Terminals:
+                     self.Terminals[c] = []
+                 syms[n] = c
+                 continue
+
+         # Determine the precedence level
+         if "%prec" in syms:
+             if syms[-1] == "%prec":
+                 raise GrammarError(
+                     f"{file}:{line}: Syntax error. Nothing follows %prec"
+                 )
+             if syms[-2] != "%prec":
+                 raise GrammarError(
+                     f"{file}:{line}: Syntax error. %prec can only appear at the end of a grammar rule"
+                 )
+             precname = syms[-1]
+             prodprec = self.Precedence.get(precname)
+             if not prodprec:
+                 raise GrammarError(
+                     f"{file}:{line}: Nothing known about the precedence of {precname!r}"
+                 )
+             else:
+                 self.UsedPrecedence.add(precname)
+             del syms[-2:]  # Drop %prec from the rule
+         else:
+             # If no %prec, precedence is determined by the rightmost terminal symbol
+             precname = rightmost_terminal(syms, self.Terminals)
+             prodprec = self.Precedence.get(precname, ("right", 0))
+
+         # See if the rule is already in the rulemap
+         map = "%s -> %s" % (prodname, syms)
+         if map in self.Prodmap:
+             m = self.Prodmap[map]
+             raise GrammarError(
+                 f"{file}:{line}: Duplicate rule {m}. "
+                 + f"Previous definition at {m.file}:{m.line}"
+             )
+
+         # From this point on, everything is valid. Create a new Production instance
+         pnumber = len(self.Productions)
+         if prodname not in self.Nonterminals:
+             self.Nonterminals[prodname] = []
+
+         # Add the production number to Terminals and Nonterminals
+         for t in syms:
+             if t in self.Terminals:
+                 self.Terminals[t].append(pnumber)
+             else:
+                 if t not in self.Nonterminals:
+                     self.Nonterminals[t] = []
+                 self.Nonterminals[t].append(pnumber)
+
+         # Create a production and add it to the list of productions
+         p = Production(pnumber, prodname, syms, prodprec, func, file, line)
+         self.Productions.append(p)
+         self.Prodmap[map] = p
+
+         # Add to the global productions list
+         try:
+             self.Prodnames[prodname].append(p)
+         except KeyError:
+             self.Prodnames[prodname] = [p]
+
+     # -----------------------------------------------------------------------------
+     # set_start()
+     #
+     # Sets the starting symbol and creates the augmented grammar. Production
+     # rule 0 is S' -> start where start is the start symbol.
+     # -----------------------------------------------------------------------------
+
+     def set_start(self, start=None):
+         if callable(start):
+             start = start.__name__
+
+         if not start:
+             start = self.Productions[1].name
+
+         if start not in self.Nonterminals:
+             raise GrammarError(f"start symbol {start} undefined")
+         self.Productions[0] = Production(0, "S'", [start])
+         self.Nonterminals[start].append(0)
+         self.Start = start
+
+     # -----------------------------------------------------------------------------
+     # find_unreachable()
+     #
+     # Find all of the nonterminal symbols that can't be reached from the starting
+     # symbol. Returns a list of nonterminals that can't be reached.
+     # -----------------------------------------------------------------------------
+
+     def find_unreachable(self):
+         # Mark all symbols that are reachable from a symbol s
+         def mark_reachable_from(s):
+             if s in reachable:
+                 return
+             reachable.add(s)
+             for p in self.Prodnames.get(s, []):
+                 for r in p.prod:
+                     mark_reachable_from(r)
+
+         reachable = set()
+         mark_reachable_from(self.Productions[0].prod[0])
+         return [s for s in self.Nonterminals if s not in reachable]
+
+     # -----------------------------------------------------------------------------
+     # infinite_cycles()
+     #
+     # This function looks at the various parsing rules and tries to detect
+     # infinite recursion cycles (grammar rules where there is no possible way
+     # to derive a string of only terminals).
+     # -----------------------------------------------------------------------------
+
+     def infinite_cycles(self):
+         terminates = {}
+
+         # Terminals:
+         for t in self.Terminals:
+             terminates[t] = True
+
+         terminates["$end"] = True
+
+         # Nonterminals:
+
+         # Initialize to false:
+         for n in self.Nonterminals:
+             terminates[n] = False
+
+         # Then propagate termination until no change:
+         while True:
+             some_change = False
+             for n, pl in self.Prodnames.items():
+                 # Nonterminal n terminates iff any of its productions terminates.
+                 for p in pl:
+                     # Production p terminates iff all of its rhs symbols terminate.
+                     for s in p.prod:
+                         if not terminates[s]:
+                             # The symbol s does not terminate,
+                             # so production p does not terminate.
+                             p_terminates = False
+                             break
+                     else:
+                         # didn't break from the loop,
+                         # so every symbol s terminates
+                         # so production p terminates.
+                         p_terminates = True
+
+                     if p_terminates:
+                         # symbol n terminates!
+                         if not terminates[n]:
+                             terminates[n] = True
+                             some_change = True
+                         # Don't need to consider any more productions for this n.
+                         break
+
+             if not some_change:
+                 break
+
+         infinite = []
+         for s, term in terminates.items():
+             if not term:
+                 if s not in self.Prodnames and s not in self.Terminals and s != "error":
+                     # s is used-but-not-defined, and we've already warned of that,
+                     # so it would be overkill to say that it's also non-terminating.
+                     pass
+                 else:
+                     infinite.append(s)
+
+         return infinite
+
+     # -----------------------------------------------------------------------------
+     # undefined_symbols()
+     #
+     # Find all symbols that were used in the grammar, but not defined as tokens or
+     # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
+     # and prod is the production where the symbol was used.
+     # -----------------------------------------------------------------------------
+     def undefined_symbols(self):
+         result = []
+         for p in self.Productions:
+             if not p:
+                 continue
+
+             for s in p.prod:
+                 if s not in self.Prodnames and s not in self.Terminals and s != "error":
+                     result.append((s, p))
+         return result
+
+     # -----------------------------------------------------------------------------
+     # unused_terminals()
+     #
+     # Find all terminals that were defined, but not used by the grammar. Returns
+     # a list of all symbols.
+     # -----------------------------------------------------------------------------
+     def unused_terminals(self):
+         unused_tok = []
+         for s, v in self.Terminals.items():
+             if s != "error" and not v:
+                 unused_tok.append(s)
+
+         return unused_tok
+
+     # ------------------------------------------------------------------------------
+     # unused_rules()
+     #
+     # Find all grammar rules that were defined, but not used (maybe not reachable).
+     # Returns a list of productions.
+     # ------------------------------------------------------------------------------
+
+     def unused_rules(self):
+         unused_prod = []
+         for s, v in self.Nonterminals.items():
+             if not v:
+                 p = self.Prodnames[s][0]
+                 unused_prod.append(p)
+         return unused_prod
+
+     # -----------------------------------------------------------------------------
+     # unused_precedence()
+     #
+     # Returns a list of tuples (term, precedence) corresponding to precedence
+     # rules that were never used by the grammar. term is the name of the terminal
+     # on which precedence was applied and precedence is a string such as 'left' or
+     # 'right' corresponding to the type of precedence.
+     # -----------------------------------------------------------------------------
+
+     def unused_precedence(self):
+         unused = []
+         for termname in self.Precedence:
+             if not (termname in self.Terminals or termname in self.UsedPrecedence):
+                 unused.append((termname, self.Precedence[termname][0]))
+
+         return unused
+
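A quick diagnostics sketch, again assuming the `_name_aliases = {}` stub from the Production example:

```python
# One terminal is declared but never referenced by any rule.
gd = Grammar(['NUM', 'SEMI'])
gd.add_production('stmt', ['NUM'])
gd.set_start('stmt')
gd.unused_terminals()    # -> ['SEMI']
gd.undefined_symbols()   # -> []
gd.find_unreachable()    # -> []
```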
+     # -------------------------------------------------------------------------
+     # _first()
+     #
+     # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
+     #
+     # During execution of compute_first(), the result may be incomplete.
+     # Afterward (e.g., when called from compute_follow()), it will be complete.
+     # -------------------------------------------------------------------------
+     def _first(self, beta):
+         # We are computing First(x1,x2,x3,...,xn)
+         result = []
+         for x in beta:
+             x_produces_empty = False
+
+             # Add all the non-<empty> symbols of First[x] to the result.
+             for f in self.First[x]:
+                 if f == "<empty>":
+                     x_produces_empty = True
+                 else:
+                     if f not in result:
+                         result.append(f)
+
+             if x_produces_empty:
+                 # We have to consider the next x in beta,
+                 # i.e. stay in the loop.
+                 pass
+             else:
+                 # We don't have to consider any further symbols in beta.
+                 break
+         else:
+             # There was no 'break' from the loop,
+             # so x_produces_empty was true for all x in beta,
+             # so beta produces empty as well.
+             result.append("<empty>")
+
+         return result
+
+     # -------------------------------------------------------------------------
+     # compute_first()
+     #
+     # Compute the value of FIRST1(X) for all symbols
+     # -------------------------------------------------------------------------
+     def compute_first(self):
+         if self.First:
+             return self.First
+
+         # Terminals:
+         for t in self.Terminals:
+             self.First[t] = [t]
+
+         self.First["$end"] = ["$end"]
+
+         # Nonterminals:
+
+         # Initialize to the empty set:
+         for n in self.Nonterminals:
+             self.First[n] = []
+
+         # Then propagate symbols until no change:
+         while True:
+             some_change = False
+             for n in self.Nonterminals:
+                 for p in self.Prodnames[n]:
+                     for f in self._first(p.prod):
+                         if f not in self.First[n]:
+                             self.First[n].append(f)
+                             some_change = True
+             if not some_change:
+                 break
+
+         return self.First
+
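A FIRST-set sketch on a toy grammar, atom : NUM | LPAREN atom RPAREN (same `_name_aliases = {}` stub assumed):

```python
g = Grammar(['NUM', 'LPAREN', 'RPAREN'])
g.add_production('atom', ['NUM'])
g.add_production('atom', ['LPAREN', 'atom', 'RPAREN'])
g.set_start('atom')
g.compute_first()['atom']   # -> ['NUM', 'LPAREN']
```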
+     # ---------------------------------------------------------------------
+     # compute_follow()
+     #
+     # Computes all of the follow sets for every non-terminal symbol. The
+     # follow set is the set of all symbols that might follow a given
+     # non-terminal. See the Dragon book, 2nd Ed. p. 189.
+     # ---------------------------------------------------------------------
+     def compute_follow(self, start=None):
+         # If already computed, return the result
+         if self.Follow:
+             return self.Follow
+
+         # If first sets not computed yet, do that first.
+         if not self.First:
+             self.compute_first()
+
+         # Add '$end' to the follow list of the start symbol
+         for k in self.Nonterminals:
+             self.Follow[k] = []
+
+         if not start:
+             start = self.Productions[1].name
+
+         self.Follow[start] = ["$end"]
+
+         while True:
+             didadd = False
+             for p in self.Productions[1:]:
+                 # Here is the production set
+                 for i, B in enumerate(p.prod):
+                     if B in self.Nonterminals:
+                         # Okay. We got a non-terminal in a production
+                         fst = self._first(p.prod[i + 1 :])
+                         hasempty = False
+                         for f in fst:
+                             if f != "<empty>" and f not in self.Follow[B]:
+                                 self.Follow[B].append(f)
+                                 didadd = True
+                             if f == "<empty>":
+                                 hasempty = True
+                         if hasempty or i == (len(p.prod) - 1):
+                             # Add elements of follow(a) to follow(b)
+                             for f in self.Follow[p.name]:
+                                 if f not in self.Follow[B]:
+                                     self.Follow[B].append(f)
+                                     didadd = True
+             if not didadd:
+                 break
+         return self.Follow
+
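Continuing the toy grammar, FOLLOW picks up RPAREN from the nested position and '$end' from the start symbol:

```python
g.compute_follow()['atom']   # -> ['$end', 'RPAREN']
```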
+     # -----------------------------------------------------------------------------
+     # build_lritems()
+     #
+     # This function walks the list of productions and builds a complete set of the
+     # LR items. The LR items are stored in two ways: First, they are uniquely
+     # numbered and placed in the list _lritems. Second, a linked list of LR items
+     # is built for each production. For example:
+     #
+     #       E -> E PLUS E
+     #
+     # Creates the list
+     #
+     #       [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
+     # -----------------------------------------------------------------------------
+
+     def build_lritems(self):
+         for p in self.Productions:
+             lastlri = p
+             i = 0
+             lr_items = []
+             while True:
+                 if i > len(p):
+                     lri = None
+                 else:
+                     lri = LRItem(p, i)
+                     # Precompute the list of productions immediately following
+                     try:
+                         lri.lr_after = self.Prodnames[lri.prod[i + 1]]
+                     except (IndexError, KeyError):
+                         lri.lr_after = []
+                     try:
+                         lri.lr_before = lri.prod[i - 1]
+                     except IndexError:
+                         lri.lr_before = None
+
+                 lastlri.lr_next = lri
+                 if not lri:
+                     break
+                 lr_items.append(lri)
+                 lastlri = lri
+                 i += 1
+             p.lr_items = lr_items
+
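Continuing the same grammar, the dotted items built for the second production look like this:

```python
g.build_lritems()
[str(item) for item in g.Prodnames['atom'][1].lr_items]
# ['atom -> . LPAREN atom RPAREN',
#  'atom -> LPAREN . atom RPAREN',
#  'atom -> LPAREN atom . RPAREN',
#  'atom -> LPAREN atom RPAREN .']
```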
+     # ----------------------------------------------------------------------
+     # Debugging output. Printing the grammar will produce a detailed
+     # description along with some diagnostics.
+     # ----------------------------------------------------------------------
+     def __str__(self):
+         out = []
+         out.append("Grammar:\n")
+         for n, p in enumerate(self.Productions):
+             out.append(f"Rule {n:<5d} {p}")
+
+         unused_terminals = self.unused_terminals()
+         if unused_terminals:
+             out.append("\nUnused terminals:\n")
+             for term in unused_terminals:
+                 out.append(f"    {term}")
+
+         out.append("\nTerminals, with rules where they appear:\n")
+         for term in sorted(self.Terminals):
+             out.append(
+                 "%-20s : %s" % (term, " ".join(str(s) for s in self.Terminals[term]))
+             )
+
+         out.append("\nNonterminals, with rules where they appear:\n")
+         for nonterm in sorted(self.Nonterminals):
+             out.append(
+                 "%-20s : %s"
+                 % (nonterm, " ".join(str(s) for s in self.Nonterminals[nonterm]))
+             )
+
+         out.append("")
+         return "\n".join(out)
+
+
+ # -----------------------------------------------------------------------------
+ # === LR Generator ===
+ #
+ # The following classes and functions are used to generate LR parsing tables on
+ # a grammar.
+ # -----------------------------------------------------------------------------
+
+ # -----------------------------------------------------------------------------
+ # digraph()
+ # traverse()
+ #
+ # The following two functions are used to compute set valued functions
+ # of the form:
+ #
+ #       F(x) = F'(x) U U{F(y) | x R y}
+ #
+ # This is used to compute the values of Read() sets as well as FOLLOW sets
+ # in LALR(1) generation.
+ #
+ # Inputs:  X    - An input set
+ #          R    - A relation
+ #          FP   - Set-valued function
+ # ------------------------------------------------------------------------------
+
+
+ def digraph(X, R, FP):
+     N = {}
+     for x in X:
+         N[x] = 0
+     stack = []
+     F = {}
+     for x in X:
+         if N[x] == 0:
+             traverse(x, N, stack, F, X, R, FP)
+     return F
+
+
+ def traverse(x, N, stack, F, X, R, FP):
+     stack.append(x)
+     d = len(stack)
+     N[x] = d
+     F[x] = FP(x)  # F(X) <- F'(x)
+
+     rel = R(x)  # Get y's related to x
+     for y in rel:
+         if N[y] == 0:
+             traverse(y, N, stack, F, X, R, FP)
+         N[x] = min(N[x], N[y])
+         for a in F.get(y, []):
+             if a not in F[x]:
+                 F[x].append(a)
+     if N[x] == d:
+         N[stack[-1]] = MAXINT
+         F[stack[-1]] = F[x]
+         element = stack.pop()
+         while element != x:
+             N[stack[-1]] = MAXINT
+             F[stack[-1]] = F[x]
+             element = stack.pop()
+
+
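A toy run of digraph() on a three-node chain, a R b R c, with F'(x) = {upper(x)}:

```python
# digraph()/traverse() compute F(x) = FP(x) U union{ F(y) : x R y }.
X = ['a', 'b', 'c']
R = lambda x: {'a': ['b'], 'b': ['c'], 'c': []}[x]
FP = lambda x: [x.upper()]
digraph(X, R, FP)   # -> {'a': ['A', 'B', 'C'], 'b': ['B', 'C'], 'c': ['C']}
```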
+ class LALRError(YaccError):
+     pass
+
+
+ # -----------------------------------------------------------------------------
+ # == LRGeneratedTable ==
+ #
+ # This class implements the LR table generation algorithm. There are no
+ # public methods except for write()
+ # -----------------------------------------------------------------------------
+
+
+ class LRTable(object):
+     def __init__(self, grammar):
+         self.grammar = grammar
+
+         # Internal attributes
+         self.lr_action = {}  # Action table
+         self.lr_goto = {}  # Goto table
+         self.lr_productions = grammar.Productions  # Copy of grammar Production array
+         self.lr_goto_cache = {}  # Cache of computed gotos
+         self.lr0_cidhash = {}  # Cache of closures
+         self._add_count = 0  # Internal counter used to detect cycles
+
+         # Diagnostic information filled in by the table generator
+         self.state_descriptions = OrderedDict()
+         self.sr_conflict = 0
+         self.rr_conflict = 0
+         self.conflicts = []  # List of conflicts
+
+         self.sr_conflicts = []
+         self.rr_conflicts = []
+
+         # Build the tables
+         self.grammar.build_lritems()
+         self.grammar.compute_first()
+         self.grammar.compute_follow()
+         self.lr_parse_table()
+
+         # Build default states
+         # This identifies parser states where there is only one possible reduction action.
+         # For such states, the parser can choose to make a rule reduction without consuming
+         # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
+         # certain kinds of advanced parsing situations where the lexer and parser interact with
+         # each other or change states (i.e., manipulation of scope, lexer states, etc.).
+         #
+         # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
+         self.defaulted_states = {}
+         for state, actions in self.lr_action.items():
+             rules = list(actions.values())
+             if len(rules) == 1 and rules[0] < 0:
+                 self.defaulted_states[state] = rules[0]
+
+     # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
+     def lr0_closure(self, I):
+         self._add_count += 1
+
+         # Add everything in I to J
+         J = I[:]
+         didadd = True
+         while didadd:
+             didadd = False
+             for j in J:
+                 for x in j.lr_after:
+                     if getattr(x, "lr0_added", 0) == self._add_count:
+                         continue
+                     # Add B --> .G to J
+                     J.append(x.lr_next)
+                     x.lr0_added = self._add_count
+                     didadd = True
+
+         return J
+
+     # Compute the LR(0) goto function goto(I,X) where I is a set
+     # of LR(0) items and X is a grammar symbol. This function is written
+     # in a way that guarantees uniqueness of the generated goto sets
+     # (i.e. the same goto set will never be returned as two different Python
+     # objects). With uniqueness, we can later do fast set comparisons using
+     # id(obj) instead of element-wise comparison.
+
+     def lr0_goto(self, I, x):
+         # First we look for a previously cached entry
+         g = self.lr_goto_cache.get((id(I), x))
+         if g:
+             return g
+
+         # Now we generate the goto set in a way that guarantees uniqueness
+         # of the result
+
+         s = self.lr_goto_cache.get(x)
+         if not s:
+             s = {}
+             self.lr_goto_cache[x] = s
+
+         gs = []
+         for p in I:
+             n = p.lr_next
+             if n and n.lr_before == x:
+                 s1 = s.get(id(n))
+                 if not s1:
+                     s1 = {}
+                     s[id(n)] = s1
+                 gs.append(n)
+                 s = s1
+         g = s.get("$end")
+         if not g:
+             if gs:
+                 g = self.lr0_closure(gs)
+                 s["$end"] = g
+             else:
+                 s["$end"] = gs
+         self.lr_goto_cache[(id(I), x)] = g
+         return g
+
+     # Compute the LR(0) sets of item function
+     def lr0_items(self):
+         C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
+         i = 0
+         for I in C:
+             self.lr0_cidhash[id(I)] = i
+             i += 1
+
+         # Loop over the items in C and each grammar symbol
+         i = 0
+         while i < len(C):
+             I = C[i]
+             i += 1
+
+             # Collect all of the symbols that could possibly be in the goto(I,X) sets
+             asyms = {}
+             for ii in I:
+                 for s in ii.usyms:
+                     asyms[s] = None
+
+             for x in asyms:
+                 g = self.lr0_goto(I, x)
+                 if not g or id(g) in self.lr0_cidhash:
+                     continue
+                 self.lr0_cidhash[id(g)] = len(C)
+                 C.append(g)
+
+         return C
+
+     # -----------------------------------------------------------------------------
+     # ==== LALR(1) Parsing ====
+     #
+     # LALR(1) parsing is almost exactly the same as SLR except that instead of
+     # relying upon Follow() sets when performing reductions, a more selective
+     # lookahead set that incorporates the state of the LR(0) machine is utilized.
+     # Thus, we mainly just have to focus on calculating the lookahead sets.
+     #
+     # The method used here is due to DeRemer and Pennello (1982).
+     #
+     # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
+     # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
+     # Vol. 4, No. 4, Oct. 1982, pp. 615-649
+     #
+     # Further details can also be found in:
+     #
+     #     J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
+     #     McGraw-Hill Book Company, (1985).
+     # -----------------------------------------------------------------------------
+
+     # -----------------------------------------------------------------------------
+     # compute_nullable_nonterminals()
+     #
+     # Creates a set containing all of the non-terminals that might produce
+     # an empty production.
+     # -----------------------------------------------------------------------------
+
+     def compute_nullable_nonterminals(self):
+         nullable = set()
+         num_nullable = 0
+         while True:
+             for p in self.grammar.Productions[1:]:
+                 if p.len == 0:
+                     nullable.add(p.name)
+                     continue
+                 for t in p.prod:
+                     if t not in nullable:
+                         break
+                 else:
+                     nullable.add(p.name)
+             if len(nullable) == num_nullable:
+                 break
+             num_nullable = len(nullable)
+         return nullable
+
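A nullable sketch, with the same `_name_aliases = {}` stub as before; note that LRTable builds the full tables in `__init__`:

```python
# An empty right-hand side makes 'opt' nullable.
g2 = Grammar(['NUM'])
g2.add_production('opt', [])
g2.add_production('opt', ['NUM'])
g2.set_start('opt')
LRTable(g2).compute_nullable_nonterminals()   # -> {'opt'}
```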
1169
+ # -----------------------------------------------------------------------------
1170
+ # find_nonterminal_trans(C)
1171
+ #
1172
+ # Given a set of LR(0) items, this functions finds all of the non-terminal
1173
+ # transitions. These are transitions in which a dot appears immediately before
1174
+ # a non-terminal. Returns a list of tuples of the form (state,N) where state
1175
+ # is the state number and N is the nonterminal symbol.
1176
+ #
1177
+ # The input C is the set of LR(0) items.
1178
+ # -----------------------------------------------------------------------------
1179
+
1180
+ def find_nonterminal_transitions(self, C):
1181
+ trans = []
1182
+ for stateno, state in enumerate(C):
1183
+ for p in state:
1184
+ if p.lr_index < p.len - 1:
1185
+ t = (stateno, p.prod[p.lr_index + 1])
1186
+ if t[1] in self.grammar.Nonterminals:
1187
+ if t not in trans:
1188
+ trans.append(t)
1189
+ return trans
1190
+
1191
+ # -----------------------------------------------------------------------------
1192
+ # dr_relation()
1193
+ #
1194
+ # Computes the DR(p,A) relationships for non-terminal transitions. The input
1195
+ # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
1196
+ #
1197
+ # Returns a list of terminals.
1198
+ # -----------------------------------------------------------------------------
1199
+
1200
+ def dr_relation(self, C, trans, nullable):
1201
+ dr_set = {}
1202
+ state, N = trans
1203
+ terms = []
1204
+
1205
+ g = self.lr0_goto(C[state], N)
1206
+ for p in g:
1207
+ if p.lr_index < p.len - 1:
1208
+ a = p.prod[p.lr_index + 1]
1209
+ if a in self.grammar.Terminals:
1210
+ if a not in terms:
1211
+ terms.append(a)
1212
+
1213
+ # This extra bit is to handle the start state
1214
+ if state == 0 and N == self.grammar.Productions[0].prod[0]:
1215
+ terms.append("$end")
1216
+
1217
+ return terms
1218
+
1219
+ # -----------------------------------------------------------------------------
1220
+ # reads_relation()
1221
+ #
1222
+ # Computes the READS() relation (p,A) READS (t,C).
1223
+ # -----------------------------------------------------------------------------
1224
+
1225
+ def reads_relation(self, C, trans, empty):
1226
+ # Look for empty transitions
1227
+ rel = []
1228
+ state, N = trans
1229
+
1230
+ g = self.lr0_goto(C[state], N)
1231
+ j = self.lr0_cidhash.get(id(g), -1)
1232
+ for p in g:
1233
+ if p.lr_index < p.len - 1:
1234
+ a = p.prod[p.lr_index + 1]
1235
+ if a in empty:
1236
+ rel.append((j, a))
1237
+
1238
+ return rel
1239
+
1240
+ # -----------------------------------------------------------------------------
1241
+ # compute_lookback_includes()
1242
+ #
1243
+ # Determines the lookback and includes relations
1244
+ #
1245
+ # LOOKBACK:
1246
+ #
1247
+ # This relation is determined by running the LR(0) state machine forward.
1248
+ # For example, starting with a production "N : . A B C", we run it forward
1249
+ # to obtain "N : A B C ." We then build a relationship between this final
1250
+ # state and the starting state. These relationships are stored in a dictionary
1251
+ # lookdict.
1252
+ #
1253
+ # INCLUDES:
1254
+ #
1255
+ # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
1256
+ #
1257
+ # This relation is used to determine non-terminal transitions that occur
1258
+ # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
1259
+ # if the following holds:
1260
+ #
1261
+ # B -> LAT, where T -> epsilon and p' -L-> p
1262
+ #
1263
+ # L is essentially a prefix (which may be empty), T is a suffix that must be
1264
+ # able to derive an empty string. State p' must lead to state p with the string L.
1265
+ #
1266
+ # -----------------------------------------------------------------------------
1267
+
1268
+ def compute_lookback_includes(self, C, trans, nullable):
1269
+ lookdict = {} # Dictionary of lookback relations
1270
+ includedict = {} # Dictionary of include relations
1271
+
1272
+ # Make a dictionary of non-terminal transitions
1273
+ dtrans = {}
1274
+ for t in trans:
1275
+ dtrans[t] = 1
1276
+
1277
+ # Loop over all transitions and compute lookbacks and includes
1278
+ for state, N in trans:
1279
+ lookb = []
1280
+ includes = []
1281
+ for p in C[state]:
1282
+ if p.name != N:
1283
+ continue
1284
+
1285
+ # Okay, we have a name match. We now follow the production all the way
1286
+ # through the state machine until we get the . on the right hand side
1287
+
1288
+ lr_index = p.lr_index
1289
+ j = state
1290
+ while lr_index < p.len - 1:
1291
+ lr_index = lr_index + 1
1292
+ t = p.prod[lr_index]
1293
+
1294
+ # Check to see if this symbol and state are a non-terminal transition
1295
+ if (j, t) in dtrans:
1296
+ # Yes. Okay, there is some chance that this is an includes relation
1297
+ # the only way to know for certain is whether the rest of the
1298
+ # production derives empty
1299
+
1300
+ li = lr_index + 1
1301
+ while li < p.len:
1302
+ if p.prod[li] in self.grammar.Terminals:
1303
+ break # No forget it
1304
+ if p.prod[li] not in nullable:
1305
+ break
1306
+ li = li + 1
1307
+ else:
1308
+ # Appears to be a relation between (j,t) and (state,N)
1309
+ includes.append((j, t))
1310
+
1311
+ g = self.lr0_goto(C[j], t) # Go to next set
1312
+ j = self.lr0_cidhash.get(id(g), -1) # Go to next state
1313
+
1314
+ # When we get here, j is the final state, now we have to locate the production
1315
+ for r in C[j]:
1316
+ if r.name != p.name:
1317
+ continue
1318
+ if r.len != p.len:
1319
+ continue
1320
+ i = 0
1321
+ # This look is comparing a production ". A B C" with "A B C ."
1322
+ while i < r.lr_index:
1323
+ if r.prod[i] != p.prod[i + 1]:
1324
+ break
1325
+ i = i + 1
1326
+ else:
1327
+ lookb.append((j, r))
1328
+ for i in includes:
1329
+ if i not in includedict:
1330
+ includedict[i] = []
1331
+ includedict[i].append((state, N))
1332
+ lookdict[(state, N)] = lookb
1333
+
1334
+ return lookdict, includedict
1335
+
1336
+ # -----------------------------------------------------------------------------
1337
+ # compute_read_sets()
1338
+ #
1339
+ # Given a set of LR(0) items, this function computes the read sets.
1340
+ #
1341
+ # Inputs: C = Set of LR(0) items
1342
+ # ntrans = Set of nonterminal transitions
1343
+ # nullable = Set of empty transitions
1344
+ #
1345
+ # Returns a set containing the read sets
1346
+ # -----------------------------------------------------------------------------
1347
+
1348
+ def compute_read_sets(self, C, ntrans, nullable):
1349
+ FP = lambda x: self.dr_relation(C, x, nullable)
1350
+ R = lambda x: self.reads_relation(C, x, nullable)
1351
+ F = digraph(ntrans, R, FP)
1352
+ return F
1353
+
1354
+ # -----------------------------------------------------------------------------
1355
+ # compute_follow_sets()
1356
+ #
1357
+ # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
1358
+ # and an include set, this function computes the follow sets
1359
+ #
1360
+ # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
1361
+ #
1362
+ # Inputs:
1363
+ # ntrans = Set of nonterminal transitions
1364
+ # readsets = Readset (previously computed)
1365
+ # inclsets = Include sets (previously computed)
1366
+ #
1367
+ # Returns a set containing the follow sets
1368
+ # -----------------------------------------------------------------------------
1369
+
1370
+ def compute_follow_sets(self, ntrans, readsets, inclsets):
1371
+ FP = lambda x: readsets[x]
1372
+ R = lambda x: inclsets.get(x, [])
1373
+ F = digraph(ntrans, R, FP)
1374
+ return F
1375
+
1376
+ # -----------------------------------------------------------------------------
1377
+ # add_lookaheads()
1378
+ #
1379
+ # Attaches the lookahead symbols to grammar rules.
1380
+ #
1381
+ # Inputs: lookbacks - Set of lookback relations
1382
+ # followset - Computed follow set
1383
+ #
1384
+ # This function directly attaches the lookaheads to productions contained
1385
+ # in the lookbacks set
1386
+ # -----------------------------------------------------------------------------
1387
+
1388
+ def add_lookaheads(self, lookbacks, followset):
1389
+ for trans, lb in lookbacks.items():
1390
+ # Loop over productions in lookback
1391
+ for state, p in lb:
1392
+ if state not in p.lookaheads:
1393
+ p.lookaheads[state] = []
1394
+ f = followset.get(trans, [])
1395
+ for a in f:
1396
+ if a not in p.lookaheads[state]:
1397
+ p.lookaheads[state].append(a)
1398
+
1399
+ # -----------------------------------------------------------------------------
1400
+ # add_lalr_lookaheads()
1401
+ #
1402
+ # This function does all of the work of adding lookahead information for use
1403
+ # with LALR parsing
1404
+ # -----------------------------------------------------------------------------
1405
+
1406
+ def add_lalr_lookaheads(self, C):
1407
+ # Determine all of the nullable nonterminals
1408
+ nullable = self.compute_nullable_nonterminals()
1409
+
1410
+ # Find all non-terminal transitions
1411
+ trans = self.find_nonterminal_transitions(C)
1412
+
1413
+ # Compute read sets
1414
+ readsets = self.compute_read_sets(C, trans, nullable)
1415
+
1416
+ # Compute lookback/includes relations
1417
+ lookd, included = self.compute_lookback_includes(C, trans, nullable)
1418
+
1419
+ # Compute LALR FOLLOW sets
1420
+ followsets = self.compute_follow_sets(trans, readsets, included)
1421
+
1422
+ # Add all of the lookaheads
1423
+ self.add_lookaheads(lookd, followsets)
1424
+
1425
+ # -----------------------------------------------------------------------------
1426
+ # lr_parse_table()
1427
+     #
+     # This function constructs the final LALR parse table. Touch this code and die.
+     # -----------------------------------------------------------------------------
+     def lr_parse_table(self):
+         Productions = self.grammar.Productions
+         Precedence = self.grammar.Precedence
+         goto = self.lr_goto  # Goto array
+         action = self.lr_action  # Action array
+
+         actionp = {}  # Action production array (temporary)
+
+         # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
+         # This determines the number of states
+
+         C = self.lr0_items()
+         self.add_lalr_lookaheads(C)
+
+         # Build the parser table, state by state
+         for st, I in enumerate(C):
+             descrip = []
+             # Loop over each production in I
+             actlist = []  # List of actions
+             st_action = {}
+             st_actionp = {}
+             st_goto = {}
+
+             descrip.append(f"\nstate {st}\n")
+             for p in I:
+                 descrip.append(f"    ({p.number}) {p}")
+
+             for p in I:
+                 if p.len == p.lr_index + 1:
+                     if p.name == "S'":
+                         # Start symbol. Accept!
+                         st_action["$end"] = 0
+                         st_actionp["$end"] = p
+                     else:
+                         # We are at the end of a production. Reduce!
+                         laheads = p.lookaheads[st]
+                         for a in laheads:
+                             actlist.append(
+                                 (a, p, f"reduce using rule {p.number} ({p})")
+                             )
+                             r = st_action.get(a)
+                             if r is not None:
+                                 # Have a shift/reduce or reduce/reduce conflict
+                                 if r > 0:
+                                     # Need to decide on shift or reduce here
+                                     # By default we favor shifting. Need to add
+                                     # some precedence rules here.
+
+                                     # Shift precedence comes from the token
+                                     sprec, slevel = Precedence.get(a, ("right", 0))
+
+                                     # Reduce precedence comes from rule being reduced (p)
+                                     rprec, rlevel = Productions[p.number].prec
+
+                                     if (slevel < rlevel) or (
+                                         (slevel == rlevel) and (rprec == "left")
+                                     ):
+                                         # We really need to reduce here.
+                                         st_action[a] = -p.number
+                                         st_actionp[a] = p
+                                         if not slevel and not rlevel:
+                                             descrip.append(
+                                                 f"  ! shift/reduce conflict for {a} resolved as reduce"
+                                             )
+                                             self.sr_conflicts.append((st, a, "reduce"))
+                                         Productions[p.number].reduced += 1
+                                     elif (slevel == rlevel) and (rprec == "nonassoc"):
+                                         st_action[a] = None
+                                     else:
+                                         # Hmmm. Guess we'll keep the shift
+                                         if not rlevel:
+                                             descrip.append(
+                                                 f"  ! shift/reduce conflict for {a} resolved as shift"
+                                             )
+                                             self.sr_conflicts.append((st, a, "shift"))
+                                 elif r <= 0:
+                                     # Reduce/reduce conflict. In this case, we favor the rule
+                                     # that was defined first in the grammar file
+                                     oldp = Productions[-r]
+                                     pp = Productions[p.number]
+                                     if oldp.line > pp.line:
+                                         st_action[a] = -p.number
+                                         st_actionp[a] = p
+                                         chosenp, rejectp = pp, oldp
+                                         Productions[p.number].reduced += 1
+                                         Productions[oldp.number].reduced -= 1
+                                     else:
+                                         chosenp, rejectp = oldp, pp
+                                     self.rr_conflicts.append((st, chosenp, rejectp))
+                                     descrip.append(
+                                         "  ! reduce/reduce conflict for %s resolved using rule %d (%s)"
+                                         % (a, st_actionp[a].number, st_actionp[a])
+                                     )
+                                 else:
+                                     raise LALRError(f"Unknown conflict in state {st}")
+                             else:
+                                 st_action[a] = -p.number
+                                 st_actionp[a] = p
+                                 Productions[p.number].reduced += 1
+                 else:
+                     i = p.lr_index
+                     a = p.prod[i + 1]  # Get symbol right after the "."
+                     if a in self.grammar.Terminals:
+                         g = self.lr0_goto(I, a)
+                         j = self.lr0_cidhash.get(id(g), -1)
+                         if j >= 0:
+                             # We are in a shift state
+                             actlist.append((a, p, f"shift and go to state {j}"))
+                             r = st_action.get(a)
+                             if r is not None:
+                                 # Whoa, we have a shift/reduce or shift/shift conflict
+                                 if r > 0:
+                                     if r != j:
+                                         raise LALRError(
+                                             f"Shift/shift conflict in state {st}"
+                                         )
+                                 elif r <= 0:
+                                     # Do a precedence check.
+                                     # - if precedence of reduce rule is higher, we reduce.
+                                     # - if precedence of reduce is same and left assoc, we reduce.
+                                     # - otherwise we shift
+                                     rprec, rlevel = Productions[
+                                         st_actionp[a].number
+                                     ].prec
+                                     sprec, slevel = Precedence.get(a, ("right", 0))
+                                     if (slevel > rlevel) or (
+                                         (slevel == rlevel) and (rprec == "right")
+                                     ):
+                                         # We decide to shift here... highest precedence to shift
+                                         Productions[st_actionp[a].number].reduced -= 1
+                                         st_action[a] = j
+                                         st_actionp[a] = p
+                                         if not rlevel:
+                                             descrip.append(
+                                                 f"  ! shift/reduce conflict for {a} resolved as shift"
+                                             )
+                                             self.sr_conflicts.append((st, a, "shift"))
+                                     elif (slevel == rlevel) and (rprec == "nonassoc"):
+                                         st_action[a] = None
+                                     else:
+                                         # Hmmm. Guess we'll keep the reduce
+                                         if not slevel and not rlevel:
+                                             descrip.append(
+                                                 f"  ! shift/reduce conflict for {a} resolved as reduce"
+                                             )
+                                             self.sr_conflicts.append((st, a, "reduce"))
+
+                                 else:
+                                     raise LALRError(f"Unknown conflict in state {st}")
+                             else:
+                                 st_action[a] = j
+                                 st_actionp[a] = p
+
+             # Print the actions associated with each terminal
+             _actprint = {}
+             for a, p, m in actlist:
+                 if a in st_action:
+                     if p is st_actionp[a]:
+                         descrip.append(f"    {a:<15s} {m}")
+                         _actprint[(a, m)] = 1
+             descrip.append("")
+
+             # Construct the goto table for this state
+             nkeys = {}
+             for ii in I:
+                 for s in ii.usyms:
+                     if s in self.grammar.Nonterminals:
+                         nkeys[s] = None
+             for n in nkeys:
+                 g = self.lr0_goto(I, n)
+                 j = self.lr0_cidhash.get(id(g), -1)
+                 if j >= 0:
+                     st_goto[n] = j
+                     descrip.append(f"    {n:<30s} shift and go to state {j}")
+
+             action[st] = st_action
+             actionp[st] = st_actionp
+             goto[st] = st_goto
+             self.state_descriptions[st] = "\n".join(descrip)
+
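+     # A sketch of the encoding used by the table just built (the sample values
+     # are illustrative, not from a real grammar):
+     #
+     #     action[state][terminal] ->  j > 0   shift and go to state j
+     #                                -n < 0   reduce using rule n
+     #                                 0       accept
+     #                                 None    error (nonassoc precedence)
+     #     goto[state][nonterminal] -> state to enter after that reduction
+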
+     # ----------------------------------------------------------------------
+     # Debugging output. Printing the LRTable object will produce a listing
+     # of all of the states, conflicts, and other details.
+     # ----------------------------------------------------------------------
+     def __str__(self):
+         out = []
+         for descrip in self.state_descriptions.values():
+             out.append(descrip)
+
+         if self.sr_conflicts or self.rr_conflicts:
+             out.append("\nConflicts:\n")
+
+             for state, tok, resolution in self.sr_conflicts:
+                 out.append(
+                     f"shift/reduce conflict for {tok} in state {state} resolved as {resolution}"
+                 )
+
+             already_reported = set()
+             for state, rule, rejected in self.rr_conflicts:
+                 if (state, id(rule), id(rejected)) in already_reported:
+                     continue
+                 out.append(
+                     f"reduce/reduce conflict in state {state} resolved using rule {rule}"
+                 )
+                 out.append(f"rejected rule ({rejected}) in state {state}")
+                 already_reported.add((state, id(rule), id(rejected)))
+
+             warned_never = set()
+             for state, rule, rejected in self.rr_conflicts:
+                 if not rejected.reduced and (rejected not in warned_never):
+                     out.append(f"Rule ({rejected}) is never reduced")
+                     warned_never.add(rejected)
+
+         return "\n".join(out)
+
+
+ # Collect grammar rules from a function
+ def _collect_grammar_rules(func):
+     grammar = []
+     while func:
+         prodname = func.__name__
+         unwrapped = inspect.unwrap(func)
+         filename = unwrapped.__code__.co_filename
+         lineno = unwrapped.__code__.co_firstlineno
+         for rule, lineno in zip(func.rules, range(lineno + len(func.rules) - 1, 0, -1)):
+             syms = rule.split()
+             ebnf_prod = []
+             while ("{" in syms) or ("[" in syms):
+                 for s in syms:
+                     if s == "[":
+                         syms, prod = _replace_ebnf_optional(syms)
+                         ebnf_prod.extend(prod)
+                         break
+                     elif s == "{":
+                         syms, prod = _replace_ebnf_repeat(syms)
+                         ebnf_prod.extend(prod)
+                         break
+                     elif "|" in s:
+                         syms, prod = _replace_ebnf_choice(syms)
+                         ebnf_prod.extend(prod)
+                         break
+
+             if syms[1:2] == [":"] or syms[1:2] == ["::="]:
+                 grammar.append((func, filename, lineno, syms[0], syms[2:]))
+             else:
+                 grammar.append((func, filename, lineno, prodname, syms))
+             grammar.extend(ebnf_prod)
+
+         func = getattr(func, "next_func", None)
+
+     return grammar
+
+
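+ # A hedged sketch of the EBNF shorthand handled above (the rule strings are
+ # hypothetical, not taken from the jaclang grammar):
+ #
+ #     'args : expr { COMMA expr }'   -- "{ ... }" expands via _generate_repeat_rules
+ #     'decl : ID [ EQUALS expr ]'    -- "[ ... ]" expands via _generate_optional_rules
+ #     'op : PLUS|MINUS'              -- "a|b" expands via _generate_choice_rules
+ #
+ # Each expansion splices a generated nonterminal (e.g. _1_..._repeat) into the
+ # rule and appends the generated helper productions to the grammar.
+
+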
+ # Replace EBNF repetition
+ def _replace_ebnf_repeat(syms):
+     syms = list(syms)
+     first = syms.index("{")
+     end = syms.index("}", first)
+
+     # Look for choices inside
+     repeated_syms = syms[first + 1 : end]
+     if any("|" in sym for sym in repeated_syms):
+         repeated_syms, prods = _replace_ebnf_choice(repeated_syms)
+     else:
+         prods = []
+
+     symname, moreprods = _generate_repeat_rules(repeated_syms)
+     syms[first : end + 1] = [symname]
+     return syms, prods + moreprods
+
+
+ def _replace_ebnf_optional(syms):
+     syms = list(syms)
+     first = syms.index("[")
+     end = syms.index("]", first)
+     symname, prods = _generate_optional_rules(syms[first + 1 : end])
+     syms[first : end + 1] = [symname]
+     return syms, prods
+
+
+ def _replace_ebnf_choice(syms):
+     syms = list(syms)
+     newprods = []
+     n = 0
+     while n < len(syms):
+         if "|" in syms[n]:
+             symname, prods = _generate_choice_rules(syms[n].split("|"))
+             syms[n] = symname
+             newprods.extend(prods)
+         n += 1
+     return syms, newprods
+
+
+ # Counter for generating rules for repeated items
+ _gencount = 0
+
+ # Dictionary mapping name aliases generated by EBNF rules.
+ _name_aliases = {}
+
+
+ def _sanitize_symbols(symbols):
+     for sym in symbols:
+         if sym.startswith("'"):
+             yield str(hex(ord(sym[1])))
+         elif sym.isidentifier():
+             yield sym
+         else:
+             yield sym.encode("utf-8").hex()
+
+
+ def _generate_repeat_rules(symbols):
+     """
+     Symbols is a list of grammar symbols [ symbols ]. This
+     generates code corresponding to the following grammar construction:
+
+         @('repeat : many')
+         def repeat(self, p):
+             return p.many
+
+         @('repeat :')
+         def repeat(self, p):
+             return []
+
+         @('many : many symbols')
+         def many(self, p):
+             p.many.append(symbols)
+             return p.many
+
+         @('many : symbols')
+         def many(self, p):
+             return [ p.symbols ]
+     """
+     global _gencount
+     _gencount += 1
+     basename = f"_{_gencount}_" + "_".join(_sanitize_symbols(symbols))
+     name = f"{basename}_repeat"
+     oname = f"{basename}_items"
+     iname = f"{basename}_item"
+     symtext = " ".join(symbols)
+
+     _name_aliases[name] = symbols
+
+     productions = []
+     _ = _decorator
+
+     @_(f"{name} : {oname}")
+     def repeat(self, p):
+         return getattr(p, oname)
+
+     @_(f"{name} : ")
+     def repeat2(self, p):
+         return []
+
+     productions.extend(_collect_grammar_rules(repeat))
+     productions.extend(_collect_grammar_rules(repeat2))
+
+     @_(f"{oname} : {oname} {iname}")
+     def many(self, p):
+         items = getattr(p, oname)
+         items.append(getattr(p, iname))
+         return items
+
+     @_(f"{oname} : {iname}")
+     def many2(self, p):
+         return [getattr(p, iname)]
+
+     productions.extend(_collect_grammar_rules(many))
+     productions.extend(_collect_grammar_rules(many2))
+
+     @_(f"{iname} : {symtext}")
+     def item(self, p):
+         return tuple(p)
+
+     productions.extend(_collect_grammar_rules(item))
+     return name, productions
+
+
+ def _generate_optional_rules(symbols):
+     """
+     Symbols is a list of grammar symbols [ symbols ]. This
+     generates code corresponding to the following grammar construction:
+
+         @('optional : symbols')
+         def optional(self, p):
+             return p.symbols
+
+         @('optional :')
+         def optional(self, p):
+             return None
+     """
+     global _gencount
+     _gencount += 1
+     basename = f"_{_gencount}_" + "_".join(_sanitize_symbols(symbols))
+     name = f"{basename}_optional"
+     symtext = " ".join(symbols)
+
+     _name_aliases[name] = symbols
+
+     productions = []
+     _ = _decorator
+
+     no_values = (None,) * len(symbols)
+
+     @_(f"{name} : {symtext}")
+     def optional(self, p):
+         return tuple(p)
+
+     @_(f"{name} : ")
+     def optional2(self, p):
+         return no_values
+
+     productions.extend(_collect_grammar_rules(optional))
+     productions.extend(_collect_grammar_rules(optional2))
+     return name, productions
+
+
+ def _generate_choice_rules(symbols):
+     """
+     Symbols is a list of grammar symbols such as [ 'PLUS', 'MINUS' ].
+     This generates code corresponding to the following construction:
+
+         @('PLUS', 'MINUS')
+         def choice(self, p):
+             return p[0]
+     """
+     global _gencount
+     _gencount += 1
+     basename = f"_{_gencount}_" + "_".join(_sanitize_symbols(symbols))
+     name = f"{basename}_choice"
+
+     _ = _decorator
+     productions = []
+
+     def choice(self, p):
+         return p[0]
+
+     choice.__name__ = name
+     choice = _(*symbols)(choice)
+     productions.extend(_collect_grammar_rules(choice))
+     return name, productions
+
+
+ class ParserMetaDict(dict):
+     """
+     Dictionary that allows decorated grammar rule functions to be overloaded
+     """
+
+     def __setitem__(self, key, value):
+         if key in self and callable(value) and hasattr(value, "rules"):
+             value.next_func = self[key]
+             if not hasattr(value.next_func, "rules"):
+                 raise GrammarError(
+                     f"Redefinition of {key}. Perhaps an earlier {key} is missing @_"
+                 )
+         super().__setitem__(key, value)
+
+     def __getitem__(self, key):
+         if key not in self and key.isupper() and key[:1] != "_":
+             return key.upper()
+         else:
+             return super().__getitem__(key)
+
+
+ def _decorator(rule, *extra):
+     rules = [rule, *extra]
+
+     def decorate(func):
+         func.rules = [*getattr(func, "rules", []), *rules[::-1]]
+         return func
+
+     return decorate
+
+
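+ # How the decorator composes (a hedged sketch; the rule strings here are
+ # hypothetical):
+ #
+ #     @_('expr : expr PLUS term', 'expr : expr MINUS term')
+ #     def expr(self, p): ...
+ #
+ # stores func.rules in reverse order (rules[::-1]); _collect_grammar_rules
+ # later zips them against a descending line-number range, which recovers an
+ # approximation of source order for error reporting.
+
+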
+ class ParserMeta(type):
+     @classmethod
+     def __prepare__(meta, *args, **kwargs):
+         d = ParserMetaDict()
+         d["_"] = _decorator
+         return d
+
+     def __new__(meta, clsname, bases, attributes):
+         del attributes["_"]
+         cls = super().__new__(meta, clsname, bases, attributes)
+         cls._build(list(attributes.items()))
+         return cls
+
+
+ class Parser(metaclass=ParserMeta):
+     # Automatic tracking of position information
+     track_positions = True
+
+     # Logging object where debugging/diagnostic messages are sent
+     log = SlyLogger(sys.stderr)
+
+     # Debugging filename where parsetab.out data can be written
+     debugfile = None
+
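+     # Minimal usage sketch (hypothetical names; assumes a sly-style lexer
+     # class MyLexer defining `tokens` elsewhere):
+     #
+     #     class MyParser(Parser):
+     #         tokens = MyLexer.tokens
+     #
+     #         @_('expr : expr PLUS term')
+     #         def expr(self, p):
+     #             return p.expr + p.term
+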
+     @classmethod
+     def __validate_tokens(cls):
+         if not hasattr(cls, "tokens"):
+             cls.log.error("No token list is defined")
+             return False
+
+         if not cls.tokens:
+             cls.log.error("tokens is empty")
+             return False
+
+         if "error" in cls.tokens:
+             cls.log.error("Illegal token name 'error'. It is a reserved word")
+             return False
+
+         return True
+
+     @classmethod
+     def __validate_precedence(cls):
+         if not hasattr(cls, "precedence"):
+             cls.__preclist = []
+             return True
+
+         preclist = []
+         if not isinstance(cls.precedence, (list, tuple)):
+             cls.log.error("precedence must be a list or tuple")
+             return False
+
+         for level, p in enumerate(cls.precedence, start=1):
+             if not isinstance(p, (list, tuple)):
+                 cls.log.error(
+                     f"Bad precedence table entry {p!r}. Must be a list or tuple"
+                 )
+                 return False
+
+             if len(p) < 2:
+                 cls.log.error(
+                     f"Malformed precedence entry {p!r}. Must be (assoc, term, ..., term)"
+                 )
+                 return False
+
+             if not all(isinstance(term, str) for term in p):
+                 cls.log.error("precedence items must be strings")
+                 return False
+
+             assoc = p[0]
+             preclist.extend((term, assoc, level) for term in p[1:])
+
+         cls.__preclist = preclist
+         return True
+
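+     # Shape the validator above accepts (token names hypothetical):
+     #
+     #     precedence = (
+     #         ('left', 'PLUS', 'MINUS'),      # level 1, binds loosest
+     #         ('left', 'TIMES', 'DIVIDE'),    # level 2
+     #         ('right', 'UMINUS'),            # level 3, binds tightest
+     #     )
+     #
+     # Each entry is (assoc, term, ..., term); enumerate(..., start=1) assigns
+     # the increasing precedence levels used by the LALR conflict resolution.
+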
+     @classmethod
+     def __validate_specification(cls):
+         """
+         Validate various parts of the grammar specification
+         """
+         if not cls.__validate_tokens():
+             return False
+         if not cls.__validate_precedence():
+             return False
+         return True
+
+     @classmethod
+     def __build_grammar(cls, rules):
+         """
+         Build the grammar from the grammar rules
+         """
+         grammar_rules = []
+         errors = ""
+         # Check for non-empty symbols
+         if not rules:
+             raise YaccError("No grammar rules are defined")
+
+         grammar = Grammar(cls.tokens)
+
+         # Set the precedence level for terminals
+         for term, assoc, level in cls.__preclist:
+             try:
+                 grammar.set_precedence(term, assoc, level)
+             except GrammarError as e:
+                 errors += f"{e}\n"
+
+         for name, func in rules:
+             try:
+                 parsed_rule = _collect_grammar_rules(func)
+                 for pfunc, rulefile, ruleline, prodname, syms in parsed_rule:
+                     try:
+                         grammar.add_production(
+                             prodname, syms, pfunc, rulefile, ruleline
+                         )
+                     except GrammarError as e:
+                         errors += f"{e}\n"
+             except SyntaxError as e:
+                 errors += f"{e}\n"
+         try:
+             grammar.set_start(getattr(cls, "start", None))
+         except GrammarError as e:
+             errors += f"{e}\n"
+
+         undefined_symbols = grammar.undefined_symbols()
+         for sym, prod in undefined_symbols:
+             errors += (
+                 "%s:%d: Symbol %r used, but not defined as a token or a rule\n"
+                 % (prod.file, prod.line, sym)
+             )
+         if cls.debugfile:
+             unused_terminals = grammar.unused_terminals()
+             if unused_terminals:
+                 unused_str = "{" + ",".join(unused_terminals) + "}"
+                 cls.log.warning(
+                     f'Token{"(s)" if len(unused_terminals) > 1 else ""} {unused_str} defined, but not used'
+                 )
+
+             unused_rules = grammar.unused_rules()
+             for prod in unused_rules:
+                 cls.log.warning(
+                     "%s:%d: Rule %r defined, but not used",
+                     prod.file,
+                     prod.line,
+                     prod.name,
+                 )
+
+             if len(unused_terminals) == 1:
+                 cls.log.warning("There is 1 unused token")
+             if len(unused_terminals) > 1:
+                 cls.log.warning("There are %d unused tokens", len(unused_terminals))
+
+             if len(unused_rules) == 1:
+                 cls.log.warning("There is 1 unused rule")
+             if len(unused_rules) > 1:
+                 cls.log.warning("There are %d unused rules", len(unused_rules))
+
+             unreachable = grammar.find_unreachable()
+             for u in unreachable:
+                 cls.log.warning("Symbol %r is unreachable", u)
+
+         if len(undefined_symbols) == 0:
+             infinite = grammar.infinite_cycles()
+             for inf in infinite:
+                 errors += "Infinite recursion detected for symbol %r\n" % inf
+
+         unused_prec = grammar.unused_precedence()
+         for term, assoc in unused_prec:
+             errors += "Precedence rule %r defined for unknown symbol %r\n" % (
+                 assoc,
+                 term,
+             )
+
+         cls._grammar = grammar
+         if errors:
+             raise YaccError("Unable to build grammar.\n" + errors)
+
+     @classmethod
+     def __build_lrtables(cls):
+         """
+         Build the LR parsing tables from the grammar
+         """
+         lrtable = LRTable(cls._grammar)
+         num_sr = len(lrtable.sr_conflicts)
+
+         # Report shift/reduce and reduce/reduce conflicts
+         if num_sr != getattr(cls, "expected_shift_reduce", None):
+             if num_sr == 1:
+                 cls.log.warning("1 shift/reduce conflict")
+             elif num_sr > 1:
+                 cls.log.warning("%d shift/reduce conflicts", num_sr)
+
+         num_rr = len(lrtable.rr_conflicts)
+         if num_rr != getattr(cls, "expected_reduce_reduce", None):
+             if num_rr == 1:
+                 cls.log.warning("1 reduce/reduce conflict")
+             elif num_rr > 1:
+                 cls.log.warning("%d reduce/reduce conflicts", num_rr)
+
+         cls._lrtable = lrtable
+         return True
+
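+     # Grammars with known conflicts can silence the warnings above by
+     # declaring the expected counts on the subclass (attribute names taken
+     # from the getattr calls above; the numbers are illustrative):
+     #
+     #     class MyParser(Parser):
+     #         expected_shift_reduce = 3
+     #         expected_reduce_reduce = 0
+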
+     @classmethod
+     def __collect_rules(cls, definitions):
+         """
+         Collect all of the tagged grammar rules
+         """
+         rules = [
+             (name, value)
+             for name, value in definitions
+             if callable(value) and hasattr(value, "rules")
+         ]
+         return rules
+
+     # ----------------------------------------------------------------------
+     # Build the LALR(1) tables. definitions is a list of (name, item) tuples
+     # of all definitions provided in the class, listed in the order in which
+     # they were defined. This method is triggered by a metaclass.
+     # ----------------------------------------------------------------------
+     @classmethod
+     def _build(cls, definitions):
+         if vars(cls).get("_build", False):
+             return
+
+         # Collect all of the grammar rules from the class definition
+         rules = cls.__collect_rules(definitions)
+
+         # Validate other parts of the grammar specification
+         if not cls.__validate_specification():
+             raise YaccError("Invalid parser specification")
+
+         # Build the underlying grammar object
+         cls.__build_grammar(rules)
+
+         # Build the LR tables
+         if not cls.__build_lrtables():
+             raise YaccError("Can't build parsing tables")
+
+         if cls.debugfile:
+             with open(cls.debugfile, "w") as f:
+                 f.write(str(cls._grammar))
+                 f.write("\n")
+                 f.write(str(cls._lrtable))
+             cls.log.info(
+                 "Parser debugging for %s written to %s", cls.__qualname__, cls.debugfile
+             )
+
+     # ----------------------------------------------------------------------
+     # Parsing Support. This is the parsing runtime that users use to parse
+     # input and to customize error handling.
+     # ----------------------------------------------------------------------
+     def error(self, token):
+         """
+         Default error handling function. This may be subclassed.
+         """
+         if token:
+             lineno = getattr(token, "lineno", 0)
+             if lineno:
+                 sys.stderr.write(
+                     f'Jac Parse Error: Syntax error at line {lineno} while parsing "{token.value}"\n'
+                 )
+             else:
+                 sys.stderr.write(
+                     f'Jac Parse Error: Syntax error while parsing "{token.value}"\n'
+                 )
+         else:
+             sys.stderr.write("Jac Parse Error: Parse error in input. EOF\n")
+
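+     # Because error() may be subclassed, a parser can resume after a bad
+     # token by returning a replacement lookahead; parse() below treats any
+     # truthy return value as the next token. A hedged sketch:
+     #
+     #     def error(self, token):
+     #         super().error(token)            # report as usual
+     #         return next(self.tokens, None)  # drop the bad token, keep going
+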
+     def errok(self):
+         """
+         Clear the error status
+         """
+         self.errorok = True
+
+     def restart(self):
+         """
+         Force the parser to restart from a fresh state. Clears the statestack
+         """
+         del self.statestack[:]
+         del self.symstack[:]
+         sym = YaccSymbol()
+         sym.type = "$end"
+         self.symstack.append(sym)
+         self.statestack.append(0)
+         self.state = 0
+
+     def parse(self, tokens):
+         """
+         Parse the given input tokens.
+         """
+         lookahead = None  # Current lookahead symbol
+         lookaheadstack = []  # Stack of lookahead symbols
+         actions = (
+             self._lrtable.lr_action
+         )  # Local reference to action table (to avoid lookup on self.)
+         goto = (
+             self._lrtable.lr_goto
+         )  # Local reference to goto table (to avoid lookup on self.)
+         prod = (
+             self._grammar.Productions
+         )  # Local reference to production list (to avoid lookup on self.)
+         defaulted_states = (
+             self._lrtable.defaulted_states
+         )  # Local reference to defaulted states
+         pslice = YaccProduction(None)  # Production object passed to grammar rules
+         errorcount = 0  # Used during error recovery
+
+         # Set up the state and symbol stacks
+         self.tokens = tokens
+         self.statestack = statestack = []  # Stack of parsing states
+         self.symstack = symstack = []  # Stack of grammar symbols
+         pslice._stack = symstack  # Associate the stack with the production
+         self.restart()
+
+         # Set up position tracking
+         track_positions = self.track_positions
+         if not hasattr(self, "_line_positions"):
+             self._line_positions = {}  # id: -> lineno
+             self._index_positions = {}  # id: -> (start, end)
+
+         errtoken = None  # Err token
+         while True:
+             # Get the next symbol on the input. If a lookahead symbol
+             # is already set, we just use that. Otherwise, we'll pull
+             # the next token off of the lookaheadstack or from the lexer
+             if self.state not in defaulted_states:
+                 if not lookahead:
+                     if not lookaheadstack:
+                         lookahead = next(tokens, None)  # Get the next token
+                     else:
+                         lookahead = lookaheadstack.pop()
+                     if not lookahead:
+                         lookahead = YaccSymbol()
+                         lookahead.type = "$end"
+
+                 # Check the action table
+                 ltype = lookahead.type
+                 t = actions[self.state].get(ltype)
+             else:
+                 t = defaulted_states[self.state]
+
+             if t is not None:
+                 if t > 0:
+                     # shift a symbol on the stack
+                     statestack.append(t)
+                     self.state = t
+
+                     symstack.append(lookahead)
+                     lookahead = None
+
+                     # Decrease error count on successful shift
+                     if errorcount:
+                         errorcount -= 1
+                     continue
+
+                 if t < 0:
+                     # reduce a symbol on the stack, emit a production
+                     self.production = p = prod[-t]
+                     pname = p.name
+                     plen = p.len
+                     pslice._namemap = p.namemap
+
+                     # Call the production function
+                     pslice._slice = symstack[-plen:] if plen else []
+
+                     sym = YaccSymbol()
+                     sym.type = pname
+                     value = p.func(self, pslice)
+                     if value is pslice:
+                         value = (
+                             pname,
+                             pslice.lineno if hasattr(pslice, "lineno") else 0,
+                             *(
+                                 s.value if isinstance(s, YaccSymbol) else s
+                                 for s in pslice._slice
+                             ),
+                         )
+
+                     sym.value = value
+
+                     # Record positions
+                     if track_positions:
+                         if plen:
+                             sym.lineno = symstack[-plen].lineno
+                             sym.index = symstack[-plen].index
+                             sym.end = symstack[-1].end
+                         else:
+                             # A zero-length production (what to put here?)
+                             sym.lineno = None
+                             sym.index = None
+                             sym.end = None
+                         self._line_positions[id(value)] = sym.lineno
+                         self._index_positions[id(value)] = (sym.index, sym.end)
+
+                     if plen:
+                         del symstack[-plen:]
+                         del statestack[-plen:]
+
+                     symstack.append(sym)
+                     self.state = goto[statestack[-1]][pname]
+                     statestack.append(self.state)
+                     continue
+
+                 if t == 0:
+                     n = symstack[-1]
+                     result = getattr(n, "value", None)
+                     return result
+
+             if t is None:
+                 # We have some kind of parsing error here. To handle
+                 # this, we are going to push the current token onto
+                 # the tokenstack and replace it with an 'error' token.
+                 # If there are any synchronization rules, they may
+                 # catch it.
+                 #
+                 # In addition to pushing the error token, we call the
+                 # user-defined error() function if this is the first
+                 # syntax error. This function is only called if
+                 # errorcount == 0.
+                 if errorcount == 0 or self.errorok:
+                     errorcount = ERROR_COUNT
+                     self.errorok = False
+                     if lookahead.type == "$end":
+                         errtoken = None  # End of file!
+                     else:
+                         errtoken = lookahead
+
+                     tok = self.error(errtoken)
+                     if tok:
+                         # User must have done some kind of panic
+                         # mode recovery on their own. The
+                         # returned token is the next lookahead
+                         lookahead = tok
+                         self.errorok = True
+                         continue
+                     else:
+                         # If at EOF. We just return. Basically dead.
+                         if not errtoken:
+                             return
+                 else:
+                     # Reset the error count. Unsuccessful token shifted
+                     errorcount = ERROR_COUNT
+
+                 # case 1: the statestack only has 1 entry on it. If we're in this state, the
+                 # entire parse has been rolled back and we're completely hosed. The token is
+                 # discarded and we just keep going.
+
+                 if len(statestack) <= 1 and lookahead.type != "$end":
+                     lookahead = None
+                     self.state = 0
+                     # Nuke the lookahead stack
+                     del lookaheadstack[:]
+                     continue
+
+                 # case 2: the statestack has a couple of entries on it, but we're
+                 # at the end of the file. nuke the top entry and generate an error token
+
+                 # Start nuking entries on the stack
+                 if lookahead.type == "$end":
+                     # Whoa. We're really hosed here. Bail out
+                     return
+
+                 if lookahead.type != "error":
+                     sym = symstack[-1]
+                     if sym.type == "error":
+                         # Hmmm. Error is on top of stack, we'll just nuke input
+                         # symbol and continue
+                         lookahead = None
+                         continue
+
+                     # Create the error symbol for the first time and make it the new lookahead symbol
+                     t = YaccSymbol()
+                     t.type = "error"
+
+                     if hasattr(lookahead, "lineno"):
+                         t.lineno = lookahead.lineno
+                     if hasattr(lookahead, "index"):
+                         t.index = lookahead.index
+                     if hasattr(lookahead, "end"):
+                         t.end = lookahead.end
+                     t.value = lookahead
+                     lookaheadstack.append(lookahead)
+                     lookahead = t
+                 else:
+                     sym = symstack.pop()
+                     statestack.pop()
+                     self.state = statestack[-1]
+                 continue
+
+             # Call an error function here
+             raise RuntimeError("sly: internal parser error!!!\n")
+
+     # Return position tracking information
+     def line_position(self, value):
+         return self._line_positions[id(value)]
+
+     def index_position(self, value):
+         return self._index_positions[id(value)]
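+
+     # End-to-end usage sketch (MyLexer/MyParser are hypothetical; tokenize()
+     # is the companion sly Lexer API):
+     #
+     #     parser = MyParser()
+     #     tree = parser.parse(MyLexer().tokenize("1 + 2 * 3"))
+     #     parser.line_position(tree)     # line number recorded for this value
+     #     parser.index_position(tree)    # (start, end) indices
+     #
+     # Both lookups key on id(value), so they only apply to the exact objects
+     # produced while track_positions is True.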