python-cc 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pcc/ply/yacc.py ADDED
@@ -0,0 +1,3471 @@
1
+ # -----------------------------------------------------------------------------
2
+ # ply: yacc.py
3
+ #
4
+ # Copyright (C) 2001-2015,
5
+ # David M. Beazley (Dabeaz LLC)
6
+ # All rights reserved.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions are
10
+ # met:
11
+ #
12
+ # * Redistributions of source code must retain the above copyright notice,
13
+ # this list of conditions and the following disclaimer.
14
+ # * Redistributions in binary form must reproduce the above copyright notice,
15
+ # this list of conditions and the following disclaimer in the documentation
16
+ # and/or other materials provided with the distribution.
17
+ # * Neither the name of the David Beazley or Dabeaz LLC may be used to
18
+ # endorse or promote products derived from this software without
19
+ # specific prior written permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
+ # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
+ # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
+ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
+ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ # -----------------------------------------------------------------------------
33
+ #
34
+ # This implements an LR parser that is constructed from grammar rules defined
35
+ # as Python functions. The grammar is specified by supplying the BNF inside
36
+ # Python documentation strings. The inspiration for this technique was borrowed
37
+ # from John Aycock's Spark parsing system. PLY might be viewed as cross between
38
+ # Spark and the GNU bison utility.
39
+ #
40
+ # The current implementation is only somewhat object-oriented. The
41
+ # LR parser itself is defined in terms of an object (which allows multiple
42
+ # parsers to co-exist). However, most of the variables used during table
43
+ # construction are defined in terms of global variables. Users shouldn't
44
+ # notice unless they are trying to define multiple parsers at the same
45
+ # time using threads (in which case they should have their head examined).
46
+ #
47
+ # This implementation supports both SLR and LALR(1) parsing. LALR(1)
48
+ # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
49
+ # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50
+ # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
51
+ # by the more efficient DeRemer and Pennello algorithm.
52
+ #
53
+ # :::::::: WARNING :::::::
54
+ #
55
+ # Construction of LR parsing tables is fairly complicated and expensive.
56
+ # To make this module run fast, a *LOT* of work has been put into
57
+ # optimization---often at the expense of readability and what one might
58
+ # consider to be good Python "coding style." Modify the code at your
59
+ # own risk!
60
+ # ----------------------------------------------------------------------------
61
+
62
+ import re
63
+ import types
64
+ import sys
65
+ import os.path
66
+ import inspect
67
+ import base64
68
+ import warnings
69
+
70
__version__ = '3.8'     # PLY release this module belongs to
__tabversion__ = '3.8'  # Table format version (presumably stamped into generated
                        # parsetab files to detect staleness -- confirm in writer code)

#-----------------------------------------------------------------------------
# === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = True             # Debugging mode. If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'      # Default name of the table module
default_lr = 'LALR'          # Default LR table generation method

error_count = 3              # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False            # Set to True if developing yacc. This turns off optimized
                             # implementations of certain functions.

resultlimit = 40             # Size limit of results when running in debug mode.

pickle_protocol = 0          # Protocol to use when writing pickle files

# String type-checking compatibility: Python 2 uses basestring as the common
# ancestor of str/unicode; Python 3 only has str.
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

MAXINT = sys.maxsize  # Largest platform integer (handy "infinity" bound)
102
+
103
+ # This object is a stand-in for a logging object created by the
104
+ # logging module. PLY will use this by default to create things
105
+ # such as the parser.out file. If a user wants more detailed
106
+ # information, they can create their own logging object and pass
107
+ # it into PLY.
108
+
109
class PlyLogger(object):
    """Stand-in for a stdlib ``logging`` logger.

    PLY uses this by default to produce output such as the parser.out
    file.  Messages are %-formatted with their args and written, one per
    line, to the file-like object given at construction.  Users wanting
    richer behavior can pass a real logging object into PLY instead.
    """

    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write('%s\n' % (msg % args))

    info = debug  # info-level output is identical to debug-level

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: %s\n' % (msg % args))

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: %s\n' % (msg % args))

    critical = debug  # critical-level output is identical to debug-level
125
+
126
+ # Null logger is used when no output is generated. Does nothing.
127
class NullLogger(object):
    """Logger used when no output should be generated.

    Every attribute lookup and every call returns the logger itself, so
    arbitrarily chained logging calls (``log.debug(...)``, ``log.info(...)``)
    all silently do nothing.
    """

    def __getattribute__(self, name):
        # Swallow every attribute access, including method lookups
        return self

    def __call__(self, *args, **kwargs):
        # Calling the logger (or any "method" of it) is a no-op
        return self
133
+
134
+ # Exception raised for yacc-related errors
135
class YaccError(Exception):
    """Exception raised for yacc-related errors."""
    pass
137
+
138
+ # Format the result message that the parser produces when running in debug mode.
139
def format_result(r):
    """Render *r* for debug output as ``<type @ 0xADDR> (repr)``.

    Multi-line reprs are re-escaped onto one line, and reprs longer than
    the module-level ``resultlimit`` are truncated with an ellipsis.
    """
    text = repr(r)
    if '\n' in text:
        # Collapse a multi-line repr into a single escaped line
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + ' ...'
    return '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), text)
147
+
148
+ # Format stack entries when the parser is running in debug mode
149
def format_stack_entry(r):
    """Render a parser-stack entry for debug output.

    Short reprs (under 16 characters) are shown verbatim; anything longer
    is abbreviated to ``<type @ 0xADDR>``.  Multi-line reprs are first
    re-escaped onto a single line.
    """
    text = repr(r)
    if '\n' in text:
        # Collapse a multi-line repr into a single escaped line
        text = repr(text)
    if len(text) >= 16:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
    return text
157
+
158
+ # Panic mode error recovery support. This feature is being reworked--much of the
159
+ # code here is to offer a deprecation/backwards compatible transition
160
+
161
_errok = None     # Bound by call_errorfunc() to the active parser's errok()
_token = None     # Bound by call_errorfunc() to the active parser's token()
_restart = None   # Bound by call_errorfunc() to the active parser's restart()
# Deprecation message shown when the legacy module-level functions are used
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''
174
+
175
def errok():
    """Deprecated module-level errok(); forwards to the active parser's errok()."""
    warnings.warn(_warnmsg)
    return _errok()
178
+
179
def restart():
    """Deprecated module-level restart(); forwards to the active parser's restart()."""
    warnings.warn(_warnmsg)
    return _restart()
182
+
183
def token():
    """Deprecated module-level token(); forwards to the active parser's token()."""
    warnings.warn(_warnmsg)
    return _token()
186
+
187
+ # Utility function to call the p_error() function with some deprecation hacks
188
def call_errorfunc(errorfunc, token, parser):
    """Invoke the user-supplied p_error() callback.

    For backwards compatibility, the parser's errok/token/restart methods
    are temporarily published through the module-level _errok/_token/_restart
    globals so legacy code calling the deprecated global functions keeps
    working.  The globals are torn down again after the callback returns.
    """
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    result = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        # Already gone (e.g. removed by reentrant use); nothing to clean up
        pass
    return result
199
+
200
+ #-----------------------------------------------------------------------------
201
+ # === LR Parsing Engine ===
202
+ #
203
+ # The following classes are used for the LR parser itself. These are not
204
+ # used during table construction and are independent of the actual LR
205
+ # table generation algorithm
206
+ #-----------------------------------------------------------------------------
207
+
208
+ # This class is used to hold non-terminal grammar symbols during parsing.
209
+ # It normally has the following attributes set:
210
+ # .type = Grammar symbol type
211
+ # .value = Symbol value
212
+ # .lineno = Starting line number
213
+ # .endlineno = Ending line number (optional, set automatically)
214
+ # .lexpos = Starting lex position
215
+ # .endlexpos = Ending lex position (optional, set automatically)
216
+
217
class YaccSymbol:
    """Container for a non-terminal grammar symbol during parsing.

    Attributes are assigned externally as the parse proceeds:
    type (symbol name), value, lineno/endlineno, lexpos/endlexpos.
    Both str() and repr() show just the symbol's type name.
    """

    def __str__(self):
        return self.type

    def __repr__(self):
        return self.type
223
+
224
+ # This class is a wrapper around the objects actually passed to each
225
+ # grammar rule. Index lookup and assignment actually assign the
226
+ # .value attribute of the underlying YaccSymbol object.
227
+ # The lineno() method returns the line number of a given
228
+ # item (or 0 if not defined). The linespan() method returns
229
+ # a tuple of (startline,endline) representing the range of lines
230
+ # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
231
+ # representing the range of positional information for a symbol.
232
+
233
class YaccProduction:
    """Wrapper around the symbols matched by a grammar rule.

    Index lookup and assignment read/write the ``.value`` attribute of the
    underlying YaccSymbol objects; negative indices reach into the parser's
    symbol stack.  lineno()/linespan() report line information for a symbol
    (0 when unknown), and lexpos()/lexspan() report lexer positions.
    """

    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [sym.value for sym in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        # Negative indices index the parser's symbol stack
        return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 compatibility for p[i:j]
        return [sym.value for sym in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Line number of symbol *n*, or 0 if not recorded."""
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """(startline, endline) range for symbol *n*."""
        first = getattr(self.slice[n], 'lineno', 0)
        last = getattr(self.slice[n], 'endlineno', first)
        return first, last

    def lexpos(self, n):
        """Lexer position of symbol *n*, or 0 if not recorded."""
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        """(startpos, endpos) lexer-position range for symbol *n*."""
        first = getattr(self.slice[n], 'lexpos', 0)
        last = getattr(self.slice[n], 'endlexpos', first)
        return first, last

    def error(self):
        """Signal a syntax error from inside a grammar rule."""
        raise SyntaxError
278
+
279
+ # -----------------------------------------------------------------------------
280
+ # == LRParser ==
281
+ #
282
+ # The LR Parsing engine.
283
+ # -----------------------------------------------------------------------------
284
+
285
+ class LRParser:
286
    def __init__(self, lrtab, errorf):
        """Build a parser from an LR table object and an optional p_error callback.

        lrtab supplies the productions, action table, and goto table;
        errorf is the user's p_error() function (or None).
        """
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True
293
+
294
    def errok(self):
        # Signal that error recovery has completed successfully
        self.errorok = True
296
+
297
+ def restart(self):
298
+ del self.statestack[:]
299
+ del self.symstack[:]
300
+ sym = YaccSymbol()
301
+ sym.type = '$end'
302
+ self.symstack.append(sym)
303
+ self.statestack.append(0)
304
+
305
+ # Defaulted state support.
306
+ # This method identifies parser states where there is only one possible reduction action.
307
+ # For such states, the parser can choose to make a rule reduction without consuming
308
+ # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
309
+ # certain kinds of advanced parsing situations where the lexer and parser interact with
310
+ # each other or change states (i.e., manipulation of scope, lexer states, etc.).
311
+ #
312
+ # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
313
+ def set_defaulted_states(self):
314
+ self.defaulted_states = {}
315
+ for state, actions in self.action.items():
316
+ rules = list(actions.values())
317
+ if len(rules) == 1 and rules[0] < 0:
318
+ self.defaulted_states[state] = rules[0]
319
+
320
    def disable_defaulted_states(self):
        """Turn off the defaulted-state optimization (see set_defaulted_states)."""
        self.defaulted_states = {}
322
+
323
+ def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
324
+ if debug or yaccdevel:
325
+ if isinstance(debug, int):
326
+ debug = PlyLogger(sys.stderr)
327
+ return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
328
+ elif tracking:
329
+ return self.parseopt(input, lexer, debug, tracking, tokenfunc)
330
+ else:
331
+ return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
332
+
333
+
334
+ # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
335
+ # parsedebug().
336
+ #
337
+ # This is the debugging enabled version of parse(). All changes made to the
338
+ # parsing engine should be made here. Optimized versions of this function
339
+ # are automatically created by the ply/ygen.py script. This script cuts out
340
+ # sections enclosed in markers such as this:
341
+ #
342
+ # #--! DEBUG
343
+ # statements
344
+ # #--! DEBUG
345
+ #
346
+ # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
347
+
348
    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Debug-enabled version of the LR parsing loop.

        All engine changes should be made HERE; the optimized variants
        (parseopt, parseopt_notrack) are generated from this method by
        ply/ygen.py, which strips the marked #--! DEBUG / #--! TRACKING
        sections.  *debug* is a logger-like object (see PlyLogger).
        """
        #--! parsedebug-start
        lookahead = None                          # Current lookahead symbol
        lookaheadstack = []                       # Stack of lookahead symbols
        actions = self.action                     # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                          # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                   # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states  # Local reference to defaulted states
        pslice = YaccProduction(None)             # Production object passed to grammar rules
        errorcount = 0                            # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end
680
+
681
+ # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
682
+ # parseopt().
683
+ #
684
+ # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
685
+ # This code is automatically generated by the ply/ygen.py script. Make
686
+ # changes to the parsedebug() method instead.
687
+ # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
688
+
689
    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Optimized LR parsing loop with position tracking enabled.

        DO NOT EDIT THIS CODE DIRECTLY: it is generated from parsedebug()
        by the ply/ygen.py script (the #--! DEBUG sections are stripped,
        the #--! TRACKING sections are kept).  Make changes in
        parsedebug() instead.
        """
        #--! parseopt-start
        lookahead = None                          # Current lookahead symbol
        lookaheadstack = []                       # Stack of lookahead symbols
        actions = self.action                     # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                          # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                   # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states  # Local reference to defaulted states
        pslice = YaccProduction(None)             # Production object passed to grammar rules
        errorcount = 0                            # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:


                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-end
980
+
981
+ # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
982
+ # parseopt_notrack().
983
+ #
984
+ # Optimized version of parseopt() with line number tracking removed.
985
+ # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
986
+ # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
987
+ # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
988
+
989
    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Optimized LR parsing engine with debugging output and position
        tracking stripped out.

        NOTE: auto-generated from parsedebug() by ply/ygen.py (see the banner
        above) -- do not edit this body directly; regenerate instead.

        input     -- optional string fed to the lexer
        lexer     -- lexer object; defaults to the shared lex module lexer
        debug, tracking -- accepted for signature compatibility, unused here
        tokenfunc -- optional callable overriding lexer.token

        Returns the value of the start symbol, or None on unrecovered error.
        """
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                # Defaulted state: single possible reduction regardless of input
                t = defaulted_states[state]

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        # Empty (epsilon) production: nothing popped off the stacks
                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: return the value attached to the start symbol
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            # NOTE(review): reads lookahead.lineno after checking
                            # hasattr(errtoken, 'lineno') -- looks like it should be
                            # errtoken.lineno.  Longstanding upstream quirk; this is
                            # generated code, confirm before changing.
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                # NOTE(review): this message lacks a trailing '\n',
                                # unlike its siblings -- upstream quirk.
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already an error token: discard one stack entry
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end
1256
+
1257
+ # -----------------------------------------------------------------------------
1258
+ # === Grammar Representation ===
1259
+ #
1260
+ # The following functions, classes, and variables are used to represent and
1261
+ # manipulate the rules that make up a grammar.
1262
+ # -----------------------------------------------------------------------------
1263
+
1264
+ # regex matching identifiers
1265
+ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1266
+
1267
+ # -----------------------------------------------------------------------------
1268
+ # class Production:
1269
+ #
1270
+ # This class stores the raw information about a single production or grammar rule.
1271
+ # A grammar rule refers to a specification such as this:
1272
+ #
1273
+ # expr : expr PLUS term
1274
+ #
1275
+ # Here are the basic attributes defined on all productions
1276
+ #
1277
+ # name - Name of the production. For example 'expr'
1278
+ # prod - A list of symbols on the right side ['expr','PLUS','term']
1279
+ # prec - Production precedence level
1280
+ # number - Production number.
1281
+ # func - Function that executes on reduce
1282
+ # file - File where production function is defined
1283
+ # lineno - Line number where production function is defined
1284
+ #
1285
+ # The following attributes are defined or optional.
1286
+ #
1287
+ # len - Length of the production (number of symbols on right hand side)
1288
+ # usyms - Set of unique symbols found in the production
1289
+ # -----------------------------------------------------------------------------
1290
+
1291
class Production(object):
    """A single grammar rule, e.g. 'expr : expr PLUS term'.

    Attributes:
        name     -- left-hand-side nonterminal, e.g. 'expr'
        prod     -- tuple of right-hand-side symbols, e.g. ('expr', 'PLUS', 'term')
        number   -- production number within the grammar
        func     -- name of the p_* function executed on reduce (or None)
        callable -- actual function resolved by bind()
        file     -- file where the production function is defined
        line     -- line number of that definition
        prec     -- precedence tuple (assoc, level)
        len      -- number of right-hand-side symbols
        usyms    -- unique symbols appearing on the right-hand side
    """

    reduced = 0  # Number of times this production has been reduced (maintained elsewhere)

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction
        self.len = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = []
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
        else:
            self.str = '%s -> <empty>' % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(' + str(self) + ')'

    def __len__(self):
        return len(self.prod)

    def __bool__(self):
        # Fix: a Production must always be truthy, even an epsilon rule whose
        # len() is 0.  The original defined only the Python-2 hook
        # __nonzero__ (returning 1), so under Python 3 truth testing fell
        # through to __len__ and empty productions became falsy.
        return True

    # Python 2 name for the same hook (this file supports both runtimes).
    __nonzero__ = __bool__

    def __getitem__(self, index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self, n):
        # NOTE(review): 'Prodnames' is referenced as a global below but is not
        # defined at module scope in this file; this method appears vestigial
        # (Grammar.build_lritems does the real LR item construction).
        # Confirm before calling.
        if n > len(self.prod):
            return None
        p = LRItem(self, n)
        # Precompute the list of productions immediately following.
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError, KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None
        return p

    # Bind the production function name to a callable
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
1358
+
1359
+ # This class serves as a minimal standin for Production objects when
1360
+ # reading table data from files. It only contains information
1361
+ # actually used by the LR parsing engine, plus some additional
1362
+ # debugging information.
1363
class MiniProduction(object):
    """Minimal stand-in for a Production object, used when parser tables are
    loaded from a file.  Holds only what the LR engine needs (name, len,
    callable) plus a bit of debugging information.
    """

    def __init__(self, str, name, len, func, file, line):
        # 'str' and 'len' intentionally shadow builtins here: the parameter
        # order matches the serialized table format.
        self.str = str
        self.name = name
        self.len = len
        self.func = func
        self.file = file
        self.line = line
        self.callable = None

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    # Resolve the stored function name into an actual callable.
    def bind(self, pdict):
        if self.func:
            self.callable = pdict[self.func]
1383
+
1384
+
1385
+ # -----------------------------------------------------------------------------
1386
+ # class LRItem
1387
+ #
1388
+ # This class represents a specific stage of parsing a production rule. For
1389
+ # example:
1390
+ #
1391
+ # expr : expr . PLUS term
1392
+ #
1393
+ # In the above, the "." represents the current location of the parse. Here
1394
+ # basic attributes:
1395
+ #
1396
+ # name - Name of the production. For example 'expr'
1397
+ # prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
1398
+ # number - Production number.
1399
+ #
1400
+ # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
1401
+ # then lr_next refers to 'expr -> expr PLUS . term'
1402
+ # lr_index - LR item index (location of the ".") in the prod list.
1403
+ # lookaheads - LALR lookahead symbols for this item
1404
+ # len - Length of the production (number of symbols on right hand side)
1405
+ # lr_after - List of all productions that immediately follow
1406
+ # lr_before - Grammar symbol immediately before
1407
+ # -----------------------------------------------------------------------------
1408
+
1409
class LRItem(object):
    """One LR item: a production with a dot marking the current parse
    position, e.g.  expr -> expr . PLUS term

    The dot is stored as a literal '.' entry inside self.prod; lr_index is
    its position.  lookaheads maps states to LALR lookahead symbol lists and
    is filled in during table construction.
    """

    def __init__(self, p, n):
        self.name = p.name
        self.number = p.number
        self.lr_index = n
        self.lookaheads = {}
        # Splice the dot into a copy of the production's symbol sequence.
        dotted = list(p.prod)
        dotted.insert(n, '.')
        self.prod = tuple(dotted)
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if not self.prod:
            return '%s -> <empty>' % self.name
        return '%s -> %s' % (self.name, ' '.join(self.prod))

    def __repr__(self):
        return 'LRItem(%s)' % self
1430
+
1431
+ # -----------------------------------------------------------------------------
1432
+ # rightmost_terminal()
1433
+ #
1434
+ # Return the rightmost terminal from a list of symbols. Used in add_production()
1435
+ # -----------------------------------------------------------------------------
1436
def rightmost_terminal(symbols, terminals):
    """Return the right-most symbol in *symbols* that is a terminal, or None
    if no terminal appears.  Used by add_production() to derive a rule's
    default precedence.
    """
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None
1443
+
1444
+ # -----------------------------------------------------------------------------
1445
+ # === GRAMMAR CLASS ===
1446
+ #
1447
+ # The following class represents the contents of the specified grammar along
1448
+ # with various computed properties such as first sets, follow sets, LR items, etc.
1449
+ # This data is used for critical parts of the table generation process later.
1450
+ # -----------------------------------------------------------------------------
1451
+
1452
class GrammarError(YaccError):
    """Raised for defects in the grammar specification itself (illegal rule
    names, duplicate rules, bad %prec usage, undefined precedence, etc.)."""
    pass
1454
+
1455
+ class Grammar(object):
1456
+ def __init__(self, terminals):
1457
+ self.Productions = [None] # A list of all of the productions. The first
1458
+ # entry is always reserved for the purpose of
1459
+ # building an augmented grammar
1460
+
1461
+ self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all
1462
+ # productions of that nonterminal.
1463
+
1464
+ self.Prodmap = {} # A dictionary that is only used to detect duplicate
1465
+ # productions.
1466
+
1467
+ self.Terminals = {} # A dictionary mapping the names of terminal symbols to a
1468
+ # list of the rules where they are used.
1469
+
1470
+ for term in terminals:
1471
+ self.Terminals[term] = []
1472
+
1473
+ self.Terminals['error'] = []
1474
+
1475
+ self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list
1476
+ # of rule numbers where they are used.
1477
+
1478
+ self.First = {} # A dictionary of precomputed FIRST(x) symbols
1479
+
1480
+ self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols
1481
+
1482
+ self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the
1483
+ # form ('right',level) or ('nonassoc', level) or ('left',level)
1484
+
1485
+ self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer.
1486
+ # This is only used to provide error checking and to generate
1487
+ # a warning about unused precedence rules.
1488
+
1489
+ self.Start = None # Starting symbol for the grammar
1490
+
1491
+
1492
    def __len__(self):
        # Number of productions, including the reserved slot 0.
        return len(self.Productions)
1494
+
1495
    def __getitem__(self, index):
        # Index directly into the production list (g[n] == g.Productions[n]).
        return self.Productions[index]
1497
+
1498
+ # -----------------------------------------------------------------------------
1499
+ # set_precedence()
1500
+ #
1501
+ # Sets the precedence for a given terminal. assoc is the associativity such as
1502
+ # 'left','right', or 'nonassoc'. level is a numeric level.
1503
+ #
1504
+ # -----------------------------------------------------------------------------
1505
+
1506
+ def set_precedence(self, term, assoc, level):
1507
+ assert self.Productions == [None], 'Must call set_precedence() before add_production()'
1508
+ if term in self.Precedence:
1509
+ raise GrammarError('Precedence already specified for terminal %r' % term)
1510
+ if assoc not in ['left', 'right', 'nonassoc']:
1511
+ raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1512
+ self.Precedence[term] = (assoc, level)
1513
+
1514
+ # -----------------------------------------------------------------------------
1515
+ # add_production()
1516
+ #
1517
+ # Given an action function, this function assembles a production rule and
1518
+ # computes its precedence level.
1519
+ #
1520
+ # The production rule is supplied as a list of symbols. For example,
1521
+ # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1522
+ # symbols ['expr','PLUS','term'].
1523
+ #
1524
+ # Precedence is determined by the precedence of the right-most non-terminal
1525
+ # or the precedence of a terminal specified by %prec.
1526
+ #
1527
+ # A variety of error checks are performed to make sure production symbols
1528
+ # are valid and that %prec is used correctly.
1529
+ # -----------------------------------------------------------------------------
1530
+
1531
    def add_production(self, prodname, syms, func=None, file='', line=0):
        """Assemble one production rule and compute its precedence.

        prodname -- left-hand-side nonterminal name
        syms     -- list of right-hand-side symbols (mutated in place:
                    literal tokens are unquoted, %prec suffix is stripped)
        func     -- name of the rule's action function (or None)
        file, line -- where the rule was defined, for error messages

        Raises GrammarError on illegal names, multi-character literals,
        misplaced %prec, unknown precedence, or duplicate rules.
        """

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    # NOTE(review): eval() of a quoted literal from the grammar
                    # spec -- grammar text is trusted input here, but worth
                    # confirming if specs can come from untrusted sources.
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    # Auto-register the literal character as a terminal.
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                # Remember the declaration was consumed (for unused-precedence warnings).
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]
1607
+
1608
+ # -----------------------------------------------------------------------------
1609
+ # set_start()
1610
+ #
1611
+ # Sets the starting symbol and creates the augmented grammar. Production
1612
+ # rule 0 is S' -> start where start is the start symbol.
1613
+ # -----------------------------------------------------------------------------
1614
+
1615
    def set_start(self, start=None):
        """Set the start symbol and build the augmented grammar.

        Installs production 0 as S' -> start.  If *start* is omitted, the
        left-hand side of the first declared rule is used.  Raises
        GrammarError if the symbol is not a defined nonterminal.
        """
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start
1623
+
1624
+ # -----------------------------------------------------------------------------
1625
+ # find_unreachable()
1626
+ #
1627
+ # Find all of the nonterminal symbols that can't be reached from the starting
1628
+ # symbol. Returns a list of nonterminals that can't be reached.
1629
+ # -----------------------------------------------------------------------------
1630
+
1631
+ def find_unreachable(self):
1632
+
1633
+ # Mark all symbols that are reachable from a symbol s
1634
+ def mark_reachable_from(s):
1635
+ if s in reachable:
1636
+ return
1637
+ reachable.add(s)
1638
+ for p in self.Prodnames.get(s, []):
1639
+ for r in p.prod:
1640
+ mark_reachable_from(r)
1641
+
1642
+ reachable = set()
1643
+ mark_reachable_from(self.Productions[0].prod[0])
1644
+ return [s for s in self.Nonterminals if s not in reachable]
1645
+
1646
+ # -----------------------------------------------------------------------------
1647
+ # infinite_cycles()
1648
+ #
1649
+ # This function looks at the various parsing rules and tries to detect
1650
+ # infinite recursion cycles (grammar rules where there is no possible way
1651
+ # to derive a string of only terminals).
1652
+ # -----------------------------------------------------------------------------
1653
+
1654
    def infinite_cycles(self):
        """Detect rules that can never derive a string of only terminals.

        Uses a fixed-point computation: a symbol 'terminates' if it is a
        terminal, or if at least one of its productions consists solely of
        terminating symbols.  Returns the list of offending symbols.
        """
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
1710
+
1711
+ # -----------------------------------------------------------------------------
1712
+ # undefined_symbols()
1713
+ #
1714
+ # Find all symbols that were used in the grammar, but not defined as tokens or
1715
+ # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
1716
+ # and prod is the production where the symbol was used.
1717
+ # -----------------------------------------------------------------------------
1718
+ def undefined_symbols(self):
1719
+ result = []
1720
+ for p in self.Productions:
1721
+ if not p:
1722
+ continue
1723
+
1724
+ for s in p.prod:
1725
+ if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1726
+ result.append((s, p))
1727
+ return result
1728
+
1729
+ # -----------------------------------------------------------------------------
1730
+ # unused_terminals()
1731
+ #
1732
+ # Find all terminals that were defined, but not used by the grammar. Returns
1733
+ # a list of all symbols.
1734
+ # -----------------------------------------------------------------------------
1735
+ def unused_terminals(self):
1736
+ unused_tok = []
1737
+ for s, v in self.Terminals.items():
1738
+ if s != 'error' and not v:
1739
+ unused_tok.append(s)
1740
+
1741
+ return unused_tok
1742
+
1743
+ # ------------------------------------------------------------------------------
1744
+ # unused_rules()
1745
+ #
1746
+ # Find all grammar rules that were defined, but not used (maybe not reachable)
1747
+ # Returns a list of productions.
1748
+ # ------------------------------------------------------------------------------
1749
+
1750
+ def unused_rules(self):
1751
+ unused_prod = []
1752
+ for s, v in self.Nonterminals.items():
1753
+ if not v:
1754
+ p = self.Prodnames[s][0]
1755
+ unused_prod.append(p)
1756
+ return unused_prod
1757
+
1758
+ # -----------------------------------------------------------------------------
1759
+ # unused_precedence()
1760
+ #
1761
+ # Returns a list of tuples (term,precedence) corresponding to precedence
1762
+ # rules that were never used by the grammar. term is the name of the terminal
1763
+ # on which precedence was applied and precedence is a string such as 'left' or
1764
+ # 'right' corresponding to the type of precedence.
1765
+ # -----------------------------------------------------------------------------
1766
+
1767
+ def unused_precedence(self):
1768
+ unused = []
1769
+ for termname in self.Precedence:
1770
+ if not (termname in self.Terminals or termname in self.UsedPrecedence):
1771
+ unused.append((termname, self.Precedence[termname][0]))
1772
+
1773
+ return unused
1774
+
1775
+ # -------------------------------------------------------------------------
1776
+ # _first()
1777
+ #
1778
+ # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1779
+ #
1780
+ # During execution of compute_first1, the result may be incomplete.
1781
+ # Afterward (e.g., when called from compute_follow()), it will be complete.
1782
+ # -------------------------------------------------------------------------
1783
    def _first(self, beta):
        """Compute FIRST1(beta) for a tuple of symbols *beta*.

        Relies on self.First being populated (possibly partially, while
        compute_first() is still iterating).  Returns a list of terminals,
        plus '<empty>' if every symbol in beta can derive the empty string.
        """

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
1812
+
1813
+ # -------------------------------------------------------------------------
1814
+ # compute_first()
1815
+ #
1816
+ # Compute the value of FIRST1(X) for all symbols
1817
+ # -------------------------------------------------------------------------
1818
    def compute_first(self):
        """Compute FIRST1(X) for every grammar symbol, memoized in self.First.

        Terminals seed their own singleton sets; nonterminal sets are grown
        to a fixed point via _first().  Returns the completed self.First dict.
        """
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First
1847
+
1848
    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        """Compute and cache FOLLOW(N) for every nonterminal N.

        *start* names the grammar's start symbol; if omitted, the name of
        production 1 (the user's first rule) is used.  Returns self.Follow,
        a dict mapping nonterminal -> list of terminals.
        """
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Initialize every nonterminal's follow set to empty.
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        # The start symbol can be followed only by end-of-input.
        self.Follow[start] = ['$end']

        # Fixed-point iteration: keep sweeping the productions until a full
        # pass adds nothing.
        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Scan the right-hand side of this production.
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # B is a nonterminal: whatever can start the rest of
                        # the production can follow B.
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # The tail can vanish (or B is last), so anything
                            # that follows the production's head follows B too.
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow
1897
+
1898
+
1899
    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        """Attach LR item chains to every production.

        For each production p, builds LRItem objects for every dot
        position, links them via .lr_next (terminated by None), and stores
        the full list on p.lr_items.  Each item also caches:
          - lr_after:  productions for the nonterminal right after the dot
          - lr_before: the symbol immediately before the dot (or None)
        """
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while True:
                # The dot can sit at positions 0..len(p); beyond that we
                # emit None to terminate the linked list.
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        # Dot at the end, or the next symbol is a terminal.
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        # Dot at the very beginning: nothing precedes it.
                        lri.lr_before = None

                # Chain from the previous item (the production itself heads
                # the chain, so p.lr_next is the dot-at-0 item).
                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
1941
+
1942
+ # -----------------------------------------------------------------------------
1943
+ # == Class LRTable ==
1944
+ #
1945
+ # This basic class represents a basic table of LR parsing information.
1946
+ # Methods for generating the tables are not defined here. They are defined
1947
+ # in the derived class LRGeneratedTable.
1948
+ # -----------------------------------------------------------------------------
1949
+
1950
class VersionError(YaccError):
    """Raised when a cached parse table was produced by an incompatible
    version of the table format and must be regenerated."""
1952
+
1953
class LRTable(object):
    """Holds a table of LR parsing information (action/goto tables plus
    the production list).  This base class only loads previously-generated
    tables; table *generation* lives in the derived LRGeneratedTable."""

    def __init__(self):
        self.lr_action = None       # action table: state -> {terminal: action}
        self.lr_goto = None         # goto table: state -> {nonterminal: state}
        self.lr_productions = None  # list of MiniProduction objects
        self.lr_method = None       # 'LALR' or 'SLR'

    def read_table(self, module):
        """Load tables from a parsetab module (object or importable name).

        Raises VersionError if the module's _tabversion does not match this
        version of the library.  Returns the stored grammar signature.
        """
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            # NOTE(review): exec of a caller-supplied module name — callers
            # must not pass untrusted strings here.
            exec('import %s' % module)
            parsetab = sys.modules[module]

        if parsetab._tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self, filename):
        """Load tables from a pickle file written by pickle_table().

        Raises ImportError if the file does not exist and VersionError on a
        table-version mismatch.  Returns the stored grammar signature.
        """
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        if not os.path.exists(filename):
            # ImportError so callers can fall back to regenerating tables.
            raise ImportError

        in_f = open(filename, 'rb')

        # Fields must be read in the same order pickle_table() wrote them.
        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)
2011
+
2012
+
2013
+ # -----------------------------------------------------------------------------
2014
+ # === LR Generator ===
2015
+ #
2016
+ # The following classes and functions are used to generate LR parsing tables on
2017
+ # a grammar.
2018
+ # -----------------------------------------------------------------------------
2019
+
2020
# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#      F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X, R, FP):
    """Solve F(x) = FP(x) U U{ F(y) | x R y } for every x in X.

    Drives the recursive traverse() over each not-yet-visited element and
    returns the resulting dict mapping each x to its computed set.
    """
    N = dict((x, 0) for x in X)   # visitation numbers: 0 == unvisited
    stack = []
    F = {}
    for x in X:
        if not N[x]:
            traverse(x, N, stack, F, X, R, FP)
    return F
2047
+
2048
def traverse(x, N, stack, F, X, R, FP):
    """Recursive worker for digraph().

    Depth-first traversal in the style of the DeRemer/Pennello algorithm:
    assigns x a depth number in N, seeds F[x] with FP(x), recurses into
    related elements, and unions their sets into F[x].  When x turns out
    to be the root of a strongly connected component, every member still
    on the stack is popped and given x's final set (and N set to MAXINT
    to mark completion).
    """
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        # Track the lowest depth reachable from x (SCC root detection).
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        # x is the root of an SCC: finalize every member on the stack.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
2070
+
2071
class LALRError(YaccError):
    """Raised when LR table construction encounters an unresolvable
    condition (unsupported method, shift/shift conflict, etc.)."""
2073
+
2074
# -----------------------------------------------------------------------------
# == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self, grammar, method='LALR', log=None):
        """Generate SLR or LALR parse tables for *grammar*.

        method: 'SLR' or 'LALR' (raises LALRError otherwise).
        log:    optional logger; defaults to a NullLogger that discards
                all diagnostic output.
        Table construction runs eagerly in the constructor.
        """
        if method not in ['SLR', 'LALR']:
            raise LALRError('Unsupported method %s' % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}                  # Action table
        self.lr_goto = {}                    # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}              # Cache of computed gotos
        self.lr0_cidhash = {}                # Cache of closures

        self._add_count = 0                  # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []                  # List of conflicts

        self.sr_conflicts = []               # (state, terminal, resolution) tuples
        self.rr_conflicts = []               # (state, chosen rule, rejected rule) tuples

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
2116
+
2117
+ # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2118
+
2119
+ def lr0_closure(self, I):
2120
+ self._add_count += 1
2121
+
2122
+ # Add everything in I to J
2123
+ J = I[:]
2124
+ didadd = True
2125
+ while didadd:
2126
+ didadd = False
2127
+ for j in J:
2128
+ for x in j.lr_after:
2129
+ if getattr(x, 'lr0_added', 0) == self._add_count:
2130
+ continue
2131
+ # Add B --> .G to J
2132
+ J.append(x.lr_next)
2133
+ x.lr0_added = self._add_count
2134
+ didadd = True
2135
+
2136
+ return J
2137
+
2138
    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.   This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self, I, x):
        """Return goto(I, x): the closure of all items in I whose dot can
        advance over symbol x.  Results are memoized per (id(I), x) and
        deduplicated via a per-symbol trie so an identical goto set is
        always the same Python object."""
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        # s is a trie keyed by the id()s of the advanced items; walking it
        # while collecting items yields a unique node per distinct item set.
        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        # '$end' marks the trie leaf holding the finished closure.
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                # No item could advance over x: cache the empty list.
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g
2178
+
2179
+ # Compute the LR(0) sets of item function
2180
+ def lr0_items(self):
2181
+ C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2182
+ i = 0
2183
+ for I in C:
2184
+ self.lr0_cidhash[id(I)] = i
2185
+ i += 1
2186
+
2187
+ # Loop over the items in C and each grammar symbols
2188
+ i = 0
2189
+ while i < len(C):
2190
+ I = C[i]
2191
+ i += 1
2192
+
2193
+ # Collect all of the symbols that could possibly be in the goto(I,X) sets
2194
+ asyms = {}
2195
+ for ii in I:
2196
+ for s in ii.usyms:
2197
+ asyms[s] = None
2198
+
2199
+ for x in asyms:
2200
+ g = self.lr0_goto(I, x)
2201
+ if not g or id(g) in self.lr0_cidhash:
2202
+ continue
2203
+ self.lr0_cidhash[id(g)] = len(C)
2204
+ C.append(g)
2205
+
2206
+ return C
2207
+
2208
+ # -----------------------------------------------------------------------------
2209
+ # ==== LALR(1) Parsing ====
2210
+ #
2211
+ # LALR(1) parsing is almost exactly the same as SLR except that instead of
2212
+ # relying upon Follow() sets when performing reductions, a more selective
2213
+ # lookahead set that incorporates the state of the LR(0) machine is utilized.
2214
+ # Thus, we mainly just have to focus on calculating the lookahead sets.
2215
+ #
2216
+ # The method used here is due to DeRemer and Pennelo (1982).
2217
+ #
2218
+ # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1)
2219
+ # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2220
+ # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2221
+ #
2222
+ # Further details can also be found in:
2223
+ #
2224
+ # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2225
+ # McGraw-Hill Book Company, (1985).
2226
+ #
2227
+ # -----------------------------------------------------------------------------
2228
+
2229
+ # -----------------------------------------------------------------------------
2230
+ # compute_nullable_nonterminals()
2231
+ #
2232
+ # Creates a dictionary containing all of the non-terminals that might produce
2233
+ # an empty production.
2234
+ # -----------------------------------------------------------------------------
2235
+
2236
+ def compute_nullable_nonterminals(self):
2237
+ nullable = set()
2238
+ num_nullable = 0
2239
+ while True:
2240
+ for p in self.grammar.Productions[1:]:
2241
+ if p.len == 0:
2242
+ nullable.add(p.name)
2243
+ continue
2244
+ for t in p.prod:
2245
+ if t not in nullable:
2246
+ break
2247
+ else:
2248
+ nullable.add(p.name)
2249
+ if len(nullable) == num_nullable:
2250
+ break
2251
+ num_nullable = len(nullable)
2252
+ return nullable
2253
+
2254
+ # -----------------------------------------------------------------------------
2255
+ # find_nonterminal_trans(C)
2256
+ #
2257
+ # Given a set of LR(0) items, this functions finds all of the non-terminal
2258
+ # transitions. These are transitions in which a dot appears immediately before
2259
+ # a non-terminal. Returns a list of tuples of the form (state,N) where state
2260
+ # is the state number and N is the nonterminal symbol.
2261
+ #
2262
+ # The input C is the set of LR(0) items.
2263
+ # -----------------------------------------------------------------------------
2264
+
2265
+ def find_nonterminal_transitions(self, C):
2266
+ trans = []
2267
+ for stateno, state in enumerate(C):
2268
+ for p in state:
2269
+ if p.lr_index < p.len - 1:
2270
+ t = (stateno, p.prod[p.lr_index+1])
2271
+ if t[1] in self.grammar.Nonterminals:
2272
+ if t not in trans:
2273
+ trans.append(t)
2274
+ return trans
2275
+
2276
+ # -----------------------------------------------------------------------------
2277
+ # dr_relation()
2278
+ #
2279
+ # Computes the DR(p,A) relationships for non-terminal transitions. The input
2280
+ # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2281
+ #
2282
+ # Returns a list of terminals.
2283
+ # -----------------------------------------------------------------------------
2284
+
2285
+ def dr_relation(self, C, trans, nullable):
2286
+ dr_set = {}
2287
+ state, N = trans
2288
+ terms = []
2289
+
2290
+ g = self.lr0_goto(C[state], N)
2291
+ for p in g:
2292
+ if p.lr_index < p.len - 1:
2293
+ a = p.prod[p.lr_index+1]
2294
+ if a in self.grammar.Terminals:
2295
+ if a not in terms:
2296
+ terms.append(a)
2297
+
2298
+ # This extra bit is to handle the start state
2299
+ if state == 0 and N == self.grammar.Productions[0].prod[0]:
2300
+ terms.append('$end')
2301
+
2302
+ return terms
2303
+
2304
+ # -----------------------------------------------------------------------------
2305
+ # reads_relation()
2306
+ #
2307
+ # Computes the READS() relation (p,A) READS (t,C).
2308
+ # -----------------------------------------------------------------------------
2309
+
2310
+ def reads_relation(self, C, trans, empty):
2311
+ # Look for empty transitions
2312
+ rel = []
2313
+ state, N = trans
2314
+
2315
+ g = self.lr0_goto(C[state], N)
2316
+ j = self.lr0_cidhash.get(id(g), -1)
2317
+ for p in g:
2318
+ if p.lr_index < p.len - 1:
2319
+ a = p.prod[p.lr_index + 1]
2320
+ if a in empty:
2321
+ rel.append((j, a))
2322
+
2323
+ return rel
2324
+
2325
    # -----------------------------------------------------------------------------
    # compute_lookback_includes()
    #
    # Determines the lookback and includes relations
    #
    # LOOKBACK:
    #
    # This relation is determined by running the LR(0) state machine forward.
    # For example, starting with a production "N : . A B C", we run it forward
    # to obtain "N : A B C ."   We then build a relationship between this final
    # state and the starting state.   These relationships are stored in a dictionary
    # lookdict.
    #
    # INCLUDES:
    #
    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
    #
    # This relation is used to determine non-terminal transitions that occur
    # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
    # if the following holds:
    #
    #       B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string.  State p' must lead to state p with the string L.
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self, C, trans, nullable):
        """Compute the LOOKBACK and INCLUDES relations for all nonterminal
        transitions in *trans*.

        Returns (lookdict, includedict):
          lookdict:    (state, N) -> list of (final_state, production)
          includedict: (state, N) -> list of transitions that include it
        """
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # No forget it
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # while/else: the suffix derives empty, so it
                            # appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop compares a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        # Same production, dot at the end: a lookback pair.
                        lookb.append((j, r))
                for i in includes:
                    if i not in includedict:
                        includedict[i] = []
                    includedict[i].append((state, N))
                lookdict[(state, N)] = lookb

        return lookdict, includedict
2420
+
2421
+ # -----------------------------------------------------------------------------
2422
+ # compute_read_sets()
2423
+ #
2424
+ # Given a set of LR(0) items, this function computes the read sets.
2425
+ #
2426
+ # Inputs: C = Set of LR(0) items
2427
+ # ntrans = Set of nonterminal transitions
2428
+ # nullable = Set of empty transitions
2429
+ #
2430
+ # Returns a set containing the read sets
2431
+ # -----------------------------------------------------------------------------
2432
+
2433
+ def compute_read_sets(self, C, ntrans, nullable):
2434
+ FP = lambda x: self.dr_relation(C, x, nullable)
2435
+ R = lambda x: self.reads_relation(C, x, nullable)
2436
+ F = digraph(ntrans, R, FP)
2437
+ return F
2438
+
2439
+ # -----------------------------------------------------------------------------
2440
+ # compute_follow_sets()
2441
+ #
2442
+ # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2443
+ # and an include set, this function computes the follow sets
2444
+ #
2445
+ # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2446
+ #
2447
+ # Inputs:
2448
+ # ntrans = Set of nonterminal transitions
2449
+ # readsets = Readset (previously computed)
2450
+ # inclsets = Include sets (previously computed)
2451
+ #
2452
+ # Returns a set containing the follow sets
2453
+ # -----------------------------------------------------------------------------
2454
+
2455
+ def compute_follow_sets(self, ntrans, readsets, inclsets):
2456
+ FP = lambda x: readsets[x]
2457
+ R = lambda x: inclsets.get(x, [])
2458
+ F = digraph(ntrans, R, FP)
2459
+ return F
2460
+
2461
+ # -----------------------------------------------------------------------------
2462
+ # add_lookaheads()
2463
+ #
2464
+ # Attaches the lookahead symbols to grammar rules.
2465
+ #
2466
+ # Inputs: lookbacks - Set of lookback relations
2467
+ # followset - Computed follow set
2468
+ #
2469
+ # This function directly attaches the lookaheads to productions contained
2470
+ # in the lookbacks set
2471
+ # -----------------------------------------------------------------------------
2472
+
2473
+ def add_lookaheads(self, lookbacks, followset):
2474
+ for trans, lb in lookbacks.items():
2475
+ # Loop over productions in lookback
2476
+ for state, p in lb:
2477
+ if state not in p.lookaheads:
2478
+ p.lookaheads[state] = []
2479
+ f = followset.get(trans, [])
2480
+ for a in f:
2481
+ if a not in p.lookaheads[state]:
2482
+ p.lookaheads[state].append(a)
2483
+
2484
+ # -----------------------------------------------------------------------------
2485
+ # add_lalr_lookaheads()
2486
+ #
2487
+ # This function does all of the work of adding lookahead information for use
2488
+ # with LALR parsing
2489
+ # -----------------------------------------------------------------------------
2490
+
2491
+ def add_lalr_lookaheads(self, C):
2492
+ # Determine all of the nullable nonterminals
2493
+ nullable = self.compute_nullable_nonterminals()
2494
+
2495
+ # Find all non-terminal transitions
2496
+ trans = self.find_nonterminal_transitions(C)
2497
+
2498
+ # Compute read sets
2499
+ readsets = self.compute_read_sets(C, trans, nullable)
2500
+
2501
+ # Compute lookback/includes relations
2502
+ lookd, included = self.compute_lookback_includes(C, trans, nullable)
2503
+
2504
+ # Compute LALR FOLLOW sets
2505
+ followsets = self.compute_follow_sets(trans, readsets, included)
2506
+
2507
+ # Add all of the lookaheads
2508
+ self.add_lookaheads(lookd, followsets)
2509
+
2510
    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Build the action and goto tables (self.lr_action, self.lr_goto).

        Action encoding per terminal: positive = shift to that state,
        negative = reduce by that rule number, 0 = accept, None = error
        (nonassoc).  Shift/reduce conflicts are resolved by precedence
        (default: shift); reduce/reduce conflicts favor the earlier rule.
        Conflicts are recorded in self.sr_conflicts / self.rr_conflicts.
        """
        Productions = self.grammar.Productions
        Precedence = self.grammar.Precedence
        goto = self.lr_goto         # Goto array
        action = self.lr_action     # Action array
        log = self.log              # Logger for output

        actionp = {}                # Action production array (temporary)

        log.info('Parsing method: %s', self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = []              # List of actions
            st_action = {}
            st_actionp = {}
            st_goto = {}
            log.info('')
            log.info('state %d', st)
            log.info('')
            for p in I:
                log.info('    (%d) %s', p.number, p)
            log.info('')

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action['$end'] = 0
                        st_actionp['$end'] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        # LALR uses the per-state lookaheads; SLR falls back
                        # to the grammar-wide FOLLOW set.
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec, slevel = Productions[st_actionp[a].number].prec
                                    rprec, rlevel = Precedence.get(a, ('right', 0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        # Nonassociative at equal precedence: error entry.
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.   In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp, rejectp = pp, oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp, rejectp = oldp, pp
                                    self.rr_conflicts.append((st, chosenp, rejectp))
                                    log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                             a, st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I, a)
                        j = self.lr0_cidhash.get(id(g), -1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a, p, 'shift and go to state %d' % j))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError('Shift/shift conflict in state %d' % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   -  if precedence of reduce rule is higher, we reduce.
                                    #   -  if precedence of reduce is same and left assoc, we reduce.
                                    #   -  otherwise we shift
                                    rprec, rlevel = Productions[st_actionp[a].number].prec
                                    sprec, slevel = Precedence.get(a, ('right', 0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))

                                else:
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = {}
            for a, p, m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info('    %-15s %s', a, m)
                        _actprint[(a, m)] = 1
            log.info('')
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a, p, m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a, m) in _actprint:
                            log.debug('  ! %-15s [ %s ]', a, m)
                            not_used = 1
                            _actprint[(a, m)] = 1
            if not_used:
                log.debug('')

            # Construct the goto table for this state

            nkeys = {}
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I, n)
                j = self.lr0_cidhash.get(id(g), -1)
                if j >= 0:
                    st_goto[n] = j
                    log.info('    %-30s shift and go to state %d', n, j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
2691
+
2692
    # -----------------------------------------------------------------------------
    # write()
    #
    # This function writes the LR parsing tables to a file
    # -----------------------------------------------------------------------------

    def write_table(self, tabmodule, outputdir='', signature=''):
        """Write the generated tables as an importable Python module.

        tabmodule: dotted module name (last component becomes the file
        name); passing an actual module object raises IOError so an
        existing module is never clobbered.  *signature* is stored so a
        later run can detect grammar changes.
        """
        if isinstance(tabmodule, types.ModuleType):
            raise IOError("Won't overwrite existing tabmodule")

        basemodulename = tabmodule.split('.')[-1]
        filename = os.path.join(outputdir, basemodulename) + '.py'
        try:
            f = open(filename, 'w')

            f.write('''
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller
            # (invert state->{name: value} into name->([states], [values]))
            if smaller:
                items = {}

                for s, nd in self.lr_action.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write('\n_lr_action_items = {')
                for k, v in items.items():
                    f.write('%r:([' % k)
                    for i in v[0]:
                        f.write('%r,' % i)
                    f.write('],[')
                    for i in v[1]:
                        f.write('%r,' % i)

                    f.write(']),')
                f.write('}\n')

                f.write('''
_lr_action = {}
for _k, _v in _lr_action_items.items():
    for _x,_y in zip(_v[0],_v[1]):
        if not _x in _lr_action: _lr_action[_x] = {}
        _lr_action[_x][_k] = _y
del _lr_action_items
''')

            else:
                f.write('\n_lr_action = { ')
                for k, v in self.lr_action.items():
                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
                f.write('}\n')

            if smaller:
                # Factor out names to try and make smaller
                items = {}

                for s, nd in self.lr_goto.items():
                    for name, v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([], [])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write('\n_lr_goto_items = {')
                for k, v in items.items():
                    f.write('%r:([' % k)
                    for i in v[0]:
                        f.write('%r,' % i)
                    f.write('],[')
                    for i in v[1]:
                        f.write('%r,' % i)

                    f.write(']),')
                f.write('}\n')

                f.write('''
_lr_goto = {}
for _k, _v in _lr_goto_items.items():
    for _x, _y in zip(_v[0], _v[1]):
        if not _x in _lr_goto: _lr_goto[_x] = {}
        _lr_goto[_x][_k] = _y
del _lr_goto_items
''')
            else:
                f.write('\n_lr_goto = { ')
                for k, v in self.lr_goto.items():
                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
                f.write('}\n')

            # Write production table
            f.write('_lr_productions = [\n')
            for p in self.lr_productions:
                if p.func:
                    f.write('  (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
                                                          p.func, os.path.basename(p.file), p.line))
                else:
                    f.write('  (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
            f.write(']\n')
            f.close()

        except IOError as e:
            raise
2812
+
2813
+
2814
+ # -----------------------------------------------------------------------------
2815
+ # pickle_table()
2816
+ #
2817
+ # This function pickles the LR parsing tables to a supplied file object
2818
+ # -----------------------------------------------------------------------------
2819
+
2820
def pickle_table(self, filename, signature=''):
    """Serialize the LR parsing tables to *filename* via pickle.

    The stream contains, in order: the table format version, the LR
    method, the caller-supplied grammar *signature*, the action table,
    the goto table, and a flattened list of production descriptions.
    """
    # Prefer the C-accelerated pickle on Python 2; fall back to the
    # standard module elsewhere.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    with open(filename, 'wb') as fh:
        for obj in (__tabversion__, self.lr_method, signature,
                    self.lr_action, self.lr_goto):
            pickle.dump(obj, fh, pickle_protocol)

        # Productions are flattened to plain tuples so they can be
        # unpickled without the production class being importable.
        flattened = [
            (p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)
            if p.func else
            (str(p), p.name, p.len, None, None, None)
            for p in self.lr_productions
        ]
        pickle.dump(flattened, fh, pickle_protocol)
2839
+
2840
+ # -----------------------------------------------------------------------------
2841
+ # === INTROSPECTION ===
2842
+ #
2843
+ # The following functions and classes are used to implement the PLY
2844
+ # introspection features followed by the yacc() function itself.
2845
+ # -----------------------------------------------------------------------------
2846
+
2847
+ # -----------------------------------------------------------------------------
2848
+ # get_caller_module_dict()
2849
+ #
2850
+ # This function returns a dictionary containing all of the symbols defined within
2851
+ # a caller further down the call stack. This is used to get the environment
2852
+ # associated with the yacc() call if none was provided.
2853
+ # -----------------------------------------------------------------------------
2854
+
2855
def get_caller_module_dict(levels):
    """Return the symbol table of a caller *levels* frames up the stack.

    The frame's globals are copied first; if the frame also has distinct
    locals (i.e. the call did not originate at module scope), those
    override the global entries.  Used to recover the environment of the
    yacc() call when no module is given explicitly.
    """
    frame = sys._getframe(levels)
    symbols = dict(frame.f_globals)
    if frame.f_globals != frame.f_locals:
        symbols.update(frame.f_locals)
    return symbols
2861
+
2862
+ # -----------------------------------------------------------------------------
2863
+ # parse_grammar()
2864
+ #
2865
+ # This takes a raw grammar rule string and parses it into production data
2866
+ # -----------------------------------------------------------------------------
2867
def parse_grammar(doc, file, line):
    """Parse a docstring of grammar rules into production tuples.

    Each returned tuple has the form (file, line_number, production_name,
    symbol_list).  A line beginning with '|' continues the most recent
    production.  Raises SyntaxError for malformed rules, reporting the
    position as *file*:*line* offsets.
    """
    grammar = []
    current = None            # name of the most recent production (for '|')
    lineno = line
    for raw in doc.splitlines():
        lineno += 1
        tokens = raw.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Continuation of the previous rule
                if not current:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, lineno))
                prodname = current
                syms = tokens[1:]
            else:
                prodname = tokens[0]
                current = prodname
                syms = tokens[2:]
                assign = tokens[1]
                if assign not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, lineno))

            grammar.append((file, lineno, prodname, syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, lineno, raw.strip()))

    return grammar
2900
+
2901
+ # -----------------------------------------------------------------------------
2902
+ # ParserReflect()
2903
+ #
2904
+ # This class represents information extracted for building a parser including
2905
+ # start symbol, error function, tokens, precedence list, action functions,
2906
+ # etc.
2907
+ # -----------------------------------------------------------------------------
2908
class ParserReflect(object):
    """Collects and validates the parser specification harvested from a
    module dictionary: start symbol, p_error handler, token list,
    precedence table, and the p_* rule functions.  Call get_all() to
    gather everything, then validate_all() to check it; errors are
    reported through the supplied logger and flagged on self.error."""

    def __init__(self, pdict, log=None):
        # pdict: dictionary of symbols (module namespace or caller
        # environment) defining the parser specification.
        self.pdict = pdict
        self.start = None        # start symbol name (set by get_start)
        self.error_func = None   # p_error callable (set by get_error_func)
        self.tokens = None       # token list (set by get_tokens)
        self.modules = set()     # modules contributing rules (for dup checks)
        self.grammar = []        # parsed (funcname, production) pairs
        self.error = False       # sticky flag set by any validation failure

        # Default to logging on stderr if no logger is supplied.
        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_modules()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        """Return a hex digest over the grammar specification.

        Used to decide whether previously generated tables are stale.
        Assumes get_all() has already populated start/prec/tokens/pfuncs.
        """
        # hashlib is Python 2.5+; the md5 module is the pre-2.5 fallback.
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(' '.join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    # f[3] is the rule function's docstring (the grammar text)
                    sig.update(f[3].encode('latin-1'))
        except (TypeError, ValueError):
            # Non-string data in the spec: fall through with a partial hash;
            # validation will report the real problem.
            pass

        digest = base64.b16encode(sig.digest())
        if sys.version_info[0] >= 3:
            digest = digest.decode('latin-1')
        return digest

    # -----------------------------------------------------------------------------
    # validate_modules()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file. Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work). Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_modules(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for module in self.modules:
            lines, linen = inspect.getsourcelines(module)

            # Map rule-function name -> first line it was defined on.
            counthash = {}
            for linen, line in enumerate(lines):
                linen += 1
                m = fre.match(line)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        # Same p_ name defined twice: warn with both locations.
                        filename = inspect.getsourcefile(module)
                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
                                         filename, linen, name, prev)

    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start, string_types):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                # Bound methods carry an implicit self that must be discounted
                # from the argument count below.
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = True
                return

            eline = self.error_func.__code__.co_firstlineno
            efile = self.error_func.__code__.co_filename
            module = inspect.getmodule(self.error_func)
            self.modules.add(module)

            argcount = self.error_func.__code__.co_argcount - ismethod
            if argcount != 1:
                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
                self.error = True

    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get('tokens')
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = True
            return

        # Warn (non-fatal) about tokens listed more than once.
        terminals = set()
        for n in self.tokens:
            if n in terminals:
                self.log.warning('Token %r multiply defined', n)
            terminals.add(n)

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get('precedence')

    # Validate and parse the precedence map
    def validate_precedence(self):
        # Flattens the user's precedence table into
        # (terminal, associativity, level) triples; level 1 is lowest.
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error('precedence must be a list or tuple')
                self.error = True
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error('Bad precedence table')
                    self.error = True
                    return

                if len(p) < 2:
                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
                    self.error = True
                    return
                assoc = p[0]
                if not isinstance(assoc, string_types):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
                    if not isinstance(term, string_types):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if not name.startswith('p_') or name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                line = item.__code__.co_firstlineno
                module = inspect.getmodule(item)
                p_functions.append((line, module, name, item.__doc__))

        # Sort all of the actions by line number; make sure to stringify
        # modules to make them sortable, since `line` may not uniquely sort all
        # p functions
        p_functions.sort(key=lambda p_function: (
            p_function[0],
            str(p_function[1]),
            p_function[2],
            p_function[3]))
        self.pfuncs = p_functions

    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error('no rules of the form p_rulename are defined')
            self.error = True
            return

        for line, module, name, doc in self.pfuncs:
            file = inspect.getsourcefile(module)
            func = self.pdict[name]
            # Bound methods need an extra slot for self.
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func.__code__.co_argcount > reqargs:
                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
                self.error = True
            elif func.__code__.co_argcount < reqargs:
                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
                self.error = True
            elif not func.__doc__:
                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                 file, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError as e:
                    self.log.error(str(e))
                    self.error = True

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.modules.add(module)

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n, v in self.pdict.items():
            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                continue
            if n.startswith('t_'):
                continue
            if n.startswith('p_') and n != 'p_error':
                self.log.warning('%r not defined as a function', n)
            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
                (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
                if v.__doc__:
                    try:
                        # Heuristic: a docstring whose second word is ':' looks
                        # like a grammar rule missing the p_ prefix.
                        doc = v.__doc__.split(' ')
                        if doc[1] == ':':
                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
                    except IndexError:
                        pass

        self.grammar = grammar
3184
+
3185
+ # -----------------------------------------------------------------------------
3186
+ # yacc(module)
3187
+ #
3188
+ # Build a parser
3189
+ # -----------------------------------------------------------------------------
3190
+
3191
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
         outputdir=None, debuglog=None, errorlog=None, picklefile=None):
    """Build and return an LRParser from the caller's grammar specification.

    The specification (tokens, precedence, start, p_* rule functions,
    p_error) is read from *module* if given, otherwise from the calling
    frame's namespace.  Previously generated tables (from *tabmodule* or
    *picklefile*) are reused when their stored signature matches the
    current grammar; otherwise the tables are regenerated with *method*
    and optionally written back.  Also rebinds the module-global `parse`
    shortcut to the new parser's parse method.  Raises YaccError if the
    specification or grammar is invalid.
    """
    # Guard against an explicit tabmodule=None argument.
    if tabmodule is None:
        tabmodule = tab_module

    # Reference to the parsing method of the last built parser
    global parse

    # If pickling is enabled, table files are not created
    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        pdict = dict(_items)
        # If no __file__ attribute is available, try to obtain it from the __module__ instead
        if '__file__' not in pdict:
            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
    else:
        # No module given: harvest the caller's namespace (2 frames up).
        pdict = get_caller_module_dict(2)

    if outputdir is None:
        # If no output directory is set, the location of the output files
        # is determined according to the following rules:
        #     - If tabmodule specifies a package, files go into that package directory
        #     - Otherwise, files go in the same directory as the specifying module
        if isinstance(tabmodule, types.ModuleType):
            srcfile = tabmodule.__file__
        else:
            if '.' not in tabmodule:
                srcfile = pdict['__file__']
            else:
                parts = tabmodule.split('.')
                pkgname = '.'.join(parts[:-1])
                exec('import %s' % pkgname)
                srcfile = getattr(sys.modules[pkgname], '__file__', '')
        outputdir = os.path.dirname(srcfile)

    # Determine if the module is package of a package or not.
    # If so, fix the tabmodule setting so that tables load correctly
    pkg = pdict.get('__package__')
    if pkg and isinstance(tabmodule, str):
        if '.' not in tabmodule:
            tabmodule = pkg + '.' + tabmodule



    # Set start symbol if it's specified directly using an argument
    if start is not None:
        pdict['start'] = start

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError('Unable to build parser')

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables.  If the stored signature matches (or optimize is
    # set, which skips the check), reuse the cached tables and return
    # early without re-running validation or table generation.
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr, pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception as e:
                # Cached tables unusable: fall through to full regeneration.
                errorlog.warning('There was a problem loading the table file: %r', e)
    except VersionError as e:
        errorlog.warning(str(e))
    except ImportError:
        pass

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info(' %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))

    # Cross-reference dump: which rules reference each terminal/nonterminal.
    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug('Generating %s tables', method)

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)

        # Deduplicate reduce/reduce reports on (state, rule, rejected).
        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        # Warn once per rule that conflict resolution made unreachable.
        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser