python-cc 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pcc/parse/c_parser.py ADDED
@@ -0,0 +1,1700 @@
1
+ #------------------------------------------------------------------------------
2
+ # pycparser: c_parser.py
3
+ #
4
+ # CParser class: Parser and AST builder for the C language
5
+ #
6
+ # Copyright (C) 2008-2015, Eli Bendersky
7
+ # License: BSD
8
+ #------------------------------------------------------------------------------
9
+
10
+ from ..ply import yacc
11
+
12
+ from ..ast import c_ast
13
+ from ..lex.c_lexer import CLexer
14
+ from .plyparser import PLYParser, Coord, ParseError
15
+ from ..ast.ast_transforms import fix_switch_cases
16
+
17
+
18
+ class CParser(PLYParser):
19
def __init__(
        self,
        lex_optimize=True,
        lextab='pycparser.lextab',
        yacc_optimize=True,
        yacctab='pycparser.yacctab',
        yacc_debug=False,
        taboutputdir=''):
    """ Create a new CParser.

        Some arguments for controlling the debug/optimization
        level of the parser are provided. The defaults are
        tuned for release/performance mode.
        The simple rules for using them are:
        *) When tweaking CParser/CLexer, set these to False
        *) When releasing a stable parser, set to True

        lex_optimize:
            Set to False when you're modifying the lexer.
            Otherwise, changes in the lexer won't be used, if
            some lextab.py file exists.
            When releasing with a stable lexer, set to True
            to save the re-generation of the lexer table on
            each run.

        lextab:
            Points to the lex table that's used for optimized
            mode. Only if you're modifying the lexer and want
            some tests to avoid re-generating the table, make
            this point to a local lex table file (that's been
            earlier generated with lex_optimize=True)

        yacc_optimize:
            Set to False when you're modifying the parser.
            Otherwise, changes in the parser won't be used, if
            some parsetab.py file exists.
            When releasing with a stable parser, set to True
            to save the re-generation of the parser table on
            each run.

        yacctab:
            Points to the yacc table that's used for optimized
            mode. Only if you're modifying the parser, make
            this point to a local yacc table file

        yacc_debug:
            Generate a parser.out file that explains how yacc
            built the parsing table from the grammar.

        taboutputdir:
            Set this parameter to control the location of generated
            lextab and yacctab files.
    """
    # The lexer calls back into the parser: errors are reported through
    # the parser, and braces/identifier lookups drive the scope stack
    # below (needed to tell TYPEID tokens apart from plain IDs).
    self.clex = CLexer(
        error_func=self._lex_error_func,
        on_lbrace_func=self._lex_on_lbrace_func,
        on_rbrace_func=self._lex_on_rbrace_func,
        type_lookup_func=self._lex_type_lookup_func)

    self.clex.build(
        optimize=lex_optimize,
        lextab=lextab,
        outputdir=taboutputdir)
    self.tokens = self.clex.tokens

    # Grammar rules that also need a generated "<rule>_opt" variant
    # (matching either the rule or empty); see PLYParser._create_opt_rule.
    rules_with_opt = [
        'abstract_declarator',
        'assignment_expression',
        'declaration_list',
        'declaration_specifiers',
        'designation',
        'expression',
        'identifier_list',
        'init_declarator_list',
        'initializer_list',
        'parameter_type_list',
        'specifier_qualifier_list',
        'block_item_list',
        'type_qualifier_list',
        'struct_declarator_list'
    ]

    for rule in rules_with_opt:
        self._create_opt_rule(rule)

    # Build the LALR parser from the p_* methods of this class.
    self.cparser = yacc.yacc(
        module=self,
        start='translation_unit_or_empty',
        debug=yacc_debug,
        optimize=yacc_optimize,
        tabmodule=yacctab,
        outputdir=taboutputdir)

    # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
    # the current (topmost) scope. Each scope is a dictionary that
    # specifies whether a name is a type. If _scope_stack[n][name] is
    # True, 'name' is currently a type in the scope. If it's False,
    # 'name' is used in the scope but not as a type (for instance, if we
    # saw: int name;
    # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
    # in this scope at all.
    self._scope_stack = [dict()]

    # Keeps track of the last token given to yacc (the lookahead token)
    self._last_yielded_token = None
124
+
125
def parse(self, text, filename='', debuglevel=0):
    """ Parse C source code and return an AST.

        text:
            A string containing the C source code

        filename:
            Name of the file being parsed (for meaningful
            error messages)

        debuglevel:
            Debug level to yacc
    """
    # Reset all per-parse state so the parser instance is reusable.
    self.clex.filename = filename
    self.clex.reset_lineno()
    self._scope_stack = [dict()]
    self._last_yielded_token = None
    return self.cparser.parse(input=text, lexer=self.clex, debug=debuglevel)
146
+
147
+ ######################-- PRIVATE --######################
148
+
149
+ def _push_scope(self):
150
+ self._scope_stack.append(dict())
151
+
152
+ def _pop_scope(self):
153
+ assert len(self._scope_stack) > 1
154
+ self._scope_stack.pop()
155
+
156
+ def _add_typedef_name(self, name, coord):
157
+ """ Add a new typedef name (ie a TYPEID) to the current scope
158
+ """
159
+ if not self._scope_stack[-1].get(name, True):
160
+ self._parse_error(
161
+ "Typedef %r previously declared as non-typedef "
162
+ "in this scope" % name, coord)
163
+ self._scope_stack[-1][name] = True
164
+
165
+ def _add_identifier(self, name, coord):
166
+ """ Add a new object, function, or enum member name (ie an ID) to the
167
+ current scope
168
+ """
169
+ if self._scope_stack[-1].get(name, False):
170
+ self._parse_error(
171
+ "Non-typedef %r previously declared as typedef "
172
+ "in this scope" % name, coord)
173
+ self._scope_stack[-1][name] = False
174
+
175
+ def _is_type_in_scope(self, name):
176
+ """ Is *name* a typedef-name in the current scope?
177
+ """
178
+ for scope in reversed(self._scope_stack):
179
+ # If name is an identifier in this scope it shadows typedefs in
180
+ # higher scopes.
181
+ in_scope = scope.get(name)
182
+ if in_scope is not None: return in_scope
183
+ return False
184
+
185
def _lex_error_func(self, msg, line, column):
    # Lexer error callback: wrap the position in a Coord and report
    # through the parser's standard error path.
    self._parse_error(msg, self._coord(line, column))
187
+
188
def _lex_on_lbrace_func(self):
    # Lexer callback on '{': open a new scope for name tracking.
    self._push_scope()
190
+
191
def _lex_on_rbrace_func(self):
    # Lexer callback on '}': close the innermost scope.
    self._pop_scope()
193
+
194
+ def _lex_type_lookup_func(self, name):
195
+ """ Looks up types that were previously defined with
196
+ typedef.
197
+ Passed to the lexer for recognizing identifiers that
198
+ are types.
199
+ """
200
+ is_type = self._is_type_in_scope(name)
201
+ return is_type
202
+
203
def _get_yacc_lookahead_token(self):
    """ We need access to yacc's lookahead token in certain cases.
        This is the last token yacc requested from the lexer, so we
        ask the lexer.
    """
    return self.clex.last_token
209
+
210
+ # To understand what's going on here, read sections A.8.5 and
211
+ # A.8.6 of K&R2 very carefully.
212
+ #
213
+ # A C type consists of a basic type declaration, with a list
214
+ # of modifiers. For example:
215
+ #
216
+ # int *c[5];
217
+ #
218
+ # The basic declaration here is 'int c', and the pointer and
219
+ # the array are the modifiers.
220
+ #
221
+ # Basic declarations are represented by TypeDecl (from module c_ast) and the
222
+ # modifiers are FuncDecl, PtrDecl and ArrayDecl.
223
+ #
224
+ # The standard states that whenever a new modifier is parsed, it should be
225
+ # added to the end of the list of modifiers. For example:
226
+ #
227
+ # K&R2 A.8.6.2: Array Declarators
228
+ #
229
+ # In a declaration T D where D has the form
230
+ # D1 [constant-expression-opt]
231
+ # and the type of the identifier in the declaration T D1 is
232
+ # "type-modifier T", the type of the
233
+ # identifier of D is "type-modifier array of T"
234
+ #
235
+ # This is what this method does. The declarator it receives
236
+ # can be a list of declarators ending with TypeDecl. It
237
+ # tacks the modifier to the end of this list, just before
238
+ # the TypeDecl.
239
+ #
240
+ # Additionally, the modifier may be a list itself. This is
241
+ # useful for pointers, that can come as a chain from the rule
242
+ # p_pointer. In this case, the whole modifier list is spliced
243
+ # into the new location.
244
def _type_modify_decl(self, decl, modifier):
    """ Tacks a type modifier on a declarator, and returns
        the modified declarator.

        Note: the declarator and modifier may be modified
    """
    modifier_head = modifier
    modifier_tail = modifier

    # The modifier may be a nested list (e.g. a pointer chain from
    # p_pointer). Reach its tail.
    while modifier_tail.type:
        modifier_tail = modifier_tail.type

    # If the decl is a basic type, just tack the modifier onto it.
    if isinstance(decl, c_ast.TypeDecl):
        modifier_tail.type = decl
        return modifier
    else:
        # Otherwise, the decl is a list of modifiers. Reach
        # its tail and splice the modifier onto the tail,
        # pointing to the underlying basic type.
        decl_tail = decl

        while not isinstance(decl_tail.type, c_ast.TypeDecl):
            decl_tail = decl_tail.type

        modifier_tail.type = decl_tail.type
        decl_tail.type = modifier_head
        return decl
282
+
283
+ # Due to the order in which declarators are constructed,
284
+ # they have to be fixed in order to look like a normal AST.
285
+ #
286
+ # When a declaration arrives from syntax construction, it has
287
+ # these problems:
288
+ # * The innermost TypeDecl has no type (because the basic
289
+ # type is only known at the uppermost declaration level)
290
+ # * The declaration has no variable name, since that is saved
291
+ # in the innermost TypeDecl
292
+ # * The typename of the declaration is a list of type
293
+ # specifiers, and not a node. Here, basic identifier types
294
+ # should be separated from more complex types like enums
295
+ # and structs.
296
+ #
297
+ # This method fixes these problems.
298
+ #
299
def _fix_decl_name_type(self, decl, typename):
    """ Fixes a declaration. Modifies decl.

        Moves the name from the innermost TypeDecl up to the declaration,
        propagates qualifiers down, and turns the list of type specifiers
        into a proper type node.
    """
    # Walk the modifier chain down to the underlying TypeDecl.
    inner = decl
    while not isinstance(inner, c_ast.TypeDecl):
        inner = inner.type

    decl.name = inner.declname
    inner.quals = decl.quals

    # The typename is a list of types. If any type in this
    # list isn't an IdentifierType, it must be the only
    # type in the list (it's illegal to declare "int enum ..")
    # If all the types are basic, they're collected in the
    # IdentifierType holder.
    for tn in typename:
        if not isinstance(tn, c_ast.IdentifierType):
            if len(typename) > 1:
                self._parse_error(
                    "Invalid multiple types specified", tn.coord)
            else:
                inner.type = tn
                return decl

    if not typename:
        # Functions default to returning int
        if not isinstance(decl.type, c_ast.FuncDecl):
            self._parse_error(
                "Missing type in declaration", decl.coord)
        inner.type = c_ast.IdentifierType(
            ['int'],
            coord=decl.coord)
    else:
        # All specifiers are IdentifierType nodes; concatenate their
        # names into a single node (e.g. ['unsigned', 'long']).
        inner.type = c_ast.IdentifierType(
            [name for spec in typename for name in spec.names],
            coord=typename[0].coord)
    return decl
343
+
344
+ def _add_declaration_specifier(self, declspec, newspec, kind):
345
+ """ Declaration specifiers are represented by a dictionary
346
+ with the entries:
347
+ * qual: a list of type qualifiers
348
+ * storage: a list of storage type qualifiers
349
+ * type: a list of type specifiers
350
+ * function: a list of function specifiers
351
+
352
+ This method is given a declaration specifier, and a
353
+ new specifier of a given kind.
354
+ Returns the declaration specifier, with the new
355
+ specifier incorporated.
356
+ """
357
+ spec = declspec or dict(qual=[], storage=[], type=[], function=[])
358
+ spec[kind].insert(0, newspec)
359
+ return spec
360
+
361
def _build_declarations(self, spec, decls, typedef_namespace=False):
    """ Builds a list of declarations all sharing the given specifiers.
        If typedef_namespace is true, each declared name is added
        to the "typedef namespace", which also includes objects,
        functions, and enum constants.

        spec is a specifier dict (see _add_declaration_specifier);
        decls is a list of dicts with 'decl' and optionally
        'init'/'bitsize' keys. Note: may mutate spec['type'].
    """
    is_typedef = 'typedef' in spec['storage']
    declarations = []

    # Bit-fields are allowed to be unnamed.
    if decls[0].get('bitsize') is not None:
        pass

    # When redeclaring typedef names as identifiers in inner scopes, a
    # problem can occur where the identifier gets grouped into
    # spec['type'], leaving decl as None. This can only occur for the
    # first declarator.
    elif decls[0]['decl'] is None:
        if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
                not self._is_type_in_scope(spec['type'][-1].names[0]):
            # Best-effort coordinate for the error message.
            coord = '?'
            for t in spec['type']:
                if hasattr(t, 'coord'):
                    coord = t.coord
                    break
            self._parse_error('Invalid declaration', coord)

        # Make this look as if it came from "direct_declarator:ID"
        decls[0]['decl'] = c_ast.TypeDecl(
            declname=spec['type'][-1].names[0],
            type=None,
            quals=None,
            coord=spec['type'][-1].coord)
        # Remove the "new" type's name from the end of spec['type']
        del spec['type'][-1]

    # A similar problem can occur where the declaration ends up looking
    # like an abstract declarator. Give it a name if this is the case.
    elif not isinstance(decls[0]['decl'],
            (c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
        decls_0_tail = decls[0]['decl']
        while not isinstance(decls_0_tail, c_ast.TypeDecl):
            decls_0_tail = decls_0_tail.type
        if decls_0_tail.declname is None:
            decls_0_tail.declname = spec['type'][-1].names[0]
            del spec['type'][-1]

    for decl in decls:
        assert decl['decl'] is not None
        # Typedefs get a dedicated AST node; everything else is a Decl.
        if is_typedef:
            declaration = c_ast.Typedef(
                name=None,
                quals=spec['qual'],
                storage=spec['storage'],
                type=decl['decl'],
                coord=decl['decl'].coord)
        else:
            declaration = c_ast.Decl(
                name=None,
                quals=spec['qual'],
                storage=spec['storage'],
                funcspec=spec['function'],
                type=decl['decl'],
                init=decl.get('init'),
                bitsize=decl.get('bitsize'),
                coord=decl['decl'].coord)

        # Bare struct/union/type declarations need no name/type fixing.
        if isinstance(declaration.type,
                (c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
            fixed_decl = declaration
        else:
            fixed_decl = self._fix_decl_name_type(declaration, spec['type'])

        # Add the type name defined by typedef to a
        # symbol table (for usage in the lexer)
        if typedef_namespace:
            if is_typedef:
                self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
            else:
                self._add_identifier(fixed_decl.name, fixed_decl.coord)

        declarations.append(fixed_decl)

    return declarations
449
+
450
def _build_function_definition(self, spec, decl, param_decls, body):
    """ Builds a function definition.

        spec is a specifier dict, decl the function declarator,
        param_decls the optional K&R-style parameter declarations,
        and body the compound statement.
    """
    # 'typedef' cannot appear on a function definition.
    assert 'typedef' not in spec['storage']

    # Reuse the declaration machinery to fix up the declarator and
    # register the function name in the current scope.
    declaration = self._build_declarations(
        spec=spec,
        decls=[dict(decl=decl, init=None)],
        typedef_namespace=True)[0]

    return c_ast.FuncDef(
        decl=declaration,
        param_decls=param_decls,
        body=body,
        coord=decl.coord)
465
+
466
def _select_struct_union_class(self, token):
    """ Given a token (either STRUCT or UNION), selects the
        appropriate AST class.
    """
    return c_ast.Struct if token == 'struct' else c_ast.Union
474
+
475
+ ##
476
+ ## Precedence and associativity of operators
477
+ ##
478
+ precedence = (
479
+ ('left', 'LOR'),
480
+ ('left', 'LAND'),
481
+ ('left', 'OR'),
482
+ ('left', 'XOR'),
483
+ ('left', 'AND'),
484
+ ('left', 'EQ', 'NE'),
485
+ ('left', 'GT', 'GE', 'LT', 'LE'),
486
+ ('left', 'RSHIFT', 'LSHIFT'),
487
+ ('left', 'PLUS', 'MINUS'),
488
+ ('left', 'TIMES', 'DIVIDE', 'MOD')
489
+ )
490
+
491
+ ##
492
+ ## Grammar productions
493
+ ## Implementation of the BNF defined in K&R2 A.13
494
+ ##
495
+
496
+ # Wrapper around a translation unit, to allow for empty input.
497
+ # Not strictly part of the C99 Grammar, but useful in practice.
498
+ #
499
def p_translation_unit_or_empty(self, p):
    """ translation_unit_or_empty : translation_unit
                                  | empty
    """
    # Wrap the declaration list in a FileAST; empty input yields
    # p[1] is None and produces an empty FileAST.
    p[0] = c_ast.FileAST(p[1] if p[1] is not None else [])
507
+
508
def p_translation_unit_1(self, p):
    """ translation_unit : external_declaration
    """
    # Note: external_declaration is already a list
    p[0] = p[1]
514
+
515
def p_translation_unit_2(self, p):
    """ translation_unit : translation_unit external_declaration
    """
    # Append the new external declaration(s) to the accumulated list;
    # a stray SEMI produces None and contributes nothing.
    if p[2] is not None:
        p[1].extend(p[2])
    p[0] = p[1]
521
+
522
+ # Declarations always come as lists (because they can be
523
+ # several in one line), so we wrap the function definition
524
+ # into a list as well, to make the return value of
525
+ # external_declaration homogeneous.
526
+ #
527
def p_external_declaration_1(self, p):
    """ external_declaration : function_definition
    """
    # Wrap in a list to keep external_declaration's value homogeneous
    # (declarations already come as lists).
    p[0] = [p[1]]
531
+
532
def p_external_declaration_2(self, p):
    """ external_declaration : declaration
    """
    # declaration is already a list of Decl nodes.
    p[0] = p[1]
536
+
537
def p_external_declaration_3(self, p):
    """ external_declaration : pp_directive
    """
    # pp_directive currently always raises a parse error (unsupported),
    # so this value is never actually produced.
    p[0] = p[1]
541
+
542
def p_external_declaration_4(self, p):
    """ external_declaration : SEMI
    """
    # A lone semicolon at file scope contributes nothing to the AST.
    p[0] = None
546
+
547
def p_pp_directive(self, p):
    """ pp_directive : PPHASH
    """
    # Preprocessor directives are expected to be handled before parsing;
    # encountering one here is always an error.
    self._parse_error('Directives not supported yet',
        self._coord(p.lineno(1)))
552
+
553
+ # In function definitions, the declarator can be followed by
554
+ # a declaration list, for old "K&R style" function definitions.
555
+ #
556
def p_function_definition_1(self, p):
    """ function_definition : declarator declaration_list_opt compound_statement
    """
    # no declaration specifiers - 'int' becomes the default type
    # (pre-C99 implicit-int rule).
    spec = dict(
        qual=[],
        storage=[],
        type=[c_ast.IdentifierType(['int'],
                                   coord=self._coord(p.lineno(1)))],
        function=[])

    p[0] = self._build_function_definition(
        spec=spec,
        decl=p[1],
        param_decls=p[2],
        body=p[3])
572
+
573
def p_function_definition_2(self, p):
    """ function_definition : declaration_specifiers declarator declaration_list_opt compound_statement
    """
    # Normal function definition with explicit specifiers; p[3] carries
    # optional K&R-style parameter declarations.
    spec = p[1]

    p[0] = self._build_function_definition(
        spec=spec,
        decl=p[2],
        param_decls=p[3],
        body=p[4])
583
+
584
def p_statement(self, p):
    """ statement : labeled_statement
                  | expression_statement
                  | compound_statement
                  | selection_statement
                  | iteration_statement
                  | jump_statement
    """
    # Pure dispatch: every statement kind passes its node through.
    p[0] = p[1]
593
+
594
+ # In C, declarations can come several in a line:
595
+ # int x, *px, romulo = 5;
596
+ #
597
+ # However, for the AST, we will split them to separate Decl
598
+ # nodes.
599
+ #
600
+ # This rule splits its declarations and always returns a list
601
+ # of Decl nodes, even if it's one element long.
602
+ #
603
def p_decl_body(self, p):
    """ decl_body : declaration_specifiers init_declarator_list_opt
    """
    spec = p[1]

    # p[2] (init_declarator_list_opt) is either a list or None
    if p[2] is None:
        # By the standard, you must have at least one declarator unless
        # declaring a structure tag, a union tag, or the members of an
        # enumeration.
        ty = spec['type']
        s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
        if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
            decls = [c_ast.Decl(
                name=None,
                quals=spec['qual'],
                storage=spec['storage'],
                funcspec=spec['function'],
                type=ty[0],
                init=None,
                bitsize=None,
                coord=ty[0].coord)]

        # However, this case can also occur on redeclared identifiers in
        # an inner scope. The trouble is that the redeclared type's name
        # gets grouped into declaration_specifiers; _build_declarations
        # compensates for this.
        else:
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None)],
                typedef_namespace=True)

    else:
        decls = self._build_declarations(
            spec=spec,
            decls=p[2],
            typedef_namespace=True)

    p[0] = decls
646
+
647
+ # The declaration has been split to a decl_body sub-rule and
648
+ # SEMI, because having them in a single rule created a problem
649
+ # for defining typedefs.
650
+ #
651
+ # If a typedef line was directly followed by a line using the
652
+ # type defined with the typedef, the type would not be
653
+ # recognized. This is because to reduce the declaration rule,
654
+ # the parser's lookahead asked for the token after SEMI, which
655
+ # was the type from the next line, and the lexer had no chance
656
+ # to see the updated type symbol table.
657
+ #
658
+ # Splitting solves this problem, because after seeing SEMI,
659
+ # the parser reduces decl_body, which actually adds the new
660
+ # type into the table to be seen by the lexer before the next
661
+ # line is reached.
662
def p_declaration(self, p):
    """ declaration : decl_body SEMI
    """
    # decl_body is split out so typedef names are registered before
    # yacc's lookahead reads the next line (see comment above).
    p[0] = p[1]
666
+
667
+ # Since each declaration is a list of declarations, this
668
+ # rule will combine all the declarations and return a single
669
+ # list
670
+ #
671
def p_declaration_list(self, p):
    """ declaration_list : declaration
                         | declaration_list declaration
    """
    # Each declaration is itself a list; concatenate when chaining.
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = p[1] + p[2]
676
+
677
def p_declaration_specifiers_1(self, p):
    """ declaration_specifiers : type_qualifier declaration_specifiers_opt
    """
    # Fold the qualifier (const/restrict/volatile) into the spec dict.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
681
+
682
def p_declaration_specifiers_2(self, p):
    """ declaration_specifiers : type_specifier declaration_specifiers_opt
    """
    # Fold the type specifier (int/struct/typedef-name/...) into the spec.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'type')
686
+
687
def p_declaration_specifiers_3(self, p):
    """ declaration_specifiers : storage_class_specifier declaration_specifiers_opt
    """
    # Fold the storage class (static/extern/typedef/...) into the spec.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')
691
+
692
def p_declaration_specifiers_4(self, p):
    """ declaration_specifiers : function_specifier declaration_specifiers_opt
    """
    # Fold the function specifier (inline) into the spec dict.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'function')
696
+
697
def p_storage_class_specifier(self, p):
    """ storage_class_specifier : AUTO
                                | REGISTER
                                | STATIC
                                | EXTERN
                                | TYPEDEF
    """
    # Pass the keyword string through unchanged.
    p[0] = p[1]
705
+
706
def p_function_specifier(self, p):
    """ function_specifier : INLINE
    """
    # Pass the keyword string through unchanged.
    p[0] = p[1]
710
+
711
def p_type_specifier_1(self, p):
    """ type_specifier : VOID
                       | _BOOL
                       | CHAR
                       | SHORT
                       | INT
                       | LONG
                       | FLOAT
                       | DOUBLE
                       | _COMPLEX
                       | SIGNED
                       | UNSIGNED
    """
    # Basic types become a single-name IdentifierType node; multiple
    # specifiers ("unsigned long") are merged later in
    # _fix_decl_name_type.
    p[0] = c_ast.IdentifierType([p[1]], coord=self._coord(p.lineno(1)))
725
+
726
def p_type_specifier_2(self, p):
    """ type_specifier : typedef_name
                       | enum_specifier
                       | struct_or_union_specifier
    """
    # These sub-rules already build AST nodes; pass through.
    p[0] = p[1]
732
+
733
def p_type_qualifier(self, p):
    """ type_qualifier : CONST
                       | RESTRICT
                       | VOLATILE
    """
    # Pass the keyword string through unchanged.
    p[0] = p[1]
739
+
740
def p_init_declarator_list_1(self, p):
    """ init_declarator_list : init_declarator
                             | init_declarator_list COMMA init_declarator
    """
    # Accumulate declarators into a flat list.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
745
+
746
+ # If the code is declaring a variable that was declared a typedef in an
747
+ # outer scope, yacc will think the name is part of declaration_specifiers,
748
+ # not init_declarator, and will then get confused by EQUALS. Pass None
749
+ # up in place of declarator, and handle this at a higher level.
750
+ #
751
def p_init_declarator_list_2(self, p):
    """ init_declarator_list : EQUALS initializer
    """
    # Redeclared typedef name absorbed into declaration_specifiers;
    # signal this with decl=None and let _build_declarations fix it.
    p[0] = [dict(decl=None, init=p[2])]
755
+
756
+ # Similarly, if the code contains duplicate typedefs of, for example,
757
+ # array types, the array portion will appear as an abstract declarator.
758
+ #
759
def p_init_declarator_list_3(self, p):
    """ init_declarator_list : abstract_declarator
    """
    # Duplicate typedef of e.g. an array type: the array portion shows
    # up as an abstract declarator. No initializer possible here.
    p[0] = [dict(decl=p[1], init=None)]
763
+
764
+ # Returns a {decl=<declarator> : init=<initializer>} dictionary
765
+ # If there's no initializer, uses None
766
+ #
767
def p_init_declarator(self, p):
    """ init_declarator : declarator
                        | declarator EQUALS initializer
    """
    # Returns a {decl: <declarator>, init: <initializer-or-None>} dict.
    if len(p) == 2:
        p[0] = dict(decl=p[1], init=None)
    else:
        p[0] = dict(decl=p[1], init=p[3])
772
+
773
def p_specifier_qualifier_list_1(self, p):
    """ specifier_qualifier_list : type_qualifier specifier_qualifier_list_opt
    """
    # Same accumulation scheme as declaration_specifiers.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
777
+
778
def p_specifier_qualifier_list_2(self, p):
    """ specifier_qualifier_list : type_specifier specifier_qualifier_list_opt
    """
    # Same accumulation scheme as declaration_specifiers.
    p[0] = self._add_declaration_specifier(p[2], p[1], 'type')
782
+
783
+ # TYPEID is allowed here (and in other struct/enum related tag names), because
784
+ # struct/enum tags reside in their own namespace and can be named the same as types
785
+ #
786
def p_struct_or_union_specifier_1(self, p):
    """ struct_or_union_specifier : struct_or_union ID
                                  | struct_or_union TYPEID
    """
    # Reference to a tagged struct/union without a body (decls=None).
    # TYPEID is allowed because tags live in their own namespace.
    klass = self._select_struct_union_class(p[1])
    p[0] = klass(
        name=p[2],
        decls=None,
        coord=self._coord(p.lineno(2)))
795
+
796
def p_struct_or_union_specifier_2(self, p):
    """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
    """
    # Anonymous struct/union with a member list.
    klass = self._select_struct_union_class(p[1])
    p[0] = klass(
        name=None,
        decls=p[3],
        coord=self._coord(p.lineno(2)))
804
+
805
def p_struct_or_union_specifier_3(self, p):
    """ struct_or_union_specifier : struct_or_union ID brace_open struct_declaration_list brace_close
                                  | struct_or_union TYPEID brace_open struct_declaration_list brace_close
    """
    # Tagged struct/union definition with a member list.
    klass = self._select_struct_union_class(p[1])
    p[0] = klass(
        name=p[2],
        decls=p[4],
        coord=self._coord(p.lineno(2)))
814
+
815
def p_struct_or_union(self, p):
    """ struct_or_union : STRUCT
                        | UNION
    """
    # Pass the keyword through; _select_struct_union_class maps it
    # to the AST class.
    p[0] = p[1]
820
+
821
+ # Combine all declarations into a single list
822
+ #
823
def p_struct_declaration_list(self, p):
    """ struct_declaration_list : struct_declaration
                                | struct_declaration_list struct_declaration
    """
    # Combine all member declarations into a single flat list.
    if len(p) == 2:
        p[0] = p[1]
    else:
        p[0] = p[1] + p[2]
828
+
829
def p_struct_declaration_1(self, p):
    """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
    """
    spec = p[1]
    # specifier_qualifier_list can never carry a storage class.
    assert 'typedef' not in spec['storage']

    if p[2] is not None:
        decls = self._build_declarations(
            spec=spec,
            decls=p[2])

    elif len(spec['type']) == 1:
        # Anonymous struct/union, gcc extension, C1x feature.
        # Although the standard only allows structs/unions here, I see no
        # reason to disallow other types since some compilers have typedefs
        # here, and pycparser isn't about rejecting all invalid code.
        node = spec['type'][0]
        if isinstance(node, c_ast.Node):
            decl_type = node
        else:
            decl_type = c_ast.IdentifierType(node)

        decls = self._build_declarations(
            spec=spec,
            decls=[dict(decl=decl_type)])

    else:
        # Structure/union members can have the same names as typedefs.
        # The trouble is that the member's name gets grouped into
        # specifier_qualifier_list; _build_declarations compensates.
        decls = self._build_declarations(
            spec=spec,
            decls=[dict(decl=None, init=None)])

    p[0] = decls
866
+
867
def p_struct_declaration_2(self, p):
    """ struct_declaration : specifier_qualifier_list abstract_declarator SEMI
    """
    # "Abstract declarator?!", you ask? Structure members can have the
    # same names as typedefs. The trouble is that the member's name gets
    # grouped into specifier_qualifier_list, leaving any remainder to
    # appear as an abstract declarator, as in:
    #   typedef int Foo;
    #   struct { Foo Foo[3]; };
    p[0] = self._build_declarations(
        spec=p[1],
        decls=[dict(decl=p[2], init=None)])
880
+
881
def p_struct_declarator_list(self, p):
    """ struct_declarator_list : struct_declarator
                               | struct_declarator_list COMMA struct_declarator
    """
    # Accumulate member declarators into a flat list.
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
886
+
887
+ # struct_declarator passes up a dict with the keys: decl (for
888
+ # the underlying declarator) and bitsize (for the bitsize)
889
+ #
890
def p_struct_declarator_1(self, p):
    """ struct_declarator : declarator
    """
    # Plain member: no bit-field width.
    p[0] = {'decl': p[1], 'bitsize': None}
894
+
895
def p_struct_declarator_2(self, p):
    """ struct_declarator : declarator COLON constant_expression
                          | COLON constant_expression
    """
    # Bit-field member; the declarator may be absent (unnamed padding
    # bit-field), in which case an empty TypeDecl stands in for it.
    if len(p) == 4:
        p[0] = {'decl': p[1], 'bitsize': p[3]}
    else:
        p[0] = {'decl': c_ast.TypeDecl(None, None, None), 'bitsize': p[2]}
903
+
904
def p_enum_specifier_1(self, p):
    """ enum_specifier : ENUM ID
                       | ENUM TYPEID
    """
    # Reference to a tagged enum without an enumerator list.
    p[0] = c_ast.Enum(p[2], None, self._coord(p.lineno(1)))
909
+
910
def p_enum_specifier_2(self, p):
    """ enum_specifier : ENUM brace_open enumerator_list brace_close
    """
    # Anonymous enum definition.
    p[0] = c_ast.Enum(None, p[3], self._coord(p.lineno(1)))
914
+
915
def p_enum_specifier_3(self, p):
    """ enum_specifier : ENUM ID brace_open enumerator_list brace_close
                       | ENUM TYPEID brace_open enumerator_list brace_close
    """
    # Tagged enum definition with an enumerator list.
    p[0] = c_ast.Enum(p[2], p[4], self._coord(p.lineno(1)))
920
+
921
def p_enumerator_list(self, p):
    """ enumerator_list : enumerator
                        | enumerator_list COMMA
                        | enumerator_list COMMA enumerator
    """
    if len(p) == 2:
        # First enumerator starts the EnumeratorList node.
        p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
    elif len(p) == 3:
        # Trailing comma (allowed by C99): nothing to add.
        p[0] = p[1]
    else:
        # Append subsequent enumerators to the existing list node.
        p[1].enumerators.append(p[3])
        p[0] = p[1]
933
+
934
def p_enumerator(self, p):
    """ enumerator : ID
                   | ID EQUALS constant_expression
    """
    # Build the Enumerator node, with or without an explicit value, and
    # register its name as an identifier in the current scope.
    value = p[3] if len(p) == 4 else None
    enumerator = c_ast.Enumerator(
        p[1], value,
        self._coord(p.lineno(1)))
    self._add_identifier(enumerator.name, enumerator.coord)

    p[0] = enumerator
949
+
950
def p_declarator_1(self, p):
    """ declarator : direct_declarator
    """
    # No pointer prefix: pass the declarator through.
    p[0] = p[1]
954
+
955
def p_declarator_2(self, p):
    """ declarator : pointer direct_declarator
    """
    # Splice the pointer modifier chain onto the declarator.
    p[0] = self._type_modify_decl(p[2], p[1])
959
+
960
+ # Since it's impossible for a type to be specified after a pointer, assume
961
+ # it's intended to be the name for this declaration. _add_identifier will
962
+ # raise an error if this TYPEID can't be redeclared.
963
+ #
964
+ def p_declarator_3(self, p):
965
+ """ declarator : pointer TYPEID
966
+ """
967
+ decl = c_ast.TypeDecl(
968
+ declname=p[2],
969
+ type=None,
970
+ quals=None,
971
+ coord=self._coord(p.lineno(2)))
972
+
973
+ p[0] = self._type_modify_decl(decl, p[1])
974
+
975
    def p_direct_declarator_1(self, p):
        """ direct_declarator   : ID
        """
        # Innermost declarator: just a name. The type is filled in later by
        # _fix_decl_name_type / _type_modify_decl.
        p[0] = c_ast.TypeDecl(
            declname=p[1],
            type=None,
            quals=None,
            coord=self._coord(p.lineno(1)))

    def p_direct_declarator_2(self, p):
        """ direct_declarator   : LPAREN declarator RPAREN
        """
        # Parenthesized declarator, e.g. the "(*k)" in "int (*k)(int)".
        p[0] = p[2]

    def p_direct_declarator_3(self, p):
        """ direct_declarator   : direct_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
        """
        # NOTE(review): this rule always has len(p) == 6, so the
        # `len(p) > 5` conditionals below always take their first arm --
        # presumably left over from a merged rule; confirm before cleanup.
        quals = (p[3] if len(p) > 5 else []) or []
        # Accept dimension qualifiers
        # Per C99 6.7.5.3 p7
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[4] if len(p) > 5 else p[3],
            dim_quals=quals,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    def p_direct_declarator_4(self, p):
        """ direct_declarator   : direct_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
                                | direct_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
        """
        # Using slice notation for PLY objects doesn't work in Python 3 for the
        # version of PLY embedded with pycparser; see PLY Google Code issue 30.
        # Work around that here by listing the two elements separately.
        #
        # p[3]/p[4] are (in either order) the STATIC token and the qualifier
        # list; normalize both to lists, then flatten and drop Nones so
        # dim_quals becomes a single flat list of qualifier strings.
        listed_quals = [item if isinstance(item, list) else [item]
            for item in [p[3],p[4]]]
        dim_quals = [qual for sublist in listed_quals for qual in sublist
            if qual is not None]
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[5],
            dim_quals=dim_quals,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1021
+
1022
+ # Special for VLAs
1023
+ #
1024
+ def p_direct_declarator_5(self, p):
1025
+ """ direct_declarator : direct_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
1026
+ """
1027
+ arr = c_ast.ArrayDecl(
1028
+ type=None,
1029
+ dim=c_ast.ID(p[4], self._coord(p.lineno(4))),
1030
+ dim_quals=p[3] if p[3] != None else [],
1031
+ coord=p[1].coord)
1032
+
1033
+ p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1034
+
1035
    def p_direct_declarator_6(self, p):
        """ direct_declarator   : direct_declarator LPAREN parameter_type_list RPAREN
                                | direct_declarator LPAREN identifier_list_opt RPAREN
        """
        # Function declarator: wrap the inner declarator in a FuncDecl whose
        # args are either a modern parameter list or a K&R identifier list.
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        # To see why _get_yacc_lookahead_token is needed, consider:
        #   typedef char TT;
        #   void foo(int TT) { TT = 10; }
        # Outside the function, TT is a typedef, but inside (starting and
        # ending with the braces) it's a parameter. The trouble begins with
        # yacc's lookahead token. We don't know if we're declaring or
        # defining a function until we see LBRACE, but if we wait for yacc to
        # trigger a rule on that token, then TT will have already been read
        # and incorrectly interpreted as TYPEID. We need to add the
        # parameters to the scope the moment the lexer sees LBRACE.
        #
        if self._get_yacc_lookahead_token().type == "LBRACE":
            if func.args is not None:
                for param in func.args.params:
                    # An ellipsis has no name and must be last -- stop there.
                    if isinstance(param, c_ast.EllipsisParam): break
                    self._add_identifier(param.name, param.coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)
1062
+
1063
    def p_pointer(self, p):
        """ pointer : TIMES type_qualifier_list_opt
                    | TIMES type_qualifier_list_opt pointer
        """
        coord = self._coord(p.lineno(1))
        # Pointer decls nest from inside out. This is important when different
        # levels have different qualifiers. For example:
        #
        #  char * const * p;
        #
        # Means "pointer to const pointer to char"
        #
        # While:
        #
        #  char ** const p;
        #
        # Means "const pointer to pointer to char"
        #
        # So when we construct PtrDecl nestings, the leftmost pointer goes in
        # as the most nested type.
        nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)
        if len(p) > 3:
            # Walk to the innermost PtrDecl already built for the rest of the
            # chain (p[3]) and hang this level beneath it.
            tail_type = p[3]
            while tail_type.type is not None:
                tail_type = tail_type.type
            tail_type.type = nested_type
            p[0] = p[3]
        else:
            p[0] = nested_type
1092
+
1093
    def p_type_qualifier_list(self, p):
        """ type_qualifier_list : type_qualifier
                                | type_qualifier_list type_qualifier
        """
        # Accumulate qualifiers into a plain Python list.
        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

    def p_parameter_type_list(self, p):
        """ parameter_type_list : parameter_list
                                | parameter_list COMMA ELLIPSIS
        """
        # A trailing "..." becomes an EllipsisParam appended to the ParamList.
        if len(p) > 2:
            p[1].params.append(c_ast.EllipsisParam(self._coord(p.lineno(3))))

        p[0] = p[1]

    def p_parameter_list(self, p):
        """ parameter_list  : parameter_declaration
                            | parameter_list COMMA parameter_declaration
        """
        if len(p) == 2: # single parameter
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            # Append in place to the existing ParamList.
            p[1].params.append(p[3])
            p[0] = p[1]
1117
+
1118
    def p_parameter_declaration_1(self, p):
        """ parameter_declaration   : declaration_specifiers declarator
        """
        spec = p[1]
        # No type specifier given: default to "int" (implicit-int, as in
        # pre-C99 style declarations).
        if not spec['type']:
            spec['type'] = [c_ast.IdentifierType(['int'],
                coord=self._coord(p.lineno(1)))]
        p[0] = self._build_declarations(
            spec=spec,
            decls=[dict(decl=p[2])])[0]

    def p_parameter_declaration_2(self, p):
        """ parameter_declaration   : declaration_specifiers abstract_declarator_opt
        """
        spec = p[1]
        # Default missing type specifier to "int", as above.
        if not spec['type']:
            spec['type'] = [c_ast.IdentifierType(['int'],
                coord=self._coord(p.lineno(1)))]

        # Parameters can have the same names as typedefs.  The trouble is that
        # the parameter's name gets grouped into declaration_specifiers, making
        # it look like an old-style declaration; compensate.
        #
        if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
                self._is_type_in_scope(spec['type'][-1].names[0]):
            decl = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=p[2], init=None)])[0]

        # This truly is an old-style parameter declaration
        #
        else:
            # Abstract (possibly unnamed) parameter: wrap it in a Typename
            # with an empty name and fix up the type chain.
            decl = c_ast.Typename(
                name='',
                quals=spec['qual'],
                type=p[2] or c_ast.TypeDecl(None, None, None),
                coord=self._coord(p.lineno(2)))
            typename = spec['type']
            decl = self._fix_decl_name_type(decl, typename)

        p[0] = decl

    def p_identifier_list(self, p):
        """ identifier_list : identifier
                            | identifier_list COMMA identifier
        """
        # K&R-style identifier list; reuses ParamList as the container.
        if len(p) == 2: # single parameter
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            p[1].params.append(p[3])
            p[0] = p[1]
1169
+
1170
    def p_initializer_1(self, p):
        """ initializer : assignment_expression
        """
        # Scalar initializer: the expression itself.
        p[0] = p[1]

    def p_initializer_2(self, p):
        """ initializer : brace_open initializer_list_opt brace_close
                        | brace_open initializer_list COMMA brace_close
        """
        # Braced initializer; "{}" (empty) yields an empty InitList.
        if p[2] is None:
            p[0] = c_ast.InitList([], self._coord(p.lineno(1)))
        else:
            p[0] = p[2]

    def p_initializer_list(self, p):
        """ initializer_list    : designation_opt initializer
                                | initializer_list COMMA designation_opt initializer
        """
        if len(p) == 3: # single initializer
            # A present designation (".field =" / "[idx] =") wraps the
            # initializer in a NamedInitializer.
            init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
            p[0] = c_ast.InitList([init], p[2].coord)
        else:
            init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
            p[1].exprs.append(init)
            p[0] = p[1]

    def p_designation(self, p):
        """ designation : designator_list EQUALS
        """
        # The EQUALS is syntax only; the designator list is the payload.
        p[0] = p[1]

    # Designators are represented as a list of nodes, in the order in which
    # they're written in the code.
    #
    def p_designator_list(self, p):
        """ designator_list : designator
                            | designator_list designator
        """
        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]

    def p_designator(self, p):
        """ designator  : LBRACKET constant_expression RBRACKET
                        | PERIOD identifier
        """
        # In both alternatives the designator payload is the second symbol.
        p[0] = p[2]
1215
+
1216
    def p_type_name(self, p):
        """ type_name   : specifier_qualifier_list abstract_declarator_opt
        """
        # A type name as used in casts and sizeof: specifiers/qualifiers plus
        # an optional abstract declarator (no declared name).
        #~ print '=========='
        #~ print p[1]
        #~ print p[2]
        #~ print p[2].children()
        #~ print '=========='

        typename = c_ast.Typename(
            name='',
            quals=p[1]['qual'],
            type=p[2] or c_ast.TypeDecl(None, None, None),
            coord=self._coord(p.lineno(2)))

        p[0] = self._fix_decl_name_type(typename, p[1]['type'])
1232
+
1233
    def p_abstract_declarator_1(self, p):
        """ abstract_declarator     : pointer
        """
        # Pointer with no inner declarator: anchor it on a dummy TypeDecl.
        dummytype = c_ast.TypeDecl(None, None, None)
        p[0] = self._type_modify_decl(
            decl=dummytype,
            modifier=p[1])

    def p_abstract_declarator_2(self, p):
        """ abstract_declarator     : pointer direct_abstract_declarator
        """
        p[0] = self._type_modify_decl(p[2], p[1])

    def p_abstract_declarator_3(self, p):
        """ abstract_declarator     : direct_abstract_declarator
        """
        p[0] = p[1]
1250
+
1251
    # Creating and using direct_abstract_declarator_opt here
    # instead of listing both direct_abstract_declarator and the
    # lack of it in the beginning of _1 and _2 caused two
    # shift/reduce errors.
    #
    def p_direct_abstract_declarator_1(self, p):
        """ direct_abstract_declarator  : LPAREN abstract_declarator RPAREN """
        p[0] = p[2]

    def p_direct_abstract_declarator_2(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
        """
        # Array modifier applied to an existing abstract declarator.
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[3],
            dim_quals=[],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    def p_direct_abstract_declarator_3(self, p):
        """ direct_abstract_declarator  : LBRACKET assignment_expression_opt RBRACKET
        """
        # Array with no inner declarator: anchor on a dummy TypeDecl.
        p[0] = c_ast.ArrayDecl(
            type=c_ast.TypeDecl(None, None, None),
            dim=p[2],
            dim_quals=[],
            coord=self._coord(p.lineno(1)))

    def p_direct_abstract_declarator_4(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LBRACKET TIMES RBRACKET
        """
        # "[*]": VLA of unspecified size; the TIMES token is kept as the
        # dimension placeholder.
        arr = c_ast.ArrayDecl(
            type=None,
            dim=c_ast.ID(p[3], self._coord(p.lineno(3))),
            dim_quals=[],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)

    def p_direct_abstract_declarator_5(self, p):
        """ direct_abstract_declarator  : LBRACKET TIMES RBRACKET
        """
        p[0] = c_ast.ArrayDecl(
            type=c_ast.TypeDecl(None, None, None),
            dim=c_ast.ID(p[3], self._coord(p.lineno(3))),
            dim_quals=[],
            coord=self._coord(p.lineno(1)))

    def p_direct_abstract_declarator_6(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
        """
        # Function modifier applied to an existing abstract declarator.
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)

    def p_direct_abstract_declarator_7(self, p):
        """ direct_abstract_declarator  : LPAREN parameter_type_list_opt RPAREN
        """
        # Function type with no inner declarator: anchor on a dummy TypeDecl.
        p[0] = c_ast.FuncDecl(
            args=p[2],
            type=c_ast.TypeDecl(None, None, None),
            coord=self._coord(p.lineno(1)))
1317
+
1318
    # declaration is a list, statement isn't. To make it consistent, block_item
    # will always be a list
    #
    def p_block_item(self, p):
        """ block_item  : declaration
                        | statement
        """
        p[0] = p[1] if isinstance(p[1], list) else [p[1]]

    # Since we made block_item a list, this just combines lists
    #
    def p_block_item_list(self, p):
        """ block_item_list : block_item
                            | block_item_list block_item
        """
        # Empty block items (plain ';') produce [None], so ignore them
        p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2]

    def p_compound_statement_1(self, p):
        """ compound_statement : brace_open block_item_list_opt brace_close """
        # "{ ... }": the accumulated block items become a Compound node.
        p[0] = c_ast.Compound(
            block_items=p[2],
            coord=self._coord(p.lineno(1)))
1341
+
1342
    def p_labeled_statement_1(self, p):
        """ labeled_statement : ID COLON statement """
        p[0] = c_ast.Label(p[1], p[3], self._coord(p.lineno(1)))

    def p_labeled_statement_2(self, p):
        """ labeled_statement : CASE constant_expression COLON statement """
        # The statement is stored as a one-element list; fix_switch_cases
        # later regroups statements under their cases.
        p[0] = c_ast.Case(p[2], [p[4]], self._coord(p.lineno(1)))

    def p_labeled_statement_3(self, p):
        """ labeled_statement : DEFAULT COLON statement """
        p[0] = c_ast.Default([p[3]], self._coord(p.lineno(1)))

    def p_selection_statement_1(self, p):
        """ selection_statement : IF LPAREN expression RPAREN statement """
        # if without else: the else slot is None.
        p[0] = c_ast.If(p[3], p[5], None, self._coord(p.lineno(1)))

    def p_selection_statement_2(self, p):
        """ selection_statement : IF LPAREN expression RPAREN statement ELSE statement """
        p[0] = c_ast.If(p[3], p[5], p[7], self._coord(p.lineno(1)))

    def p_selection_statement_3(self, p):
        """ selection_statement : SWITCH LPAREN expression RPAREN statement """
        # fix_switch_cases reorganizes the flat statement list so each
        # statement hangs under its preceding Case/Default node.
        p[0] = fix_switch_cases(
            c_ast.Switch(p[3], p[5], self._coord(p.lineno(1))))
1366
+
1367
    def p_iteration_statement_1(self, p):
        """ iteration_statement : WHILE LPAREN expression RPAREN statement """
        p[0] = c_ast.While(p[3], p[5], self._coord(p.lineno(1)))

    def p_iteration_statement_2(self, p):
        """ iteration_statement : DO statement WHILE LPAREN expression RPAREN SEMI """
        # Note the argument order: DoWhile takes (condition, statement).
        p[0] = c_ast.DoWhile(p[5], p[2], self._coord(p.lineno(1)))

    def p_iteration_statement_3(self, p):
        """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement """
        # Classic for loop: (init-expr, cond, next, body); any part may be None.
        p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._coord(p.lineno(1)))

    def p_iteration_statement_4(self, p):
        """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN statement """
        # C99 for loop with a declaration in the init part; wrapped in a
        # DeclList since 'declaration' is a list of Decl nodes.
        p[0] = c_ast.For(c_ast.DeclList(p[3], self._coord(p.lineno(1))),
                         p[4], p[6], p[8], self._coord(p.lineno(1)))
1383
+
1384
    def p_jump_statement_1(self, p):
        """ jump_statement  : GOTO ID SEMI """
        p[0] = c_ast.Goto(p[2], self._coord(p.lineno(1)))

    def p_jump_statement_2(self, p):
        """ jump_statement  : BREAK SEMI """
        p[0] = c_ast.Break(self._coord(p.lineno(1)))

    def p_jump_statement_3(self, p):
        """ jump_statement  : CONTINUE SEMI """
        p[0] = c_ast.Continue(self._coord(p.lineno(1)))

    def p_jump_statement_4(self, p):
        """ jump_statement  : RETURN expression SEMI
                            | RETURN SEMI
        """
        # "return;" yields a Return node with a None expression.
        p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._coord(p.lineno(1)))

    def p_expression_statement(self, p):
        """ expression_statement : expression_opt SEMI """
        # A lone ';' becomes an explicit EmptyStatement node.
        if p[1] is None:
            p[0] = c_ast.EmptyStatement(self._coord(p.lineno(1)))
        else:
            p[0] = p[1]
1408
+
1409
    def p_expression(self, p):
        """ expression  : assignment_expression
                        | expression COMMA assignment_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # Comma operator: collect operands into a single ExprList,
            # converting the left side in place on first use.
            if not isinstance(p[1], c_ast.ExprList):
                p[1] = c_ast.ExprList([p[1]], p[1].coord)

            p[1].exprs.append(p[3])
            p[0] = p[1]

    def p_typedef_name(self, p):
        """ typedef_name : TYPEID """
        p[0] = c_ast.IdentifierType([p[1]], coord=self._coord(p.lineno(1)))

    def p_assignment_expression(self, p):
        """ assignment_expression   : conditional_expression
                                    | unary_expression assignment_operator assignment_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # Assignment op string (e.g. '=', '+=') is stored verbatim.
            p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord)

    # K&R2 defines these as many separate rules, to encode
    # precedence and associativity. Why work hard ? I'll just use
    # the built in precedence/associativity specification feature
    # of PLY. (see precedence declaration above)
    #
    def p_assignment_operator(self, p):
        """ assignment_operator : EQUALS
                                | XOREQUAL
                                | TIMESEQUAL
                                | DIVEQUAL
                                | MODEQUAL
                                | PLUSEQUAL
                                | MINUSEQUAL
                                | LSHIFTEQUAL
                                | RSHIFTEQUAL
                                | ANDEQUAL
                                | OREQUAL
        """
        p[0] = p[1]

    def p_constant_expression(self, p):
        """ constant_expression : conditional_expression """
        p[0] = p[1]

    def p_conditional_expression(self, p):
        """ conditional_expression  : binary_expression
                                    | binary_expression CONDOP expression COLON conditional_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # "a ? b : c" -> TernaryOp(cond, iftrue, iffalse).
            p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord)
1467
+
1468
    def p_binary_expression(self, p):
        """ binary_expression   : cast_expression
                                | binary_expression TIMES binary_expression
                                | binary_expression DIVIDE binary_expression
                                | binary_expression MOD binary_expression
                                | binary_expression PLUS binary_expression
                                | binary_expression MINUS binary_expression
                                | binary_expression RSHIFT binary_expression
                                | binary_expression LSHIFT binary_expression
                                | binary_expression LT binary_expression
                                | binary_expression LE binary_expression
                                | binary_expression GE binary_expression
                                | binary_expression GT binary_expression
                                | binary_expression EQ binary_expression
                                | binary_expression NE binary_expression
                                | binary_expression AND binary_expression
                                | binary_expression OR binary_expression
                                | binary_expression XOR binary_expression
                                | binary_expression LAND binary_expression
                                | binary_expression LOR binary_expression
        """
        # Precedence/associativity are resolved by PLY's precedence table,
        # so one flat rule covers all binary operators.
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)

    def p_cast_expression_1(self, p):
        """ cast_expression : unary_expression """
        p[0] = p[1]

    def p_cast_expression_2(self, p):
        """ cast_expression : LPAREN type_name RPAREN cast_expression """
        p[0] = c_ast.Cast(p[2], p[4], self._coord(p.lineno(1)))
1501
+
1502
    def p_unary_expression_1(self, p):
        """ unary_expression    : postfix_expression """
        p[0] = p[1]

    def p_unary_expression_2(self, p):
        """ unary_expression    : PLUSPLUS unary_expression
                                | MINUSMINUS unary_expression
                                | unary_operator cast_expression
        """
        # Prefix ops: the operator token/string goes in as-is ('++', '--',
        # '*', '&', ...).
        p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord)

    def p_unary_expression_3(self, p):
        """ unary_expression    : SIZEOF unary_expression
                                | SIZEOF LPAREN type_name RPAREN
        """
        # sizeof expr / sizeof(type): operand is the expression or type name.
        p[0] = c_ast.UnaryOp(
            p[1],
            p[2] if len(p) == 3 else p[3],
            self._coord(p.lineno(1)))

    def p_unary_operator(self, p):
        """ unary_operator  : AND
                            | TIMES
                            | PLUS
                            | MINUS
                            | NOT
                            | LNOT
        """
        p[0] = p[1]
1531
+
1532
    def p_postfix_expression_1(self, p):
        """ postfix_expression  : primary_expression """
        p[0] = p[1]

    def p_postfix_expression_2(self, p):
        """ postfix_expression  : postfix_expression LBRACKET expression RBRACKET """
        # Array subscript: a[i].
        p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)

    def p_postfix_expression_3(self, p):
        """ postfix_expression  : postfix_expression LPAREN argument_expression_list RPAREN
                                | postfix_expression LPAREN RPAREN
        """
        # Function call; args is None for a call with no arguments.
        p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)

    def p_postfix_expression_4(self, p):
        """ postfix_expression  : postfix_expression PERIOD ID
                                | postfix_expression PERIOD TYPEID
                                | postfix_expression ARROW ID
                                | postfix_expression ARROW TYPEID
        """
        # Struct/union member access; p[2] is '.' or '->'.
        field = c_ast.ID(p[3], self._coord(p.lineno(3)))
        p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)

    def p_postfix_expression_5(self, p):
        """ postfix_expression  : postfix_expression PLUSPLUS
                                | postfix_expression MINUSMINUS
        """
        # Postfix inc/dec is distinguished from prefix by the 'p' prefix in
        # the op string: 'p++' / 'p--'.
        p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord)

    def p_postfix_expression_6(self, p):
        """ postfix_expression  : LPAREN type_name RPAREN brace_open initializer_list brace_close
                                | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
        """
        # C99 compound literal: (type){ init-list }.
        p[0] = c_ast.CompoundLiteral(p[2], p[5])
1566
+
1567
    def p_primary_expression_1(self, p):
        """ primary_expression  : identifier """
        p[0] = p[1]

    def p_primary_expression_2(self, p):
        """ primary_expression  : constant """
        p[0] = p[1]

    def p_primary_expression_3(self, p):
        """ primary_expression  : unified_string_literal
                                | unified_wstring_literal
        """
        p[0] = p[1]

    def p_primary_expression_4(self, p):
        """ primary_expression  : LPAREN expression RPAREN """
        # Parentheses are not preserved in the AST.
        p[0] = p[2]

    def p_primary_expression_5(self, p):
        """ primary_expression  : OFFSETOF LPAREN type_name COMMA identifier RPAREN
        """
        # offsetof(type, member) is modeled as a FuncCall on an ID named
        # after the OFFSETOF token, with (type_name, member) as arguments.
        coord = self._coord(p.lineno(1))
        p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord),
                              c_ast.ExprList([p[3], p[5]], coord),
                              coord)

    def p_argument_expression_list(self, p):
        """ argument_expression_list    : assignment_expression
                                        | argument_expression_list COMMA assignment_expression
        """
        if len(p) == 2: # single expr
            p[0] = c_ast.ExprList([p[1]], p[1].coord)
        else:
            # Append in place to the existing ExprList.
            p[1].exprs.append(p[3])
            p[0] = p[1]

    def p_identifier(self, p):
        """ identifier  : ID """
        p[0] = c_ast.ID(p[1], self._coord(p.lineno(1)))
1606
+
1607
    def p_constant_1(self, p):
        """ constant    : INT_CONST_DEC
                        | INT_CONST_OCT
                        | INT_CONST_HEX
                        | INT_CONST_BIN
        """
        # Integer constant in any base; the raw lexeme is kept as the value.
        p[0] = c_ast.Constant(
            'int', p[1], self._coord(p.lineno(1)))

    def p_constant_2(self, p):
        """ constant    : FLOAT_CONST
                        | HEX_FLOAT_CONST
        """
        p[0] = c_ast.Constant(
            'float', p[1], self._coord(p.lineno(1)))

    def p_constant_3(self, p):
        """ constant    : CHAR_CONST
                        | WCHAR_CONST
        """
        p[0] = c_ast.Constant(
            'char', p[1], self._coord(p.lineno(1)))
1629
+
1630
    # The "unified" string and wstring literal rules are for supporting
    # concatenation of adjacent string literals.
    # I.e. "hello " "world" is seen by the C compiler as a single string literal
    # with the value "hello world"
    #
    def p_unified_string_literal(self, p):
        """ unified_string_literal  : STRING_LITERAL
                                    | unified_string_literal STRING_LITERAL
        """
        if len(p) == 2: # single literal
            p[0] = c_ast.Constant(
                'string', p[1], self._coord(p.lineno(1)))
        else:
            # Concatenate: drop the closing quote of the accumulated value
            # and the opening quote of the new literal.
            p[1].value = p[1].value[:-1] + p[2][1:]
            p[0] = p[1]

    def p_unified_wstring_literal(self, p):
        """ unified_wstring_literal : WSTRING_LITERAL
                                    | unified_wstring_literal WSTRING_LITERAL
        """
        if len(p) == 2: # single literal
            p[0] = c_ast.Constant(
                'string', p[1], self._coord(p.lineno(1)))
        else:
            # Concatenate wide literals: strip trailing whitespace plus the
            # closing quote, and the new literal's 'L"' prefix (2 chars).
            p[1].value = p[1].value.rstrip()[:-1] + p[2][2:]
            p[0] = p[1]
1656
+
1657
    # brace_open/brace_close exist as separate rules (rather than bare
    # LBRACE/RBRACE) so scope handling can hook them if needed.
    def p_brace_open(self, p):
        """ brace_open  :   LBRACE
        """
        p[0] = p[1]

    def p_brace_close(self, p):
        """ brace_close :   RBRACE
        """
        p[0] = p[1]

    def p_empty(self, p):
        'empty : '
        # The empty production used by all *_opt rules; yields None.
        p[0] = None
1670
+
1671
    def p_error(self, p):
        """ PLY error hook: report a parse error with source coordinates. """
        # If error recovery is added here in the future, make sure
        # _get_yacc_lookahead_token still works!
        #
        if p:
            self._parse_error(
                'before: %s' % p.value,
                self._coord(lineno=p.lineno,
                            column=self.clex.find_tok_column(p)))
        else:
            # p is None when the parser hit EOF unexpectedly.
            self._parse_error('At end of input', '')
1682
+
1683
+
1684
#------------------------------------------------------------------------------
if __name__ == "__main__":
    # This module is import-only; the commented-out lines below are a dead
    # ad-hoc benchmarking/debugging harness kept from development.
    pass

    #t1 = time.time()
    #parser = CParser(lex_optimize=True, yacc_debug=True, yacc_optimize=False)
    #sys.write(time.time() - t1)

    #buf = '''
        #int (*k)(int);
    #'''

    ## set debuglevel to 2 for debugging
    #t = parser.parse(buf, 'x.c', debuglevel=0)
    #t.show(showcoord=True)
1699
+
1700
+