antlr3 1.8.0 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111)
  1. data/History.txt +35 -0
  2. data/Manifest.txt +73 -0
  3. data/README.txt +6 -13
  4. data/java/RubyTarget.java +43 -19
  5. data/java/antlr-full-3.2.1.jar +0 -0
  6. data/lib/antlr3/debug.rb +2 -0
  7. data/lib/antlr3/debug/event-hub.rb +55 -55
  8. data/lib/antlr3/debug/record-event-listener.rb +2 -2
  9. data/lib/antlr3/debug/rule-tracer.rb +14 -14
  10. data/lib/antlr3/debug/socket.rb +47 -47
  11. data/lib/antlr3/debug/trace-event-listener.rb +8 -8
  12. data/lib/antlr3/main.rb +29 -9
  13. data/lib/antlr3/modes/ast-builder.rb +7 -7
  14. data/lib/antlr3/modes/filter.rb +19 -17
  15. data/lib/antlr3/profile.rb +34 -6
  16. data/lib/antlr3/recognizers.rb +50 -1
  17. data/lib/antlr3/streams.rb +19 -15
  18. data/lib/antlr3/streams/rewrite.rb +241 -229
  19. data/lib/antlr3/template/group-file-lexer.rb +6 -8
  20. data/lib/antlr3/template/group-file-parser.rb +16 -16
  21. data/lib/antlr3/template/group-file.rb +1 -1
  22. data/lib/antlr3/test/call-stack.rb +13 -13
  23. data/lib/antlr3/test/core-extensions.rb +69 -69
  24. data/lib/antlr3/test/functional.rb +0 -4
  25. data/lib/antlr3/test/grammar.rb +70 -70
  26. data/lib/antlr3/token.rb +41 -17
  27. data/lib/antlr3/tree.rb +11 -14
  28. data/lib/antlr3/tree/debug.rb +53 -53
  29. data/lib/antlr3/tree/visitor.rb +11 -11
  30. data/lib/antlr3/tree/wizard.rb +35 -35
  31. data/lib/antlr3/util.rb +18 -0
  32. data/lib/antlr3/version.rb +1 -1
  33. data/rakefile +1 -0
  34. data/samples/ANTLRv3Grammar.g +3 -3
  35. data/samples/JavaScript.g +702 -0
  36. data/samples/standard/C/C.g +543 -0
  37. data/samples/standard/C/C.tokens +175 -0
  38. data/samples/standard/C/C__testrig.st +0 -0
  39. data/samples/standard/C/c.rb +12 -0
  40. data/samples/standard/C/input +3479 -0
  41. data/samples/standard/C/output +171 -0
  42. data/samples/standard/LL-star/LLStar.g +101 -0
  43. data/samples/standard/LL-star/input +12 -0
  44. data/samples/standard/LL-star/ll-star.rb +12 -0
  45. data/samples/standard/LL-star/output +2 -0
  46. data/samples/standard/calc/Calculator.g +47 -0
  47. data/samples/standard/calc/Calculator.py +16 -0
  48. data/samples/standard/calc/Calculator.rb +28 -0
  49. data/samples/standard/cminus/CMinus.g +141 -0
  50. data/samples/standard/cminus/bytecode.group +80 -0
  51. data/samples/standard/cminus/cminus.rb +16 -0
  52. data/samples/standard/cminus/input +9 -0
  53. data/samples/standard/cminus/java.group +91 -0
  54. data/samples/standard/cminus/output +11 -0
  55. data/samples/standard/cminus/python.group +48 -0
  56. data/samples/standard/dynamic-scope/DynamicScopes.g +50 -0
  57. data/samples/standard/dynamic-scope/dynamic-scopes.rb +12 -0
  58. data/samples/standard/dynamic-scope/input +7 -0
  59. data/samples/standard/dynamic-scope/output +4 -0
  60. data/samples/standard/fuzzy/FuzzyJava.g +89 -0
  61. data/samples/standard/fuzzy/fuzzy.py +11 -0
  62. data/samples/standard/fuzzy/fuzzy.rb +9 -0
  63. data/samples/standard/fuzzy/input +13 -0
  64. data/samples/standard/fuzzy/output +12 -0
  65. data/samples/standard/hoisted-predicates/HoistedPredicates.g +40 -0
  66. data/samples/standard/hoisted-predicates/hoisted-predicates.rb +13 -0
  67. data/samples/standard/hoisted-predicates/input +1 -0
  68. data/samples/standard/hoisted-predicates/output +1 -0
  69. data/samples/standard/island-grammar/Javadoc.g +46 -0
  70. data/samples/standard/island-grammar/Simple.g +104 -0
  71. data/samples/standard/island-grammar/input +11 -0
  72. data/samples/standard/island-grammar/island.rb +12 -0
  73. data/samples/standard/island-grammar/output +16 -0
  74. data/samples/standard/java/Java.g +827 -0
  75. data/samples/standard/java/input +80 -0
  76. data/samples/standard/java/java.rb +13 -0
  77. data/samples/standard/java/output +1 -0
  78. data/samples/standard/python/Python.g +718 -0
  79. data/samples/standard/python/PythonTokenSource.rb +107 -0
  80. data/samples/standard/python/input +210 -0
  81. data/samples/standard/python/output +24 -0
  82. data/samples/standard/python/python.rb +14 -0
  83. data/samples/standard/rakefile +18 -0
  84. data/samples/standard/scopes/SymbolTable.g +66 -0
  85. data/samples/standard/scopes/input +12 -0
  86. data/samples/standard/scopes/output +3 -0
  87. data/samples/standard/scopes/scopes.rb +12 -0
  88. data/samples/standard/simplecTreeParser/SimpleC.g +113 -0
  89. data/samples/standard/simplecTreeParser/SimpleCWalker.g +64 -0
  90. data/samples/standard/simplecTreeParser/input +12 -0
  91. data/samples/standard/simplecTreeParser/output +1 -0
  92. data/samples/standard/simplecTreeParser/simplec.rb +18 -0
  93. data/samples/standard/treeparser/Lang.g +24 -0
  94. data/samples/standard/treeparser/LangDumpDecl.g +17 -0
  95. data/samples/standard/treeparser/input +1 -0
  96. data/samples/standard/treeparser/output +2 -0
  97. data/samples/standard/treeparser/treeparser.rb +18 -0
  98. data/samples/standard/tweak/Tweak.g +68 -0
  99. data/samples/standard/tweak/input +9 -0
  100. data/samples/standard/tweak/output +16 -0
  101. data/samples/standard/tweak/tweak.rb +13 -0
  102. data/samples/standard/xml/README +16 -0
  103. data/samples/standard/xml/XML.g +123 -0
  104. data/samples/standard/xml/input +21 -0
  105. data/samples/standard/xml/output +39 -0
  106. data/samples/standard/xml/xml.rb +9 -0
  107. data/templates/Ruby.stg +4 -4
  108. data/test/functional/ast-output/auto-ast.rb +0 -5
  109. data/test/functional/ast-output/rewrites.rb +4 -4
  110. data/test/unit/test-scope.rb +45 -0
  111. metadata +96 -8
@@ -0,0 +1,543 @@
1
+ /** ANSI C ANTLR v3 grammar
2
+
3
+ Translated from Jutta Degener's 1995 ANSI C yacc grammar by Terence Parr
4
+ July 2006. The lexical rules were taken from the Java grammar.
5
+
6
+ Jutta says: "In 1985, Jeff Lee published his Yacc grammar (which
7
+ is accompanied by a matching Lex specification) for the April 30, 1985 draft
8
+ version of the ANSI C standard. Tom Stockfisch reposted it to net.sources in
9
+ 1987; that original, as mentioned in the answer to question 17.25 of the
10
+ comp.lang.c FAQ, can be ftp'ed from ftp.uu.net,
11
+ file usenet/net.sources/ansi.c.grammar.Z.
12
+ I intend to keep this version as close to the current C Standard grammar as
13
+ possible; please let me know if you discover discrepancies. Jutta Degener, 1995"
14
+
15
+ Generally speaking, you need symbol table info to parse C; typedefs
16
+ define types and then IDENTIFIERS are either types or plain IDs. I'm doing
17
+ the min necessary here tracking only type names. This is a good example
18
+ of the global scope (called Symbols). Every rule that declares its usage
19
+ of Symbols pushes a new copy on the stack effectively creating a new
20
+ symbol scope. Also note rule declaration declares a rule scope that
21
+ lets any invoked rule see the type_def boolean. It's much easier than
22
+ passing that info down as parameters. Very clean. Rule
23
+ direct_declarator can then easily determine whether the IDENTIFIER
24
+ should be declared as a type name.
25
+
26
+ I have only tested this on a single file, though it is 3500 lines.
27
+
28
+ This grammar requires ANTLR v3 (3.0b3 or higher)
29
+
30
+ Terence Parr
31
+ July 2006
32
+ */
33
+ grammar C;
34
+ options {
35
+ language = Ruby;
36
+ backtrack = true;
37
+ memoize = true;
38
+ k = 2;
39
+ }
40
+
41
+ scope Symbols {
42
+ types;
43
+ }
44
+
45
+ @members {
46
+ def type_name?( name )
47
+ @Symbols_stack.reverse_each do | scope |
48
+ scope.types.member?( name ) and return( true )
49
+ end
50
+ return( false )
51
+ end
52
+ }
53
+
54
+ translation_unit
55
+ scope Symbols; // entire file is a scope
56
+ @init {
57
+ $Symbols::types = Set.new
58
+ }
59
+ : external_declaration+
60
+ ;
61
+
62
+ /** Either a function definition or any other kind of C decl/def.
63
+ * The LL(*) analysis algorithm fails to deal with this due to
64
+ * recursion in the declarator rules. I'm putting in a
65
+ * manual predicate here so that we don't backtrack over
66
+ * the entire function. Further, you get a better error
67
+ * as errors within the function itself don't make it fail
68
+ * to predict that it's a function. Weird errors previously.
69
+ * Remember: the goal is to avoid backtrack like the plague
70
+ * because it makes debugging, actions, and errors harder.
71
+ *
72
+ * Note that k=1 results in a much smaller predictor for the
73
+ * fixed lookahead; k=2 made a few extra thousand lines. ;)
74
+ * I'll have to optimize that in the future.
75
+ */
76
+ external_declaration
77
+ options {k=1;}
78
+ : ( declaration_specifiers? declarator declaration* '{' )=> function_definition
79
+ | declaration
80
+ ;
81
+
82
+ function_definition
83
+ scope Symbols; // put parameters and locals into same scope for now
84
+ @init {
85
+ $Symbols::types = Set.new
86
+ }
87
+ : declaration_specifiers? declarator
88
+ ( declaration+ compound_statement // K&R style
89
+ | compound_statement // ANSI style
90
+ )
91
+ ;
92
+
93
+ declaration
94
+ scope {
95
+ type_def;
96
+ }
97
+ @init { $declaration::type_def = false }
98
+ : 'typedef' declaration_specifiers? { $declaration::type_def = true }
99
+ init_declarator_list ';' // special case, looking for typedef
100
+ | declaration_specifiers init_declarator_list? ';'
101
+ ;
102
+
103
+ declaration_specifiers
104
+ : ( storage_class_specifier
105
+ | type_specifier
106
+ | type_qualifier
107
+ )+
108
+ ;
109
+
110
+ init_declarator_list
111
+ : init_declarator (',' init_declarator)*
112
+ ;
113
+
114
+ init_declarator
115
+ : declarator ('=' initializer)?
116
+ ;
117
+
118
+ storage_class_specifier
119
+ : 'extern'
120
+ | 'static'
121
+ | 'auto'
122
+ | 'register'
123
+ ;
124
+
125
+ type_specifier
126
+ : 'void'
127
+ | 'char'
128
+ | 'short'
129
+ | 'int'
130
+ | 'long'
131
+ | 'float'
132
+ | 'double'
133
+ | 'signed'
134
+ | 'unsigned'
135
+ | struct_or_union_specifier
136
+ | enum_specifier
137
+ | type_id
138
+ ;
139
+
140
+ type_id
141
+ : { type_name?( @input.look.text ) }? IDENTIFIER
142
+ ;
143
+
144
+ struct_or_union_specifier
145
+ options {k=3;}
146
+ scope Symbols;
147
+ @init {
148
+ $Symbols::types = Set.new
149
+ }
150
+ : struct_or_union IDENTIFIER? '{' struct_declaration_list '}'
151
+ | struct_or_union IDENTIFIER
152
+ ;
153
+
154
+ struct_or_union
155
+ : 'struct'
156
+ | 'union'
157
+ ;
158
+
159
+ struct_declaration_list
160
+ : struct_declaration+
161
+ ;
162
+
163
+ struct_declaration
164
+ : specifier_qualifier_list struct_declarator_list ';'
165
+ ;
166
+
167
+ specifier_qualifier_list
168
+ : ( type_qualifier | type_specifier )+
169
+ ;
170
+
171
+ struct_declarator_list
172
+ : struct_declarator (',' struct_declarator)*
173
+ ;
174
+
175
+ struct_declarator
176
+ : declarator (':' constant_expression)?
177
+ | ':' constant_expression
178
+ ;
179
+
180
+ enum_specifier
181
+ options {k=3;}
182
+ : 'enum' '{' enumerator_list '}'
183
+ | 'enum' IDENTIFIER '{' enumerator_list '}'
184
+ | 'enum' IDENTIFIER
185
+ ;
186
+
187
+ enumerator_list
188
+ : enumerator (',' enumerator)*
189
+ ;
190
+
191
+ enumerator
192
+ : IDENTIFIER ('=' constant_expression)?
193
+ ;
194
+
195
+ type_qualifier
196
+ : 'const'
197
+ | 'volatile'
198
+ ;
199
+
200
+ declarator
201
+ : pointer? direct_declarator
202
+ | pointer
203
+ ;
204
+
205
+ direct_declarator
206
+ : ( IDENTIFIER
207
+ {
208
+ if $declaration.length > 0 and $declaration::type_def
209
+ $Symbols::types.add?( $IDENTIFIER.text ) and
210
+ puts( "define type " << $IDENTIFIER.text )
211
+ end
212
+ }
213
+ | '(' declarator ')'
214
+ )
215
+ declarator_suffix*
216
+ ;
217
+
218
+ declarator_suffix
219
+ : '[' constant_expression ']'
220
+ | '[' ']'
221
+ | '(' parameter_type_list ')'
222
+ | '(' identifier_list ')'
223
+ | '(' ')'
224
+ ;
225
+
226
+ pointer
227
+ : '*' type_qualifier+ pointer?
228
+ | '*' pointer
229
+ | '*'
230
+ ;
231
+
232
+ parameter_type_list
233
+ : parameter_list (',' '...')?
234
+ ;
235
+
236
+ parameter_list
237
+ : parameter_declaration (',' parameter_declaration)*
238
+ ;
239
+
240
+ parameter_declaration
241
+ : declaration_specifiers (declarator|abstract_declarator)*
242
+ ;
243
+
244
+ identifier_list
245
+ : IDENTIFIER (',' IDENTIFIER)*
246
+ ;
247
+
248
+ type_name
249
+ : specifier_qualifier_list abstract_declarator?
250
+ ;
251
+
252
+ abstract_declarator
253
+ : pointer direct_abstract_declarator?
254
+ | direct_abstract_declarator
255
+ ;
256
+
257
+ direct_abstract_declarator
258
+ : ( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
259
+ ;
260
+
261
+ abstract_declarator_suffix
262
+ : '[' ']'
263
+ | '[' constant_expression ']'
264
+ | '(' ')'
265
+ | '(' parameter_type_list ')'
266
+ ;
267
+
268
+ initializer
269
+ : assignment_expression
270
+ | '{' initializer_list ','? '}'
271
+ ;
272
+
273
+ initializer_list
274
+ : initializer (',' initializer)*
275
+ ;
276
+
277
+ // E x p r e s s i o n s
278
+
279
+ argument_expression_list
280
+ : assignment_expression (',' assignment_expression)*
281
+ ;
282
+
283
+ additive_expression
284
+ : (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
285
+ ;
286
+
287
+ multiplicative_expression
288
+ : (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
289
+ ;
290
+
291
+ cast_expression
292
+ : '(' type_name ')' cast_expression
293
+ | unary_expression
294
+ ;
295
+
296
+ unary_expression
297
+ : postfix_expression
298
+ | '++' unary_expression
299
+ | '--' unary_expression
300
+ | unary_operator cast_expression
301
+ | 'sizeof' unary_expression
302
+ | 'sizeof' '(' type_name ')'
303
+ ;
304
+
305
+ postfix_expression
306
+ : primary_expression
307
+ ( '[' expression ']'
308
+ | '(' ')'
309
+ | '(' argument_expression_list ')'
310
+ | '.' IDENTIFIER
311
+ | '*' IDENTIFIER
312
+ | '->' IDENTIFIER
313
+ | '++'
314
+ | '--'
315
+ )*
316
+ ;
317
+
318
+ unary_operator
319
+ : '&'
320
+ | '*'
321
+ | '+'
322
+ | '-'
323
+ | '~'
324
+ | '!'
325
+ ;
326
+
327
+ primary_expression
328
+ : IDENTIFIER
329
+ | constant
330
+ | '(' expression ')'
331
+ ;
332
+
333
+ constant
334
+ : HEX_LITERAL
335
+ | OCTAL_LITERAL
336
+ | DECIMAL_LITERAL
337
+ | CHARACTER_LITERAL
338
+ | STRING_LITERAL
339
+ | FLOATING_POINT_LITERAL
340
+ ;
341
+
342
+ /////
343
+
344
+ expression
345
+ : assignment_expression (',' assignment_expression)*
346
+ ;
347
+
348
+ constant_expression
349
+ : conditional_expression
350
+ ;
351
+
352
+ assignment_expression
353
+ : lvalue assignment_operator assignment_expression
354
+ | conditional_expression
355
+ ;
356
+
357
+ lvalue
358
+ : unary_expression
359
+ ;
360
+
361
+ assignment_operator
362
+ : '='
363
+ | '*='
364
+ | '/='
365
+ | '%='
366
+ | '+='
367
+ | '-='
368
+ | '<<='
369
+ | '>>='
370
+ | '&='
371
+ | '^='
372
+ | '|='
373
+ ;
374
+
375
+ conditional_expression
376
+ : logical_or_expression ('?' expression ':' conditional_expression)?
377
+ ;
378
+
379
+ logical_or_expression
380
+ : logical_and_expression ('||' logical_and_expression)*
381
+ ;
382
+
383
+ logical_and_expression
384
+ : inclusive_or_expression ('&&' inclusive_or_expression)*
385
+ ;
386
+
387
+ inclusive_or_expression
388
+ : exclusive_or_expression ('|' exclusive_or_expression)*
389
+ ;
390
+
391
+ exclusive_or_expression
392
+ : and_expression ('^' and_expression)*
393
+ ;
394
+
395
+ and_expression
396
+ : equality_expression ('&' equality_expression)*
397
+ ;
398
+ equality_expression
399
+ : relational_expression (('=='|'!=') relational_expression)*
400
+ ;
401
+
402
+ relational_expression
403
+ : shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
404
+ ;
405
+
406
+ shift_expression
407
+ : additive_expression (('<<'|'>>') additive_expression)*
408
+ ;
409
+
410
+ // S t a t e m e n t s
411
+
412
+ statement
413
+ : labeled_statement
414
+ | compound_statement
415
+ | expression_statement
416
+ | selection_statement
417
+ | iteration_statement
418
+ | jump_statement
419
+ ;
420
+
421
+ labeled_statement
422
+ : IDENTIFIER ':' statement
423
+ | 'case' constant_expression ':' statement
424
+ | 'default' ':' statement
425
+ ;
426
+
427
+ compound_statement
428
+ scope Symbols; // blocks have a scope of symbols
429
+ @init {
430
+ $Symbols::types = Set.new
431
+ }
432
+ : '{' declaration* statement_list? '}'
433
+ ;
434
+
435
+ statement_list
436
+ : statement+
437
+ ;
438
+
439
+ expression_statement
440
+ : ';'
441
+ | expression ';'
442
+ ;
443
+
444
+ selection_statement
445
+ : 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)?
446
+ | 'switch' '(' expression ')' statement
447
+ ;
448
+
449
+ iteration_statement
450
+ : 'while' '(' expression ')' statement
451
+ | 'do' statement 'while' '(' expression ')' ';'
452
+ | 'for' '(' expression_statement expression_statement expression? ')' statement
453
+ ;
454
+
455
+ jump_statement
456
+ : 'goto' IDENTIFIER ';'
457
+ | 'continue' ';'
458
+ | 'break' ';'
459
+ | 'return' ';'
460
+ | 'return' expression ';'
461
+ ;
462
+
463
+ IDENTIFIER
464
+ : LETTER (LETTER|'0'..'9')*
465
+ ;
466
+
467
+ fragment
468
+ LETTER
469
+ : '$'
470
+ | 'A'..'Z'
471
+ | 'a'..'z'
472
+ | '_'
473
+ ;
474
+
475
+ CHARACTER_LITERAL
476
+ : '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
477
+ ;
478
+
479
+ STRING_LITERAL
480
+ : '"' ( EscapeSequence | ~('\\'|'"') )* '"'
481
+ ;
482
+
483
+ HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
484
+
485
+ DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
486
+
487
+ OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;
488
+
489
+ fragment
490
+ HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
491
+
492
+ fragment
493
+ IntegerTypeSuffix
494
+ : ('u'|'U')? ('l'|'L')
495
+ | ('u'|'U') ('l'|'L')?
496
+ ;
497
+
498
+ FLOATING_POINT_LITERAL
499
+ : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
500
+ | '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
501
+ | ('0'..'9')+ Exponent FloatTypeSuffix?
502
+ | ('0'..'9')+ Exponent? FloatTypeSuffix
503
+ ;
504
+
505
+ fragment
506
+ Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
507
+
508
+ fragment
509
+ FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
510
+
511
+ fragment
512
+ EscapeSequence
513
+ : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
514
+ | OctalEscape
515
+ ;
516
+
517
+ fragment
518
+ OctalEscape
519
+ : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
520
+ | '\\' ('0'..'7') ('0'..'7')
521
+ | '\\' ('0'..'7')
522
+ ;
523
+
524
+ fragment
525
+ UnicodeEscape
526
+ : '\\' 'u' HexDigit HexDigit HexDigit HexDigit
527
+ ;
528
+
529
+ WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
530
+ ;
531
+
532
+ COMMENT
533
+ : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
534
+ ;
535
+
536
+ LINE_COMMENT
537
+ : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
538
+ ;
539
+
540
+ // ignore #line info for now
541
+ LINE_COMMAND
542
+ : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
543
+ ;