antlr3 1.8.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. data/History.txt +35 -0
  2. data/Manifest.txt +73 -0
  3. data/README.txt +6 -13
  4. data/java/RubyTarget.java +43 -19
  5. data/java/antlr-full-3.2.1.jar +0 -0
  6. data/lib/antlr3/debug.rb +2 -0
  7. data/lib/antlr3/debug/event-hub.rb +55 -55
  8. data/lib/antlr3/debug/record-event-listener.rb +2 -2
  9. data/lib/antlr3/debug/rule-tracer.rb +14 -14
  10. data/lib/antlr3/debug/socket.rb +47 -47
  11. data/lib/antlr3/debug/trace-event-listener.rb +8 -8
  12. data/lib/antlr3/main.rb +29 -9
  13. data/lib/antlr3/modes/ast-builder.rb +7 -7
  14. data/lib/antlr3/modes/filter.rb +19 -17
  15. data/lib/antlr3/profile.rb +34 -6
  16. data/lib/antlr3/recognizers.rb +50 -1
  17. data/lib/antlr3/streams.rb +19 -15
  18. data/lib/antlr3/streams/rewrite.rb +241 -229
  19. data/lib/antlr3/template/group-file-lexer.rb +6 -8
  20. data/lib/antlr3/template/group-file-parser.rb +16 -16
  21. data/lib/antlr3/template/group-file.rb +1 -1
  22. data/lib/antlr3/test/call-stack.rb +13 -13
  23. data/lib/antlr3/test/core-extensions.rb +69 -69
  24. data/lib/antlr3/test/functional.rb +0 -4
  25. data/lib/antlr3/test/grammar.rb +70 -70
  26. data/lib/antlr3/token.rb +41 -17
  27. data/lib/antlr3/tree.rb +11 -14
  28. data/lib/antlr3/tree/debug.rb +53 -53
  29. data/lib/antlr3/tree/visitor.rb +11 -11
  30. data/lib/antlr3/tree/wizard.rb +35 -35
  31. data/lib/antlr3/util.rb +18 -0
  32. data/lib/antlr3/version.rb +1 -1
  33. data/rakefile +1 -0
  34. data/samples/ANTLRv3Grammar.g +3 -3
  35. data/samples/JavaScript.g +702 -0
  36. data/samples/standard/C/C.g +543 -0
  37. data/samples/standard/C/C.tokens +175 -0
  38. data/samples/standard/C/C__testrig.st +0 -0
  39. data/samples/standard/C/c.rb +12 -0
  40. data/samples/standard/C/input +3479 -0
  41. data/samples/standard/C/output +171 -0
  42. data/samples/standard/LL-star/LLStar.g +101 -0
  43. data/samples/standard/LL-star/input +12 -0
  44. data/samples/standard/LL-star/ll-star.rb +12 -0
  45. data/samples/standard/LL-star/output +2 -0
  46. data/samples/standard/calc/Calculator.g +47 -0
  47. data/samples/standard/calc/Calculator.py +16 -0
  48. data/samples/standard/calc/Calculator.rb +28 -0
  49. data/samples/standard/cminus/CMinus.g +141 -0
  50. data/samples/standard/cminus/bytecode.group +80 -0
  51. data/samples/standard/cminus/cminus.rb +16 -0
  52. data/samples/standard/cminus/input +9 -0
  53. data/samples/standard/cminus/java.group +91 -0
  54. data/samples/standard/cminus/output +11 -0
  55. data/samples/standard/cminus/python.group +48 -0
  56. data/samples/standard/dynamic-scope/DynamicScopes.g +50 -0
  57. data/samples/standard/dynamic-scope/dynamic-scopes.rb +12 -0
  58. data/samples/standard/dynamic-scope/input +7 -0
  59. data/samples/standard/dynamic-scope/output +4 -0
  60. data/samples/standard/fuzzy/FuzzyJava.g +89 -0
  61. data/samples/standard/fuzzy/fuzzy.py +11 -0
  62. data/samples/standard/fuzzy/fuzzy.rb +9 -0
  63. data/samples/standard/fuzzy/input +13 -0
  64. data/samples/standard/fuzzy/output +12 -0
  65. data/samples/standard/hoisted-predicates/HoistedPredicates.g +40 -0
  66. data/samples/standard/hoisted-predicates/hoisted-predicates.rb +13 -0
  67. data/samples/standard/hoisted-predicates/input +1 -0
  68. data/samples/standard/hoisted-predicates/output +1 -0
  69. data/samples/standard/island-grammar/Javadoc.g +46 -0
  70. data/samples/standard/island-grammar/Simple.g +104 -0
  71. data/samples/standard/island-grammar/input +11 -0
  72. data/samples/standard/island-grammar/island.rb +12 -0
  73. data/samples/standard/island-grammar/output +16 -0
  74. data/samples/standard/java/Java.g +827 -0
  75. data/samples/standard/java/input +80 -0
  76. data/samples/standard/java/java.rb +13 -0
  77. data/samples/standard/java/output +1 -0
  78. data/samples/standard/python/Python.g +718 -0
  79. data/samples/standard/python/PythonTokenSource.rb +107 -0
  80. data/samples/standard/python/input +210 -0
  81. data/samples/standard/python/output +24 -0
  82. data/samples/standard/python/python.rb +14 -0
  83. data/samples/standard/rakefile +18 -0
  84. data/samples/standard/scopes/SymbolTable.g +66 -0
  85. data/samples/standard/scopes/input +12 -0
  86. data/samples/standard/scopes/output +3 -0
  87. data/samples/standard/scopes/scopes.rb +12 -0
  88. data/samples/standard/simplecTreeParser/SimpleC.g +113 -0
  89. data/samples/standard/simplecTreeParser/SimpleCWalker.g +64 -0
  90. data/samples/standard/simplecTreeParser/input +12 -0
  91. data/samples/standard/simplecTreeParser/output +1 -0
  92. data/samples/standard/simplecTreeParser/simplec.rb +18 -0
  93. data/samples/standard/treeparser/Lang.g +24 -0
  94. data/samples/standard/treeparser/LangDumpDecl.g +17 -0
  95. data/samples/standard/treeparser/input +1 -0
  96. data/samples/standard/treeparser/output +2 -0
  97. data/samples/standard/treeparser/treeparser.rb +18 -0
  98. data/samples/standard/tweak/Tweak.g +68 -0
  99. data/samples/standard/tweak/input +9 -0
  100. data/samples/standard/tweak/output +16 -0
  101. data/samples/standard/tweak/tweak.rb +13 -0
  102. data/samples/standard/xml/README +16 -0
  103. data/samples/standard/xml/XML.g +123 -0
  104. data/samples/standard/xml/input +21 -0
  105. data/samples/standard/xml/output +39 -0
  106. data/samples/standard/xml/xml.rb +9 -0
  107. data/templates/Ruby.stg +4 -4
  108. data/test/functional/ast-output/auto-ast.rb +0 -5
  109. data/test/functional/ast-output/rewrites.rb +4 -4
  110. data/test/unit/test-scope.rb +45 -0
  111. metadata +96 -8
@@ -0,0 +1,543 @@
1
+ /** ANSI C ANTLR v3 grammar
2
+
3
+ Translated from Jutta Degener's 1995 ANSI C yacc grammar by Terence Parr
4
+ July 2006. The lexical rules were taken from the Java grammar.
5
+
6
+ Jutta says: "In 1985, Jeff Lee published his Yacc grammar (which
7
+ is accompanied by a matching Lex specification) for the April 30, 1985 draft
8
+ version of the ANSI C standard. Tom Stockfisch reposted it to net.sources in
9
+ 1987; that original, as mentioned in the answer to question 17.25 of the
10
+ comp.lang.c FAQ, can be ftp'ed from ftp.uu.net,
11
+ file usenet/net.sources/ansi.c.grammar.Z.
12
+ I intend to keep this version as close to the current C Standard grammar as
13
+ possible; please let me know if you discover discrepancies. Jutta Degener, 1995"
14
+
15
+ Generally speaking, you need symbol table info to parse C; typedefs
16
+ define types and then IDENTIFIERS are either types or plain IDs. I'm doing
17
+ the min necessary here tracking only type names. This is a good example
18
+ of the global scope (called Symbols). Every rule that declares its usage
19
+ of Symbols pushes a new copy on the stack effectively creating a new
20
+ symbol scope. Also note rule declaration declares a rule scope that
21
+ lets any invoked rule see isTypedef boolean. It's much easier than
22
+ passing that info down as parameters. Very clean. Rule
23
+ direct_declarator can then easily determine whether the IDENTIFIER
24
+ should be declared as a type name.
25
+
26
+ I have only tested this on a single file, though it is 3500 lines.
27
+
28
+ This grammar requires ANTLR v3 (3.0b3 or higher)
29
+
30
+ Terence Parr
31
+ July 2006
32
+ */
33
+ grammar C;
34
+ options {
35
+ language = Ruby;
36
+ backtrack = true;
37
+ memoize = true;
38
+ k = 2;
39
+ }
40
+
41
+ scope Symbols {
42
+ types;
43
+ }
44
+
45
+ @members {
46
+ # Reports whether name is recorded in the types collection of any scope on the Symbols stack.
47
+ def type_name?( name )
48
+ @Symbols_stack.any? do | frame |
49
+ frame.types.member?( name )
50
+ end
51
+ end
52
+ }
53
+
54
+ translation_unit
55
+ scope Symbols; // entire file is a scope
56
+ @init {
57
+ $Symbols::types = Set.new
58
+ }
59
+ : external_declaration+
60
+ ;
61
+
62
+ /** Either a function definition or any other kind of C decl/def.
63
+ * The LL(*) analysis algorithm fails to deal with this due to
64
+ * recursion in the declarator rules. I'm putting in a
65
+ * manual predicate here so that we don't backtrack over
66
+ * the entire function. Further, you get a better error
67
+ * as errors within the function itself don't make it fail
68
+ * to predict that it's a function. Weird errors previously.
69
+ * Remember: the goal is to avoid backtrack like the plague
70
+ * because it makes debugging, actions, and errors harder.
71
+ *
72
+ * Note that k=1 results in a much smaller predictor for the
73
+ * fixed lookahead; k=2 made a few extra thousand lines. ;)
74
+ * I'll have to optimize that in the future.
75
+ */
76
+ external_declaration // one top-level item: a function definition or a declaration
77
+ options {k=1;} // this rule overrides the grammar-wide k=2 with a fixed lookahead of one token
78
+ : ( declaration_specifiers? declarator declaration* '{' )=> function_definition // syntactic predicate: pick function_definition only when a declarator header followed by '{' can be matched
79
+ | declaration // everything else is treated as a declaration
80
+ ;
81
+
82
+ function_definition
83
+ scope Symbols; // put parameters and locals into same scope for now
84
+ @init {
85
+ $Symbols::types = Set.new
86
+ }
87
+ : declaration_specifiers? declarator
88
+ ( declaration+ compound_statement // K&R style
89
+ | compound_statement // ANSI style
90
+ )
91
+ ;
92
+
93
+ declaration // one declaration; records whether it is a typedef in a rule-level scope
94
+ scope {
95
+ type_def;
96
+ } // type_def is the only attribute of this rule scope
97
+ @init { $declaration::type_def = false } // the flag starts out false on every invocation
98
+ : 'typedef' declaration_specifiers? { $declaration::type_def = true } // set the flag before the declarators are parsed
99
+ init_declarator_list ';' // special case, looking for typedef
100
+ | declaration_specifiers init_declarator_list? ';' // ordinary declaration; the flag stays false
101
+ ;
102
+
103
+ declaration_specifiers
104
+ : ( storage_class_specifier
105
+ | type_specifier
106
+ | type_qualifier
107
+ )+
108
+ ;
109
+
110
+ init_declarator_list
111
+ : init_declarator (',' init_declarator)*
112
+ ;
113
+
114
+ init_declarator
115
+ : declarator ('=' initializer)?
116
+ ;
117
+
118
+ storage_class_specifier
119
+ : 'extern'
120
+ | 'static'
121
+ | 'auto'
122
+ | 'register'
123
+ ;
124
+
125
+ type_specifier
126
+ : 'void'
127
+ | 'char'
128
+ | 'short'
129
+ | 'int'
130
+ | 'long'
131
+ | 'float'
132
+ | 'double'
133
+ | 'signed'
134
+ | 'unsigned'
135
+ | struct_or_union_specifier
136
+ | enum_specifier
137
+ | type_id
138
+ ;
139
+
140
+ type_id // an IDENTIFIER that names a type
141
+ : { type_name?( @input.look.text ) }? IDENTIFIER // semantic predicate: viable only when type_name? is true for the text of @input.look (presumably the next unconsumed token -- confirm against the runtime)
142
+ ;
143
+
144
+ struct_or_union_specifier // a struct or union, either defined with a body or referenced by tag
145
+ options {k=3;} // this rule overrides the grammar-wide k=2 with a fixed lookahead of three tokens
146
+ scope Symbols; // this rule pushes its own Symbols scope
147
+ @init {
148
+ $Symbols::types = Set.new
149
+ } // the new scope starts with an empty set of type names
150
+ : struct_or_union IDENTIFIER? '{' struct_declaration_list '}' // definition with a member list; the tag is optional
151
+ | struct_or_union IDENTIFIER // reference by tag only
152
+ ;
153
+
154
+ struct_or_union
155
+ : 'struct'
156
+ | 'union'
157
+ ;
158
+
159
+ struct_declaration_list
160
+ : struct_declaration+
161
+ ;
162
+
163
+ struct_declaration
164
+ : specifier_qualifier_list struct_declarator_list ';'
165
+ ;
166
+
167
+ specifier_qualifier_list
168
+ : ( type_qualifier | type_specifier )+
169
+ ;
170
+
171
+ struct_declarator_list
172
+ : struct_declarator (',' struct_declarator)*
173
+ ;
174
+
175
+ struct_declarator
176
+ : declarator (':' constant_expression)?
177
+ | ':' constant_expression
178
+ ;
179
+
180
+ enum_specifier
181
+ options {k=3;}
182
+ : 'enum' '{' enumerator_list '}'
183
+ | 'enum' IDENTIFIER '{' enumerator_list '}'
184
+ | 'enum' IDENTIFIER
185
+ ;
186
+
187
+ enumerator_list
188
+ : enumerator (',' enumerator)*
189
+ ;
190
+
191
+ enumerator
192
+ : IDENTIFIER ('=' constant_expression)?
193
+ ;
194
+
195
+ type_qualifier
196
+ : 'const'
197
+ | 'volatile'
198
+ ;
199
+
200
+ declarator
201
+ : pointer? direct_declarator
202
+ | pointer
203
+ ;
204
+
205
+ direct_declarator // an identifier or a parenthesized declarator, followed by any number of suffixes
206
+ : ( IDENTIFIER
207
+ {
208
+ if $declaration.length > 0 and $declaration::type_def # act only when the declaration scope reports a length above zero and its type_def flag is set
209
+ $Symbols::types.add?( $IDENTIFIER.text ) and
210
+ puts( "define type " << $IDENTIFIER.text ) # printed only when add? returned a truthy value
211
+ end
212
+ }
213
+ | '(' declarator ')' // parenthesized declarator; no type name is recorded on this path
214
+ )
215
+ declarator_suffix*
216
+ ;
217
+
218
+ declarator_suffix
219
+ : '[' constant_expression ']'
220
+ | '[' ']'
221
+ | '(' parameter_type_list ')'
222
+ | '(' identifier_list ')'
223
+ | '(' ')'
224
+ ;
225
+
226
+ pointer
227
+ : '*' type_qualifier+ pointer?
228
+ | '*' pointer
229
+ | '*'
230
+ ;
231
+
232
+ parameter_type_list
233
+ : parameter_list (',' '...')?
234
+ ;
235
+
236
+ parameter_list
237
+ : parameter_declaration (',' parameter_declaration)*
238
+ ;
239
+
240
+ parameter_declaration
241
+ : declaration_specifiers (declarator|abstract_declarator)*
242
+ ;
243
+
244
+ identifier_list
245
+ : IDENTIFIER (',' IDENTIFIER)*
246
+ ;
247
+
248
+ type_name
249
+ : specifier_qualifier_list abstract_declarator?
250
+ ;
251
+
252
+ abstract_declarator
253
+ : pointer direct_abstract_declarator?
254
+ | direct_abstract_declarator
255
+ ;
256
+
257
+ direct_abstract_declarator
258
+ : ( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
259
+ ;
260
+
261
+ abstract_declarator_suffix
262
+ : '[' ']'
263
+ | '[' constant_expression ']'
264
+ | '(' ')'
265
+ | '(' parameter_type_list ')'
266
+ ;
267
+
268
+ initializer
269
+ : assignment_expression
270
+ | '{' initializer_list ','? '}'
271
+ ;
272
+
273
+ initializer_list
274
+ : initializer (',' initializer)*
275
+ ;
276
+
277
+ // E x p r e s s i o n s
278
+
279
+ argument_expression_list
280
+ : assignment_expression (',' assignment_expression)*
281
+ ;
282
+
283
+ additive_expression
284
+ : (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
285
+ ;
286
+
287
+ multiplicative_expression
288
+ : (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
289
+ ;
290
+
291
+ cast_expression
292
+ : '(' type_name ')' cast_expression
293
+ | unary_expression
294
+ ;
295
+
296
+ unary_expression
297
+ : postfix_expression
298
+ | '++' unary_expression
299
+ | '--' unary_expression
300
+ | unary_operator cast_expression
301
+ | 'sizeof' unary_expression
302
+ | 'sizeof' '(' type_name ')'
303
+ ;
304
+
305
+ postfix_expression // a primary expression followed by any number of postfix operators
306
+ : primary_expression
307
+ ( '[' expression ']' // array subscript
308
+ | '(' ')' // call without arguments
309
+ | '(' argument_expression_list ')' // call with arguments
310
+ | '.' IDENTIFIER // member access
311
+ // '*' is deliberately not a postfix operator: multiplicative_expression parses it, and listing it here would let "a * b" act as an lvalue
312
+ | '->' IDENTIFIER // member access through a pointer
313
+ | '++' // post-increment
314
+ | '--' // post-decrement
315
+ )*
316
+ ;
317
+
318
+ unary_operator
319
+ : '&'
320
+ | '*'
321
+ | '+'
322
+ | '-'
323
+ | '~'
324
+ | '!'
325
+ ;
326
+
327
+ primary_expression
328
+ : IDENTIFIER
329
+ | constant
330
+ | '(' expression ')'
331
+ ;
332
+
333
+ constant
334
+ : HEX_LITERAL
335
+ | OCTAL_LITERAL
336
+ | DECIMAL_LITERAL
337
+ | CHARACTER_LITERAL
338
+ | STRING_LITERAL
339
+ | FLOATING_POINT_LITERAL
340
+ ;
341
+
342
+ /////
343
+
344
+ expression
345
+ : assignment_expression (',' assignment_expression)*
346
+ ;
347
+
348
+ constant_expression
349
+ : conditional_expression
350
+ ;
351
+
352
+ assignment_expression
353
+ : lvalue assignment_operator assignment_expression
354
+ | conditional_expression
355
+ ;
356
+
357
+ lvalue
358
+ : unary_expression
359
+ ;
360
+
361
+ assignment_operator
362
+ : '='
363
+ | '*='
364
+ | '/='
365
+ | '%='
366
+ | '+='
367
+ | '-='
368
+ | '<<='
369
+ | '>>='
370
+ | '&='
371
+ | '^='
372
+ | '|='
373
+ ;
374
+
375
+ conditional_expression
376
+ : logical_or_expression ('?' expression ':' conditional_expression)?
377
+ ;
378
+
379
+ logical_or_expression
380
+ : logical_and_expression ('||' logical_and_expression)*
381
+ ;
382
+
383
+ logical_and_expression
384
+ : inclusive_or_expression ('&&' inclusive_or_expression)*
385
+ ;
386
+
387
+ inclusive_or_expression
388
+ : exclusive_or_expression ('|' exclusive_or_expression)*
389
+ ;
390
+
391
+ exclusive_or_expression
392
+ : and_expression ('^' and_expression)*
393
+ ;
394
+
395
+ and_expression
396
+ : equality_expression ('&' equality_expression)*
397
+ ;
398
+ equality_expression
399
+ : relational_expression (('=='|'!=') relational_expression)*
400
+ ;
401
+
402
+ relational_expression
403
+ : shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
404
+ ;
405
+
406
+ shift_expression
407
+ : additive_expression (('<<'|'>>') additive_expression)*
408
+ ;
409
+
410
+ // S t a t e m e n t s
411
+
412
+ statement
413
+ : labeled_statement
414
+ | compound_statement
415
+ | expression_statement
416
+ | selection_statement
417
+ | iteration_statement
418
+ | jump_statement
419
+ ;
420
+
421
+ labeled_statement
422
+ : IDENTIFIER ':' statement
423
+ | 'case' constant_expression ':' statement
424
+ | 'default' ':' statement
425
+ ;
426
+
427
+ compound_statement
428
+ scope Symbols; // blocks have a scope of symbols
429
+ @init {
430
+ $Symbols::types = Set.new
431
+ }
432
+ : '{' declaration* statement_list? '}'
433
+ ;
434
+
435
+ statement_list
436
+ : statement+
437
+ ;
438
+
439
+ expression_statement
440
+ : ';'
441
+ | expression ';'
442
+ ;
443
+
444
+ selection_statement // if / if-else and switch statements
445
+ : 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)? // the optional else subrule is decided with one token of lookahead and backtracking turned off; presumably this binds an else to the nearest if -- confirm
446
+ | 'switch' '(' expression ')' statement
447
+ ;
448
+
449
+ iteration_statement
450
+ : 'while' '(' expression ')' statement
451
+ | 'do' statement 'while' '(' expression ')' ';'
452
+ | 'for' '(' expression_statement expression_statement expression? ')' statement
453
+ ;
454
+
455
+ jump_statement
456
+ : 'goto' IDENTIFIER ';'
457
+ | 'continue' ';'
458
+ | 'break' ';'
459
+ | 'return' ';'
460
+ | 'return' expression ';'
461
+ ;
462
+
463
+ IDENTIFIER
464
+ : LETTER (LETTER|'0'..'9')*
465
+ ;
466
+
467
+ fragment
468
+ LETTER
469
+ : '$'
470
+ | 'A'..'Z'
471
+ | 'a'..'z'
472
+ | '_'
473
+ ;
474
+
475
+ CHARACTER_LITERAL
476
+ : '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
477
+ ;
478
+
479
+ STRING_LITERAL
480
+ : '"' ( EscapeSequence | ~('\\'|'"') )* '"'
481
+ ;
482
+
483
+ HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
484
+
485
+ DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
486
+
487
+ OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;
488
+
489
+ fragment
490
+ HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
491
+
492
+ fragment
493
+ IntegerTypeSuffix
494
+ : ('u'|'U')? ('l'|'L')
495
+ | ('u'|'U') ('l'|'L')?
496
+ ;
497
+
498
+ FLOATING_POINT_LITERAL
499
+ : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
500
+ | '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
501
+ | ('0'..'9')+ Exponent FloatTypeSuffix?
502
+ | ('0'..'9')+ Exponent? FloatTypeSuffix
503
+ ;
504
+
505
+ fragment
506
+ Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
507
+
508
+ fragment
509
+ FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
510
+
511
+ fragment
512
+ EscapeSequence // backslash escapes usable inside CHARACTER_LITERAL and STRING_LITERAL
513
+ : '\\' ('a'|'b'|'t'|'n'|'f'|'v'|'r'|'\"'|'\''|'\\'|'?') // simple escapes, including the C escapes \a, \v and \?
514
+ | OctalEscape | '\\' 'x' HexDigit+ // octal escapes, plus C hexadecimal escapes such as \x1F
515
+ ;
516
+
517
+ fragment
518
+ OctalEscape
519
+ : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
520
+ | '\\' ('0'..'7') ('0'..'7')
521
+ | '\\' ('0'..'7')
522
+ ;
523
+
524
+ fragment
525
+ UnicodeEscape // NOTE(review): no other rule in this grammar references this fragment, so it currently matches nothing
526
+ : '\\' 'u' HexDigit HexDigit HexDigit HexDigit // backslash, the letter u, then exactly four hexadecimal digits
527
+ ;
528
+
529
+ WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
530
+ ;
531
+
532
+ COMMENT
533
+ : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
534
+ ;
535
+
536
+ LINE_COMMENT // single-line comment, sent to the hidden channel
537
+ : '//' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;} // the line terminator is optional so a comment on the last line of a file without a trailing newline still lexes
538
+ ;
539
+
540
+ // ignore #line info for now
541
+ LINE_COMMAND // any line starting with '#', sent to the hidden channel
542
+ : '#' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;} // the line terminator is optional so a '#' line at the very end of a file without a trailing newline still lexes
543
+ ;