antlr3 1.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/ANTLR-LICENSE.txt +26 -0
  2. data/History.txt +66 -0
  3. data/README.txt +139 -0
  4. data/bin/antlr4ruby +33 -0
  5. data/java/RubyTarget.java +524 -0
  6. data/java/antlr-full-3.2.1.jar +0 -0
  7. data/lib/antlr3.rb +176 -0
  8. data/lib/antlr3/constants.rb +88 -0
  9. data/lib/antlr3/debug.rb +701 -0
  10. data/lib/antlr3/debug/event-hub.rb +210 -0
  11. data/lib/antlr3/debug/record-event-listener.rb +25 -0
  12. data/lib/antlr3/debug/rule-tracer.rb +55 -0
  13. data/lib/antlr3/debug/socket.rb +360 -0
  14. data/lib/antlr3/debug/trace-event-listener.rb +92 -0
  15. data/lib/antlr3/dfa.rb +247 -0
  16. data/lib/antlr3/dot.rb +174 -0
  17. data/lib/antlr3/error.rb +657 -0
  18. data/lib/antlr3/main.rb +561 -0
  19. data/lib/antlr3/modes/ast-builder.rb +41 -0
  20. data/lib/antlr3/modes/filter.rb +56 -0
  21. data/lib/antlr3/profile.rb +322 -0
  22. data/lib/antlr3/recognizers.rb +1280 -0
  23. data/lib/antlr3/streams.rb +985 -0
  24. data/lib/antlr3/streams/interactive.rb +91 -0
  25. data/lib/antlr3/streams/rewrite.rb +412 -0
  26. data/lib/antlr3/test/call-stack.rb +57 -0
  27. data/lib/antlr3/test/config.rb +23 -0
  28. data/lib/antlr3/test/core-extensions.rb +269 -0
  29. data/lib/antlr3/test/diff.rb +165 -0
  30. data/lib/antlr3/test/functional.rb +207 -0
  31. data/lib/antlr3/test/grammar.rb +371 -0
  32. data/lib/antlr3/token.rb +592 -0
  33. data/lib/antlr3/tree.rb +1415 -0
  34. data/lib/antlr3/tree/debug.rb +163 -0
  35. data/lib/antlr3/tree/visitor.rb +84 -0
  36. data/lib/antlr3/tree/wizard.rb +481 -0
  37. data/lib/antlr3/util.rb +149 -0
  38. data/lib/antlr3/version.rb +27 -0
  39. data/samples/ANTLRv3Grammar.g +621 -0
  40. data/samples/Cpp.g +749 -0
  41. data/templates/AST.stg +335 -0
  42. data/templates/ASTDbg.stg +40 -0
  43. data/templates/ASTParser.stg +153 -0
  44. data/templates/ASTTreeParser.stg +272 -0
  45. data/templates/Dbg.stg +192 -0
  46. data/templates/Ruby.stg +1514 -0
  47. data/test/functional/ast-output/auto-ast.rb +797 -0
  48. data/test/functional/ast-output/construction.rb +555 -0
  49. data/test/functional/ast-output/hetero-nodes.rb +753 -0
  50. data/test/functional/ast-output/rewrites.rb +1327 -0
  51. data/test/functional/ast-output/tree-rewrite.rb +1662 -0
  52. data/test/functional/debugging/debug-mode.rb +689 -0
  53. data/test/functional/debugging/profile-mode.rb +165 -0
  54. data/test/functional/debugging/rule-tracing.rb +74 -0
  55. data/test/functional/delegation/import.rb +379 -0
  56. data/test/functional/lexer/basic.rb +559 -0
  57. data/test/functional/lexer/filter-mode.rb +245 -0
  58. data/test/functional/lexer/nuances.rb +47 -0
  59. data/test/functional/lexer/properties.rb +104 -0
  60. data/test/functional/lexer/syn-pred.rb +32 -0
  61. data/test/functional/lexer/xml.rb +206 -0
  62. data/test/functional/main/main-scripts.rb +245 -0
  63. data/test/functional/parser/actions.rb +224 -0
  64. data/test/functional/parser/backtracking.rb +244 -0
  65. data/test/functional/parser/basic.rb +282 -0
  66. data/test/functional/parser/calc.rb +98 -0
  67. data/test/functional/parser/ll-star.rb +143 -0
  68. data/test/functional/parser/nuances.rb +165 -0
  69. data/test/functional/parser/predicates.rb +103 -0
  70. data/test/functional/parser/properties.rb +242 -0
  71. data/test/functional/parser/rule-methods.rb +132 -0
  72. data/test/functional/parser/scopes.rb +274 -0
  73. data/test/functional/token-rewrite/basic.rb +318 -0
  74. data/test/functional/token-rewrite/via-parser.rb +100 -0
  75. data/test/functional/tree-parser/basic.rb +750 -0
  76. data/test/unit/sample-input/file-stream-1 +2 -0
  77. data/test/unit/sample-input/teststreams.input2 +2 -0
  78. data/test/unit/test-dfa.rb +52 -0
  79. data/test/unit/test-exceptions.rb +44 -0
  80. data/test/unit/test-recognizers.rb +55 -0
  81. data/test/unit/test-scheme.rb +62 -0
  82. data/test/unit/test-streams.rb +459 -0
  83. data/test/unit/test-tree-wizard.rb +535 -0
  84. data/test/unit/test-trees.rb +854 -0
  85. metadata +205 -0
@@ -0,0 +1,244 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ require 'antlr3/test/functional'
5
+
6
+ class TestBacktracking < ANTLR3::Test::Functional
7
+
8
+ inline_grammar(<<-'END')
9
+ grammar Backtrack;
10
+ options {
11
+ language = Ruby;
12
+ backtrack=true;
13
+ memoize=true;
14
+ k=2;
15
+ }
16
+
17
+ scope Symbols {
18
+ types;
19
+ }
20
+
21
+ @members {
22
+ def is_type_name?(name)
23
+ @Symbols_stack.reverse_each do |scope|
24
+ scope.types.include?(name) and return true
25
+ end
26
+ return false
27
+ end
28
+
29
+ def report_error(e)
30
+ # do nothing
31
+ end
32
+
33
+ }
34
+
35
+ translation_unit
36
+ scope Symbols; // entire file is a scope
37
+ @init {
38
+ $Symbols::types = Set.new
39
+ }
40
+ : external_declaration+
41
+ ;
42
+
43
+ /** Either a function definition or any other kind of C decl/def.
44
+ * The LL(*) analysis algorithm fails to deal with this due to
45
+ * recursion in the declarator rules. I'm putting in a
46
+ * manual predicate here so that we don't backtrack over
47
+ * the entire function. Further, you get a better error
48
+ * as errors within the function itself don't make it fail
49
+ * to predict that it's a function. Weird errors previously.
50
+ * Remember: the goal is to avoid backtrack like the plague
51
+ * because it makes debugging, actions, and errors harder.
52
+ *
53
+ * Note that k=1 results in a much smaller predictor for the
54
+ * fixed look; k=2 made a few extra thousand lines. ;)
55
+ * I'll have to optimize that in the future.
56
+ */
57
+ external_declaration
58
+ options {k=1;}
59
+ : ( declaration_specifiers? declarator declaration* '{' )=> function_definition
60
+ | declaration
61
+ ;
62
+
63
+ function_definition
64
+ scope Symbols; // put parameters and locals into same scope for now
65
+ @init {
66
+ $Symbols::types = Set.new # fresh, empty type set for this scope (Ruby target, same as translation_unit)
67
+ }
68
+ : declaration_specifiers? declarator
69
+ ;
70
+
71
+ declaration
72
+ scope {
73
+ is_type_def;
74
+ }
75
+ @init {
76
+ $declaration::is_type_def = false
77
+ }
78
+ : 'typedef' declaration_specifiers? {$declaration::is_type_def = true}
79
+ init_declarator_list ';' // special case, looking for typedef
80
+ | declaration_specifiers init_declarator_list? ';'
81
+ ;
82
+
83
+ declaration_specifiers
84
+ : ( storage_class_specifier
85
+ | type_specifier
86
+ | type_qualifier
87
+ )+
88
+ ;
89
+
90
+ init_declarator_list
91
+ : init_declarator (',' init_declarator)*
92
+ ;
93
+
94
+ init_declarator
95
+ : declarator //('=' initializer)?
96
+ ;
97
+
98
+ storage_class_specifier
99
+ : 'extern'
100
+ | 'static'
101
+ | 'auto'
102
+ | 'register'
103
+ ;
104
+
105
+ type_specifier
106
+ : 'void'
107
+ | 'char'
108
+ | 'short'
109
+ | 'int'
110
+ | 'long'
111
+ | 'float'
112
+ | 'double'
113
+ | 'signed'
114
+ | 'unsigned'
115
+ | type_id
116
+ ;
117
+
118
+ type_id
119
+ : { is_type_name?(@input.look(1).text)}? IDENTIFIER
120
+ ;
121
+
122
+ type_qualifier
123
+ : 'const'
124
+ | 'volatile'
125
+ ;
126
+
127
+ declarator
128
+ : pointer? direct_declarator
129
+ | pointer
130
+ ;
131
+
132
+ direct_declarator
133
+ : ( IDENTIFIER
134
+ {
135
+ if $declaration.length > 0 && $declaration::is_type_def
136
+ $Symbols::types.add($IDENTIFIER.text)
137
+ end
138
+ }
139
+ | '(' declarator ')'
140
+ )
141
+ declarator_suffix*
142
+ ;
143
+
144
+ declarator_suffix
145
+ : /*'[' constant_expression ']'
146
+ |*/ '[' ']'
147
+ | '(' ')'
148
+ ;
149
+
150
+ pointer
151
+ : '*' type_qualifier+ pointer?
152
+ | '*' pointer
153
+ | '*'
154
+ ;
155
+
156
+ IDENTIFIER
157
+ : LETTER (LETTER|'0'..'9')*
158
+ ;
159
+
160
+ fragment
161
+ LETTER
162
+ : '$'
163
+ | 'A'..'Z'
164
+ | 'a'..'z'
165
+ | '_'
166
+ ;
167
+
168
+ CHARACTER_LITERAL
169
+ : '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
170
+ ;
171
+
172
+ STRING_LITERAL
173
+ : '"' ( EscapeSequence | ~('\\'|'"') )* '"'
174
+ ;
175
+
176
+ HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
177
+
178
+ DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
179
+
180
+ OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;
181
+
182
+ fragment
183
+ HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
184
+
185
+ fragment
186
+ IntegerTypeSuffix
187
+ : ('u'|'U')? ('l'|'L')
188
+ | ('u'|'U') ('l'|'L')?
189
+ ;
190
+
191
+ FLOATING_POINT_LITERAL
192
+ : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
193
+ | '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
194
+ | ('0'..'9')+ Exponent FloatTypeSuffix?
195
+ | ('0'..'9')+ Exponent? FloatTypeSuffix
196
+ ;
197
+
198
+ fragment
199
+ Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
200
+
201
+ fragment
202
+ FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
203
+
204
+ fragment
205
+ EscapeSequence
206
+ : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
207
+ | OctalEscape
208
+ ;
209
+
210
+ fragment
211
+ OctalEscape
212
+ : '\\' ('0'..'3') ('0'..'7') ('0'..'7')
213
+ | '\\' ('0'..'7') ('0'..'7')
214
+ | '\\' ('0'..'7')
215
+ ;
216
+
217
+ fragment
218
+ UnicodeEscape
219
+ : '\\' 'u' HexDigit HexDigit HexDigit HexDigit
220
+ ;
221
+
222
+ WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
223
+ ;
224
+
225
+ COMMENT
226
+ : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
227
+ ;
228
+
229
+ LINE_COMMENT
230
+ : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
231
+ ;
232
+ LINE_COMMAND
233
+ : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
234
+ ;
235
+ END
236
+
237
+ example "grammar with backtracking and memoization" do
238
+ lexer = Backtrack::Lexer.new( 'int a;' )
239
+ parser = Backtrack::Parser.new lexer
240
+ events = parser.translation_unit
241
+ end
242
+
243
+ end
244
+
@@ -0,0 +1,282 @@
1
+ #!/usr/bin/ruby
2
+ # encoding: utf-8
3
+
4
+ require 'antlr3/test/functional'
5
+
6
+ class TestParser001 < ANTLR3::Test::Functional
7
+ inline_grammar(<<-'END')
8
+ grammar Identifiers;
9
+ options { language = Ruby; }
10
+
11
+ @parser::init {
12
+ @identifiers = []
13
+ @reported_errors = []
14
+ }
15
+
16
+ @parser::members {
17
+ attr_reader :reported_errors, :identifiers
18
+
19
+ def found_identifier(name)
20
+ @identifiers << name
21
+ end
22
+
23
+ def emit_error_message(msg)
24
+ @reported_errors << msg
25
+ end
26
+ }
27
+
28
+ document:
29
+ t=IDENTIFIER {found_identifier($t.text)}
30
+ ;
31
+
32
+ IDENTIFIER: ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*;
33
+ END
34
+
35
+ example "parsing 'blah_de_blah'" do
36
+ # to build a parser, this is the standard chain of calls to prepare the input
37
+ input = ANTLR3::StringStream.new( 'blah_de_blah', :file => 'blah.txt' )
38
+ lexer = Identifiers::Lexer.new( input )
39
+ tokens = ANTLR3::CommonTokenStream.new( lexer )
40
+ parser = Identifiers::Parser.new( tokens )
41
+
42
+ parser.document
43
+
44
+ parser.reported_errors.should be_empty
45
+ parser.identifiers.should == %w(blah_de_blah)
46
+ end
47
+
48
+ example "error from empty input" do
49
+ # no customized stream is needed here: lexers and parsers wrap raw
50
+ # input in the standard stream classes on their own
51
+ recognizer = Identifiers::Parser.new( Identifiers::Lexer.new( '' ) )
52
+ recognizer.document
53
+ failures = recognizer.reported_errors
54
+
55
+ failures.should have(1).thing
56
+ end
57
+
58
+ example 'automatic input wrapping' do
59
+ # if the parser is able to figure out what lexer class
60
+ # to use (typically when it comes from a combined grammar),
61
+ # and you don't need to do any special token processing
62
+ # before making a parser, this is an extra shortcut for
63
+ # parser construction
64
+ parser = Identifiers::Parser.new( 'blah_de_blah', :file => 'blah.txt' )
65
+
66
+ parser.document
67
+
68
+ parser.reported_errors.should be_empty
69
+ parser.identifiers.should == %w(blah_de_blah)
70
+ end
71
+ end
72
+
73
+ class TestParser002 < ANTLR3::Test::Functional
74
+ inline_grammar(<<-'END')
75
+ grammar SimpleLanguage;
76
+ options {
77
+ language = Ruby;
78
+ }
79
+
80
+ @parser::init {
81
+ @events = []
82
+ @reported_errors = []
83
+ }
84
+
85
+ @parser::members {
86
+ attr_reader :reported_errors, :events
87
+
88
+ def emit_error_message(msg)
89
+ @reported_errors << msg
90
+ end
91
+ }
92
+
93
+ document:
94
+ ( declaration
95
+ | call
96
+ )*
97
+ EOF
98
+ ;
99
+
100
+ declaration:
101
+ 'var' t=IDENTIFIER ';'
102
+ {@events << ['decl', $t.text]}
103
+ ;
104
+
105
+ call:
106
+ t=IDENTIFIER '(' ')' ';'
107
+ {@events << ['call', $t.text]}
108
+ ;
109
+
110
+ IDENTIFIER: ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*;
111
+ WS: (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;};
112
+ END
113
+
114
+
115
+ example "parsing decls and calls" do
116
+ lexer = SimpleLanguage::Lexer.new( "var foobar; gnarz(); var blupp; flupp ( ) ;" )
117
+ parser = SimpleLanguage::Parser.new( lexer )
118
+
119
+ parser.document
120
+
121
+ parser.reported_errors.should be_empty
122
+ parser.events.should == [
123
+ %w(decl foobar),
124
+ %w(call gnarz),
125
+ %w(decl blupp),
126
+ %w(call flupp)
127
+ ]
128
+ end
129
+
130
+ example "bad declaration" do
131
+ lexer = SimpleLanguage::Lexer.new( 'var; foo()' )
132
+ parser = SimpleLanguage::Parser.new( lexer )
133
+
134
+ parser.document
135
+
136
+ parser.reported_errors.should have(1).thing
137
+ parser.events.should be_empty
138
+ end
139
+
140
+ example "error recovery via token insertion" do
141
+ scanner = SimpleLanguage::Lexer.new( 'gnarz(; flupp();' )
142
+ recognizer = SimpleLanguage::Parser.new( scanner )
143
+
144
+ recognizer.document
145
+
146
+ recognizer.reported_errors.should have(1).thing
147
+ recognizer.events.should == [
148
+ [ 'call', 'gnarz' ],
149
+ [ 'call', 'flupp' ]
150
+ ]
151
+ end
152
+
153
+ end
154
+
155
+ class TestParser003 < ANTLR3::Test::Functional
156
+ inline_grammar(<<-'END')
157
+ grammar MoreComplicated;
158
+
159
+ options { language = Ruby; }
160
+
161
+ @init {
162
+ @reported_errors = []
163
+ }
164
+
165
+ @members {
166
+ attr_reader :reported_errors
167
+
168
+ def emit_error_message(msg)
169
+ @reported_errors << msg
170
+ end
171
+ }
172
+
173
+ program
174
+ : declaration+
175
+ ;
176
+
177
+ declaration
178
+ : variable
179
+ | functionHeader ';'
180
+ | functionHeader block
181
+ ;
182
+
183
+ variable
184
+ : type declarator ';'
185
+ ;
186
+
187
+ declarator
188
+ : ID
189
+ ;
190
+
191
+ functionHeader
192
+ : type ID '(' ( formalParameter ( ',' formalParameter )* )? ')'
193
+ ;
194
+
195
+ formalParameter
196
+ : type declarator
197
+ ;
198
+
199
+ type
200
+ : 'int'
201
+ | 'char'
202
+ | 'void'
203
+ | ID
204
+ ;
205
+
206
+ block
207
+ : '{'
208
+ variable*
209
+ stat*
210
+ '}'
211
+ ;
212
+
213
+ stat: forStat
214
+ | expr ';'
215
+ | block
216
+ | assignStat ';'
217
+ | ';'
218
+ ;
219
+
220
+ forStat
221
+ : 'for' '(' assignStat ';' expr ';' assignStat ')' block
222
+ ;
223
+
224
+ assignStat
225
+ : ID '=' expr
226
+ ;
227
+
228
+ expr: condExpr
229
+ ;
230
+
231
+ condExpr
232
+ : aexpr ( ('==' | '<') aexpr )?
233
+ ;
234
+
235
+ aexpr
236
+ : atom ( '+' atom )*
237
+ ;
238
+
239
+ atom
240
+ : ID
241
+ | INT
242
+ | '(' expr ')'
243
+ ;
244
+
245
+ ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
246
+ ;
247
+
248
+ INT : ('0'..'9')+
249
+ ;
250
+
251
+ WS : ( ' '
252
+ | '\t'
253
+ | '\r'
254
+ | '\n'
255
+ )+
256
+ {$channel=HIDDEN}
257
+ ;
258
+ END
259
+
260
+ example "parsing 'int foo;'" do
261
+ lexer = MoreComplicated::Lexer.new "int foo;"
262
+ parser = MoreComplicated::Parser.new lexer
263
+ parser.program
264
+ parser.reported_errors.should be_empty
265
+ end
266
+
267
+
268
+ example "catching badly formed input" do
269
+ lexer = MoreComplicated::Lexer.new "int foo() { 1+2 }"
270
+ parser = MoreComplicated::Parser.new lexer
271
+ parser.program
272
+ parser.reported_errors.should have(1).thing
273
+ end
274
+
275
+ example "two instances of badly formed input" do
276
+ lexer = MoreComplicated::Lexer.new "int foo() { 1+; 1+2 }"
277
+ parser = MoreComplicated::Parser.new lexer
278
+ parser.program
279
+ parser.reported_errors.should have(2).things
280
+ end
281
+
282
+ end