antlr3 1.6.0 → 1.6.3
- data/History.txt +8 -0
- data/Manifest.txt +94 -0
- data/README.txt +1 -1
- data/Rakefile +58 -0
- data/bin/antlr4ruby +101 -7
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +38 -10
- data/lib/antlr3/constants.rb +13 -5
- data/lib/antlr3/debug.rb +57 -57
- data/lib/antlr3/dfa.rb +138 -68
- data/lib/antlr3/dot.rb +32 -32
- data/lib/antlr3/error.rb +85 -78
- data/lib/antlr3/main.rb +191 -187
- data/lib/antlr3/profile.rb +71 -70
- data/lib/antlr3/recognizers.rb +261 -226
- data/lib/antlr3/streams.rb +85 -84
- data/lib/antlr3/streams/interactive.rb +20 -27
- data/lib/antlr3/streams/rewrite.rb +89 -89
- data/lib/antlr3/task.rb +42 -33
- data/lib/antlr3/template.rb +2 -2
- data/lib/antlr3/template/group-lexer.rb +1 -1
- data/lib/antlr3/token.rb +76 -68
- data/lib/antlr3/tree.rb +125 -121
- data/lib/antlr3/tree/visitor.rb +1 -1
- data/lib/antlr3/tree/wizard.rb +1 -1
- data/lib/antlr3/util.rb +32 -33
- data/lib/antlr3/version.rb +3 -3
- data/templates/Ruby.stg +1 -1
- data/test/unit/test-streams.rb +11 -10
- data/test/unit/test-template.rb +206 -204
- metadata +4 -2
data/lib/antlr3/profile.rb
CHANGED
@@ -13,13 +13,13 @@ switch.
 module ParserEvents
   include ANTLR3::Debug::ParserEvents
 
-  def initialize(stream, options = {})
-    options[:debug_listener] ||= Profiler.new( self )
+  def initialize( stream, options = {} )
+    options[ :debug_listener ] ||= Profiler.new( self )
     super( stream, options )
   end
 
-  def already_parsed_rule?(rule)
-    @debug_listener.examine_rule_memoization(rule)
+  def already_parsed_rule?( rule )
+    @debug_listener.examine_rule_memoization( rule )
     super
   end
 
@@ -27,8 +27,8 @@ module ParserEvents
     @debug_listener.profile
   end
 
-  def memoize(rule, start_index, success)
-    @debug_listener.memoize(rule, rule_start_index, sucess)
+  def memoize( rule, start_index, success )
+    @debug_listener.memoize( rule, rule_start_index, sucess )
     super
   end
 end
@@ -36,18 +36,18 @@ end
 class DataSet < ::Array
   include ::Math
   def total
-    inject(:+)
+    inject( :+ )
   end
   def average
-    length > 0 ? (total.to_f / length) : 0
+    length > 0 ? ( total.to_f / length ) : 0
   end
   def variance
-    length.zero? and return(0.0)
+    length.zero? and return( 0.0 )
     mean = average
-    inject(0.0) { |t, i| t + (i - mean)**2 } / (length - 1)
+    inject( 0.0 ) { |t, i| t + ( i - mean )**2 } / ( length - 1 )
   end
   def standard_deviation
-    sqrt(variance)
+    sqrt( variance )
   end
 end
 
@@ -55,8 +55,8 @@ end
 
 
 
-unless const_defined?(:Profile)
-  Profile = Struct.new(
+unless const_defined?( :Profile )
+  Profile = Struct.new(
     :grammar_file, :parser_class, :top_rule,
     :rule_invocations, :guessing_rule_invocations, :rule_invocation_depth,
     :fixed_looks, :cyclic_looks, :syntactic_predicate_looks,
@@ -69,8 +69,8 @@ end
 
 class Profile
   def initialize
-    init_values = Array.new(self.class.members.length, 0)
-    super(*init_values)
+    init_values = Array.new( self.class.members.length, 0 )
+    super( *init_values )
     self.top_rule = self.parser_class = self.grammar_file = nil
     self.fixed_looks = DataSet.new
     self.cyclic_looks = DataSet.new
@@ -91,58 +91,58 @@ class Profile
 
   def generate_report
     report = '+' << '-' * 78 << "+\n"
-    report << '| ' << "ANTLR Rule Profile".center(76) << " |\n"
+    report << '| ' << "ANTLR Rule Profile".center( 76 ) << " |\n"
     report << '+' << '-' * 78 << "+\n"
-    report << "| Generated at #{Time.now}".ljust(78) << " |\n"
-    report << "| Profiled #{parser_class.name}##{top_rule}".ljust(78) << " |\n"
-    report << "| Rule source generated from grammar file #{grammar_file}".ljust(78) << " |\n"
+    report << "| Generated at #{ Time.now }".ljust( 78 ) << " |\n"
+    report << "| Profiled #{ parser_class.name }##{ top_rule }".ljust( 78 ) << " |\n"
+    report << "| Rule source generated from grammar file #{ grammar_file }".ljust( 78 ) << " |\n"
     report << '+' << '-' * 78 << "+\n"
 
-    report << '| ' << "Rule Invocations".center(76) << " |\n"
+    report << '| ' << "Rule Invocations".center( 76 ) << " |\n"
     report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-    report << "| %-66s | %7i |\n" % ["Total Invocations", rule_invocations]
-    report << "| %-66s | %7i |\n" % ["``Guessing'' Invocations", guessing_rule_invocations]
-    report << "| %-66s | %7i |\n" % ["Deepest Level of Invocation", rule_invocation_depth]
+    report << "| %-66s | %7i |\n" % [ "Total Invocations", rule_invocations ]
+    report << "| %-66s | %7i |\n" % [ "``Guessing'' Invocations", guessing_rule_invocations ]
+    report << "| %-66s | %7i |\n" % [ "Deepest Level of Invocation", rule_invocation_depth ]
     report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 
-    report << '| ' << "Execution Events".center(76) << " |\n"
+    report << '| ' << "Execution Events".center( 76 ) << " |\n"
     report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-    report << "| %-66s | %7i |\n" % ["Semantic Predicates Evaluated", semantic_predicates]
-    report << "| %-66s | %7i |\n" % ["Syntactic Predicates Evaluated", syntactic_predicates]
-    report << "| %-66s | %7i |\n" % ["Errors Reported", reported_errors]
+    report << "| %-66s | %7i |\n" % [ "Semantic Predicates Evaluated", semantic_predicates ]
+    report << "| %-66s | %7i |\n" % [ "Syntactic Predicates Evaluated", syntactic_predicates ]
+    report << "| %-66s | %7i |\n" % [ "Errors Reported", reported_errors ]
     report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 
-    report << '| ' << "Token and Character Data".center(76) << " |\n"
+    report << '| ' << "Token and Character Data".center( 76 ) << " |\n"
     report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-    report << "| %-66s | %7i |\n" % ["Tokens Consumed", tokens]
-    report << "| %-66s | %7i |\n" % ["Hidden Tokens Consumed", hidden_tokens]
-    report << "| %-66s | %7i |\n" % ["Characters Matched", characters_matched]
-    report << "| %-66s | %7i |\n" % ["Hidden Characters Matched", hidden_characters_matched]
+    report << "| %-66s | %7i |\n" % [ "Tokens Consumed", tokens ]
+    report << "| %-66s | %7i |\n" % [ "Hidden Tokens Consumed", hidden_tokens ]
+    report << "| %-66s | %7i |\n" % [ "Characters Matched", characters_matched ]
+    report << "| %-66s | %7i |\n" % [ "Hidden Characters Matched", hidden_characters_matched ]
     report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 
-    report << '| ' << "Memoization".center(76) << " |\n"
+    report << '| ' << "Memoization".center( 76 ) << " |\n"
     report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-    report << "| %-66s | %7i |\n" % ["Cache Entries", memoization_cache_entries]
-    report << "| %-66s | %7i |\n" % ["Cache Hits", memoization_cache_hits]
-    report << "| %-66s | %7i |\n" % ["Cache Misses", memoization_cache_misses]
+    report << "| %-66s | %7i |\n" % [ "Cache Entries", memoization_cache_entries ]
+    report << "| %-66s | %7i |\n" % [ "Cache Hits", memoization_cache_hits ]
+    report << "| %-66s | %7i |\n" % [ "Cache Misses", memoization_cache_misses ]
     report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 
-    [
-      ['Fixed Lookahead (k)', fixed_looks],
-      ['Arbitrary Lookahead (k)', cyclic_looks],
-      ['Backtracking (Syntactic Predicate)', syntactic_predicate_looks]
+    [
+      [ 'Fixed Lookahead (k)', fixed_looks ],
+      [ 'Arbitrary Lookahead (k)', cyclic_looks ],
+      [ 'Backtracking (Syntactic Predicate)', syntactic_predicate_looks ]
     ].each do |name, set|
       mean, stdev = '%4.2f' % set.average, '%4.2f' % set.standard_deviation
-      report << '| ' << "#{name} Decisions".center(76) << " |\n"
+      report << '| ' << "#{ name } Decisions".center( 76 ) << " |\n"
       report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-      report << "| %-66s | %7i |\n" % ["Count", set.length]
-      report << "| %-66s | %7i |\n" % ["Minimum k", set.min]
-      report << "| %-66s | %7i |\n" % ["Maximum k", set.max]
-      report << "| %-66s | %7s |\n" % ["Average k", mean]
-      report << "| %-66s | %7s |\n" % ["Standard Deviation of k", stdev]
+      report << "| %-66s | %7i |\n" % [ "Count", set.length ]
+      report << "| %-66s | %7i |\n" % [ "Minimum k", set.min ]
+      report << "| %-66s | %7i |\n" % [ "Maximum k", set.max ]
+      report << "| %-66s | %7s |\n" % [ "Average k", mean ]
+      report << "| %-66s | %7s |\n" % [ "Standard Deviation of k", stdev ]
       report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
     end
-    return(report)
+    return( report )
   end
 end
 
@@ -156,7 +156,8 @@ builds a simple report to present the various statistics.
 
 =end
 class Profiler
-  include
+  include Debug::EventListener
+  include Constants
 
   PROTOCOL_VERSION = 2
 
@@ -176,7 +177,7 @@ class Profiler
 
   attr_accessor :output
 
-  def initialize(parser = nil, output = nil)
+  def initialize( parser = nil, output = nil )
     @parser = parser
     @profile = nil
     @rule_level = 0
@@ -196,7 +197,7 @@ class Profiler
     @look_stack = []
   end
 
-  def enter_rule(grammar_file_name, rule_name)
+  def enter_rule( grammar_file_name, rule_name )
     if @rule_level.zero?
       commence
       @profile.grammar_file = grammar_file_name
@@ -209,13 +210,13 @@ class Profiler
     @profile.rule_invocation_depth = @rule_level
   end
 
-  def exit_rule(grammar_file_name, rule_name)
+  def exit_rule( grammar_file_name, rule_name )
     @rule_level -= 1
   end
 
-  def examine_rule_memoization(rule)
-    stop_index = parser.rule_memoization(rule, @parser.input.index)
-    if stop_index ==
+  def examine_rule_memoization( rule )
+    stop_index = parser.rule_memoization( rule, @parser.input.index )
+    if stop_index == MEMO_RULE_UNKNOWN
       @profile.memoization_cache_misses += 1
       @profile.guessing_rule_invocations += 1
     else
@@ -223,18 +224,18 @@ class Profiler
     end
   end
 
-  def memoize(rule, start_index, success)
+  def memoize( rule, start_index, success )
     @profile.memoization_cache_entries += 1
   end
 
 
-  def enter_decision(decision_number)
+  def enter_decision( decision_number )
     @decision_level += 1
     starting_look_index = @parser.token_stream.index
     @look_stack << starting_look_index
   end
 
-  def exit_decision(decision_number)
+  def exit_decision( decision_number )
     @look_stack.pop
     @decision_level -= 1
     if @parser.cyclic_decision? then
@@ -246,39 +247,39 @@ class Profiler
      @decision_look = 0
    end
 
-  def consume_token(token)
+  def consume_token( token )
     @last_token = token
   end
 
   def in_decision?
-    return(@decision_level > 0)
+    return( @decision_level > 0 )
   end
 
-  def consume_hidden_token(token)
+  def consume_hidden_token( token )
     @last_token = token
   end
 
-  def look(i, token)
+  def look( i, token )
     in_decision? or return
     starting_index = look_stack.last
     input = @parser.token_stream
     this_ref_index = input.index
-    num_hidden = input.tokens(starting_index, this_ref_index).count { |t| t.hidden? }
+    num_hidden = input.tokens( starting_index, this_ref_index ).count { |t| t.hidden? }
     depth = i + this_ref_index - starting_index - num_hidden
     if depth > @decision_look
       @decision_look = depth
     end
   end
 
-  def end_backtrack(level, successful)
+  def end_backtrack( level, successful )
     @profile.syntactic_predicate_looks << @decision_look
   end
 
-  def recognition_exception(error)
+  def recognition_exception( error )
     @profile.reported_errors += 1
   end
 
-  def semantic_predicate(result, predicate)
+  def semantic_predicate( result, predicate )
     in_decision? and @profile.semantic_predicates += 1
   end
 
@@ -287,10 +288,10 @@ class Profiler
     hidden_tokens = input.select { |token| token.hidden? }
     @profile.hidden_tokens = hidden_tokens.length
     @profile.tokens = input.tokens.length
-    @profile.hidden_characters_matched = hidden_tokens.inject(0) do |count, token|
+    @profile.hidden_characters_matched = hidden_tokens.inject( 0 ) do |count, token|
       count + token.text.length rescue count
     end
-    @profile.characters_matched = (@last_token || input.tokens.last).stop + 1
+    @profile.characters_matched = ( @last_token || input.tokens.last ).stop + 1
     write_report
   end
 
@@ -299,17 +300,17 @@ class Profiler
     @output << @profile.generate_report unless @output.nil?
   rescue NoMethodError => error
     if error.name.to_s == '<<'
-      warn(<<-END.strip! % [__FILE__, __LINE__, @output])
+      warn( <<-END.strip! % [ __FILE__, __LINE__, @output ] )
        [%s @ %s]: failed to write report to %p as it does not respond to :<<
       END
     else raise
     end
   rescue IOError => error
-    $stderr.puts( Util.tidy(<<-END) % [__FILE__, __LINE__, @output, error.class, error.message])
+    $stderr.puts( Util.tidy( <<-END ) % [ __FILE__, __LINE__, @output, error.class, error.message ] )
      | [%s @ %s]: failed to write profile report to %p due to an IO Error:
      | %s: %s
      END
-    $stderr.puts(error.backtrace.map { |call| " - #{call}" }.join("\n"))
+    $stderr.puts( error.backtrace.map { |call| " - #{ call }" }.join( "\n" ) )
   end
 
   def report
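The profiling hooks changed above are not usually called by hand: when a grammar is compiled with ANTLR's -profile switch, the generated parser mixes in ANTLR3::Profile::ParserEvents, whose initialize (first hunk above) installs a Profiler as the debug listener. A minimal usage sketch under that assumption — the Tiny grammar module and its expression rule are hypothetical names, while profile and generate_report are the methods visible in the hunks above:

  require 'antlr3'

  lexer  = Tiny::Lexer.new( '1 + 2 * 3' )
  tokens = ANTLR3::CommonTokenStream.new( lexer )
  parser = Tiny::Parser.new( tokens )          # parser generated with -profile
  parser.expression                            # invoke the top-level rule
  puts parser.profile.generate_report          # print the boxed statistics tables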
data/lib/antlr3/recognizers.rb
CHANGED
@@ -4,7 +4,7 @@
 =begin LICENSE
 
 [The "BSD licence"]
-Copyright (c) 2009 Kyle Yetter
+Copyright (c) 2009-2010 Kyle Yetter
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -33,9 +33,9 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =end
 
 module ANTLR3
-unless const_defined?(:RecognizerSharedState)
+unless const_defined?( :RecognizerSharedState )
 
-RecognizerSharedState = Struct.new(
+RecognizerSharedState = Struct.new(
   :following,
   :error_recovery,
   :last_error_index,
@@ -89,9 +89,10 @@ text::
   the text of the target token
 
 =end
+
 class RecognizerSharedState
   def initialize
-    super([], false, -1, 0, nil, 0, nil, -1)
+    super( [], false, -1, 0, nil, 0, nil, -1 )
     # ^-- same as this --v
     # self.following = []
     # self.error_recovery = false
@@ -123,18 +124,18 @@ end
 end
 
 
-=begin rdoc ANTLR3::BaseRecognizer
+=begin rdoc ANTLR3::Recognizer
 
-= BaseRecognizer
+= Recognizer
 
-As the base class of all ANTLR-generated recognizers, BaseRecognizer provides
+As the base class of all ANTLR-generated recognizers, Recognizer provides
 much of the shared functionality and structure used in the recognition process.
 For all effective purposes, the class and its immediate subclasses Lexer,
 Parser, and TreeParser are abstract classes. They can be instantiated, but
 they're pretty useless on their own. Instead, to make useful code, you write an
 ANTLR grammar and ANTLR will generate classes which inherit from one of the
 recognizer base classes, providing the implementation of the grammar rules
-itself. this group of classes to implement necessary tasks.
+itself. this group of classes to implement necessary tasks. Recognizer
 defines methods related to:
 
 * token and character matching
@@ -145,17 +146,13 @@ defines methods related to:
 * simple rule tracing and debugging
 
 =end
-class BaseRecognizer
+
+class Recognizer
   include Constants
   include Error
   include TokenFactory
   extend ClassMacros
 
-  MEMO_RULE_FAILED = -2
-  MEMO_RULE_UNKNOWN = -1
-  DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL
-  HIDDEN = HIDDEN_CHANNEL
-
   @rules = {}
 
   # inherited class methods and hooks
@@ -178,7 +175,7 @@ class BaseRecognizer
     @antlr_version_string = antlr_version.freeze
     @library_version = Util.parse_version( library_version )
     if @antlr_version_string =~ /^(\d+)\.(\d+)(?:\.(\d+)(?:b(\d+))?)?(.*)$/
-      @antlr_version = [$1, $2, $3, $4].map! { |str| str.to_i }
+      @antlr_version = [ $1, $2, $3, $4 ].map! { |str| str.to_i }
       timestamp = $5.strip
       #@antlr_release_time = $5.empty? ? nil : Time.parse($5)
     else
@@ -195,11 +192,11 @@ class BaseRecognizer
     # require additional custom members will have a rule-return
     # name constant that just points to the generic return
     # value.
-    def define_return_scope(*members)
+    def define_return_scope( *members )
       if members.empty? then generic_return_scope
       else
         members += return_scope_members
-        Struct.new(*members)
+        Struct.new( *members )
       end
     end
 
@@ -209,15 +206,15 @@ class BaseRecognizer
     # this method to add an extra +:tree+ field to
     # all rule return structures.
     def return_scope_members
-      [:start, :stop]
+      [ :start, :stop ]
     end
 
     # sets up and returns the generic rule return
     # scope for a recognizer
    def generic_return_scope
      @generic_return_scope ||= begin
-        struct = Struct.new(*return_scope_members)
-        const_set(:Return, struct)
+        struct = Struct.new( *return_scope_members )
+        const_set( :Return, struct )
      end
    end
 
@@ -245,7 +242,7 @@ class BaseRecognizer
 
     def imports( *grammar_names )
       for grammar in grammar_names
-        imported_grammars.add?(grammar.to_sym) and
+        imported_grammars.add?( grammar.to_sym ) and
           attr_reader( Util.snake_case( grammar ) )
       end
       return imported_grammars
@@ -306,10 +303,10 @@ class BaseRecognizer
   # See the main recognizer subclasses for more specific
   # information about creating recognizer objects like
   # lexers and parsers.
-  def initialize(options = {})
-    @state = options[:state] || RecognizerSharedState.new
-    @error_output = options.fetch(:error_output, $stderr)
-    defined?(@input) or @input = nil
+  def initialize( options = {} )
+    @state = options[ :state ] || RecognizerSharedState.new
+    @error_output = options.fetch( :error_output, $stderr )
+    defined?( @input ) or @input = nil
     initialize_dfas
   end
 
@@ -331,15 +328,15 @@ class BaseRecognizer
   # the symbol doesn't match, attempt to use the follow-set
   # data provided by +follow+ to recover from the mismatched
   # token.
-  def match(type, follow)
-    matched_symbol =
+  def match( type, follow )
+    matched_symbol = current_symbol
     if @input.peek == type
       @input.consume
       @state.error_recovery = false
       return matched_symbol
     end
-    raise(BacktrackingFailed) if @state.backtracking > 0
-    matched_symbol = recover_from_mismatched_token(type, follow)
+    raise( BacktrackingFailed ) if @state.backtracking > 0
+    matched_symbol = recover_from_mismatched_token( type, follow )
     return matched_symbol
   end
 
@@ -359,7 +356,7 @@
   # hook for carrying out the error reporting process. The
   # default implementation calls +display_recognition_error+
   # to display the error info on $stderr.
-  def report_error(e = $!)
+  def report_error( e = $! )
     @state.error_recovery and return
     @state.syntax_errors += 1
     @state.error_recovery = true
@@ -371,87 +368,95 @@ class BaseRecognizer
   # message text using +error_header+ and +error_message+,
   # and calls +emit_error_message+ to write the error
   # message out to some source
-  def display_recognition_error(e = $!)
-    header = error_header(e)
-    message = error_message(e)
-    emit_error_message("#{header} #{message}")
+  def display_recognition_error( e = $! )
+    header = error_header( e )
+    message = error_message( e )
+    emit_error_message( "#{ header } #{ message }" )
   end
 
   # used to construct an appropriate error message
   # based on the specific type of error and the
   # error's attributes
-  def error_message(e = $!)
+  def error_message( e = $! )
     case e
-    when UnwantedToken
-      token_name = token_name(e.expecting)
-      "extraneous input #{token_error_display(e.unexpected_token)} expecting #{token_name}"
-    when MissingToken
-      token_name = token_name(e.expecting)
-      "missing #{token_name} at #{token_error_display(e.symbol)}"
-    when MismatchedToken
-      token_name = token_name(e.expecting)
-      "mismatched input #{token_error_display(e.symbol)} expecting #{token_name}"
-    when MismatchedTreeNode
-      token_name = token_name(e.expecting)
-      "mismatched tree node: #{e.symbol} expecting #{token_name}"
-    when NoViableAlternative
-      "no viable alternative at input " << token_error_display(e.symbol)
-    when MismatchedSet
+    when UnwantedToken
+      token_name = token_name( e.expecting )
+      "extraneous input #{ token_error_display( e.unexpected_token ) } expecting #{ token_name }"
+    when MissingToken
+      token_name = token_name( e.expecting )
+      "missing #{ token_name } at #{ token_error_display( e.symbol ) }"
+    when MismatchedToken
+      token_name = token_name( e.expecting )
+      "mismatched input #{ token_error_display( e.symbol ) } expecting #{ token_name }"
+    when MismatchedTreeNode
+      token_name = token_name( e.expecting )
+      "mismatched tree node: #{ e.symbol } expecting #{ token_name }"
+    when NoViableAlternative
+      "no viable alternative at input " << token_error_display( e.symbol )
+    when MismatchedSet
       "mismatched input %s expecting set %s" %
-        [token_error_display(e.symbol), e.expecting.inspect]
-    when MismatchedNotSet
+        [ token_error_display( e.symbol ), e.expecting.inspect ]
+    when MismatchedNotSet
       "mismatched input %s expecting set %s" %
-        [token_error_display(e.symbol), e.expecting.inspect]
-    when FailedPredicate
-      "rule %s failed predicate: { %s }?" % [e.rule_name, e.predicate_text]
+        [ token_error_display( e.symbol ), e.expecting.inspect ]
+    when FailedPredicate
+      "rule %s failed predicate: { %s }?" % [ e.rule_name, e.predicate_text ]
     else e.message
     end
   end
 
+  #
   # used to add a tag to the error message that indicates
   # the location of the input stream when the error
   # occurred
-  def error_header(e = $!)
+  #
+  def error_header( e = $! )
     e.location
   end
 
+  #
   # formats a token object appropriately for inspection
   # within an error message
-  def token_error_display(token)
-
-
-
-
-
-
-
+  #
+  def token_error_display( token )
+    unless text = token.text || ( token.source_text rescue nil )
+      text =
+        case
+        when token.type == EOF then '<EOF>'
+        when name = token_name( token.type ) rescue nil then "<#{ name }>"
+        when token.respond_to?( :name ) then "<#{ token.name }>"
+        else "<#{ token.type }>"
+        end
    end
    return text.inspect
  end
 
+  #
   # Write the error report data out to some source. By default,
   # the error message is written to $stderr
-  def emit_error_message(message)
-
+  #
+  def emit_error_message( message )
+    @error_output.puts( message ) if @error_output
   end
 
   ##############################################################################################
   ###################################### Error Recovery ########################################
   ##############################################################################################
-  def recover(error = $!)
+
+  def recover( error = $! )
     @state.last_error_index == @input.index and @input.consume
     @state.last_error_index = @input.index
 
     follow_set = compute_error_recovery_set
 
-    resync { consume_until(follow_set) }
+    resync { consume_until( follow_set ) }
  end
 
  def resync
    begin_resync
-    value = yield
+    return( yield )
+  ensure
    end_resync
-    return(value)
  end
 
  # overridable hook method that is executed at the start of the
@@ -504,9 +509,9 @@ class BaseRecognizer
  # that rule is pushed on a stack. Here are the various "local"
  # follow sets:
  #
-  # FOLLOW(b1_in_a) = FIRST(']') = ']'
-  # FOLLOW(b2_in_a) = FIRST(')') = ')'
-  # FOLLOW(c_in_b) = FIRST('^') = '^'
+  # FOLLOW( b1_in_a ) = FIRST( ']' ) = ']'
+  # FOLLOW( b2_in_a ) = FIRST( ')' ) = ')'
+  # FOLLOW( c_in_b ) = FIRST( '^' ) = '^'
  #
  # Upon erroneous input "[]", the call chain is
  #
@@ -515,7 +520,7 @@
  # and, hence, the follow context stack is:
  #
  #   depth   local follow set   after call to rule
-  #     0       \<EOF>            a (from main())
+  #     0       \<EOF>            a (from main( ) )
  #     1        ']'              b
  #     3        '^'              c
  #
@@ -563,44 +568,56 @@ class BaseRecognizer
  # Like Grosch I implemented local FOLLOW sets that are combined
  # at run-time upon error to avoid overhead during parsing.
  def compute_error_recovery_set
-    combine_follows(false)
+    combine_follows( false )
  end
-
-  def recover_from_mismatched_token(type, follow)
-    if mismatch_is_unwanted_token?(type)
-      err = UnwantedToken(type)
+
+
+  def recover_from_mismatched_token( type, follow )
+    if mismatch_is_unwanted_token?( type )
+      err = UnwantedToken( type )
 
-
-
-
+      resync do
+        @input.consume
+      end
 
-      report_error(err)
+      report_error( err )
 
-      matched_symbol =
+      matched_symbol = current_symbol
      @input.consume
      return matched_symbol
    end
 
-    if mismatch_is_missing_token?(follow)
-      inserted = missing_symbol(err, type, follow)
-      err = MissingToken(type, inserted)
+    if mismatch_is_missing_token?( follow )
+      inserted = missing_symbol( err, type, follow )
+      err = MissingToken( type, inserted )
 
-      report_error(err)
+      report_error( err )
      return inserted
    end
 
-
-    raise err
+    raise MismatchedToken( type )
  end
 
-  def recover_from_mismatched_set(e, follow)
-    if mismatch_is_missing_token?(follow)
-      report_error(e)
-      return missing_symbol(e, INVALID_TOKEN_TYPE, follow)
+  def recover_from_mismatched_set( e, follow )
+    if mismatch_is_missing_token?( follow )
+      report_error( e )
+      return missing_symbol( e, INVALID_TOKEN_TYPE, follow )
    end
    raise e
  end
 
+  def recover_from_mismatched_element( e, follow )
+    follow.nil? and return false
+    if follow.include?( EOR_TOKEN_TYPE )
+      viable_tokens = compute_context_sensitive_rule_follow
+      follow = ( follow | viable_tokens ) - Set[ EOR_TOKEN_TYPE ]
+    end
+    if follow.include?( @input.peek )
+      report_error( e )
+      return true
+    end
+    return false
+  end
+
  # Conjure up a missing token during error recovery.
  #
  # The recognizer attempts to recover from single missing
@@ -619,41 +636,32 @@ class BaseRecognizer
  # a CommonToken of the appropriate type. The text will be the token.
  # If you change what tokens must be created by the lexer,
  # override this method to create the appropriate tokens.
-  def missing_symbol(error, expected_token_type, follow)
+  def missing_symbol( error, expected_token_type, follow )
    return nil
  end
 
-  def recover_from_mismatched_element(e, follow)
-
-    if follow.include?(EOR_TOKEN_TYPE)
-      viable_tokens = compute_context_sensitive_rule_follow()
-      follow = (follow | viable_tokens) - Set.new([EOR_TOKEN_TYPE])
-    end
-    if follow.include?(@input.peek)
-      report_error(e)
-      return true
-    end
-    return false
-  end
-
-  def mismatch_is_unwanted_token?(type)
-    @input.peek(2) == type
+  def mismatch_is_unwanted_token?( type )
+    @input.peek( 2 ) == type
  end
 
-  def mismatch_is_missing_token?(follow)
+  def mismatch_is_missing_token?( follow )
    follow.nil? and return false
-    if follow.include?(EOR_TOKEN_TYPE)
+    if follow.include?( EOR_TOKEN_TYPE )
      viable_tokens = compute_context_sensitive_rule_follow
      follow = follow | viable_tokens
 
-      follow.delete(EOR_TOKEN_TYPE) unless @state.following.empty?
+      follow.delete( EOR_TOKEN_TYPE ) unless @state.following.empty?
    end
-    if follow.include?(@input.peek) or follow.include?(EOR_TOKEN_TYPE)
+    if follow.include?( @input.peek ) or follow.include?( EOR_TOKEN_TYPE )
      return true
    end
    return false
  end
 
+  def syntax_errors?
+    ( error_count = @state.syntax_errors ) > 0 and return( error_count )
+  end
+
  # factor out what to do upon token mismatch so
  # tree parsers can behave differently.
  #
@@ -666,7 +674,8 @@ class BaseRecognizer
    @state.syntax_errors
  end
 
-  #
+  #
+  # Compute the context-sensitive +FOLLOW+ set for current rule.
  # This is set of token types that can follow a specific rule
  # reference given a specific call chain. You get the set of
  # viable tokens that can possibly come next (look depth 1)
@@ -717,17 +726,18 @@ class BaseRecognizer
  # the viable next token set, then you know there is most likely
  # a missing token in the input stream. "Insert" one by just not
  # throwing an exception.
+  #
  def compute_context_sensitive_rule_follow
-    combine_follows
+    combine_follows true
  end
-
-  def combine_follows(exact)
+
+  def combine_follows( exact )
    follow_set = Set.new
    @state.following.each_with_index.reverse_each do |local_follow_set, index|
      follow_set |= local_follow_set
      if exact
-        if local_follow_set.include?(EOR_TOKEN_TYPE)
-          follow_set.delete(EOR_TOKEN_TYPE) if index > 0
+        if local_follow_set.include?( EOR_TOKEN_TYPE )
+          follow_set.delete( EOR_TOKEN_TYPE ) if index > 0
        else
          break
        end
@@ -736,6 +746,7 @@ class BaseRecognizer
    return follow_set
  end
 
+  #
  # Match needs to return the current input symbol, which gets put
  # into the label for the associated token ref; e.g., x=ID. Token
  # and tree parsers need to return different objects. Rather than test
@@ -744,28 +755,39 @@ class BaseRecognizer
  # input symbol is.
  #
  # This is ignored for lexers.
-
+  #
+  def current_symbol
    @input.look
  end
 
-  # Consume tokens until one matches the given token or token set
  #
-  #
-
-
+  # Consume input symbols until one matches a type within types
+  #
+  # types can be a single symbol type or a set of symbol types
+  #
+  def consume_until( types )
+    types.is_a?( Set ) or types = Set[ *types ]
    type = @input.peek
-    until type == EOF or
+    until type == EOF or types.include?( type )
      @input.consume
      type = @input.peek
    end
-    return(type)
+    return( type )
+  end
+
+  #
+  # Returns true if the recognizer is currently in a decision for which
+  # backtracking has been enabled
+  #
+  def backtracking?
+    @state.backtracking > 0
  end
 
  def backtracking_level
    @state.backtracking
  end
 
-  def backtracking_level=(n)
+  def backtracking_level=( n )
    @state.backtracking = n
  end
 
@@ -779,20 +801,21 @@ class BaseRecognizer
    end
    return success
  ensure
-    @input.rewind(start)
+    @input.rewind( start )
    @state.backtracking -= 1
  end
 
-  def syntactic_predicate?(name)
-    backtrack { send
+  def syntactic_predicate?( name )
+    backtrack { send name }
  end
 
  alias backtracking backtracking_level
  alias backtracking= backtracking_level=
 
  def rule_memoization( rule, start_index )
-    @state.rule_memory
-
+    @state.rule_memory.fetch( rule ) do
+      @state.rule_memory[ rule ] = Hash.new( MEMO_RULE_UNKNOWN )
+    end[ start_index ]
  end
 
  def already_parsed_rule?( rule )
@@ -807,40 +830,45 @@ class BaseRecognizer
    return true
  end
 
-  def memoize(rule, start_index, success)
-    stop_index = success ?
-    memo = @state.rule_memory[rule] and memo[start_index] = stop_index
+  def memoize( rule, start_index, success )
+    stop_index = success ? @input.index - 1 : MEMO_RULE_FAILED
+    memo = @state.rule_memory[ rule ] and memo[ start_index ] = stop_index
  end
 
-  def trace_in(rule_name, rule_index, input_symbol)
+  def trace_in( rule_name, rule_index, input_symbol )
    @error_output.printf( "--> enter %s on %s", rule_name, input_symbol )
-    @state.backtracking > 0 and @error_output.printf(
+    @state.backtracking > 0 and @error_output.printf(
      " (in backtracking mode: depth = %s)", @state.backtracking
    )
-    @error_output.print("\n")
+    @error_output.print( "\n" )
  end
 
-  def trace_out(rule_name, rule_index, input_symbol)
-    @error_output.printf("<-- exit %s on %s", rule_name, input_symbol)
-    @state.backtracking > 0 and @error_output.printf(
+  def trace_out( rule_name, rule_index, input_symbol )
+    @error_output.printf( "<-- exit %s on %s", rule_name, input_symbol )
+    @state.backtracking > 0 and @error_output.printf(
      " (in backtracking mode: depth = %s)", @state.backtracking
    )
-    @error_output.print("\n")
+    @error_output.print( "\n" )
  end
 
-
+private
 
  def initialize_dfas
    # do nothing
  end
 end
 
+
+# constant alias for compatibility with older versions of the
+# runtime library
+BaseRecognizer = Recognizer
+
 =begin rdoc ANTLR3::Lexer
 
 = Lexer
 
 Lexer is the default superclass of all lexers generated by ANTLR. The class
-tailors the core functionality provided by
+tailors the core functionality provided by Recognizer to the task of
 matching patterns in the text input and breaking the input into tokens.
 
 == About Lexers
@@ -899,19 +927,19 @@ demonstrates the typical setup for using ANTLR parsers and lexers in Ruby.
 
   source = "some hypothetical source code"
   input = ANTLR3::StringStream.new(source, :file => 'blah-de-blah.hyp')
-  lexer = Hypothetical::Lexer.new(input)
-  tokens = ANTLR3::CommonTokenStream.new(lexer)
-  parser = Hypothetical::Parser.new(tokens)
+  lexer = Hypothetical::Lexer.new( input )
+  tokens = ANTLR3::CommonTokenStream.new( lexer )
+  parser = Hypothetical::Parser.new( tokens )
 
   # if you're using the standard streams, ANTLR3::StringStream and
   # ANTLR3::CommonTokenStream, you can write the same process
   # shown above more succinctly:
 
   lexer = Hypothetical::Lexer.new("some hypothetical source code", :file => 'blah-de-blah.hyp')
-  parser = Hypothetical::Parser.new(lexer)
+  parser = Hypothetical::Parser.new( lexer )
 
 =end
-class Lexer <
+class Lexer < Recognizer
  include TokenSource
  @token_class = CommonToken
 
@@ -919,36 +947,31 @@ class Lexer < BaseRecognizer
    @default_rule ||= :token!
  end
 
-  def self.main(argv = ARGV, options = {})
-    if argv.is_a?(::Hash) then argv, options = ARGV, argv end
-    main = ANTLR3::Main::LexerMain.new(self, options)
-    block_given? ? yield(main) : main.execute(argv)
+  def self.main( argv = ARGV, options = {} )
+    if argv.is_a?( ::Hash ) then argv, options = ARGV, argv end
+    main = ANTLR3::Main::LexerMain.new( self, options )
+    block_given? ? yield( main ) : main.execute( argv )
  end
 
  def self.associated_parser
    @associated_parser ||= begin
      @grammar_home and @grammar_home::Parser
    rescue NameError
-      grammar_name = @grammar_home.name.split("::").last
+      grammar_name = @grammar_home.name.split( "::" ).last
      begin
-        require "#{grammar_name}Parser"
+        require "#{ grammar_name }Parser"
        @grammar_home::Parser
      rescue LoadError, NameError
      end
    end
  end
 
-  def initialize(input, options = {})
+  def initialize( input, options = {} )
    super( options )
-    @input =
-      case input
-      when ::String then StringStream.new(input, options)
-      when ::IO then FileStream.new(input, options)
-      else input
-      end
+    @input = cast_input( input, options )
  end
 
-  def
+  def current_symbol
    nil
  end
 
@@ -965,16 +988,16 @@ class Lexer < BaseRecognizer
        token!
 
        case token = @state.token
-        when nil then return(emit
+        when nil then return( emit )
        when SKIP_TOKEN then next
        else
          return token
        end
      rescue NoViableAlternative => re
-        report_error(re)
-        recover(re)
+        report_error( re )
+        recover( re )
      rescue Error::RecognitionError => re
-        report_error(re)
+        report_error( re )
      end
    end
  end
@@ -989,7 +1012,7 @@ class Lexer < BaseRecognizer
    self.to_a
  end
 
-  def char_stream=(input)
+  def char_stream=( input )
    @input = nil
    reset()
    @input = input
@@ -1005,14 +1028,14 @@ class Lexer < BaseRecognizer
    return token
  end
 
-  def match(expected)
+  def match( expected )
    case expected
    when String
      expected.each_byte do |char|
        unless @input.peek == char
          @state.backtracking > 0 and raise BacktrackingFailed
-          error = MismatchedToken(char)
-          recover(error)
+          error = MismatchedToken( char )
+          recover( error )
          raise error
        end
        @input.consume()
@@ -1020,8 +1043,8 @@
    else # single integer character
      unless @input.peek == expected
        @state.backtracking > 0 and raise BacktrackingFailed
-        error = MismatchedToken(expected)
-        recover(error)
+        error = MismatchedToken( expected )
+        recover( error )
        raise error
      end
      @input.consume
@@ -1033,14 +1056,14 @@
    @input.consume
  end
 
-  def match_range(min, max)
+  def match_range( min, max )
    char = @input.peek
-    if char.between?(min, max) then @input.consume
+    if char.between?( min, max ) then @input.consume
    else
      @state.backtracking > 0 and raise BacktrackingFailed
-      error = MismatchedRange(min.chr, max.chr)
-      recover(error)
-      raise(error)
+      error = MismatchedRange( min.chr, max.chr )
+      recover( error )
+      raise( error )
    end
    return true
  end
@@ -1059,40 +1082,40 @@ class Lexer < BaseRecognizer
 
  def text
    @state.text and return @state.text
-    @input.substring(@state.token_start_position, character_index - 1)
+    @input.substring( @state.token_start_position, character_index - 1 )
  end
 
-  def text=(text)
+  def text=( text )
    @state.text = text
  end
 
-  def report_error(e)
-    display_recognition_error(e)
+  def report_error( e )
+    display_recognition_error( e )
  end
 
-  def error_message(e)
-    char = character_error_display(e.symbol) rescue nil
+  def error_message( e )
+    char = character_error_display( e.symbol ) rescue nil
    case e
    when Error::MismatchedToken
-      expecting = character_error_display(e.expecting)
-      "mismatched character #{char}; expecting #{expecting}"
+      expecting = character_error_display( e.expecting )
+      "mismatched character #{ char }; expecting #{ expecting }"
    when Error::NoViableAlternative
-      "no viable alternative at character #{char}"
+      "no viable alternative at character #{ char }"
    when Error::EarlyExit
-      "required (...)+ loop did not match anything at character #{char}"
+      "required ( ... )+ loop did not match anything at character #{ char }"
    when Error::MismatchedNotSet
-      "mismatched character %s; expecting set %p" % [char, e.expecting]
+      "mismatched character %s; expecting set %p" % [ char, e.expecting ]
    when Error::MismatchedSet
-      "mismatched character %s; expecting set %p" % [char, e.expecting]
+      "mismatched character %s; expecting set %p" % [ char, e.expecting ]
    when Error::MismatchedRange
-      a = character_error_display(e.min)
-      b = character_error_display(e.max)
-      "mismatched character %s; expecting set %s..%s" % [char, a, b]
+      a = character_error_display( e.min )
+      b = character_error_display( e.max )
+      "mismatched character %s; expecting set %s..%s" % [ char, a, b ]
    else super
    end
  end
 
-  def character_error_display(char)
+  def character_error_display( char )
    case char
    when EOF then '<EOF>'
    when Integer then char.chr.inspect
@@ -1100,29 +1123,39 @@ class Lexer < BaseRecognizer
    end
  end
 
-  def recover(re)
+  def recover( re )
    @input.consume
  end
 
+  alias input= char_stream=
 
 private
 
-  def trace_in(rule_name, rule_index)
+  def cast_input( input, options )
+    case input
+    when CharacterStream then input
+    when ::String then StringStream.new( input, options )
+    when ::IO then FileStream.new( input, options )
+    else input
+    end
+  end
+
+  def trace_in( rule_name, rule_index )
    if symbol = @input.look and symbol != EOF then symbol = symbol.inspect
    else symbol = '<EOF>' end
-    input_symbol = "#{symbol} @ line #{line} / col #{column}"
-    super(rule_name, rule_index, input_symbol)
+    input_symbol = "#{ symbol } @ line #{ line } / col #{ column }"
+    super( rule_name, rule_index, input_symbol )
  end
 
-  def trace_out(rule_name, rule_index)
+  def trace_out( rule_name, rule_index )
    if symbol = @input.look and symbol != EOF then symbol = symbol.inspect
    else symbol = '<EOF>' end
-    input_symbol = "#{symbol} @ line #{line} / col #{column}"
-    super(rule_name, rule_index, input_symbol)
+    input_symbol = "#{ symbol } @ line #{ line } / col #{ column }"
+    super( rule_name, rule_index, input_symbol )
  end
 
-  def create_token(&b)
-    if block_given? then super(&b)
+  def create_token( &b )
+    if block_given? then super( &b )
    else
      super do |t|
        t.input = @input
@@ -1144,7 +1177,7 @@ end
 = Parser
 
 Parser is the default base class of ANTLR-generated parser classes. The class
-tailors the functionality provided by
+tailors the functionality provided by Recognizer to the task of parsing.
 
 == About Parsing
 
@@ -1171,56 +1204,56 @@ otherwise within the grammar options. The generated code will provide a method
 for each parser rule defined in the ANTLR grammar, as well as any other
 customized member attributes and methods specified in the source grammar.
 
-This class does not override much of the functionality in
-thus the API closely mirrors
+This class does not override much of the functionality in Recognizer, and
+thus the API closely mirrors Recognizer.
 
 =end
-class Parser <
-  def self.main(argv = ARGV, options = {})
-    if argv.is_a?(::Hash) then argv, options = ARGV, argv end
-    main = ANTLR3::Main::ParserMain.new(self, options)
-    block_given? ? yield(main) : main.execute(argv)
+class Parser < Recognizer
+  def self.main( argv = ARGV, options = {} )
+    if argv.is_a?( ::Hash ) then argv, options = ARGV, argv end
+    main = ANTLR3::Main::ParserMain.new( self, options )
+    block_given? ? yield( main ) : main.execute( argv )
  end
 
  def self.associated_lexer
    @associated_lexer ||= begin
      @grammar_home and @grammar_home::Lexer
    rescue NameError
-      grammar_name = @grammar_home.name.split("::").last
+      grammar_name = @grammar_home.name.split( "::" ).last
      begin
-        require "#{grammar_name}Lexer"
+        require "#{ grammar_name }Lexer"
        @grammar_home::Lexer
      rescue LoadError, NameError
      end
    end
  end
 
+
  def initialize( input, options = {} )
    super( options )
    @input = nil
    reset
-    input = cast_input( input, options )
-    @input = input
+    @input = cast_input( input, options )
  end
 
-  def missing_symbol(error, expected_type, follow)
+  def missing_symbol( error, expected_type, follow )
    current = @input.look
-    current = @input.look(-1) if current == ANTLR3::EOF_TOKEN
+    current = @input.look( -1 ) if current == ANTLR3::EOF_TOKEN
    t =
      case
      when current && current != ANTLR3::EOF_TOKEN then current.clone
      when @input.token_class then @input.token_class.new
-      else (create_token rescue CommonToken.new)
+      else ( create_token rescue CommonToken.new )
      end
 
    t.type = expected_type
-    name = t.name.gsub(/(^<)|(>$)/,'')
-    t.text = "<missing #{name}>"
+    name = t.name.gsub( /(^<)|(>$)/,'' )
+    t.text = "<missing #{ name }>"
    t.channel = DEFAULT_CHANNEL
-    return(t)
+    return( t )
  end
 
-  def token_stream=(input)
+  def token_stream=( input )
    @input = nil
    reset
    @input = input
@@ -1231,18 +1264,20 @@ class Parser < BaseRecognizer
    @input.source_name
  end
 
+
 private
 
-  def trace_in(rule_name, rule_index)
+  def trace_in( rule_name, rule_index )
    super( rule_name, rule_index, @input.look.inspect )
  end
 
-  def trace_out(rule_name, rule_index)
+  def trace_out( rule_name, rule_index )
    super( rule_name, rule_index, @input.look.inspect )
  end
 
  def cast_input( input, options )
    case input
+    when TokenStream then input
    when TokenSource then CommonTokenStream.new( input, options )
    when IO, String, CharacterStream
      if lexer_class = self.class.associated_lexer
@@ -1257,7 +1292,7 @@ private
    else
      # assume it's a stream if it at least implements peek and consume
      unless input.respond_to?( :peek ) and input.respond_to?( :consume )
-        raise ArgumentError, Util.tidy(<<-END, true)
+        raise ArgumentError, Util.tidy( <<-END, true )
        | #{ self.class } requires a token stream as input, but
        | #{ input.inspect } was provided
        END