antlr3 1.6.0 → 1.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +8 -0
- data/Manifest.txt +94 -0
- data/README.txt +1 -1
- data/Rakefile +58 -0
- data/bin/antlr4ruby +101 -7
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +38 -10
- data/lib/antlr3/constants.rb +13 -5
- data/lib/antlr3/debug.rb +57 -57
- data/lib/antlr3/dfa.rb +138 -68
- data/lib/antlr3/dot.rb +32 -32
- data/lib/antlr3/error.rb +85 -78
- data/lib/antlr3/main.rb +191 -187
- data/lib/antlr3/profile.rb +71 -70
- data/lib/antlr3/recognizers.rb +261 -226
- data/lib/antlr3/streams.rb +85 -84
- data/lib/antlr3/streams/interactive.rb +20 -27
- data/lib/antlr3/streams/rewrite.rb +89 -89
- data/lib/antlr3/task.rb +42 -33
- data/lib/antlr3/template.rb +2 -2
- data/lib/antlr3/template/group-lexer.rb +1 -1
- data/lib/antlr3/token.rb +76 -68
- data/lib/antlr3/tree.rb +125 -121
- data/lib/antlr3/tree/visitor.rb +1 -1
- data/lib/antlr3/tree/wizard.rb +1 -1
- data/lib/antlr3/util.rb +32 -33
- data/lib/antlr3/version.rb +3 -3
- data/templates/Ruby.stg +1 -1
- data/test/unit/test-streams.rb +11 -10
- data/test/unit/test-template.rb +206 -204
- metadata +4 -2
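
Aside from the pervasive reformatting of argument lists to use space-padded parentheses, the most visible change in this diff is in data/lib/antlr3/recognizers.rb: the BaseRecognizer base class becomes Recognizer, and BaseRecognizer is kept as a constant alias "for compatibility with older versions of the runtime library". A minimal sketch of what that alias means for client code (illustrative only, not taken from the diff itself):

    require 'antlr3'

    # Both constants name the same Class object after the rename, so code
    # written against antlr3 1.6.0 that references BaseRecognizer still works.
    ANTLR3::BaseRecognizer.equal?( ANTLR3::Recognizer )   # => true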
data/lib/antlr3/profile.rb
CHANGED
@@ -13,13 +13,13 @@ switch.
 module ParserEvents
 include ANTLR3::Debug::ParserEvents
 
-def initialize(stream, options = {})
-options[:debug_listener] ||= Profiler.new( self )
+def initialize( stream, options = {} )
+options[ :debug_listener ] ||= Profiler.new( self )
 super( stream, options )
 end
 
-def already_parsed_rule?(rule)
-@debug_listener.examine_rule_memoization(rule)
+def already_parsed_rule?( rule )
+@debug_listener.examine_rule_memoization( rule )
 super
 end
 
@@ -27,8 +27,8 @@ module ParserEvents
 @debug_listener.profile
 end
 
-def memoize(rule, start_index, success)
-@debug_listener.memoize(rule, rule_start_index, sucess)
+def memoize( rule, start_index, success )
+@debug_listener.memoize( rule, rule_start_index, sucess )
 super
 end
 end
@@ -36,18 +36,18 @@ end
 class DataSet < ::Array
 include ::Math
 def total
-inject(:+)
+inject( :+ )
 end
 def average
-length > 0 ? (total.to_f / length) : 0
+length > 0 ? ( total.to_f / length ) : 0
 end
 def variance
-length.zero? and return(0.0)
+length.zero? and return( 0.0 )
 mean = average
-inject(0.0) { |t, i| t + (i - mean)**2 } / (length - 1)
+inject( 0.0 ) { |t, i| t + ( i - mean )**2 } / ( length - 1 )
 end
 def standard_deviation
-sqrt(variance)
+sqrt( variance )
 end
 end
 
@@ -55,8 +55,8 @@ end
 
 
 
-unless const_defined?(:Profile)
-Profile = Struct.new(
+unless const_defined?( :Profile )
+Profile = Struct.new(
 :grammar_file, :parser_class, :top_rule,
 :rule_invocations, :guessing_rule_invocations, :rule_invocation_depth,
 :fixed_looks, :cyclic_looks, :syntactic_predicate_looks,
@@ -69,8 +69,8 @@ end
 
 class Profile
 def initialize
-init_values = Array.new(self.class.members.length, 0)
-super(*init_values)
+init_values = Array.new( self.class.members.length, 0 )
+super( *init_values )
 self.top_rule = self.parser_class = self.grammar_file = nil
 self.fixed_looks = DataSet.new
 self.cyclic_looks = DataSet.new
@@ -91,58 +91,58 @@ class Profile
 
 def generate_report
 report = '+' << '-' * 78 << "+\n"
-report << '| ' << "ANTLR Rule Profile".center(76) << " |\n"
+report << '| ' << "ANTLR Rule Profile".center( 76 ) << " |\n"
 report << '+' << '-' * 78 << "+\n"
-report << "| Generated at #{Time.now}".ljust(78) << " |\n"
-report << "| Profiled #{parser_class.name}##{top_rule}".ljust(78) << " |\n"
-report << "| Rule source generated from grammar file #{grammar_file}".ljust(78) << " |\n"
+report << "| Generated at #{ Time.now }".ljust( 78 ) << " |\n"
+report << "| Profiled #{ parser_class.name }##{ top_rule }".ljust( 78 ) << " |\n"
+report << "| Rule source generated from grammar file #{ grammar_file }".ljust( 78 ) << " |\n"
 report << '+' << '-' * 78 << "+\n"
 
-report << '| ' << "Rule Invocations".center(76) << " |\n"
+report << '| ' << "Rule Invocations".center( 76 ) << " |\n"
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-report << "| %-66s | %7i |\n" % ["Total Invocations", rule_invocations]
-report << "| %-66s | %7i |\n" % ["``Guessing'' Invocations", guessing_rule_invocations]
-report << "| %-66s | %7i |\n" % ["Deepest Level of Invocation", rule_invocation_depth]
+report << "| %-66s | %7i |\n" % [ "Total Invocations", rule_invocations ]
+report << "| %-66s | %7i |\n" % [ "``Guessing'' Invocations", guessing_rule_invocations ]
+report << "| %-66s | %7i |\n" % [ "Deepest Level of Invocation", rule_invocation_depth ]
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 
-report << '| ' << "Execution Events".center(76) << " |\n"
+report << '| ' << "Execution Events".center( 76 ) << " |\n"
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-report << "| %-66s | %7i |\n" % ["Semantic Predicates Evaluated", semantic_predicates]
-report << "| %-66s | %7i |\n" % ["Syntactic Predicates Evaluated", syntactic_predicates]
-report << "| %-66s | %7i |\n" % ["Errors Reported", reported_errors]
+report << "| %-66s | %7i |\n" % [ "Semantic Predicates Evaluated", semantic_predicates ]
+report << "| %-66s | %7i |\n" % [ "Syntactic Predicates Evaluated", syntactic_predicates ]
+report << "| %-66s | %7i |\n" % [ "Errors Reported", reported_errors ]
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 
-report << '| ' << "Token and Character Data".center(76) << " |\n"
+report << '| ' << "Token and Character Data".center( 76 ) << " |\n"
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-report << "| %-66s | %7i |\n" % ["Tokens Consumed", tokens]
-report << "| %-66s | %7i |\n" % ["Hidden Tokens Consumed", hidden_tokens]
-report << "| %-66s | %7i |\n" % ["Characters Matched", characters_matched]
-report << "| %-66s | %7i |\n" % ["Hidden Characters Matched", hidden_characters_matched]
+report << "| %-66s | %7i |\n" % [ "Tokens Consumed", tokens ]
+report << "| %-66s | %7i |\n" % [ "Hidden Tokens Consumed", hidden_tokens ]
+report << "| %-66s | %7i |\n" % [ "Characters Matched", characters_matched ]
+report << "| %-66s | %7i |\n" % [ "Hidden Characters Matched", hidden_characters_matched ]
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 
-report << '| ' << "Memoization".center(76) << " |\n"
+report << '| ' << "Memoization".center( 76 ) << " |\n"
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-report << "| %-66s | %7i |\n" % ["Cache Entries", memoization_cache_entries]
-report << "| %-66s | %7i |\n" % ["Cache Hits", memoization_cache_hits]
-report << "| %-66s | %7i |\n" % ["Cache Misses", memoization_cache_misses]
+report << "| %-66s | %7i |\n" % [ "Cache Entries", memoization_cache_entries ]
+report << "| %-66s | %7i |\n" % [ "Cache Hits", memoization_cache_hits ]
+report << "| %-66s | %7i |\n" % [ "Cache Misses", memoization_cache_misses ]
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 
-[
-['Fixed Lookahead (k)', fixed_looks],
-['Arbitrary Lookahead (k)', cyclic_looks],
-['Backtracking (Syntactic Predicate)', syntactic_predicate_looks]
+[
+[ 'Fixed Lookahead (k)', fixed_looks ],
+[ 'Arbitrary Lookahead (k)', cyclic_looks ],
+[ 'Backtracking (Syntactic Predicate)', syntactic_predicate_looks ]
 ].each do |name, set|
 mean, stdev = '%4.2f' % set.average, '%4.2f' % set.standard_deviation
-report << '| ' << "#{name} Decisions".center(76) << " |\n"
+report << '| ' << "#{ name } Decisions".center( 76 ) << " |\n"
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
-report << "| %-66s | %7i |\n" % ["Count", set.length]
-report << "| %-66s | %7i |\n" % ["Minimum k", set.min]
-report << "| %-66s | %7i |\n" % ["Maximum k", set.max]
-report << "| %-66s | %7s |\n" % ["Average k", mean]
-report << "| %-66s | %7s |\n" % ["Standard Deviation of k", stdev]
+report << "| %-66s | %7i |\n" % [ "Count", set.length ]
+report << "| %-66s | %7i |\n" % [ "Minimum k", set.min ]
+report << "| %-66s | %7i |\n" % [ "Maximum k", set.max ]
+report << "| %-66s | %7s |\n" % [ "Average k", mean ]
+report << "| %-66s | %7s |\n" % [ "Standard Deviation of k", stdev ]
 report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
 end
-return(report)
+return( report )
 end
 end
 
@@ -156,7 +156,8 @@ builds a simple report to present the various statistics.
 
 =end
 class Profiler
-include
+include Debug::EventListener
+include Constants
 
 PROTOCOL_VERSION = 2
 
@@ -176,7 +177,7 @@ class Profiler
 
 attr_accessor :output
 
-def initialize(parser = nil, output = nil)
+def initialize( parser = nil, output = nil )
 @parser = parser
 @profile = nil
 @rule_level = 0
@@ -196,7 +197,7 @@ class Profiler
 @look_stack = []
 end
 
-def enter_rule(grammar_file_name, rule_name)
+def enter_rule( grammar_file_name, rule_name )
 if @rule_level.zero?
 commence
 @profile.grammar_file = grammar_file_name
@@ -209,13 +210,13 @@ class Profiler
 @profile.rule_invocation_depth = @rule_level
 end
 
-def exit_rule(grammar_file_name, rule_name)
+def exit_rule( grammar_file_name, rule_name )
 @rule_level -= 1
 end
 
-def examine_rule_memoization(rule)
-stop_index = parser.rule_memoization(rule, @parser.input.index)
-if stop_index ==
+def examine_rule_memoization( rule )
+stop_index = parser.rule_memoization( rule, @parser.input.index )
+if stop_index == MEMO_RULE_UNKNOWN
 @profile.memoization_cache_misses += 1
 @profile.guessing_rule_invocations += 1
 else
@@ -223,18 +224,18 @@ class Profiler
 end
 end
 
-def memoize(rule, start_index, success)
+def memoize( rule, start_index, success )
 @profile.memoization_cache_entries += 1
 end
 
 
-def enter_decision(decision_number)
+def enter_decision( decision_number )
 @decision_level += 1
 starting_look_index = @parser.token_stream.index
 @look_stack << starting_look_index
 end
 
-def exit_decision(decision_number)
+def exit_decision( decision_number )
 @look_stack.pop
 @decision_level -= 1
 if @parser.cyclic_decision? then
@@ -246,39 +247,39 @@ class Profiler
 @decision_look = 0
 end
 
-def consume_token(token)
+def consume_token( token )
 @last_token = token
 end
 
 def in_decision?
-return(@decision_level > 0)
+return( @decision_level > 0 )
 end
 
-def consume_hidden_token(token)
+def consume_hidden_token( token )
 @last_token = token
 end
 
-def look(i, token)
+def look( i, token )
 in_decision? or return
 starting_index = look_stack.last
 input = @parser.token_stream
 this_ref_index = input.index
-num_hidden = input.tokens(starting_index, this_ref_index).count { |t| t.hidden? }
+num_hidden = input.tokens( starting_index, this_ref_index ).count { |t| t.hidden? }
 depth = i + this_ref_index - starting_index - num_hidden
 if depth > @decision_look
 @decision_look = depth
 end
 end
 
-def end_backtrack(level, successful)
+def end_backtrack( level, successful )
 @profile.syntactic_predicate_looks << @decision_look
 end
 
-def recognition_exception(error)
+def recognition_exception( error )
 @profile.reported_errors += 1
 end
 
-def semantic_predicate(result, predicate)
+def semantic_predicate( result, predicate )
 in_decision? and @profile.semantic_predicates += 1
 end
 
@@ -287,10 +288,10 @@ class Profiler
 hidden_tokens = input.select { |token| token.hidden? }
 @profile.hidden_tokens = hidden_tokens.length
 @profile.tokens = input.tokens.length
-@profile.hidden_characters_matched = hidden_tokens.inject(0) do |count, token|
+@profile.hidden_characters_matched = hidden_tokens.inject( 0 ) do |count, token|
 count + token.text.length rescue count
 end
-@profile.characters_matched = (@last_token || input.tokens.last).stop + 1
+@profile.characters_matched = ( @last_token || input.tokens.last ).stop + 1
 write_report
 end
 
@@ -299,17 +300,17 @@ class Profiler
 @output << @profile.generate_report unless @output.nil?
 rescue NoMethodError => error
 if error.name.to_s == '<<'
-warn(<<-END.strip! % [__FILE__, __LINE__, @output])
+warn( <<-END.strip! % [ __FILE__, __LINE__, @output ] )
 [%s @ %s]: failed to write report to %p as it does not respond to :<<
 END
 else raise
 end
 rescue IOError => error
-$stderr.puts( Util.tidy(<<-END) % [__FILE__, __LINE__, @output, error.class, error.message])
+$stderr.puts( Util.tidy( <<-END ) % [ __FILE__, __LINE__, @output, error.class, error.message ] )
 | [%s @ %s]: failed to write profile report to %p due to an IO Error:
 | %s: %s
 END
-$stderr.puts(error.backtrace.map { |call| " - #{call}" }.join("\n"))
+$stderr.puts( error.backtrace.map { |call| " - #{ call }" }.join( "\n" ) )
 end
 
 def report
data/lib/antlr3/recognizers.rb
CHANGED
@@ -4,7 +4,7 @@
 =begin LICENSE
 
 [The "BSD licence"]
-Copyright (c) 2009 Kyle Yetter
+Copyright (c) 2009-2010 Kyle Yetter
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -33,9 +33,9 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =end
 
 module ANTLR3
-unless const_defined?(:RecognizerSharedState)
+unless const_defined?( :RecognizerSharedState )
 
-RecognizerSharedState = Struct.new(
+RecognizerSharedState = Struct.new(
 :following,
 :error_recovery,
 :last_error_index,
@@ -89,9 +89,10 @@ text::
 the text of the target token
 
 =end
+
 class RecognizerSharedState
 def initialize
-super([], false, -1, 0, nil, 0, nil, -1)
+super( [], false, -1, 0, nil, 0, nil, -1 )
 # ^-- same as this --v
 # self.following = []
 # self.error_recovery = false
@@ -123,18 +124,18 @@ end
 end
 
 
-=begin rdoc ANTLR3::
+=begin rdoc ANTLR3::Recognizer
 
-=
+= Recognizer
 
-As the base class of all ANTLR-generated recognizers,
+As the base class of all ANTLR-generated recognizers, Recognizer provides
 much of the shared functionality and structure used in the recognition process.
 For all effective purposes, the class and its immediate subclasses Lexer,
 Parser, and TreeParser are abstract classes. They can be instantiated, but
 they're pretty useless on their own. Instead, to make useful code, you write an
 ANTLR grammar and ANTLR will generate classes which inherit from one of the
 recognizer base classes, providing the implementation of the grammar rules
-itself. this group of classes to implement necessary tasks.
+itself. this group of classes to implement necessary tasks. Recognizer
 defines methods related to:
 
 * token and character matching
@@ -145,17 +146,13 @@ defines methods related to:
 * simple rule tracing and debugging
 
 =end
-class BaseRecognizer
+
+class Recognizer
 include Constants
 include Error
 include TokenFactory
 extend ClassMacros
 
-MEMO_RULE_FAILED = -2
-MEMO_RULE_UNKNOWN = -1
-DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL
-HIDDEN = HIDDEN_CHANNEL
-
 @rules = {}
 
 # inherited class methods and hooks
@@ -178,7 +175,7 @@ class BaseRecognizer
 @antlr_version_string = antlr_version.freeze
 @library_version = Util.parse_version( library_version )
 if @antlr_version_string =~ /^(\d+)\.(\d+)(?:\.(\d+)(?:b(\d+))?)?(.*)$/
-@antlr_version = [$1, $2, $3, $4].map! { |str| str.to_i }
+@antlr_version = [ $1, $2, $3, $4 ].map! { |str| str.to_i }
 timestamp = $5.strip
 #@antlr_release_time = $5.empty? ? nil : Time.parse($5)
 else
@@ -195,11 +192,11 @@ class BaseRecognizer
 # require additional custom members will have a rule-return
 # name constant that just points to the generic return
 # value.
-def define_return_scope(*members)
+def define_return_scope( *members )
 if members.empty? then generic_return_scope
 else
 members += return_scope_members
-Struct.new(*members)
+Struct.new( *members )
 end
 end
 
@@ -209,15 +206,15 @@ class BaseRecognizer
 # this method to add an extra +:tree+ field to
 # all rule return structures.
 def return_scope_members
-[:start, :stop]
+[ :start, :stop ]
 end
 
 # sets up and returns the generic rule return
 # scope for a recognizer
 def generic_return_scope
 @generic_return_scope ||= begin
-struct = Struct.new(*return_scope_members)
-const_set(:Return, struct)
+struct = Struct.new( *return_scope_members )
+const_set( :Return, struct )
 end
 end
 
@@ -245,7 +242,7 @@ class BaseRecognizer
 
 def imports( *grammar_names )
 for grammar in grammar_names
-imported_grammars.add?(grammar.to_sym) and
+imported_grammars.add?( grammar.to_sym ) and
 attr_reader( Util.snake_case( grammar ) )
 end
 return imported_grammars
@@ -306,10 +303,10 @@ class BaseRecognizer
 # See the main recognizer subclasses for more specific
 # information about creating recognizer objects like
 # lexers and parsers.
-def initialize(options = {})
-@state = options[:state] || RecognizerSharedState.new
-@error_output = options.fetch(:error_output, $stderr)
-defined?(@input) or @input = nil
+def initialize( options = {} )
+@state = options[ :state ] || RecognizerSharedState.new
+@error_output = options.fetch( :error_output, $stderr )
+defined?( @input ) or @input = nil
 initialize_dfas
 end
 
@@ -331,15 +328,15 @@ class BaseRecognizer
 # the symbol doesn't match, attempt to use the follow-set
 # data provided by +follow+ to recover from the mismatched
 # token.
-def match(type, follow)
-matched_symbol =
+def match( type, follow )
+matched_symbol = current_symbol
 if @input.peek == type
 @input.consume
 @state.error_recovery = false
 return matched_symbol
 end
-raise(BacktrackingFailed) if @state.backtracking > 0
-matched_symbol = recover_from_mismatched_token(type, follow)
+raise( BacktrackingFailed ) if @state.backtracking > 0
+matched_symbol = recover_from_mismatched_token( type, follow )
 return matched_symbol
 end
 
@@ -359,7 +356,7 @@ class BaseRecognizer
 # hook for carrying out the error reporting process. The
 # default implementation calls +display_recognition_error+
 # to display the error info on $stderr.
-def report_error(e = $!)
+def report_error( e = $! )
 @state.error_recovery and return
 @state.syntax_errors += 1
 @state.error_recovery = true
@@ -371,87 +368,95 @@ class BaseRecognizer
 # message text using +error_header+ and +error_message+,
 # and calls +emit_error_message+ to write the error
 # message out to some source
-def display_recognition_error(e = $!)
-header = error_header(e)
-message = error_message(e)
-emit_error_message("#{header} #{message}")
+def display_recognition_error( e = $! )
+header = error_header( e )
+message = error_message( e )
+emit_error_message( "#{ header } #{ message }" )
 end
 
 # used to construct an appropriate error message
 # based on the specific type of error and the
 # error's attributes
-def error_message(e = $!)
+def error_message( e = $! )
 case e
-when
-token_name = token_name(e.expecting)
-"extraneous input #{token_error_display(e.unexpected_token)} expecting #{token_name}"
-when
-token_name = token_name(e.expecting)
-"missing #{token_name} at #{token_error_display(e.symbol)}"
-when
-token_name = token_name(e.expecting)
-"mismatched input #{token_error_display(e.symbol)} expecting #{token_name}"
-when
-token_name = token_name(e.expecting)
-"mismatched tree node: #{e.symbol} expecting #{token_name}"
-when
-"no viable alternative at input " << token_error_display(e.symbol)
-when
+when UnwantedToken
+token_name = token_name( e.expecting )
+"extraneous input #{ token_error_display( e.unexpected_token ) } expecting #{ token_name }"
+when MissingToken
+token_name = token_name( e.expecting )
+"missing #{ token_name } at #{ token_error_display( e.symbol ) }"
+when MismatchedToken
+token_name = token_name( e.expecting )
+"mismatched input #{ token_error_display( e.symbol ) } expecting #{ token_name }"
+when MismatchedTreeNode
+token_name = token_name( e.expecting )
+"mismatched tree node: #{ e.symbol } expecting #{ token_name }"
+when NoViableAlternative
+"no viable alternative at input " << token_error_display( e.symbol )
+when MismatchedSet
 "mismatched input %s expecting set %s" %
-[token_error_display(e.symbol), e.expecting.inspect]
-when
+[ token_error_display( e.symbol ), e.expecting.inspect ]
+when MismatchedNotSet
 "mismatched input %s expecting set %s" %
-[token_error_display(e.symbol), e.expecting.inspect]
-when
-"rule %s failed predicate: { %s }?" % [e.rule_name, e.predicate_text]
+[ token_error_display( e.symbol ), e.expecting.inspect ]
+when FailedPredicate
+"rule %s failed predicate: { %s }?" % [ e.rule_name, e.predicate_text ]
 else e.message
 end
 end
 
+#
 # used to add a tag to the error message that indicates
 # the location of the input stream when the error
 # occurred
-
+#
+def error_header( e = $! )
 e.location
 end
 
+#
 # formats a token object appropriately for inspection
 # within an error message
-
-
-
-
-
-
-
-
+#
+def token_error_display( token )
+unless text = token.text || ( token.source_text rescue nil )
+text =
+case
+when token.type == EOF then '<EOF>'
+when name = token_name( token.type ) rescue nil then "<#{ name }>"
+when token.respond_to?( :name ) then "<#{ token.name }>"
+else "<#{ token.type }>"
+end
 end
 return text.inspect
 end
 
+#
 # Write the error report data out to some source. By default,
 # the error message is written to $stderr
-
-
+#
+def emit_error_message( message )
+@error_output.puts( message ) if @error_output
 end
 
 ##############################################################################################
 ###################################### Error Recovery ########################################
 ##############################################################################################
-
+
+def recover( error = $! )
 @state.last_error_index == @input.index and @input.consume
 @state.last_error_index = @input.index
 
 follow_set = compute_error_recovery_set
 
-resync { consume_until(follow_set) }
+resync { consume_until( follow_set ) }
 end
 
 def resync
 begin_resync
-
+return( yield )
+ensure
 end_resync
-return(value)
 end
 
 # overridable hook method that is executed at the start of the
@@ -504,9 +509,9 @@ class BaseRecognizer
 # that rule is pushed on a stack. Here are the various "local"
 # follow sets:
 #
-# FOLLOW(b1_in_a) = FIRST(']') = ']'
-# FOLLOW(b2_in_a) = FIRST(')') = ')'
-# FOLLOW(c_in_b) = FIRST('^') = '^'
+# FOLLOW( b1_in_a ) = FIRST( ']' ) = ']'
+# FOLLOW( b2_in_a ) = FIRST( ')' ) = ')'
+# FOLLOW( c_in_b ) = FIRST( '^' ) = '^'
 #
 # Upon erroneous input "[]", the call chain is
 #
@@ -515,7 +520,7 @@ class BaseRecognizer
 # and, hence, the follow context stack is:
 #
 # depth local follow set after call to rule
-# 0 \<EOF> a (from main())
+# 0 \<EOF> a (from main( ) )
 # 1 ']' b
 # 3 '^' c
 #
@@ -563,44 +568,56 @@ class BaseRecognizer
 # Like Grosch I implemented local FOLLOW sets that are combined
 # at run-time upon error to avoid overhead during parsing.
 def compute_error_recovery_set
-combine_follows(false)
+combine_follows( false )
 end
-
-def recover_from_mismatched_token(type, follow)
-if mismatch_is_unwanted_token?(type)
-err = UnwantedToken(type)
+
+def recover_from_mismatched_token( type, follow )
+if mismatch_is_unwanted_token?( type )
+err = UnwantedToken( type )
 
-
-
-
+resync do
+@input.consume
+end
 
-report_error(err)
+report_error( err )
 
-matched_symbol =
+matched_symbol = current_symbol
 @input.consume
 return matched_symbol
 end
 
-if mismatch_is_missing_token?(follow)
-inserted = missing_symbol(err, type, follow)
-err = MissingToken(type, inserted)
+if mismatch_is_missing_token?( follow )
+inserted = missing_symbol( err, type, follow )
+err = MissingToken( type, inserted )
 
-report_error(err)
+report_error( err )
 return inserted
 end
 
-
-raise err
+raise MismatchedToken( type )
 end
 
-def recover_from_mismatched_set(e, follow)
-if mismatch_is_missing_token?(follow)
-report_error(e)
-return missing_symbol(e, INVALID_TOKEN_TYPE, follow)
+def recover_from_mismatched_set( e, follow )
+if mismatch_is_missing_token?( follow )
+report_error( e )
+return missing_symbol( e, INVALID_TOKEN_TYPE, follow )
 end
 raise e
 end
 
+def recover_from_mismatched_element( e, follow )
+follow.nil? and return false
+if follow.include?( EOR_TOKEN_TYPE )
+viable_tokens = compute_context_sensitive_rule_follow
+follow = ( follow | viable_tokens ) - Set[ EOR_TOKEN_TYPE ]
+end
+if follow.include?( @input.peek )
+report_error( e )
+return true
+end
+return false
+end
+
 # Conjure up a missing token during error recovery.
 #
 # The recognizer attempts to recover from single missing
@@ -619,41 +636,32 @@ class BaseRecognizer
 # a CommonToken of the appropriate type. The text will be the token.
 # If you change what tokens must be created by the lexer,
 # override this method to create the appropriate tokens.
-def missing_symbol(error, expected_token_type, follow)
+def missing_symbol( error, expected_token_type, follow )
 return nil
 end
 
-def
-
-if follow.include?(EOR_TOKEN_TYPE)
-viable_tokens = compute_context_sensitive_rule_follow()
-follow = (follow | viable_tokens) - Set.new([EOR_TOKEN_TYPE])
-end
-if follow.include?(@input.peek)
-report_error(e)
-return true
-end
-return false
-end
-
-def mismatch_is_unwanted_token?(type)
-@input.peek(2) == type
+def mismatch_is_unwanted_token?( type )
+@input.peek( 2 ) == type
 end
 
-def mismatch_is_missing_token?(follow)
+def mismatch_is_missing_token?( follow )
 follow.nil? and return false
-if follow.include?(EOR_TOKEN_TYPE)
+if follow.include?( EOR_TOKEN_TYPE )
 viable_tokens = compute_context_sensitive_rule_follow
 follow = follow | viable_tokens
 
-follow.delete(EOR_TOKEN_TYPE) unless @state.following.empty?
+follow.delete( EOR_TOKEN_TYPE ) unless @state.following.empty?
 end
-if follow.include?(@input.peek) or follow.include?(EOR_TOKEN_TYPE)
+if follow.include?( @input.peek ) or follow.include?( EOR_TOKEN_TYPE )
 return true
 end
 return false
 end
 
+def syntax_errors?
+( error_count = @state.syntax_errors ) > 0 and return( error_count )
+end
+
 # factor out what to do upon token mismatch so
 # tree parsers can behave differently.
 #
@@ -666,7 +674,8 @@ class BaseRecognizer
 @state.syntax_errors
 end
 
-#
+#
+# Compute the context-sensitive +FOLLOW+ set for current rule.
 # This is set of token types that can follow a specific rule
 # reference given a specific call chain. You get the set of
 # viable tokens that can possibly come next (look depth 1)
@@ -717,17 +726,18 @@ class BaseRecognizer
 # the viable next token set, then you know there is most likely
 # a missing token in the input stream. "Insert" one by just not
 # throwing an exception.
+#
 def compute_context_sensitive_rule_follow
-combine_follows
+combine_follows true
 end
-
-def combine_follows(exact)
+
+def combine_follows( exact )
 follow_set = Set.new
 @state.following.each_with_index.reverse_each do |local_follow_set, index|
 follow_set |= local_follow_set
 if exact
-if local_follow_set.include?(EOR_TOKEN_TYPE)
-follow_set.delete(EOR_TOKEN_TYPE) if index > 0
+if local_follow_set.include?( EOR_TOKEN_TYPE )
+follow_set.delete( EOR_TOKEN_TYPE ) if index > 0
 else
 break
 end
@@ -736,6 +746,7 @@ class BaseRecognizer
 return follow_set
 end
 
+#
 # Match needs to return the current input symbol, which gets put
 # into the label for the associated token ref; e.g., x=ID. Token
 # and tree parsers need to return different objects. Rather than test
@@ -744,28 +755,39 @@ class BaseRecognizer
 # input symbol is.
 #
 # This is ignored for lexers.
-
+#
+def current_symbol
 @input.look
 end
 
-# Consume tokens until one matches the given token or token set
 #
-#
-
-
+# Consume input symbols until one matches a type within types
+#
+# types can be a single symbol type or a set of symbol types
+#
+def consume_until( types )
+types.is_a?( Set ) or types = Set[ *types ]
 type = @input.peek
-until type == EOF or
+until type == EOF or types.include?( type )
 @input.consume
 type = @input.peek
 end
-return(type)
+return( type )
+end
+
+#
+# Returns true if the recognizer is currently in a decision for which
+# backtracking has been enabled
+#
+def backtracking?
+@state.backtracking > 0
 end
 
 def backtracking_level
 @state.backtracking
 end
 
-def backtracking_level=(n)
+def backtracking_level=( n )
 @state.backtracking = n
 end
 
@@ -779,20 +801,21 @@ class BaseRecognizer
 end
 return success
 ensure
-@input.rewind(start)
+@input.rewind( start )
 @state.backtracking -= 1
 end
 
-def syntactic_predicate?(name)
-backtrack { send
+def syntactic_predicate?( name )
+backtrack { send name }
 end
 
 alias backtracking backtracking_level
 alias backtracking= backtracking_level=
 
 def rule_memoization( rule, start_index )
-@state.rule_memory
-
+@state.rule_memory.fetch( rule ) do
+@state.rule_memory[ rule ] = Hash.new( MEMO_RULE_UNKNOWN )
+end[ start_index ]
 end
 
 def already_parsed_rule?( rule )
@@ -807,40 +830,45 @@ class BaseRecognizer
 return true
 end
 
-def memoize(rule, start_index, success)
-stop_index = success ?
-memo = @state.rule_memory[rule] and memo[start_index] = stop_index
+def memoize( rule, start_index, success )
+stop_index = success ? @input.index - 1 : MEMO_RULE_FAILED
+memo = @state.rule_memory[ rule ] and memo[ start_index ] = stop_index
 end
 
-def trace_in(rule_name, rule_index, input_symbol)
+def trace_in( rule_name, rule_index, input_symbol )
 @error_output.printf( "--> enter %s on %s", rule_name, input_symbol )
-@state.backtracking > 0 and @error_output.printf(
+@state.backtracking > 0 and @error_output.printf(
 " (in backtracking mode: depth = %s)", @state.backtracking
 )
-@error_output.print("\n")
+@error_output.print( "\n" )
 end
 
-def trace_out(rule_name, rule_index, input_symbol)
-@error_output.printf("<-- exit %s on %s", rule_name, input_symbol)
-@state.backtracking > 0 and @error_output.printf(
+def trace_out( rule_name, rule_index, input_symbol )
+@error_output.printf( "<-- exit %s on %s", rule_name, input_symbol )
+@state.backtracking > 0 and @error_output.printf(
 " (in backtracking mode: depth = %s)", @state.backtracking
 )
-@error_output.print("\n")
+@error_output.print( "\n" )
 end
 
-
+private
 
 def initialize_dfas
 # do nothing
 end
 end
 
+
+# constant alias for compatibility with older versions of the
+# runtime library
+BaseRecognizer = Recognizer
+
 =begin rdoc ANTLR3::Lexer
 
 = Lexer
 
 Lexer is the default superclass of all lexers generated by ANTLR. The class
-tailors the core functionality provided by
+tailors the core functionality provided by Recognizer to the task of
 matching patterns in the text input and breaking the input into tokens.
 
 == About Lexers
@@ -899,19 +927,19 @@ demonstrates the typical setup for using ANTLR parsers and lexers in Ruby.
 
 source = "some hypothetical source code"
 input = ANTLR3::StringStream.new(source, :file => 'blah-de-blah.hyp')
-lexer = Hypothetical::Lexer.new(input)
-tokens = ANTLR3::CommonTokenStream.new(lexer)
-parser = Hypothetical::Parser.new(tokens)
+lexer = Hypothetical::Lexer.new( input )
+tokens = ANTLR3::CommonTokenStream.new( lexer )
+parser = Hypothetical::Parser.new( tokens )
 
 # if you're using the standard streams, ANTLR3::StringStream and
 # ANTLR3::CommonTokenStream, you can write the same process
 # shown above more succinctly:
 
 lexer = Hypothetical::Lexer.new("some hypothetical source code", :file => 'blah-de-blah.hyp')
-parser = Hypothetical::Parser.new(lexer)
+parser = Hypothetical::Parser.new( lexer )
 
 =end
-class Lexer <
+class Lexer < Recognizer
 include TokenSource
 @token_class = CommonToken
 
@@ -919,36 +947,31 @@ class Lexer < BaseRecognizer
 @default_rule ||= :token!
 end
 
-def self.main(argv = ARGV, options = {})
-if argv.is_a?(::Hash) then argv, options = ARGV, argv end
-main = ANTLR3::Main::LexerMain.new(self, options)
-block_given? ? yield(main) : main.execute(argv)
+def self.main( argv = ARGV, options = {} )
+if argv.is_a?( ::Hash ) then argv, options = ARGV, argv end
+main = ANTLR3::Main::LexerMain.new( self, options )
+block_given? ? yield( main ) : main.execute( argv )
 end
 
 def self.associated_parser
 @associated_parser ||= begin
 @grammar_home and @grammar_home::Parser
 rescue NameError
-grammar_name = @grammar_home.name.split("::").last
+grammar_name = @grammar_home.name.split( "::" ).last
 begin
-require "#{grammar_name}Parser"
+require "#{ grammar_name }Parser"
 @grammar_home::Parser
 rescue LoadError, NameError
 end
 end
 end
 
-def initialize(input, options = {})
+def initialize( input, options = {} )
 super( options )
-@input =
-case input
-when ::String then StringStream.new(input, options)
-when ::IO then FileStream.new(input, options)
-else input
-end
+@input = cast_input( input, options )
 end
 
-def
+def current_symbol
 nil
 end
 
@@ -965,16 +988,16 @@ class Lexer < BaseRecognizer
 token!
 
 case token = @state.token
-when nil then return(emit
+when nil then return( emit )
 when SKIP_TOKEN then next
 else
 return token
 end
 rescue NoViableAlternative => re
-report_error(re)
-recover(re)
+report_error( re )
+recover( re )
 rescue Error::RecognitionError => re
-report_error(re)
+report_error( re )
 end
 end
 end
@@ -989,7 +1012,7 @@ class Lexer < BaseRecognizer
 self.to_a
 end
 
-def char_stream=(input)
+def char_stream=( input )
 @input = nil
 reset()
 @input = input
@@ -1005,14 +1028,14 @@ class Lexer < BaseRecognizer
 return token
 end
 
-def match(expected)
+def match( expected )
 case expected
 when String
 expected.each_byte do |char|
 unless @input.peek == char
 @state.backtracking > 0 and raise BacktrackingFailed
-error = MismatchedToken(char)
-recover(error)
+error = MismatchedToken( char )
+recover( error )
 raise error
 end
 @input.consume()
@@ -1020,8 +1043,8 @@ class Lexer < BaseRecognizer
 else # single integer character
 unless @input.peek == expected
 @state.backtracking > 0 and raise BacktrackingFailed
-error = MismatchedToken(expected)
-recover(error)
+error = MismatchedToken( expected )
+recover( error )
 raise error
 end
 @input.consume
@@ -1033,14 +1056,14 @@ class Lexer < BaseRecognizer
 @input.consume
 end
 
-def match_range(min, max)
+def match_range( min, max )
 char = @input.peek
-if char.between?(min, max) then @input.consume
+if char.between?( min, max ) then @input.consume
 else
 @state.backtracking > 0 and raise BacktrackingFailed
-error = MismatchedRange(min.chr, max.chr)
-recover(error)
-raise(error)
+error = MismatchedRange( min.chr, max.chr )
+recover( error )
+raise( error )
 end
 return true
 end
@@ -1059,40 +1082,40 @@ class Lexer < BaseRecognizer
 
 def text
 @state.text and return @state.text
-@input.substring(@state.token_start_position, character_index - 1)
+@input.substring( @state.token_start_position, character_index - 1 )
 end
 
-def text=(text)
+def text=( text )
 @state.text = text
 end
 
-def report_error(e)
-display_recognition_error(e)
+def report_error( e )
+display_recognition_error( e )
 end
 
-def error_message(e)
-char = character_error_display(e.symbol) rescue nil
+def error_message( e )
+char = character_error_display( e.symbol ) rescue nil
 case e
 when Error::MismatchedToken
-expecting = character_error_display(e.expecting)
-"mismatched character #{char}; expecting #{expecting}"
+expecting = character_error_display( e.expecting )
+"mismatched character #{ char }; expecting #{ expecting }"
 when Error::NoViableAlternative
-"no viable alternative at character #{char}"
+"no viable alternative at character #{ char }"
 when Error::EarlyExit
-"required (...)+ loop did not match anything at character #{char}"
+"required ( ... )+ loop did not match anything at character #{ char }"
 when Error::MismatchedNotSet
-"mismatched character %s; expecting set %p" % [char, e.expecting]
+"mismatched character %s; expecting set %p" % [ char, e.expecting ]
 when Error::MismatchedSet
-"mismatched character %s; expecting set %p" % [char, e.expecting]
+"mismatched character %s; expecting set %p" % [ char, e.expecting ]
 when Error::MismatchedRange
-a = character_error_display(e.min)
-b = character_error_display(e.max)
-"mismatched character %s; expecting set %s..%s" % [char, a, b]
+a = character_error_display( e.min )
+b = character_error_display( e.max )
+"mismatched character %s; expecting set %s..%s" % [ char, a, b ]
 else super
 end
 end
 
-def character_error_display(char)
+def character_error_display( char )
 case char
 when EOF then '<EOF>'
 when Integer then char.chr.inspect
@@ -1100,29 +1123,39 @@ class Lexer < BaseRecognizer
 end
 end
 
-def recover(re)
+def recover( re )
 @input.consume
 end
 
+alias input= char_stream=
 
 private
 
-def
+def cast_input( input, options )
+case input
+when CharacterStream then input
+when ::String then StringStream.new( input, options )
+when ::IO then FileStream.new( input, options )
+else input
+end
+end
+
+def trace_in( rule_name, rule_index )
 if symbol = @input.look and symbol != EOF then symbol = symbol.inspect
 else symbol = '<EOF>' end
-input_symbol = "#{symbol} @ line #{line} / col #{column}"
-super(rule_name, rule_index, input_symbol)
+input_symbol = "#{ symbol } @ line #{ line } / col #{ column }"
+super( rule_name, rule_index, input_symbol )
 end
 
-def trace_out(rule_name, rule_index)
+def trace_out( rule_name, rule_index )
 if symbol = @input.look and symbol != EOF then symbol = symbol.inspect
 else symbol = '<EOF>' end
-input_symbol = "#{symbol} @ line #{line} / col #{column}"
-super(rule_name, rule_index, input_symbol)
+input_symbol = "#{ symbol } @ line #{ line } / col #{ column }"
+super( rule_name, rule_index, input_symbol )
 end
 
-def create_token(&b)
-if block_given? then super(&b)
+def create_token( &b )
+if block_given? then super( &b )
 else
 super do |t|
 t.input = @input
@@ -1144,7 +1177,7 @@ end
 = Parser
 
 Parser is the default base class of ANTLR-generated parser classes. The class
-tailors the functionality provided by
+tailors the functionality provided by Recognizer to the task of parsing.
 
 == About Parsing
 
@@ -1171,56 +1204,56 @@ otherwise within the grammar options. The generated code will provide a method
 for each parser rule defined in the ANTLR grammar, as well as any other
 customized member attributes and methods specified in the source grammar.
 
-This class does not override much of the functionality in
-thus the API closely mirrors
+This class does not override much of the functionality in Recognizer, and
+thus the API closely mirrors Recognizer.
 
 =end
-class Parser <
-def self.main(argv = ARGV, options = {})
-if argv.is_a?(::Hash) then argv, options = ARGV, argv end
-main = ANTLR3::Main::ParserMain.new(self, options)
-block_given? ? yield(main) : main.execute(argv)
+class Parser < Recognizer
+def self.main( argv = ARGV, options = {} )
+if argv.is_a?( ::Hash ) then argv, options = ARGV, argv end
+main = ANTLR3::Main::ParserMain.new( self, options )
+block_given? ? yield( main ) : main.execute( argv )
 end
 
 def self.associated_lexer
 @associated_lexer ||= begin
 @grammar_home and @grammar_home::Lexer
 rescue NameError
-grammar_name = @grammar_home.name.split("::").last
+grammar_name = @grammar_home.name.split( "::" ).last
 begin
-require "#{grammar_name}Lexer"
+require "#{ grammar_name }Lexer"
 @grammar_home::Lexer
 rescue LoadError, NameError
 end
 end
 end
 
+
 def initialize( input, options = {} )
 super( options )
 @input = nil
 reset
-input = cast_input( input, options )
-@input = input
+@input = cast_input( input, options )
 end
 
-def missing_symbol(error, expected_type, follow)
+def missing_symbol( error, expected_type, follow )
 current = @input.look
-current = @input.look(-1) if current == ANTLR3::EOF_TOKEN
+current = @input.look( -1 ) if current == ANTLR3::EOF_TOKEN
 t =
 case
 when current && current != ANTLR3::EOF_TOKEN then current.clone
 when @input.token_class then @input.token_class.new
-else (create_token rescue CommonToken.new)
+else ( create_token rescue CommonToken.new )
 end
 
 t.type = expected_type
-name = t.name.gsub(/(^<)|(>$)/,'')
-t.text = "<missing #{name}>"
+name = t.name.gsub( /(^<)|(>$)/,'' )
+t.text = "<missing #{ name }>"
 t.channel = DEFAULT_CHANNEL
-return(t)
+return( t )
 end
 
-def token_stream=(input)
+def token_stream=( input )
 @input = nil
 reset
 @input = input
@@ -1231,18 +1264,20 @@ class Parser < BaseRecognizer
 @input.source_name
 end
 
+
 private
 
-def trace_in(rule_name, rule_index)
+def trace_in( rule_name, rule_index )
 super( rule_name, rule_index, @input.look.inspect )
 end
 
-def trace_out(rule_name, rule_index)
+def trace_out( rule_name, rule_index )
 super( rule_name, rule_index, @input.look.inspect )
 end
 
 def cast_input( input, options )
 case input
+when TokenStream then input
 when TokenSource then CommonTokenStream.new( input, options )
 when IO, String, CharacterStream
 if lexer_class = self.class.associated_lexer
@@ -1257,7 +1292,7 @@ private
 else
 # assume it's a stream if it at least implements peek and consume
 unless input.respond_to?( :peek ) and input.respond_to?( :consume )
-raise ArgumentError, Util.tidy(<<-END, true)
+raise ArgumentError, Util.tidy( <<-END, true )
 | #{ self.class } requires a token stream as input, but
 | #{ input.inspect } was provided
 END