antlr3 1.2.3
- data/ANTLR-LICENSE.txt +26 -0
- data/History.txt +66 -0
- data/README.txt +139 -0
- data/bin/antlr4ruby +33 -0
- data/java/RubyTarget.java +524 -0
- data/java/antlr-full-3.2.1.jar +0 -0
- data/lib/antlr3.rb +176 -0
- data/lib/antlr3/constants.rb +88 -0
- data/lib/antlr3/debug.rb +701 -0
- data/lib/antlr3/debug/event-hub.rb +210 -0
- data/lib/antlr3/debug/record-event-listener.rb +25 -0
- data/lib/antlr3/debug/rule-tracer.rb +55 -0
- data/lib/antlr3/debug/socket.rb +360 -0
- data/lib/antlr3/debug/trace-event-listener.rb +92 -0
- data/lib/antlr3/dfa.rb +247 -0
- data/lib/antlr3/dot.rb +174 -0
- data/lib/antlr3/error.rb +657 -0
- data/lib/antlr3/main.rb +561 -0
- data/lib/antlr3/modes/ast-builder.rb +41 -0
- data/lib/antlr3/modes/filter.rb +56 -0
- data/lib/antlr3/profile.rb +322 -0
- data/lib/antlr3/recognizers.rb +1280 -0
- data/lib/antlr3/streams.rb +985 -0
- data/lib/antlr3/streams/interactive.rb +91 -0
- data/lib/antlr3/streams/rewrite.rb +412 -0
- data/lib/antlr3/test/call-stack.rb +57 -0
- data/lib/antlr3/test/config.rb +23 -0
- data/lib/antlr3/test/core-extensions.rb +269 -0
- data/lib/antlr3/test/diff.rb +165 -0
- data/lib/antlr3/test/functional.rb +207 -0
- data/lib/antlr3/test/grammar.rb +371 -0
- data/lib/antlr3/token.rb +592 -0
- data/lib/antlr3/tree.rb +1415 -0
- data/lib/antlr3/tree/debug.rb +163 -0
- data/lib/antlr3/tree/visitor.rb +84 -0
- data/lib/antlr3/tree/wizard.rb +481 -0
- data/lib/antlr3/util.rb +149 -0
- data/lib/antlr3/version.rb +27 -0
- data/samples/ANTLRv3Grammar.g +621 -0
- data/samples/Cpp.g +749 -0
- data/templates/AST.stg +335 -0
- data/templates/ASTDbg.stg +40 -0
- data/templates/ASTParser.stg +153 -0
- data/templates/ASTTreeParser.stg +272 -0
- data/templates/Dbg.stg +192 -0
- data/templates/Ruby.stg +1514 -0
- data/test/functional/ast-output/auto-ast.rb +797 -0
- data/test/functional/ast-output/construction.rb +555 -0
- data/test/functional/ast-output/hetero-nodes.rb +753 -0
- data/test/functional/ast-output/rewrites.rb +1327 -0
- data/test/functional/ast-output/tree-rewrite.rb +1662 -0
- data/test/functional/debugging/debug-mode.rb +689 -0
- data/test/functional/debugging/profile-mode.rb +165 -0
- data/test/functional/debugging/rule-tracing.rb +74 -0
- data/test/functional/delegation/import.rb +379 -0
- data/test/functional/lexer/basic.rb +559 -0
- data/test/functional/lexer/filter-mode.rb +245 -0
- data/test/functional/lexer/nuances.rb +47 -0
- data/test/functional/lexer/properties.rb +104 -0
- data/test/functional/lexer/syn-pred.rb +32 -0
- data/test/functional/lexer/xml.rb +206 -0
- data/test/functional/main/main-scripts.rb +245 -0
- data/test/functional/parser/actions.rb +224 -0
- data/test/functional/parser/backtracking.rb +244 -0
- data/test/functional/parser/basic.rb +282 -0
- data/test/functional/parser/calc.rb +98 -0
- data/test/functional/parser/ll-star.rb +143 -0
- data/test/functional/parser/nuances.rb +165 -0
- data/test/functional/parser/predicates.rb +103 -0
- data/test/functional/parser/properties.rb +242 -0
- data/test/functional/parser/rule-methods.rb +132 -0
- data/test/functional/parser/scopes.rb +274 -0
- data/test/functional/token-rewrite/basic.rb +318 -0
- data/test/functional/token-rewrite/via-parser.rb +100 -0
- data/test/functional/tree-parser/basic.rb +750 -0
- data/test/unit/sample-input/file-stream-1 +2 -0
- data/test/unit/sample-input/teststreams.input2 +2 -0
- data/test/unit/test-dfa.rb +52 -0
- data/test/unit/test-exceptions.rb +44 -0
- data/test/unit/test-recognizers.rb +55 -0
- data/test/unit/test-scheme.rb +62 -0
- data/test/unit/test-streams.rb +459 -0
- data/test/unit/test-tree-wizard.rb +535 -0
- data/test/unit/test-trees.rb +854 -0
- metadata +205 -0
+++ data/lib/antlr3/modes/ast-builder.rb
@@ -0,0 +1,41 @@
+#!/usr/bin/ruby
+# encoding: utf-8
+require 'antlr3'
+require 'antlr3/tree'
+
+module ANTLR3
+  module ASTBuilder
+    extend ClassMacros
+
+    def self.included(klass)
+      def klass.return_scope_members
+        super.push(:tree)
+      end
+    end
+
+    def initialize( input, options = {} )
+      @adaptor = options[:adaptor] ||= begin
+        (input.adaptor rescue nil) or
+          AST::CommonTreeAdaptor.new( token_class )
+      end
+      super( input, options )
+    end
+
+    shared_attribute( :adaptor )
+
+    private
+
+    def subtree_stream(desc, element = nil)
+      AST::RewriteRuleSubtreeStream.new(@adaptor, desc, element)
+    end
+
+    def token_stream(desc, element = nil)
+      AST::RewriteRuleTokenStream.new(@adaptor, desc, element)
+    end
+
+    def node_stream(desc, element = nil)
+      AST::RewriteRuleNodeStream.new(@adaptor, desc, element)
+    end
+
+  end
+end
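
The `return_scope_members` hook above is what gives AST-building recognizers
their extra tree slot: including the module redefines the class-level hook, and
`define_return_scope` (defined on BaseRecognizer later in this changeset) folds
the result into every generated rule-return Struct. A minimal sketch of the
effect -- `DemoParser` is a hypothetical stand-in for an ANTLR-generated parser
class:

  require 'antlr3'
  require 'antlr3/tree'

  class DemoParser < ANTLR3::Parser
    include ANTLR3::ASTBuilder    # adds :tree via return_scope_members
  end

  # the generic rule-return structure now carries a tree member
  # alongside the default start and stop members
  DemoParser.generic_return_scope.members   # => [:start, :stop, :tree]
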
+++ data/lib/antlr3/modes/filter.rb
@@ -0,0 +1,56 @@
+#!/usr/bin/ruby
+# encoding: utf-8
+
+require 'antlr3'
+
+module ANTLR3
+=begin rdoc ANTLR3::FilterMode
+
+If a lexer grammar specifies the <tt>filter = true</tt> option, the generated
+lexer code will include this module. It modifies the standard
+<tt>next_token</tt> method to catch RecognitionErrors and skip ahead in the input until
+the token! method can match a token without raising a RecognitionError.
+
+See http://www.antlr.org/wiki/display/ANTLR3/Lexical+filters for more info on
+lexer filter mode.
+
+=end
+  module FilterMode
+    def next_token
+      @input.peek == ANTLR3::EOF and return ANTLR3::EOF_TOKEN
+      @state.token = nil
+      @state.channel = ANTLR3::DEFAULT_CHANNEL
+      @state.token_start_position = @input.index
+      @state.token_start_column = @input.column
+      @state.token_start_line = @input.line
+      @state.text = nil
+      @state.backtracking = 1
+      m = @input.mark
+      # means we won't throw slow exception
+      token!
+      @input.release(m)
+      emit
+      return @state.token
+    rescue ANTLR3::BacktrackingFailed
+      # token! backtracks with synpred at backtracking==2
+      # and we set the synpredgate to allow actions at level 1.
+      @input.rewind(m)
+      @input.consume # advance one char and try again
+      retry
+    rescue ANTLR3::Error::RecognitionError => re
+      # shouldn't happen in backtracking mode, but...
+      report_error(re)
+      recover(re)
+    ensure
+      @state.backtracking = 0
+    end
+
+    def memoize(rule, start_index, success)
+      super(rule, start_index, success) if @state.backtracking > 1
+    end
+
+    def already_parsed_rule?(rule)
+      @state.backtracking > 1 ? super(rule) : false
+    end
+  end
+end
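
For context, this module is what implements "fuzzy" lexing: `next_token` runs
each `token!` attempt at backtracking level 1, so a failed match raises the
lightweight BacktrackingFailed instead of a full RecognitionError, and the
lexer simply advances one character and retries until something matches.
Driving such a lexer could look like the sketch below, where `FuzzyRuby::Lexer`
stands in for a lexer generated from a grammar that sets the
`filter = true` option (lexers mix in TokenSource, so they iterate like any
Enumerable):

  require 'antlr3'

  # stretches of input that no lexer rule matches are skipped
  # silently instead of raising recognition errors
  lexer = FuzzyRuby::Lexer.new( "@!? def hello() ... end @!?" )
  lexer.each do |token|
    printf( "type %-4d %p\n", token.type, token.text )
  end
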
+++ data/lib/antlr3/profile.rb
@@ -0,0 +1,322 @@
+#!/usr/bin/ruby
+# encoding: utf-8
+
+module ANTLR3
+  module Profile
+=begin rdoc ANTLR3::Profile::ParserEvents
+
+ANTLR3::Profile::ParserEvents expands basic debugging events for use by
+recognition code generated by ANTLR when called with the <tt>-profile</tt>
+switch.
+
+=end
+    module ParserEvents
+      include ANTLR3::Debug::ParserEvents
+
+      def initialize(stream, options = {})
+        options[:debug_listener] ||= Profiler.new( self )
+        super( stream, options )
+      end
+
+      def already_parsed_rule?(rule)
+        @debug_listener.examine_rule_memoization(rule)
+        super
+      end
+
+      def profile
+        @debug_listener.profile
+      end
+
+      def memoize(rule, start_index, success)
+        @debug_listener.memoize(rule, start_index, success)
+        super
+      end
+    end
+
+    class DataSet < ::Array
+      include ::Math
+      def total
+        inject(:+)
+      end
+      def average
+        length > 0 ? (total.to_f / length) : 0
+      end
+      def variance
+        length.zero? and return(0.0)
+        mean = average
+        inject(0.0) { |t, i| t + (i - mean)**2 } / (length - 1)
+      end
+      def standard_deviation
+        sqrt(variance)
+      end
+    end
+
+
+
+
+
+    unless const_defined?(:Profile)
+      Profile = Struct.new(
+        :grammar_file, :parser_class, :top_rule,
+        :rule_invocations, :guessing_rule_invocations, :rule_invocation_depth,
+        :fixed_looks, :cyclic_looks, :syntactic_predicate_looks,
+        :memoization_cache_entries, :memoization_cache_hits,
+        :memoization_cache_misses, :tokens, :hidden_tokens,
+        :characters_matched, :hidden_characters_matched, :semantic_predicates,
+        :syntactic_predicates, :reported_errors
+      )
+    end
+
+    class Profile
+      def initialize
+        init_values = Array.new(self.class.members.length, 0)
+        super(*init_values)
+        self.top_rule = self.parser_class = self.grammar_file = nil
+        self.fixed_looks = DataSet.new
+        self.cyclic_looks = DataSet.new
+        self.syntactic_predicate_looks = DataSet.new
+      end
+
+      def fixed_decisions
+        fixed_looks.length
+      end
+
+      def cyclic_decisions
+        cyclic_looks.length
+      end
+
+      def backtracking_decisions
+        syntactic_predicate_looks.length
+      end
+
+      def generate_report
+        report = '+' << '-' * 78 << "+\n"
+        report << '| ' << "ANTLR Rule Profile".center(76) << " |\n"
+        report << '+' << '-' * 78 << "+\n"
+        report << "| Generated at #{Time.now}".ljust(78) << " |\n"
+        report << "| Profiled #{parser_class.name}##{top_rule}".ljust(78) << " |\n"
+        report << "| Rule source generated from grammar file #{grammar_file}".ljust(78) << " |\n"
+        report << '+' << '-' * 78 << "+\n"
+
+        report << '| ' << "Rule Invocations".center(76) << " |\n"
+        report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+        report << "| %-66s | %7i |\n" % ["Total Invocations", rule_invocations]
+        report << "| %-66s | %7i |\n" % ["``Guessing'' Invocations", guessing_rule_invocations]
+        report << "| %-66s | %7i |\n" % ["Deepest Level of Invocation", rule_invocation_depth]
+        report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+
+        report << '| ' << "Execution Events".center(76) << " |\n"
+        report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+        report << "| %-66s | %7i |\n" % ["Semantic Predicates Evaluated", semantic_predicates]
+        report << "| %-66s | %7i |\n" % ["Syntactic Predicates Evaluated", syntactic_predicates]
+        report << "| %-66s | %7i |\n" % ["Errors Reported", reported_errors]
+        report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+
+        report << '| ' << "Token and Character Data".center(76) << " |\n"
+        report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+        report << "| %-66s | %7i |\n" % ["Tokens Consumed", tokens]
+        report << "| %-66s | %7i |\n" % ["Hidden Tokens Consumed", hidden_tokens]
+        report << "| %-66s | %7i |\n" % ["Characters Matched", characters_matched]
+        report << "| %-66s | %7i |\n" % ["Hidden Characters Matched", hidden_characters_matched]
+        report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+
+        report << '| ' << "Memoization".center(76) << " |\n"
+        report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+        report << "| %-66s | %7i |\n" % ["Cache Entries", memoization_cache_entries]
+        report << "| %-66s | %7i |\n" % ["Cache Hits", memoization_cache_hits]
+        report << "| %-66s | %7i |\n" % ["Cache Misses", memoization_cache_misses]
+        report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+
+        [
+          ['Fixed Lookahead (k)', fixed_looks],
+          ['Arbitrary Lookahead (k)', cyclic_looks],
+          ['Backtracking (Syntactic Predicate)', syntactic_predicate_looks]
+        ].each do |name, set|
+          mean, stdev = '%4.2f' % set.average, '%4.2f' % set.standard_deviation
+          report << '| ' << "#{name} Decisions".center(76) << " |\n"
+          report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+          report << "| %-66s | %7i |\n" % ["Count", set.length]
+          report << "| %-66s | %7i |\n" % ["Minimum k", set.min]
+          report << "| %-66s | %7i |\n" % ["Maximum k", set.max]
+          report << "| %-66s | %7s |\n" % ["Average k", mean]
+          report << "| %-66s | %7s |\n" % ["Standard Deviation of k", stdev]
+          report << '+' << '-' * 68 << '+' << '-' * 9 << "+\n"
+        end
+        return(report)
+      end
+    end
+
+=begin rdoc ANTLR3::Profile::Profiler
+
+When ANTLR is run with the <tt>-profile</tt> switch, it generates recognition
+code that keeps a running account of the decision logic performed while parsing
+any given input. This information can be used to help refactor a slow grammar.
+Profiler is an event-listener that performs all of the profiling accounting and
+builds a simple report to present the various statistics.
+
+=end
+    class Profiler
+      include ANTLR3::Debug::EventListener
+
+      PROTOCOL_VERSION = 2
+
+      attr_accessor :parser
+      attr_reader :rule_level
+      attr_reader :decision_level
+
+      # tracks the maximum look value for the current decision
+      # (maxLookaheadInCurrentDecision in java Profiler)
+      attr_reader :decision_look
+
+      # the last token consumed
+      # (lastTokenConsumed in java Profiler)
+      attr_reader :last_token
+      attr_reader :look_stack
+      attr_reader :profile
+
+      attr_accessor :output
+
+      def initialize(parser = nil, output = nil)
+        @parser = parser
+        @profile = nil
+        @rule_level = 0
+        @decision_level = 0
+        @decision_look = 0
+        @last_token = nil
+        @look_stack = []
+        @output = output
+      end
+
+      def commence
+        @profile = Profile.new
+        @rule_level = 0
+        @decision_level = 0
+        @decision_look = 0
+        @last_token = nil
+        @look_stack = []
+      end
+
+      def enter_rule(grammar_file_name, rule_name)
+        if @rule_level.zero?
+          commence
+          @profile.grammar_file = grammar_file_name
+          @profile.parser_class = @parser.class
+          @profile.top_rule = rule_name
+        end
+        @rule_level += 1
+        @profile.rule_invocations += 1
+        @profile.rule_invocation_depth < @rule_level and
+          @profile.rule_invocation_depth = @rule_level
+      end
+
+      def exit_rule(grammar_file_name, rule_name)
+        @rule_level -= 1
+      end
+
+      def examine_rule_memoization(rule)
+        stop_index = parser.rule_memoization(rule, @parser.input.index)
+        if stop_index == BaseRecognizer::MEMO_RULE_UNKNOWN
+          @profile.memoization_cache_misses += 1
+          @profile.guessing_rule_invocations += 1
+        else
+          @profile.memoization_cache_hits += 1
+        end
+      end
+
+      def memoize(rule, start_index, success)
+        @profile.memoization_cache_entries += 1
+      end
+
+
+      def enter_decision(decision_number)
+        @decision_level += 1
+        starting_look_index = @parser.token_stream.index
+        @look_stack << starting_look_index
+      end
+
+      def exit_decision(decision_number)
+        @look_stack.pop
+        @decision_level -= 1
+        if @parser.cyclic_decision? then
+          @profile.cyclic_looks << @decision_look
+        else @profile.fixed_looks << @decision_look
+        end
+
+        @parser.cyclic_decision = false
+        @decision_look = 0
+      end
+
+      def consume_token(token)
+        @last_token = token
+      end
+
+      def in_decision?
+        return(@decision_level > 0)
+      end
+
+      def consume_hidden_token(token)
+        @last_token = token
+      end
+
+      def look(i, token)
+        in_decision? or return
+        starting_index = look_stack.last
+        input = @parser.token_stream
+        this_ref_index = input.index
+        num_hidden = input.tokens(starting_index, this_ref_index).count { |t| t.hidden? }
+        depth = i + this_ref_index - starting_index - num_hidden
+        if depth > @decision_look
+          @decision_look = depth
+        end
+      end
+
+      def end_backtrack(level, successful)
+        @profile.syntactic_predicate_looks << @decision_look
+      end
+
+      def recognition_exception(error)
+        @profile.reported_errors += 1
+      end
+
+      def semantic_predicate(result, predicate)
+        in_decision? and @profile.semantic_predicates += 1
+      end
+
+      def terminate
+        input = @parser.token_stream
+        hidden_tokens = input.select { |token| token.hidden? }
+        @profile.hidden_tokens = hidden_tokens.length
+        @profile.tokens = input.tokens.length
+        @profile.hidden_characters_matched = hidden_tokens.inject(0) do |count, token|
+          count + token.text.length rescue count
+        end
+        @profile.characters_matched = (@last_token || input.tokens.last).stop + 1
+        write_report
+      end
+
+
+      def write_report
+        @output << @profile.generate_report unless @output.nil?
+      rescue NoMethodError => error
+        if error.name.to_s == '<<'
+          warn(<<-END.strip! % [__FILE__, __LINE__, @output])
+            [%s @ %s]: failed to write report to %p as it does not respond to :<<
+          END
+        else raise
+        end
+      rescue IOError => error
+        $stderr.puts( Util.tidy(<<-END) % [__FILE__, __LINE__, @output, error.class, error.message])
+        | [%s @ %s]: failed to write profile report to %p due to an IO Error:
+        |   %s: %s
+        END
+        $stderr.puts(error.backtrace.map { |call| "  - #{call}" }.join("\n"))
+      end
+
+      def report
+        @profile.generate_report
+      end
+
+      alias to_s report
+    end
+  end
+end
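
Tying the pieces above together: a recognizer generated with ANTLR's
`-profile` switch mixes Profile::ParserEvents into the parser, whose
constructor installs a Profiler as the debug listener; the rule, decision,
and lookahead events feed the DataSet columns, and `terminate` writes the
boxed report. A rough sketch of the intended workflow -- `Expr::Lexer`,
`Expr::Parser`, and the `expression` entry rule are hypothetical stand-ins
for a `-profile` generated recognizer pair:

  require 'antlr3'
  require 'expr'                         # hypothetical generated recognizers

  lexer  = Expr::Lexer.new( "1 + 2 * ( 3 - 4 )" )
  parser = Expr::Parser.new( lexer )     # ParserEvents hooks up a Profiler
  parser.expression
  puts parser.profile.generate_report    # render the collected statistics
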
+++ data/lib/antlr3/recognizers.rb
@@ -0,0 +1,1280 @@
+#!/usr/bin/ruby
+# encoding: utf-8
+
+=begin LICENSE
+
+[The "BSD licence"]
+Copyright (c) 2009 Kyle Yetter
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+ 1. Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote products
+    derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=end
+
+module ANTLR3
+  unless const_defined?(:RecognizerSharedState)
+
+    RecognizerSharedState = Struct.new(
+      :following,
+      :error_recovery,
+      :last_error_index,
+      :backtracking,
+      :rule_memory,
+      :syntax_errors,
+      :token,
+      :token_start_position,
+      :token_start_line,
+      :token_start_column,
+      :channel,
+      :type,
+      :text
+    )
+
+=begin rdoc ANTLR3::RecognizerSharedState
+
+A big Struct-based class containing most of the data that makes up a
+recognizer's state. These attributes are externalized from the recognizer itself
+so that recognizer delegation (which occurs when you import other grammars into
+your grammar) can function; multiple recognizers can share a common state.
+
+== Structure Attributes
+
+following::
+  a stack that tracks follow sets for error recovery
+error_recovery::
+  a flag indicating whether or not the recognizer is in error recovery mode
+last_error_index::
+  the index in the input stream of the last error
+backtracking::
+  tracks the backtracking depth
+rule_memory::
+  if a grammar is compiled with the memoization option, this will be
+  set to a hash mapping previously parsed rules to cached indices
+syntax_errors::
+  tracks the number of syntax errors seen so far
+token::
+  holds newly constructed tokens for lexer rules
+token_start_position::
+  the input stream index at which the token starts
+token_start_line::
+  the input stream line number at which the token starts
+token_start_column::
+  the input stream column at which the token starts
+channel::
+  the channel value of the target token
+type::
+  the type value of the target token
+text::
+  the text of the target token
+
+=end
+    class RecognizerSharedState
+      def initialize
+        super([], false, -1, 0, nil, 0, nil, -1)
+        # ^-- same as this --v
+        # self.following = []
+        # self.error_recovery = false
+        # self.last_error_index = -1
+        # self.backtracking = 0
+        # self.syntax_errors = 0
+        # self.token_start_position = -1
+      end
+
+
+      # restores all of the state variables to their respective
+      # initial default values
+      def reset!
+        self.following.clear
+        self.error_recovery = false
+        self.last_error_index = -1
+        self.backtracking = 0
+        self.rule_memory and rule_memory.clear
+        self.syntax_errors = 0
+        self.token = nil
+        self.token_start_position = -1
+        self.token_start_line = nil
+        self.token_start_column = nil
+        self.channel = nil
+        self.type = nil
+        self.text = nil
+      end
+
+    end
+  end
+
+
+=begin rdoc ANTLR3::BaseRecognizer
+
+= BaseRecognizer
+
+As the base class of all ANTLR-generated recognizers, BaseRecognizer provides
+much of the shared functionality and structure used in the recognition process.
+For all effective purposes, the class and its immediate subclasses Lexer,
+Parser, and TreeParser are abstract classes. They can be instantiated, but
+they're pretty useless on their own. Instead, to make useful code, you write an
+ANTLR grammar and ANTLR will generate classes which inherit from one of the
+recognizer base classes, providing the implementation of the grammar rules
+itself. BaseRecognizer defines methods related to:
+
+* token and character matching
+* prediction and recognition strategy
+* recovering from errors
+* reporting errors
+* memoization
+* simple rule tracing and debugging
+
+=end
+  class BaseRecognizer
+    include Constants
+    include Error
+    include TokenFactory
+    extend ClassMacros
+
+    MEMO_RULE_FAILED = -2
+    MEMO_RULE_UNKNOWN = -1
+    DEFAULT_TOKEN_CHANNEL = DEFAULT_CHANNEL
+    HIDDEN = HIDDEN_CHANNEL
+
+    @rules = {}
+
+    # inherited class methods and hooks
+    class << self
+
+      attr_reader :grammar_file_name,
+                  :antlr_version,
+                  :antlr_version_string,
+                  :grammar_home
+
+      attr_accessor :token_scheme, :default_rule
+
+      # generated recognizer code uses this method to stamp
+      # the code with the name of the grammar file and
+      # the current version of ANTLR being used to generate
+      # the code
+      def generated_using(grammar_file, version_string)
+        @grammar_file_name = grammar_file.freeze
+        @antlr_version_string = version_string.freeze
+        if @antlr_version_string =~ /^(\d+)\.(\d+)(?:\.(\d+)(?:b(\d+))?)?(.*)$/
+          @antlr_version = [$1, $2, $3, $4].map! { |str| str.to_i }
+          timestamp = $5.strip
+          #@antlr_release_time = $5.empty? ? nil : Time.parse($5)
+        else
+          raise "bad version string: %p" % version_string
+        end
+      end
+
+      # this method is used to generate return-value structures for
+      # rules with multiple return values. To avoid generating
+      # a special class for every rule in AST parsers and such
+      # (where most rules have the same default set of return values),
+      # each recognizer gets a default return value structure
+      # assigned to the constant +Return+. Rules which don't
+      # require additional custom members will have a rule-return
+      # name constant that just points to the generic return
+      # value.
+      def define_return_scope(*members)
+        if members.empty? then generic_return_scope
+        else
+          members += return_scope_members
+          Struct.new(*members)
+        end
+      end
+
+      # used as a hook to add additional default members
+      # to default return value structures.
+      # For example, all AST-building parsers override
+      # this method to add an extra +:tree+ field to
+      # all rule return structures.
+      def return_scope_members
+        [:start, :stop]
+      end
+
+      # sets up and returns the generic rule return
+      # scope for a recognizer
+      def generic_return_scope
+        @generic_return_scope ||= begin
+          struct = Struct.new(*return_scope_members)
+          const_set(:Return, struct)
+        end
+      end
+
+      def imported_grammars
+        @imported_grammars ||= Set.new
+      end
+
+      def master_grammars
+        @master_grammars ||= []
+      end
+
+      def master
+        master_grammars.last
+      end
+
+      def masters( *grammar_names )
+        for grammar in grammar_names
+          unless master_grammars.include?( grammar )
+            master_grammars << grammar
+            attr_reader( Util.snake_case( grammar ) )
+          end
+        end
+      end
+      private :masters
+
+      def imports( *grammar_names )
+        for grammar in grammar_names
+          imported_grammars.add?(grammar.to_sym) and
+            attr_reader( Util.snake_case( grammar ) )
+        end
+        return imported_grammars
+      end
+      private :imports
+
+      def rules
+        self::RULE_METHODS.dup rescue []
+      end
+
+      def default_rule
+        @default_rule ||= rules.first
+      end
+
+      def debug?
+        return false
+      end
+
+      def token_class
+        @token_class ||= begin
+          self::Token rescue
+          superclass.token_class rescue
+          ANTLR3::CommonToken
+        end
+      end
+      private :generated_using
+    end
+
+    @grammar_file_name = nil
+    @antlr_version = ANTLR3::ANTLR_VERSION
+    @antlr_version_string = ANTLR3::ANTLR_VERSION_STRING
+
+    def grammar_file_name
+      self.class.grammar_file_name
+    end
+
+    def antlr_version
+      self.class.antlr_version
+    end
+
+    def antlr_version_string
+      self.class.antlr_version_string
+    end
+
+    attr_accessor :input
+    attr_reader :state
+
+    def each_delegate
+      block_given? or return enum_for( __method__ )
+      for grammar in self.class.imported_grammars
+        del = __send__( Util.snake_case( grammar ) ) and
+          yield( del )
+      end
+    end
+
+    # Create a new recognizer. The constructor simply ensures that
+    # all recognizers are initialized with a shared state object.
+    # See the main recognizer subclasses for more specific
+    # information about creating recognizer objects like
+    # lexers and parsers.
+    def initialize(options = {})
+      @state = options[:state] || RecognizerSharedState.new
+      @error_output = options.fetch(:error_output, $stderr)
+      defined?(@input) or @input = nil
+      initialize_dfas
+    end
+
+    # Resets the recognizer's state data to initial values.
+    # As a result, all error tracking and error recovery
+    # data accumulated in the current state will be cleared.
+    # It will also attempt to reset the input stream
+    # via input.reset, but it ignores any errors received
+    # from doing so. Thus the input stream is not guaranteed
+    # to be rewound to its initial position
+    def reset
+      @state and @state.reset!
+      @input and @input.reset rescue nil
+    end
+
+    # Attempt to match the current input symbol to the token type
+    # specified by +type+. If the symbol matches the type,
+    # consume the current symbol and return its value. If
+    # the symbol doesn't match, attempt to use the follow-set
+    # data provided by +follow+ to recover from the mismatched
+    # token.
+    def match(type, follow)
+      matched_symbol = current_input_symbol
+      if @input.peek == type
+        @input.consume
+        @state.error_recovery = false
+        return matched_symbol
+      end
+      raise(BacktrackingFailed) if @state.backtracking > 0
+      matched_symbol = recover_from_mismatched_token(type, follow)
+      return matched_symbol
+    end
+
+    # match anything -- i.e. wildcard match. Simply consume
+    # the current symbol from the input stream.
+    def match_any
+      @state.error_recovery = false
+      @input.consume
+    end
+
+    ##############################################################################################
+    ###################################### Error Reporting #######################################
+    ##############################################################################################
+    ##############################################################################################
+
+    # When a recognition error occurs, this method is the main
+    # hook for carrying out the error reporting process. The
+    # default implementation calls +display_recognition_error+
+    # to display the error info on $stderr.
+    def report_error(e = $!)
+      @state.error_recovery and return
+      @state.error_recovery = true
+      display_recognition_error(e)
+    end
+
+    # error reporting hook for presenting the information.
+    # The default implementation builds appropriate error
+    # message text using +error_header+ and +error_message+,
+    # and calls +emit_error_message+ to write the error
+    # message out to some source
+    def display_recognition_error(e = $!)
+      header = error_header(e)
+      message = error_message(e)
+      emit_error_message("#{header} #{message}")
+    end
+
+    # used to construct an appropriate error message
+    # based on the specific type of error and the
+    # error's attributes
+    def error_message(e = $!)
+      case e
+      when Error::UnwantedToken
+        token_name = token_name(e.expecting)
+        "extraneous input #{token_error_display(e.unexpected_token)} expecting #{token_name}"
+      when Error::MissingToken
+        token_name = token_name(e.expecting)
+        "missing #{token_name} at #{token_error_display(e.symbol)}"
+      when Error::MismatchedToken
+        token_name = token_name(e.expecting)
+        "mismatched input #{token_error_display(e.symbol)} expecting #{token_name}"
+      when Error::MismatchedTreeNode
+        token_name = token_name(e.expecting)
+        "mismatched tree node: #{e.symbol} expecting #{token_name}"
+      when Error::NoViableAlternative
+        "no viable alternative at input " << token_error_display(e.symbol)
+      when Error::MismatchedSet
+        "mismatched input %s expecting set %s" %
+          [token_error_display(e.symbol), e.expecting.inspect]
+      when Error::MismatchedNotSet
+        "mismatched input %s expecting set %s" %
+          [token_error_display(e.symbol), e.expecting.inspect]
+      when Error::FailedPredicate
+        "rule %s failed predicate: { %s }?" % [e.rule_name, e.predicate_text]
+      else e.message
+      end
+    end
+
+    # used to add a tag to the error message that indicates
+    # the location of the input stream when the error
+    # occurred
+    def error_header(e = $!)
+      e.location
+    end
+
+    # formats a token object appropriately for inspection
+    # within an error message
+    def token_error_display(token)
+      unless text = token.text
+        if token.type == EOF then text = '<EOF>'
+        elsif name = token_name(token.type) rescue false
+          text = "<#{name}>"
+        elsif token.respond_to?(:name) then text = "<#{token.name}>"
+        else text = "<#{token.type}>"
+        end
+      end
+      return text.inspect
+    end
+
+    # Write the error report data out to some source. By default,
+    # the error message is written to $stderr
+    def emit_error_message(message)
+      @error_output.puts(message) if @error_output
+    end
+
+    ##############################################################################################
+    ###################################### Error Recovery ########################################
+    ##############################################################################################
+    def recover(error = $!)
+      @state.last_error_index == @input.index and @input.consume
+      @state.last_error_index = @input.index
+
+      follow_set = compute_error_recovery_set
+
+      resync { consume_until(follow_set) }
+    end
+
+    def resync
+      begin_resync
+      value = yield
+      end_resync
+      return(value)
+    end
+
+    # overridable hook method that is executed at the start of the
+    # resyncing procedure in recover
+    #
+    # by default, it does nothing
+    def begin_resync
+      # do nothing
+    end
+
+    # overridable hook method that is executed after the resyncing
+    # procedure has completed
+    #
+    # by default, it does nothing
+    def end_resync
+      # do nothing
+    end
+
+    # (The following explanation has been lifted directly from the
+    #  source code documentation of the ANTLR Java runtime library)
+    #
+    # Compute the error recovery set for the current rule. During
+    # rule invocation, the parser pushes the set of tokens that can
+    # follow that rule reference on the stack; this amounts to
+    # computing FIRST of what follows the rule reference in the
+    # enclosing rule. This local follow set only includes tokens
+    # from within the rule; i.e., the FIRST computation done by
+    # ANTLR stops at the end of a rule.
+    #
+    # EXAMPLE
+    #
+    # When you find a "no viable alt exception", the input is not
+    # consistent with any of the alternatives for rule r. The best
+    # thing to do is to consume tokens until you see something that
+    # can legally follow a call to r *or* any rule that called r.
+    # You don't want the exact set of viable next tokens because the
+    # input might just be missing a token--you might consume the
+    # rest of the input looking for one of the missing tokens.
+    #
+    # Consider grammar:
+    #
+    #   a : '[' b ']'
+    #     | '(' b ')'
+    #     ;
+    #   b : c '^' INT ;
+    #   c : ID
+    #     | INT
+    #     ;
+    #
+    # At each rule invocation, the set of tokens that could follow
+    # that rule is pushed on a stack. Here are the various "local"
+    # follow sets:
+    #
+    #   FOLLOW(b1_in_a) = FIRST(']') = ']'
+    #   FOLLOW(b2_in_a) = FIRST(')') = ')'
+    #   FOLLOW(c_in_b)  = FIRST('^') = '^'
+    #
+    # Upon erroneous input "[]", the call chain is
+    #
+    #   a -> b -> c
+    #
+    # and, hence, the follow context stack is:
+    #
+    #   depth   local follow set   after call to rule
+    #     0         \<EOF>         a (from main())
+    #     1          ']'           b
+    #     3          '^'           c
+    #
+    # Notice that <tt>')'</tt> is not included, because b would have to have
+    # been called from a different context in rule a for ')' to be
+    # included.
+    #
+    # For error recovery, we cannot consider FOLLOW(c)
+    # (context-sensitive or otherwise). We need the combined set of
+    # all context-sensitive FOLLOW sets--the set of all tokens that
+    # could follow any reference in the call chain. We need to
+    # resync to one of those tokens. Note that FOLLOW(c)='^' and if
+    # we resync'd to that token, we'd consume until EOF. We need to
+    # sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
+    # In this case, for input "[]", LA(1) is in this set so we would
+    # not consume anything and after printing an error rule c would
+    # return normally. It would not find the required '^' though.
+    # At this point, it gets a mismatched token error and throws an
+    # exception (since LA(1) is not in the viable following token
+    # set). The rule exception handler tries to recover, but finds
+    # the same recovery set and doesn't consume anything. Rule b
+    # exits normally returning to rule a. Now it finds the ']' (and
+    # with the successful match exits errorRecovery mode).
+    #
+    # So, you can see that the parser walks up the call chain looking
+    # for the token that was a member of the recovery set.
+    #
+    # Errors are not generated in errorRecovery mode.
+    #
+    # ANTLR's error recovery mechanism is based upon original ideas:
+    #
+    # "Algorithms + Data Structures = Programs" by Niklaus Wirth
+    #
+    # and
+    #
+    # "A note on error recovery in recursive descent parsers":
+    # http://portal.acm.org/citation.cfm?id=947902.947905
+    #
+    # Later, Josef Grosch had some good ideas:
+    #
+    # "Efficient and Comfortable Error Recovery in Recursive Descent
+    # Parsers":
+    # ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
+    #
+    # Like Grosch I implemented local FOLLOW sets that are combined
+    # at run-time upon error to avoid overhead during parsing.
+    def compute_error_recovery_set
+      combine_follows(false)
+    end
+
+    def recover_from_mismatched_token(type, follow)
+      if mismatch_is_unwanted_token?(type)
+        err = UnwantedToken(type)
+
+        begin_resync
+        @input.consume
+        end_resync
+
+        report_error(err)
+
+        matched_symbol = current_input_symbol
+        @input.consume
+        return matched_symbol
+      end
+
+      if mismatch_is_missing_token?(follow)
+        inserted = missing_symbol(err, type, follow)
+        err = MissingToken(type, inserted)
+
+        report_error(err)
+        return inserted
+      end
+
+      err = MismatchedToken(type)
+      raise err
+    end
+
+    def recover_from_mismatched_set(e, follow)
+      if mismatch_is_missing_token?(follow)
+        report_error(e)
+        return missing_symbol(e, INVALID_TOKEN_TYPE, follow)
+      end
+      raise e
+    end
+
+    # Conjure up a missing token during error recovery.
+    #
+    # The recognizer attempts to recover from single missing
+    # symbols. But, actions might refer to that missing symbol.
+    # For example, x=ID {f($x);}. The action clearly assumes
+    # that there has been an identifier matched previously and that
+    # $x points at that token. If that token is missing, but
+    # the next token in the stream is what we want we assume that
+    # this token is missing and we keep going. Because we
+    # have to return some token to replace the missing token,
+    # we have to conjure one up. This method gives the user control
+    # over the tokens returned for missing tokens. Mostly,
+    # you will want to create something special for identifier
+    # tokens. For literals such as '{' and ',', the default
+    # action in the parser or tree parser works. It simply creates
+    # a CommonToken of the appropriate type. The text will be the token.
+    # If you change what tokens must be created by the lexer,
+    # override this method to create the appropriate tokens.
+    def missing_symbol(error, expected_token_type, follow)
+      return nil
+    end
+
+    def recover_from_mismatched_element(e, follow)
+      follow.nil? and return false
+      if follow.include?(EOR_TOKEN_TYPE)
+        viable_tokens = compute_context_sensitive_rule_follow()
+        follow = (follow | viable_tokens) - Set.new([EOR_TOKEN_TYPE])
+      end
+      if follow.include?(@input.peek)
+        report_error(e)
+        return true
+      end
+      return false
+    end
+
+    def mismatch_is_unwanted_token?(type)
+      @input.peek(2) == type
+    end
+
+    def mismatch_is_missing_token?(follow)
+      follow.nil? and return false
+      if follow.include?(EOR_TOKEN_TYPE)
+        viable_tokens = compute_context_sensitive_rule_follow
+        follow = follow | viable_tokens
+
+        follow.delete(EOR_TOKEN_TYPE) unless @state.following.empty?
+      end
+      if follow.include?(@input.peek) or follow.include?(EOR_TOKEN_TYPE)
+        return true
+      end
+      return false
+    end
+
+    # factor out what to do upon token mismatch so
+    # tree parsers can behave differently.
+    #
+    # * override this method in your parser to do things
+    #   like bailing out after the first error
+    # * just raise the exception instead of
+    #   calling the recovery method.
+    #
+    def number_of_syntax_errors
+      @state.syntax_errors
+    end
+
+    # Compute the context-sensitive FOLLOW set for the current rule.
+    # This is the set of token types that can follow a specific rule
+    # reference given a specific call chain. You get the set of
+    # viable tokens that can possibly come next (look depth 1)
+    # given the current call chain. Contrast this with the
+    # definition of plain FOLLOW for rule r:
+    #
+    #   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
+    #
+    # where x in T* and alpha, beta in V*; T is set of terminals and
+    # V is the set of terminals and nonterminals. In other words,
+    # FOLLOW(r) is the set of all tokens that can possibly follow
+    # references to r in *any* sentential form (context). At
+    # runtime, however, we know precisely which context applies as
+    # we have the call chain. We may compute the exact (rather
+    # than covering superset) set of following tokens.
+    #
+    # For example, consider grammar:
+    #
+    #   stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
+    #        | "return" expr '.'
+    #        ;
+    #   expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
+    #   atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
+    #        | '(' expr ')'
+    #        ;
+    #
+    # The FOLLOW sets are all inclusive whereas context-sensitive
+    # FOLLOW sets are precisely what could follow a rule reference.
+    # For input "i=(3);", here is the derivation:
+    #
+    #   stat => ID '=' expr ';'
+    #        => ID '=' atom ('+' atom)* ';'
+    #        => ID '=' '(' expr ')' ('+' atom)* ';'
+    #        => ID '=' '(' atom ')' ('+' atom)* ';'
+    #        => ID '=' '(' INT ')' ('+' atom)* ';'
+    #        => ID '=' '(' INT ')' ';'
+    #
+    # At the "3" token, you'd have a call chain of
+    #
+    #   stat -> expr -> atom -> expr -> atom
+    #
+    # What can follow that specific nested ref to atom? Exactly ')'
+    # as you can see by looking at the derivation of this specific
+    # input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
+    #
+    # You want the exact viable token set when recovering from a
+    # token mismatch. Upon token mismatch, if LA(1) is member of
+    # the viable next token set, then you know there is most likely
+    # a missing token in the input stream. "Insert" one by just not
+    # throwing an exception.
+    def compute_context_sensitive_rule_follow
+      combine_follows(true)
+    end
+
+    def combine_follows(exact)
+      follow_set = Set.new
+      @state.following.each_with_index.reverse_each do |local_follow_set, index|
+        follow_set |= local_follow_set
+        if exact
+          if local_follow_set.include?(EOR_TOKEN_TYPE)
+            follow_set.delete(EOR_TOKEN_TYPE) if index > 0
+          else
+            break
+          end
+        end
+      end
+      return follow_set
+    end
+
+    # Match needs to return the current input symbol, which gets put
+    # into the label for the associated token ref; e.g., x=ID. Token
+    # and tree parsers need to return different objects. Rather than test
+    # for input stream type or change the IntStream interface, I use
+    # a simple method to ask the recognizer to tell me what the current
+    # input symbol is.
+    #
+    # This is ignored for lexers.
+
+    def current_input_symbol
+      return nil
+    end
+
+    # Consume tokens until one matches the given token or token set
+    #
+    # tokenTypes can be a single token type or a set of token types
+    def consume_until(token_types)
+      token_types.is_a?(Set) or token_types = Set.new(token_types.to_a)
+      type = @input.peek
+      until type == EOF or token_types.include?(type)
+        @input.consume
+        type = @input.peek
+      end
+      return(type)
+    end
+
+    def backtracking_level
+      @state.backtracking
+    end
+
+    def backtracking_level=(n)
+      @state.backtracking = n
+    end
+
+    def backtrack
+      @state.backtracking += 1
+      start = @input.mark
+      success =
+        begin yield
+        rescue BacktrackingFailed then false
+        else true
+        end
+      return success
+    ensure
+      @input.rewind(start)
+      @state.backtracking -= 1
+    end
+
+    def syntactic_predicate?(name)
+      backtrack { send(name) }
+    end
+
+    alias backtracking backtracking_level
+    alias backtracking= backtracking_level=
+
+    def rule_memoization(rule, start_index)
+      @state.rule_memory[rule] ||= Hash.new(MEMO_RULE_UNKNOWN)
+      @state.rule_memory[rule][start_index]
+    end
+
+    def already_parsed_rule?(rule)
+      stop_index = rule_memoization(rule, @input.index)
+      case stop_index
+      when MEMO_RULE_UNKNOWN then return false
+      when MEMO_RULE_FAILED  then return true
+      else
+        @input.seek(stop_index + 1)
+      end
+      return true
+    end
+
+    def memoize(rule, start_index, success)
+      stop_index = success ? (@input.index - 1) : MEMO_RULE_FAILED
+      memo = @state.rule_memory[rule] and memo[start_index] = stop_index
+    end
+
+    def trace_in(rule_name, rule_index, input_symbol)
+      @error_output.printf("--> enter %s on %s", rule_name, input_symbol)
+      @state.backtracking > 0 and @error_output.printf(
+        " (in backtracking mode: depth = %s)", @state.backtracking
+      )
+      @error_output.print("\n")
+    end
+
+    def trace_out(rule_name, rule_index, input_symbol)
+      @error_output.printf("<-- exit %s on %s", rule_name, input_symbol)
+      @state.backtracking > 0 and @error_output.printf(
+        " (in backtracking mode: depth = %s)", @state.backtracking
+      )
+      @error_output.print("\n")
+    end
+
+    private
+
+    def initialize_dfas
+      # do nothing
+    end
+  end
+
|
837
|
+
|
838
|
+
= Lexer
|
839
|
+
|
840
|
+
Lexer is the default superclass of all lexers generated by ANTLR. The class
|
841
|
+
tailors the core functionality provided by BaseRecognizer to the task of
|
842
|
+
matching patterns in the text input and breaking the input into tokens.
|
843
|
+
|
844
|
+
== About Lexers
|
845
|
+
|
846
|
+
A lexer's job is to take input text and break it up into _tokens_ -- objects
|
847
|
+
that encapsulate a piece of text, a type label (such as ID or INTEGER), and the
|
848
|
+
position of the text with respect to the input. Thus, a lexer is essentially a
|
849
|
+
complicated iterator that steps through an input stream and produces a sequence
|
850
|
+
of tokens. Sometimes lexers are enough to carry out a goal on their own, such as
|
851
|
+
tasks like source code highlighting and simple code analysis. Usually, however,
|
852
|
+
the lexer converts text into tokens for use by a parser, which recognizes larger
|
853
|
+
structures within the text.
|
854
|
+
|
855
|
+
ANTLR parsers have a variety of entry points specified by parser rules, each of
|
856
|
+
which defines the structure of a specific type of sentence in a grammar. Lexers,
|
857
|
+
however, are primarily intended to have a single entry point. It looks at the
|
858
|
+
characters starting at the current input position, decides if the chunk of text
|
859
|
+
matches one of a number of possible token type definitions, wraps the chunk into
|
860
|
+
a token with information on its type and location, and advances the input stream
|
861
|
+
to the next place.
|
862
|
+
|
863
|
+
== ANTLR Lexers and the Lexer API
|
864
|
+
|
865
|
+
ANTLR-generated lexers will subclass this class, unless specified otherwise
|
866
|
+
within a grammar file. The generated class will provide an implementation of
|
867
|
+
each lexer rule as a method of the same name. The subclass will also provide an
|
868
|
+
implementation for the abstract method #m_tokens, the purpose of which is to
|
869
|
+
multiplex the token type definitions and predict what rule definition to execute
|
870
|
+
to fetch a token. The primary method in the lexer API, #next_token, uses
|
871
|
+
#m_tokens to fetch the next token and drive the iteration.
|
872
|
+
|
873
|
+
If the lexer is preparing tokens for use by an ANTLR generated parser, the lexer
|
874
|
+
will generally be used to build a TokenStream object. The following code example
|
875
|
+
demonstrates the typical setup for using ANTLR parsers and lexers in Ruby.
|
876
|
+
|
877
|
+
# in HypotheticalLexer.rb
|
878
|
+
module Hypothetical
|
879
|
+
class Lexer < ANTLR3::Lexer
|
880
|
+
# ...
|
881
|
+
# ANTLR generated code
|
882
|
+
# ...
|
883
|
+
end
|
884
|
+
end
|
885
|
+
|
886
|
+
# in HypotheticalParser.rb
|
887
|
+
module Hypothetical
|
888
|
+
class Parser < ANTLR3::Parser
|
889
|
+
# ...
|
890
|
+
# more ANTLR generated code
|
891
|
+
# ...
|
892
|
+
end
|
893
|
+
end
|
894
|
+
|
895
|
+
# to take hypothetical source code and prepare it for parsing,
|
896
|
+
# there is generally a four-step construction process
|
897
|
+
|
898
|
+
source = "some hypothetical source code"
|
899
|
+
input = ANTLR3::StringStream.new(source, :file => 'blah-de-blah.hyp')
|
900
|
+
lexer = Hypothetical::Lexer.new(input)
|
901
|
+
tokens = ANTLR3::CommonTokenStream.new(lexer)
|
902
|
+
parser = Hypothetical::Parser.new(tokens)
|
903
|
+
|
904
|
+
# if you're using the standard streams, ANTLR3::StringStream and
|
905
|
+
# ANTLR3::CommonTokenStream, you can write the same process
|
906
|
+
# shown above more succinctly:
|
907
|
+
|
908
|
+
lexer = Hypothetical::Lexer.new("some hypothetical source code", :file => 'blah-de-blah.hyp')
|
909
|
+
parser = Hypothetical::Parser.new(lexer)
|
910
|
+
|
911
|
+
=end
|
912
|
+
class Lexer < BaseRecognizer
|
913
|
+
include TokenSource
|
914
|
+
@token_class = CommonToken
|
915
|
+
|
916
|
+
def self.default_rule
|
917
|
+
@default_rule ||= :token!
|
918
|
+
end
|
919
|
+
|
920
|
+
def self.main(argv = ARGV, options = {})
|
921
|
+
if argv.is_a?(::Hash) then argv, options = ARGV, argv end
|
922
|
+
main = ANTLR3::Main::LexerMain.new(self, options)
|
923
|
+
block_given? ? yield(main) : main.execute(argv)
|
924
|
+
end
|
925
|
+
|
926
|
+
def self.associated_parser
|
927
|
+
@grammar_home and @grammar_home::Parser
|
928
|
+
rescue NameError
|
929
|
+
grammar_name = @grammar_home.name.split("::").last
|
930
|
+
begin
|
931
|
+
require "#{grammar_name}Parser"
|
932
|
+
rescue LoadError => e
|
933
|
+
return nil
|
934
|
+
end
|
935
|
+
return @grammar_home::Parser rescue nil
|
936
|
+
end
|
937
|
+
|
938
|
+
def self.associated_parser
|
939
|
+
@associated_parser ||= begin
|
940
|
+
@grammar_home and @grammar_home::Parser
|
941
|
+
rescue NameError
|
942
|
+
grammar_name = @grammar_home.name.split("::").last
|
943
|
+
begin
|
944
|
+
require "#{grammar_name}Parser"
|
945
|
+
@grammar_home::Parser
|
946
|
+
rescue LoadError, NameError
|
947
|
+
end
|
948
|
+
end
|
949
|
+
end
|
950
|
+
|
951
|
+
def initialize(input, options = {})
|
952
|
+
super(options)
|
953
|
+
@input =
|
954
|
+
case input
|
955
|
+
when ::String then StringStream.new(input, options)
|
956
|
+
when ::IO then FileStream.new(input, options)
|
957
|
+
else input
|
958
|
+
end
|
959
|
+
end
|
960
|
+
|
961
|
+
  def next_token
    loop do
      @state.token = nil
      @state.channel = DEFAULT_CHANNEL
      @state.token_start_position = @input.index
      @state.token_start_column = @input.column
      @state.token_start_line = @input.line
      @state.text = nil
      @input.peek == EOF and return EOF_TOKEN
      begin
        token!

        case token = @state.token
        when nil then return(emit())
        when SKIP_TOKEN then next
        else
          return token
        end
      rescue NoViableAlternative => re
        report_error(re)
        recover(re)
      rescue Error::RecognitionError => re
        report_error(re)
      end
    end
  end

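  # flags the current token for disposal by setting the state's token to
  # SKIP_TOKEN, which causes #next_token to discard the match and scan onward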
  def skip
    @state.token = SKIP_TOKEN
  end

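  # token! is the generated lexer's central rule-dispatch method; it is
  # declared abstract here and implemented by the ANTLR-generated code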
  abstract :token!

  def exhaust
    self.to_a
  end

  def char_stream=(input)
    @input = nil
    reset
    @input = input
  end

  def source_name
    @input.source_name
  end

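  # registers +token+ (or a token freshly built from the current lexer
  # state) as the product of the current rule and returns it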
  def emit(token = nil)
    token ||= create_token
    @state.token = token
    return token
  end

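  # matches the input against +expected+, which is either a String, matched
  # byte-by-byte, or a single integer character code; on a mismatch,
  # BacktrackingFailed is raised while backtracking, and otherwise a
  # MismatchedToken error is raised after attempting recovery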
  def match(expected)
    case expected
    when String
      expected.each_byte do |char|
        unless @input.peek == char
          @state.backtracking > 0 and raise BacktrackingFailed
          error = MismatchedToken(char)
          recover(error)
          raise error
        end
        @input.consume
      end
    else # single integer character
      unless @input.peek == expected
        @state.backtracking > 0 and raise BacktrackingFailed
        error = MismatchedToken(expected)
        recover(error)
        raise error
      end
      @input.consume
    end
    return true
  end

  def match_any
    @input.consume
  end

  def match_range(min, max)
    char = @input.peek
    if char.between?(min, max) then @input.consume
    else
      @state.backtracking > 0 and raise BacktrackingFailed
      error = MismatchedRange(min.chr, max.chr)
      recover(error)
      raise error
    end
    return true
  end

  def line
    @input.line
  end

  def column
    @input.column
  end

  def character_index
    @input.index
  end

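  # returns the text of the token currently being matched: the explicitly
  # assigned token text, if any, or the slice of the input stream between
  # the token's start position and the current position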
  def text
    @state.text and return @state.text
    @input.substring(@state.token_start_position, character_index - 1)
  end

  def text=(text)
    @state.text = text
  end

  def report_error(e)
    display_recognition_error(e)
  end

  def error_message(e)
    char = character_error_display(e.symbol) rescue nil
    case e
    when Error::MismatchedToken
      expecting = character_error_display(e.expecting)
      "mismatched character #{char}; expecting #{expecting}"
    when Error::NoViableAlternative
      "no viable alternative at character #{char}"
    when Error::EarlyExit
      "required (...)+ loop did not match anything at character #{char}"
    when Error::MismatchedNotSet
      "mismatched character %s; expecting set %p" % [char, e.expecting]
    when Error::MismatchedSet
      "mismatched character %s; expecting set %p" % [char, e.expecting]
    when Error::MismatchedRange
      a = character_error_display(e.min)
      b = character_error_display(e.max)
      "mismatched character %s; expecting set %s..%s" % [char, a, b]
    else super
    end
  end

  def character_error_display(char)
    case char
    when EOF then '<EOF>'
    when Integer then char.chr.inspect
    else char.inspect
    end
  end

  def recover(re)
    @input.consume
  end

  private

  def trace_in(rule_name, rule_index)
    if symbol = @input.look and symbol != EOF then symbol = symbol.inspect
    else symbol = '<EOF>' end
    input_symbol = "#{symbol} @ line #{line} / col #{column}"
    super(rule_name, rule_index, input_symbol)
  end

  def trace_out(rule_name, rule_index)
    if symbol = @input.look and symbol != EOF then symbol = symbol.inspect
    else symbol = '<EOF>' end
    input_symbol = "#{symbol} @ line #{line} / col #{column}"
    super(rule_name, rule_index, input_symbol)
  end

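  # builds a token via the inherited create_token implementation; when no
  # customization block is given, the new token is populated from the
  # current lexer state and character stream position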
  def create_token(&b)
    if block_given? then super(&b)
    else
      super do |t|
        t.input = @input
        t.type = @state.type
        t.channel = @state.channel
        t.start = @state.token_start_position
        t.stop = @input.index - 1
        t.line = @state.token_start_line
        t.text = self.text
        t.column = @state.token_start_column
      end
    end
  end
end


=begin rdoc ANTLR3::Parser

= Parser

Parser is the default base class of ANTLR-generated parser classes. The class
tailors the functionality provided by BaseRecognizer to the task of parsing.

== About Parsing

This is just a loose overview of parsing. For considerably more in-depth
coverage of the topic, read the ANTLR documentation or check out the ANTLR
website (http://www.antlr.org).

A grammar defines the vocabulary and the sentence structure of a language.
While a lexer concerns itself with the basic vocabulary symbols of the
language, a parser's primary task is to implement the sentence structure.

Parsers are set up by providing a stream of tokens, which is usually created
by a corresponding lexer. Then, the user requests a specific sentence
structure within the grammar, such as "class_definition" or "xml_node", from
the parser. The parser iterates through the tokens, verifying the syntax of
the sentence and performing actions specified by the grammar. It stops when
it encounters an error or when it has matched the full sentence according to
its defined structure.

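For illustration only, here is a rough sketch of that process, reusing the
hypothetical grammar from the Lexer documentation and assuming it defines a
parser rule named +class_definition+:

  lexer  = Hypothetical::Lexer.new( "some hypothetical source code" )
  tokens = ANTLR3::CommonTokenStream.new( lexer )
  parser = Hypothetical::Parser.new( tokens )
  result = parser.class_definition  # match one class-definition sentence
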
== ANTLR Parsers and the Parser API

Plain ANTLR-generated parsers directly subclass this class, unless specified
otherwise within the grammar options. The generated code will provide a method
for each parser rule defined in the ANTLR grammar, as well as any other
customized member attributes and methods specified in the source grammar.

This class does not override much of the functionality in BaseRecognizer, and
thus its API closely mirrors that of BaseRecognizer.

=end
class Parser < BaseRecognizer
  def self.main(argv = ARGV, options = {})
    if argv.is_a?(::Hash) then argv, options = ARGV, argv end
    main = ANTLR3::Main::ParserMain.new(self, options)
    block_given? ? yield(main) : main.execute(argv)
  end

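  # looks up the lexer class generated from the same grammar as this parser:
  # it checks for a Lexer constant in the grammar's home module, attempting
  # to load "#{grammar_name}Lexer" if the constant is not yet defined, and
  # memoizes whatever it finds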
  def self.associated_lexer
    @associated_lexer ||= begin
      @grammar_home and @grammar_home::Lexer
    rescue NameError
      grammar_name = @grammar_home.name.split("::").last
      begin
        require "#{grammar_name}Lexer"
        @grammar_home::Lexer
      rescue LoadError, NameError
      end
    end
  end

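  # creates a new parser for +input+; anything other than a TokenStream is
  # first converted to one by #cast_input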
  def initialize(input, options = {})
    super(options)
    @input = nil
    reset
    input = cast_input( input, options ) unless TokenStream === input
    @input = input
  end

  def current_input_symbol
    @input.look
  end

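  # fabricates a placeholder token during single-token-insertion error
  # recovery when a token of +expected_type+ was expected but not found;
  # the stand-in borrows its position information from the current (or
  # previous) token in the input stream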
  def missing_symbol(error, expected_type, follow)
    current = @input.look
    current = @input.look(-1) if current == ANTLR3::EOF_TOKEN
    t =
      case
      when current && current != ANTLR3::EOF_TOKEN then current.clone
      when @input.token_class then @input.token_class.new
      else (create_token rescue CommonToken.new)
      end

    t.type = expected_type
    name = t.name.gsub(/(^<)|(>$)/, '')
    t.text = "<missing #{name}>"
    t.channel = DEFAULT_CHANNEL
    return t
  end

  def token_stream=(input)
    @input = nil
    reset
    @input = input
  end
  alias token_stream input

  def source_name
    @input.source_name
  end

  private

  def trace_in(rule_name, rule_index)
    super(rule_name, rule_index, @input.look.inspect)
  end

  def trace_out(rule_name, rule_index)
    super(rule_name, rule_index, @input.look.inspect)
  end

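  # converts +input+ into a token stream: a TokenSource is wrapped in a
  # CommonTokenStream, an IO or String is run through the associated lexer
  # (when one can be located), and any other object is accepted as-is so
  # long as it responds to peek and consume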
  def cast_input( input, options )
    case input
    when TokenSource then CommonTokenStream.new( input, options )
    when IO, String
      if lexer_class = self.class.associated_lexer
        CommonTokenStream.new( lexer_class.new( input, options ), options )
      else
        raise ArgumentError, Util.tidy( <<-END, true )
        | unable to automatically convert input #{ input.inspect }
        | to an ANTLR3::TokenStream object as #{ self.class }
        | does not appear to have an associated lexer class
        END
      end
    else
      # assume it's a stream if it at least implements peek and consume
      unless input.respond_to?( :peek ) and input.respond_to?( :consume )
        raise ArgumentError, Util.tidy( <<-END, true )
        | #{ self.class } requires a token stream as input, but
        | #{ input.inspect } was provided
        END
      end
      input
    end
  end

end

end