antlr4 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +27 -0
- data/README.md +46 -0
- data/lib/antlr4.rb +262 -0
- data/lib/antlr4/BufferedTokenStream.rb +306 -0
- data/lib/antlr4/CommonTokenFactory.rb +53 -0
- data/lib/antlr4/CommonTokenStream.rb +56 -0
- data/lib/antlr4/FileStream.rb +14 -0
- data/lib/antlr4/InputStream.rb +82 -0
- data/lib/antlr4/IntervalSet.rb +341 -0
- data/lib/antlr4/LL1Analyzer.rb +177 -0
- data/lib/antlr4/Lexer.rb +335 -0
- data/lib/antlr4/ListTokenSource.rb +140 -0
- data/lib/antlr4/Parser.rb +562 -0
- data/lib/antlr4/ParserInterpreter.rb +149 -0
- data/lib/antlr4/ParserRuleContext.rb +162 -0
- data/lib/antlr4/PredictionContext.rb +690 -0
- data/lib/antlr4/Recognizer.rb +162 -0
- data/lib/antlr4/RuleContext.rb +226 -0
- data/lib/antlr4/Token.rb +124 -0
- data/lib/antlr4/TokenFactory.rb +3 -0
- data/lib/antlr4/TokenSource.rb +4 -0
- data/lib/antlr4/TokenStream.rb +3 -0
- data/lib/antlr4/TraceListener.rb +23 -0
- data/lib/antlr4/atn/ATN.rb +133 -0
- data/lib/antlr4/atn/ATNConfig.rb +146 -0
- data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
- data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
- data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
- data/lib/antlr4/atn/ATNSimulator.rb +43 -0
- data/lib/antlr4/atn/ATNState.rb +253 -0
- data/lib/antlr4/atn/ATNType.rb +22 -0
- data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
- data/lib/antlr4/atn/LexerAction.rb +311 -0
- data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
- data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
- data/lib/antlr4/atn/PredictionMode.rb +525 -0
- data/lib/antlr4/atn/SemanticContext.rb +355 -0
- data/lib/antlr4/atn/Transition.rb +297 -0
- data/lib/antlr4/base.rb +60 -0
- data/lib/antlr4/dfa/DFA.rb +128 -0
- data/lib/antlr4/dfa/DFASerializer.rb +77 -0
- data/lib/antlr4/dfa/DFAState.rb +133 -0
- data/lib/antlr4/error.rb +151 -0
- data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
- data/lib/antlr4/error/ErrorListener.rb +109 -0
- data/lib/antlr4/error/ErrorStrategy.rb +742 -0
- data/lib/antlr4/tree/Chunk.rb +31 -0
- data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
- data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
- data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
- data/lib/antlr4/tree/RuleTagToken.rb +39 -0
- data/lib/antlr4/tree/TokenTagToken.rb +38 -0
- data/lib/antlr4/tree/Tree.rb +204 -0
- data/lib/antlr4/tree/Trees.rb +111 -0
- data/lib/antlr4/version.rb +5 -0
- data/lib/antlr4/xpath/XPath.rb +354 -0
- data/lib/double_key_map.rb +78 -0
- data/lib/java_symbols.rb +24 -0
- data/lib/uuid.rb +87 -0
- data/test/test_intervalset.rb +664 -0
- data/test/test_tree.rb +140 -0
- data/test/test_uuid.rb +122 -0
- metadata +109 -0
@@ -0,0 +1,136 @@
|
|
1
|
+
#
|
2
|
+
# [The "BSD license"]
|
3
|
+
# Copyright (c) 2012 Terence Parr
|
4
|
+
# Copyright (c) 2012 Sam Harwell
|
5
|
+
# Copyright (c) 2014 Eric Vergnaud
|
6
|
+
# All rights reserved.
|
7
|
+
#
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
9
|
+
# modification, are permitted provided that the following conditions
|
10
|
+
# are met:
|
11
|
+
#
|
12
|
+
# 1. Redistributions of source code must retain the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer.
|
14
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
# 3. The name of the author may not be used to endorse or promote products
|
18
|
+
# derived from this software without specific prior written permission.
|
19
|
+
#
|
20
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
21
|
+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
22
|
+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
23
|
+
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
24
|
+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
25
|
+
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
26
|
+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
27
|
+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
28
|
+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
29
|
+
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30
|
+
#
|
31
|
+
|
32
|
+
|
33
|
+
#
|
34
|
+
# This implementation of {@link ANTLRErrorListener} can be used to identify
|
35
|
+
# certain potential correctness and performance problems in grammars. "Reports"
|
36
|
+
# are made by calling {@link Parser#notifyErrorListeners} with the appropriate
|
37
|
+
# message.
|
38
|
+
#
|
39
|
+
# <ul>
|
40
|
+
# <li><b>Ambiguities</b>: These are cases where more than one path through the
|
41
|
+
# grammar can match the input.</li>
|
42
|
+
# <li><b>Weak context sensitivity</b>: These are cases where full-context
|
43
|
+
# prediction resolved an SLL conflict to a unique alternative which equaled the
|
44
|
+
# minimum alternative of the SLL conflict.</li>
|
45
|
+
# <li><b>Strong (forced) context sensitivity</b>: These are cases where the
|
46
|
+
# full-context prediction resolved an SLL conflict to a unique alternative,
|
47
|
+
# <em>and</em> the minimum alternative of the SLL conflict was found to not be
|
48
|
+
# a truly viable alternative. Two-stage parsing cannot be used for inputs where
|
49
|
+
# this situation occurs.</li>
|
50
|
+
# </ul>
|
51
|
+
|
52
|
+
# Error listener that converts ambiguity, full-context and
# context-sensitivity reports into plain messages and forwards them through
# recognizer.notifyErrorListeners.
class DiagnosticErrorListener < ErrorListener

  # When true, reportAmbiguity ignores ambiguities that are not exact.
  attr_accessor :exactOnly

  # @param exactOnly whether only exact ambiguities are reported
  #   (as opposed to all ambiguities); defaults to true
  def initialize(exactOnly=true)
    @exactOnly = exactOnly
  end

  # Reports an ambiguity as
  #   reportAmbiguity d=<decision>: ambigAlts=<alts>, input='<text>'
  # Returns nil without reporting when exactOnly is set and +exact+ is falsy;
  # otherwise returns whatever recognizer.notifyErrorListeners returns.
  def reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
    return if exactOnly && !exact

    decision = getDecisionDescription(recognizer, dfa)
    alts = getConflictingAlts(ambigAlts, configs)
    text = recognizer.getTokenStream().getText(startIndex, stopIndex)
    recognizer.notifyErrorListeners("reportAmbiguity d=#{decision}: ambigAlts=#{alts}, input='#{text}'")
  end

  # Reports the switch to full-context prediction as
  #   reportAttemptingFullContext d=<decision>, input='<text>'
  # conflictingAlts and configs are accepted but not used in the message.
  def reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
    decision = getDecisionDescription(recognizer, dfa)
    text = recognizer.getTokenStream().getText(startIndex, stopIndex)
    recognizer.notifyErrorListeners("reportAttemptingFullContext d=#{decision}, input='#{text}'")
  end

  # Reports a context sensitivity as
  #   reportContextSensitivity d=<decision>, input='<text>'
  # prediction and configs are accepted but not used in the message.
  def reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
    decision = getDecisionDescription(recognizer, dfa)
    text = recognizer.getTokenStream().getText(startIndex, stopIndex)
    recognizer.notifyErrorListeners("reportContextSensitivity d=#{decision}, input='#{text}'")
  end

  # Describes the decision of +dfa+ for use in a report message.
  #
  # @return [String] "<decision> (<ruleName>)" when the DFA's start state has
  #   a rule index within recognizer.ruleNames and that name is non-empty;
  #   otherwise just the decision number as a string
  def getDecisionDescription(recognizer, dfa)
    decision = dfa.decision
    ruleIndex = dfa.atnStartState.ruleIndex

    ruleNames = recognizer.ruleNames
    return decision.to_s if ruleIndex < 0 || ruleIndex >= ruleNames.length

    ruleName = ruleNames[ruleIndex]
    # Rule names are strings, so test for emptiness; String has no #zero?.
    return decision.to_s if ruleName.nil? || ruleName.empty?

    "#{decision} (#{ruleName})"
  end

  # Computes the set of conflicting or ambiguous alternatives from a
  # configuration set, if that information was not already provided by the
  # parser.
  #
  # @param reportedAlts the alternatives reported by the parser, or nil
  # @param configs the conflicting or ambiguous configuration set; only
  #   iterated (each element must respond to #alt) when reportedAlts is nil
  # @return reportedAlts if it is not nil, otherwise a Set of the alt values
  #   found in configs
  def getConflictingAlts(reportedAlts, configs)
    return reportedAlts unless reportedAlts.nil?

    result = Set.new
    configs.each { |config| result.add(config.alt) }
    result
  end
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
#
|
2
|
+
# [The "BSD license"]
|
3
|
+
# Copyright (c) 2012 Terence Parr
|
4
|
+
# Copyright (c) 2012 Sam Harwell
|
5
|
+
# Copyright (c) 2014 Eric Vergnaud
|
6
|
+
# Copyright (c) 2014 Chad Slaughter
|
7
|
+
# All rights reserved.
|
8
|
+
#
|
9
|
+
# Redistribution and use in source and binary forms, with or without
|
10
|
+
# modification, are permitted provided that the following conditions
|
11
|
+
# are met:
|
12
|
+
#
|
13
|
+
# 1. Redistributions of source code must retain the above copyright
|
14
|
+
# notice, this list of conditions and the following disclaimer.
|
15
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
16
|
+
# notice, this list of conditions and the following disclaimer in the
|
17
|
+
# documentation and/or other materials provided with the distribution.
|
18
|
+
# 3. The name of the author may not be used to endorse or promote products
|
19
|
+
# derived from this software without specific prior written permission.
|
20
|
+
#
|
21
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
22
|
+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
23
|
+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
24
|
+
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
25
|
+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
26
|
+
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
27
|
+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
28
|
+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
29
|
+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
30
|
+
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
|
32
|
+
# Provides an empty default implementation of {@link ANTLRErrorListener}. The
|
33
|
+
# default implementation of each method does nothing, but can be overridden as
|
34
|
+
# necessary.
|
35
|
+
|
36
|
+
# Empty default implementation of the error-listener callbacks. Every method
# is a no-op returning nil; subclasses override the ones they care about.
class ErrorListener
  # Called for a syntax error; does nothing here.
  def syntaxError(recognizer, offendingSymbol, line, column, msg, e)
  end

  # Called when an ambiguity is reported; does nothing here.
  def reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
  end

  # Called when full-context prediction is attempted; does nothing here.
  def reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
  end

  # Called when a context sensitivity is reported; does nothing here.
  def reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
  end
end
|
48
|
+
|
49
|
+
# Error listener that prints syntax errors to STDERR.
class ConsoleErrorListener < ErrorListener
  # Returns the default ConsoleErrorListener, creating it on first use.
  # The instance is kept in a class-level instance variable rather than a
  # @@class variable, so it is not shared with (or clobbered by) subclasses.
  def self.INSTANCE
    @instance ||= new
  end

  # Prints the error to STDERR in the format
  #
  #   line <line>:<column> <msg>
  #
  # recognizer, offendingSymbol and e are accepted but not used.
  def syntaxError(recognizer, offendingSymbol, line, column, msg, e)
    STDERR.puts "line #{line}:#{column} #{msg}"
  end
end
|
74
|
+
|
75
|
+
|
76
|
+
# Error listener that fans every callback out to a collection of delegate
# listeners, in iteration order.
class ProxyErrorListener < ErrorListener

  # The listeners each callback is forwarded to.
  attr_accessor :delegates

  # @param delegates collection of listeners (must respond to #each)
  # @raise ReferenceError when delegates is nil
  def initialize(delegates)
    super()
    # NOTE(review): ReferenceError is not a Ruby core exception class;
    # presumably it is defined elsewhere in this gem - confirm.
    raise ReferenceError.new("delegates") if delegates.nil?
    @delegates = delegates
  end

  # Forwards the syntax error to every delegate.
  def syntaxError(recognizer, offendingSymbol, line, column, msg, e)
    delegates.each do |delegate|
      delegate.syntaxError(recognizer, offendingSymbol, line, column, msg, e)
    end
  end

  # Forwards the ambiguity report to every delegate.
  def reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
    delegates.each do |delegate|
      delegate.reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
    end
  end

  # Forwards the full-context report to every delegate.
  def reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
    delegates.each do |delegate|
      delegate.reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
    end
  end

  # Forwards the context-sensitivity report to every delegate.
  def reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
    delegates.each do |delegate|
      delegate.reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
    end
  end
end
|
@@ -0,0 +1,742 @@
|
|
1
|
+
#
|
2
|
+
# [The "BSD license"]
|
3
|
+
# Copyright (c) 2012 Terence Parr
|
4
|
+
# Copyright (c) 2012 Sam Harwell
|
5
|
+
# Copyright (c) 2014 Eric Vergnaud
|
6
|
+
# All rights reserved.
|
7
|
+
#
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
9
|
+
# modification, are permitted provided that the following conditions
|
10
|
+
# are met:
|
11
|
+
#
|
12
|
+
# 1. Redistributions of source code must retain the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer.
|
14
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
# 3. The name of the author may not be used to endorse or promote products
|
18
|
+
# derived from this software without specific prior written permission.
|
19
|
+
#
|
20
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
21
|
+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
22
|
+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
23
|
+
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
24
|
+
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
25
|
+
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
26
|
+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
27
|
+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
28
|
+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
29
|
+
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30
|
+
#
|
31
|
+
|
32
|
+
# Base class listing the error-strategy hooks. Every method here is an empty
# no-op returning nil; concrete strategies override them.
class ErrorStrategy

  # Hook for resetting the strategy's state; does nothing here.
  def reset(recognizer)
  end

  # Hook for inline recovery from a mismatched token; does nothing here.
  def recoverInline(recognizer)
  end

  # Hook for recovering from the recognition exception +e+; does nothing here.
  def recover(recognizer, e)
  end

  # Hook for resynchronizing the recognizer; does nothing here.
  def sync(recognizer)
  end

  # Hook reporting whether the strategy is recovering; returns nil here.
  def inErrorRecoveryMode(recognizer)
  end

  # Hook for reporting the recognition exception +e+; does nothing here.
  def reportError(recognizer, e)
  end
end
|
52
|
+
|
53
|
+
# This is the default implementation of {@link ANTLRErrorStrategy} used for
|
54
|
+
# error reporting and recovery in ANTLR parsers.
|
55
|
+
#
|
56
|
+
class DefaultErrorStrategy < ErrorStrategy
|
57
|
+
|
58
|
+
attr_accessor :errorRecoveryMode, :lastErrorIndex , :lastErrorStates
|
59
|
+
# Starts out not recovering, with no previous error position and no recorded
# error states.
def initialize
  super()
  # Indicates whether the error strategy is currently "recovering from an
  # error". This is used to suppress reporting multiple error messages while
  # attempting to recover from a detected syntax error.
  #
  # @see #inErrorRecoveryMode
  @errorRecoveryMode = false

  # The index into the input stream where the last error occurred.
  # This is used to prevent infinite loops where an error is found
  # but no token is consumed during recovery...another error is found,
  # ad nauseam. This is a failsafe mechanism to guarantee that at least
  # one token/tree node is consumed for two errors.
  @lastErrorIndex = -1
  # Recognizer states recorded by #recover; nil until an error is recorded.
  @lastErrorStates = nil
end
|
78
|
+
|
79
|
+
# <p>The default implementation simply calls {@link #endErrorCondition} to
|
80
|
+
# ensure that the handler is not in error recovery mode.</p>
|
81
|
+
# Resets the strategy by delegating to #endErrorCondition, so the handler is
# no longer in error recovery mode. Returns what #endErrorCondition returns.
def reset(recognizer)
  endErrorCondition(recognizer)
end
|
84
|
+
|
85
|
+
#
|
86
|
+
# This method is called to enter error recovery mode when a recognition
|
87
|
+
# exception is reported.
|
88
|
+
#
|
89
|
+
# @param recognizer the parser instance
|
90
|
+
#
|
91
|
+
# Enters error recovery mode by setting errorRecoveryMode to true.
#
# @param recognizer the parser instance (not used here)
def beginErrorCondition(recognizer)
  self.errorRecoveryMode = true
end
|
94
|
+
|
95
|
+
# Returns the current value of errorRecoveryMode.
#
# @param recognizer the parser instance (not used here)
def inErrorRecoveryMode(recognizer)
  errorRecoveryMode
end
|
98
|
+
|
99
|
+
#
|
100
|
+
# This method is called to leave error recovery mode after recovering from
|
101
|
+
# a recognition exception.
|
102
|
+
#
|
103
|
+
# @param recognizer
|
104
|
+
#
|
105
|
+
# Leaves error recovery mode and clears the bookkeeping: errorRecoveryMode
# becomes false, lastErrorStates nil and lastErrorIndex -1.
#
# @param recognizer the parser instance (not used here)
def endErrorCondition(recognizer)
  self.errorRecoveryMode = false
  self.lastErrorStates = nil
  self.lastErrorIndex = -1
end
|
110
|
+
|
111
|
+
#
|
112
|
+
# {@inheritDoc}
|
113
|
+
#
|
114
|
+
# <p>The default implementation simply calls {@link #endErrorCondition}.</p>
|
115
|
+
#
|
116
|
+
# Delegates to #endErrorCondition and returns its result.
def reportMatch(recognizer)
  endErrorCondition(recognizer)
end
|
119
|
+
|
120
|
+
#
|
121
|
+
# {@inheritDoc}
|
122
|
+
#
|
123
|
+
# <p>The default implementation returns immediately if the handler is already
|
124
|
+
# in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
|
125
|
+
# and dispatches the reporting task based on the runtime type of {@code e}
|
126
|
+
# according to the following table.</p>
|
127
|
+
#
|
128
|
+
# <ul>
|
129
|
+
# <li>{@link NoViableAltException}: Dispatches the call to
|
130
|
+
# {@link #reportNoViableAlternative}</li>
|
131
|
+
# <li>{@link InputMismatchException}: Dispatches the call to
|
132
|
+
# {@link #reportInputMismatch}</li>
|
133
|
+
# <li>{@link FailedPredicateException}: Dispatches the call to
|
134
|
+
# {@link #reportFailedPredicate}</li>
|
135
|
+
# <li>All other types: calls {@link Parser#notifyErrorListeners} to report
|
136
|
+
# the exception</li>
|
137
|
+
# </ul>
|
138
|
+
#
|
139
|
+
# Reports the recognition exception +e+ unless the strategy is already in
# error recovery mode. Otherwise enters recovery mode and dispatches on the
# class of +e+:
#
# * NoViableAltException     -> #reportNoViableAlternative
# * InputMismatchException   -> #reportInputMismatch
# * FailedPredicateException -> #reportFailedPredicate
# * anything else            -> printed as unknown, then passed to
#   recognizer.notifyErrorListeners(message, offending token, e)
#
# @param recognizer the parser instance
# @param e the recognition exception
def reportError(recognizer, e)
  # If we've already reported an error and have not matched a token
  # yet successfully, don't report any (spurious) errors.
  return if inErrorRecoveryMode(recognizer)

  beginErrorCondition(recognizer)
  case e
  when NoViableAltException
    reportNoViableAlternative(recognizer, e)
  when InputMismatchException
    reportInputMismatch(recognizer, e)
  when FailedPredicateException
    reportFailedPredicate(recognizer, e)
  else
    puts "unknown recognition error type: #{e.class}"
    # Same (msg, token, exception) order and accessors as the other
    # notifyErrorListeners call sites; an exception without an offending
    # token is reported with nil.
    offending = e.respond_to?(:offendingToken) ? e.offendingToken : nil
    recognizer.notifyErrorListeners(e.message, offending, e)
  end
end
|
157
|
+
#
|
158
|
+
# {@inheritDoc}
|
159
|
+
#
|
160
|
+
# <p>The default implementation resynchronizes the parser by consuming tokens
|
161
|
+
# until we find one in the resynchronization set--loosely the set of tokens
|
162
|
+
# that can follow the current rule.</p>
|
163
|
+
#
|
164
|
+
# Resynchronizes after an error by consuming tokens until one in the set
# returned by #getErrorRecoverySet is found (via #consumeUntil).
#
# Records the input index and recognizer state of each error. If an error
# recurs at the same index in a state already recorded, one token is
# consumed first so that recovery always makes progress.
#
# @param recognizer the parser instance
# @param e the recognition exception (not used here)
def recover(recognizer, e)
  if lastErrorIndex == recognizer.getInputStream().index &&
     !lastErrorStates.nil? &&
     lastErrorStates.member?(recognizer.state)
    # uh oh, another error at same token index and previously-visited
    # state in ATN; must be a case where LT(1) is in the recovery
    # token set so nothing got consumed. Consume a single token
    # at least to prevent an infinite loop; this is a failsafe.
    recognizer.consume()
  end

  self.lastErrorIndex = recognizer.input.index
  # Start from an empty list; Array.new(32) would pre-fill it with 32 nils.
  self.lastErrorStates = [] if lastErrorStates.nil?
  lastErrorStates.push(recognizer.state)
  followSet = getErrorRecoverySet(recognizer)
  consumeUntil(recognizer, followSet)
end
|
183
|
+
|
184
|
+
# The default implementation of {@link ANTLRErrorStrategy#sync} makes sure
|
185
|
+
# that the current lookahead symbol is consistent with what we're expecting
|
186
|
+
# at this point in the ATN. You can call this anytime but ANTLR only
|
187
|
+
# generates code to check before subrules/loops and each iteration.
|
188
|
+
#
|
189
|
+
# <p>Implements Jim Idle's magic sync mechanism in closures and optional
|
190
|
+
# subrules. E.g.,</p>
|
191
|
+
#
|
192
|
+
# <pre>
|
193
|
+
# a : sync ( stuff sync )* ;
|
194
|
+
# sync : {consume to what can follow sync} ;
|
195
|
+
# </pre>
|
196
|
+
#
|
197
|
+
# At the start of a sub rule upon error, {@link #sync} performs single
|
198
|
+
# token deletion, if possible. If it can't do that, it bails on the current
|
199
|
+
# rule and uses the default error recovery, which consumes until the
|
200
|
+
# resynchronization set of the current rule.
|
201
|
+
#
|
202
|
+
# <p>If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
|
203
|
+
# with an empty alternative), then the expected set includes what follows
|
204
|
+
# the subrule.</p>
|
205
|
+
#
|
206
|
+
# <p>During loop iteration, it consumes until it sees a token that can start a
|
207
|
+
# sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
|
208
|
+
# stay in the loop as long as possible.</p>
|
209
|
+
#
|
210
|
+
# <p><strong>ORIGINS</strong></p>
|
211
|
+
#
|
212
|
+
# <p>Previous versions of ANTLR did a poor job of their recovery within loops.
|
213
|
+
# A single mismatch token or missing token would force the parser to bail
|
214
|
+
# out of the entire rules surrounding the loop. So, for rule</p>
|
215
|
+
#
|
216
|
+
# <pre>
|
217
|
+
# classDef : 'class' ID '{' member* '}'
|
218
|
+
# </pre>
|
219
|
+
#
|
220
|
+
# input with an extra token between members would force the parser to
|
221
|
+
# consume until it found the next class definition rather than the next
|
222
|
+
# member definition of the current class.
|
223
|
+
#
|
224
|
+
# <p>This functionality cost a little bit of effort because the parser has to
|
225
|
+
# compare token set at the start of the loop and at each iteration. If for
|
226
|
+
# some reason speed is suffering for you, you can turn off this
|
227
|
+
# functionality by simply overriding this method as a blank { }.</p>
|
228
|
+
#
|
229
|
+
# Checks that the current lookahead token is consistent with the
# recognizer's current ATN state and, if not, attempts recovery.
#
# Returns immediately when already recovering, when LA(1) is EOF, when it
# is in the state's next-token set, or when the recognizer says it is an
# expected token. Otherwise:
#
# * at a block start / star-loop entry state, tries #singleTokenDeletion
#   and raises InputMismatchException if that returns nil;
# * at a plus/star loop-back state, reports the unwanted token and consumes
#   until a token in (expected tokens + error recovery set) is seen;
# * at any other kind of state, does nothing.
#
# @param recognizer the parser instance
# @raise InputMismatchException when single-token deletion is not possible
def sync(recognizer)
  # If already recovering, don't try to sync
  return if inErrorRecoveryMode(recognizer)

  s = recognizer.interp.atn.states[recognizer.state]
  la = recognizer.getTokenStream().LA(1)
  # try cheaper subset first; might get lucky. seems to shave a wee bit off
  return if la == Token::EOF || recognizer.atn.nextTokens(s).member?(la)

  # Return but don't end recovery. only do that upon valid token match
  return if recognizer.isExpectedToken(la)

  possibleStates = [ATNState::BLOCK_START, ATNState::STAR_BLOCK_START,
                    ATNState::PLUS_BLOCK_START, ATNState::STAR_LOOP_ENTRY]
  if possibleStates.member?(s.stateType)
    # report error and recover if possible
    raise InputMismatchException.new(recognizer) if singleTokenDeletion(recognizer).nil?
  elsif [ATNState::PLUS_LOOP_BACK, ATNState::STAR_LOOP_BACK].member?(s.stateType)
    reportUnwantedToken(recognizer)
    expecting = recognizer.getExpectedTokens()
    whatFollowsLoopIterationOrRule = expecting.addSet(getErrorRecoverySet(recognizer))
    consumeUntil(recognizer, whatFollowsLoopIterationOrRule)
  end
  # otherwise: do nothing if we can't identify the exact kind of ATN state
end
|
261
|
+
# This is called by {@link #reportError} when the exception is a
|
262
|
+
# {@link NoViableAltException}.
|
263
|
+
#
|
264
|
+
# @see #reportError
|
265
|
+
#
|
266
|
+
# @param recognizer the parser instance
|
267
|
+
# @param e the recognition exception
|
268
|
+
#
|
269
|
+
# Reports a NoViableAltException as
#   no viable alternative at input <escaped input>
# where the input is "<EOF>" when e.startToken is an EOF token, the token
# stream text for [e.startToken, e.offendingToken] otherwise, or
# "<unknown input>" when the recognizer has no token stream.
#
# @param recognizer the parser instance
# @param e the recognition exception
def reportNoViableAlternative(recognizer, e)
  tokens = recognizer.getTokenStream()
  input =
    if tokens.nil?
      "<unknown input>"
    elsif e.startToken.type == Token::EOF
      "<EOF>"
    else
      tokens.getText([e.startToken, e.offendingToken])
    end
  msg = "no viable alternative at input " + escapeWSAndQuote(input)
  recognizer.notifyErrorListeners(msg, e.offendingToken, e)
end
|
283
|
+
#
|
284
|
+
# This is called by {@link #reportError} when the exception is an
|
285
|
+
# {@link InputMismatchException}.
|
286
|
+
#
|
287
|
+
# @see #reportError
|
288
|
+
#
|
289
|
+
# @param recognizer the parser instance
|
290
|
+
# @param e the recognition exception
|
291
|
+
#
|
292
|
+
# Reports an InputMismatchException as
#   mismatched input <token display> expecting <escaped expected set>
# Fills in e.recognizer with +recognizer+ first when the exception carries
# none.
#
# @param recognizer the parser instance
# @param e the recognition exception
def reportInputMismatch(recognizer, e)
  e.recognizer = recognizer if e.recognizer.nil?

  t = getTokenErrorDisplay(e.offendingToken)
  expecting = e.getExpectedTokens().toString(recognizer.tokenNames)
  msg = "mismatched input #{t} expecting #{escapeWSAndQuote(expecting)}"
  recognizer.notifyErrorListeners(msg, e.offendingToken, e)
end
|
301
|
+
|
302
|
+
#
|
303
|
+
# This is called by {@link #reportError} when the exception is a
|
304
|
+
# {@link FailedPredicateException}.
|
305
|
+
#
|
306
|
+
# @see #reportError
|
307
|
+
#
|
308
|
+
# @param recognizer the parser instance
|
309
|
+
# @param e the recognition exception
|
310
|
+
#
|
311
|
+
# Reports a FailedPredicateException as
#   rule <rule name> <exception message>
# using the name of the rule of the recognizer's current context.
#
# @param recognizer the parser instance
# @param e the recognition exception
def reportFailedPredicate(recognizer, e)
  ruleName = recognizer.ruleNames[recognizer.ctx.ruleIndex]
  msg = "rule #{ruleName} #{e.message}"
  # Message first, then offending token, then exception - the order every
  # other notifyErrorListeners call in this strategy uses.
  recognizer.notifyErrorListeners(msg, e.offendingToken, e)
end
|
316
|
+
|
317
|
+
# This method is called to report a syntax error which requires the removal
|
318
|
+
# of a token from the input stream. At the time this method is called, the
|
319
|
+
# erroneous symbol is current {@code LT(1)} symbol and has not yet been
|
320
|
+
# removed from the input stream. When this method returns,
|
321
|
+
# {@code recognizer} is in error recovery mode.
|
322
|
+
#
|
323
|
+
# <p>This method is called when {@link #singleTokenDeletion} identifies
|
324
|
+
# single-token deletion as a viable recovery strategy for a mismatched
|
325
|
+
# input error.</p>
|
326
|
+
#
|
327
|
+
# <p>The default implementation simply returns if the handler is already in
|
328
|
+
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
|
329
|
+
# enter error recovery mode, followed by calling
|
330
|
+
# {@link Parser#notifyErrorListeners}.</p>
|
331
|
+
#
|
332
|
+
# @param recognizer the parser instance
|
333
|
+
#
|
334
|
+
# Reports the recognizer's current token as extraneous:
#   extraneous input <token display> expecting <expected set>
# Does nothing if already in error recovery mode; otherwise enters recovery
# mode before notifying the listeners (with a nil exception).
#
# @param recognizer the parser instance
def reportUnwantedToken(recognizer)
  return if inErrorRecoveryMode(recognizer)

  beginErrorCondition(recognizer)
  t = recognizer.getCurrentToken()
  tokenName = getTokenErrorDisplay(t)
  expecting = getExpectedTokens(recognizer)
  msg = "extraneous input #{tokenName} expecting #{expecting.toString(recognizer.tokenNames)}"
  recognizer.notifyErrorListeners(msg, t, nil)
end
|
344
|
+
# This method is called to report a syntax error which requires the
|
345
|
+
# insertion of a missing token into the input stream. At the time this
|
346
|
+
# method is called, the missing token has not yet been inserted. When this
|
347
|
+
# method returns, {@code recognizer} is in error recovery mode.
|
348
|
+
#
|
349
|
+
# <p>This method is called when {@link #singleTokenInsertion} identifies
|
350
|
+
# single-token insertion as a viable recovery strategy for a mismatched
|
351
|
+
# input error.</p>
|
352
|
+
#
|
353
|
+
# <p>The default implementation simply returns if the handler is already in
|
354
|
+
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
|
355
|
+
# enter error recovery mode, followed by calling
|
356
|
+
# {@link Parser#notifyErrorListeners}.</p>
|
357
|
+
#
|
358
|
+
# @param recognizer the parser instance
|
359
|
+
#
|
360
|
+
# Reports a missing token at the recognizer's current token:
#   missing <expected set> at <token display>
# Does nothing if already in error recovery mode; otherwise enters recovery
# mode before notifying the listeners (with a nil exception).
#
# @param recognizer the parser instance
def reportMissingToken(recognizer)
  return if inErrorRecoveryMode(recognizer)

  beginErrorCondition(recognizer)
  t = recognizer.getCurrentToken()
  expecting = getExpectedTokens(recognizer)
  msg = "missing #{expecting.toString(recognizer.tokenNames)} at #{getTokenErrorDisplay(t)}"
  recognizer.notifyErrorListeners(msg, t, nil)
end
|
370
|
+
# <p>The default implementation attempts to recover from the mismatched input
|
371
|
+
# by using single token insertion and deletion as described below. If the
|
372
|
+
# recovery attempt fails, this method throws an
|
373
|
+
# {@link InputMismatchException}.</p>
|
374
|
+
#
|
375
|
+
# <p><strong>EXTRA TOKEN</strong> (single token deletion)</p>
|
376
|
+
#
|
377
|
+
# <p>{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
|
378
|
+
# right token, however, then assume {@code LA(1)} is some extra spurious
|
379
|
+
# token and delete it. Then consume and return the next token (which was
|
380
|
+
# the {@code LA(2)} token) as the successful result of the match operation.</p>
|
381
|
+
#
|
382
|
+
# <p>This recovery strategy is implemented by {@link #singleTokenDeletion}.</p>
|
383
|
+
#
|
384
|
+
# <p><strong>MISSING TOKEN</strong> (single token insertion)</p>
|
385
|
+
#
|
386
|
+
# <p>If current token (at {@code LA(1)}) is consistent with what could come
|
387
|
+
# after the expected {@code LA(1)} token, then assume the token is missing
|
388
|
+
# and use the parser's {@link TokenFactory} to create it on the fly. The
|
389
|
+
# "insertion" is performed by returning the created token as the successful
|
390
|
+
# result of the match operation.</p>
|
391
|
+
#
|
392
|
+
# <p>This recovery strategy is implemented by {@link #singleTokenInsertion}.</p>
|
393
|
+
#
|
394
|
+
# <p><strong>EXAMPLE</strong></p>
|
395
|
+
#
|
396
|
+
# <p>For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
|
397
|
+
# the parser returns from the nested call to {@code expr}, it will have
|
398
|
+
# call chain:</p>
|
399
|
+
#
|
400
|
+
# <pre>
|
401
|
+
# stat → expr → atom
|
402
|
+
# </pre>
|
403
|
+
#
|
404
|
+
# and it will be trying to match the {@code ')'} at this point in the
|
405
|
+
# derivation:
|
406
|
+
#
|
407
|
+
# <pre>
|
408
|
+
# => ID '=' '(' INT ')' ('+' atom)* ';'
|
409
|
+
# ^
|
410
|
+
# </pre>
|
411
|
+
#
|
412
|
+
# The attempt to match {@code ')'} will fail when it sees {@code ';'} and
|
413
|
+
# call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
|
414
|
+
# is in the set of tokens that can follow the {@code ')'} token reference
|
415
|
+
# in rule {@code atom}. It can assume that you forgot the {@code ')'}.
|
416
|
+
#
|
417
|
+
# Inline recovery from a mismatched token. Tries single-token deletion
# first, then single-token insertion, and gives up with an exception when
# neither applies.
#
# @param recognizer the parser instance
# @return the token to use as the result of the match
# @raise [InputMismatchException] when neither strategy can recover
def recoverInline(recognizer)
  # Strategy 1: single token deletion.
  deleted = singleTokenDeletion(recognizer)
  unless deleted.nil?
    # The extra token has been dropped; now step past the token we
    # actually wanted, as if nothing had gone wrong.
    recognizer.consume
    return deleted
  end

  # Strategy 2: single token insertion.
  return getMissingSymbol(recognizer) if singleTokenInsertion(recognizer)

  # Neither strategy worked; report the mismatch to the caller.
  raise InputMismatchException.new(recognizer)
end
|
435
|
+
#
|
436
|
+
# This method implements the single-token insertion inline error recovery
|
437
|
+
# strategy. It is called by {@link #recoverInline} if the single-token
|
438
|
+
# deletion strategy fails to recover from the mismatched input. If this
|
439
|
+
# method returns {@code true}, {@code recognizer} will be in error recovery
|
440
|
+
# mode.
|
441
|
+
#
|
442
|
+
# <p>This method determines whether or not single-token insertion is viable by
|
443
|
+
# checking if the {@code LA(1)} input symbol could be successfully matched
|
444
|
+
# if it were instead the {@code LA(2)} symbol. If this method returns
|
445
|
+
# {@code true}, the caller is responsible for creating and inserting a
|
446
|
+
# token with the correct type to produce this behavior.</p>
|
447
|
+
#
|
448
|
+
# @param recognizer the parser instance
|
449
|
+
# @return {@code true} if single-token insertion is a viable recovery
|
450
|
+
# strategy for the current mismatched input, otherwise {@code false}
|
451
|
+
#
|
452
|
+
# Decides whether conjuring up a single missing token is a viable recovery.
#
# @param recognizer the parser instance
# @return true (after reporting the missing token) when the current LA(1)
#   symbol is among the tokens that may follow the state reached by the
#   first transition out of the current ATN state; false otherwise
def singleTokenInsertion(recognizer)
  la1 = recognizer.getTokenStream().LA(1)
  atn = recognizer.interp.atn
  # State reached if the expected token had been present.
  stateAfterExpected = atn.states[recognizer.state].transitions[0].target
  viableAfterInsert = atn.nextTokens(stateAfterExpected, recognizer.ctx)
  return false unless viableAfterInsert.member?(la1)

  # LA(1) fits what comes next, so the expected token must be missing.
  reportMissingToken(recognizer)
  true
end
|
468
|
+
# This method implements the single-token deletion inline error recovery
|
469
|
+
# strategy. It is called by {@link #recoverInline} to attempt to recover
|
470
|
+
# from mismatched input. If this method returns null, the parser and error
|
471
|
+
# handler state will not have changed. If this method returns non-null,
|
472
|
+
# {@code recognizer} will <em>not</em> be in error recovery mode since the
|
473
|
+
# returned token was a successful match.
|
474
|
+
#
|
475
|
+
# <p>If the single-token deletion is successful, this method calls
|
476
|
+
# {@link #reportUnwantedToken} to report the error, followed by
|
477
|
+
# {@link Parser#consume} to actually "delete" the extraneous token. Then,
|
478
|
+
# before returning {@link #reportMatch} is called to signal a successful
|
479
|
+
# match.</p>
|
480
|
+
#
|
481
|
+
# @param recognizer the parser instance
|
482
|
+
# @return the successfully matched {@link Token} instance if single-token
|
483
|
+
# deletion successfully recovers from the mismatched input, otherwise
|
484
|
+
# {@code null}
|
485
|
+
#
|
486
|
+
# Attempts recovery by discarding one extraneous token.
#
# @param recognizer the parser instance
# @return the token that now sits at the current position when LA(2) was
#   among the expected tokens (after reporting and consuming the unwanted
#   LA(1) token); nil when deletion does not apply, in which case nothing
#   is reported or consumed
def singleTokenDeletion(recognizer)
  la2 = recognizer.getTokenStream().LA(2)
  return nil unless getExpectedTokens(recognizer).member?(la2)

  reportUnwantedToken(recognizer)
  recognizer.consume # simply drop the extra token
  # Hand back the token we are actually matching.
  survivor = recognizer.getCurrentToken
  reportMatch(recognizer) # the current token is known to be correct
  survivor
end
|
504
|
+
|
505
|
+
# Conjure up a missing token during error recovery.
|
506
|
+
#
|
507
|
+
# The recognizer attempts to recover from single missing
|
508
|
+
# symbols. But, actions might refer to that missing symbol.
|
509
|
+
# For example, x=ID {f($x);}. The action clearly assumes
|
510
|
+
# that there has been an identifier matched previously and that
|
511
|
+
# $x points at that token. If that token is missing, but
|
512
|
+
# the next token in the stream is what we want we assume that
|
513
|
+
# this token is missing and we keep going. Because we
|
514
|
+
# have to return some token to replace the missing token,
|
515
|
+
# we have to conjure one up. This method gives the user control
|
516
|
+
# over the tokens returned for missing tokens. Mostly,
|
517
|
+
# you will want to create something special for identifier
|
518
|
+
# tokens. For literals such as '{' and ',', the default
|
519
|
+
# action in the parser or tree parser works. It simply creates
|
520
|
+
# a CommonToken of the appropriate type. The text will be the token.
|
521
|
+
# If you change what tokens must be created by the lexer,
|
522
|
+
# override this method to create the appropriate tokens.
|
523
|
+
#
|
524
|
+
# Builds a stand-in token for one that was expected but absent.
#
# The token type is the minimum element of the expected-token set, the text
# is "<missing NAME>" (or "<missing EOF>"), and the source/line/column are
# copied from the current token -- or from the previous token when the
# current one is EOF and a previous token exists.
#
# @param recognizer the parser instance
# @return whatever the recognizer's token factory creates
def getMissingSymbol(recognizer)
  anchor = recognizer.getCurrentToken()
  wantedType = getExpectedTokens(recognizer).getMinElement # any element will do
  label = if wantedType == Token::EOF
            "<missing EOF>"
          else
            "<missing #{recognizer.tokenNames[wantedType]}>"
          end
  previous = recognizer.getTokenStream().LT(-1)
  # Position the conjured token at the last real token when sitting on EOF.
  anchor = previous if anchor.type == Token::EOF && !previous.nil?
  recognizer.getTokenFactory().create(anchor.source, wantedType, label,
                                      Token::DEFAULT_CHANNEL, -1, -1,
                                      anchor.line, anchor.column)
end
|
542
|
+
# Hook that exposes the recognizer's expected-token set; simply delegates
# to the recognizer.
#
# @param recognizer the parser instance
# @return the value of recognizer.getExpectedTokens
def getExpectedTokens(recognizer)
  recognizer.getExpectedTokens
end
|
545
|
+
|
546
|
+
# How should a token be displayed in an error message? The default
|
547
|
+
# is to display just the text, but during development you might
|
548
|
+
# want to have a lot of information spit out. Override in that case
|
549
|
+
# to use t.toString() (which, for CommonToken, dumps everything about
|
550
|
+
# the token). This is better than forcing you to override a method in
|
551
|
+
# your token objects because you don't have to go modify your lexer
|
552
|
+
# so that it creates a new Java type.
|
553
|
+
#
|
554
|
+
# Renders a token for use in an error message.
#
# @param token the token to display (may be nil)
# @return "<no token>" for nil; otherwise the token text, or "<EOF>" /
#   "<TYPE>" when the token has no text, passed through escapeWSAndQuote
def getTokenErrorDisplay(token)
  return "<no token>" if token.nil?

  s = token.text
  if s.nil?
    # Fall back to the numeric token type. The previous code interpolated
    # token.class, which printed the Ruby class name (e.g. "<CommonToken>")
    # and said nothing about which token was involved.
    s = token.type == Token::EOF ? "<EOF>" : "<#{token.type}>"
  end
  escapeWSAndQuote(s)
end
|
566
|
+
# Makes newline, carriage return and tab visible as \n, \r and \t, and wraps
# the result in single quotes.
#
# Uses non-destructive gsub so the caller's string (typically a token's own
# text) is left untouched and frozen strings are accepted; the previous
# gsub! calls rewrote the argument in place.
#
# @param s [String] the text to display
# @return [String] a new, quoted and escaped string
def escapeWSAndQuote(s)
  escaped = s.gsub("\n", "\\n").gsub("\r", "\\r").gsub("\t", "\\t")
  "'#{escaped}'"
end
|
572
|
+
|
573
|
+
# Compute the error recovery set for the current rule. During
|
574
|
+
# rule invocation, the parser pushes the set of tokens that can
|
575
|
+
# follow that rule reference on the stack; this amounts to
|
576
|
+
# computing FIRST of what follows the rule reference in the
|
577
|
+
# enclosing rule. See LinearApproximator.FIRST().
|
578
|
+
# This local follow set only includes tokens
|
579
|
+
# from within the rule; i.e., the FIRST computation done by
|
580
|
+
# ANTLR stops at the end of a rule.
|
581
|
+
#
|
582
|
+
# EXAMPLE
|
583
|
+
#
|
584
|
+
# When you find a "no viable alt exception", the input is not
|
585
|
+
# consistent with any of the alternatives for rule r. The best
|
586
|
+
# thing to do is to consume tokens until you see something that
|
587
|
+
# can legally follow a call to r *or* any rule that called r.
|
588
|
+
# You don't want the exact set of viable next tokens because the
|
589
|
+
# input might just be missing a token--you might consume the
|
590
|
+
# rest of the input looking for one of the missing tokens.
|
591
|
+
#
|
592
|
+
# Consider grammar:
|
593
|
+
#
|
594
|
+
# a : '[' b ']'
|
595
|
+
# | '(' b ')'
|
596
|
+
# ;
|
597
|
+
# b : c '^' INT ;
|
598
|
+
# c : ID
|
599
|
+
# | INT
|
600
|
+
# ;
|
601
|
+
#
|
602
|
+
# At each rule invocation, the set of tokens that could follow
|
603
|
+
# that rule is pushed on a stack. Here are the various
|
604
|
+
# context-sensitive follow sets:
|
605
|
+
#
|
606
|
+
# FOLLOW(b1_in_a) = FIRST(']') = ']'
|
607
|
+
# FOLLOW(b2_in_a) = FIRST(')') = ')'
|
608
|
+
# FOLLOW(c_in_b) = FIRST('^') = '^'
|
609
|
+
#
|
610
|
+
# Upon erroneous input "[]", the call chain is
|
611
|
+
#
|
612
|
+
# a -> b -> c
|
613
|
+
#
|
614
|
+
# and, hence, the follow context stack is:
|
615
|
+
#
|
616
|
+
# depth follow set start of rule execution
|
617
|
+
# 0 <EOF> a (from main())
|
618
|
+
# 1 ']' b
|
619
|
+
# 2 '^' c
|
620
|
+
#
|
621
|
+
# Notice that ')' is not included, because b would have to have
|
622
|
+
# been called from a different context in rule a for ')' to be
|
623
|
+
# included.
|
624
|
+
#
|
625
|
+
# For error recovery, we cannot consider FOLLOW(c)
|
626
|
+
# (context-sensitive or otherwise). We need the combined set of
|
627
|
+
# all context-sensitive FOLLOW sets--the set of all tokens that
|
628
|
+
# could follow any reference in the call chain. We need to
|
629
|
+
# resync to one of those tokens. Note that FOLLOW(c)='^' and if
|
630
|
+
# we resync'd to that token, we'd consume until EOF. We need to
|
631
|
+
# sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
|
632
|
+
# In this case, for input "[]", LA(1) is ']' and in the set, so we would
|
633
|
+
# not consume anything. After printing an error, rule c would
|
634
|
+
# return normally. Rule b would not find the required '^' though.
|
635
|
+
# At this point, it gets a mismatched token error and throws an
|
636
|
+
# exception (since LA(1) is not in the viable following token
|
637
|
+
# set). The rule exception handler tries to recover, but finds
|
638
|
+
# the same recovery set and doesn't consume anything. Rule b
|
639
|
+
# exits normally returning to rule a. Now it finds the ']' (and
|
640
|
+
# with the successful match exits errorRecovery mode).
|
641
|
+
#
|
642
|
+
# So, you can see that the parser walks up the call chain looking
|
643
|
+
# for the token that was a member of the recovery set.
|
644
|
+
#
|
645
|
+
# Errors are not generated in errorRecovery mode.
|
646
|
+
#
|
647
|
+
# ANTLR's error recovery mechanism is based upon original ideas:
|
648
|
+
#
|
649
|
+
# "Algorithms + Data Structures = Programs" by Niklaus Wirth
|
650
|
+
#
|
651
|
+
# and
|
652
|
+
#
|
653
|
+
# "A note on error recovery in recursive descent parsers":
|
654
|
+
# http://portal.acm.org/citation.cfm?id=947902.947905
|
655
|
+
#
|
656
|
+
# Later, Josef Grosch had some good ideas:
|
657
|
+
#
|
658
|
+
# "Efficient and Comfortable Error Recovery in Recursive Descent
|
659
|
+
# Parsers":
|
660
|
+
# ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
|
661
|
+
#
|
662
|
+
# Like Grosch I implement context-sensitive FOLLOW sets that are combined
|
663
|
+
# at run-time upon error to avoid overhead during parsing.
|
664
|
+
#
|
665
|
+
# Collects the resynchronization set by walking the rule-invocation chain.
#
# Starting at the recognizer's current context, each context with a
# non-negative invokingState contributes the tokens that can follow the
# rule reference that invoked it. Token::EPSILON is removed before returning.
#
# @param recognizer the parser instance
# @return [IntervalSet] the union of the follow sets along the call chain
def getErrorRecoverySet(recognizer)
  atn = recognizer.interp.atn
  frame = recognizer.ctx
  resyncSet = IntervalSet.new
  while frame && frame.invokingState >= 0
    # What may follow the rule reference that invoked this frame?
    ruleTransition = atn.states[frame.invokingState].transitions[0]
    resyncSet.addSet(atn.nextTokens(ruleTransition.followState))
    frame = frame.parentCtx
  end
  resyncSet.remove(Token::EPSILON)
  resyncSet
end
|
680
|
+
|
681
|
+
# Consume tokens until one matches the given token set.
|
682
|
+
# Discards input tokens until LA(1) is either EOF or a member of set_.
#
# @param recognizer the parser instance
# @param set_ the token set to stop at (must respond to member?)
# @return [nil]
def consumeUntil(recognizer, set_)
  loop do
    lookahead = recognizer.getTokenStream().LA(1)
    break if lookahead == Token::EOF || set_.member?(lookahead)

    recognizer.consume
  end
end
|
689
|
+
|
690
|
+
end
|
691
|
+
#
|
692
|
+
# This implementation of {@link ANTLRErrorStrategy} responds to syntax errors
|
693
|
+
# by immediately canceling the parse operation with a
|
694
|
+
# {@link ParseCancellationException}. The implementation ensures that the
|
695
|
+
# {@link ParserRuleContext#exception} field is set for all parse tree nodes
|
696
|
+
# that were not completed prior to encountering the error.
|
697
|
+
#
|
698
|
+
# <p>
|
699
|
+
# This error strategy is useful in the following scenarios.</p>
|
700
|
+
#
|
701
|
+
# <ul>
|
702
|
+
# <li><strong>Two-stage parsing:</strong> This error strategy allows the first
|
703
|
+
# stage of two-stage parsing to immediately terminate if an error is
|
704
|
+
# encountered, and immediately fall back to the second stage. In addition to
|
705
|
+
# avoiding wasted work by attempting to recover from errors here, the empty
|
706
|
+
# implementation of {@link BailErrorStrategy#sync} improves the performance of
|
707
|
+
# the first stage.</li>
|
708
|
+
# <li><strong>Silent validation:</strong> When syntax errors are not being
|
709
|
+
# reported or logged, and the parse result is simply ignored if errors occur,
|
710
|
+
# the {@link BailErrorStrategy} avoids wasting work on recovering from errors
|
711
|
+
# when the result will be ignored either way.</li>
|
712
|
+
# </ul>
|
713
|
+
#
|
714
|
+
# <p>
|
715
|
+
# {@code myparser.setErrorHandler(new BailErrorStrategy());}</p>
|
716
|
+
#
|
717
|
+
# @see Parser#setErrorHandler(ANTLRErrorStrategy)
|
718
|
+
#
|
719
|
+
class BailErrorStrategy < DefaultErrorStrategy
  # Never recovers: records +e+ on the current context and each of its
  # ancestors, then raises a ParseCancellationException built from +e+ so
  # the error is not swallowed by the rule functions' own handlers.
  #
  # @param recognizer the parser instance
  # @param e the recognition error that triggered recovery
  # @raise [ParseCancellationException] always
  def recover(recognizer, e)
    node = recognizer.ctx
    until node.nil?
      node.exception = e
      node = node.parentCtx
    end
    raise ParseCancellationException.new(e)
  end

  # Inline recovery is refused as well: a successful inline recovery would
  # not raise, so the mismatch is routed through #recover instead.
  #
  # @param recognizer the parser instance
  # @raise [ParseCancellationException] always (via #recover)
  def recoverInline(recognizer)
    mismatch = InputMismatchException.new(recognizer)
    recover(recognizer, mismatch)
  end

  # Intentionally empty: do not attempt to recover from problems in subrules.
  def sync(recognizer)
  end
end
|