antlr4 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +27 -0
- data/README.md +46 -0
- data/lib/antlr4.rb +262 -0
- data/lib/antlr4/BufferedTokenStream.rb +306 -0
- data/lib/antlr4/CommonTokenFactory.rb +53 -0
- data/lib/antlr4/CommonTokenStream.rb +56 -0
- data/lib/antlr4/FileStream.rb +14 -0
- data/lib/antlr4/InputStream.rb +82 -0
- data/lib/antlr4/IntervalSet.rb +341 -0
- data/lib/antlr4/LL1Analyzer.rb +177 -0
- data/lib/antlr4/Lexer.rb +335 -0
- data/lib/antlr4/ListTokenSource.rb +140 -0
- data/lib/antlr4/Parser.rb +562 -0
- data/lib/antlr4/ParserInterpreter.rb +149 -0
- data/lib/antlr4/ParserRuleContext.rb +162 -0
- data/lib/antlr4/PredictionContext.rb +690 -0
- data/lib/antlr4/Recognizer.rb +162 -0
- data/lib/antlr4/RuleContext.rb +226 -0
- data/lib/antlr4/Token.rb +124 -0
- data/lib/antlr4/TokenFactory.rb +3 -0
- data/lib/antlr4/TokenSource.rb +4 -0
- data/lib/antlr4/TokenStream.rb +3 -0
- data/lib/antlr4/TraceListener.rb +23 -0
- data/lib/antlr4/atn/ATN.rb +133 -0
- data/lib/antlr4/atn/ATNConfig.rb +146 -0
- data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
- data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
- data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
- data/lib/antlr4/atn/ATNSimulator.rb +43 -0
- data/lib/antlr4/atn/ATNState.rb +253 -0
- data/lib/antlr4/atn/ATNType.rb +22 -0
- data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
- data/lib/antlr4/atn/LexerAction.rb +311 -0
- data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
- data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
- data/lib/antlr4/atn/PredictionMode.rb +525 -0
- data/lib/antlr4/atn/SemanticContext.rb +355 -0
- data/lib/antlr4/atn/Transition.rb +297 -0
- data/lib/antlr4/base.rb +60 -0
- data/lib/antlr4/dfa/DFA.rb +128 -0
- data/lib/antlr4/dfa/DFASerializer.rb +77 -0
- data/lib/antlr4/dfa/DFAState.rb +133 -0
- data/lib/antlr4/error.rb +151 -0
- data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
- data/lib/antlr4/error/ErrorListener.rb +109 -0
- data/lib/antlr4/error/ErrorStrategy.rb +742 -0
- data/lib/antlr4/tree/Chunk.rb +31 -0
- data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
- data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
- data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
- data/lib/antlr4/tree/RuleTagToken.rb +39 -0
- data/lib/antlr4/tree/TokenTagToken.rb +38 -0
- data/lib/antlr4/tree/Tree.rb +204 -0
- data/lib/antlr4/tree/Trees.rb +111 -0
- data/lib/antlr4/version.rb +5 -0
- data/lib/antlr4/xpath/XPath.rb +354 -0
- data/lib/double_key_map.rb +78 -0
- data/lib/java_symbols.rb +24 -0
- data/lib/uuid.rb +87 -0
- data/test/test_intervalset.rb +664 -0
- data/test/test_tree.rb +140 -0
- data/test/test_uuid.rb +122 -0
- metadata +109 -0
@@ -0,0 +1,311 @@
|
|
1
|
+
|
2
|
+
# Integer tags naming each kind of lexer action. Every LexerAction subclass in
# this file hands one of these values to LexerAction#initialize.
class LexerActionType
  # include JavaSymbols

  # The type of a {@link LexerChannelAction} action.
  CHANNEL = 0
  # The type of a {@link LexerCustomAction} action.
  CUSTOM = 1
  # The type of a {@link LexerModeAction} action.
  MODE = 2
  # The type of a {@link LexerMoreAction} action.
  MORE = 3
  # The type of a {@link LexerPopModeAction} action.
  POP_MODE = 4
  # The type of a {@link LexerPushModeAction} action.
  PUSH_MODE = 5
  # The type of a {@link LexerSkipAction} action.
  SKIP = 6
  # The type of a {@link LexerTypeAction} action.
  TYPE = 7
end
|
13
|
+
|
14
|
+
# Base class shared by all lexer actions in this file. It stores the action's
# type tag and a flag saying whether the action depends on the input position.
class LexerAction
  attr_accessor :actionType, :isPositionDependent

  # @param action the type tag for this action (subclasses pass a
  #   LexerActionType constant)
  def initialize(action)
    @actionType = action
    # Subclasses that are position dependent flip this after calling super.
    @isPositionDependent = false
  end

  # Hash derived from the type tag only.
  def hash
    actionType.to_s.hash
  end

  # Delegates to ==, so a subclass that overrides == also changes eql?.
  def eql?(other)
    self == other
  end

  # Base actions are equal only to themselves (object identity).
  def ==(other)
    equal?(other)
  end
end
|
34
|
+
|
35
|
+
#
|
36
|
+
# Implements the {@code skip} lexer action by calling {@link Lexer#skip}.
|
37
|
+
#
|
38
|
+
# <p>The {@code skip} command does not have any parameters, so this action is
|
39
|
+
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
|
40
|
+
class LexerSkipAction < LexerAction
  # Holds the shared instance of this parameterless action; filled in on the
  # first call to LexerSkipAction.INSTANCE.
  @@INSTANCE = nil

  # @return the shared LexerSkipAction, creating it on first use
  def self.INSTANCE
    @@INSTANCE ||= LexerSkipAction.new
  end

  def initialize
    super(LexerActionType::SKIP)
  end

  # Runs the action by calling +skip+ on the given lexer.
  def execute(lexer)
    lexer.skip
  end

  def to_s
    'skip'
  end
end
|
62
|
+
|
63
|
+
# Implements the {@code type} lexer action by calling {@link Lexer#setType}
|
64
|
+
# with the assigned type.
|
65
|
+
class LexerTypeAction < LexerAction
  # The token type value this action assigns to the lexer.
  attr_accessor :type

  # @param _type the type value to assign when the action executes
  def initialize(_type)
    super(LexerActionType::TYPE)
    self.type = _type
  end

  # Runs the action by assigning the stored type to +lexer.type+.
  def execute(lexer)
    lexer.type = type
  end

  # Hash built from the action type tag and the stored type.
  def hash
    "#{actionType}#{type}".hash
  end

  # Equal when +other+ is this very object, or another LexerTypeAction
  # carrying the same type. Written with ||/&& so the grouping is explicit:
  # Ruby's `or`/`and` share one precedence level, which makes an unbracketed
  # `a or b and c` parse as `(a or b) and c`.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerTypeAction) && type == other.type)
  end

  def to_s
    "type(#{type})"
  end
end
|
87
|
+
|
88
|
+
# Implements the {@code pushMode} lexer action by calling
|
89
|
+
# {@link Lexer#pushMode} with the assigned mode.
|
90
|
+
class LexerPushModeAction < LexerAction
  # The mode value passed to the lexer when the action executes.
  attr_accessor :mode

  # @param _mode the mode value to push
  def initialize(_mode)
    super(LexerActionType::PUSH_MODE)
    self.mode = _mode
  end

  # Runs the action by calling +lexer.pushMode+ with the stored mode.
  def execute(lexer)
    lexer.pushMode(mode)
  end

  # Hash built from the action type tag and the stored mode.
  def hash
    "#{actionType}#{mode}".hash
  end

  # Equal when +other+ is this very object, or another LexerPushModeAction
  # with the same mode. Uses ||/&& so the grouping is explicit; an unbracketed
  # `a or b and c` parses as `(a or b) and c` in Ruby.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerPushModeAction) && mode == other.mode)
  end

  def to_s
    "pushMode(#{mode})"
  end
end
|
116
|
+
|
117
|
+
# Implements the {@code popMode} lexer action by calling {@link Lexer#popMode}.
|
118
|
+
#
|
119
|
+
# <p>The {@code popMode} command does not have any parameters, so this action is
|
120
|
+
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
|
121
|
+
class LexerPopModeAction < LexerAction
  # Holds the shared instance of this parameterless action; filled in on the
  # first call to LexerPopModeAction.INSTANCE.
  @@INSTANCE = nil

  # @return the shared instance, creating it on first use
  def self.INSTANCE
    @@INSTANCE ||= new
  end

  def initialize
    super(LexerActionType::POP_MODE)
  end

  # Runs the action by calling +popMode+ on the given lexer.
  def execute(lexer)
    lexer.popMode
  end

  def to_s
    'popMode'
  end
end
|
142
|
+
|
143
|
+
# Implements the {@code more} lexer action by calling {@link Lexer#more}.
|
144
|
+
#
|
145
|
+
# <p>The {@code more} command does not have any parameters, so this action is
|
146
|
+
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
|
147
|
+
class LexerMoreAction < LexerAction
  # Holds the shared instance of this parameterless action; filled in on the
  # first call to LexerMoreAction.INSTANCE.
  @@INSTANCE = nil

  # @return the shared instance, creating it on first use
  def self.INSTANCE
    @@INSTANCE ||= new
  end

  def initialize
    super(LexerActionType::MORE)
  end

  # Runs the action by calling +more+ on the given lexer.
  def execute(lexer)
    lexer.more
  end

  def to_s
    'more'
  end
end
|
168
|
+
|
169
|
+
# Implements the {@code mode} lexer action by calling {@link Lexer#mode} with
|
170
|
+
# the assigned mode.
|
171
|
+
class LexerModeAction < LexerAction
  # The mode value assigned to the lexer when the action executes.
  attr_accessor :mode

  # @param _mode the mode value to switch to
  def initialize(_mode)
    super(LexerActionType::MODE)
    self.mode = _mode
  end

  # Runs the action by assigning the stored mode to +lexer.mode+.
  def execute(lexer)
    lexer.mode = mode
  end

  # Hash built from the action type tag and the stored mode.
  def hash
    "#{actionType}#{mode}".hash
  end

  # Equal when +other+ is this very object, or another LexerModeAction with
  # the same mode. Uses ||/&& so the grouping is explicit; an unbracketed
  # `a or b and c` parses as `(a or b) and c` in Ruby.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerModeAction) && mode == other.mode)
  end

  def to_s
    "mode(#{mode})"
  end
end
|
197
|
+
# Executes a custom lexer action by calling {@link Recognizer#action} with the
|
198
|
+
# rule and action indexes assigned to the custom action. The implementation of
|
199
|
+
# a custom action is added to the generated code for the lexer in an override
|
200
|
+
# of {@link Recognizer#action} when the grammar is compiled.
|
201
|
+
#
|
202
|
+
# <p>This class may represent embedded actions created with the <code>{...}</code>
|
203
|
+
# syntax in ANTLR 4, as well as actions created for lexer commands where the
|
204
|
+
# command argument could not be evaluated when the grammar was compiled.</p>
|
205
|
+
|
206
|
+
class LexerCustomAction < LexerAction
  # ruleIndex / actionIndex are the two indexes forwarded to +lexer.action+.
  # (+isPositionDependent+ is inherited from LexerAction.)
  attr_accessor :ruleIndex, :actionIndex

  # Constructs a custom lexer action with the specified rule and action
  # indexes. Custom actions are always flagged as position dependent.
  #
  # @param rule_index   the rule index passed to +lexer.action+
  # @param action_index the action index passed to +lexer.action+
  def initialize(rule_index, action_index)
    super(LexerActionType::CUSTOM)
    @ruleIndex = rule_index
    @actionIndex = action_index
    # Overrides the +false+ set by LexerAction#initialize.
    @isPositionDependent = true
  end

  # Runs the action by calling +lexer.action(nil, ruleIndex, actionIndex)+.
  def execute(lexer)
    lexer.action(nil, ruleIndex, actionIndex)
  end

  # Hash over the same fields == compares. Hashing the fields as an array
  # keeps e.g. (rule 1, action 23) and (rule 12, action 3) apart, which a
  # plain digit concatenation would not.
  def hash
    [actionType, ruleIndex, actionIndex].hash
  end

  # Equal when +other+ is this very object, or another LexerCustomAction with
  # the same rule and action indexes. Uses ||/&& so the grouping is explicit;
  # an unbracketed `a or b and c` parses as `(a or b) and c` in Ruby.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerCustomAction) &&
        ruleIndex == other.ruleIndex &&
        actionIndex == other.actionIndex)
  end

  # Value-based text form, like the sibling actions provide. Without it the
  # default Object#to_s (which embeds the object address) would be used by
  # any code that hashes actions through their string form, making equal
  # actions hash differently.
  def to_s
    "custom(#{ruleIndex}, #{actionIndex})"
  end
end
|
237
|
+
# Implements the {@code channel} lexer action by calling
|
238
|
+
# {@link Lexer#setChannel} with the assigned channel.
|
239
|
+
class LexerChannelAction < LexerAction
  # The channel value assigned to the lexer when the action executes.
  attr_accessor :channel

  # Constructs a new channel action with the specified channel value.
  #
  # @param _channel the channel value to assign
  def initialize(_channel)
    super(LexerActionType::CHANNEL)
    self.channel = _channel
  end

  # Runs the action by assigning the stored channel to +lexer.channel+.
  def execute(lexer)
    lexer.channel = channel
  end

  # Hash built from the action type tag and the stored channel.
  def hash
    "#{actionType}#{channel}".hash
  end

  # Equal when +other+ is this very object, or another LexerChannelAction
  # with the same channel. Uses ||/&& so the grouping is explicit; an
  # unbracketed `a or b and c` parses as `(a or b) and c` in Ruby.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerChannelAction) && channel == other.channel)
  end

  def to_s
    "channel(#{channel})"
  end
end
|
267
|
+
# This implementation of {@link LexerAction} is used for tracking input offsets
|
268
|
+
# for position-dependent actions within a {@link LexerActionExecutor}.
|
269
|
+
#
|
270
|
+
# <p>This action is not serialized as part of the ATN, and is only required for
|
271
|
+
# position-dependent lexer actions which appear at a location other than the
|
272
|
+
# end of a rule. For more information about DFA optimizations employed for
|
273
|
+
# lexer actions, see {@link LexerActionExecutor#append} and
|
274
|
+
# {@link LexerActionExecutor#fixOffsetBeforeMatch}.</p>
|
275
|
+
class LexerIndexedCustomAction < LexerAction
  # offset: the offset stored alongside the wrapped action.
  # action: the wrapped lexer action that actually gets executed.
  # (+isPositionDependent+ is inherited from LexerAction.)
  attr_accessor :offset, :action

  # Constructs a new indexed custom action by associating an offset with a
  # lexer action. The wrapper takes over the wrapped action's type tag and is
  # always flagged as position dependent.
  #
  # @param _offset the offset to associate with the action
  # @param _action the lexer action to wrap
  def initialize(_offset, _action)
    # Read the type from the _action argument: the +action+ accessor is not
    # assigned until two lines below and would still be nil here.
    super(_action.actionType)
    self.offset = _offset
    self.action = _action
    self.isPositionDependent = true
  end

  # Executes the wrapped action with the given lexer. The input stream
  # position is assumed to have been set by the calling code.
  def execute(lexer)
    action.execute(lexer)
  end

  # Hash over the type tag, the offset and the wrapped action's own #hash
  # (Array#hash asks each element for its hash), so it stays consistent with
  # == even when the wrapped action has no value-based to_s.
  def hash
    [actionType, offset, action].hash
  end

  # Equal when +other+ is this very object, or another
  # LexerIndexedCustomAction with an equal offset and an equal wrapped action.
  # Uses ||/&& so the grouping is explicit; an unbracketed `a or b and c`
  # parses as `(a or b) and c` in Ruby.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerIndexedCustomAction) &&
        offset == other.offset &&
        action == other.action)
  end
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# Represents an executor for a sequence of lexer actions which are traversed during
|
2
|
+
# the matching operation of a lexer rule (token).
|
3
|
+
#
|
4
|
+
# <p>The executor tracks position information for position-dependent lexer actions
|
5
|
+
# efficiently, ensuring that actions appearing only at the end of the rule do
|
6
|
+
# not cause bloating of the {@link DFA} created for the lexer.</p>
|
7
|
+
|
8
|
+
class LexerActionExecutor
  # hashCode:     hash value cached at construction time.
  # lexerActions: the actions this executor runs, in order.
  attr_accessor :hashCode, :lexerActions

  # @param _lexerActions [Array] the lexer actions to execute, in order
  def initialize(_lexerActions = [])
    @lexerActions = _lexerActions
    # Cached once because #hash is called often. It is derived from each
    # action's own #hash (not its to_s) so that executors holding equal
    # actions get equal hash codes even when an action has no value-based
    # string form.
    @hashCode = @lexerActions.map(&:hash).hash
  end

  # Creates a LexerActionExecutor which executes the actions of the input
  # +lexerActionExecutor+ followed by the given +lexerAction+.
  #
  # @param lexerActionExecutor the executor holding the actions seen so far,
  #   or nil, which is treated like an empty executor
  # @param lexerAction the lexer action to run after those actions
  # @return [LexerActionExecutor] a new executor for the combined actions;
  #   the input executor is not modified
  def self.append(lexerActionExecutor, lexerAction)
    return LexerActionExecutor.new([lexerAction]) if lexerActionExecutor.nil?

    # Array#+ builds a fresh array. Array#concat would raise TypeError for a
    # single (non-array) action and would also change the source executor's
    # list after its hash code had been cached.
    LexerActionExecutor.new(lexerActionExecutor.lexerActions + [lexerAction])
  end

  # Creates a LexerActionExecutor in which every position-dependent action
  # that is not already a LexerIndexedCustomAction is wrapped in one carrying
  # +offset+.
  #
  # @param offset the offset to store with each newly wrapped action
  # @return [LexerActionExecutor] +self+ when nothing needed wrapping,
  #   otherwise a new executor with the wrapped actions
  def fixOffsetBeforeMatch(offset)
    updatedLexerActions = nil
    @lexerActions.each_with_index do |lexerAction, i|
      next unless lexerAction.isPositionDependent
      next if lexerAction.kind_of?(LexerIndexedCustomAction)

      # Copy lazily: only pay for a new array once a change is required.
      updatedLexerActions ||= @lexerActions.dup
      updatedLexerActions[i] = LexerIndexedCustomAction.new(offset, lexerAction)
    end
    updatedLexerActions.nil? ? self : LexerActionExecutor.new(updatedLexerActions)
  end

  # Executes the stored actions, in order, against +lexer+.
  #
  # Before an indexed action runs, +input+ is moved to
  # +startIndex + offset+; before any other position-dependent action runs,
  # +input+ is moved to the index it had when this method was called. If the
  # last seek left the input somewhere else, that starting index is restored
  # before returning, including when an action raises.
  #
  # @param lexer      the lexer handed to each action's +execute+
  # @param input      the input stream; must respond to +index+ and +seek+
  # @param startIndex the token start index that offsets are relative to
  def execute(lexer, input, startIndex)
    requiresSeek = false
    stopIndex = input.index
    begin
      lexerActions.each do |lexerAction|
        if lexerAction.kind_of?(LexerIndexedCustomAction)
          position = startIndex + lexerAction.offset
          input.seek(position)
          # Unwrap: the wrapped action is the one that actually executes.
          lexerAction = lexerAction.action
          requiresSeek = position != stopIndex
        elsif lexerAction.isPositionDependent
          input.seek(stopIndex)
          requiresSeek = false
        end
        lexerAction.execute(lexer)
      end
    ensure
      input.seek(stopIndex) if requiresSeek
    end
  end

  # @return the hash code cached by #initialize
  def hash
    hashCode
  end

  def eql?(other)
    self == other
  end

  # Equal when +other+ is this very object, or another LexerActionExecutor
  # with the same cached hash code and an equal list of actions.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerActionExecutor) &&
        hashCode == other.hashCode &&
        lexerActions == other.lexerActions)
  end
end
|
@@ -0,0 +1,1622 @@
|
|
1
|
+
#
|
2
|
+
# The embodiment of the adaptive LL(*), ALL(*), parsing strategy.
|
3
|
+
#
|
4
|
+
# <p>
|
5
|
+
# The basic complexity of the adaptive strategy makes it harder to understand.
|
6
|
+
# We begin with ATN simulation to build paths in a DFA. Subsequent prediction
|
7
|
+
# requests go through the DFA first. If they reach a state without an edge for
|
8
|
+
# the current symbol, the algorithm fails over to the ATN simulation to
|
9
|
+
# complete the DFA path for the current input (until it finds a conflict state
|
10
|
+
# or uniquely predicting state).</p>
|
11
|
+
#
|
12
|
+
# <p>
|
13
|
+
# All of that is done without using the outer context because we want to create
|
14
|
+
# a DFA that is not dependent upon the rule invocation stack when we do a
|
15
|
+
# prediction. One DFA works in all contexts. We avoid using context not
|
16
|
+
# necessarily because it's slower, although it can be, but because of the DFA
|
17
|
+
# caching problem. The closure routine only considers the rule invocation stack
|
18
|
+
# created during prediction beginning in the decision rule. For example, if
|
19
|
+
# prediction occurs without invoking another rule's ATN, there are no context
|
20
|
+
# stacks in the configurations. When lack of context leads to a conflict, we
|
21
|
+
# don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
|
22
|
+
# strategy (versus full LL(*)).</p>
|
23
|
+
#
|
24
|
+
# <p>
|
25
|
+
# When SLL yields a configuration set with conflict, we rewind the input and
|
26
|
+
# retry the ATN simulation, this time using full outer context without adding
|
27
|
+
# to the DFA. Configuration context stacks will be the full invocation stacks
|
28
|
+
# from the start rule. If we get a conflict using full context, then we can
|
29
|
+
# definitively say we have a true ambiguity for that input sequence. If we
|
30
|
+
# don't get a conflict, it implies that the decision is sensitive to the outer
|
31
|
+
# context. (It is not context-sensitive in the sense of context-sensitive
|
32
|
+
# grammars.)</p>
|
33
|
+
#
|
34
|
+
# <p>
|
35
|
+
# The next time we reach this DFA state with an SLL conflict, through DFA
|
36
|
+
# simulation, we will again retry the ATN simulation using full context mode.
|
37
|
+
# This is slow because we can't save the results and have to "interpret" the
|
38
|
+
# ATN each time we get that input.</p>
|
39
|
+
#
|
40
|
+
# <p>
|
41
|
+
# <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p>
|
42
|
+
#
|
43
|
+
# <p>
|
44
|
+
# We could cache results from full context to predicted alternative easily and
|
45
|
+
# that saves a lot of time but doesn't work in presence of predicates. The set
|
46
|
+
# of visible predicates from the ATN start state changes depending on the
|
47
|
+
# context, because closure can fall off the end of a rule. I tried to cache
|
48
|
+
# tuples (stack context, semantic context, predicted alt) but it was slower
|
49
|
+
# than interpreting and much more complicated. Also required a huge amount of
|
50
|
+
# memory. The goal is not to create the world's fastest parser anyway. I'd like
|
51
|
+
# to keep this algorithm simple. By launching multiple threads, we can improve
|
52
|
+
# the speed of parsing across a large number of files.</p>
|
53
|
+
#
|
54
|
+
# <p>
|
55
|
+
# There is no strict ordering between the amount of input used by SLL vs LL,
|
56
|
+
# which makes it really hard to build a cache for full context. Let's say that
|
57
|
+
# we have input A B C that leads to an SLL conflict with full context X. That
|
58
|
+
# implies that using X we might only use A B but we could also use A B C D to
|
59
|
+
# resolve conflict. Input A B C D could predict alternative 1 in one position
|
60
|
+
# in the input and A B C E could predict alternative 2 in another position in
|
61
|
+
# input. The conflicting SLL configurations could still be non-unique in the
|
62
|
+
# full context prediction, which would lead us to requiring more input than the
|
63
|
+
# original A B C. To make a prediction cache work, we have to track the exact
|
64
|
+
# input used during the previous prediction. That amounts to a cache that maps
|
65
|
+
# X to a specific DFA for that context.</p>
|
66
|
+
#
|
67
|
+
# <p>
|
68
|
+
# Something should be done for left-recursive expression predictions. They are
|
69
|
+
# likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry
|
70
|
+
# with full LL thing Sam does.</p>
|
71
|
+
#
|
72
|
+
# <p>
|
73
|
+
# <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p>
|
74
|
+
#
|
75
|
+
# <p>
|
76
|
+
# We avoid doing full context retry when the outer context is empty, we did not
|
77
|
+
# dip into the outer context by falling off the end of the decision state rule,
|
78
|
+
# or when we force SLL mode.</p>
|
79
|
+
#
|
80
|
+
# <p>
|
81
|
+
# As an example of the not dip into outer context case, consider super
|
82
|
+
# constructor calls versus function calls. One grammar might look like
|
83
|
+
# this:</p>
|
84
|
+
#
|
85
|
+
# <pre>
|
86
|
+
# ctorBody
|
87
|
+
# : '{' superCall? stat* '}'
|
88
|
+
# ;
|
89
|
+
# </pre>
|
90
|
+
#
|
91
|
+
# <p>
|
92
|
+
# Or, you might see something like</p>
|
93
|
+
#
|
94
|
+
# <pre>
|
95
|
+
# stat
|
96
|
+
# : superCall ';'
|
97
|
+
# | expression ';'
|
98
|
+
# | ...
|
99
|
+
# ;
|
100
|
+
# </pre>
|
101
|
+
#
|
102
|
+
# <p>
|
103
|
+
# In both cases I believe that no closure operations will dip into the outer
|
104
|
+
# context. In the first case ctorBody in the worst case will stop at the '}'.
|
105
|
+
# In the 2nd case it should stop at the ';'. Both cases should stay within the
|
106
|
+
# entry rule and not dip into the outer context.</p>
|
107
|
+
#
|
108
|
+
# <p>
|
109
|
+
# <strong>PREDICATES</strong></p>
|
110
|
+
#
|
111
|
+
# <p>
|
112
|
+
# Predicates are always evaluated if present, in both SLL and LL. SLL and
|
113
|
+
# LL simulation deals with predicates differently. SLL collects predicates as
|
114
|
+
# it performs closure operations like ANTLR v3 did. It delays predicate
|
115
|
+
# evaluation until it reaches an accept state. This allows us to cache the SLL
|
116
|
+
# ATN simulation whereas, if we had evaluated predicates on-the-fly during
|
117
|
+
# closure, the DFA state configuration sets would be different and we couldn't
|
118
|
+
# build up a suitable DFA.</p>
|
119
|
+
#
|
120
|
+
# <p>
|
121
|
+
# When building a DFA accept state during ATN simulation, we evaluate any
|
122
|
+
# predicates and return the sole semantically valid alternative. If there is
|
123
|
+
# more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
|
124
|
+
# we throw an exception. Alternatives without predicates act like they have
|
125
|
+
# true predicates. The simple way to think about it is to strip away all
|
126
|
+
# alternatives with false predicates and choose the minimum alternative that
|
127
|
+
# remains.</p>
|
128
|
+
#
|
129
|
+
# <p>
|
130
|
+
# When we start in the DFA and reach an accept state that's predicated, we test
|
131
|
+
# those and return the minimum semantically viable alternative. If no
|
132
|
+
# alternatives are viable, we throw an exception.</p>
|
133
|
+
#
|
134
|
+
# <p>
|
135
|
+
# During full LL ATN simulation, closure always evaluates predicates
|
136
|
+
# on-the-fly. This is crucial to reducing the configuration set size during
|
137
|
+
# closure. It hits a landmine when parsing with the Java grammar, for example,
|
138
|
+
# without this on-the-fly evaluation.</p>
|
139
|
+
#
|
140
|
+
# <p>
|
141
|
+
# <strong>SHARING DFA</strong></p>
|
142
|
+
#
|
143
|
+
# <p>
|
144
|
+
# All instances of the same parser share the same decision DFAs through a
|
145
|
+
# static field. Each instance gets its own ATN simulator but they share the
|
146
|
+
# same {@link #decisionToDFA} field. They also share a
|
147
|
+
# {@link PredictionContextCache} object that makes sure that all
|
148
|
+
# {@link PredictionContext} objects are shared among the DFA states. This makes
|
149
|
+
# a big size difference.</p>
|
150
|
+
#
|
151
|
+
# <p>
|
152
|
+
# <strong>THREAD SAFETY</strong></p>
|
153
|
+
#
|
154
|
+
# <p>
|
155
|
+
# The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when
|
156
|
+
# it adds a new DFA object to that array. {@link #addDFAEdge}
|
157
|
+
# locks on the DFA for the current decision when setting the
|
158
|
+
# {@link DFAState#edges} field. {@link #addDFAState} locks on
|
159
|
+
# the DFA for the current decision when looking up a DFA state to see if it
|
160
|
+
# already exists. We must make sure that all requests to add DFA states that
|
161
|
+
# are equivalent result in the same shared DFA object. This is because lots of
|
162
|
+
# threads will be trying to update the DFA at once. The
|
163
|
+
# {@link #addDFAState} method also locks inside the DFA lock
|
164
|
+
# but this time on the shared context cache when it rebuilds the
|
165
|
+
# configurations' {@link PredictionContext} objects using cached
|
166
|
+
# subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
|
167
|
+
# safe as long as we can guarantee that all threads referencing
|
168
|
+
# {@code s.edge[t]} get the same physical target {@link DFAState}, or
|
169
|
+
# {@code null}. Once into the DFA, the DFA simulation does not reference the
|
170
|
+
# {@link DFA#states} map. It follows the {@link DFAState#edges} field to new
|
171
|
+
# targets. The DFA simulator will either find {@link DFAState#edges} to be
|
172
|
+
# {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or
|
173
|
+
# {@code dfa.edges[t]} to be non-null. The
|
174
|
+
# {@link #addDFAEdge} method could be racing to set the field
|
175
|
+
# but in either case the DFA simulator works; if {@code null}, and requests ATN
|
176
|
+
# simulation. It could also race trying to get {@code dfa.edges[t]}, but either
|
177
|
+
# way it will work because it's not doing a test and set operation.</p>
|
178
|
+
#
|
179
|
+
# <p>
|
180
|
+
# <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage
|
181
|
+
# Parsing)</strong></p>
|
182
|
+
#
|
183
|
+
# <p>
|
184
|
+
# Sam pointed out that if SLL does not give a syntax error, then there is no
|
185
|
+
# point in doing full LL, which is slower. We only have to try LL if we get a
|
186
|
+
# syntax error. For maximum speed, Sam starts the parser set to pure SLL
|
187
|
+
# mode with the {@link BailErrorStrategy}:</p>
|
188
|
+
#
|
189
|
+
# <pre>
|
190
|
+
# parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
|
191
|
+
# parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
|
192
|
+
# </pre>
|
193
|
+
#
|
194
|
+
# <p>
|
195
|
+
# If it does not get a syntax error, then we're done. If it does get a syntax
|
196
|
+
# error, we need to retry with the combined SLL/LL strategy.</p>
|
197
|
+
#
|
198
|
+
# <p>
|
199
|
+
# The reason this works is as follows. If there are no SLL conflicts, then the
|
200
|
+
# grammar is SLL (at least for that input set). If there is an SLL conflict,
|
201
|
+
# the full LL analysis must yield a set of viable alternatives which is a
|
202
|
+
# subset of the alternatives reported by SLL. If the LL set is a singleton,
|
203
|
+
# then the grammar is LL but not SLL. If the LL set is the same size as the SLL
|
204
|
+
# set, the decision is SLL. If the LL set has size > 1, then that decision
|
205
|
+
# is truly ambiguous on the current input. If the LL set is smaller, then the
|
206
|
+
# SLL conflict resolution might choose an alternative that the full LL would
|
207
|
+
# rule out as a possibility based upon better context information. If that's
|
208
|
+
# the case, then the SLL parse will definitely get an error because the full LL
|
209
|
+
# analysis says it's not viable. If SLL conflict resolution chooses an
|
210
|
+
# alternative within the LL set, then both SLL and LL would choose the same
|
211
|
+
# alternative because they both choose the minimum of multiple conflicting
|
212
|
+
# alternatives.</p>
|
213
|
+
#
|
214
|
+
# <p>
|
215
|
+
# Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
|
216
|
+
# a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL
|
217
|
+
# parsing will get an error because SLL will pursue alternative 1. If
|
218
|
+
# <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
|
219
|
+
# choose the same alternative because alternative one is the minimum of either
|
220
|
+
# set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax
|
221
|
+
# error. If <em>s</em> is {@code {1}} then SLL will succeed.</p>
|
222
|
+
#
|
223
|
+
# <p>
|
224
|
+
# Of course, if the input is invalid, then we will get an error for sure in
|
225
|
+
# both SLL and LL parsing. Erroneous input will therefore require 2 passes over
|
226
|
+
# the input.</p>
|
227
|
+
#
|
228
|
+
|
229
|
+
class ParserATNSimulator < ATNSimulator
|
230
|
+
include PredictionContextFunctions
|
231
|
+
|
232
|
+
# Debug switches shared by every simulator instance. All of them start out
# disabled and no writer is defined in this section.
@@debug = false
@@dfa_debug = false
@@debug_list_atn_decisions = false
@@retry_debug = false

# Class-level readers for the switches. They are written out explicitly
# because an attr_reader on the singleton class would read the class-level
# instance variables (@debug, ...) instead of the @@ switches above, and
# those instance variables are never assigned.
def self.debug; @@debug; end
def self.dfa_debug; @@dfa_debug; end
def self.debug_list_atn_decisions; @@debug_list_atn_decisions; end
def self.retry_debug; @@retry_debug; end

# Instance-level readers for the same switches.
def debug; @@debug; end
def dfa_debug; @@dfa_debug; end
def debug_list_atn_decisions; @@debug_list_atn_decisions; end
def retry_debug; @@retry_debug; end
|
245
|
+
|
246
|
+
|
247
|
+
|
248
|
+
attr_accessor :decisionToDFA, :startIndex
|
249
|
+
attr_accessor :parser, :predictionMode, :input, :outerContext, :mergeCache
|
250
|
+
attr_accessor :_dfa
|
251
|
+
|
252
|
+
# Builds a simulator bound to one parser.
#
# @param parser the parser this simulator predicts for
# @param atn forwarded to the superclass constructor
# @param decisionToDFA per-decision DFA table, indexed by decision number
# @param sharedContextCache forwarded to the superclass constructor
def initialize(parser, atn, decisionToDFA, sharedContextCache)
  super(atn, sharedContextCache)

  self.parser = parser
  self.decisionToDFA = decisionToDFA

  # Prediction strategy: SLL, LL, or LL + exact ambiguity detection.
  self.predictionMode = PredictionMode.LL

  # Per-prediction scratch state. These are kept on the instance (rather
  # than passed as parameters) because they are needed deep inside the
  # prediction machinery.
  self.startIndex = 0

  # mergeCache maps a pair of prediction-context graphs (a, b) to their
  # merged result c, so a repeated merge can be skipped; (b, a) should be
  # examined on lookup as well. It is only meant to live for a single
  # prediction because keeping it around wastes a lot of memory. No
  # synchronization is needed: a parser/simulator pair handles one input
  # at a time.
  self.input = self.outerContext = self.mergeCache = nil
end
|
272
|
+
|
273
|
+
|
274
|
+
# Intentionally a no-op: this simulator has nothing to reset.
def reset
end
|
276
|
+
|
277
|
+
# Predicts which alternative of +decision+ matches the upcoming input.
#
# The DFA cached for the decision is used (and extended through execATN)
# to pick the alternative. If the DFA has no start state yet, one is
# computed from the ATN and stored on the DFA first. The stream position is
# restored and the mark released before returning, whether or not an
# exception is raised.
#
# @param input the token stream to look ahead in
# @param decision index into decisionToDFA
# @param outerContext the invoking rule context; may be nil
# @return the alternative number produced by execATN
def adaptivePredict(input, decision, outerContext)
  if debug || debug_list_atn_decisions
    lookaheadName = getLookaheadName(input)
    puts "adaptivePredict decision #{decision} exec LA(1)==#{lookaheadName} line #{input.LT(1).line}:#{input.LT(1).column}"
  end
  # type_check(TokenStream, input)
  # type_check(ParserRuleContext, outerContext)
  self.input = input
  self.startIndex = input.index
  self.outerContext = outerContext

  dfa = decisionToDFA[decision]
  @_dfa = dfa
  marker = input.mark
  restoreIndex = input.index

  # We now have the DFA for this decision; it may still lack a start state.
  begin
    # A precedence DFA keeps one start state per parser precedence level;
    # a regular DFA has the single start state s0.
    s0 = if dfa.precedenceDfa
           dfa.getPrecedenceStartState(parser.getPrecedence)
         else
           dfa.s0
         end

    if s0.nil?
      outerContext = ParserRuleContext.EMPTY if outerContext.nil?
      if debug || debug_list_atn_decisions
        puts "predictATN decision #{dfa.decision} exec LA(1)==#{getLookaheadName(input)}, outerContext=#{outerContext.to_s}"
      end

      # A DFA not yet marked as a precedence DFA is switched over when its
      # ATN start state is the star-loop entry that decides whether a
      # precedence rule should continue or complete.
      if !dfa.precedenceDfa && dfa.atnStartState.kind_of?(StarLoopEntryState)
        dfa.setPrecedenceDfa(true) if dfa.atnStartState.precedenceRuleDecision
      end

      type_check(ParserRuleContext.EMPTY(), ParserRuleContext)
      startConfigs = computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, false)

      if dfa.precedenceDfa
        # Convert the computed start state into the start state for the
        # current precedence level and register it under that level
        # instead of assigning dfa.s0.
        filteredConfigs = applyPrecedenceFilter(startConfigs)
        s0 = addDFAState(dfa, DFAState.new(nil, filteredConfigs))
        dfa.setPrecedenceStartState(parser.getPrecedence, s0)
      else
        s0 = addDFAState(dfa, DFAState.new(nil, startConfigs))
        dfa.s0 = s0
      end
    end

    predicted = execATN(dfa, s0, input, restoreIndex, outerContext)
    puts "DFA after predictATN: #{dfa.toString(parser.tokenNames)}" if debug
    return predicted
  ensure
    self.mergeCache = nil # drop the merge cache after every prediction
    input.seek(restoreIndex)
    input.release(marker)
    @_dfa = nil
  end
end
|
358
|
+
# Performs ATN simulation to compute a predicted alternative based
|
359
|
+
# upon the remaining input, but also updates the DFA cache to avoid
|
360
|
+
# having to traverse the ATN again for the same input sequence.
|
361
|
+
|
362
|
+
# There are some key conditions we're looking for after computing a new
|
363
|
+
# set of ATN configs (proposed DFA state):
|
364
|
+
# if the set is empty, there is no viable alternative for current symbol
|
365
|
+
# does the state uniquely predict an alternative?
|
366
|
+
# does the state have a conflict that would prevent us from
|
367
|
+
# putting it on the work list?
|
368
|
+
|
369
|
+
# We also have some key operations to do:
|
370
|
+
# add an edge from previous DFA state to potentially new DFA state, D,
|
371
|
+
# upon current symbol but only if adding to work list, which means in all
|
372
|
+
# cases except no viable alternative (and possibly non-greedy decisions?)
|
373
|
+
# collecting predicates and adding semantic context to DFA accept states
|
374
|
+
# adding rule context to context-sensitive DFA accept states
|
375
|
+
# consuming an input symbol
|
376
|
+
# reporting a conflict
|
377
|
+
# reporting an ambiguity
|
378
|
+
# reporting a context sensitivity
|
379
|
+
# reporting insufficient predicates
|
380
|
+
|
381
|
+
# cover these cases:
|
382
|
+
# dead end
|
383
|
+
# single alt
|
384
|
+
# single alt + preds
|
385
|
+
# conflict
|
386
|
+
# conflict + preds
|
387
|
+
#
|
388
|
+
# Runs the SLL simulation for one decision, following existing DFA edges
# and computing missing target states on demand.
#
# Debug traces are written with puts so every message ends its own line.
#
# @param dfa the DFA of the decision being predicted
# @param s0 the DFA state to start from
# @param input the token stream
# @param startIndex stream index where the prediction started
# @param outerContext the invoking rule context
# @return the predicted alternative number
# @raise the exception built by noViableAlt when no alternative is viable
def execATN(dfa, s0, input, startIndex, outerContext)
  type_check(outerContext, ParserRuleContext)
  if debug || debug_list_atn_decisions
    puts "execATN decision #{dfa.decision} exec LA(1)==#{getLookaheadName(input)} line #{input.LT(1).line}:#{input.LT(1).column}"
  end
  previousD = s0

  puts "s0 = #{s0}" if debug

  t = input.LA(1)
  while true # while more work
    cD = getExistingTargetState(previousD, t)
    cD = computeTargetState(dfa, previousD, t) if cD.nil?

    if cD.equal?(ATNSimulator::ERROR)
      # if any configs in previous dipped into outer context, that
      # means that input up to t actually finished entry rule
      # at least for SLL decision. Full LL doesn't dip into outer
      # so don't need special case.
      # We will get an error no matter what so delay until after
      # decision; better error message. Also, no reachable target
      # ATN states in SLL implies LL will also get nowhere.
      # If conflict in states that dip out, choose min since we
      # will get error no matter what.
      e = noViableAlt(input, outerContext, previousD.configs, startIndex)
      input.seek(startIndex)
      alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext)
      return alt if alt != ATN::INVALID_ALT_NUMBER
      raise e
    end

    if cD.requiresFullContext && predictionMode != PredictionMode.SLL
      # IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error)
      conflictingAlts = nil
      if cD.predicates
        puts "DFA state has preds in DFA sim LL failover" if debug
        conflictIndex = input.index
        # predicates are evaluated with the stream at the decision start
        input.seek(startIndex) if conflictIndex != startIndex
        conflictingAlts = evalSemanticContext(cD.predicates, outerContext, true)
        if conflictingAlts.length == 1
          puts "Full LL avoided" if debug
          return conflictingAlts.min
        end
        # restore the index so reporting the fallback to full
        # context occurs with the index at the correct spot
        input.seek(conflictIndex) if conflictIndex != startIndex
      end
      puts "ctx sensitive state #{outerContext} in #{cD}" if dfa_debug
      fullCtx = true
      s0_closure = computeStartState(dfa.atnStartState, outerContext, fullCtx)
      reportAttemptingFullContext(dfa, conflictingAlts, cD.configs, startIndex, input.index)
      return execATNWithFullContext(dfa, cD, s0_closure, input, startIndex, outerContext)
    end

    if cD.isAcceptState
      return cD.prediction if cD.predicates.nil?

      stopIndex = input.index
      input.seek(startIndex)
      alts = evalSemanticContext(cD.predicates, outerContext, true)
      if alts.length == 0
        raise noViableAlt(input, outerContext, cD.configs, startIndex)
      elsif alts.length == 1
        return alts.min
      else
        # report ambiguity after predicate evaluation to make sure the correct
        # set of ambig alts is reported.
        reportAmbiguity(dfa, cD, startIndex, stopIndex, false, alts, cD.configs)
        return alts.min
      end
    end

    previousD = cD

    # EOF is never consumed; t simply stays at EOF from then on
    if t != Token::EOF
      input.consume
      t = input.LA(1)
    end
  end
end
|
484
|
+
#
|
485
|
+
# Get an existing target state for an edge in the DFA. If the target state
|
486
|
+
# for the edge has not yet been computed or is otherwise not available,
|
487
|
+
# this method returns {@code null}.
|
488
|
+
#
|
489
|
+
# @param previousD The current DFA state
|
490
|
+
# @param t The next input symbol
|
491
|
+
# @return The existing target DFA state for the given input symbol
|
492
|
+
# {@code t}, or {@code null} if the target state for this edge is not
|
493
|
+
# already cached
|
494
|
+
#
|
495
|
+
# Looks up the cached DFA edge leaving +previousD+ on symbol +t+.
#
# Edges are stored at index t + 1.
#
# @param previousD the current DFA state
# @param t the next input symbol
# @return the entry stored for +t+, or nil when the state has no edge table
#   or t + 1 lies outside it
def getExistingTargetState(previousD, t)
  edgeTable = previousD.edges
  return nil if edgeTable.nil?

  slot = t + 1
  return nil if slot < 0 || slot >= edgeTable.length

  edgeTable[slot]
end
|
503
|
+
#
|
504
|
+
# Compute a target state for an edge in the DFA, and attempt to add the
|
505
|
+
# computed state and corresponding edge to the DFA.
|
506
|
+
#
|
507
|
+
# @param dfa The DFA
|
508
|
+
# @param previousD The current DFA state
|
509
|
+
# @param t The next input symbol
|
510
|
+
#
|
511
|
+
# @return The computed target DFA state for the given input symbol
|
512
|
+
# {@code t}. If {@code t} does not lead to a valid DFA state, this method
|
513
|
+
# returns {@link #ERROR}.
|
514
|
+
#
|
515
|
+
# Computes the DFA state reached from +previousD+ on symbol +t+ and records
# the corresponding edge in the DFA.
#
# @param dfa the DFA being extended
# @param previousD the current DFA state
# @param t the next input symbol
# @return the value returned by addDFAEdge for the new state, or
#   ATNSimulator::ERROR when computeReachSet finds nothing reachable
def computeTargetState(dfa, previousD, t)
  reachSet = computeReachSet(previousD.configs, t, false)
  if reachSet.nil?
    addDFAEdge(dfa, previousD, t, ATNSimulator::ERROR)
    return ATNSimulator::ERROR
  end

  # Build the target state completely before it is added to the DFA.
  target = DFAState.new(nil, reachSet)
  uniqueAlt = getUniqueAlt(reachSet)

  if debug
    altSubSets = PredictionMode.getConflictingAltSubsets(reachSet)
    puts "SLL altSubSets=#{altSubSets}, configs=#{reachSet}, predict=#{uniqueAlt}, allSubsetsConflict=#{PredictionMode.allSubsetsConflict(altSubSets)}, conflictingAlts=#{getConflictingAlts(reachSet)}"
  end

  if uniqueAlt != ATN::INVALID_ALT_NUMBER
    # No conflict: exactly one alternative is predicted.
    target.isAcceptState = true
    target.configs.uniqueAlt = uniqueAlt
    target.prediction = uniqueAlt
  elsif PredictionMode.hasSLLConflictTerminatingPrediction(predictionMode, reachSet)
    # More than one viable alternative.
    target.configs.conflictingAlts = getConflictingAlts(reachSet)
    target.requiresFullContext = true
    # In SLL-only mode prediction stops here with the minimum alternative.
    target.isAcceptState = true
    target.prediction = target.configs.conflictingAlts.min
  end

  if target.isAcceptState && target.configs.hasSemanticContext
    predicateDFAState(target, atn.getDecisionState(dfa.decision))
    target.prediction = ATN::INVALID_ALT_NUMBER if target.predicates
  end

  # Everything is added to the DFA only now that the state is complete.
  addDFAEdge(dfa, previousD, t, target)
end
|
558
|
+
# Attaches predicate information to an accept state.
#
# Predicates are collected for the state's conflicting alternatives (or its
# unique alternative). When any are found, the state gets the resulting
# (predicate, alt) pairs and its prediction is invalidated so the predicates
# are consulted; otherwise the state simply predicts the minimum alternative.
#
# @param dfaState the DFA state to update
# @param decisionState the decision's ATN state; its transition count is
#   used as the number of alternatives
def predicateDFAState(dfaState, decisionState)
  altCount = decisionState.transitions.length
  candidateAlts = getConflictingAltsOrUniqueAlt(dfaState.configs)
  predsByAlt = getPredsForAmbigAlts(candidateAlts, dfaState.configs, altCount)
  if predsByAlt
    dfaState.predicates = getPredicatePredictions(candidateAlts, predsByAlt)
    dfaState.prediction = ATN::INVALID_ALT_NUMBER # force predicate evaluation
  else
    # The configs carry predicates, but they can vanish once OR'd together
    # ({p}? || NONE == NONE). With no predicated alt left, take the minimum.
    dfaState.prediction = candidateAlts.min
  end
end
|
576
|
+
# comes back with reach.uniqueAlt set to a valid alt
|
577
|
+
# Re-runs the prediction with full context after the SLL simulation stopped
# at a state that requires it.
#
# Debug traces are written with puts so every message ends its own line.
#
# @param dfa the DFA of the decision being predicted
# @param cD the DFA state where the SLL simulation stopped (how far we got
#   before failing over)
# @param s0 the full-context start configuration set
# @param input the token stream
# @param startIndex stream index where the prediction started
# @param outerContext the invoking rule context
# @return the predicted alternative number
# @raise the exception built by noViableAlt when no alternative is viable
def execATNWithFullContext(dfa, cD, s0, input, startIndex, outerContext)
  puts "execATNWithFullContext #{s0}" if debug || debug_list_atn_decisions

  fullCtx = true
  foundExactAmbig = false
  reach = nil
  previous = s0
  input.seek(startIndex)
  t = input.LA(1)
  predictedAlt = -1
  while true
    reach = computeReachSet(previous, t, fullCtx)
    if reach.nil?
      # if any configs in previous dipped into outer context, that
      # means that input up to t actually finished entry rule
      # at least for LL decision. Full LL doesn't dip into outer
      # so don't need special case.
      # We will get an error no matter what so delay until after
      # decision; better error message. Also, no reachable target
      # ATN states in SLL implies LL will also get nowhere.
      # If conflict in states that dip out, choose min since we
      # will get error no matter what.
      e = noViableAlt(input, outerContext, previous, startIndex)
      input.seek(startIndex)
      alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext)
      return alt if alt != ATN::INVALID_ALT_NUMBER
      raise e
    end

    altSubSets = PredictionMode.getConflictingAltSubsets(reach)
    if debug
      puts "LL altSubSets=#{altSubSets}, predict=#{PredictionMode.getUniqueAlt(altSubSets)}, resolvesToJustOneViableAlt=#{PredictionMode.resolvesToJustOneViableAlt(altSubSets)}"
    end
    reach.uniqueAlt = getUniqueAlt(reach)
    # unique prediction?
    if reach.uniqueAlt != ATN::INVALID_ALT_NUMBER
      predictedAlt = reach.uniqueAlt
      break
    elsif predictionMode != PredictionMode.LL_EXACT_AMBIG_DETECTION
      predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets)
      break if predictedAlt != ATN::INVALID_ALT_NUMBER
    else
      # In exact ambiguity mode, we never try to terminate early.
      # Just keeps scarfing until we know what the conflict is
      if PredictionMode.allSubsetsConflict(altSubSets) && PredictionMode.allSubsetsEqual(altSubSets)
        foundExactAmbig = true
        predictedAlt = PredictionMode.getSingleViableAlt(altSubSets)
        break
      end
      # else there are multiple non-conflicting subsets or
      # we're not sure what the ambiguity is yet.
      # So, keep going.
    end

    previous = reach
    # EOF is never consumed; t simply stays at EOF from then on
    if t != Token::EOF
      input.consume
      t = input.LA(1)
    end
  end

  # If the configuration set uniquely predicts an alternative,
  # without conflict, then we know that it's a full LL decision
  # not SLL.
  if reach.uniqueAlt != ATN::INVALID_ALT_NUMBER
    reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index)
    return predictedAlt
  end

  # We do not check predicates here because we have checked them
  # on-the-fly when doing full context prediction.
  #
  # In non-exact ambiguity detection mode, we might actually be able to
  # detect an exact ambiguity, but I'm not going to spend the cycles
  # needed to check. We only emit ambiguity warnings in exact ambiguity
  # mode.
  #
  # For example, we might know that we have conflicting configurations.
  # But, that does not mean that there is no way forward without a
  # conflict. It's possible to have nonconflicting alt subsets as in:
  #
  #    altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}]
  #
  # from
  #
  #    [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]),
  #     (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])]
  #
  # In this case, (17,1,[5 $]) indicates there is some next sequence that
  # would resolve this without conflict to alternative 1. Any other viable
  # next sequence, however, is associated with a conflict. We stop
  # looking for input because no amount of further lookahead will alter
  # the fact that we should predict alternative 1. We just can't say for
  # sure that there is an ambiguity without looking further.
  reportAmbiguity(dfa, cD, startIndex, input.index, foundExactAmbig, nil, reach)

  predictedAlt
end
|
681
|
+
# Computes the set of configurations reachable from +closure+ on symbol +t+.
#
# Debug traces are written with puts so every message ends its own line.
#
# @param closure the configurations to start from (anything responding to
#   each)
# @param t the input symbol to move on
# @param fullCtx whether this is a full-context computation
# @return the reach configuration set, or nil when it is empty
def computeReachSet(closure, t, fullCtx)
  puts "in computeReachSet, starting closure: #{closure}" if debug

  self.mergeCache = Hash.new if mergeCache.nil?

  intermediate = ATNConfigSet.new(fullCtx)

  # Configurations already in a rule stop state indicate reaching the end
  # of the decision rule (local context) or end of the start rule (full
  # context). Once reached, these configurations are never updated by a
  # closure operation, so they are handled separately for the performance
  # advantage of having a smaller intermediate set when calling closure.
  #
  # For full-context reach operations, separate handling is required to
  # ensure that the alternative matching the longest overall sequence is
  # chosen when multiple such configurations can match the input.
  skippedStopStates = nil

  # First figure out where we can reach on input t
  closure.each do |c|
    puts "testing #{getTokenName(t)} at #{c}" if debug

    if c.state.kind_of?(RuleStopState)
      #assert c.context.isEmpty()
      if fullCtx || t == Token::EOF
        skippedStopStates = Array.new if skippedStopStates.nil?
        skippedStopStates.push(c)
      end
      next
    end

    c.state.transitions.each do |trans|
      target = getReachableTarget(trans, t)
      intermediate.add(ATNConfig.createConfigState(c, target), mergeCache) if target
    end
  end

  # Now figure out where the reach operation can take us...
  reach = nil

  # This block optimizes the reach operation for intermediate sets which
  # trivially indicate a termination state for the overall
  # adaptivePredict operation.
  #
  # The conditions assume that intermediate
  # contains all configurations relevant to the reach set, but this
  # condition is not true when one or more configurations have been
  # withheld in skippedStopStates.
  #
  # Don't pursue the closure if there is just one state (it can only have
  # one alternative) or if there is a unique alternative among the
  # configurations; just use the intermediate set as the result.
  if skippedStopStates.nil? &&
     (intermediate.length == 1 || getUniqueAlt(intermediate) != ATN::INVALID_ALT_NUMBER)
    reach = intermediate
  end

  # If the reach set could not be trivially determined, perform a closure
  # operation on the intermediate set to compute its initial value.
  if reach.nil?
    reach = ATNConfigSet.new(fullCtx)
    closureBusy = Set.new
    treatEofAsEpsilon = t == Token::EOF
    intermediate.each do |c|
      # explicit receiver: the parameter named closure shadows the method
      self.closure(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon)
    end
  end

  if t == Token::EOF
    # After consuming EOF no additional input is possible, so we are
    # only interested in configurations which reached the end of the
    # decision rule (local context) or end of the start rule (full
    # context). Update reach to contain only these configurations. This
    # handles both explicit EOF transitions in the grammar and implicit
    # EOF transitions following the end of the decision or start rule.
    #
    # When reach==intermediate, no closure operation was performed. In
    # this case, removeAllConfigsNotInRuleStopState needs to check for
    # reachable rule stop states as well as configurations already in
    # a rule stop state.
    #
    # This is handled before the configurations in skippedStopStates,
    # because any configurations potentially added from that list are
    # already guaranteed to meet this condition whether or not it's
    # required.
    reach = removeAllConfigsNotInRuleStopState(reach, reach.equal?(intermediate))
  end

  # If skippedStopStates is not nil, then it contains at least one
  # configuration. For full-context reach operations, these
  # configurations reached the end of the start rule, in which case we
  # only add them back to reach if no configuration during the current
  # closure operation reached such a state. This ensures adaptivePredict
  # chooses an alternative matching the longest overall sequence when
  # multiple alternatives are viable.
  if skippedStopStates && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))
    #assert len(skippedStopStates)>0
    skippedStopStates.each { |c| reach.add(c, mergeCache) }
  end

  reach.empty? ? nil : reach
end
|
803
|
+
#
|
804
|
+
# Return a configuration set containing only the configurations from
|
805
|
+
# {@code configs} which are in a {@link RuleStopState}. If all
|
806
|
+
# configurations in {@code configs} are already in a rule stop state, this
|
807
|
+
# method simply returns {@code configs}.
|
808
|
+
#
|
809
|
+
# <p>When {@code lookToEndOfRule} is true, this method uses
|
810
|
+
# {@link ATN#nextTokens} for each configuration in {@code configs} which is
|
811
|
+
# not already in a rule stop state to see if a rule stop state is reachable
|
812
|
+
# from the configuration via epsilon-only transitions.</p>
|
813
|
+
#
|
814
|
+
# @param configs the configuration set to update
|
815
|
+
# @param lookToEndOfRule when true, this method checks for rule stop states
|
816
|
+
# reachable by epsilon-only transitions from each configuration in
|
817
|
+
# {@code configs}.
|
818
|
+
#
|
819
|
+
# @return {@code configs} if all configurations in {@code configs} are in a
|
820
|
+
# rule stop state, otherwise return a new configuration set containing only
|
821
|
+
# the configurations from {@code configs} which are in a rule stop state
|
822
|
+
#
|
823
|
+
# Narrows +configs+ down to configurations that sit in a RuleStopState.
#
# @param configs the configuration set to filter
# @param lookToEndOfRule when true, a configuration whose state has only
#   epsilon transitions and whose next-token set contains Token::EPSILON is
#   replaced by a configuration on its rule's stop state
# @return +configs+ itself when every configuration is already in a rule
#   stop state, otherwise a new set holding only the stop-state
#   configurations
def removeAllConfigsNotInRuleStopState(configs, lookToEndOfRule)
  return configs if PredictionMode.allConfigsInRuleStopStates(configs)

  stopConfigs = ATNConfigSet.new(configs.fullCtx)
  configs.each do |cfg|
    if cfg.state.kind_of?(RuleStopState)
      stopConfigs.add(cfg, mergeCache)
    elsif lookToEndOfRule && cfg.state.epsilonOnlyTransitions
      followSet = atn.nextTokens(cfg.state)
      if followSet.member?(Token::EPSILON)
        ruleStop = atn.ruleToStopState[cfg.state.ruleIndex]
        stopConfigs.add(ATNConfig.new(ruleStop, nil, nil, nil, cfg), mergeCache)
      end
    end
  end
  stopConfigs
end
|
843
|
+
# Builds the start configuration set for a decision.
#
# One configuration is created per transition leaving +p+ (alternatives
# are numbered from 1 in transition order) and the closure of each is
# collected into a single set.
#
# @param p the decision's ATN state
# @param ctx the rule context the initial prediction context is built from
# @param fullCtx whether full-context prediction is in effect
# @return the resulting ATNConfigSet
def computeStartState(p, ctx, fullCtx)
  type_check(p, ATNState)
  type_check(ctx, RuleContext)

  # always at least the implicit call to start rule
  startContext = PredictionContextFromRuleContext(atn, ctx)
  startConfigs = ATNConfigSet.new(fullCtx)

  p.transitions.each_with_index do |transition, idx|
    altConfig = ATNConfig.new(transition.target, idx + 1, startContext)
    # each alternative gets its own busy set
    closure(altConfig, startConfigs, Set.new, true, fullCtx, false)
  end
  startConfigs
end
|
859
|
+
#
|
860
|
+
# This method transforms the start state computed by
|
861
|
+
# {@link #computeStartState} to the special start state used by a
|
862
|
+
# precedence DFA for a particular precedence value. The transformation
|
863
|
+
# process applies the following changes to the start state's configuration
|
864
|
+
# set.
|
865
|
+
#
|
866
|
+
# <ol>
|
867
|
+
# <li>Evaluate the precedence predicates for each configuration using
|
868
|
+
# {@link SemanticContext#evalPrecedence}.</li>
|
869
|
+
# <li>Remove all configurations which predict an alternative greater than
|
870
|
+
# 1, for which another configuration that predicts alternative 1 is in the
|
871
|
+
# same ATN state with the same prediction context. This transformation is
|
872
|
+
# valid for the following reasons:
|
873
|
+
# <ul>
|
874
|
+
# <li>The closure block cannot contain any epsilon transitions which bypass
|
875
|
+
# the body of the closure, so all states reachable via alternative 1 are
|
876
|
+
# part of the precedence alternatives of the transformed left-recursive
|
877
|
+
# rule.</li>
|
878
|
+
# <li>The "primary" portion of a left recursive rule cannot contain an
|
879
|
+
# epsilon transition, so the only way an alternative other than 1 can exist
|
880
|
+
# in a state that is also reachable via alternative 1 is by nesting calls
|
881
|
+
# to the left-recursive rule, with the outer calls not being at the
|
882
|
+
# preferred precedence level.</li>
|
883
|
+
# </ul>
|
884
|
+
# </li>
|
885
|
+
# </ol>
|
886
|
+
#
|
887
|
+
# <p>
|
888
|
+
# The prediction context must be considered by this filter to address
|
889
|
+
# situations like the following.
|
890
|
+
# </p>
|
891
|
+
# <code>
|
892
|
+
# <pre>
|
893
|
+
# grammar TA;
|
894
|
+
# prog: statement* EOF;
|
895
|
+
# statement: letterA | statement letterA 'b' ;
|
896
|
+
# letterA: 'a';
|
897
|
+
# </pre>
|
898
|
+
# </code>
|
899
|
+
# <p>
|
900
|
+
# In the above grammar, the ATN state immediately before the token
|
901
|
+
# reference {@code 'a'} in {@code letterA} is reachable from the left edge
|
902
|
+
# of both the primary and closure blocks of the left-recursive rule
|
903
|
+
# {@code statement}. The prediction context associated with each of these
|
904
|
+
# configurations distinguishes between them, and prevents the alternative
|
905
|
+
# which stepped out to {@code prog} (and then back in to {@code statement})
|
906
|
+
# from being eliminated by the filter.
|
907
|
+
# </p>
|
908
|
+
#
|
909
|
+
# @param configs The configuration set computed by
|
910
|
+
# {@link #computeStartState} as the start state for the DFA.
|
911
|
+
# @return The transformed configuration set representing the start state
|
912
|
+
# for a precedence DFA at a particular precedence level (determined by
|
913
|
+
# calling {@link Parser#getPrecedence}).
|
914
|
+
#
|
915
|
+
# Turns a computed start state into the start state of a precedence DFA.
#
# Pass 1 keeps the alternative-1 configurations whose precedence predicates
# survive evalPrecedence and remembers, per ATN state number, the prediction
# context seen for alternative 1. Pass 2 keeps every other configuration
# unless alternative 1 was seen in the same state with an equal context.
#
# @param configs the configuration set produced by computeStartState
# @return a new ATNConfigSet holding the filtered configurations
def applyPrecedenceFilter(configs)
  alt1ContextByState = Hash.new
  filtered = ATNConfigSet.new(configs.fullCtx)

  # Pass 1: alternative 1 only.
  configs.each do |cfg|
    next unless cfg.alt == 1

    evaluated = cfg.semanticContext.evalPrecedence(parser, outerContext)
    next if evaluated.nil? # the configuration was eliminated

    alt1ContextByState[cfg.state.stateNumber] = cfg.context
    kept = if evaluated != cfg.semanticContext
             ATNConfig.new(nil, nil, nil, evaluated, cfg)
           else
             cfg
           end
    filtered.add(kept, mergeCache)
  end

  # Pass 2: every other alternative.
  # In the future, this elimination step could be updated to also
  # filter the prediction context for alternatives predicting alt>1
  # (basically a graph subtraction algorithm).
  configs.each do |cfg|
    next if cfg.alt == 1 # handled in pass 1
    next if alt1ContextByState[cfg.state.stateNumber] == cfg.context # eliminated

    filtered.add(cfg, mergeCache)
  end

  filtered
end
|
946
|
+
# Returns the target of +trans+ when the transition matches +ttype+ within
# the range 0..atn.maxTokenType, and nil otherwise.
#
# @param trans the transition to test
# @param ttype the input symbol
def getReachableTarget(trans, ttype)
  trans.matches(ttype, 0, atn.maxTokenType) ? trans.target : nil
end
|
953
|
+
|
954
|
+
# Builds the alt => predicate table for the ambiguous alternatives.
#
# The result is an array indexed by alternative number (slot 0 is never
# filled). After processing, entry i holds either SemanticContext.NONE
# (alt i is unpredicated, or no config was found for it) or the OR of the
# semantic contexts of every config predicting alt i.
#
# Returns nil when no alternative ends up with a real predicate.
def getPredsForAmbigAlts(ambigAlts, configs, nalts)
  predsByAlt = Array.new(nalts + 1)
  configs.each do |cfg|
    next unless ambigAlts.member? cfg.alt
    predsByAlt[cfg.alt] = SemanticContext.orContext(predsByAlt[cfg.alt], cfg.semanticContext)
  end

  predicatedCount = 0
  1.upto(nalts) do |alt|
    if predsByAlt[alt].nil?
      predsByAlt[alt] = SemanticContext.NONE
    elsif !predsByAlt[alt].equal?(SemanticContext.NONE)
      predicatedCount += 1
    end
  end

  # Nothing is predicated: signal that with nil instead of a table of NONEs.
  predsByAlt = nil if predicatedCount == 0

  puts "getPredsForAmbigAlts result #{predsByAlt}" if self.debug
  predsByAlt
end
|
991
|
+
# Pairs each ambiguous alternative with its predicate.
#
# @param ambigAlts  collection of ambiguous alternative numbers (may be nil)
# @param altToPred  array indexed by alternative number; unpredicated
#                   alternatives hold SemanticContext.NONE
# @return an Array of PredPrediction (one per alternative found in
#         ambigAlts), or nil when none of the alternatives carries a real
#         predicate
def getPredicatePredictions(ambigAlts, altToPred)
  pairs = []
  containsPredicate = false

  # Alternatives are numbered from 1; slot 0 of altToPred is an unused nil
  # placeholder. Visiting it would make every table look predicated (nil is
  # not SemanticContext.NONE), so start at index 1.
  1.upto(altToPred.length - 1) do |alt|
    pred = altToPred[alt]
    pairs.push(PredPrediction.new(pred, alt)) if ambigAlts && ambigAlts.member?(alt)
    containsPredicate = true unless pred.equal?(SemanticContext.NONE)
  end

  return nil unless containsPredicate
  pairs
end
|
1011
|
+
#
|
1012
|
+
# This method is used to improve the localization of error messages by
|
1013
|
+
# choosing an alternative rather than throwing a
|
1014
|
+
# {@link NoViableAltException} in particular prediction scenarios where the
|
1015
|
+
# {@link #ERROR} state was reached during ATN simulation.
|
1016
|
+
#
|
1017
|
+
# <p>
|
1018
|
+
# The default implementation of this method uses the following
|
1019
|
+
# algorithm to identify an ATN configuration which successfully parsed the
|
1020
|
+
# decision entry rule. Choosing such an alternative ensures that the
|
1021
|
+
# {@link ParserRuleContext} returned by the calling rule will be complete
|
1022
|
+
# and valid, and the syntax error will be reported later at a more
|
1023
|
+
# localized location.</p>
|
1024
|
+
#
|
1025
|
+
# <ul>
|
1026
|
+
# <li>If a syntactically valid path or paths reach the end of the decision rule and
|
1027
|
+
# they are semantically valid if predicated, return the min associated alt.</li>
|
1028
|
+
# <li>Else, if a semantically invalid but syntactically valid path exist
|
1029
|
+
# or paths exist, return the minimum associated alt.
|
1030
|
+
# </li>
|
1031
|
+
# <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li>
|
1032
|
+
# </ul>
|
1033
|
+
#
|
1034
|
+
# <p>
|
1035
|
+
# In some scenarios, the algorithm described above could predict an
|
1036
|
+
# alternative which will result in a {@link FailedPredicateException} in
|
1037
|
+
# the parser. Specifically, this could occur if the <em>only</em> configuration
|
1038
|
+
# capable of successfully parsing to the end of the decision rule is
|
1039
|
+
# blocked by a semantic predicate. By choosing this alternative within
|
1040
|
+
# {@link #adaptivePredict} instead of throwing a
|
1041
|
+
# {@link NoViableAltException}, the resulting
|
1042
|
+
# {@link FailedPredicateException} in the parser will identify the specific
|
1043
|
+
# predicate which is preventing the parser from successfully parsing the
|
1044
|
+
# decision rule, which helps developers identify and correct logic errors
|
1045
|
+
# in semantic predicates.
|
1046
|
+
# </p>
|
1047
|
+
#
|
1048
|
+
# @param configs The ATN configurations which were valid immediately before
|
1049
|
+
# the {@link #ERROR} state was reached
|
1050
|
+
# @param outerContext This is the \gamma_0 initial parser context from the paper
|
1051
|
+
# or the parser stack at the instant before prediction commences.
|
1052
|
+
#
|
1053
|
+
# @return The value to return from {@link #adaptivePredict}, or
|
1054
|
+
# {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not
|
1055
|
+
# identified and {@link #adaptivePredict} should report an error instead.
|
1056
|
+
#
|
1057
|
+
def getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs, outerContext)
  passing, failing = self.splitAccordingToSemanticValidity(configs, outerContext)

  # Prefer a path that is both syntactically and semantically viable.
  chosen = self.getAltThatFinishedDecisionEntryRule(passing)
  return chosen unless chosen == ATN::INVALID_ALT_NUMBER

  # Otherwise, is there a syntactically valid path with a failed predicate?
  if failing.length > 0
    chosen = self.getAltThatFinishedDecisionEntryRule(failing)
    return chosen unless chosen == ATN::INVALID_ALT_NUMBER
  end

  ATN::INVALID_ALT_NUMBER
end
|
1072
|
+
# Returns the smallest alternative among the configs that either reach into
# the outer context or sit in a RuleStopState whose context has an empty
# path; ATN::INVALID_ALT_NUMBER when no config qualifies.
def getAltThatFinishedDecisionEntryRule(configs)
  finished = Set.new
  configs.each do |cfg|
    done = cfg.reachesIntoOuterContext > 0
    done ||= cfg.state.kind_of?(RuleStopState) && cfg.context.hasEmptyPath()
    finished.add(cfg.alt) if done
  end
  finished.empty? ? ATN::INVALID_ALT_NUMBER : finished.min
end
|
1085
|
+
# Walk the list of configurations and split them according to
|
1086
|
+
# those that have preds evaluating to true/false. If no pred, assume
|
1087
|
+
# true pred and include in succeeded set. Returns Pair of sets.
|
1088
|
+
#
|
1089
|
+
# Create a new set so as not to alter the incoming parameter.
|
1090
|
+
#
|
1091
|
+
# Assumption: the input stream has been restored to the starting point
|
1092
|
+
# of prediction, which is where predicates need to evaluate.
|
1093
|
+
#
|
1094
|
+
def splitAccordingToSemanticValidity(configs, outerContext)
  passed = ATNConfigSet.new(configs.fullCtx)
  rejected = ATNConfigSet.new(configs.fullCtx)

  configs.each do |cfg|
    # A config without a predicate counts as "predicate true"; the predicate
    # is only evaluated when there is one.
    unpredicated = cfg.semanticContext.equal?(SemanticContext.NONE)
    if unpredicated || cfg.semanticContext.eval(self.parser, outerContext)
      passed.add(cfg)
    else
      rejected.add(cfg)
    end
  end

  [passed, rejected]
end
|
1111
|
+
# Look through a list of predicate/alt pairs, returning alts for the
|
1112
|
+
# pairs that win. A {@code NONE} predicate indicates an alt containing an
|
1113
|
+
# unpredicated config which behaves as "always true." If !complete
|
1114
|
+
# then we stop at the first predicate that evaluates to true. This
|
1115
|
+
# includes pairs with null predicates.
|
1116
|
+
#
|
1117
|
+
def evalSemanticContext( predPredictions, outerContext, complete)
  winners = Set.new

  predPredictions.each do |pair|
    # NONE behaves as "always true": the alt wins without any evaluation.
    if pair.pred.equal?(SemanticContext.NONE)
      winners.add(pair.alt)
      break unless complete
      next
    end

    holds = pair.pred.eval(self.parser, outerContext)
    trace = self.debug || self.dfa_debug
    puts "eval pred #{pair}=#{holds}" if trace
    next unless holds

    puts "PREDICT #{pair.alt}" if trace
    winners.add(pair.alt)
    # When not asked for the complete set, the first winner is enough.
    break unless complete
  end

  winners
end
|
1140
|
+
# TODO: If we are doing predicates, there is no point in pursuing
|
1141
|
+
# closure operations if we reach a DFA state that uniquely predicts
|
1142
|
+
# alternative. We will not be caching that DFA state and it is a
|
1143
|
+
# waste to pursue the closure. Might have to advance when we do
|
1144
|
+
# ambig detection though :(
|
1145
|
+
#
|
1146
|
+
|
1147
|
+
# Entry point of the closure operation: delegates to
# closureCheckingStopState with an initial depth of 0.
def closure(config, configs, closureBusy, collectPredicates, fullCtx, treatEofAsEpsilon)
  self.closureCheckingStopState(config, configs, closureBusy, collectPredicates,
                                fullCtx, 0, treatEofAsEpsilon)
end
|
1153
|
+
|
1154
|
+
|
1155
|
+
# Closure step that first deals with rule stop states: when +config+ sits at
# the end of a rule and has context, each return state is popped and followed
# recursively; otherwise the work is handed to closure_.
def closureCheckingStopState(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
  puts "closure(#{config.toString(self.parser,true)})" if self.debug

  if config.state.kind_of? RuleStopState
    if config.context.isEmpty()
      if fullCtx
        # reached end of start rule
        configs.add(config, self.mergeCache)
        return
      end
      # no context info: just chase follow links (if greedy) below
      puts "FALLING off rule #{self.getRuleName(config.state.ruleIndex)}" if self.debug
    else
      # We hit rule end and have context info: run through all possible
      # stack tops in the context.
      config.context.length.times do |i|
        if config.context.getReturnState(i) == PredictionContext::EMPTY_RETURN_STATE
          if fullCtx
            configs.add(ATNConfig.new(config.state, nil, PredictionContext.EMPTY, nil, config), self.mergeCache)
          else
            # we have no context info, just chase follow links (if greedy)
            puts "FALLING off rule " + self.getRuleName(config.state.ruleIndex) if self.debug
            self.closure_(config, configs, closureBusy, collectPredicates,
                          fullCtx, depth, treatEofAsEpsilon)
          end
          next
        end

        resumeState = self.atn.states[config.context.getReturnState(i)]
        poppedContext = config.context.getParent(i) # "pop" return state
        popped = ATNConfig.new(resumeState, config.alt, poppedContext, config.semanticContext)
        # While we have context to pop back from, we may have gotten that
        # context AFTER having fallen off a rule. Make sure we track that
        # we are now out of context.
        popped.reachesIntoOuterContext = config.reachesIntoOuterContext
        self.closureCheckingStopState(popped, configs, closureBusy, collectPredicates,
                                      fullCtx, depth - 1, treatEofAsEpsilon)
      end
      return
    end
  end

  self.closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
end
|
1204
|
+
# Do the actual work of walking epsilon edges
|
1205
|
+
# Walks the epsilon edges leaving +config+'s state and recurses on each
# resulting configuration.
#
# @param config             configuration whose state is expanded
# @param configs            configuration set being built (receives +config+
#                           when its state has non-epsilon transitions)
# @param closureBusy        set of configurations already expanded after
#                           falling off a rule (guards against infinite
#                           recursion)
# @param collectPredicates  whether predicates are still being collected
# @param fullCtx            full-context mode flag, passed through
# @param depth              current depth relative to the decision entry
#                           rule; 0 means "in context"
# @param treatEofAsEpsilon  passed through to getEpsilonTarget
def closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
  state = config.state
  # optimization: only states with a non-epsilon transition go into the set.
  # Make sure to not return here, because EOF transitions can act as both
  # epsilon transitions and non-epsilon transitions.
  configs.add(config, self.mergeCache) unless state.epsilonOnlyTransitions

  state.transitions.each do |t|
    # Predicate collection stops once an action transition is crossed.
    # This must use && / !: with the low-precedence `and` / `not` the
    # assignment binds first and the ActionTransition test is discarded.
    continueCollecting = collectPredicates && !t.kind_of?(ActionTransition)
    c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
    next unless c

    newDepth = depth
    if config.state.kind_of? RuleStopState
      # Target fell off the end of the rule; mark the resulting config as
      # having dipped into the outer context. We can't get here if the
      # incoming config was a rule stop and we had context. Tracking how far
      # we dip into outer context lets us avoid evaluating context dependent
      # preds when this is > 0.
      #
      # avoid infinite recursion for right-recursive rules
      next if closureBusy.member? c
      closureBusy.add(c)

      # TODO: the Java runtime also flags c as "precedence filter
      # suppressed" here for precedence DFAs; that step is not ported.
      c.reachesIntoOuterContext = c.reachesIntoOuterContext + 1
      configs.dipsIntoOuterContext = true # TODO: can remove? only care when we add to set per middle of this method
      newDepth = newDepth - 1
      puts "dips into outer ctx: #{c}" if self.debug
    elsif t.kind_of? RuleTransition
      # latch when newDepth goes negative - once we step out of the entry
      # context we can't return
      newDepth = newDepth + 1 if newDepth >= 0
    end

    self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
  end
end
|
1262
|
+
|
1263
|
+
# Name of the rule at +index+ as reported by the parser, or a
# "<rule N>" placeholder when there is no parser or the index is negative.
def getRuleName(index)
  return "<rule #{index}>" unless self.parser && index >= 0
  self.parser.ruleNames[index]
end
|
1270
|
+
|
1271
|
+
# Dispatches on the transition's serialization type and returns the
# configuration reached by following +t+ as an epsilon edge, or nil when the
# transition cannot be followed that way.
def getEpsilonTarget(config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon)
  kind = t.serializationType
  case kind
  when Transition::RULE
    self.ruleTransition(config, t)
  when Transition::PRECEDENCE
    self.precedenceTransition(config, t, collectPredicates, inContext, fullCtx)
  when Transition::PREDICATE
    self.predTransition(config, t, collectPredicates, inContext, fullCtx)
  when Transition::ACTION
    self.actionTransition(config, t)
  when Transition::EPSILON
    ATNConfig.new(t.target, nil, nil, nil, config)
  else
    # EOF transitions act like epsilon transitions after the first EOF
    # transition is traversed.
    matchesToken = [Transition::ATOM, Transition::RANGE, Transition::SET].include?(kind)
    if matchesToken && treatEofAsEpsilon && t.matches(Token::EOF, 0, 1)
      ATNConfig.createConfigState(config, t.target)
    else
      nil
    end
  end
end
|
1300
|
+
# Follows an action transition: the action itself is not executed here, the
# config simply moves on to the transition's target.
def actionTransition(config, t)
  puts "ACTION edge #{t.ruleIndex}:#{t.actionIndex}" if self.debug
  ATNConfig.new(t.target, nil, nil, nil, config)
end
|
1306
|
+
# Follows a precedence-predicate transition. Returns the resulting config,
# or nil when the predicate was evaluated on the spot (full context) and
# failed.
def precedenceTransition(config, pt, collectPredicates, inContext, fullCtx)
  if self.debug
    puts "PRED (collectPredicates=#{collectPredicates}) #{pt.precedence}>=_p, ctx dependent=true"
    puts "context surrounding pred is #{self.parser.getRuleInvocationStack()}" if self.parser
  end

  nextConfig = nil
  if !(collectPredicates && inContext)
    # Not collecting (or out of context): ignore the predicate.
    nextConfig = ATNConfig.new(pt.target, nil, nil, nil, config)
  elsif !fullCtx
    # Record the predicate on the config for later evaluation.
    combined = SemanticContext.andContext(config.semanticContext, pt.getPredicate())
    nextConfig = ATNConfig.new(pt.target, nil, nil, combined, config)
  else
    # In full context mode, we can evaluate predicates on-the-fly during
    # closure, which dramatically reduces the size of the config sets. It
    # also obviates the need to test predicates later during conflict
    # resolution. The input is rewound to the start of prediction for the
    # evaluation and restored afterwards.
    resumeAt = self.input.index
    self.input.seek(self.startIndex)
    passed = pt.getPredicate().eval(self.parser, self.outerContext)
    self.input.seek(resumeAt)
    nextConfig = ATNConfig.new(pt.target, nil, nil, nil, config) if passed # no pred context
  end

  puts "config from pred transition=#{nextConfig}" if self.debug
  nextConfig
end
|
1340
|
+
# Follows a semantic-predicate transition. Returns the resulting config, or
# nil when the predicate was evaluated on the spot (full context) and failed.
def predTransition(config, pt, collectPredicates, inContext, fullCtx)
  if self.debug
    puts "PRED (collectPredicates=#{collectPredicates}) #{pt.ruleIndex}:#{pt.predIndex}, ctx dependent=#{pt.isCtxDependent}"
    puts "context surrounding pred is #{self.parser.getRuleInvocationStack()}" if self.parser
  end

  # A predicate is usable when we are collecting and it is either context
  # independent or we are still inside the decision's own context.
  usable = collectPredicates && (!pt.isCtxDependent || inContext)

  nextConfig = nil
  if !usable
    nextConfig = ATNConfig.new(pt.target, nil, nil, nil, config)
  elsif !fullCtx
    # Record the predicate on the config for later evaluation.
    combined = SemanticContext.andContext(config.semanticContext, pt.getPredicate())
    nextConfig = ATNConfig.new(pt.target, nil, nil, combined, config)
  else
    # In full context mode, we can evaluate predicates on-the-fly during
    # closure, which dramatically reduces the size of the config sets. It
    # also obviates the need to test predicates later during conflict
    # resolution. The input is rewound to the start of prediction for the
    # evaluation and restored afterwards.
    resumeAt = self.input.index
    self.input.seek(self.startIndex)
    passed = pt.getPredicate().eval(self.parser, self.outerContext)
    self.input.seek(resumeAt)
    nextConfig = ATNConfig.new(pt.target, nil, nil, nil, config) if passed # no pred context
  end

  puts "config from pred transition=#{nextConfig}" if self.debug
  nextConfig
end
|
1374
|
+
# Follows a rule invocation: pushes the transition's follow state onto the
# config's prediction context and moves to the invoked rule's target state.
def ruleTransition(config, t)
  puts "CALL rule #{self.getRuleName(t.target.ruleIndex) }, ctx=#{config.context}" if self.debug

  followState = t.followState
  pushed = SingletonPredictionContext.create(config.context, followState.stateNumber)
  ATNConfig.new(t.target, nil, pushed, nil, config)
end
|
1382
|
+
# Collects the alternatives involved in conflicts within +configs+ by
# combining PredictionMode's conflicting-alt subsets.
def getConflictingAlts(configs)
  PredictionMode.getAlts(PredictionMode.getConflictingAltSubsets(configs))
end
|
1386
|
+
# Sam pointed out a problem with the previous definition, v3, of
|
1387
|
+
# ambiguous states. If we have another state associated with conflicting
|
1388
|
+
# alternatives, we should keep going. For example, the following grammar
|
1389
|
+
#
|
1390
|
+
# s : (ID | ID ID?) ';' ;
|
1391
|
+
#
|
1392
|
+
# When the ATN simulation reaches the state before ';', it has a DFA
|
1393
|
+
# state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally
|
1394
|
+
# 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node
|
1395
|
+
# because alternative two has another way to continue, via [6|2|[]].
|
1396
|
+
# The key is that we have a single state that has config's only associated
|
1397
|
+
# with a single alternative, 2, and crucially the state transitions
|
1398
|
+
# among the configurations are all non-epsilon transitions. That means
|
1399
|
+
# we don't consider any conflicts that include alternative 2. So, we
|
1400
|
+
# ignore the conflict between alts 1 and 2. We ignore a set of
|
1401
|
+
# conflicting alts when there is an intersection with an alternative
|
1402
|
+
# associated with a single alt state in the state→config-list map.
|
1403
|
+
#
|
1404
|
+
# It's also the case that we might have two conflicting configurations but
|
1405
|
+
# also a 3rd nonconflicting configuration for a different alternative:
|
1406
|
+
# [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar:
|
1407
|
+
#
|
1408
|
+
# a : A | A | A B ;
|
1409
|
+
#
|
1410
|
+
# After matching input A, we reach the stop state for rule A, state 1.
|
1411
|
+
# State 8 is the state right before B. Clearly alternatives 1 and 2
|
1412
|
+
# conflict and no amount of further lookahead will separate the two.
|
1413
|
+
# However, alternative 3 will be able to continue and so we do not
|
1414
|
+
# stop working on this state. In the previous example, we're concerned
|
1415
|
+
# with states associated with the conflicting alternatives. Here alt
|
1416
|
+
# 3 is not associated with the conflicting configs, but since we can continue
|
1417
|
+
# looking for input reasonably, I don't declare the state done. We
|
1418
|
+
# ignore a set of conflicting alts when we have an alternative
|
1419
|
+
# that we still need to pursue.
|
1420
|
+
#
|
1421
|
+
|
1422
|
+
def getConflictingAltsOrUniqueAlt(configs)
  # No unique alternative: report whatever conflict set the configs carry.
  return configs.conflictingAlts if configs.uniqueAlt == ATN::INVALID_ALT_NUMBER

  # Otherwise the "conflict set" is just the single unique alternative.
  Set.new([configs.uniqueAlt])
end
|
1432
|
+
# Human-readable name for token type +t+, used in debug output.
#
# @param t  integer token type
# @return "EOF" for Token::EOF; "NAME<t>" when the parser supplies a name for
#         +t+; otherwise the decimal form of +t+
#
# When +t+ is beyond the end of the parser's tokenNames a diagnostic is
# printed and the decimal form is returned.
def getTokenName(t)
  return "EOF" if t == Token::EOF

  names = self.parser && self.parser.tokenNames
  if names
    if t >= names.length
      puts "#{t} ttype out of range: #{names}"
      puts self.parser.getInputStream().getTokens().to_s
    elsif t >= 0 && names[t]
      # Negative types are excluded on purpose: a negative index would count
      # from the end of the array and yield some other token's name. A nil
      # entry likewise falls through to the numeric form instead of raising.
      return "#{names[t]}<#{t}>"
    end
  end

  t.to_s
end
|
1446
|
+
# Name of the next lookahead token (LA(1)) of +input+, via getTokenName.
def getLookaheadName(input)
  lookahead = input.LA(1)
  getTokenName(lookahead)
end
|
1449
|
+
# Used for debugging in adaptivePredict around execATN but I cut
|
1450
|
+
# it out for clarity now that alg. works well. We can leave this
|
1451
|
+
# "dead" code for a bit.
|
1452
|
+
#
|
1453
|
+
# Debugging aid: prints a header to standard output and then, for every
# dead-end configuration carried by +nvae+, one line on standard error with
# the configuration and a description of its state's first transition.
#
# @param nvae  object responding to getDeadEndConfigs (NOTE(review):
#              presumably a NoViableAltException - confirm against callers)
def dumpDeadEndConfigs(nvae)
  print "dead end configs: "
  nvae.getDeadEndConfigs().each do |c|
    trans = "no edges"
    if c.state.transitions.length > 0
      t = c.state.transitions[0]
      if t.kind_of? AtomTransition
        trans = "Atom #{self.getTokenName(t.label)}"
      elsif t.kind_of? SetTransition
        # A negated set is shown with a leading "~".
        neg = t.kind_of?(NotSetTransition) ? "~" : ""
        trans = "#{neg}Set #{t.set}"
      end
    end
    # This line had been commented out, leaving the method with nothing to
    # dump; without it the description built above is simply discarded.
    $stderr.puts "#{c.toString(self.parser, true)}:#{trans}"
  end
end
|
1474
|
+
# Builds (does not raise) a NoViableAltException spanning from the token at
# +startIndex+ to the current lookahead token.
def noViableAlt(input, outerContext, configs, startIndex)
  startToken = input.get(startIndex)
  offendingToken = input.LT(1)
  NoViableAltException.new(self.parser, input, startToken, offendingToken, configs, outerContext)
end
|
1477
|
+
|
1478
|
+
# Returns the single alternative shared by every config in +configs+, or
# ATN::INVALID_ALT_NUMBER when the set is empty or predicts more than one.
def getUniqueAlt(configs)
  only = ATN::INVALID_ALT_NUMBER
  configs.each do |cfg|
    next if cfg.alt == only
    # A second, different alternative showed up: not unique.
    return ATN::INVALID_ALT_NUMBER unless only == ATN::INVALID_ALT_NUMBER
    only = cfg.alt # found first alt
  end
  only
end
|
1489
|
+
#
|
1490
|
+
# Add an edge to the DFA, if possible. This method calls
|
1491
|
+
# {@link #addDFAState} to ensure the {@code to} state is present in the
|
1492
|
+
# DFA. If {@code from} is {@code null}, or if {@code t} is outside the
|
1493
|
+
# range of edges that can be represented in the DFA tables, this method
|
1494
|
+
# returns without adding the edge to the DFA.
|
1495
|
+
#
|
1496
|
+
# <p>If {@code to} is {@code null}, this method returns {@code null}.
|
1497
|
+
# Otherwise, this method returns the {@link DFAState} returned by calling
|
1498
|
+
# {@link #addDFAState} for the {@code to} state.</p>
|
1499
|
+
#
|
1500
|
+
# @param dfa The DFA
|
1501
|
+
# @param from The source state for the edge
|
1502
|
+
# @param t The input symbol
|
1503
|
+
# @param to The target state for the edge
|
1504
|
+
#
|
1505
|
+
# @return If {@code to} is {@code null}, this method returns {@code null};
|
1506
|
+
# otherwise this method returns the result of calling {@link #addDFAState}
|
1507
|
+
# on {@code to}
|
1508
|
+
#
|
1509
|
+
def addDFAEdge(dfa, from_, t, to)
  puts "EDGE #{from_} -> #{to} upon #{self.getTokenName(t)}" if self.debug

  return nil if to.nil?

  to = self.addDFAState(dfa, to) # used existing if possible not incoming

  # The edge is only recorded when there is a source state and the symbol
  # fits the edge table.
  representable = !from_.nil? && t >= -1 && t <= self.atn.maxTokenType
  return to unless representable

  # The edge table is indexed by t+1 so that EOF (-1) lands in slot 0.
  from_.edges = Array.new(self.atn.maxTokenType + 2) if from_.edges.nil?
  from_.edges[t + 1] = to # connect

  if self.debug
    names = self.parser.nil? ? nil : self.parser.tokenNames
    print "DFA=\n#{dfa.toString(names)}"
  end
  to
end
|
1538
|
+
#
|
1539
|
+
# Add state {@code D} to the DFA if it is not already present, and return
|
1540
|
+
# the actual instance stored in the DFA. If a state equivalent to {@code D}
|
1541
|
+
# is already in the DFA, the existing state is returned. Otherwise this
|
1542
|
+
# method returns {@code D} after adding it to the DFA.
|
1543
|
+
#
|
1544
|
+
# <p>If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and
|
1545
|
+
# does not change the DFA.</p>
|
1546
|
+
#
|
1547
|
+
# @param dfa The dfa
|
1548
|
+
# @param D The DFA state to add
|
1549
|
+
# @return The state stored in the DFA. This will be either the existing
|
1550
|
+
# state if {@code D} is already in the DFA, or {@code D} itself if the
|
1551
|
+
# state was not already present.
|
1552
|
+
#
|
1553
|
+
def addDFAState(dfa, cD)
  # The ERROR sentinel is never stored in a DFA.
  return cD if cD.equal? ATNSimulator::ERROR

  known = dfa.states[cD]
  return known if known

  # New state: number it, freeze its configuration set, then register it.
  cD.stateNumber = dfa.states.length
  unless cD.configs.readonly
    cD.configs.optimizeConfigs(self)
    cD.configs.setReadonly(true)
  end
  dfa.states[cD] = cD

  puts "adding new DFA state: #{cD}" if self.debug
  cD
end
|
1574
|
+
# Notifies the parser's error listeners that prediction is falling back to
# full-context mode for the input between +startIndex+ and +stopIndex+.
def reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex)
  if self.debug || self.retry_debug
    text = self.parser.getTokenStream().getText(startIndex..stopIndex)
    puts "reportAttemptingFullContext decision=#{dfa.decision}:#{configs}, input=#{text}"
  end
  return unless self.parser

  listeners = self.parser.getErrorListenerDispatch()
  listeners.reportAttemptingFullContext(self.parser, dfa, startIndex, stopIndex, conflictingAlts, configs)
end
|
1584
|
+
# Notifies the parser's error listeners of a context-sensitive decision and
# the alternative (+prediction+) that was chosen for it.
def reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex)
  if self.debug || self.retry_debug
    text = self.parser.getTokenStream().getText(startIndex..stopIndex)
    puts "reportContextSensitivity decision=#{dfa.decision}:#{configs}, input=#{text}"
  end
  return unless self.parser

  listeners = self.parser.getErrorListenerDispatch()
  listeners.reportContextSensitivity(self.parser, dfa, startIndex, stopIndex, prediction, configs)
end
|
1594
|
+
|
1595
|
+
# If context sensitive parsing, we know it's ambiguity not conflict
|
1596
|
+
def reportAmbiguity(dfa, cD, startIndex, stopIndex, exact, ambigAlts, configs)
  # cD is accepted for interface compatibility; it is not used here.
  if self.debug || self.retry_debug
    text = self.parser.getTokenStream().getText(startIndex..stopIndex)
    puts "reportAmbiguity #{ambigAlts}:#{configs}, input=#{text}"
  end
  return unless self.parser

  listeners = self.parser.getErrorListenerDispatch()
  listeners.reportAmbiguity(self.parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
end
|
1622
|
+
end
|