antlr4 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +27 -0
- data/README.md +46 -0
- data/lib/antlr4.rb +262 -0
- data/lib/antlr4/BufferedTokenStream.rb +306 -0
- data/lib/antlr4/CommonTokenFactory.rb +53 -0
- data/lib/antlr4/CommonTokenStream.rb +56 -0
- data/lib/antlr4/FileStream.rb +14 -0
- data/lib/antlr4/InputStream.rb +82 -0
- data/lib/antlr4/IntervalSet.rb +341 -0
- data/lib/antlr4/LL1Analyzer.rb +177 -0
- data/lib/antlr4/Lexer.rb +335 -0
- data/lib/antlr4/ListTokenSource.rb +140 -0
- data/lib/antlr4/Parser.rb +562 -0
- data/lib/antlr4/ParserInterpreter.rb +149 -0
- data/lib/antlr4/ParserRuleContext.rb +162 -0
- data/lib/antlr4/PredictionContext.rb +690 -0
- data/lib/antlr4/Recognizer.rb +162 -0
- data/lib/antlr4/RuleContext.rb +226 -0
- data/lib/antlr4/Token.rb +124 -0
- data/lib/antlr4/TokenFactory.rb +3 -0
- data/lib/antlr4/TokenSource.rb +4 -0
- data/lib/antlr4/TokenStream.rb +3 -0
- data/lib/antlr4/TraceListener.rb +23 -0
- data/lib/antlr4/atn/ATN.rb +133 -0
- data/lib/antlr4/atn/ATNConfig.rb +146 -0
- data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
- data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
- data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
- data/lib/antlr4/atn/ATNSimulator.rb +43 -0
- data/lib/antlr4/atn/ATNState.rb +253 -0
- data/lib/antlr4/atn/ATNType.rb +22 -0
- data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
- data/lib/antlr4/atn/LexerAction.rb +311 -0
- data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
- data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
- data/lib/antlr4/atn/PredictionMode.rb +525 -0
- data/lib/antlr4/atn/SemanticContext.rb +355 -0
- data/lib/antlr4/atn/Transition.rb +297 -0
- data/lib/antlr4/base.rb +60 -0
- data/lib/antlr4/dfa/DFA.rb +128 -0
- data/lib/antlr4/dfa/DFASerializer.rb +77 -0
- data/lib/antlr4/dfa/DFAState.rb +133 -0
- data/lib/antlr4/error.rb +151 -0
- data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
- data/lib/antlr4/error/ErrorListener.rb +109 -0
- data/lib/antlr4/error/ErrorStrategy.rb +742 -0
- data/lib/antlr4/tree/Chunk.rb +31 -0
- data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
- data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
- data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
- data/lib/antlr4/tree/RuleTagToken.rb +39 -0
- data/lib/antlr4/tree/TokenTagToken.rb +38 -0
- data/lib/antlr4/tree/Tree.rb +204 -0
- data/lib/antlr4/tree/Trees.rb +111 -0
- data/lib/antlr4/version.rb +5 -0
- data/lib/antlr4/xpath/XPath.rb +354 -0
- data/lib/double_key_map.rb +78 -0
- data/lib/java_symbols.rb +24 -0
- data/lib/uuid.rb +87 -0
- data/test/test_intervalset.rb +664 -0
- data/test/test_tree.rb +140 -0
- data/test/test_uuid.rb +122 -0
- metadata +109 -0
@@ -0,0 +1,311 @@
|
|
1
|
+
|
2
|
+
# Integer codes naming the kind of each lexer action. Every LexerAction
# subclass passes one of these to its superclass constructor as actionType.
class LexerActionType
  # include JavaSymbols

  # The type of a {@link LexerChannelAction} action.
  CHANNEL = 0

  # The type of a {@link LexerCustomAction} action.
  CUSTOM = 1

  # The type of a {@link LexerModeAction} action.
  MODE = 2

  # The type of a {@link LexerMoreAction} action.
  MORE = 3

  # The type of a {@link LexerPopModeAction} action.
  POP_MODE = 4

  # The type of a {@link LexerPushModeAction} action.
  PUSH_MODE = 5

  # The type of a {@link LexerSkipAction} action.
  SKIP = 6

  # The type of a {@link LexerTypeAction} action.
  TYPE = 7
end
|
13
|
+
|
14
|
+
# Common base of all lexer actions. It stores the action's type code and a
# flag telling whether the action is position dependent, and supplies
# identity-based equality that subclasses refine.
class LexerAction
  attr_accessor :actionType, :isPositionDependent

  # @param action the type code stored as actionType (the subclasses in this
  #   file pass a LexerActionType constant)
  def initialize(action)
    @actionType = action
    @isPositionDependent = false
  end

  # Hash derived from the textual form of the action type.
  def hash
    actionType.to_s.hash
  end

  # Hash-key equality delegates to ==, so subclasses only need to override ==.
  def eql?(other)
    self == other
  end

  # A plain LexerAction equals only itself.
  def ==(other)
    equal?(other)
  end
end
|
34
|
+
|
35
|
+
#
|
36
|
+
# Implements the {@code skip} lexer action by calling {@link Lexer#skip}.
|
37
|
+
#
|
38
|
+
# <p>The {@code skip} command does not have any parameters, so this action is
|
39
|
+
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
|
40
|
+
class LexerSkipAction < LexerAction

  # Shared instance of this parameterless action, created on first use.
  @@INSTANCE = nil

  # @return the shared LexerSkipAction, building it on the first call
  def self.INSTANCE
    @@INSTANCE ||= LexerSkipAction.new
  end

  def initialize
    super(LexerActionType::SKIP)
  end

  # Runs the action by calling +skip+ on the given lexer.
  def execute(lexer)
    lexer.skip
  end

  def to_s
    "skip"
  end
end
|
62
|
+
|
63
|
+
# Implements the {@code type} lexer action by calling {@link Lexer#setType}
|
64
|
+
# with the assigned type.
|
65
|
+
class LexerTypeAction < LexerAction

  # The token type this action assigns to the lexer.
  attr_accessor :type

  # @param _type the value later assigned to +lexer.type+
  def initialize(_type)
    super(LexerActionType::TYPE)
    self.type = _type
  end

  # Runs the action by assigning the stored type to +lexer.type+.
  def execute(lexer)
    lexer.type = type
  end

  def hash
    "#{actionType}#{type}".hash
  end

  # Equal when it is the same object, or another LexerTypeAction carrying the
  # same type. Written with ||/&& and explicit grouping: the previous
  # `or`/`and` chain has equal precedence and only parsed as intended by luck.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerTypeAction) && type == other.type)
  end

  def to_s
    "type(#{type})"
  end
end
|
87
|
+
|
88
|
+
# Implements the {@code pushMode} lexer action by calling
|
89
|
+
# {@link Lexer#pushMode} with the assigned mode.
|
90
|
+
class LexerPushModeAction < LexerAction

  # The mode handed to +lexer.pushMode+ when the action runs.
  attr_accessor :mode

  # @param _mode the mode value to push
  def initialize(_mode)
    super(LexerActionType::PUSH_MODE)
    self.mode = _mode
  end

  # Runs the action by calling +pushMode+ on the lexer with the stored mode.
  def execute(lexer)
    lexer.pushMode(mode)
  end

  def hash
    "#{actionType}#{mode}".hash
  end

  # Equal when it is the same object, or another LexerPushModeAction with the
  # same mode. Uses ||/&& with explicit grouping instead of the equal-precedence
  # `or`/`and` keywords.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerPushModeAction) && mode == other.mode)
  end

  def to_s
    "pushMode(#{mode})"
  end
end
|
116
|
+
|
117
|
+
# Implements the {@code popMode} lexer action by calling {@link Lexer#popMode}.
|
118
|
+
#
|
119
|
+
# <p>The {@code popMode} command does not have any parameters, so this action is
|
120
|
+
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
|
121
|
+
class LexerPopModeAction < LexerAction

  # Shared instance of this parameterless action, created on first use.
  @@INSTANCE = nil

  # @return the shared instance, building it on the first call
  def self.INSTANCE
    @@INSTANCE ||= new
  end

  def initialize
    super(LexerActionType::POP_MODE)
  end

  # Runs the action by calling +popMode+ on the given lexer.
  def execute(lexer)
    lexer.popMode
  end

  def to_s
    "popMode"
  end
end
|
142
|
+
|
143
|
+
# Implements the {@code more} lexer action by calling {@link Lexer#more}.
|
144
|
+
#
|
145
|
+
# <p>The {@code more} command does not have any parameters, so this action is
|
146
|
+
# implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
|
147
|
+
class LexerMoreAction < LexerAction

  # Shared instance of this parameterless action, created on first use.
  @@INSTANCE = nil

  # @return the shared instance, building it on the first call
  def self.INSTANCE
    @@INSTANCE ||= new
  end

  def initialize
    super(LexerActionType::MORE)
  end

  # Runs the action by calling +more+ on the given lexer.
  def execute(lexer)
    lexer.more
  end

  def to_s
    "more"
  end
end
|
168
|
+
|
169
|
+
# Implements the {@code mode} lexer action by calling {@link Lexer#mode} with
|
170
|
+
# the assigned mode.
|
171
|
+
class LexerModeAction < LexerAction

  # The mode assigned to +lexer.mode+ when the action runs.
  attr_accessor :mode

  # @param _mode the mode value to switch to
  def initialize(_mode)
    super(LexerActionType::MODE)
    self.mode = _mode
  end

  # Runs the action by assigning the stored mode to +lexer.mode+.
  def execute(lexer)
    lexer.mode = mode
  end

  def hash
    "#{actionType}#{mode}".hash
  end

  # Equal when it is the same object, or another LexerModeAction with the same
  # mode. Uses ||/&& with explicit grouping instead of the equal-precedence
  # `or`/`and` keywords.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerModeAction) && mode == other.mode)
  end

  def to_s
    "mode(#{mode})"
  end
end
|
197
|
+
# Executes a custom lexer action by calling {@link Recognizer#action} with the
|
198
|
+
# rule and action indexes assigned to the custom action. The implementation of
|
199
|
+
# a custom action is added to the generated code for the lexer in an override
|
200
|
+
# of {@link Recognizer#action} when the grammar is compiled.
|
201
|
+
#
|
202
|
+
# <p>This class may represent embedded actions created with the <code>{...}</code>
|
203
|
+
# syntax in ANTLR 4, as well as actions created for lexer commands where the
|
204
|
+
# command argument could not be evaluated when the grammar was compiled.</p>
|
205
|
+
|
206
|
+
class LexerCustomAction < LexerAction

  # ruleIndex / actionIndex: the indexes passed to +lexer.action+ on execute.
  attr_accessor :ruleIndex, :actionIndex, :isPositionDependent

  # Constructs a custom lexer action with the specified rule and action
  # indexes. Custom actions are always flagged as position dependent.
  #
  # @param rule_index   the rule index passed to +lexer.action+
  # @param action_index the action index passed to +lexer.action+
  def initialize(rule_index, action_index)
    super(LexerActionType::CUSTOM)
    @ruleIndex = rule_index
    @actionIndex = action_index
    @isPositionDependent = true
  end

  # Runs the action by calling +lexer.action+ with a nil first argument and
  # the stored rule and action indexes.
  def execute(lexer)
    lexer.action(nil, ruleIndex, actionIndex)
  end

  # Hash over the individual fields. Hashing the array keeps (1, 23) and
  # (12, 3) apart, which the previous concatenated string could not.
  def hash
    [actionType, ruleIndex, actionIndex].hash
  end

  # Equal when it is the same object, or another LexerCustomAction with the
  # same rule and action indexes. Uses ||/&& with explicit grouping instead of
  # the equal-precedence `or`/`and` keywords.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerCustomAction) &&
       ruleIndex == other.ruleIndex && actionIndex == other.actionIndex)
  end

  # Value-based text form, in line with the sibling actions. Without it the
  # default Object#to_s (which embeds the object identity) leaks into every
  # hash that is derived from an action's string form.
  def to_s
    "custom(#{ruleIndex},#{actionIndex})"
  end
end
|
237
|
+
# Implements the {@code channel} lexer action by calling
|
238
|
+
# {@link Lexer#setChannel} with the assigned channel.
|
239
|
+
class LexerChannelAction < LexerAction

  # The channel assigned to +lexer.channel+ when the action runs.
  attr_accessor :channel

  # Constructs a new {@code channel} action with the specified channel value.
  # @param _channel the channel value to assign to the lexer
  def initialize(_channel)
    super(LexerActionType::CHANNEL)
    self.channel = _channel
  end

  # Runs the action by assigning the stored channel to +lexer.channel+.
  def execute(lexer)
    lexer.channel = channel
  end

  def hash
    "#{actionType}#{channel}".hash
  end

  # Equal when it is the same object, or another LexerChannelAction with the
  # same channel. Uses ||/&& with explicit grouping instead of the
  # equal-precedence `or`/`and` keywords.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerChannelAction) && channel == other.channel)
  end

  def to_s
    "channel(#{channel})"
  end
end
|
267
|
+
# This implementation of {@link LexerAction} is used for tracking input offsets
|
268
|
+
# for position-dependent actions within a {@link LexerActionExecutor}.
|
269
|
+
#
|
270
|
+
# <p>This action is not serialized as part of the ATN, and is only required for
|
271
|
+
# position-dependent lexer actions which appear at a location other than the
|
272
|
+
# end of a rule. For more information about DFA optimizations employed for
|
273
|
+
# lexer actions, see {@link LexerActionExecutor#append} and
|
274
|
+
# {@link LexerActionExecutor#fixOffsetBeforeMatch}.</p>
|
275
|
+
class LexerIndexedCustomAction < LexerAction

  # offset: position, relative to the token start index, at which the wrapped
  #         action should run.
  # action: the wrapped lexer action.
  attr_accessor :offset, :action, :isPositionDependent

  # Constructs a new indexed custom action by associating a character offset
  # with a {@link LexerAction}.
  #
  # @param _offset the offset into the input, relative to the token start
  #   index, at which the wrapped action should be executed
  # @param _action the lexer action to execute at that offset
  def initialize(_offset, _action)
    # Read the type from the parameter: the `action` reader is still nil until
    # the assignment below, so `action.actionType` raised NoMethodError.
    super(_action.actionType)
    self.offset = _offset
    self.action = _action
    self.isPositionDependent = true
  end

  # Delegates to the wrapped action; the caller is expected to have positioned
  # the input stream already.
  def execute(lexer)
    action.execute(lexer)
  end

  # Combines the wrapped action's own hash rather than its string form, so two
  # wrappers that compare equal with == also hash equally even when the wrapped
  # action has no value-based to_s.
  def hash
    [actionType, offset, action].hash
  end

  # Equal when it is the same object, or another LexerIndexedCustomAction with
  # the same offset wrapping an equal action.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerIndexedCustomAction) &&
       offset == other.offset && action == other.action)
  end

  # Value-based text form built from the offset and the wrapped action's to_s.
  def to_s
    "indexed(#{offset},#{action})"
  end
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# Represents an executor for a sequence of lexer actions which are traversed during
|
2
|
+
# the matching operation of a lexer rule (token).
|
3
|
+
#
|
4
|
+
# <p>The executor tracks position information for position-dependent lexer actions
|
5
|
+
# efficiently, ensuring that actions appearing only at the end of the rule do
|
6
|
+
# not cause bloating of the {@link DFA} created for the lexer.</p>
|
7
|
+
|
8
|
+
class LexerActionExecutor

  # hashCode:     hash cached at construction time (returned by #hash).
  # lexerActions: the ordered list of actions this executor runs.
  attr_accessor :hashCode, :lexerActions

  # @param _lexerActions the actions to execute, in order
  def initialize(_lexerActions = [])
    @lexerActions = _lexerActions
    # Cache the hash once, since it is used on a performance-critical path.
    # It is built from each action's own #hash, which the action classes keep
    # consistent with ==; building it from to_s made it depend on object
    # identity for actions that do not define a value-based to_s.
    @hashCode = @lexerActions.map(&:hash).hash
  end

  # Creates a {@link LexerActionExecutor} which executes the actions of the
  # input {@code lexerActionExecutor} followed by {@code lexerAction}.
  #
  # @param lexerActionExecutor the executor for actions already traversed; if
  #   nil the method behaves as though it were an empty executor
  # @param lexerAction the lexer action to execute after those actions
  # @return a new executor for the combined actions; the input executor and
  #   its action list are left untouched
  def self.append(lexerActionExecutor, lexerAction)
    return LexerActionExecutor.new([lexerAction]) if lexerActionExecutor.nil?

    # Build a fresh array: Array#concat only accepts arrays (a single action
    # raised TypeError) and would also have modified the source executor.
    LexerActionExecutor.new(lexerActionExecutor.lexerActions + [lexerAction])
  end

  # Creates a {@link LexerActionExecutor} which encodes the given offset for
  # position-dependent lexer actions.
  #
  # Every action that is position dependent and not already wrapped in a
  # LexerIndexedCustomAction is replaced, in a copy of the list, by a
  # LexerIndexedCustomAction carrying +offset+. Storing offsets relative to the
  # token start index lets actions in the middle of a token be shared between
  # tokens of the same length.
  #
  # @param offset the offset to assign to all position-dependent lexer actions
  #   which do not already have one
  # @return self when nothing needed wrapping, otherwise a new executor
  def fixOffsetBeforeMatch(offset)
    updated = nil
    @lexerActions.each_with_index do |lexerAction, i|
      next unless lexerAction.isPositionDependent &&
                  !lexerAction.kind_of?(LexerIndexedCustomAction)

      updated ||= @lexerActions.dup # copy lazily, only when a change is needed
      updated[i] = LexerIndexedCustomAction.new(offset, lexerAction)
    end
    updated.nil? ? self : LexerActionExecutor.new(updated)
  end

  # Executes the actions encapsulated by this executor within the context of a
  # particular lexer.
  #
  # Before an indexed action runs, the input is moved to +startIndex+ plus the
  # action's offset; before any other position-dependent action it is moved to
  # the index the input had on entry. If the last seek left the input somewhere
  # else, it is moved back to the entry index before returning, also when an
  # action raises.
  #
  # @param lexer the lexer instance
  # @param input the input stream which is the source for the current token;
  #   on entry its index should be one character past the end of the token
  # @param startIndex the token start index
  def execute(lexer, input, startIndex)
    requiresSeek = false
    stopIndex = input.index
    begin
      lexerActions.each do |lexerAction|
        if lexerAction.kind_of?(LexerIndexedCustomAction)
          target = startIndex + lexerAction.offset
          input.seek(target)
          requiresSeek = target != stopIndex
          lexerAction = lexerAction.action # run the wrapped action
        elsif lexerAction.isPositionDependent
          input.seek(stopIndex)
          requiresSeek = false
        end
        lexerAction.execute(lexer)
      end
    ensure
      input.seek(stopIndex) if requiresSeek
    end
  end

  # @return the hash cached by the constructor
  def hash
    hashCode
  end

  def eql?(other)
    self == other
  end

  # Equal when it is the same object, or another executor with the same cached
  # hash and an equal action list.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerActionExecutor) &&
       hashCode == other.hashCode && lexerActions == other.lexerActions)
  end
end
|
@@ -0,0 +1,1622 @@
|
|
1
|
+
#
|
2
|
+
# The embodiment of the adaptive LL(*), ALL(*), parsing strategy.
|
3
|
+
#
|
4
|
+
# <p>
|
5
|
+
# The basic complexity of the adaptive strategy makes it harder to understand.
|
6
|
+
# We begin with ATN simulation to build paths in a DFA. Subsequent prediction
|
7
|
+
# requests go through the DFA first. If they reach a state without an edge for
|
8
|
+
# the current symbol, the algorithm fails over to the ATN simulation to
|
9
|
+
# complete the DFA path for the current input (until it finds a conflict state
|
10
|
+
# or uniquely predicting state).</p>
|
11
|
+
#
|
12
|
+
# <p>
|
13
|
+
# All of that is done without using the outer context because we want to create
|
14
|
+
# a DFA that is not dependent upon the rule invocation stack when we do a
|
15
|
+
# prediction. One DFA works in all contexts. We avoid using context not
|
16
|
+
# necessarily because it's slower, although it can be, but because of the DFA
|
17
|
+
# caching problem. The closure routine only considers the rule invocation stack
|
18
|
+
# created during prediction beginning in the decision rule. For example, if
|
19
|
+
# prediction occurs without invoking another rule's ATN, there are no context
|
20
|
+
# stacks in the configurations. When lack of context leads to a conflict, we
|
21
|
+
# don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
|
22
|
+
# strategy (versus full LL(*)).</p>
|
23
|
+
#
|
24
|
+
# <p>
|
25
|
+
# When SLL yields a configuration set with conflict, we rewind the input and
|
26
|
+
# retry the ATN simulation, this time using full outer context without adding
|
27
|
+
# to the DFA. Configuration context stacks will be the full invocation stacks
|
28
|
+
# from the start rule. If we get a conflict using full context, then we can
|
29
|
+
# definitively say we have a true ambiguity for that input sequence. If we
|
30
|
+
# don't get a conflict, it implies that the decision is sensitive to the outer
|
31
|
+
# context. (It is not context-sensitive in the sense of context-sensitive
|
32
|
+
# grammars.)</p>
|
33
|
+
#
|
34
|
+
# <p>
|
35
|
+
# The next time we reach this DFA state with an SLL conflict, through DFA
|
36
|
+
# simulation, we will again retry the ATN simulation using full context mode.
|
37
|
+
# This is slow because we can't save the results and have to "interpret" the
|
38
|
+
# ATN each time we get that input.</p>
|
39
|
+
#
|
40
|
+
# <p>
|
41
|
+
# <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p>
|
42
|
+
#
|
43
|
+
# <p>
|
44
|
+
# We could cache results from full context to predicted alternative easily and
|
45
|
+
# that saves a lot of time but doesn't work in presence of predicates. The set
|
46
|
+
# of visible predicates from the ATN start state changes depending on the
|
47
|
+
# context, because closure can fall off the end of a rule. I tried to cache
|
48
|
+
# tuples (stack context, semantic context, predicted alt) but it was slower
|
49
|
+
# than interpreting and much more complicated. Also required a huge amount of
|
50
|
+
# memory. The goal is not to create the world's fastest parser anyway. I'd like
|
51
|
+
# to keep this algorithm simple. By launching multiple threads, we can improve
|
52
|
+
# the speed of parsing across a large number of files.</p>
|
53
|
+
#
|
54
|
+
# <p>
|
55
|
+
# There is no strict ordering between the amount of input used by SLL vs LL,
|
56
|
+
# which makes it really hard to build a cache for full context. Let's say that
|
57
|
+
# we have input A B C that leads to an SLL conflict with full context X. That
|
58
|
+
# implies that using X we might only use A B but we could also use A B C D to
|
59
|
+
# resolve conflict. Input A B C D could predict alternative 1 in one position
|
60
|
+
# in the input and A B C E could predict alternative 2 in another position in
|
61
|
+
# input. The conflicting SLL configurations could still be non-unique in the
|
62
|
+
# full context prediction, which would lead us to requiring more input than the
|
63
|
+
# original A B C. To make a prediction cache work, we have to track the exact
|
64
|
+
# input used during the previous prediction. That amounts to a cache that maps
|
65
|
+
# X to a specific DFA for that context.</p>
|
66
|
+
#
|
67
|
+
# <p>
|
68
|
+
# Something should be done for left-recursive expression predictions. They are
|
69
|
+
# likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry
|
70
|
+
# with full LL thing Sam does.</p>
|
71
|
+
#
|
72
|
+
# <p>
|
73
|
+
# <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p>
|
74
|
+
#
|
75
|
+
# <p>
|
76
|
+
# We avoid doing full context retry when the outer context is empty, we did not
|
77
|
+
# dip into the outer context by falling off the end of the decision state rule,
|
78
|
+
# or when we force SLL mode.</p>
|
79
|
+
#
|
80
|
+
# <p>
|
81
|
+
# As an example of the not dip into outer context case, consider super
|
82
|
+
# constructor calls versus function calls. One grammar might look like
|
83
|
+
# this:</p>
|
84
|
+
#
|
85
|
+
# <pre>
|
86
|
+
# ctorBody
|
87
|
+
# : '{' superCall? stat* '}'
|
88
|
+
# ;
|
89
|
+
# </pre>
|
90
|
+
#
|
91
|
+
# <p>
|
92
|
+
# Or, you might see something like</p>
|
93
|
+
#
|
94
|
+
# <pre>
|
95
|
+
# stat
|
96
|
+
# : superCall ';'
|
97
|
+
# | expression ';'
|
98
|
+
# | ...
|
99
|
+
# ;
|
100
|
+
# </pre>
|
101
|
+
#
|
102
|
+
# <p>
|
103
|
+
# In both cases I believe that no closure operations will dip into the outer
|
104
|
+
# context. In the first case ctorBody in the worst case will stop at the '}'.
|
105
|
+
# In the 2nd case it should stop at the ';'. Both cases should stay within the
|
106
|
+
# entry rule and not dip into the outer context.</p>
|
107
|
+
#
|
108
|
+
# <p>
|
109
|
+
# <strong>PREDICATES</strong></p>
|
110
|
+
#
|
111
|
+
# <p>
|
112
|
+
# Predicates are always evaluated if present, in both SLL and LL. SLL and
|
113
|
+
# LL simulation deals with predicates differently. SLL collects predicates as
|
114
|
+
# it performs closure operations like ANTLR v3 did. It delays predicate
|
115
|
+
# evaluation until it reaches an accept state. This allows us to cache the SLL
|
116
|
+
# ATN simulation whereas, if we had evaluated predicates on-the-fly during
|
117
|
+
# closure, the DFA state configuration sets would be different and we couldn't
|
118
|
+
# build up a suitable DFA.</p>
|
119
|
+
#
|
120
|
+
# <p>
|
121
|
+
# When building a DFA accept state during ATN simulation, we evaluate any
|
122
|
+
# predicates and return the sole semantically valid alternative. If there is
|
123
|
+
# more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
|
124
|
+
# we throw an exception. Alternatives without predicates act like they have
|
125
|
+
# true predicates. The simple way to think about it is to strip away all
|
126
|
+
# alternatives with false predicates and choose the minimum alternative that
|
127
|
+
# remains.</p>
|
128
|
+
#
|
129
|
+
# <p>
|
130
|
+
# When we start in the DFA and reach an accept state that's predicated, we test
|
131
|
+
# those and return the minimum semantically viable alternative. If no
|
132
|
+
# alternatives are viable, we throw an exception.</p>
|
133
|
+
#
|
134
|
+
# <p>
|
135
|
+
# During full LL ATN simulation, closure always evaluates predicates
|
136
|
+
# on-the-fly. This is crucial to reducing the configuration set size during
|
137
|
+
# closure. It hits a landmine when parsing with the Java grammar, for example,
|
138
|
+
# without this on-the-fly evaluation.</p>
|
139
|
+
#
|
140
|
+
# <p>
|
141
|
+
# <strong>SHARING DFA</strong></p>
|
142
|
+
#
|
143
|
+
# <p>
|
144
|
+
# All instances of the same parser share the same decision DFAs through a
|
145
|
+
# static field. Each instance gets its own ATN simulator but they share the
|
146
|
+
# same {@link #decisionToDFA} field. They also share a
|
147
|
+
# {@link PredictionContextCache} object that makes sure that all
|
148
|
+
# {@link PredictionContext} objects are shared among the DFA states. This makes
|
149
|
+
# a big size difference.</p>
|
150
|
+
#
|
151
|
+
# <p>
|
152
|
+
# <strong>THREAD SAFETY</strong></p>
|
153
|
+
#
|
154
|
+
# <p>
|
155
|
+
# The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when
|
156
|
+
# it adds a new DFA object to that array. {@link #addDFAEdge}
|
157
|
+
# locks on the DFA for the current decision when setting the
|
158
|
+
# {@link DFAState#edges} field. {@link #addDFAState} locks on
|
159
|
+
# the DFA for the current decision when looking up a DFA state to see if it
|
160
|
+
# already exists. We must make sure that all requests to add DFA states that
|
161
|
+
# are equivalent result in the same shared DFA object. This is because lots of
|
162
|
+
# threads will be trying to update the DFA at once. The
|
163
|
+
# {@link #addDFAState} method also locks inside the DFA lock
|
164
|
+
# but this time on the shared context cache when it rebuilds the
|
165
|
+
# configurations' {@link PredictionContext} objects using cached
|
166
|
+
# subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
|
167
|
+
# safe as long as we can guarantee that all threads referencing
|
168
|
+
# {@code s.edge[t]} get the same physical target {@link DFAState}, or
|
169
|
+
# {@code null}. Once into the DFA, the DFA simulation does not reference the
|
170
|
+
# {@link DFA#states} map. It follows the {@link DFAState#edges} field to new
|
171
|
+
# targets. The DFA simulator will either find {@link DFAState#edges} to be
|
172
|
+
# {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or
|
173
|
+
# {@code dfa.edges[t]} to be non-null. The
|
174
|
+
# {@link #addDFAEdge} method could be racing to set the field
|
175
|
+
# but in either case the DFA simulator works; if {@code null}, and requests ATN
|
176
|
+
# simulation. It could also race trying to get {@code dfa.edges[t]}, but either
|
177
|
+
# way it will work because it's not doing a test and set operation.</p>
|
178
|
+
#
|
179
|
+
# <p>
|
180
|
+
# <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage
|
181
|
+
# Parsing)</strong></p>
|
182
|
+
#
|
183
|
+
# <p>
|
184
|
+
# Sam pointed out that if SLL does not give a syntax error, then there is no
|
185
|
+
# point in doing full LL, which is slower. We only have to try LL if we get a
|
186
|
+
# syntax error. For maximum speed, Sam starts the parser set to pure SLL
|
187
|
+
# mode with the {@link BailErrorStrategy}:</p>
|
188
|
+
#
|
189
|
+
# <pre>
|
190
|
+
# parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
|
191
|
+
# parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
|
192
|
+
# </pre>
|
193
|
+
#
|
194
|
+
# <p>
|
195
|
+
# If it does not get a syntax error, then we're done. If it does get a syntax
|
196
|
+
# error, we need to retry with the combined SLL/LL strategy.</p>
|
197
|
+
#
|
198
|
+
# <p>
|
199
|
+
# The reason this works is as follows. If there are no SLL conflicts, then the
|
200
|
+
# grammar is SLL (at least for that input set). If there is an SLL conflict,
|
201
|
+
# the full LL analysis must yield a set of viable alternatives which is a
|
202
|
+
# subset of the alternatives reported by SLL. If the LL set is a singleton,
|
203
|
+
# then the grammar is LL but not SLL. If the LL set is the same size as the SLL
|
204
|
+
# set, the decision is SLL. If the LL set has size > 1, then that decision
|
205
|
+
# is truly ambiguous on the current input. If the LL set is smaller, then the
|
206
|
+
# SLL conflict resolution might choose an alternative that the full LL would
|
207
|
+
# rule out as a possibility based upon better context information. If that's
|
208
|
+
# the case, then the SLL parse will definitely get an error because the full LL
|
209
|
+
# analysis says it's not viable. If SLL conflict resolution chooses an
|
210
|
+
# alternative within the LL set, then both SLL and LL would choose the same
|
211
|
+
# alternative because they both choose the minimum of multiple conflicting
|
212
|
+
# alternatives.</p>
|
213
|
+
#
|
214
|
+
# <p>
|
215
|
+
# Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
|
216
|
+
# a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL
|
217
|
+
# parsing will get an error because SLL will pursue alternative 1. If
|
218
|
+
# <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
|
219
|
+
# choose the same alternative because alternative one is the minimum of either
|
220
|
+
# set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax
|
221
|
+
# error. If <em>s</em> is {@code {1}} then SLL will succeed.</p>
|
222
|
+
#
|
223
|
+
# <p>
|
224
|
+
# Of course, if the input is invalid, then we will get an error for sure in
|
225
|
+
# both SLL and LL parsing. Erroneous input will therefore require 2 passes over
|
226
|
+
# the input.</p>
|
227
|
+
#
|
228
|
+
|
229
|
+
class ParserATNSimulator < ATNSimulator
|
230
|
+
include PredictionContextFunctions
|
231
|
+
|
232
|
+
# Debug switches. They are class variables, so one setting is shared by every
# simulator instance.
@@debug = false
@@dfa_debug = false
@@debug_list_atn_decisions = false
@@retry_debug = false

# Class-level readers for the switches above. These are explicit methods
# because a singleton attr_reader would read class-level *instance* variables
# (@debug, ...) which are never assigned, and so would always return nil
# instead of the flag value.
def self.debug; @@debug; end
def self.dfa_debug; @@dfa_debug; end
def self.debug_list_atn_decisions; @@debug_list_atn_decisions; end
def self.retry_debug; @@retry_debug; end

# Instance-level readers; the prediction methods consult these as self.debug,
# self.dfa_debug, and so on.
def debug; @@debug; end
def dfa_debug; @@dfa_debug; end
def debug_list_atn_decisions; @@debug_list_atn_decisions; end
def retry_debug; @@retry_debug; end

# Per-simulator state; all of these are assigned in initialize except _dfa,
# which adaptivePredict sets for the duration of one prediction.
attr_accessor :decisionToDFA, :startIndex
attr_accessor :parser, :predictionMode, :input, :outerContext, :mergeCache
attr_accessor :_dfa
|
251
|
+
|
252
|
+
# Creates a simulator for +parser+.
#
# @param parser the parser this simulator predicts for
# @param atn forwarded to the superclass constructor
# @param decisionToDFA collection indexed by decision number (see adaptivePredict)
# @param sharedContextCache forwarded to the superclass constructor
def initialize(parser, atn, decisionToDFA, sharedContextCache)
  super(atn, sharedContextCache)
  @parser = parser
  @decisionToDFA = decisionToDFA
  # Which strategy to use: SLL, LL, or LL with exact ambiguity detection.
  @predictionMode = PredictionMode.LL
  # State for the prediction in progress. These are kept on the instance so
  # they do not have to be threaded through every call as parameters.
  @input = nil
  @startIndex = 0
  @outerContext = nil
  # Cache for merges of prediction contexts: maps graphs (a, b) to their merged
  # result c so a repeated merge can be skipped. It is created lazily in
  # computeReachSet and dropped again after every adaptivePredict call so it
  # does not hold on to memory between predictions.
  @mergeCache = nil
end
|
272
|
+
|
273
|
+
|
274
|
+
# No-op: this simulator keeps nothing that needs clearing here. Returns nil.
def reset
  nil
end
|
276
|
+
|
277
|
+
# Predicts which alternative of +decision+ to take at the current position of
# +input+.
#
# The DFA for the decision is looked up in decisionToDFA. If it has no start
# state yet, one is built from the ATN via computeStartState (and, for a
# precedence DFA, applyPrecedenceFilter) and stored on the DFA before execATN
# runs. Whatever happens, the ensure clause drops the merge cache, seeks the
# input back to where it started, releases the mark and clears @_dfa.
#
# @param input the token stream being parsed
# @param decision decision number, used as the index into decisionToDFA
# @param outerContext invoking rule context; ParserRuleContext.EMPTY is
#   substituted for nil only on the path that has to compute a start state
# @return the alternative returned by execATN
def adaptivePredict(input, decision, outerContext)
  if self.debug || self.debug_list_atn_decisions
    puts "adaptivePredict decision #{decision} exec LA(1)==" \
         "#{self.getLookaheadName(input)} line #{input.LT(1).line}:#{input.LT(1).column}"
  end
  # type_check(TokenStream, input)
  # type_check(ParserRuleContext, outerContext)
  self.input = input
  self.startIndex = input.index
  self.outerContext = outerContext

  dfa = self.decisionToDFA[decision]
  @_dfa = dfa
  marker = input.mark
  origin = input.index

  begin
    # A precedence DFA has one start state per parser precedence level; a
    # regular DFA just has s0.
    s0 = if dfa.precedenceDfa
           dfa.getPrecedenceStartState(self.parser.getPrecedence)
         else
           dfa.s0
         end

    if s0.nil?
      outerContext = ParserRuleContext.EMPTY if outerContext.nil?
      if self.debug || self.debug_list_atn_decisions
        puts "predictATN decision #{dfa.decision} exec LA(1)==#{self.getLookaheadName(input)}" \
             ", outerContext=#{outerContext.to_s}"
      end

      # A regular DFA whose ATN start state is the loop entry deciding whether
      # a precedence rule continues or completes is switched to precedence mode.
      if !dfa.precedenceDfa && dfa.atnStartState.kind_of?(StarLoopEntryState)
        dfa.setPrecedenceDfa(true) if dfa.atnStartState.precedenceRuleDecision
      end

      type_check(ParserRuleContext.EMPTY, ParserRuleContext)
      start_configs = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, false)

      if dfa.precedenceDfa
        # Convert to a precedence start state and register it for the current
        # precedence level rather than as dfa.s0.
        start_configs = self.applyPrecedenceFilter(start_configs)
        s0 = self.addDFAState(dfa, DFAState.new(nil, start_configs))
        dfa.setPrecedenceStartState(self.parser.getPrecedence, s0)
      else
        s0 = self.addDFAState(dfa, DFAState.new(nil, start_configs))
        dfa.s0 = s0
      end
    end

    predicted = self.execATN(dfa, s0, input, origin, outerContext)
    puts "DFA after predictATN: #{dfa.toString(self.parser.tokenNames)}" if self.debug
    predicted
  ensure
    self.mergeCache = nil # drop the cache after each prediction
    input.seek(origin)
    input.release(marker)
    @_dfa = nil
  end
end
|
358
|
+
# Performs ATN simulation to compute a predicted alternative based
|
359
|
+
# upon the remaining input, but also updates the DFA cache to avoid
|
360
|
+
# having to traverse the ATN again for the same input sequence.
|
361
|
+
|
362
|
+
# There are some key conditions we're looking for after computing a new
|
363
|
+
# set of ATN configs (proposed DFA state):
|
364
|
+
# if the set is empty, there is no viable alternative for current symbol
|
365
|
+
# does the state uniquely predict an alternative?
|
366
|
+
# does the state have a conflict that would prevent us from
|
367
|
+
# putting it on the work list?
|
368
|
+
|
369
|
+
# We also have some key operations to do:
|
370
|
+
# add an edge from previous DFA state to potentially new DFA state, D,
|
371
|
+
# upon current symbol but only if adding to work list, which means in all
|
372
|
+
# cases except no viable alternative (and possibly non-greedy decisions?)
|
373
|
+
# collecting predicates and adding semantic context to DFA accept states
|
374
|
+
# adding rule context to context-sensitive DFA accept states
|
375
|
+
# consuming an input symbol
|
376
|
+
# reporting a conflict
|
377
|
+
# reporting an ambiguity
|
378
|
+
# reporting a context sensitivity
|
379
|
+
# reporting insufficient predicates
|
380
|
+
|
381
|
+
# cover these cases:
|
382
|
+
# dead end
|
383
|
+
# single alt
|
384
|
+
# single alt + preds
|
385
|
+
# conflict
|
386
|
+
# conflict + preds
|
387
|
+
#
|
388
|
+
# Walks the DFA from +s0+ over the lookahead, computing missing target states
# on demand, until an alternative can be returned.
#
# @param dfa the decision's DFA
# @param s0 DFA state to start from
# @param input the token stream
# @param startIndex input index at which the prediction started
# @param outerContext invoking rule context (type-checked as ParserRuleContext)
# @return the predicted alternative
# @raise whatever noViableAlt builds, when no alternative is viable
def execATN(dfa, s0, input, startIndex, outerContext)
  type_check(outerContext, ParserRuleContext)
  if self.debug || self.debug_list_atn_decisions
    print "execATN decision #{dfa.decision} exec LA(1)==#{self.getLookaheadName(input)}" \
          " line #{input.LT(1).line}:#{input.LT(1).column}"
  end
  from_state = s0
  print "s0 = #{s0}" if self.debug

  symbol = input.LA(1)
  while true # while more work
    to_state = self.getExistingTargetState(from_state, symbol)
    to_state = self.computeTargetState(dfa, from_state, symbol) if to_state.nil?

    if to_state.equal?(ATNSimulator::ERROR)
      # Nothing is reachable on this symbol. Build the error first, then see
      # whether some alternative already finished the decision's entry rule;
      # if so return it and let the error surface later with a better message.
      failure = self.noViableAlt(input, outerContext, from_state.configs, startIndex)
      input.seek(startIndex)
      finished = self.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(from_state.configs, outerContext)
      raise failure if finished == ATN::INVALID_ALT_NUMBER
      return finished
    end

    if to_state.requiresFullContext && self.predictionMode != PredictionMode.SLL
      # If there are predicates they might resolve to a single alternative,
      # in which case full LL is not needed.
      conflicting = nil
      if to_state.predicates
        print("DFA state has preds in DFA sim LL failover") if self.debug
        conflict_index = input.index
        rewound = conflict_index != startIndex
        input.seek(startIndex) if rewound
        conflicting = self.evalSemanticContext(to_state.predicates, outerContext, true)
        if conflicting.length == 1
          print("Full LL avoided") if self.debug
          return conflicting.min
        end
        # Restore the index so the fallback to full context is reported with
        # the input at the conflict position.
        input.seek(conflict_index) if rewound
      end
      print "ctx sensitive state #{outerContext} in #{to_state}" if self.dfa_debug
      full_start = self.computeStartState(dfa.atnStartState, outerContext, true)
      self.reportAttemptingFullContext(dfa, conflicting, to_state.configs, startIndex, input.index)
      return self.execATNWithFullContext(dfa, to_state, full_start, input, startIndex, outerContext)
    end

    if to_state.isAcceptState
      return to_state.prediction if to_state.predicates.nil?

      stop_index = input.index
      input.seek(startIndex)
      viable = self.evalSemanticContext(to_state.predicates, outerContext, true)
      case viable.length
      when 0
        raise self.noViableAlt(input, outerContext, to_state.configs, startIndex)
      when 1
        return viable.min
      else
        # Report after predicate evaluation so the reported set of ambiguous
        # alternatives is the correct one.
        self.reportAmbiguity(dfa, to_state, startIndex, stop_index, false, viable, to_state.configs)
        return viable.min
      end
    end

    from_state = to_state
    unless symbol == Token::EOF
      input.consume
      symbol = input.LA(1)
    end
  end
end
|
484
|
+
#
|
485
|
+
# Get an existing target state for an edge in the DFA. If the target state
|
486
|
+
# for the edge has not yet been computed or is otherwise not available,
|
487
|
+
# this method returns {@code null}.
|
488
|
+
#
|
489
|
+
# @param previousD The current DFA state
|
490
|
+
# @param t The next input symbol
|
491
|
+
# @return The existing target DFA state for the given input symbol
|
492
|
+
# {@code t}, or {@code null} if the target state for this edge is not
|
493
|
+
# already cached
|
494
|
+
#
|
495
|
+
# Looks up the cached DFA edge leaving +previousD+ on symbol +t+.
# Edges are stored at index t + 1. Returns nil when the state has no edge
# table, when that index falls outside it, or when the slot holds nil.
def getExistingTargetState(previousD, t)
  table = previousD.edges
  return nil if table.nil?

  slot = t + 1
  return nil if slot < 0 || slot >= table.length

  table[slot]
end
|
503
|
+
#
|
504
|
+
# Compute a target state for an edge in the DFA, and attempt to add the
|
505
|
+
# computed state and corresponding edge to the DFA.
|
506
|
+
#
|
507
|
+
# @param dfa The DFA
|
508
|
+
# @param previousD The current DFA state
|
509
|
+
# @param t The next input symbol
|
510
|
+
#
|
511
|
+
# @return The computed target DFA state for the given input symbol
|
512
|
+
# {@code t}. If {@code t} does not lead to a valid DFA state, this method
|
513
|
+
# returns {@link #ERROR}.
|
514
|
+
#
|
515
|
+
# Builds the DFA state reached from +previousD+ on symbol +t+ and records the
# edge in +dfa+. Returns the state handed back by addDFAEdge, or
# ATNSimulator::ERROR when computeReachSet finds nothing reachable.
def computeTargetState(dfa, previousD, t)
  reach = self.computeReachSet(previousD.configs, t, false)
  if reach.nil?
    self.addDFAEdge(dfa, previousD, t, ATNSimulator::ERROR)
    return ATNSimulator::ERROR
  end

  # Build the new target state completely before adding it to the DFA.
  target = DFAState.new(nil, reach)
  unique_alt = self.getUniqueAlt(reach)

  if self.debug
    subsets = PredictionMode.getConflictingAltSubsets(reach)
    puts "SLL altSubSets=#{subsets}, configs=#{reach}, predict=#{unique_alt}" \
         ", allSubsetsConflict=#{PredictionMode.allSubsetsConflict(subsets)}" \
         ", conflictingAlts=#{self.getConflictingAlts(reach)}"
  end

  if unique_alt != ATN::INVALID_ALT_NUMBER
    # No conflict: exactly one alternative is predicted.
    target.isAcceptState = true
    target.configs.uniqueAlt = unique_alt
    target.prediction = unique_alt
  elsif PredictionMode.hasSLLConflictTerminatingPrediction(self.predictionMode, reach)
    # More than one viable alternative.
    target.configs.conflictingAlts = self.getConflictingAlts(reach)
    target.requiresFullContext = true
    # In SLL-only mode prediction stops here and the minimum alt is returned.
    target.isAcceptState = true
    target.prediction = target.configs.conflictingAlts.min
  end

  if target.isAcceptState && target.configs.hasSemanticContext
    self.predicateDFAState(target, self.atn.getDecisionState(dfa.decision))
    target.prediction = ATN::INVALID_ALT_NUMBER if target.predicates
  end

  # All additions to the DFA happen once the state is fully populated.
  self.addDFAEdge(dfa, previousD, t, target)
end
|
558
|
+
# Attaches predicate information to the accept state +dfaState+.
#
# Predicates are collected for the state's conflicting alternatives (or its
# unique alternative). If any alternative is predicated, the state receives
# (predicate, alt) pairs and its prediction is invalidated so the predicates
# get evaluated; otherwise the prediction becomes the minimum alternative.
#
# @param dfaState the DFA state to update
# @param decisionState ATN decision state; its transition count is used as
#   the number of alternatives
def predicateDFAState(dfaState, decisionState)
  # All predicates must be tested, even in states that uniquely predict an alt.
  alt_count = decisionState.transitions.length
  candidate_alts = self.getConflictingAltsOrUniqueAlt(dfaState.configs)
  preds_by_alt = self.getPredsForAmbigAlts(candidate_alts, dfaState.configs, alt_count)
  if preds_by_alt
    dfaState.predicates = self.getPredicatePredictions(candidate_alts, preds_by_alt)
    dfaState.prediction = ATN::INVALID_ALT_NUMBER # make sure we use preds
  else
    # The configs carry predicates, but they vanished when OR'd together
    # (e.g. {p}? || NONE == NONE), so resolve to the minimum alternative.
    dfaState.prediction = candidate_alts.min
  end
end
|
576
|
+
# comes back with reach.uniqueAlt set to a valid alt
|
577
|
+
# Re-runs the prediction from +startIndex+ with full context, starting from
# the configuration set +s0+.
#
# @param dfa the decision's DFA
# @param cD the DFA state reached before failing over to full context
# @param s0 full-context start configuration set
# @param input the token stream
# @param startIndex input index at which the prediction started
# @param outerContext invoking rule context
# @return the predicted alternative
# @raise whatever noViableAlt builds, when no alternative is viable
def execATNWithFullContext(dfa, cD, # how far we got before failing over
                           s0, input, startIndex, outerContext)
  print "execATNWithFullContext #{s0}" if self.debug || self.debug_list_atn_decisions

  exact_ambiguity = false
  reach = nil
  current = s0
  input.seek(startIndex)
  symbol = input.LA(1)
  predicted = -1

  while true
    reach = self.computeReachSet(current, symbol, true)
    if reach.nil?
      # Nothing is reachable on this symbol. Build the error first, then see
      # whether some alternative already finished the decision's entry rule;
      # if so return it and let the error surface later with a better message.
      failure = self.noViableAlt(input, outerContext, current, startIndex)
      input.seek(startIndex)
      finished = self.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(current, outerContext)
      raise failure if finished == ATN::INVALID_ALT_NUMBER
      return finished
    end

    subsets = PredictionMode.getConflictingAltSubsets(reach)
    if self.debug
      print "LL altSubSets=#{subsets}, predict=#{PredictionMode.getUniqueAlt(subsets)}" \
            ", resolvesToJustOneViableAlt=#{PredictionMode.resolvesToJustOneViableAlt(subsets)}"
    end

    reach.uniqueAlt = self.getUniqueAlt(reach)
    if reach.uniqueAlt != ATN::INVALID_ALT_NUMBER
      # Unique prediction.
      predicted = reach.uniqueAlt
      break
    elsif self.predictionMode != PredictionMode.LL_EXACT_AMBIG_DETECTION
      predicted = PredictionMode.resolvesToJustOneViableAlt(subsets)
      break if predicted != ATN::INVALID_ALT_NUMBER
    elsif PredictionMode.allSubsetsConflict(subsets) && PredictionMode.allSubsetsEqual(subsets)
      # Exact ambiguity mode never terminates early; it stops only once every
      # subset conflicts and all subsets are equal.
      exact_ambiguity = true
      predicted = PredictionMode.getSingleViableAlt(subsets)
      break
    end
    # Otherwise there are multiple non-conflicting subsets, or the ambiguity
    # is not known yet, so keep consuming.

    current = reach
    unless symbol == Token::EOF
      input.consume
      symbol = input.LA(1)
    end
  end

  # A unique prediction without conflict means the decision is full LL, not
  # SLL: report it as context sensitivity.
  if reach.uniqueAlt != ATN::INVALID_ALT_NUMBER
    self.reportContextSensitivity(dfa, predicted, reach, startIndex, input.index)
    return predicted
  end

  # Predicates are not checked here; they were evaluated on the fly during
  # full-context prediction. foundExactAmbig is only ever true in exact
  # ambiguity mode: in the other modes the loop stops as soon as further
  # lookahead cannot change the chosen alternative, which is not enough to be
  # sure an ambiguity exists.
  self.reportAmbiguity(dfa, cD, startIndex, input.index, exact_ambiguity, nil, reach)
  predicted
end
|
681
|
+
# Computes the configuration set reachable from +closure+ on symbol +t+.
#
# @param closure the configurations to move from
# @param t the input symbol
# @param fullCtx whether this is a full-context computation
# @return the reach set, or nil when it is empty
def computeReachSet(closure, t, fullCtx)
  print "in computeReachSet, starting closure: #{closure}" if self.debug

  self.mergeCache = Hash.new if self.mergeCache.nil?

  intermediate = ATNConfigSet.new(fullCtx)

  # Configurations already sitting in a rule stop state are never updated by
  # a closure operation, so they are set aside here (only for full context or
  # when t is EOF) and considered again at the end.
  skipped_stop_configs = nil

  # First figure out where we can get to on input t.
  closure.each do |cfg|
    puts "testing #{self.getTokenName(t)} at #{cfg}" if self.debug

    if cfg.state.kind_of?(RuleStopState)
      #assert cfg.context.isEmpty()
      (skipped_stop_configs ||= []) << cfg if fullCtx || t == Token::EOF
      next
    end

    cfg.state.transitions.each do |trans|
      target = self.getReachableTarget(trans, t)
      intermediate.add(ATNConfig.createConfigState(cfg, target), self.mergeCache) if target
    end
  end

  # Shortcut: with nothing withheld, a single configuration or a unique
  # alternative among the configurations needs no closure; the intermediate
  # set is the result. The shortcut is not valid when configurations were
  # withheld, because intermediate is then incomplete.
  trivially_done = skipped_stop_configs.nil? &&
                   (intermediate.length == 1 ||
                    self.getUniqueAlt(intermediate) != ATN::INVALID_ALT_NUMBER)

  if trivially_done
    reach = intermediate
  else
    reach = ATNConfigSet.new(fullCtx)
    busy = Set.new
    eof_as_epsilon = t == Token::EOF
    intermediate.each do |cfg|
      self.closure(cfg, reach, busy, false, fullCtx, eof_as_epsilon)
    end
  end

  if t == Token::EOF
    # No input can follow EOF, so only configurations that reached a rule
    # stop state matter. When no closure was performed (reach is the
    # intermediate set itself) the filter must also look for stop states
    # reachable from each configuration.
    reach = self.removeAllConfigsNotInRuleStopState(reach, reach.equal?(intermediate))
  end

  # Put the withheld stop-state configurations back. In full-context mode
  # that only happens when the reach set holds no configuration in a rule
  # stop state, so the alternative matching the longest sequence wins.
  if skipped_stop_configs && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))
    #assert skipped_stop_configs.length > 0
    skipped_stop_configs.each { |cfg| reach.add(cfg, self.mergeCache) }
  end

  reach.empty? ? nil : reach
end
|
803
|
+
#
|
804
|
+
# Return a configuration set containing only the configurations from
|
805
|
+
# {@code configs} which are in a {@link RuleStopState}. If all
|
806
|
+
# configurations in {@code configs} are already in a rule stop state, this
|
807
|
+
# method simply returns {@code configs}.
|
808
|
+
#
|
809
|
+
# <p>When {@code lookToEndOfRule} is true, this method uses
|
810
|
+
# {@link ATN#nextTokens} for each configuration in {@code configs} which is
|
811
|
+
# not already in a rule stop state to see if a rule stop state is reachable
|
812
|
+
# from the configuration via epsilon-only transitions.</p>
|
813
|
+
#
|
814
|
+
# @param configs the configuration set to update
|
815
|
+
# @param lookToEndOfRule when true, this method checks for rule stop states
|
816
|
+
# reachable by epsilon-only transitions from each configuration in
|
817
|
+
# {@code configs}.
|
818
|
+
#
|
819
|
+
# @return {@code configs} if all configurations in {@code configs} are in a
|
820
|
+
# rule stop state, otherwise return a new configuration set containing only
|
821
|
+
# the configurations from {@code configs} which are in a rule stop state
|
822
|
+
#
|
823
|
+
# Filters +configs+ down to configurations in a RuleStopState. Returns
# +configs+ itself when every configuration already qualifies; otherwise
# returns a new ATNConfigSet. With +lookToEndOfRule+, a configuration whose
# state has only epsilon transitions and whose next-token set contains
# Token::EPSILON contributes a copy positioned at its rule's stop state.
def removeAllConfigsNotInRuleStopState(configs, lookToEndOfRule)
  return configs if PredictionMode.allConfigsInRuleStopStates(configs)

  kept = ATNConfigSet.new(configs.fullCtx)
  configs.each do |cfg|
    if cfg.state.kind_of?(RuleStopState)
      kept.add(cfg, self.mergeCache)
    elsif lookToEndOfRule && cfg.state.epsilonOnlyTransitions
      following = self.atn.nextTokens(cfg.state)
      if following.member?(Token::EPSILON)
        stop_state = self.atn.ruleToStopState[cfg.state.ruleIndex]
        kept.add(ATNConfig.new(stop_state, nil, nil, nil, cfg), self.mergeCache)
      end
    end
  end
  kept
end
|
843
|
+
# Builds the start configuration set for ATN state +p+: one configuration per
# outgoing transition (alternative numbers start at 1), each expanded with
# closure into the returned ATNConfigSet.
#
# @param p the ATN state to start from (type-checked as ATNState)
# @param ctx rule context converted to the initial prediction context
#   (type-checked as RuleContext)
# @param fullCtx whether to build a full-context configuration set
# @return the populated ATNConfigSet
def computeStartState(p, ctx, fullCtx)
  type_check(p, ATNState)
  type_check(ctx, RuleContext)

  # There is always at least the implicit call to the start rule.
  initial_context = PredictionContextFromRuleContext(self.atn, ctx)
  configs = ATNConfigSet.new(fullCtx)

  p.transitions.each_with_index do |transition, index|
    seed = ATNConfig.new(transition.target, index + 1, initial_context)
    self.closure(seed, configs, Set.new, true, fullCtx, false)
  end
  configs
end
|
859
|
+
#
|
860
|
+
# This method transforms the start state computed by
|
861
|
+
# {@link #computeStartState} to the special start state used by a
|
862
|
+
# precedence DFA for a particular precedence value. The transformation
|
863
|
+
# process applies the following changes to the start state's configuration
|
864
|
+
# set.
|
865
|
+
#
|
866
|
+
# <ol>
|
867
|
+
# <li>Evaluate the precedence predicates for each configuration using
|
868
|
+
# {@link SemanticContext#evalPrecedence}.</li>
|
869
|
+
# <li>Remove all configurations which predict an alternative greater than
|
870
|
+
# 1, for which another configuration that predicts alternative 1 is in the
|
871
|
+
# same ATN state with the same prediction context. This transformation is
|
872
|
+
# valid for the following reasons:
|
873
|
+
# <ul>
|
874
|
+
# <li>The closure block cannot contain any epsilon transitions which bypass
|
875
|
+
# the body of the closure, so all states reachable via alternative 1 are
|
876
|
+
# part of the precedence alternatives of the transformed left-recursive
|
877
|
+
# rule.</li>
|
878
|
+
# <li>The "primary" portion of a left recursive rule cannot contain an
|
879
|
+
# epsilon transition, so the only way an alternative other than 1 can exist
|
880
|
+
# in a state that is also reachable via alternative 1 is by nesting calls
|
881
|
+
# to the left-recursive rule, with the outer calls not being at the
|
882
|
+
# preferred precedence level.</li>
|
883
|
+
# </ul>
|
884
|
+
# </li>
|
885
|
+
# </ol>
|
886
|
+
#
|
887
|
+
# <p>
|
888
|
+
# The prediction context must be considered by this filter to address
|
889
|
+
# situations like the following.
|
890
|
+
# </p>
|
891
|
+
# <code>
|
892
|
+
# <pre>
|
893
|
+
# grammar TA;
|
894
|
+
# prog: statement* EOF;
|
895
|
+
# statement: letterA | statement letterA 'b' ;
|
896
|
+
# letterA: 'a';
|
897
|
+
# </pre>
|
898
|
+
# </code>
|
899
|
+
# <p>
|
900
|
+
# In the above grammar, the ATN state immediately before the token
|
901
|
+
# reference {@code 'a'} in {@code letterA} is reachable from the left edge
|
902
|
+
# of both the primary and closure blocks of the left-recursive rule
|
903
|
+
# {@code statement}. The prediction context associated with each of these
|
904
|
+
# configurations distinguishes between them, and prevents the alternative
|
905
|
+
# which stepped out to {@code prog} (and then back in to {@code statement})
|
906
|
+
# from being eliminated by the filter.
|
907
|
+
# </p>
|
908
|
+
#
|
909
|
+
# @param configs The configuration set computed by
|
910
|
+
# {@link #computeStartState} as the start state for the DFA.
|
911
|
+
# @return The transformed configuration set representing the start state
|
912
|
+
# for a precedence DFA at a particular precedence level (determined by
|
913
|
+
# calling {@link Parser#getPrecedence}).
|
914
|
+
#
|
915
|
+
# Converts a start-state configuration set into the one used by a precedence
# DFA, in two passes over +configs+:
#   1. alternative-1 configurations have their precedence predicates
#      evaluated; eliminated ones are dropped, the rest are kept and their
#      prediction context is remembered per ATN state number;
#   2. every other configuration is kept unless an alternative-1
#      configuration was recorded for the same state with an equal context.
# Returns a new ATNConfigSet.
def applyPrecedenceFilter(configs)
  alt1_context_by_state = {}
  filtered = ATNConfigSet.new(configs.fullCtx)

  # Pass 1: alternative 1.
  configs.each do |cfg|
    next unless cfg.alt == 1

    evaluated = cfg.semanticContext.evalPrecedence(self.parser, self.outerContext)
    next if evaluated.nil? # the configuration was eliminated

    alt1_context_by_state[cfg.state.stateNumber] = cfg.context
    kept = if evaluated != cfg.semanticContext
             ATNConfig.new(nil, nil, nil, evaluated, cfg)
           else
             cfg
           end
    filtered.add(kept, self.mergeCache)
  end

  # Pass 2: the remaining alternatives. A later refinement could also filter
  # the prediction context for alternatives > 1 (a graph subtraction).
  configs.each do |cfg|
    next if cfg.alt == 1 # already handled
    next if alt1_context_by_state[cfg.state.stateNumber] == cfg.context # eliminated

    filtered.add(cfg, self.mergeCache)
  end

  filtered
end
|
946
|
+
# Returns the target of +trans+ when the transition matches symbol +ttype+
# within the range 0..atn.maxTokenType; returns nil otherwise.
def getReachableTarget(trans, ttype)
  trans.matches(ttype, 0, self.atn.maxTokenType) ? trans.target : nil
end
|
953
|
+
|
954
|
+
# Collects, per alternative, the OR of the semantic contexts of all
# configurations predicting that alternative.
#
# The result is indexed by alternative number (slot 0 is never assigned).
# After the scan, every slot 1..nalts that is still nil is set to
# SemanticContext.NONE. If no slot ends up holding something other than NONE,
# the whole result collapses to nil.
#
# @param ambigAlts collection answering member?(alt) for the alts to inspect
# @param configs   enumerable of configurations (alt, semanticContext)
# @param nalts     number of alternatives in the decision
# @return an Array of nalts+1 entries, or nil when no alternative is predicated
def getPredsForAmbigAlts(ambigAlts, configs, nalts)
  preds_by_alt = Array.new(nalts + 1)
  configs.each do |cfg|
    next unless ambigAlts.member? cfg.alt

    preds_by_alt[cfg.alt] = SemanticContext.orContext(preds_by_alt[cfg.alt], cfg.semanticContext)
  end

  predicated = 0
  1.upto(nalts) do |alt|
    if preds_by_alt[alt].nil?
      # alternatives without any matching configuration count as unpredicated
      preds_by_alt[alt] = SemanticContext.NONE
    elsif !preds_by_alt[alt].equal?(SemanticContext.NONE)
      predicated += 1
    end
  end

  preds_by_alt = nil if predicated == 0
  puts "getPredsForAmbigAlts result #{preds_by_alt}" if self.debug
  preds_by_alt
end
|
991
|
+
# Pairs each ambiguous alternative with its predicate.
#
# +altToPred+ is indexed by alternative number. Alternatives start at 1, and
# getPredsForAmbigAlts only fills slots 1..nalts, so slot 0 is a nil
# placeholder. The scan therefore starts at index 1: visiting slot 0 would
# see a nil that is not SemanticContext.NONE and would wrongly report that a
# predicate exists, making the "no predicates" nil result unreachable.
#
# @param ambigAlts collection answering member?(alt), or nil for "no alts"
# @param altToPred Array of semantic contexts indexed by alternative number
# @return Array of PredPrediction for the alts in ambigAlts, or nil when every
#   entry from index 1 on is SemanticContext.NONE (nothing is predicated)
def getPredicatePredictions(ambigAlts, altToPred)
  pairs = []
  containsPredicate = false

  (1...altToPred.length).each do |alt|
    pred = altToPred[alt]
    # unpredicated is indicated by SemanticContext.NONE
    pairs.push(PredPrediction.new(pred, alt)) if ambigAlts && ambigAlts.member?(alt)
    containsPredicate = true unless pred.equal?(SemanticContext.NONE)
  end

  return nil unless containsPredicate

  pairs
end
|
1011
|
+
#
|
1012
|
+
# This method is used to improve the localization of error messages by
|
1013
|
+
# choosing an alternative rather than throwing a
|
1014
|
+
# {@link NoViableAltException} in particular prediction scenarios where the
|
1015
|
+
# {@link #ERROR} state was reached during ATN simulation.
|
1016
|
+
#
|
1017
|
+
# <p>
|
1018
|
+
# The default implementation of this method uses the following
|
1019
|
+
# algorithm to identify an ATN configuration which successfully parsed the
|
1020
|
+
# decision entry rule. Choosing such an alternative ensures that the
|
1021
|
+
# {@link ParserRuleContext} returned by the calling rule will be complete
|
1022
|
+
# and valid, and the syntax error will be reported later at a more
|
1023
|
+
# localized location.</p>
|
1024
|
+
#
|
1025
|
+
# <ul>
|
1026
|
+
# <li>If a syntactically valid path or paths reach the end of the decision rule and
|
1027
|
+
# they are semantically valid if predicated, return the min associated alt.</li>
|
1028
|
+
# <li>Else, if a semantically invalid but syntactically valid path exist
|
1029
|
+
# or paths exist, return the minimum associated alt.
|
1030
|
+
# </li>
|
1031
|
+
# <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li>
|
1032
|
+
# </ul>
|
1033
|
+
#
|
1034
|
+
# <p>
|
1035
|
+
# In some scenarios, the algorithm described above could predict an
|
1036
|
+
# alternative which will result in a {@link FailedPredicateException} in
|
1037
|
+
# the parser. Specifically, this could occur if the <em>only</em> configuration
|
1038
|
+
# capable of successfully parsing to the end of the decision rule is
|
1039
|
+
# blocked by a semantic predicate. By choosing this alternative within
|
1040
|
+
# {@link #adaptivePredict} instead of throwing a
|
1041
|
+
# {@link NoViableAltException}, the resulting
|
1042
|
+
# {@link FailedPredicateException} in the parser will identify the specific
|
1043
|
+
# predicate which is preventing the parser from successfully parsing the
|
1044
|
+
# decision rule, which helps developers identify and correct logic errors
|
1045
|
+
# in semantic predicates.
|
1046
|
+
# </p>
|
1047
|
+
#
|
1048
|
+
# @param configs The ATN configurations which were valid immediately before
|
1049
|
+
# the {@link #ERROR} state was reached
|
1050
|
+
# @param outerContext This is the \gamma_0 initial parser context from the paper
|
1051
|
+
# or the parser stack at the instant before prediction commences.
|
1052
|
+
#
|
1053
|
+
# @return The value to return from {@link #adaptivePredict}, or
|
1054
|
+
# {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not
|
1055
|
+
# identified and {@link #adaptivePredict} should report an error instead.
|
1056
|
+
#
|
1057
|
+
# Picks an alternative to return from prediction after the ERROR state was
# reached. Configurations are split by semantic validity; the semantically
# valid ones are tried first, then (only if that set is non-empty) the
# semantically invalid ones.
#
# @param configs      configurations valid just before the error
# @param outerContext context passed through to predicate evaluation
# @return the chosen alternative, or ATN::INVALID_ALT_NUMBER when neither
#   group contains a configuration that finished the decision entry rule
def getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs, outerContext)
  valid, invalid = self.splitAccordingToSemanticValidity(configs, outerContext)

  chosen = self.getAltThatFinishedDecisionEntryRule(valid)
  # semantically/syntactically viable path exists
  return chosen if chosen != ATN::INVALID_ALT_NUMBER

  # Is there a syntactically valid path with a failed pred?
  if invalid.length > 0
    chosen = self.getAltThatFinishedDecisionEntryRule(invalid)
    return chosen if chosen != ATN::INVALID_ALT_NUMBER
  end

  ATN::INVALID_ALT_NUMBER
end
|
1072
|
+
# Returns the smallest alternative among configurations that either reached
# into the outer context (reachesIntoOuterContext > 0) or sit in a
# RuleStopState whose prediction context has an empty path.
#
# @param configs enumerable of configurations
# @return the minimum such alt, or ATN::INVALID_ALT_NUMBER if there is none
def getAltThatFinishedDecisionEntryRule(configs)
  finished = Set.new
  configs.each do |cfg|
    done = cfg.reachesIntoOuterContext > 0
    done ||= cfg.state.kind_of?(RuleStopState) && cfg.context.hasEmptyPath()
    finished.add(cfg.alt) if done
  end

  finished.empty? ? ATN::INVALID_ALT_NUMBER : finished.min
end
|
1085
|
+
# Walk the list of configurations and split them according to
|
1086
|
+
# those that have preds evaluating to true/false. If no pred, assume
|
1087
|
+
# true pred and include in succeeded set. Returns Pair of sets.
|
1088
|
+
#
|
1089
|
+
# Create a new set so as not to alter the incoming parameter.
|
1090
|
+
#
|
1091
|
+
# Assumption: the input stream has been restored to the starting point
|
1092
|
+
# prediction, which is where predicates need to evaluate.
|
1093
|
+
#
|
1094
|
+
# Partitions +configs+ into two fresh ATNConfigSets without touching the
# input set: configurations whose predicate evaluates truthy (or that carry
# SemanticContext.NONE, which is never evaluated) and configurations whose
# predicate evaluates falsy.
#
# @param configs      configuration set to split
# @param outerContext context handed to each predicate's eval
# @return [succeeded, failed]
def splitAccordingToSemanticValidity(configs, outerContext)
  passed = ATNConfigSet.new(configs.fullCtx)
  rejected = ATNConfigSet.new(configs.fullCtx)

  configs.each do |cfg|
    unpredicated = cfg.semanticContext.equal?(SemanticContext.NONE)
    holds = unpredicated || cfg.semanticContext.eval(self.parser, outerContext)
    (holds ? passed : rejected).add(cfg)
  end

  [passed, rejected]
end
|
1111
|
+
# Look through a list of predicate/alt pairs, returning alts for the
|
1112
|
+
# pairs that win. A {@code NONE} predicate indicates an alt containing an
|
1113
|
+
# unpredicated config which behaves as "always true." If !complete
|
1114
|
+
# then we stop at the first predicate that evaluates to true. This
|
1115
|
+
# includes pairs with null predicates.
|
1116
|
+
#
|
1117
|
+
# Evaluates predicate/alt pairs in order and returns the set of alts that
# win. A pair whose predicate is SemanticContext.NONE wins without being
# evaluated. When +complete+ is falsy the scan stops at the first winner.
#
# @param predPredictions enumerable of pairs responding to pred and alt
# @param outerContext    context handed to each predicate's eval
# @param complete        whether to keep going after the first winner
# @return Set of winning alternative numbers
def evalSemanticContext(predPredictions, outerContext, complete)
  winners = Set.new

  predPredictions.each do |pair|
    unless pair.pred.equal?(SemanticContext.NONE)
      outcome = pair.pred.eval(self.parser, outerContext)
      puts "eval pred #{pair}=#{outcome}" if self.debug || self.dfa_debug
      next unless outcome

      puts "PREDICT #{pair.alt}" if self.debug || self.dfa_debug
    end

    winners.add(pair.alt)
    break unless complete
  end

  winners
end
|
1140
|
+
# TODO: If we are doing predicates, there is no point in pursuing
|
1141
|
+
# closure operations if we reach a DFA state that uniquely predicts
|
1142
|
+
# alternative. We will not be caching that DFA state and it is a
|
1143
|
+
# waste to pursue the closure. Might have to advance when we do
|
1144
|
+
# ambig detection though :(
|
1145
|
+
#
|
1146
|
+
|
1147
|
+
# Entry point for the closure operation: forwards all arguments to
# closureCheckingStopState with a starting depth of 0.
def closure(config, configs, closureBusy, collectPredicates, fullCtx, treatEofAsEpsilon)
  starting_depth = 0
  self.closureCheckingStopState(config, configs, closureBusy, collectPredicates,
                                fullCtx, starting_depth, treatEofAsEpsilon)
  # assert not fullCtx or not configs.dipsIntoOuterContext
end
|
1153
|
+
|
1154
|
+
|
1155
|
+
# Closure step that first deals with configurations sitting in a
# RuleStopState, then delegates epsilon-edge walking to closure_.
#
# For a rule stop state with a non-empty prediction context, every return
# state on the context is handled in turn:
# * an EMPTY_RETURN_STATE entry is added to +configs+ with the EMPTY context
#   in full-context mode, otherwise closure_ is run on the config as is;
# * any other entry is "popped": a new config at the return state, carrying
#   the parent context, is closed recursively at depth - 1.
# With an empty context, full-context mode adds the config directly;
# otherwise the config falls through to closure_.
#
# Note: config.context is re-read on each use rather than cached in a local,
# matching the original; the calls made from inside the loop receive the
# same config object.
def closureCheckingStopState(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
  puts "closure(#{config.toString(self.parser,true)})" if self.debug

  if config.state.kind_of?(RuleStopState)
    # We hit rule end. If we have context info, use it and
    # run thru all possible stack tops in ctx.
    unless config.context.isEmpty()
      config.context.length.times do |slot|
        return_state_number = config.context.getReturnState(slot)

        if return_state_number == PredictionContext::EMPTY_RETURN_STATE
          if fullCtx
            configs.add(ATNConfig.new(config.state, nil, PredictionContext.EMPTY, nil, config), self.mergeCache)
          else
            # we have no context info, just chase follow links (if greedy)
            puts "FALLING off rule " + self.getRuleName(config.state.ruleIndex) if self.debug
            self.closure_(config, configs, closureBusy, collectPredicates,
                          fullCtx, depth, treatEofAsEpsilon)
          end
          next
        end

        follow_state = self.atn.states[return_state_number]
        parent_context = config.context.getParent(slot) # "pop" return state
        popped = ATNConfig.new(follow_state, config.alt, parent_context, config.semanticContext)
        # While we have context to pop back from, we may have gotten that
        # context AFTER having fallen off a rule. Make sure we track that
        # we are now out of context.
        popped.reachesIntoOuterContext = config.reachesIntoOuterContext
        # assert depth > - 2**63
        self.closureCheckingStopState(popped, configs, closureBusy, collectPredicates,
                                      fullCtx, depth - 1, treatEofAsEpsilon)
      end
      return
    end

    if fullCtx
      # reached end of start rule
      configs.add(config, self.mergeCache)
      return
    end

    # no context info: just chase follow links (if greedy)
    puts "FALLING off rule #{self.getRuleName(config.state.ruleIndex)}" if self.debug
  end

  self.closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
end
|
1204
|
+
# Do the actual work of walking epsilon edges
|
1205
|
+
# Walks the epsilon edges leaving config.state and recurses on every
# configuration reachable through them.
#
# Predicate collection must stop once an ActionTransition has been crossed.
# The original wrote this as
#   continueCollecting = collectPredicates and not t.kind_of? ActionTransition
# but `=` binds tighter than `and`, so the flag was always just
# collectPredicates. The expression below uses && / ! so the action check
# actually takes part in the assignment.
#
# @param config            configuration whose state is being expanded
# @param configs           set receiving configurations (add, dipsIntoOuterContext=)
# @param closureBusy       set of configs already expanded after falling off a rule
# @param collectPredicates whether predicates are still being collected
# @param fullCtx           full-context mode flag, passed through
# @param depth             depth relative to the decision entry; 0 means "in context"
# @param treatEofAsEpsilon passed through to getEpsilonTarget
def closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
  p = config.state
  # optimization
  unless p.epsilonOnlyTransitions
    configs.add(config, self.mergeCache)
    # make sure to not return here, because EOF transitions can act as
    # both epsilon transitions and non-epsilon transitions.
  end

  p.transitions.each do |t|
    continueCollecting = collectPredicates && !t.kind_of?(ActionTransition)
    c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
    next unless c

    newDepth = depth
    if config.state.kind_of? RuleStopState
      # assert not fullCtx
      # Target fell off end of rule; mark resulting c as having dipped into
      # outer context. We can't get here if incoming config was rule stop and
      # we had context. Track how far we dip into outer context: it might
      # come in handy and we avoid evaluating context dependent preds if
      # this is > 0.

      # avoid infinite recursion for right-recursive rules
      next if closureBusy.member? c

      closureBusy.add(c)

      # TODO: the upstream precedence-filter suppression
      # (c.setPrecedenceFilterSuppressed when the transition's
      # outermostPrecedenceReturn equals the DFA start rule) is not ported.

      c.reachesIntoOuterContext = c.reachesIntoOuterContext + 1
      configs.dipsIntoOuterContext = true # TODO: can remove? only care when we add to set per middle of this method
      # assert newDepth > - 2**63
      newDepth -= 1
      puts "dips into outer ctx: #{c}" if self.debug
    elsif t.kind_of? RuleTransition
      # latch when newDepth goes negative - once we step out of the entry
      # context we can't return
      newDepth += 1 if newDepth >= 0
    end

    self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
  end
end
|
1262
|
+
|
1263
|
+
# Looks up a rule name for display purposes.
#
# @param index rule index
# @return parser.ruleNames[index] when a parser is attached and index >= 0,
#   otherwise the placeholder string "<rule INDEX>"
def getRuleName(index)
  return "<rule #{index}>" unless self.parser && index >= 0

  self.parser.ruleNames[index]
end
|
1270
|
+
|
1271
|
+
# Computes the configuration reached from +config+ over transition +t+
# without consuming input, dispatching on t.serializationType.
#
# RULE, PRECEDENCE, PREDICATE and ACTION edges are delegated to their helper
# methods; an EPSILON edge yields a copy of the config at the target state.
# ATOM, RANGE and SET edges only produce a configuration when
# +treatEofAsEpsilon+ is set and the edge matches Token::EOF (EOF edges act
# like epsilon edges after the first EOF has been traversed).
#
# @return the new configuration, or nil when the edge cannot be taken
def getEpsilonTarget(config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon)
  case t.serializationType
  when Transition::RULE
    self.ruleTransition(config, t)
  when Transition::PRECEDENCE
    self.precedenceTransition(config, t, collectPredicates, inContext, fullCtx)
  when Transition::PREDICATE
    self.predTransition(config, t, collectPredicates, inContext, fullCtx)
  when Transition::ACTION
    self.actionTransition(config, t)
  when Transition::EPSILON
    ATNConfig.new(t.target, nil, nil, nil, config)
  when Transition::ATOM, Transition::RANGE, Transition::SET
    if treatEofAsEpsilon && t.matches(Token::EOF, 0, 1)
      ATNConfig.createConfigState(config, t.target)
    end
  end
end
|
1300
|
+
# Crosses an action edge: returns a copy of +config+ positioned at the
# edge's target state. Logs the edge when debugging is on.
def actionTransition(config, t)
  puts "ACTION edge #{t.ruleIndex}:#{t.actionIndex}" if self.debug

  ATNConfig.new(t.target, nil, nil, nil, config)
end
|
1306
|
+
# Crosses a precedence-predicate edge.
#
# * When predicates are not being collected, or we are out of context, the
#   edge is treated as a plain epsilon edge.
# * In full-context mode the predicate is evaluated immediately, with the
#   input temporarily rewound to startIndex; a failing predicate yields nil.
# * Otherwise the predicate is ANDed into the config's semantic context.
#
# @return the resulting configuration, or nil if the predicate failed
def precedenceTransition(config, pt, collectPredicates, inContext, fullCtx)
  if self.debug
    puts "PRED (collectPredicates=#{collectPredicates}) #{pt.precedence}>=_p, ctx dependent=true"
    puts "context surrounding pred is #{self.parser.getRuleInvocationStack()}" if self.parser
  end

  result =
    if !(collectPredicates && inContext)
      ATNConfig.new(pt.target, nil, nil, nil, config)
    elsif fullCtx
      # In full context mode, we can evaluate predicates on-the-fly during
      # closure, which dramatically reduces the size of the config sets. It
      # also obviates the need to test predicates later during conflict
      # resolution.
      resume_at = self.input.index
      self.input.seek(self.startIndex)
      holds = pt.getPredicate().eval(self.parser, self.outerContext)
      self.input.seek(resume_at)
      holds ? ATNConfig.new(pt.target, nil, nil, nil, config) : nil # no pred context
    else
      combined = SemanticContext.andContext(config.semanticContext, pt.getPredicate())
      ATNConfig.new(pt.target, nil, nil, combined, config)
    end

  puts "config from pred transition=#{result}" if self.debug
  result
end
|
1340
|
+
# Crosses a semantic-predicate edge.
#
# The predicate is taken into account only while predicates are being
# collected and it is either context independent or we are still in context.
# In that case full-context mode evaluates it on the spot (with the input
# temporarily rewound to startIndex; failure yields nil), while SLL mode ANDs
# it into the config's semantic context. In every other case the edge
# behaves like a plain epsilon edge.
#
# @return the resulting configuration, or nil if the predicate failed
def predTransition(config, pt, collectPredicates, inContext, fullCtx)
  if self.debug
    puts "PRED (collectPredicates=#{collectPredicates}) #{pt.ruleIndex}:#{pt.predIndex}, ctx dependent=#{pt.isCtxDependent}"
    puts "context surrounding pred is #{self.parser.getRuleInvocationStack()}" if self.parser
  end

  # equivalent to: not ctx-dependent, or ctx-dependent and in context
  usable = collectPredicates && (!pt.isCtxDependent || inContext)

  result =
    if !usable
      ATNConfig.new(pt.target, nil, nil, nil, config)
    elsif fullCtx
      # In full context mode, we can evaluate predicates on-the-fly during
      # closure, which dramatically reduces the size of the config sets. It
      # also obviates the need to test predicates later during conflict
      # resolution.
      resume_at = self.input.index
      self.input.seek(self.startIndex)
      holds = pt.getPredicate().eval(self.parser, self.outerContext)
      self.input.seek(resume_at)
      holds ? ATNConfig.new(pt.target, nil, nil, nil, config) : nil # no pred context
    else
      combined = SemanticContext.andContext(config.semanticContext, pt.getPredicate())
      ATNConfig.new(pt.target, nil, nil, combined, config)
    end

  puts "config from pred transition=#{result}" if self.debug
  result
end
|
1374
|
+
# Crosses a rule-invocation edge: pushes the edge's follow state onto the
# config's prediction context and returns a copy of the config at the
# invoked rule's start (t.target) with that new context.
def ruleTransition(config, t)
  puts "CALL rule #{self.getRuleName(t.target.ruleIndex) }, ctx=#{config.context}" if self.debug

  follow = t.followState
  pushed = SingletonPredictionContext.create(config.context, follow.stateNumber)
  ATNConfig.new(t.target, nil, pushed, nil, config)
end
|
1382
|
+
# Returns the alternatives in conflict for +configs+, obtained by feeding
# PredictionMode.getConflictingAltSubsets into PredictionMode.getAlts.
def getConflictingAlts(configs)
  PredictionMode.getAlts(PredictionMode.getConflictingAltSubsets(configs))
end
|
1386
|
+
# Sam pointed out a problem with the previous definition, v3, of
|
1387
|
+
# ambiguous states. If we have another state associated with conflicting
|
1388
|
+
# alternatives, we should keep going. For example, the following grammar
|
1389
|
+
#
|
1390
|
+
# s : (ID | ID ID?) ';' ;
|
1391
|
+
#
|
1392
|
+
# When the ATN simulation reaches the state before ';', it has a DFA
|
1393
|
+
# state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally
|
1394
|
+
# 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node
|
1395
|
+
# because alternative two has another way to continue, via [6|2|[]].
|
1396
|
+
# The key is that we have a single state that has config's only associated
|
1397
|
+
# with a single alternative, 2, and crucially the state transitions
|
1398
|
+
# among the configurations are all non-epsilon transitions. That means
|
1399
|
+
# we don't consider any conflicts that include alternative 2. So, we
|
1400
|
+
# ignore the conflict between alts 1 and 2. We ignore a set of
|
1401
|
+
# conflicting alts when there is an intersection with an alternative
|
1402
|
+
# associated with a single alt state in the state→config-list map.
|
1403
|
+
#
|
1404
|
+
# It's also the case that we might have two conflicting configurations but
|
1405
|
+
# also a 3rd nonconflicting configuration for a different alternative:
|
1406
|
+
# [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar:
|
1407
|
+
#
|
1408
|
+
# a : A | A | A B ;
|
1409
|
+
#
|
1410
|
+
# After matching input A, we reach the stop state for rule A, state 1.
|
1411
|
+
# State 8 is the state right before B. Clearly alternatives 1 and 2
|
1412
|
+
# conflict and no amount of further lookahead will separate the two.
|
1413
|
+
# However, alternative 3 will be able to continue and so we do not
|
1414
|
+
# stop working on this state. In the previous example, we're concerned
|
1415
|
+
# with states associated with the conflicting alternatives. Here alt
|
1416
|
+
# 3 is not associated with the conflicting configs, but since we can continue
|
1417
|
+
# looking for input reasonably, I don't declare the state done. We
|
1418
|
+
# ignore a set of conflicting alts when we have an alternative
|
1419
|
+
# that we still need to pursue.
|
1420
|
+
#
|
1421
|
+
|
1422
|
+
# Returns a one-element Set holding configs.uniqueAlt when the set has a
# unique alternative; otherwise returns configs.conflictingAlts unchanged.
def getConflictingAltsOrUniqueAlt(configs)
  return configs.conflictingAlts if configs.uniqueAlt == ATN::INVALID_ALT_NUMBER

  Set[configs.uniqueAlt]
end
|
1432
|
+
# Produces a display name for token type +t+.
#
# Compared with a plain tokenNames[t] lookup this guards two cases:
# * negative token types other than EOF: Ruby arrays index negative values
#   from the end, which would return the name of an unrelated token, so they
#   fall through to the numeric form;
# * a nil entry in tokenNames: interpolation yields "<t>" instead of raising
#   NoMethodError on nil + String.
#
# @param t token type (Integer)
# @return "EOF" for Token::EOF; "NAME<t>" when the parser has a name table
#   covering t; otherwise t.to_s. A +t+ beyond the table is reported on
#   stdout before the numeric form is returned.
def getTokenName(t)
  return "EOF" if t == Token::EOF

  names = self.parser && self.parser.tokenNames
  if names
    if t >= names.length
      puts "#{t} ttype out of range: #{names}"
      puts self.parser.getInputStream().getTokens().to_s
    elsif t >= 0
      return "#{names[t]}<#{t}>"
    end
  end

  t.to_s
end
|
1446
|
+
# Display name of the next lookahead token type, i.e. getTokenName applied
# to input.LA(1).
def getLookaheadName(input)
  next_type = input.LA(1)
  getTokenName(next_type)
end
|
1449
|
+
# Used for debugging in adaptivePredict around execATN but I cut
|
1450
|
+
# it out for clarity now that alg. works well. We can leave this
|
1451
|
+
# "dead" code for a bit.
|
1452
|
+
#
|
1453
|
+
# Debug helper: prints the "dead end configs: " header and walks the
# dead-end configurations of +nvae+, building a description of each
# configuration's first outgoing edge.
#
# NOTE(review): the line that would print each configuration is commented
# out, so the description is computed and then discarded; only the header is
# printed.
def dumpDeadEndConfigs(nvae)
  print "dead end configs: "
  nvae.getDeadEndConfigs().each do |cfg|
    first_edge = cfg.state.transitions[0] if cfg.state.transitions.length > 0

    description =
      case first_edge
      when AtomTransition
        "Atom #{self.getTokenName(first_edge.label)}"
      when SetTransition
        negation = first_edge.kind_of?(NotSetTransition) ? "~" : ""
        "#{negation}Set #{first_edge.set}"
      else
        "no edges"
      end
    # STDERR.puts "#{cfg.toString(self.parser, true)}:#{description}"
  end
end
|
1474
|
+
# Builds (but does not raise) a NoViableAltException for a failed
# prediction, using the token at +startIndex+ as the start token and the
# current lookahead token input.LT(1) as the offending token.
def noViableAlt(input, outerContext, configs, startIndex)
  recognizer = self.parser
  start_token = input.get(startIndex)
  offending_token = input.LT(1)
  NoViableAltException.new(recognizer, input, start_token, offending_token, configs, outerContext)
end
|
1477
|
+
|
1478
|
+
# Returns the single alternative shared by all configurations, or
# ATN::INVALID_ALT_NUMBER when the set is empty or mentions more than one
# alternative.
def getUniqueAlt(configs)
  unique = ATN::INVALID_ALT_NUMBER
  configs.each do |cfg|
    if unique == ATN::INVALID_ALT_NUMBER
      unique = cfg.alt # found first alt
      next
    end
    return ATN::INVALID_ALT_NUMBER unless cfg.alt == unique
  end
  unique
end
|
1489
|
+
#
|
1490
|
+
# Add an edge to the DFA, if possible. This method calls
|
1491
|
+
# {@link #addDFAState} to ensure the {@code to} state is present in the
|
1492
|
+
# DFA. If {@code from} is {@code null}, or if {@code t} is outside the
|
1493
|
+
# range of edges that can be represented in the DFA tables, this method
|
1494
|
+
# returns without adding the edge to the DFA.
|
1495
|
+
#
|
1496
|
+
# <p>If {@code to} is {@code null}, this method returns {@code null}.
|
1497
|
+
# Otherwise, this method returns the {@link DFAState} returned by calling
|
1498
|
+
# {@link #addDFAState} for the {@code to} state.</p>
|
1499
|
+
#
|
1500
|
+
# @param dfa The DFA
|
1501
|
+
# @param from The source state for the edge
|
1502
|
+
# @param t The input symbol
|
1503
|
+
# @param to The target state for the edge
|
1504
|
+
#
|
1505
|
+
# @return If {@code to} is {@code null}, this method returns {@code null};
|
1506
|
+
# otherwise this method returns the result of calling {@link #addDFAState}
|
1507
|
+
# on {@code to}
|
1508
|
+
#
|
1509
|
+
# Registers +to+ in the DFA and, when possible, links it from +from_+ on
# input symbol +t+.
#
# The edge is stored at from_.edges[t + 1] (so t == -1 lands in slot 0); the
# edge table is created on first use with atn.maxTokenType + 2 slots. No edge
# is recorded when +from_+ is nil or +t+ lies outside -1..atn.maxTokenType.
#
# @return nil when +to+ is nil; otherwise the state returned by addDFAState
def addDFAEdge(dfa, from_, t, to)
  puts "EDGE #{from_} -> #{to} upon #{self.getTokenName(t)}" if self.debug

  return nil if to.nil?

  target = self.addDFAState(dfa, to) # use the existing state if there is one
  linkable = !from_.nil? && t >= -1 && t <= self.atn.maxTokenType
  return target unless linkable

  from_.edges = Array.new(self.atn.maxTokenType + 2) if from_.edges.nil?
  from_.edges[t + 1] = target # connect

  if self.debug
    names = self.parser.nil? ? nil : self.parser.tokenNames
    print "DFA=\n#{dfa.toString(names)}"
  end
  target
end
|
1538
|
+
#
|
1539
|
+
# Add state {@code D} to the DFA if it is not already present, and return
|
1540
|
+
# the actual instance stored in the DFA. If a state equivalent to {@code D}
|
1541
|
+
# is already in the DFA, the existing state is returned. Otherwise this
|
1542
|
+
# method returns {@code D} after adding it to the DFA.
|
1543
|
+
#
|
1544
|
+
# <p>If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and
|
1545
|
+
# does not change the DFA.</p>
|
1546
|
+
#
|
1547
|
+
# @param dfa The dfa
|
1548
|
+
# @param D The DFA state to add
|
1549
|
+
# @return The state stored in the DFA. This will be either the existing
|
1550
|
+
# state if {@code D} is already in the DFA, or {@code D} itself if the
|
1551
|
+
# state was not already present.
|
1552
|
+
#
|
1553
|
+
# Interns DFA state +cD+ in dfa.states.
#
# ATNSimulator::ERROR is returned untouched. If dfa.states already has an
# entry for +cD+, that entry is returned. Otherwise +cD+ gets the next state
# number, its config set is optimized and made read-only (unless it already
# is read-only), and it is stored under itself as key.
#
# @return the state now stored in the DFA (or ERROR)
def addDFAState(dfa, cD)
  return cD if cD.equal? ATNSimulator::ERROR

  known = dfa.states[cD]
  return known if known

  cD.stateNumber = dfa.states.length
  unless cD.configs.readonly
    cD.configs.optimizeConfigs(self)
    cD.configs.setReadonly(true)
  end
  dfa.states[cD] = cD
  puts "adding new DFA state: #{cD}" if self.debug
  cD
end
|
1574
|
+
# Notifies the parser's error-listener dispatch that prediction is retrying
# with full context. Prints a trace line first when debug or retry_debug is
# on. Does nothing (and returns nil) when no parser is attached.
def reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex)
  if self.debug || self.retry_debug
    span = startIndex..stopIndex
    puts "reportAttemptingFullContext decision=#{dfa.decision}:#{configs}, input=#{self.parser.getTokenStream().getText(span)}"
  end
  return unless self.parser

  listeners = self.parser.getErrorListenerDispatch()
  listeners.reportAttemptingFullContext(self.parser, dfa, startIndex, stopIndex, conflictingAlts, configs)
end
|
1584
|
+
# Notifies the parser's error-listener dispatch of a context-sensitive
# decision that resolved to +prediction+. Prints a trace line first when
# debug or retry_debug is on. Does nothing (and returns nil) when no parser
# is attached.
def reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex)
  if self.debug || self.retry_debug
    span = startIndex..stopIndex
    puts "reportContextSensitivity decision=#{dfa.decision}:#{configs}, input=#{self.parser.getTokenStream().getText(span)}"
  end
  return unless self.parser

  listeners = self.parser.getErrorListenerDispatch()
  listeners.reportContextSensitivity(self.parser, dfa, startIndex, stopIndex, prediction, configs)
end
|
1594
|
+
|
1595
|
+
# If context sensitive parsing, we know it's ambiguity not conflict
|
1596
|
+
# Notifies the parser's error-listener dispatch of an ambiguity among
# +ambigAlts+ over the input span startIndex..stopIndex. Prints a trace line
# first when debug or retry_debug is on. Does nothing (and returns nil) when
# no parser is attached. The +cD+ argument is accepted but not used here.
def reportAmbiguity(dfa, cD, startIndex, stopIndex, exact, ambigAlts, configs)
  if self.debug || self.retry_debug
    # (The Java original carried a commented-out ParserATNPathFinder trace of
    # every alternative at this point; it was never ported.)
    span = startIndex..stopIndex
    puts "reportAmbiguity #{ambigAlts}:#{configs}, input=#{self.parser.getTokenStream().getText(span)}"
  end
  return unless self.parser

  listeners = self.parser.getErrorListenerDispatch()
  listeners.reportAmbiguity(self.parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
end
|
1622
|
+
end
|