antlr4 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +27 -0
  3. data/README.md +46 -0
  4. data/lib/antlr4.rb +262 -0
  5. data/lib/antlr4/BufferedTokenStream.rb +306 -0
  6. data/lib/antlr4/CommonTokenFactory.rb +53 -0
  7. data/lib/antlr4/CommonTokenStream.rb +56 -0
  8. data/lib/antlr4/FileStream.rb +14 -0
  9. data/lib/antlr4/InputStream.rb +82 -0
  10. data/lib/antlr4/IntervalSet.rb +341 -0
  11. data/lib/antlr4/LL1Analyzer.rb +177 -0
  12. data/lib/antlr4/Lexer.rb +335 -0
  13. data/lib/antlr4/ListTokenSource.rb +140 -0
  14. data/lib/antlr4/Parser.rb +562 -0
  15. data/lib/antlr4/ParserInterpreter.rb +149 -0
  16. data/lib/antlr4/ParserRuleContext.rb +162 -0
  17. data/lib/antlr4/PredictionContext.rb +690 -0
  18. data/lib/antlr4/Recognizer.rb +162 -0
  19. data/lib/antlr4/RuleContext.rb +226 -0
  20. data/lib/antlr4/Token.rb +124 -0
  21. data/lib/antlr4/TokenFactory.rb +3 -0
  22. data/lib/antlr4/TokenSource.rb +4 -0
  23. data/lib/antlr4/TokenStream.rb +3 -0
  24. data/lib/antlr4/TraceListener.rb +23 -0
  25. data/lib/antlr4/atn/ATN.rb +133 -0
  26. data/lib/antlr4/atn/ATNConfig.rb +146 -0
  27. data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
  28. data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
  29. data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
  30. data/lib/antlr4/atn/ATNSimulator.rb +43 -0
  31. data/lib/antlr4/atn/ATNState.rb +253 -0
  32. data/lib/antlr4/atn/ATNType.rb +22 -0
  33. data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
  34. data/lib/antlr4/atn/LexerAction.rb +311 -0
  35. data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
  36. data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
  37. data/lib/antlr4/atn/PredictionMode.rb +525 -0
  38. data/lib/antlr4/atn/SemanticContext.rb +355 -0
  39. data/lib/antlr4/atn/Transition.rb +297 -0
  40. data/lib/antlr4/base.rb +60 -0
  41. data/lib/antlr4/dfa/DFA.rb +128 -0
  42. data/lib/antlr4/dfa/DFASerializer.rb +77 -0
  43. data/lib/antlr4/dfa/DFAState.rb +133 -0
  44. data/lib/antlr4/error.rb +151 -0
  45. data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
  46. data/lib/antlr4/error/ErrorListener.rb +109 -0
  47. data/lib/antlr4/error/ErrorStrategy.rb +742 -0
  48. data/lib/antlr4/tree/Chunk.rb +31 -0
  49. data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
  50. data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
  51. data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
  52. data/lib/antlr4/tree/RuleTagToken.rb +39 -0
  53. data/lib/antlr4/tree/TokenTagToken.rb +38 -0
  54. data/lib/antlr4/tree/Tree.rb +204 -0
  55. data/lib/antlr4/tree/Trees.rb +111 -0
  56. data/lib/antlr4/version.rb +5 -0
  57. data/lib/antlr4/xpath/XPath.rb +354 -0
  58. data/lib/double_key_map.rb +78 -0
  59. data/lib/java_symbols.rb +24 -0
  60. data/lib/uuid.rb +87 -0
  61. data/test/test_intervalset.rb +664 -0
  62. data/test/test_tree.rb +140 -0
  63. data/test/test_uuid.rb +122 -0
  64. metadata +109 -0
@@ -0,0 +1,311 @@
1
+
2
# Integer codes identifying each kind of lexer action.
class LexerActionType
  # include JavaSymbols

  # The type of a {@link LexerChannelAction} action.
  CHANNEL = 0
  # The type of a {@link LexerCustomAction} action.
  CUSTOM = 1
  # The type of a {@link LexerModeAction} action.
  MODE = 2
  # The type of a {@link LexerMoreAction} action.
  MORE = 3
  # The type of a {@link LexerPopModeAction} action.
  POP_MODE = 4
  # The type of a {@link LexerPushModeAction} action.
  PUSH_MODE = 5
  # The type of a {@link LexerSkipAction} action.
  SKIP = 6
  # The type of a {@link LexerTypeAction} action.
  TYPE = 7
end
13
+
14
# Base class for lexer actions. It stores the action's type code and a flag
# saying whether the action depends on the input position. Equality on the
# base class is object identity; subclasses in this file override `==`.
class LexerAction
  attr_accessor :actionType, :isPositionDependent

  # @param action the action type code stored in `actionType`
  def initialize(action)
    @actionType = action
    @isPositionDependent = false
  end

  # Hash derived from the string form of the action type.
  def hash
    actionType.to_s.hash
  end

  # Delegates to `==` so subclasses only need to override one method.
  def eql?(other)
    self == other
  end

  # Identity comparison.
  def ==(other)
    equal?(other)
  end
end
34
+
35
+ #
36
+ # Implements the {@code skip} lexer action by calling {@link Lexer#skip}.
37
+ #
38
+ # <p>The {@code skip} command does not have any parameters, so this action is
39
+ # implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
40
class LexerSkipAction < LexerAction
  # Lazily created shared instance of this parameterless action.
  @@INSTANCE = nil

  # Returns the shared instance, creating it on first use.
  def self.INSTANCE
    @@INSTANCE ||= LexerSkipAction.new
  end

  def initialize
    super(LexerActionType::SKIP)
  end

  # Runs the action by calling `skip` on the given lexer.
  def execute(lexer)
    lexer.skip
  end

  def to_s
    "skip"
  end
end
62
+
63
+ # Implements the {@code type} lexer action by calling {@link Lexer#setType}
64
+ # with the assigned type.
65
class LexerTypeAction < LexerAction
  attr_accessor :type

  # @param _type the token type value this action assigns to the lexer
  def initialize(_type)
    super(LexerActionType::TYPE)
    @type = _type
  end

  # Runs the action by assigning the stored type to `lexer.type`.
  def execute(lexer)
    lexer.type = type
  end

  # Hash built from the action type code and the assigned type.
  def hash
    "#{actionType}#{type}".hash
  end

  # Equal when `other` is this object or another LexerTypeAction, and the
  # assigned types match.
  def ==(other)
    return false unless equal?(other) || other.kind_of?(LexerTypeAction)
    type == other.type
  end

  def to_s
    "type(#{type})"
  end
end
87
+
88
+ # Implements the {@code pushMode} lexer action by calling
89
+ # {@link Lexer#pushMode} with the assigned mode.
90
class LexerPushModeAction < LexerAction
  attr_accessor :mode

  # @param _mode the mode value passed to `lexer.pushMode` on execution
  def initialize(_mode)
    super(LexerActionType::PUSH_MODE)
    @mode = _mode
  end

  # Runs the action by calling `pushMode` on the lexer with the stored mode.
  def execute(lexer)
    lexer.pushMode(mode)
  end

  # Hash built from the action type code and the mode.
  def hash
    "#{actionType}#{mode}".hash
  end

  # Equal when `other` is this object or another LexerPushModeAction, and
  # the modes match.
  def ==(other)
    return false unless equal?(other) || other.kind_of?(LexerPushModeAction)
    mode == other.mode
  end

  def to_s
    "pushMode(#{mode})"
  end
end
116
+
117
+ # Implements the {@code popMode} lexer action by calling {@link Lexer#popMode}.
118
+ #
119
+ # <p>The {@code popMode} command does not have any parameters, so this action is
120
+ # implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
121
class LexerPopModeAction < LexerAction
  # Lazily created shared instance of this parameterless action.
  @@INSTANCE = nil

  # Returns the shared instance, creating it on first use.
  def self.INSTANCE
    @@INSTANCE ||= new
  end

  def initialize
    super(LexerActionType::POP_MODE)
  end

  # Runs the action by calling `popMode` on the given lexer.
  def execute(lexer)
    lexer.popMode
  end

  def to_s
    "popMode"
  end
end
142
+
143
+ # Implements the {@code more} lexer action by calling {@link Lexer#more}.
144
+ #
145
+ # <p>The {@code more} command does not have any parameters, so this action is
146
+ # implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
147
class LexerMoreAction < LexerAction
  # Lazily created shared instance of this parameterless action.
  @@INSTANCE = nil

  # Returns the shared instance, creating it on first use.
  def self.INSTANCE
    @@INSTANCE ||= new
  end

  def initialize
    super(LexerActionType::MORE)
  end

  # Runs the action by calling `more` on the given lexer.
  def execute(lexer)
    lexer.more
  end

  def to_s
    "more"
  end
end
168
+
169
+ # Implements the {@code mode} lexer action by calling {@link Lexer#mode} with
170
+ # the assigned mode.
171
class LexerModeAction < LexerAction
  attr_accessor :mode

  # @param _mode the mode value assigned to the lexer on execution
  def initialize(_mode)
    super(LexerActionType::MODE)
    @mode = _mode
  end

  # Runs the action by assigning the stored mode to `lexer.mode`.
  def execute(lexer)
    lexer.mode = mode
  end

  # Hash built from the action type code and the mode.
  def hash
    "#{actionType}#{mode}".hash
  end

  # Equal when `other` is this object or another LexerModeAction, and the
  # modes match.
  def ==(other)
    return false unless equal?(other) || other.kind_of?(LexerModeAction)
    mode == other.mode
  end

  def to_s
    "mode(#{mode})"
  end
end
197
+ # Executes a custom lexer action by calling {@link Recognizer#action} with the
198
+ # rule and action indexes assigned to the custom action. The implementation of
199
+ # a custom action is added to the generated code for the lexer in an override
200
+ # of {@link Recognizer#action} when the grammar is compiled.
201
+ #
202
+ # <p>This class may represent embedded actions created with the <code>{...}</code>
203
+ # syntax in ANTLR 4, as well as actions created for lexer commands where the
204
+ # command argument could not be evaluated when the grammar was compiled.</p>
205
+
206
class LexerCustomAction < LexerAction
  attr_accessor :ruleIndex, :actionIndex, :isPositionDependent

  # Constructs a custom lexer action with the specified rule and action
  # indexes. Custom actions are always flagged as position-dependent.
  #
  # @param rule_index   the rule index passed to `lexer.action`
  # @param action_index the action index passed to `lexer.action`
  def initialize(rule_index, action_index)
    super(LexerActionType::CUSTOM)
    @ruleIndex = rule_index
    @actionIndex = action_index
    @isPositionDependent = true
  end

  # Runs the action by calling `lexer.action` with a nil first argument and
  # the stored rule and action indexes.
  def execute(lexer)
    lexer.action(nil, ruleIndex, actionIndex)
  end

  # Hash over the individual components. Hashing an array avoids the
  # ambiguity of string concatenation, where rule 1/action 23 and
  # rule 12/action 3 would produce the same text.
  def hash
    [actionType, ruleIndex, actionIndex].hash
  end

  # Equal when `other` is this object, or another LexerCustomAction with
  # the same rule and action indexes.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerCustomAction) &&
       ruleIndex == other.ruleIndex &&
       actionIndex == other.actionIndex)
  end

  # Value-based string form. Other code in this file derives hashes from
  # the actions' string forms; without this override the default
  # Object#to_s (which embeds the object address) would make equal custom
  # actions hash differently.
  def to_s
    "custom(#{ruleIndex},#{actionIndex})"
  end
end
237
+ # Implements the {@code channel} lexer action by calling
238
+ # {@link Lexer#setChannel} with the assigned channel.
239
class LexerChannelAction < LexerAction
  attr_accessor :channel

  # @param _channel the channel value assigned to the lexer on execution
  def initialize(_channel)
    super(LexerActionType::CHANNEL)
    @channel = _channel
  end

  # Runs the action by assigning the stored channel to `lexer.channel`.
  def execute(lexer)
    lexer.channel = channel
  end

  # Hash built from the action type code and the channel.
  def hash
    "#{actionType}#{channel}".hash
  end

  # Equal when `other` is this object or another LexerChannelAction, and
  # the channels match.
  def ==(other)
    return false unless equal?(other) || other.kind_of?(LexerChannelAction)
    channel == other.channel
  end

  def to_s
    "channel(#{channel})"
  end
end
267
+ # This implementation of {@link LexerAction} is used for tracking input offsets
268
+ # for position-dependent actions within a {@link LexerActionExecutor}.
269
+ #
270
+ # <p>This action is not serialized as part of the ATN, and is only required for
271
+ # position-dependent lexer actions which appear at a location other than the
272
+ # end of a rule. For more information about DFA optimizations employed for
273
+ # lexer actions, see {@link LexerActionExecutor#append} and
274
+ # {@link LexerActionExecutor#fixOffsetBeforeMatch}.</p>
275
class LexerIndexedCustomAction < LexerAction
  attr_accessor :offset, :action, :isPositionDependent

  # Constructs a new indexed custom action by associating a character offset
  # with a lexer action.
  #
  # @param _offset the offset into the input, relative to the token start
  #   index, at which the wrapped action should be executed
  # @param _action the lexer action to execute at that offset
  def initialize(_offset, _action)
    # Read the type from the parameter: the `action` reader is still nil
    # here, so `action.actionType` raised NoMethodError on every call.
    super(_action.actionType)
    @offset = _offset
    @action = _action
    @isPositionDependent = true
  end

  # Executes the wrapped action on the lexer. The caller is assumed to have
  # already positioned the input stream.
  def execute(lexer)
    action.execute(lexer)
  end

  # Hash over the components, using the wrapped action's own `hash` rather
  # than its string form so that it stays consistent with `==`.
  def hash
    [actionType, offset, action].hash
  end

  # Equal when `other` is this object, or another LexerIndexedCustomAction
  # with the same offset and an equal wrapped action.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerIndexedCustomAction) &&
       offset == other.offset &&
       action == other.action)
  end

  # Value-based string form combining the offset and the wrapped action.
  def to_s
    "indexed(#{offset},#{action})"
  end
end
@@ -0,0 +1,134 @@
1
+ # Represents an executor for a sequence of lexer actions which are traversed during
2
+ # the matching operation of a lexer rule (token).
3
+ #
4
+ # <p>The executor tracks position information for position-dependent lexer actions
5
+ # efficiently, ensuring that actions appearing only at the end of the rule do
6
+ # not cause bloating of the {@link DFA} created for the lexer.</p>
7
+
8
class LexerActionExecutor
  attr_accessor :hashCode, :lexerActions

  # @param _lexerActions the ordered list of lexer actions to execute
  def initialize(_lexerActions = Array.new)
    @lexerActions = _lexerActions
    # Cache the hash code at construction time, since it is used by `hash`
    # and `==`. It is derived from the actions' own `hash` values
    # (Array#hash) rather than their string forms, so it agrees with `==`
    # even for actions that define no `to_s`.
    @hashCode = @lexerActions.hash
  end

  # Creates a LexerActionExecutor which executes the actions of the given
  # executor followed by the specified action.
  #
  # @param lexerActionExecutor the executor for actions already traversed,
  #   or nil, which is treated as an empty executor
  # @param lexerAction the action to execute after the existing ones
  # @return a new LexerActionExecutor holding the combined actions; the
  #   input executor is left unmodified
  def self.append(lexerActionExecutor, lexerAction)
    return LexerActionExecutor.new([lexerAction]) if lexerActionExecutor.nil?

    # Build a new array. Array#concat requires an Array argument (a single
    # action raised TypeError) and would also mutate the source executor's
    # list in place.
    LexerActionExecutor.new(lexerActionExecutor.lexerActions + [lexerAction])
  end

  # Creates a LexerActionExecutor in which every position-dependent action
  # that is not already a LexerIndexedCustomAction is wrapped in one
  # carrying the given offset.
  #
  # @param offset the offset to assign to position-dependent actions which
  #   do not already have one
  # @return self when nothing needed wrapping, otherwise a new executor
  def fixOffsetBeforeMatch(offset)
    updatedLexerActions = nil
    @lexerActions.each_with_index do |lexerAction, i|
      next unless lexerAction.isPositionDependent &&
                  !lexerAction.kind_of?(LexerIndexedCustomAction)

      # Copy lazily so the common "nothing to fix" case allocates nothing.
      updatedLexerActions ||= @lexerActions.dup
      updatedLexerActions[i] = LexerIndexedCustomAction.new(offset, lexerAction)
    end
    updatedLexerActions.nil? ? self : LexerActionExecutor.new(updatedLexerActions)
  end

  # Executes the encapsulated actions within the context of a lexer.
  #
  # Indexed actions seek the input to `startIndex + offset` before running
  # the wrapped action; other position-dependent actions seek to the index
  # the input had on entry. If the last seek left the input somewhere other
  # than that entry index, it is restored before the method returns, even
  # when an action raises.
  #
  # @param lexer the lexer instance passed to each action's `execute`
  # @param input the input stream; must respond to `index` and `seek`
  # @param startIndex the token start index that offsets are relative to
  def execute(lexer, input, startIndex)
    requiresSeek = false
    stopIndex = input.index
    begin
      lexerActions.each do |lexerAction|
        if lexerAction.kind_of?(LexerIndexedCustomAction)
          target = startIndex + lexerAction.offset
          input.seek(target)
          lexerAction = lexerAction.action
          requiresSeek = target != stopIndex
        elsif lexerAction.isPositionDependent
          input.seek(stopIndex)
          requiresSeek = false
        end
        lexerAction.execute(lexer)
      end
    ensure
      input.seek(stopIndex) if requiresSeek
    end
  end

  # Returns the hash code cached at construction time.
  def hash
    hashCode
  end

  def eql?(other)
    self == other
  end

  # Equal when `other` is this object, or another executor with the same
  # cached hash code and an equal action list.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerActionExecutor) &&
       hashCode == other.hashCode &&
       lexerActions == other.lexerActions)
  end
end
@@ -0,0 +1,1622 @@
1
+ #
2
+ # The embodiment of the adaptive LL(*), ALL(*), parsing strategy.
3
+ #
4
+ # <p>
5
+ # The basic complexity of the adaptive strategy makes it harder to understand.
6
+ # We begin with ATN simulation to build paths in a DFA. Subsequent prediction
7
+ # requests go through the DFA first. If they reach a state without an edge for
8
+ # the current symbol, the algorithm fails over to the ATN simulation to
9
+ # complete the DFA path for the current input (until it finds a conflict state
10
+ # or uniquely predicting state).</p>
11
+ #
12
+ # <p>
13
+ # All of that is done without using the outer context because we want to create
14
+ # a DFA that is not dependent upon the rule invocation stack when we do a
15
+ # prediction. One DFA works in all contexts. We avoid using context not
16
+ # necessarily because it's slower, although it can be, but because of the DFA
17
+ # caching problem. The closure routine only considers the rule invocation stack
18
+ # created during prediction beginning in the decision rule. For example, if
19
+ # prediction occurs without invoking another rule's ATN, there are no context
20
+ # stacks in the configurations. When lack of context leads to a conflict, we
21
+ # don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
22
+ # strategy (versus full LL(*)).</p>
23
+ #
24
+ # <p>
25
+ # When SLL yields a configuration set with conflict, we rewind the input and
26
+ # retry the ATN simulation, this time using full outer context without adding
27
+ # to the DFA. Configuration context stacks will be the full invocation stacks
28
+ # from the start rule. If we get a conflict using full context, then we can
29
+ # definitively say we have a true ambiguity for that input sequence. If we
30
+ # don't get a conflict, it implies that the decision is sensitive to the outer
31
+ # context. (It is not context-sensitive in the sense of context-sensitive
32
+ # grammars.)</p>
33
+ #
34
+ # <p>
35
+ # The next time we reach this DFA state with an SLL conflict, through DFA
36
+ # simulation, we will again retry the ATN simulation using full context mode.
37
+ # This is slow because we can't save the results and have to "interpret" the
38
+ # ATN each time we get that input.</p>
39
+ #
40
+ # <p>
41
+ # <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p>
42
+ #
43
+ # <p>
44
+ # We could cache results from full context to predicted alternative easily and
45
+ # that saves a lot of time but doesn't work in presence of predicates. The set
46
+ # of visible predicates from the ATN start state changes depending on the
47
+ # context, because closure can fall off the end of a rule. I tried to cache
48
+ # tuples (stack context, semantic context, predicted alt) but it was slower
49
+ # than interpreting and much more complicated. Also required a huge amount of
50
+ # memory. The goal is not to create the world's fastest parser anyway. I'd like
51
+ # to keep this algorithm simple. By launching multiple threads, we can improve
52
+ # the speed of parsing across a large number of files.</p>
53
+ #
54
+ # <p>
55
+ # There is no strict ordering between the amount of input used by SLL vs LL,
56
+ # which makes it really hard to build a cache for full context. Let's say that
57
+ # we have input A B C that leads to an SLL conflict with full context X. That
58
+ # implies that using X we might only use A B but we could also use A B C D to
59
+ # resolve conflict. Input A B C D could predict alternative 1 in one position
60
+ # in the input and A B C E could predict alternative 2 in another position in
61
+ # input. The conflicting SLL configurations could still be non-unique in the
62
+ # full context prediction, which would lead us to requiring more input than the
63
+ # original A B C. To make a prediction cache work, we have to track the exact
64
+ # input used during the previous prediction. That amounts to a cache that maps
65
+ # X to a specific DFA for that context.</p>
66
+ #
67
+ # <p>
68
+ # Something should be done for left-recursive expression predictions. They are
69
+ # likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry
70
+ # with full LL thing Sam does.</p>
71
+ #
72
+ # <p>
73
+ # <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p>
74
+ #
75
+ # <p>
76
+ # We avoid doing full context retry when the outer context is empty, we did not
77
+ # dip into the outer context by falling off the end of the decision state rule,
78
+ # or when we force SLL mode.</p>
79
+ #
80
+ # <p>
81
+ # As an example of the case where we do not dip into the outer context, consider super
82
+ # constructor calls versus function calls. One grammar might look like
83
+ # this:</p>
84
+ #
85
+ # <pre>
86
+ # ctorBody
87
+ # : '{' superCall? stat* '}'
88
+ # ;
89
+ # </pre>
90
+ #
91
+ # <p>
92
+ # Or, you might see something like</p>
93
+ #
94
+ # <pre>
95
+ # stat
96
+ # : superCall ';'
97
+ # | expression ';'
98
+ # | ...
99
+ # ;
100
+ # </pre>
101
+ #
102
+ # <p>
103
+ # In both cases I believe that no closure operations will dip into the outer
104
+ # context. In the first case ctorBody in the worst case will stop at the '}'.
105
+ # In the 2nd case it should stop at the ';'. Both cases should stay within the
106
+ # entry rule and not dip into the outer context.</p>
107
+ #
108
+ # <p>
109
+ # <strong>PREDICATES</strong></p>
110
+ #
111
+ # <p>
112
+ # Predicates are always evaluated if present, in both SLL and LL. SLL and
113
+ # LL simulation deals with predicates differently. SLL collects predicates as
114
+ # it performs closure operations like ANTLR v3 did. It delays predicate
115
+ # evaluation until it reaches an accept state. This allows us to cache the SLL
116
+ # ATN simulation whereas, if we had evaluated predicates on-the-fly during
117
+ # closure, the DFA state configuration sets would be different and we couldn't
118
+ # build up a suitable DFA.</p>
119
+ #
120
+ # <p>
121
+ # When building a DFA accept state during ATN simulation, we evaluate any
122
+ # predicates and return the sole semantically valid alternative. If there is
123
+ # more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
124
+ # we throw an exception. Alternatives without predicates act like they have
125
+ # true predicates. The simple way to think about it is to strip away all
126
+ # alternatives with false predicates and choose the minimum alternative that
127
+ # remains.</p>
128
+ #
129
+ # <p>
130
+ # When we start in the DFA and reach an accept state that's predicated, we test
131
+ # those and return the minimum semantically viable alternative. If no
132
+ # alternatives are viable, we throw an exception.</p>
133
+ #
134
+ # <p>
135
+ # During full LL ATN simulation, closure always evaluates predicates
136
+ # on-the-fly. This is crucial to reducing the configuration set size during
137
+ # closure. It hits a landmine when parsing with the Java grammar, for example,
138
+ # without this on-the-fly evaluation.</p>
139
+ #
140
+ # <p>
141
+ # <strong>SHARING DFA</strong></p>
142
+ #
143
+ # <p>
144
+ # All instances of the same parser share the same decision DFAs through a
145
+ # static field. Each instance gets its own ATN simulator but they share the
146
+ # same {@link #decisionToDFA} field. They also share a
147
+ # {@link PredictionContextCache} object that makes sure that all
148
+ # {@link PredictionContext} objects are shared among the DFA states. This makes
149
+ # a big size difference.</p>
150
+ #
151
+ # <p>
152
+ # <strong>THREAD SAFETY</strong></p>
153
+ #
154
+ # <p>
155
+ # The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when
156
+ # it adds a new DFA object to that array. {@link #addDFAEdge}
157
+ # locks on the DFA for the current decision when setting the
158
+ # {@link DFAState#edges} field. {@link #addDFAState} locks on
159
+ # the DFA for the current decision when looking up a DFA state to see if it
160
+ # already exists. We must make sure that all requests to add DFA states that
161
+ # are equivalent result in the same shared DFA object. This is because lots of
162
+ # threads will be trying to update the DFA at once. The
163
+ # {@link #addDFAState} method also locks inside the DFA lock
164
+ # but this time on the shared context cache when it rebuilds the
165
+ # configurations' {@link PredictionContext} objects using cached
166
+ # subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
167
+ # safe as long as we can guarantee that all threads referencing
168
+ # {@code s.edge[t]} get the same physical target {@link DFAState}, or
169
+ # {@code null}. Once into the DFA, the DFA simulation does not reference the
170
+ # {@link DFA#states} map. It follows the {@link DFAState#edges} field to new
171
+ # targets. The DFA simulator will either find {@link DFAState#edges} to be
172
+ # {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or
173
+ # {@code dfa.edges[t]} to be non-null. The
174
+ # {@link #addDFAEdge} method could be racing to set the field
175
+ # but in either case the DFA simulator works; if {@code null}, it requests ATN
176
+ # simulation. It could also race trying to get {@code dfa.edges[t]}, but either
177
+ # way it will work because it's not doing a test and set operation.</p>
178
+ #
179
+ # <p>
180
+ # <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage
181
+ # Parsing)</strong></p>
182
+ #
183
+ # <p>
184
+ # Sam pointed out that if SLL does not give a syntax error, then there is no
185
+ # point in doing full LL, which is slower. We only have to try LL if we get a
186
+ # syntax error. For maximum speed, Sam starts the parser set to pure SLL
187
+ # mode with the {@link BailErrorStrategy}:</p>
188
+ #
189
+ # <pre>
190
+ # parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
191
+ # parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
192
+ # </pre>
193
+ #
194
+ # <p>
195
+ # If it does not get a syntax error, then we're done. If it does get a syntax
196
+ # error, we need to retry with the combined SLL/LL strategy.</p>
197
+ #
198
+ # <p>
199
+ # The reason this works is as follows. If there are no SLL conflicts, then the
200
+ # grammar is SLL (at least for that input set). If there is an SLL conflict,
201
+ # the full LL analysis must yield a set of viable alternatives which is a
202
+ # subset of the alternatives reported by SLL. If the LL set is a singleton,
203
+ # then the grammar is LL but not SLL. If the LL set is the same size as the SLL
204
+ # set, the decision is SLL. If the LL set has size &gt; 1, then that decision
205
+ # is truly ambiguous on the current input. If the LL set is smaller, then the
206
+ # SLL conflict resolution might choose an alternative that the full LL would
207
+ # rule out as a possibility based upon better context information. If that's
208
+ # the case, then the SLL parse will definitely get an error because the full LL
209
+ # analysis says it's not viable. If SLL conflict resolution chooses an
210
+ # alternative within the LL set, then both SLL and LL would choose the same
211
+ # alternative because they both choose the minimum of multiple conflicting
212
+ # alternatives.</p>
213
+ #
214
+ # <p>
215
+ # Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
216
+ # a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL
217
+ # parsing will get an error because SLL will pursue alternative 1. If
218
+ # <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
219
+ # choose the same alternative because alternative one is the minimum of either
220
+ # set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax
221
+ # error. If <em>s</em> is {@code {1}} then SLL will succeed.</p>
222
+ #
223
+ # <p>
224
+ # Of course, if the input is invalid, then we will get an error for sure in
225
+ # both SLL and LL parsing. Erroneous input will therefore require 2 passes over
226
+ # the input.</p>
227
+ #
228
+
229
+ class ParserATNSimulator < ATNSimulator
230
+ include PredictionContextFunctions
231
+
232
+ class << self
233
+ attr_reader :debug, :dfa_debug, :debug_list_atn_decisions,:retry_debug
234
+ end
235
+ @@debug = false
236
+ @@dfa_debug = false
237
+ @@debug_list_atn_decisions = false
238
+ @@retry_debug = false
239
+
240
+ def debug; @@debug ;end
241
+ def dfa_debug; @@dfa_debug ;end
242
+
243
+ def debug_list_atn_decisions; @@debug_list_atn_decisions ; end
244
+ def retry_debug ; @@retry_debug ; end
245
+
246
+
247
+
248
+ attr_accessor :decisionToDFA, :startIndex
249
+ attr_accessor :parser, :predictionMode, :input, :outerContext, :mergeCache
250
+ attr_accessor :_dfa
251
+
252
# Creates a simulator for +parser+.
#
# @param parser the parser this simulator predicts for
# @param atn passed through to the ATNSimulator constructor
# @param decisionToDFA collection of DFAs indexed by decision number
# @param sharedContextCache passed through to the ATNSimulator constructor
def initialize(parser, atn, decisionToDFA, sharedContextCache)
  super(atn, sharedContextCache)

  self.parser = parser
  self.decisionToDFA = decisionToDFA

  # Prediction strategy: SLL, LL, or LL with exact ambiguity detection.
  self.predictionMode = PredictionMode.LL

  # Per-prediction state. It is kept on the object (rather than passed as
  # parameters) because it is needed deep inside the prediction code.
  self.input = nil
  self.startIndex = 0
  self.outerContext = nil

  # Cache for merged prediction contexts: graphs a and b map to their merged
  # result c, so the merge can be skipped when a and b are seen again (the
  # pair (b, a) should be looked up as well). It is dropped after every
  # prediction because keeping it wastes a lot of memory. It is not
  # synchronized; one parser/simulator object handles one input at a time.
  self.mergeCache = nil
end
272
+
273
+
274
# Does nothing and returns nil.
def reset
end
276
+
277
# Predicts the alternative to take for +decision+ by running execATN from
# the decision's DFA start state, building that start state first when the
# DFA does not have one yet.
#
# @param input token stream; it is marked on entry and, whatever happens,
#   rewound to the entry index and released on exit
# @param decision index into decisionToDFA
# @param outerContext parser context at the decision; ParserRuleContext.EMPTY
#   is substituted for nil when a start state has to be built
# @return the alternative number returned by execATN
def adaptivePredict(input, decision, outerContext)
  if self.debug || self.debug_list_atn_decisions
    puts "adaptivePredict decision #{decision} exec LA(1)==#{self.getLookaheadName(input)} line #{input.LT(1).line}:#{input.LT(1).column}"
  end

  # Publish the per-prediction state for the code further down.
  self.input = input
  self.startIndex = input.index
  self.outerContext = outerContext

  @_dfa = dfa = self.decisionToDFA[decision]
  marker = input.mark
  index = input.index

  begin
    # A precedence DFA has one start state per parser precedence level;
    # a regular DFA has the single start state s0.
    s0 = if dfa.precedenceDfa
           dfa.getPrecedenceStartState(self.parser.getPrecedence())
         else
           dfa.s0
         end

    if s0.nil?
      outerContext = ParserRuleContext.EMPTY if outerContext.nil?
      if self.debug || self.debug_list_atn_decisions
        puts "predictATN decision #{dfa.decision} exec LA(1)==#{self.getLookaheadName(input)}, outerContext=#{outerContext.to_s}"
      end

      # A star-loop entry flagged as a precedence rule decision turns this
      # DFA into a precedence DFA before its start state is computed.
      if !dfa.precedenceDfa &&
         dfa.atnStartState.kind_of?(StarLoopEntryState) &&
         dfa.atnStartState.precedenceRuleDecision
        dfa.setPrecedenceDfa(true)
      end

      type_check(ParserRuleContext.EMPTY(), ParserRuleContext)
      startConfigs = self.computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, false)

      usesPrecedence = dfa.precedenceDfa
      # For a precedence DFA the computed start state is first filtered for
      # the current precedence level.
      startConfigs = self.applyPrecedenceFilter(startConfigs) if usesPrecedence
      s0 = self.addDFAState(dfa, DFAState.new(nil, startConfigs))
      if usesPrecedence
        dfa.setPrecedenceStartState(self.parser.getPrecedence(), s0)
      else
        dfa.s0 = s0
      end
    end

    alt = self.execATN(dfa, s0, input, index, outerContext)
    puts "DFA after predictATN: #{dfa.toString(self.parser.tokenNames)}" if self.debug
    alt
  ensure
    self.mergeCache = nil # the merge cache only lives for one prediction
    input.seek(index)
    input.release(marker)
    @_dfa = nil
  end
end
358
+ # Performs ATN simulation to compute a predicted alternative based
359
+ # upon the remaining input, but also updates the DFA cache to avoid
360
+ # having to traverse the ATN again for the same input sequence.
361
+
362
+ # There are some key conditions we're looking for after computing a new
363
+ # set of ATN configs (proposed DFA state):
364
+ # if the set is empty, there is no viable alternative for current symbol
365
+ # does the state uniquely predict an alternative?
366
+ # does the state have a conflict that would prevent us from
367
+ # putting it on the work list?
368
+
369
+ # We also have some key operations to do:
370
+ # add an edge from previous DFA state to potentially new DFA state, D,
371
+ # upon current symbol but only if adding to work list, which means in all
372
+ # cases except no viable alternative (and possibly non-greedy decisions?)
373
+ # collecting predicates and adding semantic context to DFA accept states
374
+ # adding rule context to context-sensitive DFA accept states
375
+ # consuming an input symbol
376
+ # reporting a conflict
377
+ # reporting an ambiguity
378
+ # reporting a context sensitivity
379
+ # reporting insufficient predicates
380
+
381
+ # cover these cases:
382
+ # dead end
383
+ # single alt
384
+ # single alt + preds
385
+ # conflict
386
+ # conflict + preds
387
+ #
388
# Walks the DFA from +s0+ over the lookahead, computing missing target
# states on demand, until an alternative is predicted or an error is hit.
#
# Debug traces are written with puts so each message ends its own line, as
# the traces in adaptivePredict and computeTargetState already do.
#
# @param dfa DFA of the decision being predicted
# @param s0 DFA state to start from
# @param input token stream (LA, LT, index, seek, consume are used)
# @param startIndex input index at which prediction started
# @param outerContext parser context at the decision
# @return the predicted alternative number
# @raise the object built by noViableAlt when no alternative is viable
def execATN(dfa, s0, input, startIndex, outerContext)
  type_check(outerContext, ParserRuleContext)
  if self.debug || self.debug_list_atn_decisions
    puts "execATN decision #{dfa.decision} exec LA(1)==#{self.getLookaheadName(input)} line #{input.LT(1).line}:#{input.LT(1).column}"
  end
  previousD = s0
  puts "s0 = #{s0}" if self.debug

  t = input.LA(1)
  while true # while more work
    cD = self.getExistingTargetState(previousD, t)
    cD = self.computeTargetState(dfa, previousD, t) if cD.nil?

    if cD.equal?(ATNSimulator::ERROR)
      # If any configs in the previous state dipped into the outer context,
      # the input up to t actually finished the entry rule, at least for an
      # SLL decision. An error will come no matter what, so it is delayed
      # until after the decision to get a better message; on a conflict
      # among states that dip out, the minimum alternative is chosen.
      e = self.noViableAlt(input, outerContext, previousD.configs, startIndex)
      input.seek(startIndex)
      alt = self.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext)
      raise e if alt == ATN::INVALID_ALT_NUMBER
      return alt
    end

    if cD.requiresFullContext && self.predictionMode != PredictionMode.SLL
      # With predicates the conflict might resolve to a single alternative,
      # which makes the full-context pass unnecessary.
      conflictingAlts = nil
      if cD.predicates
        puts "DFA state has preds in DFA sim LL failover" if self.debug
        conflictIndex = input.index
        # Predicates are evaluated with the input at the start index.
        input.seek(startIndex) if conflictIndex != startIndex
        conflictingAlts = self.evalSemanticContext(cD.predicates, outerContext, true)
        if conflictingAlts.length == 1
          puts "Full LL avoided" if self.debug
          return conflictingAlts.min
        end
        # Restore the index so the fallback to full context is reported
        # with the input at the conflict position.
        input.seek(conflictIndex) if conflictIndex != startIndex
      end
      puts "ctx sensitive state #{outerContext} in #{cD}" if self.dfa_debug

      fullCtx = true
      s0_closure = self.computeStartState(dfa.atnStartState, outerContext, fullCtx)
      self.reportAttemptingFullContext(dfa, conflictingAlts, cD.configs, startIndex, input.index)
      return self.execATNWithFullContext(dfa, cD, s0_closure, input, startIndex, outerContext)
    end

    if cD.isAcceptState
      return cD.prediction if cD.predicates.nil?

      stopIndex = input.index
      input.seek(startIndex)
      alts = self.evalSemanticContext(cD.predicates, outerContext, true)
      raise self.noViableAlt(input, outerContext, cD.configs, startIndex) if alts.length == 0
      unless alts.length == 1
        # Report the ambiguity after predicate evaluation so the correct
        # set of ambiguous alternatives is reported.
        self.reportAmbiguity(dfa, cD, startIndex, stopIndex, false, alts, cD.configs)
      end
      return alts.min
    end

    previousD = cD
    if t != Token::EOF
      input.consume
      t = input.LA(1)
    end
  end
end
484
+ #
485
+ # Get an existing target state for an edge in the DFA. If the target state
486
+ # for the edge has not yet been computed or is otherwise not available,
487
+ # this method returns {@code null}.
488
+ #
489
+ # @param previousD The current DFA state
490
+ # @param t The next input symbol
491
+ # @return The existing target DFA state for the given input symbol
492
+ # {@code t}, or {@code null} if the target state for this edge is not
493
+ # already cached
494
+ #
495
def getExistingTargetState(previousD, t)
  edges = previousD.edges
  return nil if edges.nil?

  # Edges are stored shifted by one, so symbol t lives in slot t + 1.
  slot = t + 1
  return nil unless slot >= 0 && slot < edges.length

  edges[slot]
end
503
+ #
504
+ # Compute a target state for an edge in the DFA, and attempt to add the
505
+ # computed state and corresponding edge to the DFA.
506
+ #
507
+ # @param dfa The DFA
508
+ # @param previousD The current DFA state
509
+ # @param t The next input symbol
510
+ #
511
+ # @return The computed target DFA state for the given input symbol
512
+ # {@code t}. If {@code t} does not lead to a valid DFA state, this method
513
+ # returns {@link #ERROR}.
514
+ #
515
def computeTargetState(dfa, previousD, t)
  reach = self.computeReachSet(previousD.configs, t, false)
  if reach.nil?
    # Nothing is reachable on t: record the error edge and report ERROR.
    self.addDFAEdge(dfa, previousD, t, ATNSimulator::ERROR)
    return ATNSimulator::ERROR
  end

  # Build the candidate state completely before it is added to the DFA.
  newState = DFAState.new(nil, reach)
  predictedAlt = self.getUniqueAlt(reach)

  if self.debug
    altSubSets = PredictionMode.getConflictingAltSubsets(reach)
    puts "SLL altSubSets=#{altSubSets}, configs=#{reach}, predict=#{predictedAlt}, allSubsetsConflict=#{PredictionMode.allSubsetsConflict(altSubSets)}, conflictingAlts=#{self.getConflictingAlts(reach)}"
  end

  if predictedAlt != ATN::INVALID_ALT_NUMBER
    # No conflict: exactly one alternative is predicted.
    newState.isAcceptState = true
    newState.configs.uniqueAlt = predictedAlt
    newState.prediction = predictedAlt
  elsif PredictionMode.hasSLLConflictTerminatingPrediction(self.predictionMode, reach)
    # More than one viable alternative.
    newState.configs.conflictingAlts = self.getConflictingAlts(reach)
    newState.requiresFullContext = true
    # In SLL-only mode prediction stops here with the minimum alternative.
    newState.isAcceptState = true
    newState.prediction = newState.configs.conflictingAlts.min
  end

  if newState.isAcceptState && newState.configs.hasSemanticContext
    self.predicateDFAState(newState, self.atn.getDecisionState(dfa.decision))
    newState.prediction = ATN::INVALID_ALT_NUMBER if newState.predicates
  end

  # The edge (and the state) are added only now that the state is complete;
  # the state returned by addDFAEdge is what the caller gets.
  self.addDFAEdge(dfa, previousD, t, newState)
end
558
# Attaches (predicate, alternative) pairs to +dfaState+ when its
# configurations carry predicates, otherwise settles on the minimum
# alternative.
#
# @param dfaState DFA state to update (predicates and prediction are written)
# @param decisionState decision state; only its transition count is read
def predicateDFAState(dfaState, decisionState)
  # All predicates are tested, even in states that predict a single
  # alternative.
  altCount = decisionState.transitions.length
  candidateAlts = self.getConflictingAltsOrUniqueAlt(dfaState.configs)
  predsByAlt = self.getPredsForAmbigAlts(candidateAlts, dfaState.configs, altCount)

  if predsByAlt
    dfaState.predicates = self.getPredicatePredictions(candidateAlts, predsByAlt)
    # An invalid prediction forces callers to go through the predicates.
    dfaState.prediction = ATN::INVALID_ALT_NUMBER
  else
    # Predicates in the configs can vanish when OR'd together, as in
    # {p}? || NONE == NONE; with no predicates left, resolve to the min alt.
    dfaState.prediction = candidateAlts.min
  end
end
576
+ # comes back with reach.uniqueAlt set to a valid alt
577
# Re-runs prediction from +startIndex+ with full context after the SLL pass
# ended in the conflicting state +cD+.
#
# Debug traces are written with puts so each message ends its own line, as
# the traces in adaptivePredict and computeTargetState already do.
#
# @param dfa DFA of the decision being predicted
# @param cD DFA state at which the SLL pass failed over
# @param s0 full-context start configuration set
# @param input token stream (seek, LA, index, consume are used)
# @param startIndex input index at which prediction started
# @param outerContext parser context at the decision
# @return the predicted alternative number
# @raise the object built by noViableAlt when no alternative is viable
def execATNWithFullContext(dfa, cD, # how far we got before failing over
                           s0, input, startIndex, outerContext)
  if self.debug || self.debug_list_atn_decisions
    puts "execATNWithFullContext #{s0}"
  end
  fullCtx = true
  foundExactAmbig = false
  reach = nil
  previous = s0
  input.seek(startIndex)
  t = input.LA(1)
  predictedAlt = -1

  while true
    reach = self.computeReachSet(previous, t, fullCtx)
    if reach.nil?
      # An error will come no matter what, so it is delayed until after the
      # decision to get a better message. If a configuration finished the
      # decision entry rule, its minimum alternative is returned instead.
      e = self.noViableAlt(input, outerContext, previous, startIndex)
      input.seek(startIndex)
      alt = self.getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext)
      raise e if alt == ATN::INVALID_ALT_NUMBER
      return alt
    end

    altSubSets = PredictionMode.getConflictingAltSubsets(reach)
    if self.debug
      puts "LL altSubSets=#{altSubSets}, predict=#{PredictionMode.getUniqueAlt(altSubSets)}, resolvesToJustOneViableAlt=#{PredictionMode.resolvesToJustOneViableAlt(altSubSets)}"
    end

    reach.uniqueAlt = self.getUniqueAlt(reach)
    if reach.uniqueAlt != ATN::INVALID_ALT_NUMBER
      # Unique prediction.
      predictedAlt = reach.uniqueAlt
      break
    elsif self.predictionMode != PredictionMode.LL_EXACT_AMBIG_DETECTION
      predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets)
      break if predictedAlt != ATN::INVALID_ALT_NUMBER
    elsif PredictionMode.allSubsetsConflict(altSubSets) && PredictionMode.allSubsetsEqual(altSubSets)
      # Exact ambiguity mode never terminates early: it keeps consuming
      # until the conflict is known exactly.
      foundExactAmbig = true
      predictedAlt = PredictionMode.getSingleViableAlt(altSubSets)
      break
    end
    # Otherwise there are several non-conflicting subsets, or the ambiguity
    # is not known yet: keep going.

    previous = reach
    if t != Token::EOF
      input.consume
      t = input.LA(1)
    end
  end

  # A unique prediction without conflict means this is a full LL decision,
  # not an SLL one.
  if reach.uniqueAlt != ATN::INVALID_ALT_NUMBER
    self.reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index)
    return predictedAlt
  end

  # Predicates are not checked here; they were evaluated on the fly during
  # full-context prediction. Outside exact ambiguity mode the ambiguity may
  # not be exact: lookahead stops as soon as more input cannot change the
  # predicted alternative, so foundExactAmbig tells the listener whether the
  # ambiguity was established exactly.
  self.reportAmbiguity(dfa, cD, startIndex, input.index, foundExactAmbig, nil, reach)
  predictedAlt
end
681
# Computes the configuration set reachable from +closure+ on symbol +t+.
#
# The entry debug trace is written with puts so it ends its own line, as
# the other traces in this method and its siblings already do.
#
# @param closure configurations to start from (only each is used)
# @param t the input symbol
# @param fullCtx whether full-context prediction is in effect
# @return the reachable configuration set, or nil when it is empty
def computeReachSet(closure, t, fullCtx)
  puts "in computeReachSet, starting closure: #{closure}" if self.debug

  self.mergeCache = Hash.new if self.mergeCache.nil?

  intermediate = ATNConfigSet.new(fullCtx)

  # Configurations already in a rule stop state are never updated by a
  # closure operation, so they are set aside to keep the intermediate set
  # small. In full-context mode this separate handling is also what makes
  # the alternative matching the longest overall sequence win.
  skippedStopStates = nil

  # First figure out where we can reach on input t.
  closure.each do |c|
    puts "testing #{self.getTokenName(t)} at #{c}" if self.debug

    if c.state.kind_of?(RuleStopState)
      if fullCtx || t == Token::EOF
        skippedStopStates = Array.new if skippedStopStates.nil?
        skippedStopStates.push(c)
      end
      next
    end

    c.state.transitions.each do |trans|
      target = self.getReachableTarget(trans, t)
      intermediate.add(ATNConfig.createConfigState(c, target), self.mergeCache) if target
    end
  end

  # Shortcut: a single configuration, or a set that already predicts one
  # alternative, needs no closure. The shortcut assumes intermediate holds
  # every relevant configuration, which is false when some were withheld in
  # skippedStopStates.
  reach = nil
  if skippedStopStates.nil? &&
     (intermediate.length == 1 || self.getUniqueAlt(intermediate) != ATN::INVALID_ALT_NUMBER)
    reach = intermediate
  end

  # Otherwise run a closure over the intermediate set.
  if reach.nil?
    reach = ATNConfigSet.new(fullCtx)
    closureBusy = Set.new
    treatEofAsEpsilon = t == Token::EOF
    intermediate.each do |c|
      self.closure(c, reach, closureBusy, false, fullCtx, treatEofAsEpsilon)
    end
  end

  if t == Token::EOF
    # After EOF no more input is possible, so only configurations that
    # reached a rule stop state matter. When reach is still the intermediate
    # set no closure ran, so the filter must also look for rule stop states
    # that are merely reachable. This runs before the skipped stop states
    # are added back because those already satisfy the condition.
    reach = self.removeAllConfigsNotInRuleStopState(reach, reach.equal?(intermediate))
  end

  # In full-context mode the skipped configurations reached the end of the
  # start rule; they are added back only if no configuration of the current
  # closure reached a rule stop state, so the longest match is preferred.
  if skippedStopStates && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))
    skippedStopStates.each { |c| reach.add(c, self.mergeCache) }
  end

  reach.empty? ? nil : reach
end
803
+ #
804
+ # Return a configuration set containing only the configurations from
805
+ # {@code configs} which are in a {@link RuleStopState}. If all
806
+ # configurations in {@code configs} are already in a rule stop state, this
807
+ # method simply returns {@code configs}.
808
+ #
809
+ # <p>When {@code lookToEndOfRule} is true, this method uses
810
+ # {@link ATN#nextTokens} for each configuration in {@code configs} which is
811
+ # not already in a rule stop state to see if a rule stop state is reachable
812
+ # from the configuration via epsilon-only transitions.</p>
813
+ #
814
+ # @param configs the configuration set to update
815
+ # @param lookToEndOfRule when true, this method checks for rule stop states
816
+ # reachable by epsilon-only transitions from each configuration in
817
+ # {@code configs}.
818
+ #
819
+ # @return {@code configs} if all configurations in {@code configs} are in a
820
+ # rule stop state, otherwise return a new configuration set containing only
821
+ # the configurations from {@code configs} which are in a rule stop state
822
+ #
823
def removeAllConfigsNotInRuleStopState(configs, lookToEndOfRule)
  # Nothing to filter: hand the very same set back.
  return configs if PredictionMode.allConfigsInRuleStopStates(configs)

  kept = ATNConfigSet.new(configs.fullCtx)
  configs.each do |config|
    if config.state.kind_of?(RuleStopState)
      kept.add(config, self.mergeCache)
    elsif lookToEndOfRule &&
          config.state.epsilonOnlyTransitions &&
          self.atn.nextTokens(config.state).member?(Token::EPSILON)
      # The next-token set contains EPSILON, so stand the configuration on
      # its rule's stop state.
      stopState = self.atn.ruleToStopState[config.state.ruleIndex]
      kept.add(ATNConfig.new(stopState, nil, nil, nil, config), self.mergeCache)
    end
  end
  kept
end
843
# Builds the start configuration set for a decision: one closure per
# outgoing transition of +p+, with alternatives numbered from 1 in
# transition order.
#
# @param p ATN state whose transitions are the alternatives
# @param ctx rule context the initial prediction context is derived from
# @param fullCtx whether the set is built for full-context prediction
# @return the populated ATNConfigSet
def computeStartState(p, ctx, fullCtx)
  type_check(p, ATNState)
  type_check(ctx, RuleContext)

  # There is always at least the implicit call to the start rule.
  initialContext = PredictionContextFromRuleContext(self.atn, ctx)
  configs = ATNConfigSet.new(fullCtx)

  p.transitions.each_with_index do |transition, position|
    startConfig = ATNConfig.new(transition.target, position + 1, initialContext)
    # Each alternative gets its own busy set.
    self.closure(startConfig, configs, Set.new, true, fullCtx, false)
  end
  configs
end
859
+ #
860
+ # This method transforms the start state computed by
861
+ # {@link #computeStartState} to the special start state used by a
862
+ # precedence DFA for a particular precedence value. The transformation
863
+ # process applies the following changes to the start state's configuration
864
+ # set.
865
+ #
866
+ # <ol>
867
+ # <li>Evaluate the precedence predicates for each configuration using
868
+ # {@link SemanticContext#evalPrecedence}.</li>
869
+ # <li>Remove all configurations which predict an alternative greater than
870
+ # 1, for which another configuration that predicts alternative 1 is in the
871
+ # same ATN state with the same prediction context. This transformation is
872
+ # valid for the following reasons:
873
+ # <ul>
874
+ # <li>The closure block cannot contain any epsilon transitions which bypass
875
+ # the body of the closure, so all states reachable via alternative 1 are
876
+ # part of the precedence alternatives of the transformed left-recursive
877
+ # rule.</li>
878
+ # <li>The "primary" portion of a left recursive rule cannot contain an
879
+ # epsilon transition, so the only way an alternative other than 1 can exist
880
+ # in a state that is also reachable via alternative 1 is by nesting calls
881
+ # to the left-recursive rule, with the outer calls not being at the
882
+ # preferred precedence level.</li>
883
+ # </ul>
884
+ # </li>
885
+ # </ol>
886
+ #
887
+ # <p>
888
+ # The prediction context must be considered by this filter to address
889
+ # situations like the following.
890
+ # </p>
891
+ # <code>
892
+ # <pre>
893
+ # grammar TA;
894
+ # prog: statement* EOF;
895
+ # statement: letterA | statement letterA 'b' ;
896
+ # letterA: 'a';
897
+ # </pre>
898
+ # </code>
899
+ # <p>
900
+ # In the above grammar, the ATN state immediately before the token
901
+ # reference {@code 'a'} in {@code letterA} is reachable from the left edge
902
+ # of both the primary and closure blocks of the left-recursive rule
903
+ # {@code statement}. The prediction context associated with each of these
904
+ # configurations distinguishes between them, and prevents the alternative
905
+ # which stepped out to {@code prog} (and then back in to {@code statement})
906
+ # from being eliminated by the filter.
907
+ # </p>
908
+ #
909
+ # @param configs The configuration set computed by
910
+ # {@link #computeStartState} as the start state for the DFA.
911
+ # @return The transformed configuration set representing the start state
912
+ # for a precedence DFA at a particular precedence level (determined by
913
+ # calling {@link Parser#getPrecedence}).
914
+ #
915
def applyPrecedenceFilter(configs)
  # Prediction context of every alternative-1 configuration that survives,
  # keyed by ATN state number.
  contextByAlt1State = {}
  filtered = ATNConfigSet.new(configs.fullCtx)

  # Pass 1: alternative 1, with its precedence predicates evaluated.
  configs.each do |config|
    next unless config.alt == 1

    evaluated = config.semanticContext.evalPrecedence(self.parser, self.outerContext)
    next if evaluated.nil? # the configuration was eliminated

    contextByAlt1State[config.state.stateNumber] = config.context
    survivor = if evaluated != config.semanticContext
                 ATNConfig.new(nil, nil, nil, evaluated, config)
               else
                 config
               end
    filtered.add(survivor, self.mergeCache)
  end

  # Pass 2: every other alternative, unless alternative 1 already covers
  # the same ATN state with an equal prediction context. A future version
  # could also subtract prediction-context graphs here.
  configs.each do |config|
    next if config.alt == 1 # already handled
    next if contextByAlt1State[config.state.stateNumber] == config.context # eliminated

    filtered.add(config, self.mergeCache)
  end
  filtered
end
946
# Returns the target of +trans+ when the transition matches +ttype+ within
# the range 0..atn.maxTokenType, and nil otherwise.
def getReachableTarget(trans, ttype)
  trans.matches(ttype, 0, self.atn.maxTokenType) ? trans.target : nil
end
953
+
954
# Collects one semantic context per alternative for the alternatives listed
# in +ambigAlts+.
#
# @param ambigAlts alternatives to collect predicates for (member? is used)
# @param configs configurations to scan (alt and semanticContext are read)
# @param nalts number of alternatives of the decision
# @return an array indexed by alternative number (slot 0 unused), or nil
#   when no alternative ends up with a context other than SemanticContext.NONE
def getPredsForAmbigAlts(ambigAlts, configs, nalts)
  # Each slot ends up in one of three states:
  #   nil       - no configuration for that alternative was seen
  #   NONE      - at least one configuration of it is unpredicated
  #   otherwise - every configuration of it carries a predicate
  # which is why NONE || anything == NONE.
  altToPred = Array.new(nalts + 1)
  configs.each do |c|
    next unless ambigAlts.member?(c.alt)

    altToPred[c.alt] = SemanticContext.orContext(altToPred[c.alt], c.semanticContext)
  end

  predicatedAlts = 0
  1.upto(nalts) do |alt|
    if altToPred[alt].nil?
      altToPred[alt] = SemanticContext.NONE
    elsif !altToPred[alt].equal?(SemanticContext.NONE)
      predicatedAlts += 1
    end
  end

  # Without a single predicated alternative there is nothing to report.
  altToPred = nil if predicatedAlts == 0

  puts "getPredsForAmbigAlts result #{altToPred}" if self.debug
  altToPred
end
991
# Pairs each ambiguous alternative with its semantic context.
#
# Alternatives are numbered from 1, so slot 0 of +altToPred+ is skipped.
# Scanning it (it holds nil, which is not SemanticContext.NONE) used to mark
# every input as containing a predicate, so nil was never returned.
#
# @param ambigAlts alternatives to emit pairs for (member? is used); may be nil
# @param altToPred array indexed by alternative number; unpredicated
#   alternatives hold SemanticContext.NONE
# @return an array of PredPrediction, or nil when no slot from 1 upwards
#   holds anything other than SemanticContext.NONE
def getPredicatePredictions(ambigAlts, altToPred)
  pairs = []
  containsPredicate = false

  (1...altToPred.length).each do |alt|
    pred = altToPred[alt]
    pairs.push(PredPrediction.new(pred, alt)) if ambigAlts && ambigAlts.member?(alt)
    containsPredicate = true unless pred.equal?(SemanticContext.NONE)
  end

  containsPredicate ? pairs : nil
end
1011
+ #
1012
+ # This method is used to improve the localization of error messages by
1013
+ # choosing an alternative rather than throwing a
1014
+ # {@link NoViableAltException} in particular prediction scenarios where the
1015
+ # {@link #ERROR} state was reached during ATN simulation.
1016
+ #
1017
+ # <p>
1018
+ # The default implementation of this method uses the following
1019
+ # algorithm to identify an ATN configuration which successfully parsed the
1020
+ # decision entry rule. Choosing such an alternative ensures that the
1021
+ # {@link ParserRuleContext} returned by the calling rule will be complete
1022
+ # and valid, and the syntax error will be reported later at a more
1023
+ # localized location.</p>
1024
+ #
1025
+ # <ul>
1026
+ # <li>If a syntactically valid path or paths reach the end of the decision rule and
1027
+ # they are semantically valid if predicated, return the min associated alt.</li>
1028
+ # <li>Else, if a semantically invalid but syntactically valid path exist
1029
+ # or paths exist, return the minimum associated alt.
1030
+ # </li>
1031
+ # <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li>
1032
+ # </ul>
1033
+ #
1034
+ # <p>
1035
+ # In some scenarios, the algorithm described above could predict an
1036
+ # alternative which will result in a {@link FailedPredicateException} in
1037
+ # the parser. Specifically, this could occur if the <em>only</em> configuration
1038
+ # capable of successfully parsing to the end of the decision rule is
1039
+ # blocked by a semantic predicate. By choosing this alternative within
1040
+ # {@link #adaptivePredict} instead of throwing a
1041
+ # {@link NoViableAltException}, the resulting
1042
+ # {@link FailedPredicateException} in the parser will identify the specific
1043
+ # predicate which is preventing the parser from successfully parsing the
1044
+ # decision rule, which helps developers identify and correct logic errors
1045
+ # in semantic predicates.
1046
+ # </p>
1047
+ #
1048
+ # @param configs The ATN configurations which were valid immediately before
1049
+ # the {@link #ERROR} state was reached
1050
+ # @param outerContext This is the \gamma_0 initial parser context from the paper
1051
+ # or the parser stack at the instant before prediction commences.
1052
+ #
1053
+ # @return The value to return from {@link #adaptivePredict}, or
1054
+ # {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not
1055
+ # identified and {@link #adaptivePredict} should report an error instead.
1056
+ #
1057
def getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs, outerContext)
  # Partition the configurations by whether their predicates currently hold.
  passing, blocked = self.splitAccordingToSemanticValidity(configs, outerContext)

  # First choice: an alternative that finished the decision entry rule and
  # is also semantically valid.
  viable = self.getAltThatFinishedDecisionEntryRule(passing)
  return viable unless viable == ATN::INVALID_ALT_NUMBER

  # Second choice: a syntactically valid path whose predicate failed.
  # With no such configurations there is nothing left to try.
  return ATN::INVALID_ALT_NUMBER unless blocked.length > 0

  # This is either a real alternative or ATN::INVALID_ALT_NUMBER itself.
  self.getAltThatFinishedDecisionEntryRule(blocked)
end
1072
# Returns the smallest alternative among the configurations that either
# dipped into the outer context or sit on a RuleStopState whose context has
# an empty path; returns ATN::INVALID_ALT_NUMBER when no configuration
# qualifies.
def getAltThatFinishedDecisionEntryRule(configs)
  finished_alts = []
  configs.each do |cfg|
    left_rule = cfg.reachesIntoOuterContext > 0
    at_rule_end = !left_rule && cfg.state.kind_of?(RuleStopState) && cfg.context.hasEmptyPath()
    finished_alts << cfg.alt if left_rule || at_rule_end
  end
  finished_alts.empty? ? ATN::INVALID_ALT_NUMBER : finished_alts.min
end
1085
+ # Walk the list of configurations and split them according to
1086
+ # those that have preds evaluating to true/false. If no pred, assume
1087
+ # true pred and include in succeeded set. Returns Pair of sets.
1088
+ #
1089
+ # Create a new set so as not to alter the incoming parameter.
1090
+ #
1091
+ # Assumption: the input stream has been restored to the starting point
1092
+ # prediction, which is where predicates need to evaluate.
1093
+ #
1094
# Splits +configs+ into two fresh ATNConfigSet instances (the incoming set is
# not modified): configurations whose semantic context is NONE or evaluates
# truthy, and configurations whose semantic context evaluates falsy.
#
# @return [Array] two-element array: [succeeded, failed]
def splitAccordingToSemanticValidity(configs, outerContext)
  passed = ATNConfigSet.new(configs.fullCtx)
  rejected = ATNConfigSet.new(configs.fullCtx)
  configs.each do |cfg|
    sem = cfg.semanticContext
    # An unpredicated configuration counts as "always true".
    holds = sem.equal?(SemanticContext.NONE) || sem.eval(self.parser, outerContext)
    bucket = holds ? passed : rejected
    bucket.add(cfg)
  end
  [passed, rejected]
end
1111
+ # Look through a list of predicate/alt pairs, returning alts for the
1112
+ # pairs that win. A {@code NONE} predicate indicates an alt containing an
1113
+ # unpredicated config which behaves as "always true." If !complete
1114
+ # then we stop at the first predicate that evaluates to true. This
1115
+ # includes pairs with null predicates.
1116
+ #
1117
# Collects the alternatives of every predicate/alt pair that wins: pairs
# whose predicate is NONE win unconditionally, other pairs win when their
# predicate evaluates truthy. When +complete+ is falsy the scan stops at the
# first winner.
#
# @return [Set] the winning alternatives
def evalSemanticContext(predPredictions, outerContext, complete)
  winners = Set.new
  predPredictions.each do |pair|
    if pair.pred.equal?(SemanticContext.NONE)
      winners.add(pair.alt)
    else
      verdict = pair.pred.eval(self.parser, outerContext)
      tracing = self.debug || self.dfa_debug
      puts "eval pred #{pair}=#{verdict}" if tracing
      # A failed predicate neither predicts nor ends the scan.
      next unless verdict
      puts "PREDICT #{pair.alt}" if tracing
      winners.add(pair.alt)
    end
    # Reached only after recording a winner.
    break unless complete
  end
  winners
end
1140
+ # TODO: If we are doing predicates, there is no point in pursuing
1141
+ # closure operations if we reach a DFA state that uniquely predicts
1142
+ # alternative. We will not be caching that DFA state and it is a
1143
+ # waste to pursue the closure. Might have to advance when we do
1144
+ # ambig detection though :(
1145
+ #
1146
+
1147
# Entry point for the closure operation: delegates to
# closureCheckingStopState with a starting depth of 0.
def closure(config, configs, closureBusy, collectPredicates, fullCtx, treatEofAsEpsilon)
  self.closureCheckingStopState(config, configs, closureBusy, collectPredicates,
                                fullCtx, 0, treatEofAsEpsilon)
end
1153
+
1154
+
1155
# Closure step that first handles configurations sitting on a RuleStopState
# (popping each return state recorded in the prediction context) and
# otherwise hands the configuration to closure_ to walk epsilon edges.
def closureCheckingStopState(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
  puts "closure(#{config.toString(self.parser,true)})" if self.debug

  if config.state.kind_of?(RuleStopState)
    # We hit the end of a rule. If we have context info, run through all
    # possible stack tops recorded in it.
    unless config.context.isEmpty()
      config.context.length.times do |idx|
        if config.context.getReturnState(idx) == PredictionContext::EMPTY_RETURN_STATE
          if fullCtx
            configs.add(ATNConfig.new(config.state, nil, PredictionContext.EMPTY, nil, config), self.mergeCache)
          else
            # No context info for this stack top: just chase follow links.
            puts "FALLING off rule " + self.getRuleName(config.state.ruleIndex) if self.debug
            self.closure_(config, configs, closureBusy, collectPredicates,
                          fullCtx, depth, treatEofAsEpsilon)
          end
          next
        end

        resumeState = self.atn.states[config.context.getReturnState(idx)]
        poppedContext = config.context.getParent(idx) # "pop" the return state
        popped = ATNConfig.new(resumeState, config.alt, poppedContext, config.semanticContext)
        # We may have obtained this context AFTER falling off a rule, so carry
        # over how far the configuration has dipped into the outer context.
        popped.reachesIntoOuterContext = config.reachesIntoOuterContext
        self.closureCheckingStopState(popped, configs, closureBusy, collectPredicates,
                                      fullCtx, depth - 1, treatEofAsEpsilon)
      end
      return
    end

    if fullCtx
      # Reached the end of the start rule.
      configs.add(config, self.mergeCache)
      return
    end

    # No context info at all: fall through and chase follow links.
    puts "FALLING off rule #{self.getRuleName(config.state.ruleIndex)}" if self.debug
  end

  self.closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
end
1204
+ # Do the actual work of walking epsilon edges#
1205
# Does the actual work of walking epsilon edges out of +config+.
#
# For every transition of the configuration's state, getEpsilonTarget is
# asked for a follow-up configuration; each one obtained is fed back into
# closureCheckingStopState.
#
# @param config the configuration whose state's transitions are walked
# @param configs the set being built; +config+ is added to it when its state
#   has non-epsilon transitions
# @param closureBusy set of configurations already expanded after falling off
#   a rule end (guards against infinite recursion in right-recursive rules)
# @param collectPredicates whether predicates are still being collected on
#   this path; collection stops once an ActionTransition is crossed
# @param fullCtx passed through to getEpsilonTarget and the recursive call
# @param depth depth relative to the decision entry context; 0 means "in
#   context", negative means the path has stepped out of the entry context
# @param treatEofAsEpsilon passed through to getEpsilonTarget
def closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
  state = config.state
  # Optimization: only states with non-epsilon transitions are recorded.
  # Do not return here, because EOF transitions can act as both epsilon
  # transitions and non-epsilon transitions.
  configs.add(config, self.mergeCache) unless state.epsilonOnlyTransitions

  state.transitions.each do |t|
    # `&&` is required here: with the low-precedence `and`, the assignment
    # would bind first and the ActionTransition check would be thrown away.
    continueCollecting = collectPredicates && !t.kind_of?(ActionTransition)
    c = self.getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
    next unless c

    newDepth = depth
    if config.state.kind_of?(RuleStopState)
      # The target fell off the end of a rule; mark the resulting config as
      # having dipped into the outer context. We cannot get here if the
      # incoming config was a rule stop and we had context. Tracking how far
      # we dip lets callers avoid evaluating context-dependent predicates
      # when the count is > 0.
      #
      # Avoid infinite recursion for right-recursive rules.
      next if closureBusy.member?(c)
      closureBusy.add(c)

      # NOTE: the precedence-DFA handling of the reference runtime
      # (setPrecedenceFilterSuppressed) is not ported in this method.

      c.reachesIntoOuterContext = c.reachesIntoOuterContext + 1
      configs.dipsIntoOuterContext = true # TODO: can remove? only care when we add to set per middle of this method
      newDepth -= 1
      puts "dips into outer ctx: #{c}" if self.debug
    elsif t.kind_of?(RuleTransition)
      # Latch when newDepth goes negative: once we step out of the entry
      # context we can't return.
      newDepth += 1 if newDepth >= 0
    end

    self.closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
  end
end
1262
+
1263
# Looks up the rule name for +index+ in the parser's ruleNames; falls back to
# a "<rule N>" placeholder when there is no parser or the index is negative.
def getRuleName(index)
  return "<rule #{index}>" unless self.parser && index >= 0
  self.parser.ruleNames[index]
end
1270
+
1271
# Dispatches on the transition's serialization type and returns the
# configuration reached by following +t+ without consuming input, or nil
# when the transition cannot be followed that way.
def getEpsilonTarget(config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon)
  case t.serializationType
  when Transition::RULE
    self.ruleTransition(config, t)
  when Transition::PRECEDENCE
    self.precedenceTransition(config, t, collectPredicates, inContext, fullCtx)
  when Transition::PREDICATE
    self.predTransition(config, t, collectPredicates, inContext, fullCtx)
  when Transition::ACTION
    self.actionTransition(config, t)
  when Transition::EPSILON
    ATNConfig.new(t.target, nil, nil, nil, config)
  when Transition::ATOM, Transition::RANGE, Transition::SET
    # EOF transitions act like epsilon transitions after the first EOF
    # transition is traversed.
    eofAsEpsilon = treatEofAsEpsilon && t.matches(Token::EOF, 0, 1)
    eofAsEpsilon ? ATNConfig.createConfigState(config, t.target) : nil
  else
    nil
  end
end
1300
# Follows an action edge: optionally traces it, then returns a new
# configuration on the transition's target derived from +config+.
def actionTransition(config, t)
  puts "ACTION edge #{t.ruleIndex}:#{t.actionIndex}" if self.debug
  ATNConfig.new(t.target, nil, nil, nil, config)
end
1306
# Follows a precedence-predicate edge.
#
# When predicates are not being collected, or we are out of context, the
# edge is followed unconditionally. In full-context mode the predicate is
# evaluated immediately (with the input temporarily rewound to startIndex)
# and nil is returned if it fails. Otherwise the predicate is AND-ed into the
# configuration's semantic context.
def precedenceTransition(config, pt, collectPredicates, inContext, fullCtx)
  if self.debug
    puts "PRED (collectPredicates=#{collectPredicates}) #{pt.precedence}>=_p, ctx dependent=true"
    puts "context surrounding pred is #{self.parser.getRuleInvocationStack()}" if self.parser
  end

  target =
    if !(collectPredicates && inContext)
      ATNConfig.new(pt.target, nil, nil, nil, config)
    elsif fullCtx
      # Evaluating predicates on the fly during closure reduces the size of
      # the config sets and removes the need to test them again during
      # conflict resolution.
      resumeAt = self.input.index
      self.input.seek(self.startIndex)
      holds = pt.getPredicate().eval(self.parser, self.outerContext)
      self.input.seek(resumeAt)
      holds ? ATNConfig.new(pt.target, nil, nil, nil, config) : nil # no pred context
    else
      combined = SemanticContext.andContext(config.semanticContext, pt.getPredicate())
      ATNConfig.new(pt.target, nil, nil, combined, config)
    end

  puts "config from pred transition=#{target}" if self.debug
  target
end
1340
# Follows a semantic-predicate edge.
#
# The predicate is taken into account only while predicates are being
# collected and it is either context independent or we are still in context.
# In full-context mode it is evaluated immediately (input temporarily rewound
# to startIndex) and nil is returned if it fails; otherwise it is AND-ed into
# the configuration's semantic context. In every other case the edge is
# followed unconditionally.
def predTransition(config, pt, collectPredicates, inContext, fullCtx)
  if self.debug
    puts "PRED (collectPredicates=#{collectPredicates}) #{pt.ruleIndex}:#{pt.predIndex}, ctx dependent=#{pt.isCtxDependent}"
    puts "context surrounding pred is #{self.parser.getRuleInvocationStack()}" if self.parser
  end

  applies = collectPredicates && (!pt.isCtxDependent || inContext)
  target =
    if !applies
      ATNConfig.new(pt.target, nil, nil, nil, config)
    elsif fullCtx
      # Evaluating predicates on the fly during closure reduces the size of
      # the config sets and removes the need to test them again during
      # conflict resolution.
      resumeAt = self.input.index
      self.input.seek(self.startIndex)
      holds = pt.getPredicate().eval(self.parser, self.outerContext)
      self.input.seek(resumeAt)
      holds ? ATNConfig.new(pt.target, nil, nil, nil, config) : nil # no pred context
    else
      combined = SemanticContext.andContext(config.semanticContext, pt.getPredicate())
      ATNConfig.new(pt.target, nil, nil, combined, config)
    end

  puts "config from pred transition=#{target}" if self.debug
  target
end
1374
# Follows a rule-invocation edge: pushes the transition's follow state onto
# the configuration's prediction context and returns a configuration on the
# invoked rule's start (the transition target).
def ruleTransition(config, t)
  puts "CALL rule #{self.getRuleName(t.target.ruleIndex) }, ctx=#{config.context}" if self.debug
  followState = t.followState
  pushed = SingletonPredictionContext.create(config.context, followState.stateNumber)
  ATNConfig.new(t.target, nil, pushed, nil, config)
end
1382
# Returns the alternatives obtained by passing the conflicting-alt subsets of
# +configs+ to PredictionMode.getAlts.
def getConflictingAlts(configs)
  PredictionMode.getAlts(PredictionMode.getConflictingAltSubsets(configs))
end
1386
+ # Sam pointed out a problem with the previous definition, v3, of
1387
+ # ambiguous states. If we have another state associated with conflicting
1388
+ # alternatives, we should keep going. For example, the following grammar
1389
+ #
1390
+ # s : (ID | ID ID?) ';' ;
1391
+ #
1392
+ # When the ATN simulation reaches the state before ';', it has a DFA
1393
+ # state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally
1394
+ # 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node
1395
+ # because alternative two has another way to continue, via [6|2|[]].
1396
+ # The key is that we have a single state that has config's only associated
1397
+ # with a single alternative, 2, and crucially the state transitions
1398
+ # among the configurations are all non-epsilon transitions. That means
1399
+ # we don't consider any conflicts that include alternative 2. So, we
1400
+ # ignore the conflict between alts 1 and 2. We ignore a set of
1401
+ # conflicting alts when there is an intersection with an alternative
1402
+ # associated with a single alt state in the state&rarr;config-list map.
1403
+ #
1404
+ # It's also the case that we might have two conflicting configurations but
1405
+ # also a 3rd nonconflicting configuration for a different alternative:
1406
+ # [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar:
1407
+ #
1408
+ # a : A | A | A B ;
1409
+ #
1410
+ # After matching input A, we reach the stop state for rule A, state 1.
1411
+ # State 8 is the state right before B. Clearly alternatives 1 and 2
1412
+ # conflict and no amount of further lookahead will separate the two.
1413
+ # However, alternative 3 will be able to continue and so we do not
1414
+ # stop working on this state. In the previous example, we're concerned
1415
+ # with states associated with the conflicting alternatives. Here alt
1416
+ # 3 is not associated with the conflicting configs, but since we can continue
1417
+ # looking for input reasonably, I don't declare the state done. We
1418
+ # ignore a set of conflicting alts when we have an alternative
1419
+ # that we still need to pursue.
1420
+ #
1421
+
1422
# Returns a one-element Set holding the unique alternative when +configs+
# has one; otherwise returns the set's recorded conflicting alternatives.
def getConflictingAltsOrUniqueAlt(configs)
  return configs.conflictingAlts if configs.uniqueAlt == ATN::INVALID_ALT_NUMBER
  Set.new([configs.uniqueAlt])
end
1432
# Returns a display name for token type +t+.
#
# * Token::EOF yields "EOF".
# * When the parser exposes tokenNames and +t+ indexes a non-nil entry, the
#   result is "NAME<t>".
# * When +t+ lies outside tokenNames a diagnostic is printed and the numeric
#   value is returned as a string.
# * In every other case (no parser, no tokenNames, nil entry) the numeric
#   value is returned as a string.
#
# @param t [Integer] token type
# @return [String]
def getTokenName(t)
  return "EOF" if t == Token::EOF

  names = self.parser && self.parser.tokenNames
  if names
    # Negative types must be rejected explicitly: Ruby would otherwise index
    # from the end of the array and report some unrelated token's name.
    if t < 0 || t >= names.length
      puts "#{t} ttype out of range: #{names}"
      puts self.parser.getInputStream().getTokens().to_s
    else
      name = names[t]
      # A nil slot has no name to show; fall through to the numeric form.
      return "#{name}<#{t}>" if name
    end
  end
  t.to_s
end
1446
# Returns the display name of the next lookahead token type of +input+.
def getLookaheadName(input)
  lookahead = input.LA(1)
  getTokenName(lookahead)
end
1449
+ # Used for debugging in adaptivePredict around execATN but I cut
1450
+ # it out for clarity now that alg. works well. We can leave this
1451
+ # "dead" code for a bit.
1452
+ #
1453
# Debugging aid: writes every dead-end configuration carried by +nvae+ to
# standard error, one per line, followed by a short description of the first
# outgoing transition of its state ("Atom ...", "Set ...", "~Set ..." or
# "no edges").
#
# @param nvae exception object responding to getDeadEndConfigs
def dumpDeadEndConfigs(nvae)
  $stderr.puts "dead end configs: "
  nvae.getDeadEndConfigs().each do |c|
    trans = "no edges"
    # Only the first transition is described; nil when the state has none.
    first = c.state.transitions[0]
    if first.kind_of?(AtomTransition)
      trans = "Atom #{self.getTokenName(first.label)}"
    elsif first.kind_of?(SetTransition)
      neg = first.kind_of?(NotSetTransition) ? "~" : ""
      trans = "#{neg}Set #{first.set}"
    end
    # Without this line the description above is computed and thrown away.
    $stderr.puts "#{c.toString(self.parser, true)}:#{trans}"
  end
end
1474
# Builds (does not raise) a NoViableAltException describing a failed
# prediction that started at +startIndex+ and stopped at the current
# lookahead token.
def noViableAlt(input, outerContext, configs, startIndex)
  startToken = input.get(startIndex)
  offendingToken = input.LT(1)
  NoViableAltException.new(self.parser, input, startToken, offendingToken, configs, outerContext)
end
1477
+
1478
# Returns the single alternative shared by every configuration in +configs+,
# or ATN::INVALID_ALT_NUMBER when the set is empty or mentions more than one
# alternative.
def getUniqueAlt(configs)
  shared = ATN::INVALID_ALT_NUMBER
  configs.each do |cfg|
    if shared == ATN::INVALID_ALT_NUMBER
      shared = cfg.alt # first alternative seen
      next
    end
    return ATN::INVALID_ALT_NUMBER unless cfg.alt == shared
  end
  shared
end
1489
+ #
1490
+ # Add an edge to the DFA, if possible. This method calls
1491
+ # {@link #addDFAState} to ensure the {@code to} state is present in the
1492
+ # DFA. If {@code from} is {@code null}, or if {@code t} is outside the
1493
+ # range of edges that can be represented in the DFA tables, this method
1494
+ # returns without adding the edge to the DFA.
1495
+ #
1496
+ # <p>If {@code to} is {@code null}, this method returns {@code null}.
1497
+ # Otherwise, this method returns the {@link DFAState} returned by calling
1498
+ # {@link #addDFAState} for the {@code to} state.</p>
1499
+ #
1500
+ # @param dfa The DFA
1501
+ # @param from The source state for the edge
1502
+ # @param t The input symbol
1503
+ # @param to The target state for the edge
1504
+ #
1505
+ # @return If {@code to} is {@code null}, this method returns {@code null};
1506
+ # otherwise this method returns the result of calling {@link #addDFAState}
1507
+ # on {@code to}
1508
+ #
1509
# Registers +to+ in the DFA and, when possible, links it from +from_+ on
# symbol +t+. Returns nil when +to+ is nil; otherwise returns the state
# instance that addDFAState stored. The edge itself is skipped when +from_+
# is nil or +t+ lies outside -1..atn.maxTokenType.
def addDFAEdge(dfa, from_, t, to)
  puts "EDGE #{from_} -> #{to} upon #{self.getTokenName(t)}" if self.debug
  return nil if to.nil?

  # Use the existing equivalent state if there is one, not the incoming one.
  to = self.addDFAState(dfa, to)
  unrepresentable = from_.nil? || t < -1 || t > self.atn.maxTokenType
  return to if unrepresentable

  # Edges are stored shifted by one so that symbol -1 lands in slot 0.
  from_.edges = Array.new(self.atn.maxTokenType + 2) if from_.edges.nil?
  from_.edges[t + 1] = to

  if self.debug
    names = self.parser.nil? ? nil : self.parser.tokenNames
    print "DFA=\n#{dfa.toString(names)}"
  end
  to
end
1538
+ #
1539
+ # Add state {@code D} to the DFA if it is not already present, and return
1540
+ # the actual instance stored in the DFA. If a state equivalent to {@code D}
1541
+ # is already in the DFA, the existing state is returned. Otherwise this
1542
+ # method returns {@code D} after adding it to the DFA.
1543
+ #
1544
+ # <p>If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and
1545
+ # does not change the DFA.</p>
1546
+ #
1547
+ # @param dfa The dfa
1548
+ # @param D The DFA state to add
1549
+ # @return The state stored in the DFA. This will be either the existing
1550
+ # state if {@code D} is already in the DFA, or {@code D} itself if the
1551
+ # state was not already present.
1552
+ #
1553
# Ensures +cD+ is present in +dfa+ and returns the stored instance: the
# existing entry when an equivalent state is already registered, otherwise
# +cD+ itself after numbering it and freezing its config set. The ERROR
# sentinel is returned untouched and never stored.
def addDFAState(dfa, cD)
  return cD if cD.equal?(ATNSimulator::ERROR)

  known = dfa.states[cD]
  return known if known

  cD.stateNumber = dfa.states.length
  unless cD.configs.readonly
    cD.configs.optimizeConfigs(self)
    cD.configs.setReadonly(true)
  end
  dfa.states[cD] = cD
  puts "adding new DFA state: #{cD}" if self.debug
  cD
end
1574
# Traces (when debug or retry_debug is set) and forwards an
# "attempting full context" event to the parser's error-listener dispatch.
# Nothing is forwarded when there is no parser.
#
# @param dfa the DFA of the decision being predicted
# @param conflictingAlts forwarded to the listener
# @param configs forwarded to the listener
# @param startIndex first input index of the reported range
# @param stopIndex last input index of the reported range
def reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex)
  recognizer = self.parser
  if self.debug || self.retry_debug
    interval = startIndex..stopIndex
    # The input text is only available through the parser; tracing must not
    # blow up when the simulator runs without one.
    text = recognizer && recognizer.getTokenStream().getText(interval)
    puts "reportAttemptingFullContext decision=#{dfa.decision}:#{configs}, input=#{text}"
  end
  if recognizer
    recognizer.getErrorListenerDispatch().reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
  end
end
1584
# Traces (when debug or retry_debug is set) and forwards a
# "context sensitivity" event to the parser's error-listener dispatch.
# Nothing is forwarded when there is no parser.
#
# @param dfa the DFA of the decision being predicted
# @param prediction forwarded to the listener
# @param configs forwarded to the listener
# @param startIndex first input index of the reported range
# @param stopIndex last input index of the reported range
def reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex)
  recognizer = self.parser
  if self.debug || self.retry_debug
    interval = startIndex..stopIndex
    # The input text is only available through the parser; tracing must not
    # blow up when the simulator runs without one.
    text = recognizer && recognizer.getTokenStream().getText(interval)
    puts "reportContextSensitivity decision=#{dfa.decision}:#{configs}, input=#{text}"
  end
  if recognizer
    recognizer.getErrorListenerDispatch().reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
  end
end
1594
+
1595
+ # If context sensitive parsing, we know it's ambiguity not conflict#
1596
# Traces (when debug or retry_debug is set) and forwards an ambiguity event
# to the parser's error-listener dispatch. Nothing is forwarded when there
# is no parser.
#
# @param dfa the DFA of the decision being predicted
# @param cD not used by this method
# @param startIndex first input index of the reported range
# @param stopIndex last input index of the reported range
# @param exact forwarded to the listener
# @param ambigAlts forwarded to the listener
# @param configs forwarded to the listener
def reportAmbiguity(dfa, cD, startIndex, stopIndex, exact, ambigAlts, configs)
  recognizer = self.parser
  if self.debug || self.retry_debug
    interval = startIndex..stopIndex
    # The input text is only available through the parser; tracing must not
    # blow up when the simulator runs without one.
    text = recognizer && recognizer.getTokenStream().getText(interval)
    puts "reportAmbiguity #{ambigAlts}:#{configs}, input=#{text}"
  end
  if recognizer
    recognizer.getErrorListenerDispatch().reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
  end
end
1622
+ end