antlr4 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +27 -0
  3. data/README.md +46 -0
  4. data/lib/antlr4.rb +262 -0
  5. data/lib/antlr4/BufferedTokenStream.rb +306 -0
  6. data/lib/antlr4/CommonTokenFactory.rb +53 -0
  7. data/lib/antlr4/CommonTokenStream.rb +56 -0
  8. data/lib/antlr4/FileStream.rb +14 -0
  9. data/lib/antlr4/InputStream.rb +82 -0
  10. data/lib/antlr4/IntervalSet.rb +341 -0
  11. data/lib/antlr4/LL1Analyzer.rb +177 -0
  12. data/lib/antlr4/Lexer.rb +335 -0
  13. data/lib/antlr4/ListTokenSource.rb +140 -0
  14. data/lib/antlr4/Parser.rb +562 -0
  15. data/lib/antlr4/ParserInterpreter.rb +149 -0
  16. data/lib/antlr4/ParserRuleContext.rb +162 -0
  17. data/lib/antlr4/PredictionContext.rb +690 -0
  18. data/lib/antlr4/Recognizer.rb +162 -0
  19. data/lib/antlr4/RuleContext.rb +226 -0
  20. data/lib/antlr4/Token.rb +124 -0
  21. data/lib/antlr4/TokenFactory.rb +3 -0
  22. data/lib/antlr4/TokenSource.rb +4 -0
  23. data/lib/antlr4/TokenStream.rb +3 -0
  24. data/lib/antlr4/TraceListener.rb +23 -0
  25. data/lib/antlr4/atn/ATN.rb +133 -0
  26. data/lib/antlr4/atn/ATNConfig.rb +146 -0
  27. data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
  28. data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
  29. data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
  30. data/lib/antlr4/atn/ATNSimulator.rb +43 -0
  31. data/lib/antlr4/atn/ATNState.rb +253 -0
  32. data/lib/antlr4/atn/ATNType.rb +22 -0
  33. data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
  34. data/lib/antlr4/atn/LexerAction.rb +311 -0
  35. data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
  36. data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
  37. data/lib/antlr4/atn/PredictionMode.rb +525 -0
  38. data/lib/antlr4/atn/SemanticContext.rb +355 -0
  39. data/lib/antlr4/atn/Transition.rb +297 -0
  40. data/lib/antlr4/base.rb +60 -0
  41. data/lib/antlr4/dfa/DFA.rb +128 -0
  42. data/lib/antlr4/dfa/DFASerializer.rb +77 -0
  43. data/lib/antlr4/dfa/DFAState.rb +133 -0
  44. data/lib/antlr4/error.rb +151 -0
  45. data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
  46. data/lib/antlr4/error/ErrorListener.rb +109 -0
  47. data/lib/antlr4/error/ErrorStrategy.rb +742 -0
  48. data/lib/antlr4/tree/Chunk.rb +31 -0
  49. data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
  50. data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
  51. data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
  52. data/lib/antlr4/tree/RuleTagToken.rb +39 -0
  53. data/lib/antlr4/tree/TokenTagToken.rb +38 -0
  54. data/lib/antlr4/tree/Tree.rb +204 -0
  55. data/lib/antlr4/tree/Trees.rb +111 -0
  56. data/lib/antlr4/version.rb +5 -0
  57. data/lib/antlr4/xpath/XPath.rb +354 -0
  58. data/lib/double_key_map.rb +78 -0
  59. data/lib/java_symbols.rb +24 -0
  60. data/lib/uuid.rb +87 -0
  61. data/test/test_intervalset.rb +664 -0
  62. data/test/test_tree.rb +140 -0
  63. data/test/test_uuid.rb +122 -0
  64. metadata +109 -0
@@ -0,0 +1,311 @@
1
+
2
# Integer tags identifying each kind of lexer action. Every LexerAction
# stores one of these values as its actionType.
class LexerActionType
  # include JavaSymbols

  # The type of a {@link LexerChannelAction} action.
  CHANNEL = 0
  # The type of a {@link LexerCustomAction} action.
  CUSTOM = 1
  # The type of a {@link LexerModeAction} action.
  MODE = 2
  # The type of a {@link LexerMoreAction} action.
  MORE = 3
  # The type of a {@link LexerPopModeAction} action.
  POP_MODE = 4
  # The type of a {@link LexerPushModeAction} action.
  PUSH_MODE = 5
  # The type of a {@link LexerSkipAction} action.
  SKIP = 6
  # The type of a {@link LexerTypeAction} action.
  TYPE = 7
end
13
+
14
# Base class for all lexer actions. Holds the action's type tag and a flag
# telling whether the action depends on the input position.
class LexerAction

  attr_accessor :actionType, :isPositionDependent

  # @param action the LexerActionType value identifying this action
  def initialize(action)
    @actionType = action
    # Plain actions are position independent; subclasses may flip this flag.
    @isPositionDependent = false
  end

  # Hash derived from the textual form of the action type.
  def hash
    "#{actionType}".hash
  end

  # Delegates to ==, so subclasses that override == get a matching eql?.
  def eql?(other)
    self == other
  end

  # Base actions compare by identity only.
  def ==(other)
    equal?(other)
  end
end
34
+
35
+ #
36
+ # Implements the {@code skip} lexer action by calling {@link Lexer#skip}.
37
+ #
38
+ # <p>The {@code skip} command does not have any parameters, so this action is
39
+ # implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
40
class LexerSkipAction < LexerAction

  # Holds the shared instance; created lazily because the action has no
  # parameters and one object can serve every use.
  @@INSTANCE = nil

  # Returns the shared instance, building it on first access.
  def self.INSTANCE
    @@INSTANCE ||= LexerSkipAction.new
  end

  def initialize
    super(LexerActionType::SKIP)
  end

  # Performs the action by calling +skip+ on the supplied lexer.
  def execute(lexer)
    lexer.skip
  end

  def to_s
    "skip"
  end
end
62
+
63
+ # Implements the {@code type} lexer action by calling {@link Lexer#setType}
64
+ # with the assigned type.
65
class LexerTypeAction < LexerAction

  attr_accessor :type

  # @param _type the token type to assign to the lexer when executed
  def initialize(_type)
    super(LexerActionType::TYPE)
    self.type = _type
  end

  # Performs the action by assigning the stored type to +lexer.type+.
  def execute(lexer)
    lexer.type = type
  end

  # Hash built from the action type tag and the assigned token type.
  def hash
    "#{actionType}#{type}".hash
  end

  # Equal when +other+ is this very object, or another LexerTypeAction
  # carrying the same type. Uses ||/&& with explicit grouping: the keyword
  # operators `or`/`and` share one precedence level and associate left,
  # which makes the intended grouping easy to get wrong.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerTypeAction) && type == other.type)
  end

  def to_s
    "type(#{type})"
  end
end
87
+
88
+ # Implements the {@code pushMode} lexer action by calling
89
+ # {@link Lexer#pushMode} with the assigned mode.
90
class LexerPushModeAction < LexerAction

  attr_accessor :mode

  # @param _mode the lexer mode to push when executed
  def initialize(_mode)
    super(LexerActionType::PUSH_MODE)
    self.mode = _mode
  end

  # Performs the action by calling +pushMode+ on the lexer with the
  # stored mode.
  def execute(lexer)
    lexer.pushMode(mode)
  end

  # Hash built from the action type tag and the mode value.
  def hash
    "#{actionType}#{mode}".hash
  end

  # Equal when +other+ is this very object, or another LexerPushModeAction
  # with the same mode. Written with ||/&& and explicit grouping because
  # `or`/`and` share one precedence level and would group left-to-right.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerPushModeAction) && mode == other.mode)
  end

  def to_s
    "pushMode(#{mode})"
  end
end
116
+
117
+ # Implements the {@code popMode} lexer action by calling {@link Lexer#popMode}.
118
+ #
119
+ # <p>The {@code popMode} command does not have any parameters, so this action is
120
+ # implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
121
class LexerPopModeAction < LexerAction

  # Shared instance, created on first request (the action has no parameters).
  @@INSTANCE = nil

  # Returns the shared instance, constructing it if it does not exist yet.
  def self.INSTANCE
    @@INSTANCE ||= new
  end

  def initialize
    super(LexerActionType::POP_MODE)
  end

  # Performs the action by calling +popMode+ on the supplied lexer.
  def execute(lexer)
    lexer.popMode
  end

  def to_s
    "popMode"
  end
end
142
+
143
+ # Implements the {@code more} lexer action by calling {@link Lexer#more}.
144
+ #
145
+ # <p>The {@code more} command does not have any parameters, so this action is
146
+ # implemented as a singleton instance exposed by {@link #INSTANCE}.</p>
147
class LexerMoreAction < LexerAction

  # Shared instance, created on first request (the action has no parameters).
  @@INSTANCE = nil

  # Returns the shared instance, constructing it if it does not exist yet.
  def self.INSTANCE
    return @@INSTANCE unless @@INSTANCE.nil?
    @@INSTANCE = new
  end

  def initialize
    super(LexerActionType::MORE)
  end

  # Performs the action by calling +more+ on the supplied lexer.
  def execute(lexer)
    lexer.more
  end

  def to_s
    "more"
  end
end
168
+
169
+ # Implements the {@code mode} lexer action by calling {@link Lexer#mode} with
170
+ # the assigned mode.
171
class LexerModeAction < LexerAction

  attr_accessor :mode

  # @param _mode the lexer mode to switch to when executed
  def initialize(_mode)
    super(LexerActionType::MODE)
    self.mode = _mode
  end

  # Performs the action by assigning the stored mode to +lexer.mode+.
  def execute(lexer)
    lexer.mode = mode
  end

  # Hash built from the action type tag and the mode value.
  def hash
    "#{actionType}#{mode}".hash
  end

  # Equal when +other+ is this very object, or another LexerModeAction with
  # the same mode. Written with ||/&& and explicit grouping because
  # `or`/`and` share one precedence level and would group left-to-right.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerModeAction) && mode == other.mode)
  end

  def to_s
    "mode(#{mode})"
  end
end
197
+ # Executes a custom lexer action by calling {@link Recognizer#action} with the
198
+ # rule and action indexes assigned to the custom action. The implementation of
199
+ # a custom action is added to the generated code for the lexer in an override
200
+ # of {@link Recognizer#action} when the grammar is compiled.
201
+ #
202
+ # <p>This class may represent embedded actions created with the <code>{...}</code>
203
+ # syntax in ANTLR 4, as well as actions created for lexer commands where the
204
+ # command argument could not be evaluated when the grammar was compiled.</p>
205
+
206
class LexerCustomAction < LexerAction

  # isPositionDependent is inherited from LexerAction, so it is not
  # re-declared here.
  attr_accessor :ruleIndex, :actionIndex

  # Constructs a custom lexer action with the specified rule and action
  # indexes.
  #
  # @param rule_index The rule index to use for calls to
  # {@link Recognizer#action}.
  # @param action_index The action index to use for calls to
  # {@link Recognizer#action}.
  def initialize(rule_index, action_index)
    super(LexerActionType::CUSTOM)
    @ruleIndex = rule_index
    @actionIndex = action_index
    # Custom actions are always flagged as position dependent.
    @isPositionDependent = true
  end

  # Performs the action by calling +action+ on the lexer with a nil context
  # and the stored rule and action indexes.
  def execute(lexer)
    lexer.action(nil, ruleIndex, actionIndex)
  end

  # Hash built from the action type tag and both indexes.
  def hash
    "#{actionType}#{ruleIndex}#{actionIndex}".hash
  end

  # Equal when +other+ is this very object, or another LexerCustomAction
  # with the same rule and action indexes. Written with ||/&& and explicit
  # grouping because `or`/`and` share one precedence level.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerCustomAction) &&
       ruleIndex == other.ruleIndex && actionIndex == other.actionIndex)
  end

  # Value-based text form. Without it the default Object#to_s embeds the
  # object's identity, so any hash computed from this action's string form
  # would differ between two equal actions.
  def to_s
    "custom(#{ruleIndex},#{actionIndex})"
  end
end
237
+ # Implements the {@code channel} lexer action by calling
238
+ # {@link Lexer#setChannel} with the assigned channel.
239
class LexerChannelAction < LexerAction

  attr_accessor :channel

  # Constructs a new {@code channel} action with the specified channel value.
  # @param _channel The channel value assigned to the lexer when executed.
  def initialize(_channel)
    super(LexerActionType::CHANNEL)
    self.channel = _channel
  end

  # Performs the action by assigning the stored channel to +lexer.channel+.
  def execute(lexer)
    lexer.channel = channel
  end

  # Hash built from the action type tag and the channel value.
  def hash
    "#{actionType}#{channel}".hash
  end

  # Equal when +other+ is this very object, or another LexerChannelAction
  # with the same channel. Written with ||/&& and explicit grouping because
  # `or`/`and` share one precedence level and would group left-to-right.
  def ==(other)
    equal?(other) || (other.kind_of?(LexerChannelAction) && channel == other.channel)
  end

  def to_s
    "channel(#{channel})"
  end
end
267
+ # This implementation of {@link LexerAction} is used for tracking input offsets
268
+ # for position-dependent actions within a {@link LexerActionExecutor}.
269
+ #
270
+ # <p>This action is not serialized as part of the ATN, and is only required for
271
+ # position-dependent lexer actions which appear at a location other than the
272
+ # end of a rule. For more information about DFA optimizations employed for
273
+ # lexer actions, see {@link LexerActionExecutor#append} and
274
+ # {@link LexerActionExecutor#fixOffsetBeforeMatch}.</p>
275
class LexerIndexedCustomAction < LexerAction

  # isPositionDependent is inherited from LexerAction.
  attr_accessor :offset, :action

  # Constructs a new indexed custom action by associating a character offset
  # with a {@link LexerAction}.
  #
  # @param _offset The offset into the input, relative to the token start
  # index, at which the wrapped action should be executed.
  # @param _action The lexer action to execute at that offset.
  def initialize(_offset, _action)
    # Must read the type from the parameter: the +action+ accessor is still
    # nil at this point, so calling it here would raise NoMethodError.
    super(_action.actionType)
    self.offset = _offset
    self.action = _action
    self.isPositionDependent = true
  end

  # Executes the wrapped action on +lexer+.
  def execute(lexer)
    # assume the input stream position was properly set by the calling code
    action.execute(lexer)
  end

  # Hash combining the type tag, the offset and the wrapped action's own
  # hash. Using the wrapped action's #hash rather than its string form keeps
  # the result value-based even when that action defines no #to_s.
  def hash
    [actionType, offset, action].hash
  end

  # Equal when +other+ is this very object, or another
  # LexerIndexedCustomAction with the same offset and an equal wrapped
  # action. Written with ||/&& and explicit grouping because `or`/`and`
  # share one precedence level.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerIndexedCustomAction) &&
       offset == other.offset && action == other.action)
  end
end
@@ -0,0 +1,134 @@
1
+ # Represents an executor for a sequence of lexer actions which are traversed during
2
+ # the matching operation of a lexer rule (token).
3
+ #
4
+ # <p>The executor tracks position information for position-dependent lexer actions
5
+ # efficiently, ensuring that actions appearing only at the end of the rule do
6
+ # not cause bloating of the {@link DFA} created for the lexer.</p>
7
+
8
# Holds an ordered list of lexer actions and runs them against a lexer,
# seeking the input stream as needed for position-dependent actions.
class LexerActionExecutor

  attr_accessor :hashCode, :lexerActions

  # @param _lexerActions [Array] the actions to execute, in order
  def initialize(_lexerActions = Array.new)
    @lexerActions = _lexerActions
    # Cache the hash once at construction. It is derived from each action's
    # own #hash (not #to_s) so that equal action lists give equal hash codes
    # even for actions that do not define #to_s.
    @hashCode = @lexerActions.map(&:hash).hash
  end

  # Creates a {@link LexerActionExecutor} which executes the actions of
  # +lexerActionExecutor+ followed by +lexerAction+.
  #
  # @param lexerActionExecutor The executor for actions already traversed;
  # when nil the method behaves as though it were an empty executor.
  # @param lexerAction The lexer action to execute after those actions.
  #
  # @return A new {@link LexerActionExecutor} for the combined actions. The
  # input executor's own action list is left untouched.
  def self.append(lexerActionExecutor, lexerAction)
    return LexerActionExecutor.new([lexerAction]) if lexerActionExecutor.nil?
    # Array#+ builds a fresh list; Array#concat would both require an Array
    # argument and modify the existing executor's list in place.
    LexerActionExecutor.new(lexerActionExecutor.lexerActions + [lexerAction])
  end

  # Creates a {@link LexerActionExecutor} in which every position-dependent
  # action that is not already a LexerIndexedCustomAction is wrapped in one
  # carrying +offset+.
  #
  # @param offset The offset to assign to all position-dependent lexer
  # actions which do not already have offsets assigned.
  #
  # @return +self+ when nothing needed wrapping, otherwise a new executor
  # holding the updated action list.
  def fixOffsetBeforeMatch(offset)
    updatedLexerActions = nil
    @lexerActions.each_with_index do |lexerAction, i|
      next unless lexerAction.isPositionDependent
      next if lexerAction.kind_of?(LexerIndexedCustomAction)
      # Copy lazily so the common "nothing to fix" path allocates nothing.
      updatedLexerActions ||= @lexerActions.dup
      updatedLexerActions[i] = LexerIndexedCustomAction.new(offset, lexerAction)
    end
    updatedLexerActions.nil? ? self : LexerActionExecutor.new(updatedLexerActions)
  end

  # Executes the stored actions, in order, against +lexer+.
  #
  # For a LexerIndexedCustomAction the input is first moved to
  # +startIndex+ plus the action's offset and the wrapped action is run; for
  # any other position-dependent action the input is moved to the index it
  # had on entry. If the last seek left the input somewhere other than the
  # entry index, the entry index is restored before returning (also when an
  # action raises).
  #
  # @param lexer The lexer instance.
  # @param input The input stream; must respond to +index+ and +seek+.
  # @param startIndex The token start index.
  def execute(lexer, input, startIndex)
    requiresSeek = false
    stopIndex = input.index
    begin
      lexerActions.each do |lexerAction|
        target = lexerAction
        if lexerAction.kind_of?(LexerIndexedCustomAction)
          position = startIndex + lexerAction.offset
          input.seek(position)
          target = lexerAction.action
          requiresSeek = position != stopIndex
        elsif lexerAction.isPositionDependent
          input.seek(stopIndex)
          requiresSeek = false
        end
        target.execute(lexer)
      end
    ensure
      input.seek(stopIndex) if requiresSeek
    end
  end

  # Returns the hash code cached at construction time.
  def hash
    hashCode
  end

  def eql?(other)
    self == other
  end

  # Equal when +other+ is this very object, or another executor with the
  # same cached hash code and an equal action list.
  def ==(other)
    equal?(other) ||
      (other.kind_of?(LexerActionExecutor) &&
       hashCode == other.hashCode && lexerActions == other.lexerActions)
  end
end
@@ -0,0 +1,1622 @@
1
+ #
2
+ # The embodiment of the adaptive LL(*), ALL(*), parsing strategy.
3
+ #
4
+ # <p>
5
+ # The basic complexity of the adaptive strategy makes it harder to understand.
6
+ # We begin with ATN simulation to build paths in a DFA. Subsequent prediction
7
+ # requests go through the DFA first. If they reach a state without an edge for
8
+ # the current symbol, the algorithm fails over to the ATN simulation to
9
+ # complete the DFA path for the current input (until it finds a conflict state
10
+ # or uniquely predicting state).</p>
11
+ #
12
+ # <p>
13
+ # All of that is done without using the outer context because we want to create
14
+ # a DFA that is not dependent upon the rule invocation stack when we do a
15
+ # prediction. One DFA works in all contexts. We avoid using context not
16
+ # necessarily because it's slower, although it can be, but because of the DFA
17
+ # caching problem. The closure routine only considers the rule invocation stack
18
+ # created during prediction beginning in the decision rule. For example, if
19
+ # prediction occurs without invoking another rule's ATN, there are no context
20
+ # stacks in the configurations. When lack of context leads to a conflict, we
21
+ # don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
22
+ # strategy (versus full LL(*)).</p>
23
+ #
24
+ # <p>
25
+ # When SLL yields a configuration set with conflict, we rewind the input and
26
+ # retry the ATN simulation, this time using full outer context without adding
27
+ # to the DFA. Configuration context stacks will be the full invocation stacks
28
+ # from the start rule. If we get a conflict using full context, then we can
29
+ # definitively say we have a true ambiguity for that input sequence. If we
30
+ # don't get a conflict, it implies that the decision is sensitive to the outer
31
+ # context. (It is not context-sensitive in the sense of context-sensitive
32
+ # grammars.)</p>
33
+ #
34
+ # <p>
35
+ # The next time we reach this DFA state with an SLL conflict, through DFA
36
+ # simulation, we will again retry the ATN simulation using full context mode.
37
+ # This is slow because we can't save the results and have to "interpret" the
38
+ # ATN each time we get that input.</p>
39
+ #
40
+ # <p>
41
+ # <strong>CACHING FULL CONTEXT PREDICTIONS</strong></p>
42
+ #
43
+ # <p>
44
+ # We could cache results from full context to predicted alternative easily and
45
+ # that saves a lot of time but doesn't work in presence of predicates. The set
46
+ # of visible predicates from the ATN start state changes depending on the
47
+ # context, because closure can fall off the end of a rule. I tried to cache
48
+ # tuples (stack context, semantic context, predicted alt) but it was slower
49
+ # than interpreting and much more complicated. Also required a huge amount of
50
+ # memory. The goal is not to create the world's fastest parser anyway. I'd like
51
+ # to keep this algorithm simple. By launching multiple threads, we can improve
52
+ # the speed of parsing across a large number of files.</p>
53
+ #
54
+ # <p>
55
+ # There is no strict ordering between the amount of input used by SLL vs LL,
56
+ # which makes it really hard to build a cache for full context. Let's say that
57
+ # we have input A B C that leads to an SLL conflict with full context X. That
58
+ # implies that using X we might only use A B but we could also use A B C D to
59
+ # resolve conflict. Input A B C D could predict alternative 1 in one position
60
+ # in the input and A B C E could predict alternative 2 in another position in
61
+ # input. The conflicting SLL configurations could still be non-unique in the
62
+ # full context prediction, which would lead us to requiring more input than the
63
+ # original A B C. To make a prediction cache work, we have to track the exact
64
+ # input used during the previous prediction. That amounts to a cache that maps
65
+ # X to a specific DFA for that context.</p>
66
+ #
67
+ # <p>
68
+ # Something should be done for left-recursive expression predictions. They are
69
+ # likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry
70
+ # with full LL thing Sam does.</p>
71
+ #
72
+ # <p>
73
+ # <strong>AVOIDING FULL CONTEXT PREDICTION</strong></p>
74
+ #
75
+ # <p>
76
+ # We avoid doing full context retry when the outer context is empty, we did not
77
+ # dip into the outer context by falling off the end of the decision state rule,
78
+ # or when we force SLL mode.</p>
79
+ #
80
+ # <p>
81
+ # As an example of the not dip into outer context case, consider super
82
+ # constructor calls versus function calls. One grammar might look like
83
+ # this:</p>
84
+ #
85
+ # <pre>
86
+ # ctorBody
87
+ # : '{' superCall? stat* '}'
88
+ # ;
89
+ # </pre>
90
+ #
91
+ # <p>
92
+ # Or, you might see something like</p>
93
+ #
94
+ # <pre>
95
+ # stat
96
+ # : superCall ';'
97
+ # | expression ';'
98
+ # | ...
99
+ # ;
100
+ # </pre>
101
+ #
102
+ # <p>
103
+ # In both cases I believe that no closure operations will dip into the outer
104
+ # context. In the first case ctorBody in the worst case will stop at the '}'.
105
+ # In the 2nd case it should stop at the ';'. Both cases should stay within the
106
+ # entry rule and not dip into the outer context.</p>
107
+ #
108
+ # <p>
109
+ # <strong>PREDICATES</strong></p>
110
+ #
111
+ # <p>
112
+ # Predicates are always evaluated if present in either SLL or LL both. SLL and
113
+ # LL simulation deals with predicates differently. SLL collects predicates as
114
+ # it performs closure operations like ANTLR v3 did. It delays predicate
115
+ # evaluation until it reaches an accept state. This allows us to cache the SLL
116
+ # ATN simulation whereas, if we had evaluated predicates on-the-fly during
117
+ # closure, the DFA state configuration sets would be different and we couldn't
118
+ # build up a suitable DFA.</p>
119
+ #
120
+ # <p>
121
+ # When building a DFA accept state during ATN simulation, we evaluate any
122
+ # predicates and return the sole semantically valid alternative. If there is
123
+ # more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
124
+ # we throw an exception. Alternatives without predicates act like they have
125
+ # true predicates. The simple way to think about it is to strip away all
126
+ # alternatives with false predicates and choose the minimum alternative that
127
+ # remains.</p>
128
+ #
129
+ # <p>
130
+ # When we start in the DFA and reach an accept state that's predicated, we test
131
+ # those and return the minimum semantically viable alternative. If no
132
+ # alternatives are viable, we throw an exception.</p>
133
+ #
134
+ # <p>
135
+ # During full LL ATN simulation, closure always evaluates predicates
136
+ # on-the-fly. This is crucial to reducing the configuration set size during
137
+ # closure. It hits a landmine when parsing with the Java grammar, for example,
138
+ # without this on-the-fly evaluation.</p>
139
+ #
140
+ # <p>
141
+ # <strong>SHARING DFA</strong></p>
142
+ #
143
+ # <p>
144
+ # All instances of the same parser share the same decision DFAs through a
145
+ # static field. Each instance gets its own ATN simulator but they share the
146
+ # same {@link #decisionToDFA} field. They also share a
147
+ # {@link PredictionContextCache} object that makes sure that all
148
+ # {@link PredictionContext} objects are shared among the DFA states. This makes
149
+ # a big size difference.</p>
150
+ #
151
+ # <p>
152
+ # <strong>THREAD SAFETY</strong></p>
153
+ #
154
+ # <p>
155
+ # The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when
156
+ # it adds a new DFA object to that array. {@link #addDFAEdge}
157
+ # locks on the DFA for the current decision when setting the
158
+ # {@link DFAState#edges} field. {@link #addDFAState} locks on
159
+ # the DFA for the current decision when looking up a DFA state to see if it
160
+ # already exists. We must make sure that all requests to add DFA states that
161
+ # are equivalent result in the same shared DFA object. This is because lots of
162
+ # threads will be trying to update the DFA at once. The
163
+ # {@link #addDFAState} method also locks inside the DFA lock
164
+ # but this time on the shared context cache when it rebuilds the
165
+ # configurations' {@link PredictionContext} objects using cached
166
+ # subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
167
+ # safe as long as we can guarantee that all threads referencing
168
+ # {@code s.edge[t]} get the same physical target {@link DFAState}, or
169
+ # {@code null}. Once into the DFA, the DFA simulation does not reference the
170
+ # {@link DFA#states} map. It follows the {@link DFAState#edges} field to new
171
+ # targets. The DFA simulator will either find {@link DFAState#edges} to be
172
+ # {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or
173
+ # {@code dfa.edges[t]} to be non-null. The
174
+ # {@link #addDFAEdge} method could be racing to set the field
175
+ # but in either case the DFA simulator works; if {@code null}, and requests ATN
176
+ # simulation. It could also race trying to get {@code dfa.edges[t]}, but either
177
+ # way it will work because it's not doing a test and set operation.</p>
178
+ #
179
+ # <p>
180
+ # <strong>Starting with SLL then failing to combined SLL/LL (Two-Stage
181
+ # Parsing)</strong></p>
182
+ #
183
+ # <p>
184
+ # Sam pointed out that if SLL does not give a syntax error, then there is no
185
+ # point in doing full LL, which is slower. We only have to try LL if we get a
186
+ # syntax error. For maximum speed, Sam starts the parser set to pure SLL
187
+ # mode with the {@link BailErrorStrategy}:</p>
188
+ #
189
+ # <pre>
190
+ # parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
191
+ # parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
192
+ # </pre>
193
+ #
194
+ # <p>
195
+ # If it does not get a syntax error, then we're done. If it does get a syntax
196
+ # error, we need to retry with the combined SLL/LL strategy.</p>
197
+ #
198
+ # <p>
199
+ # The reason this works is as follows. If there are no SLL conflicts, then the
200
+ # grammar is SLL (at least for that input set). If there is an SLL conflict,
201
+ # the full LL analysis must yield a set of viable alternatives which is a
202
+ # subset of the alternatives reported by SLL. If the LL set is a singleton,
203
+ # then the grammar is LL but not SLL. If the LL set is the same size as the SLL
204
+ # set, the decision is SLL. If the LL set has size &gt; 1, then that decision
205
+ # is truly ambiguous on the current input. If the LL set is smaller, then the
206
+ # SLL conflict resolution might choose an alternative that the full LL would
207
+ # rule out as a possibility based upon better context information. If that's
208
+ # the case, then the SLL parse will definitely get an error because the full LL
209
+ # analysis says it's not viable. If SLL conflict resolution chooses an
210
+ # alternative within the LL set, then both SLL and LL would choose the same
211
+ # alternative because they both choose the minimum of multiple conflicting
212
+ # alternatives.</p>
213
+ #
214
+ # <p>
215
+ # Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
216
+ # a smaller LL set called <em>s</em>. If <em>s</em> is {@code {2, 3}}, then SLL
217
+ # parsing will get an error because SLL will pursue alternative 1. If
218
+ # <em>s</em> is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
219
+ # choose the same alternative because alternative one is the minimum of either
220
+ # set. If <em>s</em> is {@code {2}} or {@code {3}} then SLL will get a syntax
221
+ # error. If <em>s</em> is {@code {1}} then SLL will succeed.</p>
222
+ #
223
+ # <p>
224
+ # Of course, if the input is invalid, then we will get an error for sure in
225
+ # both SLL and LL parsing. Erroneous input will therefore require 2 passes over
226
+ # the input.</p>
227
+ #
228
+
229
+ class ParserATNSimulator < ATNSimulator
230
+ include PredictionContextFunctions
231
+
232
+ class << self
233
+ attr_reader :debug, :dfa_debug, :debug_list_atn_decisions,:retry_debug
234
+ end
235
+ @@debug = false
236
+ @@dfa_debug = false
237
+ @@debug_list_atn_decisions = false
238
+ @@retry_debug = false
239
+
240
+ def debug; @@debug ;end
241
+ def dfa_debug; @@dfa_debug ;end
242
+
243
+ def debug_list_atn_decisions; @@debug_list_atn_decisions ; end
244
+ def retry_debug ; @@retry_debug ; end
245
+
246
+
247
+
248
+ attr_accessor :decisionToDFA, :startIndex
249
+ attr_accessor :parser, :predictionMode, :input, :outerContext, :mergeCache
250
+ attr_accessor :_dfa
251
+
252
# Creates a simulator for +parser+.
#
# @param parser the parser this simulator predicts for
# @param atn forwarded to the superclass constructor
# @param decisionToDFA indexed by decision number in adaptivePredict
# @param sharedContextCache forwarded to the superclass constructor
def initialize(parser, atn, decisionToDFA, sharedContextCache)
  super(atn, sharedContextCache)
  @parser = parser
  @decisionToDFA = decisionToDFA
  # SLL, LL, or LL + exact ambig detection?
  @predictionMode = PredictionMode.LL
  # Per-prediction state kept on the object instead of being threaded
  # through every call; adaptivePredict fills these in.
  @input = nil
  @startIndex = 0
  @outerContext = nil
  # Cache for merges of prediction contexts: maps graphs a and b to the
  # merged result c, (a,b)->c, so the merge can be skipped if a and b are
  # seen again. (b,a)->c should also be examined during cache lookup.
  # It is discarded after each prediction because keeping it around wastes
  # huge amounts of memory. No synchronization is needed since one
  # parser/simulator object handles only one input at a time.
  @mergeCache = nil
end
272
+
273
+
274
# Intentionally a no-op; returns nil.
def reset; end
276
+
277
# Predicts the alternative to take for +decision+ by running the decision's
# DFA (built lazily) over the upcoming input.
#
# @param input token stream; its position is restored before returning
# @param decision index into decisionToDFA
# @param outerContext the invoking rule context; may be nil
# @return the value produced by execATN
def adaptivePredict(input, decision, outerContext)
  if debug || debug_list_atn_decisions
    puts "adaptivePredict decision #{decision} exec LA(1)==" \
         "#{getLookaheadName(input)} line #{input.LT(1).line}:#{input.LT(1).column}"
  end
  # type_check(TokenStream, input)
  # type_check(ParserRuleContext, outerContext)
  self.input = input
  self.startIndex = input.index
  self.outerContext = outerContext

  dfa = decisionToDFA[decision]
  @_dfa = dfa
  marker = input.mark
  index = input.index

  # Now we are certain to have a specific decision's DFA.
  # But, do we still need an initial state?
  begin
    s0 = if dfa.precedenceDfa
           # the start state for a precedence DFA depends on the current
           # parser precedence, and is provided by a DFA method.
           dfa.getPrecedenceStartState(parser.getPrecedence)
         else
           # the start state for a "regular" DFA is just s0
           dfa.s0
         end

    if s0.nil?
      outerContext = ParserRuleContext.EMPTY if outerContext.nil?
      if debug || debug_list_atn_decisions
        puts "predictATN decision #{dfa.decision} exec LA(1)==#{getLookaheadName(input)}, outerContext=#{outerContext.to_s}"
      end
      # If this is not a precedence DFA, we check the ATN start state
      # to determine if this ATN start state is the decision for the
      # closure block that determines whether a precedence rule
      # should continue or complete.
      if !dfa.precedenceDfa &&
         dfa.atnStartState.kind_of?(StarLoopEntryState) &&
         dfa.atnStartState.precedenceRuleDecision
        dfa.setPrecedenceDfa(true)
      end

      type_check(ParserRuleContext.EMPTY(), ParserRuleContext)
      start_configs = computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, false)

      # For a precedence DFA the computed start state is first converted
      # with applyPrecedenceFilter and then registered per precedence
      # level rather than simply stored in DFA.s0.
      is_precedence = dfa.precedenceDfa
      start_configs = applyPrecedenceFilter(start_configs) if is_precedence
      s0 = addDFAState(dfa, DFAState.new(nil, start_configs))
      if is_precedence
        dfa.setPrecedenceStartState(parser.getPrecedence, s0)
      else
        dfa.s0 = s0
      end
    end

    alt = execATN(dfa, s0, input, index, outerContext)
    puts "DFA after predictATN: #{dfa.toString(parser.tokenNames)}" if debug
    return alt
  ensure
    self.mergeCache = nil # wack cache after each prediction
    input.seek(index)
    input.release(marker)
    @_dfa = nil
  end
end
358
+ # Performs ATN simulation to compute a predicted alternative based
359
+ # upon the remaining input, but also updates the DFA cache to avoid
360
+ # having to traverse the ATN again for the same input sequence.
361
+
362
+ # There are some key conditions we're looking for after computing a new
363
+ # set of ATN configs (proposed DFA state):
364
+ # if the set is empty, there is no viable alternative for current symbol
365
+ # does the state uniquely predict an alternative?
366
+ # does the state have a conflict that would prevent us from
367
+ # putting it on the work list?
368
+
369
+ # We also have some key operations to do:
370
+ # add an edge from previous DFA state to potentially new DFA state, D,
371
+ # upon current symbol but only if adding to work list, which means in all
372
+ # cases except no viable alternative (and possibly non-greedy decisions?)
373
+ # collecting predicates and adding semantic context to DFA accept states
374
+ # adding rule context to context-sensitive DFA accept states
375
+ # consuming an input symbol
376
+ # reporting a conflict
377
+ # reporting an ambiguity
378
+ # reporting a context sensitivity
379
+ # reporting insufficient predicates
380
+
381
+ # cover these cases:
382
+ # dead end
383
+ # single alt
384
+ # single alt + preds
385
+ # conflict
386
+ # conflict + preds
387
+ #
388
# Walks the DFA from +s0+ over the input, computing missing target states on
# demand, until an alternative can be returned or an error is raised.
#
# @param dfa the decision's DFA
# @param s0 the DFA state to start from
# @param input token stream being examined
# @param startIndex input index at which the prediction began
# @param outerContext the invoking rule context
# @return the predicted alternative
def execATN(dfa, s0, input, startIndex, outerContext)
  type_check(outerContext, ParserRuleContext)
  if debug || debug_list_atn_decisions
    print "execATN decision #{dfa.decision} exec LA(1)==#{getLookaheadName(input)} line #{input.LT(1).line}:#{input.LT(1).column}"
  end
  previousD = s0
  print "s0 = #{s0}" if debug

  t = input.LA(1)
  while true # while more work
    cD = getExistingTargetState(previousD, t)
    cD = computeTargetState(dfa, previousD, t) if cD.nil?

    if cD.equal?(ATNSimulator::ERROR)
      # if any configs in previous dipped into outer context, that
      # means that input up to t actually finished entry rule
      # at least for SLL decision. Full LL doesn't dip into outer
      # so don't need special case.
      # We will get an error no matter what so delay until after
      # decision; better error message. Also, no reachable target
      # ATN states in SLL implies LL will also get nowhere.
      # If conflict in states that dip out, choose min since we
      # will get error no matter what.
      e = noViableAlt(input, outerContext, previousD.configs, startIndex)
      input.seek(startIndex)
      alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previousD.configs, outerContext)
      return alt if alt != ATN::INVALID_ALT_NUMBER
      raise e
    end

    if cD.requiresFullContext && predictionMode != PredictionMode.SLL
      # IF PREDS, MIGHT RESOLVE TO SINGLE ALT => SLL (or syntax error)
      conflictingAlts = nil
      if cD.predicates
        print("DFA state has preds in DFA sim LL failover") if debug
        conflictIndex = input.index
        moved = conflictIndex != startIndex
        input.seek(startIndex) if moved
        conflictingAlts = evalSemanticContext(cD.predicates, outerContext, true)
        if conflictingAlts.length == 1
          print("Full LL avoided") if debug
          return conflictingAlts.min
        end
        # restore the index so reporting the fallback to full
        # context occurs with the index at the correct spot
        input.seek(conflictIndex) if moved
      end
      print "ctx sensitive state #{outerContext} in #{cD}" if dfa_debug
      s0_closure = computeStartState(dfa.atnStartState, outerContext, true)
      reportAttemptingFullContext(dfa, conflictingAlts, cD.configs, startIndex, input.index)
      return execATNWithFullContext(dfa, cD, s0_closure, input, startIndex, outerContext)
    end

    if cD.isAcceptState
      return cD.prediction if cD.predicates.nil?

      stopIndex = input.index
      input.seek(startIndex)
      alts = evalSemanticContext(cD.predicates, outerContext, true)
      case alts.length
      when 0
        raise noViableAlt(input, outerContext, cD.configs, startIndex)
      when 1
        return alts.min
      else
        # report ambiguity after predicate evaluation to make sure the
        # correct set of ambig alts is reported.
        reportAmbiguity(dfa, cD, startIndex, stopIndex, false, alts, cD.configs)
        return alts.min
      end
    end

    previousD = cD
    if t != Token::EOF
      input.consume
      t = input.LA(1)
    end
  end
end
484
+ #
485
+ # Get an existing target state for an edge in the DFA. If the target state
486
+ # for the edge has not yet been computed or is otherwise not available,
487
+ # this method returns {@code null}.
488
+ #
489
+ # @param previousD The current DFA state
490
+ # @param t The next input symbol
491
+ # @return The existing target DFA state for the given input symbol
492
+ # {@code t}, or {@code null} if the target state for this edge is not
493
+ # already cached
494
+ #
495
# Looks up the cached target of the edge leaving +previousD+ on symbol +t+.
# Edges are stored at index t + 1.
#
# @return the cached state, or nil when there is no edge table or the slot
#   is out of range
def getExistingTargetState(previousD, t)
  edges = previousD.edges
  return nil if edges.nil?

  slot = t + 1
  return nil unless slot >= 0 && slot < edges.length

  edges[slot]
end
503
+ #
504
+ # Compute a target state for an edge in the DFA, and attempt to add the
505
+ # computed state and corresponding edge to the DFA.
506
+ #
507
+ # @param dfa The DFA
508
+ # @param previousD The current DFA state
509
+ # @param t The next input symbol
510
+ #
511
+ # @return The computed target DFA state for the given input symbol
512
+ # {@code t}. If {@code t} does not lead to a valid DFA state, this method
513
+ # returns {@link #ERROR}.
514
+ #
515
# Computes the DFA state reached from +previousD+ on symbol +t+ and records
# the edge in +dfa+.
#
# @return the state returned by addDFAEdge, or ATNSimulator::ERROR when the
#   reach set is nil
def computeTargetState(dfa, previousD, t)
  reach = computeReachSet(previousD.configs, t, false)
  if reach.nil?
    addDFAEdge(dfa, previousD, t, ATNSimulator::ERROR)
    return ATNSimulator::ERROR
  end

  # create new target state; we'll add to DFA after it's complete
  target = DFAState.new(nil, reach)
  predictedAlt = getUniqueAlt(reach)

  if debug
    altSubSets = PredictionMode.getConflictingAltSubsets(reach)
    puts "SLL altSubSets=#{altSubSets}, configs=#{reach}, predict=#{predictedAlt}, allSubsetsConflict=#{PredictionMode.allSubsetsConflict(altSubSets)}, conflictingAlts=#{getConflictingAlts(reach)}"
  end

  if predictedAlt != ATN::INVALID_ALT_NUMBER
    # NO CONFLICT, UNIQUELY PREDICTED ALT
    target.isAcceptState = true
    target.configs.uniqueAlt = predictedAlt
    target.prediction = predictedAlt
  elsif PredictionMode.hasSLLConflictTerminatingPrediction(predictionMode, reach)
    # MORE THAN ONE VIABLE ALTERNATIVE
    target.configs.conflictingAlts = getConflictingAlts(reach)
    target.requiresFullContext = true
    # in SLL-only mode, we will stop at this state and return the minimum alt
    target.isAcceptState = true
    target.prediction = target.configs.conflictingAlts.min
  end

  if target.isAcceptState && target.configs.hasSemanticContext
    predicateDFAState(target, atn.getDecisionState(dfa.decision))
    target.prediction = ATN::INVALID_ALT_NUMBER if target.predicates
  end

  # all adds to dfa are done after we've created full D state
  addDFAEdge(dfa, previousD, t, target)
end
558
# Attaches (predicate, alt) pairs to +dfaState+ when its configurations
# carry predicates; otherwise resolves the state to the minimum alternative.
# All predicates are tested, even in DFA states that uniquely predict an
# alternative.
#
# @param dfaState state whose predicates/prediction are updated in place
# @param decisionState its transition count gives the number of alternatives
def predicateDFAState(dfaState, decisionState)
  alt_count = decisionState.transitions.length
  candidate_alts = getConflictingAltsOrUniqueAlt(dfaState.configs)
  alt_to_pred = getPredsForAmbigAlts(candidate_alts, dfaState.configs, alt_count)

  unless alt_to_pred
    # There are preds in configs but they might go away
    # when OR'd together like {p}? || NONE == NONE. If neither
    # alt has preds, resolve to min alt
    dfaState.prediction = candidate_alts.min
    return
  end

  dfaState.predicates = getPredicatePredictions(candidate_alts, alt_to_pred)
  dfaState.prediction = ATN::INVALID_ALT_NUMBER # make sure we use preds
end
576
+ # comes back with reach.uniqueAlt set to a valid alt
577
# Re-runs the prediction from +startIndex+ with full context.
# Comes back with reach.uniqueAlt set to a valid alt when the result is
# unique.
#
# @param dfa the decision's DFA
# @param cD how far we got before failing over
# @param s0 configuration set to start from
# @param input token stream; repositioned to startIndex first
# @param startIndex input index at which the prediction began
# @param outerContext the invoking rule context
# @return the predicted alternative
def execATNWithFullContext(dfa, cD, s0, input, startIndex, outerContext)
  print "execATNWithFullContext #{s0}" if debug || debug_list_atn_decisions

  foundExactAmbig = false
  reach = nil
  previous = s0
  input.seek(startIndex)
  t = input.LA(1)
  predictedAlt = -1

  while true
    reach = computeReachSet(previous, t, true)
    if reach.nil?
      # if any configs in previous dipped into outer context, that
      # means that input up to t actually finished entry rule
      # at least for LL decision. Full LL doesn't dip into outer
      # so don't need special case.
      # We will get an error no matter what so delay until after
      # decision; better error message. Also, no reachable target
      # ATN states in SLL implies LL will also get nowhere.
      # If conflict in states that dip out, choose min since we
      # will get error no matter what.
      e = noViableAlt(input, outerContext, previous, startIndex)
      input.seek(startIndex)
      alt = getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(previous, outerContext)
      return alt if alt != ATN::INVALID_ALT_NUMBER
      raise e
    end

    altSubSets = PredictionMode.getConflictingAltSubsets(reach)
    if debug
      print "LL altSubSets=#{altSubSets}, predict=#{PredictionMode.getUniqueAlt(altSubSets)}, resolvesToJustOneViableAlt=#{PredictionMode.resolvesToJustOneViableAlt(altSubSets)}"
    end

    reach.uniqueAlt = getUniqueAlt(reach)
    if reach.uniqueAlt != ATN::INVALID_ALT_NUMBER
      # unique prediction
      predictedAlt = reach.uniqueAlt
      break
    elsif predictionMode != PredictionMode.LL_EXACT_AMBIG_DETECTION
      predictedAlt = PredictionMode.resolvesToJustOneViableAlt(altSubSets)
      break if predictedAlt != ATN::INVALID_ALT_NUMBER
    elsif PredictionMode.allSubsetsConflict(altSubSets) && PredictionMode.allSubsetsEqual(altSubSets)
      # In exact ambiguity mode, we never try to terminate early.
      # Just keeps scarfing until we know what the conflict is.
      foundExactAmbig = true
      predictedAlt = PredictionMode.getSingleViableAlt(altSubSets)
      break
    end
    # else there are multiple non-conflicting subsets or
    # we're not sure what the ambiguity is yet. So, keep going.

    previous = reach
    if t != Token::EOF
      input.consume
      t = input.LA(1)
    end
  end

  # If the configuration set uniquely predicts an alternative,
  # without conflict, then we know that it's a full LL decision
  # not SLL.
  if reach.uniqueAlt != ATN::INVALID_ALT_NUMBER
    reportContextSensitivity(dfa, predictedAlt, reach, startIndex, input.index)
    return predictedAlt
  end

  # We do not check predicates here because we have checked them
  # on-the-fly when doing full context prediction.
  #
  # In non-exact ambiguity detection mode, we might actually be able to
  # detect an exact ambiguity, but I'm not going to spend the cycles
  # needed to check. We only emit ambiguity warnings in exact ambiguity
  # mode.
  #
  # For example, we might know that we have conflicting configurations.
  # But, that does not mean that there is no way forward without a
  # conflict. It's possible to have nonconflicting alt subsets as in:
  #
  #   altSubSets=[{1, 2}, {1, 2}, {1}, {1, 2}]
  #
  # from
  #
  #   [(17,1,[5 $]), (13,1,[5 10 $]), (21,1,[5 10 $]), (11,1,[$]),
  #    (13,2,[5 10 $]), (21,2,[5 10 $]), (11,2,[$])]
  #
  # In this case, (17,1,[5 $]) indicates there is some next sequence that
  # would resolve this without conflict to alternative 1. Any other viable
  # next sequence, however, is associated with a conflict. We stop
  # looking for input because no amount of further lookahead will alter
  # the fact that we should predict alternative 1. We just can't say for
  # sure that there is an ambiguity without looking further.
  reportAmbiguity(dfa, cD, startIndex, input.index, foundExactAmbig, nil, reach)

  predictedAlt
end
681
# Computes the configuration set reachable from +closure+ on symbol +t+.
#
# @param closure the configurations to advance
# @param t the input symbol
# @param fullCtx whether this is a full-context reach operation
# @return the reach set, or nil when it is empty
def computeReachSet(closure, t, fullCtx)
  print "in computeReachSet, starting closure: #{closure}" if debug

  self.mergeCache = Hash.new if mergeCache.nil?
  at_eof = (t == Token::EOF)
  intermediate = ATNConfigSet.new(fullCtx)

  # Configurations already in a rule stop state indicate reaching the end
  # of the decision rule (local context) or end of the start rule (full
  # context). Once reached, these configurations are never updated by a
  # closure operation, so they are handled separately for the performance
  # advantage of having a smaller intermediate set when calling closure.
  #
  # For full-context reach operations, separate handling is required to
  # ensure that the alternative matching the longest overall sequence is
  # chosen when multiple such configurations can match the input.
  skippedStopStates = nil

  # First figure out where we can reach on input t
  closure.each do |c|
    puts "testing #{getTokenName(t)} at #{c}" if debug

    if c.state.kind_of?(RuleStopState)
      # assert c.context.isEmpty()
      (skippedStopStates ||= []) << c if fullCtx || at_eof
      next
    end

    c.state.transitions.each do |trans|
      target = getReachableTarget(trans, t)
      intermediate.add(ATNConfig.createConfigState(c, target), mergeCache) if target
    end
  end

  # Now figure out where the reach operation can take us...
  #
  # Shortcut for intermediate sets which trivially indicate a termination
  # state for the overall adaptivePredict operation: a single
  # configuration, or a unique alternative among the configurations. The
  # shortcut assumes intermediate contains all configurations relevant to
  # the reach set, which is not true when configurations have been
  # withheld in skippedStopStates.
  reach = nil
  if skippedStopStates.nil? &&
     (intermediate.length == 1 || getUniqueAlt(intermediate) != ATN::INVALID_ALT_NUMBER)
    reach = intermediate
  end

  # If the reach set could not be trivially determined, perform a closure
  # operation on the intermediate set to compute its initial value.
  if reach.nil?
    reach = ATNConfigSet.new(fullCtx)
    closureBusy = Set.new
    intermediate.each do |c|
      self.closure(c, reach, closureBusy, false, fullCtx, at_eof)
    end
  end

  if at_eof
    # After consuming EOF no additional input is possible, so we are
    # only interested in configurations which reached the end of the
    # decision rule (local context) or end of the start rule (full
    # context). Update reach to contain only these configurations. This
    # handles both explicit EOF transitions in the grammar and implicit
    # EOF transitions following the end of the decision or start rule.
    #
    # When reach==intermediate, no closure operation was performed. In
    # this case, removeAllConfigsNotInRuleStopState needs to check for
    # reachable rule stop states as well as configurations already in
    # a rule stop state.
    #
    # This is handled before the configurations in skippedStopStates,
    # because any configurations potentially added from that list are
    # already guaranteed to meet this condition whether or not it's
    # required.
    reach = removeAllConfigsNotInRuleStopState(reach, reach.equal?(intermediate))
  end

  # If skippedStopStates is not nil, then it contains at least one
  # configuration. For full-context reach operations, these
  # configurations reached the end of the start rule, in which case we
  # only add them back to reach if no configuration during the current
  # closure operation reached such a state. This ensures adaptivePredict
  # chooses an alternative matching the longest overall sequence when
  # multiple alternatives are viable.
  if skippedStopStates && (!fullCtx || !PredictionMode.hasConfigInRuleStopState(reach))
    skippedStopStates.each { |c| reach.add(c, mergeCache) }
  end

  reach.empty? ? nil : reach
end
803
+ #
804
+ # Return a configuration set containing only the configurations from
805
+ # {@code configs} which are in a {@link RuleStopState}. If all
806
+ # configurations in {@code configs} are already in a rule stop state, this
807
+ # method simply returns {@code configs}.
808
+ #
809
+ # <p>When {@code lookToEndOfRule} is true, this method uses
810
+ # {@link ATN#nextTokens} for each configuration in {@code configs} which is
811
+ # not already in a rule stop state to see if a rule stop state is reachable
812
+ # from the configuration via epsilon-only transitions.</p>
813
+ #
814
+ # @param configs the configuration set to update
815
+ # @param lookToEndOfRule when true, this method checks for rule stop states
816
+ # reachable by epsilon-only transitions from each configuration in
817
+ # {@code configs}.
818
+ #
819
+ # @return {@code configs} if all configurations in {@code configs} are in a
820
+ # rule stop state, otherwise return a new configuration set containing only
821
+ # the configurations from {@code configs} which are in a rule stop state
822
+ #
823
# Filters +configs+ down to the configurations that are in a RuleStopState.
#
# @param configs the configuration set to filter
# @param lookToEndOfRule when true, a configuration whose state has only
#   epsilon transitions and whose next-token set contains Token::EPSILON is
#   replaced by a configuration at its rule's stop state
# @return +configs+ itself when every configuration is already in a rule
#   stop state, otherwise a new set
def removeAllConfigsNotInRuleStopState(configs, lookToEndOfRule)
  return configs if PredictionMode.allConfigsInRuleStopStates(configs)

  kept = ATNConfigSet.new(configs.fullCtx)
  configs.each do |config|
    if config.state.kind_of?(RuleStopState)
      kept.add(config, mergeCache)
    elsif lookToEndOfRule && config.state.epsilonOnlyTransitions &&
          atn.nextTokens(config.state).member?(Token::EPSILON)
      endOfRuleState = atn.ruleToStopState[config.state.ruleIndex]
      kept.add(ATNConfig.new(endOfRuleState, nil, nil, nil, config), mergeCache)
    end
  end
  kept
end
843
# Builds the start configuration set for ATN state +p+: one seed
# configuration per outgoing transition (alternative i + 1), each expanded
# through closure.
#
# @param p the ATN state to start from
# @param ctx rule context used to derive the initial prediction context
# @param fullCtx whether the resulting set is a full-context set
# @return the populated ATNConfigSet
def computeStartState(p, ctx, fullCtx)
  type_check(p, ATNState)
  type_check(ctx, RuleContext)

  # always at least the implicit call to start rule
  initialContext = PredictionContextFromRuleContext(atn, ctx)
  configs = ATNConfigSet.new(fullCtx)

  p.transitions.each_with_index do |transition, i|
    seed = ATNConfig.new(transition.target, i + 1, initialContext)
    self.closure(seed, configs, Set.new, true, fullCtx, false)
  end
  configs
end
859
+ #
860
+ # This method transforms the start state computed by
861
+ # {@link #computeStartState} to the special start state used by a
862
+ # precedence DFA for a particular precedence value. The transformation
863
+ # process applies the following changes to the start state's configuration
864
+ # set.
865
+ #
866
+ # <ol>
867
+ # <li>Evaluate the precedence predicates for each configuration using
868
+ # {@link SemanticContext#evalPrecedence}.</li>
869
+ # <li>Remove all configurations which predict an alternative greater than
870
+ # 1, for which another configuration that predicts alternative 1 is in the
871
+ # same ATN state with the same prediction context. This transformation is
872
+ # valid for the following reasons:
873
+ # <ul>
874
+ # <li>The closure block cannot contain any epsilon transitions which bypass
875
+ # the body of the closure, so all states reachable via alternative 1 are
876
+ # part of the precedence alternatives of the transformed left-recursive
877
+ # rule.</li>
878
+ # <li>The "primary" portion of a left recursive rule cannot contain an
879
+ # epsilon transition, so the only way an alternative other than 1 can exist
880
+ # in a state that is also reachable via alternative 1 is by nesting calls
881
+ # to the left-recursive rule, with the outer calls not being at the
882
+ # preferred precedence level.</li>
883
+ # </ul>
884
+ # </li>
885
+ # </ol>
886
+ #
887
+ # <p>
888
+ # The prediction context must be considered by this filter to address
889
+ # situations like the following.
890
+ # </p>
891
+ # <code>
892
+ # <pre>
893
+ # grammar TA;
894
+ # prog: statement* EOF;
895
+ # statement: letterA | statement letterA 'b' ;
896
+ # letterA: 'a';
897
+ # </pre>
898
+ # </code>
899
+ # <p>
900
+ # In the above grammar, the ATN state immediately before the token
901
+ # reference {@code 'a'} in {@code letterA} is reachable from the left edge
902
+ # of both the primary and closure blocks of the left-recursive rule
903
+ # {@code statement}. The prediction context associated with each of these
904
+ # configurations distinguishes between them, and prevents the alternative
905
+ # which stepped out to {@code prog} (and then back in to {@code statement})
906
+ # from being eliminated by the filter.
907
+ # </p>
908
+ #
909
+ # @param configs The configuration set computed by
910
+ # {@link #computeStartState} as the start state for the DFA.
911
+ # @return The transformed configuration set representing the start state
912
+ # for a precedence DFA at a particular precedence level (determined by
913
+ # calling {@link Parser#getPrecedence}).
914
+ #
915
# Converts a computed start state into the start state for a precedence DFA.
#
# @param configs the configuration set to transform
# @return a new ATNConfigSet with the filtered configurations
def applyPrecedenceFilter(configs)
  alt1_context_by_state = Hash.new
  filtered = ATNConfigSet.new(configs.fullCtx)

  # First pass: alternative 1 only.
  configs.each do |config|
    next unless config.alt == 1

    updatedContext = config.semanticContext.evalPrecedence(parser, self.outerContext)
    next if updatedContext.nil? # the configuration was eliminated

    alt1_context_by_state[config.state.stateNumber] = config.context
    entry = if updatedContext != config.semanticContext
              ATNConfig.new(nil, nil, nil, updatedContext, config)
            else
              config
            end
    filtered.add(entry, mergeCache)
  end

  # Second pass: every other alternative, dropping configurations whose
  # context equals the one recorded for alternative 1 in the same state.
  #
  # In the future, this elimination step could be updated to also
  # filter the prediction context for alternatives predicting alt>1
  # (basically a graph subtraction algorithm).
  configs.each do |config|
    unless config.alt == 1 || alt1_context_by_state[config.state.stateNumber] == config.context
      filtered.add(config, mergeCache)
    end
  end
  filtered
end
946
# Returns the target of +trans+ when it matches symbol +ttype+ within the
# range 0..atn.maxTokenType, otherwise nil.
def getReachableTarget(trans, ttype)
  trans.matches(ttype, 0, atn.maxTokenType) ? trans.target : nil
end
953
+
954
# Builds an array mapping each alternative number to its combined predicate.
#
# The entry at index i corresponds to alternative i (index 0 is unused).
# While scanning, altToPred[i] may hold one of three values:
#   1. nil: no config c is found such that c.alt==i
#   2. SemanticContext.NONE: at least one config c exists such that
#      c.alt==i and c.semanticContext==SemanticContext.NONE, i.e. alt i has
#      at least one unpredicated config.
#   3. Non-NONE semantic context: there exists at least one, and for all
#      configs c such that c.alt==i, c.semanticContext!=SemanticContext.NONE.
# From this, it is clear that NONE||anything==NONE.
#
# @param ambigAlts the alternatives to collect predicates for
# @param configs the configurations to scan
# @param nalts number of alternatives
# @return the array (nil entries for 1..nalts replaced by NONE), or nil
#   when no alternative ended up with a non-NONE predicate
def getPredsForAmbigAlts(ambigAlts, configs, nalts)
  # e.g. REACH=[1|1|[]|0:0, 1|2|[]|0:1]
  altToPred = Array.new(nalts + 1)
  configs.each do |c|
    next unless ambigAlts.member?(c.alt)
    altToPred[c.alt] = SemanticContext.orContext(altToPred[c.alt], c.semanticContext)
  end

  predicated_alts = 0
  (1..nalts).each do |alt|
    if altToPred[alt].nil?
      altToPred[alt] = SemanticContext.NONE
    elsif !altToPred[alt].equal?(SemanticContext.NONE)
      predicated_alts += 1
    end
  end

  altToPred = nil if predicated_alts == 0
  puts "getPredsForAmbigAlts result #{altToPred}" if debug
  altToPred
end
991
# Pairs each ambiguous alternative with its predicate.
#
# +altToPred+ is indexed by alternative number; slot 0 is unused because
# alternatives are numbered from 1, so the scan starts at index 1. Including
# slot 0 (which holds nil) would make every call look as if it contained a
# real predicate, since nil is not SemanticContext.NONE.
#
# @param ambigAlts alternatives to emit pairs for; may be nil, in which case
#   no pairs are collected
# @param altToPred array of predicates indexed by alternative number;
#   unpredicated alternatives hold SemanticContext.NONE
# @return an Array of PredPrediction, or nil when no alternative carries a
#   predicate other than SemanticContext.NONE
def getPredicatePredictions(ambigAlts, altToPred)
  pairs = []
  containsPredicate = false

  (1...altToPred.length).each do |alt|
    pred = altToPred[alt]
    # unpredicated is indicated by SemanticContext.NONE
    pairs.push(PredPrediction.new(pred, alt)) if ambigAlts && ambigAlts.member?(alt)
    containsPredicate = true unless pred.equal?(SemanticContext.NONE)
  end

  containsPredicate ? pairs : nil
end
1011
+ #
1012
+ # This method is used to improve the localization of error messages by
1013
+ # choosing an alternative rather than throwing a
1014
+ # {@link NoViableAltException} in particular prediction scenarios where the
1015
+ # {@link #ERROR} state was reached during ATN simulation.
1016
+ #
1017
+ # <p>
1018
+ # The default implementation of this method uses the following
1019
+ # algorithm to identify an ATN configuration which successfully parsed the
1020
+ # decision entry rule. Choosing such an alternative ensures that the
1021
+ # {@link ParserRuleContext} returned by the calling rule will be complete
1022
+ # and valid, and the syntax error will be reported later at a more
1023
+ # localized location.</p>
1024
+ #
1025
+ # <ul>
1026
+ # <li>If a syntactically valid path or paths reach the end of the decision rule and
1027
+ # they are semantically valid if predicated, return the min associated alt.</li>
1028
+ # <li>Else, if a semantically invalid but syntactically valid path or
1029
+ # paths exist, return the minimum associated alt.
1030
+ # </li>
1031
+ # <li>Otherwise, return {@link ATN#INVALID_ALT_NUMBER}.</li>
1032
+ # </ul>
1033
+ #
1034
+ # <p>
1035
+ # In some scenarios, the algorithm described above could predict an
1036
+ # alternative which will result in a {@link FailedPredicateException} in
1037
+ # the parser. Specifically, this could occur if the <em>only</em> configuration
1038
+ # capable of successfully parsing to the end of the decision rule is
1039
+ # blocked by a semantic predicate. By choosing this alternative within
1040
+ # {@link #adaptivePredict} instead of throwing a
1041
+ # {@link NoViableAltException}, the resulting
1042
+ # {@link FailedPredicateException} in the parser will identify the specific
1043
+ # predicate which is preventing the parser from successfully parsing the
1044
+ # decision rule, which helps developers identify and correct logic errors
1045
+ # in semantic predicates.
1046
+ # </p>
1047
+ #
1048
+ # @param configs The ATN configurations which were valid immediately before
1049
+ # the {@link #ERROR} state was reached
1050
+ # @param outerContext This is the \gamma_0 initial parser context from the paper
1051
+ # or the parser stack at the instant before prediction commences.
1052
+ #
1053
+ # @return The value to return from {@link #adaptivePredict}, or
1054
+ # {@link ATN#INVALID_ALT_NUMBER} if a suitable alternative was not
1055
+ # identified and {@link #adaptivePredict} should report an error instead.
1056
+ #
1057
def getSynValidOrSemInvalidAltThatFinishedDecisionEntryRule(configs, outerContext)
  passing, blocked = splitAccordingToSemanticValidity(configs, outerContext)

  # First choice: an alternative that finished the entry rule and whose
  # predicates (if any) evaluated to true.
  viable = getAltThatFinishedDecisionEntryRule(passing)
  return viable unless viable == ATN::INVALID_ALT_NUMBER

  # Second choice: a syntactically viable alternative whose predicate failed.
  # When there is none either, the lookup itself yields INVALID_ALT_NUMBER.
  return ATN::INVALID_ALT_NUMBER unless blocked.length > 0
  getAltThatFinishedDecisionEntryRule(blocked)
end
1072
# Returns the smallest alternative among the configurations that either have
# reachesIntoOuterContext > 0 or sit in a RuleStopState whose context has an
# empty path. Returns ATN::INVALID_ALT_NUMBER when no configuration qualifies.
def getAltThatFinishedDecisionEntryRule(configs)
  finished = Set.new
  configs.each do |cfg|
    done = cfg.reachesIntoOuterContext > 0 ||
           (cfg.state.kind_of?(RuleStopState) && cfg.context.hasEmptyPath())
    finished.add(cfg.alt) if done
  end
  return ATN::INVALID_ALT_NUMBER if finished.empty?
  finished.min
end
1085
+ # Walk the list of configurations and split them according to
1086
+ # those that have preds evaluating to true/false. If no pred, assume
1087
+ # true pred and include in succeeded set. Returns Pair of sets.
1088
+ #
1089
+ # Create a new set so as not to alter the incoming parameter.
1090
+ #
1091
+ # Assumption: the input stream has been restored to the starting point
1092
+ # of prediction, which is where predicates need to evaluate.
1093
+ #
1094
def splitAccordingToSemanticValidity(configs, outerContext)
  passed = ATNConfigSet.new(configs.fullCtx)
  rejected = ATNConfigSet.new(configs.fullCtx)
  configs.each do |cfg|
    # A configuration without a predicate counts as satisfied; only real
    # predicates are evaluated against the parser and outer context.
    unpredicated = cfg.semanticContext.equal?(SemanticContext.NONE)
    satisfied = unpredicated || cfg.semanticContext.eval(parser, outerContext)
    (satisfied ? passed : rejected).add(cfg)
  end
  # Two fresh sets are returned so the incoming set is never modified.
  [passed, rejected]
end
1111
+ # Look through a list of predicate/alt pairs, returning alts for the
1112
+ # pairs that win. A {@code NONE} predicate indicates an alt containing an
1113
+ # unpredicated config which behaves as "always true." If !complete
1114
+ # then we stop at the first predicate that evaluates to true. This
1115
+ # includes pairs with null predicates.
1116
+ #
1117
def evalSemanticContext(predPredictions, outerContext, complete)
  winners = Set.new

  predPredictions.each do |pair|
    if pair.pred.equal?(SemanticContext.NONE)
      # Unpredicated alternative: always a winner, no evaluation needed.
      winners.add(pair.alt)
      break unless complete
      next
    end

    outcome = pair.pred.eval(parser, outerContext)
    puts "eval pred #{pair}=#{outcome}" if debug || dfa_debug
    next unless outcome

    puts "PREDICT #{pair.alt}" if debug || dfa_debug
    winners.add(pair.alt)
    # In incomplete mode the first winning pair ends the scan.
    break unless complete
  end
  winners
end
1140
+ # TODO: If we are doing predicates, there is no point in pursuing
1141
+ # closure operations if we reach a DFA state that uniquely predicts
1142
+ # alternative. We will not be caching that DFA state and it is a
1143
+ # waste to pursue the closure. Might have to advance when we do
1144
+ # ambig detection though :(
1145
+ #
1146
+
1147
# Entry point of the closure computation: delegates to
# closureCheckingStopState with a starting depth of 0.
def closure(config, configs, closureBusy, collectPredicates, fullCtx, treatEofAsEpsilon)
  startDepth = 0
  closureCheckingStopState(config, configs, closureBusy, collectPredicates,
                           fullCtx, startDepth, treatEofAsEpsilon)
  # assert not fullCtx or not configs.dipsIntoOuterContext
end
1153
+
1154
+
1155
# Closure step that first handles configurations sitting in a RuleStopState
# (popping return states off the prediction context) and passes everything
# else on to closure_.
def closureCheckingStopState(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
  puts "closure(#{config.toString(parser,true)})" if debug

  if config.state.kind_of?(RuleStopState)
    if !config.context.isEmpty()
      # Rule end with context info: walk every possible stack top in the context.
      config.context.length.times do |slot|
        if config.context.getReturnState(slot) == PredictionContext::EMPTY_RETURN_STATE
          if fullCtx
            configs.add(ATNConfig.new(config.state, nil, PredictionContext.EMPTY, nil, config), mergeCache)
          else
            # No context info for this slot: just chase follow links (if greedy).
            puts "FALLING off rule " + getRuleName(config.state.ruleIndex) if debug
            closure_(config, configs, closureBusy, collectPredicates,
                     fullCtx, depth, treatEofAsEpsilon)
          end
          next
        end

        resumeState = atn.states[config.context.getReturnState(slot)]
        poppedContext = config.context.getParent(slot) # "pop" the return state
        popped = ATNConfig.new(resumeState, config.alt, poppedContext, config.semanticContext)
        # The context being popped may have been acquired AFTER falling off a
        # rule, so carry the outer-context counter over to the new config.
        popped.reachesIntoOuterContext = config.reachesIntoOuterContext
        closureCheckingStopState(popped, configs, closureBusy, collectPredicates,
                                 fullCtx, depth - 1, treatEofAsEpsilon)
      end
      return
    elsif fullCtx
      # Reached the end of the start rule.
      configs.add(config, mergeCache)
      return
    elsif debug
      # Empty context, not full-context: fall through and chase follow links.
      puts "FALLING off rule #{getRuleName(config.state.ruleIndex)}"
    end
  end

  closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
end
1204
+ # Do the actual work of walking epsilon edges#
1205
# Expands +config+ across every transition of its state that
# getEpsilonTarget accepts, recursing through closureCheckingStopState.
#
# config            - configuration being expanded
# configs           - configuration set receiving results; its
#                     dipsIntoOuterContext flag is set when a rule end is crossed
# closureBusy       - set of configs already expanded from a rule stop state,
#                     used to stop infinite recursion on right-recursive rules
# collectPredicates - whether predicates are still being collected on this path
# fullCtx           - passed through to getEpsilonTarget and the recursion
# depth             - rule nesting depth relative to the entry rule; 0 is passed
#                     to getEpsilonTarget as "in context"
# treatEofAsEpsilon - passed through to getEpsilonTarget and the recursion
def closure_(config, configs, closureBusy, collectPredicates, fullCtx, depth, treatEofAsEpsilon)
  state = config.state
  # optimization
  unless state.epsilonOnlyTransitions
    configs.add(config, mergeCache)
    # Deliberately no return here: EOF transitions can act as both epsilon
    # transitions and non-epsilon transitions.
  end

  state.transitions.each do |t|
    # Must be `&&`, not `and`: `x = a and b` parses as `(x = a) and b`, which
    # silently dropped the ActionTransition test and kept collecting
    # predicates after an action had been crossed.
    continueCollecting = collectPredicates && !t.kind_of?(ActionTransition)
    c = getEpsilonTarget(config, t, continueCollecting, depth == 0, fullCtx, treatEofAsEpsilon)
    next unless c

    newDepth = depth
    if config.state.kind_of?(RuleStopState)
      # assert not fullCtx
      # The target fell off the end of a rule: mark the resulting config as
      # having dipped into the outer context. We cannot get here if the
      # incoming config was a rule stop and we had context. The counter lets
      # callers avoid evaluating context-dependent preds when it is > 0.
      next if closureBusy.member?(c) # avoid infinite recursion for right-recursive rules
      closureBusy.add(c)

      # TODO: precedence-filter suppression from the reference implementation
      # is not ported yet:
      # if @_dfa && @_dfa.isPrecedenceDfa() then
      #   outermostPrecedenceReturn = t.outermostPrecedenceReturn()
      #   if outermostPrecedenceReturn == @_dfa.atnStartState.ruleIndex then
      #     c.setPrecedenceFilterSuppressed(true)
      #   end
      # end

      c.reachesIntoOuterContext = c.reachesIntoOuterContext + 1
      configs.dipsIntoOuterContext = true # TODO: can remove? only care when we add to set per middle of this method
      newDepth -= 1
      puts "dips into outer ctx: #{c}" if debug
    elsif t.kind_of?(RuleTransition)
      # Latch when newDepth goes negative: once we step out of the entry
      # context we can't return.
      newDepth += 1 if newDepth >= 0
    end

    closureCheckingStopState(c, configs, closureBusy, continueCollecting, fullCtx, newDepth, treatEofAsEpsilon)
  end
end
1262
+
1263
# Returns the parser's rule name for +index+, or the placeholder
# "<rule INDEX>" when there is no parser or the index is negative.
def getRuleName(index)
  return "<rule #{index}>" unless parser && index >= 0
  parser.ruleNames[index]
end
1270
+
1271
# Dispatches on the transition's serialization type and returns the
# configuration reached by following +t+ as an epsilon-like edge, or nil when
# the transition cannot be followed that way.
def getEpsilonTarget(config, t, collectPredicates, inContext, fullCtx, treatEofAsEpsilon)
  case t.serializationType
  when Transition::RULE
    ruleTransition(config, t)
  when Transition::PRECEDENCE
    precedenceTransition(config, t, collectPredicates, inContext, fullCtx)
  when Transition::PREDICATE
    predTransition(config, t, collectPredicates, inContext, fullCtx)
  when Transition::ACTION
    actionTransition(config, t)
  when Transition::EPSILON
    ATNConfig.new(t.target, nil, nil, nil, config)
  when Transition::ATOM, Transition::RANGE, Transition::SET
    # EOF transitions act like epsilon transitions after the first EOF
    # transition is traversed.
    ATNConfig.createConfigState(config, t.target) if treatEofAsEpsilon && t.matches(Token::EOF, 0, 1)
  end
end
1300
# Follows an action transition: builds a configuration for the transition's
# target, passing +config+ as the source configuration.
def actionTransition(config, t)
  puts "ACTION edge #{t.ruleIndex}:#{t.actionIndex}" if debug
  ATNConfig.new(t.target, nil, nil, nil, config)
end
1306
# Follows a precedence-predicate transition. Depending on the flags the
# predicate is ignored, evaluated immediately (full-context mode) or ANDed
# into the new configuration's semantic context. Returns the new
# configuration, or nil when an immediately evaluated predicate fails.
def precedenceTransition(config, pt, collectPredicates, inContext, fullCtx)
  if debug
    puts "PRED (collectPredicates=#{collectPredicates}) #{pt.precedence}>=_p, ctx dependent=true"
    puts "context surrounding pred is #{parser.getRuleInvocationStack()}" if parser
  end

  result =
    if !(collectPredicates && inContext)
      ATNConfig.new(pt.target, nil, nil, nil, config)
    elsif fullCtx
      # In full context mode predicates can be evaluated on the fly during
      # closure, which dramatically reduces the size of the config sets and
      # removes the need to test predicates later during conflict resolution.
      resumeAt = input.index
      input.seek(startIndex)
      holds = pt.getPredicate().eval(parser, outerContext)
      input.seek(resumeAt)
      holds ? ATNConfig.new(pt.target, nil, nil, nil, config) : nil # no pred context
    else
      combined = SemanticContext.andContext(config.semanticContext, pt.getPredicate())
      ATNConfig.new(pt.target, nil, nil, combined, config)
    end

  puts "config from pred transition=#{result}" if debug
  result
end
1340
# Follows a semantic-predicate transition. The predicate is honoured only
# while predicates are being collected and it is either context-independent
# or we are still in the entry context; otherwise it is skipped. Returns the
# new configuration, or nil when an immediately evaluated predicate fails.
def predTransition(config, pt, collectPredicates, inContext, fullCtx)
  if debug
    puts "PRED (collectPredicates=#{collectPredicates}) #{pt.ruleIndex}:#{pt.predIndex}, ctx dependent=#{pt.isCtxDependent}"
    puts "context surrounding pred is #{parser.getRuleInvocationStack()}" if parser
  end

  honoured = collectPredicates && (!pt.isCtxDependent || inContext)
  result =
    if !honoured
      ATNConfig.new(pt.target, nil, nil, nil, config)
    elsif fullCtx
      # In full context mode predicates can be evaluated on the fly during
      # closure, which dramatically reduces the size of the config sets and
      # removes the need to test predicates later during conflict resolution.
      resumeAt = input.index
      input.seek(startIndex)
      holds = pt.getPredicate().eval(parser, outerContext)
      input.seek(resumeAt)
      holds ? ATNConfig.new(pt.target, nil, nil, nil, config) : nil # no pred context
    else
      combined = SemanticContext.andContext(config.semanticContext, pt.getPredicate())
      ATNConfig.new(pt.target, nil, nil, combined, config)
    end

  puts "config from pred transition=#{result}" if debug
  result
end
1374
# Follows a rule-invocation transition: pushes the transition's follow state
# onto the configuration's context and moves to the rule's target state.
def ruleTransition(config, t)
  puts "CALL rule #{getRuleName(t.target.ruleIndex)}, ctx=#{config.context}" if debug
  followState = t.followState
  pushed = SingletonPredictionContext.create(config.context, followState.stateNumber)
  ATNConfig.new(t.target, nil, pushed, nil, config)
end
1382
# Returns the alternatives found in the conflicting alt subsets that
# PredictionMode computes for +configs+.
def getConflictingAlts(configs)
  PredictionMode.getAlts(PredictionMode.getConflictingAltSubsets(configs))
end
1386
+ # Sam pointed out a problem with the previous definition, v3, of
1387
+ # ambiguous states. If we have another state associated with conflicting
1388
+ # alternatives, we should keep going. For example, the following grammar
1389
+ #
1390
+ # s : (ID | ID ID?) ';' ;
1391
+ #
1392
+ # When the ATN simulation reaches the state before ';', it has a DFA
1393
+ # state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally
1394
+ # 12|1|[] and 12|2|[] conflict, but we cannot stop processing this node
1395
+ # because alternative two has another way to continue, via [6|2|[]].
1396
+ # The key is that we have a single state that has config's only associated
1397
+ # with a single alternative, 2, and crucially the state transitions
1398
+ # among the configurations are all non-epsilon transitions. That means
1399
+ # we don't consider any conflicts that include alternative 2. So, we
1400
+ # ignore the conflict between alts 1 and 2. We ignore a set of
1401
+ # conflicting alts when there is an intersection with an alternative
1402
+ # associated with a single alt state in the state&rarr;config-list map.
1403
+ #
1404
+ # It's also the case that we might have two conflicting configurations but
1405
+ # also a 3rd nonconflicting configuration for a different alternative:
1406
+ # [1|1|[], 1|2|[], 8|3|[]]. This can come about from grammar:
1407
+ #
1408
+ # a : A | A | A B ;
1409
+ #
1410
+ # After matching input A, we reach the stop state for rule A, state 1.
1411
+ # State 8 is the state right before B. Clearly alternatives 1 and 2
1412
+ # conflict and no amount of further lookahead will separate the two.
1413
+ # However, alternative 3 will be able to continue and so we do not
1414
+ # stop working on this state. In the previous example, we're concerned
1415
+ # with states associated with the conflicting alternatives. Here alt
1416
+ # 3 is not associated with the conflicting configs, but since we can continue
1417
+ # looking for input reasonably, I don't declare the state done. We
1418
+ # ignore a set of conflicting alts when we have an alternative
1419
+ # that we still need to pursue.
1420
+ #
1421
+
1422
# Returns a one-element Set holding the configuration set's unique
# alternative when it has one, otherwise the set's conflictingAlts.
def getConflictingAltsOrUniqueAlt(configs)
  return configs.conflictingAlts if configs.uniqueAlt == ATN::INVALID_ALT_NUMBER
  Set.new.add(configs.uniqueAlt)
end
1432
# Returns a printable name for token type +t+.
#
# t - integer token type
#
# Returns "EOF" for Token::EOF, "NAME<t>" when the parser has a name for the
# type, and the bare number otherwise. A type beyond the end of the name
# table also prints a diagnostic.
def getTokenName(t)
  return "EOF" if t == Token::EOF

  names = parser && parser.tokenNames
  if names
    if t >= names.length
      puts "#{t} ttype out of range: #{names}"
      puts parser.getInputStream().getTokens().to_s
    elsif t >= 0 && names[t]
      # The `t >= 0` guard matters: a negative index would make Ruby count
      # from the end of the table and report an unrelated token's name.
      return "#{names[t]}<#{t}>"
    end
  end
  t.to_s
end
1446
# Returns the printable name of the next lookahead symbol of +input+.
def getLookaheadName(input)
  upcoming = input.LA(1)
  getTokenName(upcoming)
end
1449
+ # Used for debugging in adaptivePredict around execATN but I cut
1450
+ # it out for clarity now that alg. works well. We can leave this
1451
+ # "dead" code for a bit.
1452
+ #
1453
# Debugging aid: writes every dead-end configuration carried by +nvae+ to
# $stderr, each followed by a short description of the first outgoing
# transition of its state ("no edges" when there is none or it is neither an
# atom nor a set transition).
#
# nvae - exception object responding to getDeadEndConfigs
def dumpDeadEndConfigs(nvae)
  $stderr.print "dead end configs: "
  nvae.getDeadEndConfigs().each do |c|
    trans = "no edges"
    unless c.state.transitions.empty?
      t = c.state.transitions[0]
      if t.kind_of?(AtomTransition)
        trans = "Atom #{getTokenName(t.label)}"
      elsif t.kind_of?(SetTransition)
        # A NotSetTransition is reported as a negated set.
        neg = t.kind_of?(NotSetTransition) ? "~" : ""
        trans = "#{neg}Set #{t.set}"
      end
    end
    # This line had been commented out, which left the description above
    # computed but never shown.
    $stderr.puts "#{c.toString(parser, true)}:#{trans}"
  end
end
1474
# Builds (does not raise) a NoViableAltException from the token at
# +startIndex+, the current lookahead token, the dead-end +configs+ and the
# outer context.
def noViableAlt(input, outerContext, configs, startIndex)
  NoViableAltException.new(
    parser,
    input,
    input.get(startIndex),
    input.LT(1),
    configs,
    outerContext
  )
end
1477
+
1478
# Returns the single alternative shared by every configuration in +configs+,
# or ATN::INVALID_ALT_NUMBER when the set is empty or mixes alternatives.
def getUniqueAlt(configs)
  unique = ATN::INVALID_ALT_NUMBER
  configs.each do |cfg|
    first_seen = unique == ATN::INVALID_ALT_NUMBER
    # A second, different alternative means there is no unique one.
    return ATN::INVALID_ALT_NUMBER unless first_seen || cfg.alt == unique
    unique = cfg.alt
  end
  unique
end
1489
+ #
1490
+ # Add an edge to the DFA, if possible. This method calls
1491
+ # {@link #addDFAState} to ensure the {@code to} state is present in the
1492
+ # DFA. If {@code from} is {@code null}, or if {@code t} is outside the
1493
+ # range of edges that can be represented in the DFA tables, this method
1494
+ # returns without adding the edge to the DFA.
1495
+ #
1496
+ # <p>If {@code to} is {@code null}, this method returns {@code null}.
1497
+ # Otherwise, this method returns the {@link DFAState} returned by calling
1498
+ # {@link #addDFAState} for the {@code to} state.</p>
1499
+ #
1500
+ # @param dfa The DFA
1501
+ # @param from The source state for the edge
1502
+ # @param t The input symbol
1503
+ # @param to The target state for the edge
1504
+ #
1505
+ # @return If {@code to} is {@code null}, this method returns {@code null};
1506
+ # otherwise this method returns the result of calling {@link #addDFAState}
1507
+ # on {@code to}
1508
+ #
1509
def addDFAEdge(dfa, from_, t, to)
  puts "EDGE #{from_} -> #{to} upon #{getTokenName(t)}" if debug
  return nil if to.nil?

  # Use the state already stored in the DFA when there is one, not the incoming one.
  target = addDFAState(dfa, to)
  return target if from_.nil? || t < -1 || t > atn.maxTokenType

  # Symbols from -1 up to maxTokenType are stored, so indices are shifted by
  # one and the table has maxTokenType + 2 slots.
  from_.edges = Array.new(atn.maxTokenType + 2) if from_.edges.nil?
  from_.edges[t + 1] = target # connect

  if debug
    names = parser.nil? ? nil : parser.tokenNames
    print "DFA=\n#{dfa.toString(names)}"
  end
  target
end
1538
+ #
1539
+ # Add state {@code D} to the DFA if it is not already present, and return
1540
+ # the actual instance stored in the DFA. If a state equivalent to {@code D}
1541
+ # is already in the DFA, the existing state is returned. Otherwise this
1542
+ # method returns {@code D} after adding it to the DFA.
1543
+ #
1544
+ # <p>If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and
1545
+ # does not change the DFA.</p>
1546
+ #
1547
+ # @param dfa The dfa
1548
+ # @param D The DFA state to add
1549
+ # @return The state stored in the DFA. This will be either the existing
1550
+ # state if {@code D} is already in the DFA, or {@code D} itself if the
1551
+ # state was not already present.
1552
+ #
1553
def addDFAState(dfa, cD)
  # The shared ERROR state is never stored in a DFA.
  return cD if cD.equal?(ATNSimulator::ERROR)

  known = dfa.states[cD]
  return known if known

  # New state: number it, freeze its configuration set, then register it.
  cD.stateNumber = dfa.states.length
  unless cD.configs.readonly
    cD.configs.optimizeConfigs(self)
    cD.configs.setReadonly(true)
  end
  dfa.states[cD] = cD
  puts "adding new DFA state: #{cD}" if debug
  cD
end
1574
# Traces (in debug mode) and forwards an "attempting full context" event to
# the parser's error listener dispatch.
#
# dfa             - DFA of the decision being predicted
# conflictingAlts - alternatives in conflict
# configs         - configuration set at the point of the conflict
# startIndex      - index where prediction started
# stopIndex       - index where the conflict was detected
#
# Returns the listener's result, or nil when there is no parser.
def reportAttemptingFullContext(dfa, conflictingAlts, configs, startIndex, stopIndex)
  if debug || retry_debug
    interval = startIndex..stopIndex
    # The parser may be absent (see the guard below), so the trace must not
    # dereference it unconditionally.
    text = parser ? parser.getTokenStream().getText(interval) : nil
    puts "reportAttemptingFullContext decision=#{dfa.decision}:#{configs}, input=#{text}"
  end
  return unless parser
  parser.getErrorListenerDispatch().reportAttemptingFullContext(parser, dfa, startIndex, stopIndex, conflictingAlts, configs)
end
1584
# Traces (in debug mode) and forwards a "context sensitivity" event to the
# parser's error listener dispatch.
#
# dfa        - DFA of the decision being predicted
# prediction - alternative that was predicted
# configs    - configuration set at the point of the report
# startIndex - index where prediction started
# stopIndex  - index where the report was triggered
#
# Returns the listener's result, or nil when there is no parser.
def reportContextSensitivity(dfa, prediction, configs, startIndex, stopIndex)
  if debug || retry_debug
    interval = startIndex..stopIndex
    # The parser may be absent (see the guard below), so the trace must not
    # dereference it unconditionally.
    text = parser ? parser.getTokenStream().getText(interval) : nil
    puts "reportContextSensitivity decision=#{dfa.decision}:#{configs}, input=#{text}"
  end
  return unless parser
  parser.getErrorListenerDispatch().reportContextSensitivity(parser, dfa, startIndex, stopIndex, prediction, configs)
end
1594
+
1595
+ # If context sensitive parsing, we know it's ambiguity not conflict#
1596
# Traces (in debug mode) and forwards an ambiguity event to the parser's
# error listener dispatch.
#
# dfa        - DFA of the decision being predicted
# cD         - DFA state at which the ambiguity was found (not used here)
# startIndex - index where prediction started
# stopIndex  - index where the ambiguity was detected
# exact      - forwarded to the listener unchanged
# ambigAlts  - the ambiguous alternatives
# configs    - configuration set at the point of the ambiguity
#
# Returns the listener's result, or nil when there is no parser.
def reportAmbiguity(dfa, cD, startIndex, stopIndex, exact, ambigAlts, configs)
  if debug || retry_debug
    # NOTE: the reference implementation's per-alternative path dump
    # (ParserATNPathFinder) is not ported.
    interval = startIndex..stopIndex
    # The parser may be absent (see the guard below), so the trace must not
    # dereference it unconditionally.
    text = parser ? parser.getTokenStream().getText(interval) : nil
    puts "reportAmbiguity #{ambigAlts}:#{configs}, input=#{text}"
  end
  return unless parser
  parser.getErrorListenerDispatch().reportAmbiguity(parser, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
end
1622
+ end