antlr4 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +27 -0
  3. data/README.md +46 -0
  4. data/lib/antlr4.rb +262 -0
  5. data/lib/antlr4/BufferedTokenStream.rb +306 -0
  6. data/lib/antlr4/CommonTokenFactory.rb +53 -0
  7. data/lib/antlr4/CommonTokenStream.rb +56 -0
  8. data/lib/antlr4/FileStream.rb +14 -0
  9. data/lib/antlr4/InputStream.rb +82 -0
  10. data/lib/antlr4/IntervalSet.rb +341 -0
  11. data/lib/antlr4/LL1Analyzer.rb +177 -0
  12. data/lib/antlr4/Lexer.rb +335 -0
  13. data/lib/antlr4/ListTokenSource.rb +140 -0
  14. data/lib/antlr4/Parser.rb +562 -0
  15. data/lib/antlr4/ParserInterpreter.rb +149 -0
  16. data/lib/antlr4/ParserRuleContext.rb +162 -0
  17. data/lib/antlr4/PredictionContext.rb +690 -0
  18. data/lib/antlr4/Recognizer.rb +162 -0
  19. data/lib/antlr4/RuleContext.rb +226 -0
  20. data/lib/antlr4/Token.rb +124 -0
  21. data/lib/antlr4/TokenFactory.rb +3 -0
  22. data/lib/antlr4/TokenSource.rb +4 -0
  23. data/lib/antlr4/TokenStream.rb +3 -0
  24. data/lib/antlr4/TraceListener.rb +23 -0
  25. data/lib/antlr4/atn/ATN.rb +133 -0
  26. data/lib/antlr4/atn/ATNConfig.rb +146 -0
  27. data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
  28. data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
  29. data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
  30. data/lib/antlr4/atn/ATNSimulator.rb +43 -0
  31. data/lib/antlr4/atn/ATNState.rb +253 -0
  32. data/lib/antlr4/atn/ATNType.rb +22 -0
  33. data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
  34. data/lib/antlr4/atn/LexerAction.rb +311 -0
  35. data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
  36. data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
  37. data/lib/antlr4/atn/PredictionMode.rb +525 -0
  38. data/lib/antlr4/atn/SemanticContext.rb +355 -0
  39. data/lib/antlr4/atn/Transition.rb +297 -0
  40. data/lib/antlr4/base.rb +60 -0
  41. data/lib/antlr4/dfa/DFA.rb +128 -0
  42. data/lib/antlr4/dfa/DFASerializer.rb +77 -0
  43. data/lib/antlr4/dfa/DFAState.rb +133 -0
  44. data/lib/antlr4/error.rb +151 -0
  45. data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
  46. data/lib/antlr4/error/ErrorListener.rb +109 -0
  47. data/lib/antlr4/error/ErrorStrategy.rb +742 -0
  48. data/lib/antlr4/tree/Chunk.rb +31 -0
  49. data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
  50. data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
  51. data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
  52. data/lib/antlr4/tree/RuleTagToken.rb +39 -0
  53. data/lib/antlr4/tree/TokenTagToken.rb +38 -0
  54. data/lib/antlr4/tree/Tree.rb +204 -0
  55. data/lib/antlr4/tree/Trees.rb +111 -0
  56. data/lib/antlr4/version.rb +5 -0
  57. data/lib/antlr4/xpath/XPath.rb +354 -0
  58. data/lib/double_key_map.rb +78 -0
  59. data/lib/java_symbols.rb +24 -0
  60. data/lib/uuid.rb +87 -0
  61. data/test/test_intervalset.rb +664 -0
  62. data/test/test_tree.rb +140 -0
  63. data/test/test_uuid.rb +122 -0
  64. metadata +109 -0
@@ -0,0 +1,43 @@
1
+
2
# Common base of the ATN simulators. It stores the ATN being simulated and an
# optional shared cache used to canonicalize prediction contexts.
class ATNSimulator
  # Sentinel DFA state. Must distinguish between a missing edge and an edge
  # we know leads nowhere.
  ERROR = DFAState.new(0x7FFFFFFF, ATNConfigSet.new)

  # The context cache maps all PredictionContext objects that are == to a
  # single cached copy. This cache is shared across all contexts in all
  # ATNConfigs in all DFA states. We rebuild each ATNConfigSet to use only
  # cached nodes/graphs in addDFAState(). We don't want to fill this during
  # closure() since there are lots of contexts that pop up but are not used
  # ever again. It also greatly slows down closure().
  #
  # This cache makes a huge difference in memory and a little bit in speed.
  # For the Java grammar on java.*, it dropped the memory requirements at the
  # end from 25M to 16M. We don't store any of the full context graphs in the
  # DFA because they are limited to local context only, but apparently
  # there's a lot of repetition there as well. We optimize the config
  # contexts before storing the config set in the DFA states by literally
  # rebuilding them with cached subgraphs only.
  #
  # A cache for use during closure operations, whacked after each
  # adaptivePredict(), was also tried. It cost a little more time and didn't
  # save on the overall footprint, so it's not worth the complexity.
  include PredictionContextFunctions

  attr_accessor :atn, :sharedContextCache

  # @param atn the ATN to simulate; must not be nil
  # @param sharedContextCache the shared prediction-context cache, or nil
  # @raise [Exception] when +atn+ is nil
  def initialize(atn, sharedContextCache)
    raise Exception, "ATN is nil" if atn.nil?

    self.atn = atn
    self.sharedContextCache = sharedContextCache
  end

  # Returns +context+ untouched when no shared cache is configured; otherwise
  # delegates to getCachedPredictionContext with a fresh, empty visited map.
  def getCachedContext(context)
    cache = self.sharedContextCache
    return context if cache.nil?

    getCachedPredictionContext(context, cache, {})
  end
end
43
+
@@ -0,0 +1,253 @@
1
+
2
+ #from antlr4.atn.Transition import Transition
3
# Not referenced anywhere in this file; presumably the initial capacity of a
# state's transition list in the reference runtime (TODO confirm).
INITIAL_NUM_TRANSITIONS = 4

# Base class of every ATN state. A state knows which ATN it belongs to, its
# number and type, the rule it is part of, and its outgoing transitions.
# Two states compare equal (==, eql?, hash) when their stateNumber matches.
class ATNState
  # constants for serialization
  INVALID_TYPE = 0
  BASIC = 1
  RULE_START = 2
  BLOCK_START = 3
  PLUS_BLOCK_START = 4
  STAR_BLOCK_START = 5
  TOKEN_START = 6
  RULE_STOP = 7
  BLOCK_END = 8
  STAR_LOOP_BACK = 9
  STAR_LOOP_ENTRY = 10
  PLUS_LOOP_BACK = 11
  LOOP_END = 12

  INVALID_STATE_NUMBER = -1

  # Display names of the state types, indexed by the constants above.
  # Built once and shared by all instances instead of once per state.
  SERIALIZATION_NAMES = [
    "INVALID",
    "BASIC",
    "RULE_START",
    "BLOCK_START",
    "PLUS_BLOCK_START",
    "STAR_BLOCK_START",
    "TOKEN_START",
    "RULE_STOP",
    "BLOCK_END",
    "STAR_LOOP_BACK",
    "STAR_LOOP_ENTRY",
    "PLUS_LOOP_BACK",
    "LOOP_END"
  ].freeze

  attr_accessor :atn, :stateNumber, :stateType, :ruleIndex
  attr_accessor :epsilonOnlyTransitions, :transitions, :nextTokenWithinRule
  attr_reader :serializationNames

  def initialize
    # Which ATN are we in?
    @atn = nil
    @stateNumber = ATNState::INVALID_STATE_NUMBER
    @stateType = nil
    @ruleIndex = 0 # at runtime, we don't have Rule objects
    @epsilonOnlyTransitions = false
    # Track the transitions emanating from this ATN state.
    @transitions = []
    # Used to cache lookahead during parsing, not used during construction
    @nextTokenWithinRule = nil
    @serializationNames = SERIALIZATION_NAMES
  end

  # States hash by their number, matching ==/eql? below.
  def hash
    stateNumber
  end

  # True when +other+ is an ATNState with the same stateNumber.
  def ==(other)
    other.kind_of?(ATNState) && stateNumber == other.stateNumber
  end

  # Hash and Set compare keys with eql?, not ==. Without this alias two
  # states that are == would still be treated as different keys.
  alias eql? ==

  def onlyHasEpsilonTransitions
    epsilonOnlyTransitions
  end

  def isNonGreedyExitState
    false
  end

  def to_s
    stateNumber.to_s
  end

  def inspect
    "<ATNState #{stateNumber.to_s} >"
  end

  # Adds +trans+ to this state's transitions: appended when +index+ is -1,
  # otherwise inserted at +index+. epsilonOnlyTransitions takes the value of
  # the first transition's isEpsilon and drops to false as soon as a later
  # transition disagrees with it.
  #
  # @return [Array] the transitions array
  def addTransition(trans, index=-1)
    if transitions.empty?
      self.epsilonOnlyTransitions = trans.isEpsilon
    elsif epsilonOnlyTransitions != trans.isEpsilon
      # TODO: report that this state has both epsilon and non-epsilon
      # transitions.
      self.epsilonOnlyTransitions = false
    end

    if index == -1
      transitions.push(trans)
    else
      transitions.insert(index, trans)
    end
  end
end
93
+
94
# Plain ATN state; its stateType is ATNState::BASIC.
class BasicState < ATNState
  def initialize
    super()
    @stateType = ATNState::BASIC
  end
end
101
+
102
# Base class for states that carry a decision number. A new instance starts
# with decision -1 and nonGreedy false; the stateType is left to subclasses.
class DecisionState < ATNState
  attr_accessor :decision, :nonGreedy

  def initialize
    super()
    @decision = -1
    @nonGreedy = false
  end
end
112
+ # INVALID_TYPE = 0
113
+ # BASIC = 1
114
+ # RULE_START = 2
115
+ # BLOCK_START = 3
116
+ # PLUS_BLOCK_START = 4
117
+ # STAR_BLOCK_START = 5
118
+ # TOKEN_START = 6
119
+ # RULE_STOP = 7
120
+ # BLOCK_END = 8
121
+ # STAR_LOOP_BACK = 9
122
+ # STAR_LOOP_ENTRY = 10
123
+ # PLUS_LOOP_BACK = 11
124
+ # LOOP_END = 12
125
# The start of a regular {@code (...)} block. endState is nil until assigned.
class BlockStartState < DecisionState
  attr_accessor :endState

  def initialize
    super()
    @endState = nil
  end
end
134
+
135
# Block start state whose stateType is ATNState::BLOCK_START.
class BasicBlockStartState < BlockStartState
  def initialize
    super()
    @stateType = ATNState::BLOCK_START
  end
end
142
+
143
# Terminal node of a simple {@code (a|b|c)} block. startState is nil until
# assigned.
class BlockEndState < ATNState
  attr_accessor :startState

  def initialize
    super()
    @stateType = ATNState::BLOCK_END
    @startState = nil
  end
end
153
+
154
# The last node in the ATN for a rule, unless that rule is the start symbol.
# In that case, there is one transition to EOF. Later, we might encode
# references to all calls to this rule to compute FOLLOW sets for error
# handling.
class RuleStopState < ATNState
  # NOTE(review): never assigned in this class, so it reads as nil until a
  # caller sets it.
  attr_accessor :stopState

  def initialize
    super()
    @stateType = ATNState::RULE_STOP
  end
end
167
+
168
# State of type ATNState::RULE_START. stopState starts as nil and
# isPrecedenceRule as false.
class RuleStartState < ATNState
  attr_accessor :stopState, :isPrecedenceRule

  def initialize
    super()
    @stateType = ATNState::RULE_START
    @stopState = nil
    @isPrecedenceRule = false
  end
end
178
+
179
# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions:
# one to loop back to the start of the block and one to exit.
class PlusLoopbackState < DecisionState
  def initialize
    super()
    @stateType = ATNState::PLUS_LOOP_BACK
  end
end
189
+
190
# Start of {@code (A|B|...)+} loop. Technically a decision state, but we
# don't use it for code generation; somebody might need it, so it is defined
# for completeness. In reality, the {@link PlusLoopbackState} node is the
# real decision-making node for {@code A+}.
class PlusBlockStartState < BlockStartState
  attr_accessor :loopBackState

  def initialize
    super()
    @stateType = ATNState::PLUS_BLOCK_START
    @loopBackState = nil
  end
end
204
+
205
# The block that begins a closure loop.
class StarBlockStartState < BlockStartState
  def initialize
    super()
    @stateType = ATNState::STAR_BLOCK_START
  end
end
213
+
214
# State whose stateType is ATNState::STAR_LOOP_BACK.
class StarLoopbackState < ATNState
  def initialize
    super()
    @stateType = ATNState::STAR_LOOP_BACK
  end
end
221
+
222
+
223
# Decision state whose stateType is ATNState::STAR_LOOP_ENTRY.
class StarLoopEntryState < DecisionState
  attr_accessor :loopBackState, :precedenceRuleDecision

  def initialize
    super()
    @stateType = ATNState::STAR_LOOP_ENTRY
    @loopBackState = nil
    # Indicates whether this state can benefit from a precedence DFA during
    # SLL decision making. Starts out as nil.
    @precedenceRuleDecision = nil
  end
end
234
+
235
# Marks the end of a * or + loop. loopBackState is nil until assigned.
class LoopEndState < ATNState
  attr_accessor :loopBackState

  def initialize
    super()
    @stateType = ATNState::LOOP_END
    @loopBackState = nil
  end
end
245
+
246
# The Tokens rule start state linking to each lexer rule start state.
class TokensStartState < DecisionState
  def initialize
    super()
    @stateType = ATNState::TOKEN_START
  end
end
@@ -0,0 +1,22 @@
1
+
2
# The two kinds of ATN, encoded as integers: LEXER (0) and PARSER (1).
class ATNType
  LEXER = 0
  PARSER = 1

  # Method-style access to the LEXER constant.
  def self.LEXER
    ATNType::LEXER
  end

  # Method-style access to the PARSER constant.
  def self.PARSER
    ATNType::PARSER
  end

  # Maps an ordinal to the matching ATN type constant.
  #
  # @param i the ordinal to look up
  # @return the constant that matches +i+
  # @raise [Exception] when +i+ matches neither LEXER nor PARSER
  def self.fromOrdinal(i)
    matched = [ATNType::LEXER, ATNType::PARSER].find { |type| type === i }
    raise Exception, "ATNType: Unknown value:#{i} " if matched.nil?

    matched
  end
end
@@ -0,0 +1,612 @@
1
# When we hit an accept state in either the DFA or the ATN, we have to notify
# the character stream to start buffering characters via {@link IntStream#mark}
# and record the current state. The current sim state includes the current
# index into the input, the current line, and the current character position
# in that line. Note that the Lexer is tracking the starting line and column
# of the token. These variables track the "state" of the simulator when it
# hits an accept state.
#
# We track these variables separately for the DFA and ATN simulation because
# the DFA simulation often has to fail over to the ATN simulation. If the ATN
# simulation fails, we need the DFA to fall back to its previously accepted
# state, if any. If the ATN succeeds, then the ATN does the accept and the
# DFA simulator that invoked it can simply return the predicted token type.
class SimState
  attr_accessor :index, :line, :column, :dfaState

  def initialize
    reset
  end

  # Puts every field back to its "nothing recorded" value:
  # index -1, line 0, column -1, dfaState nil.
  def reset
    @index, @line, @column = -1, 0, -1
    @dfaState = nil
  end
end
30
+
31
+
32
# ATN simulator used by the lexer. For each lexer mode it keeps a DFA (in
# decisionToDFA), runs the input through the ATN, and records the DFA states
# and edges it discovers along the way so later matches can reuse them.
class LexerATNSimulator < ATNSimulator
  #include JavaSymbols

  # Class-wide debug switches and a counter of match() calls.
  @@debug = false
  @@dfa_debug = false
  @@match_calls = 0

  # Class-level readers for the values above. These read the class variables
  # directly; an attr_reader on the singleton class would read class-level
  # instance variables that are never assigned and always return nil.
  def self.debug; @@debug; end
  def self.dfa_debug; @@dfa_debug; end
  def self.match_calls; @@match_calls; end

  def debug; @@debug; end
  def dfa_debug; @@dfa_debug; end
  def match_calls; @@match_calls; end

  MIN_DFA_EDGE = 0
  MAX_DFA_EDGE = 127 # forces unicode to stay in ATN

  attr_accessor :decisionToDFA, :recog, :startIndex, :line, :column
  attr_accessor :mode, :prevAccept

  # @param _recog the recognizer, or nil
  # @param _atn the ATN to simulate (passed to ATNSimulator)
  # @param decision_to_dfa the DFAs, indexed by lexer mode; must not be nil
  # @param shared_context_cache passed to ATNSimulator
  # @raise [Exception] when +decision_to_dfa+ is nil
  def initialize(_recog, _atn, decision_to_dfa, shared_context_cache)
    super(_atn, shared_context_cache)

    if decision_to_dfa.nil?
      raise Exception.new("Error: #{self.class} decisionToDFA is nil.")
    end
    @decisionToDFA = decision_to_dfa
    @recog = _recog
    # The current token's starting index into the character stream.
    # Shared across DFA to ATN simulation in case the ATN fails and the
    # DFA did not have a previous accept state. In this case, we use the
    # ATN-generated exception object.
    @startIndex = -1
    # line number 1..n within the input
    @line = 1
    # The index of the character relative to the beginning of the line 0..n-1
    @column = 0
    @mode = Lexer::DEFAULT_MODE
    # Used during DFA/ATN exec to record the most recent accept configuration info
    self.prevAccept = SimState.new
  end

  # Copies column, line, mode and startIndex from another simulator.
  def copyState(simulator)
    self.column = simulator.column
    self.line = simulator.line
    self.mode = simulator.mode
    self.startIndex = simulator.startIndex
  end

  # Matches one token starting at the current input position in +mode+ and
  # returns the value produced by matchATN/execATN. The input mark taken at
  # the start is always released, even when matching raises.
  def match(input, mode)
    @@match_calls += 1
    self.mode = mode
    mark = input.mark
    begin
      self.startIndex = input.index
      self.prevAccept.reset
      dfa = self.decisionToDFA[mode]
      type_check(dfa, DFA)
      # No start state yet for this mode: build it through the ATN.
      if dfa.s0.nil?
        return self.matchATN(input)
      else
        return self.execATN(input, dfa.s0)
      end
    ensure
      input.release(mark)
    end
  end

  # Restores the position-tracking fields to their initial values.
  def reset
    self.prevAccept.reset
    @startIndex = -1
    @line = 1
    @column = 0
    @mode = Lexer::DEFAULT_MODE
  end

  # Not implemented in this port.
  def clearDFA
    raise Exception.new("not implemented")
  end

  # Computes the start state for the current mode from the ATN, registers it
  # as a DFA state (and as the DFA's s0 unless a predicate was traversed),
  # then continues matching with execATN.
  def matchATN(input)
    startState = self.atn.modeToStartState[self.mode]

    if self.debug
      puts "matchATN mode #{self.mode} start: #{startState}"
    end

    old_mode = self.mode
    s0_closure = self.computeStartState(input, startState)
    suppressEdge = s0_closure.hasSemanticContext
    s0_closure.hasSemanticContext = false

    nxt = self.addDFAState(s0_closure)
    self.decisionToDFA[self.mode].s0 = nxt unless suppressEdge

    predict = self.execATN(input, nxt)

    if self.debug
      puts "DFA after matchATN: #{self.decisionToDFA[old_mode].toLexerString()}"
    end

    predict
  end

  # Walks the input from DFA state +ds0+, reusing existing DFA edges where
  # possible and computing new target states otherwise, until no further
  # progress is possible; then accepts or fails via failOrAccept.
  #
  # @raise [Exception] when +ds0+ is nil
  def execATN(input, ds0)
    if self.debug
      puts "start state closure=#{ds0.configs.to_s}"
    end

    t = input.LA(1)
    s = ds0 # s is current/from DFA state

    raise Exception.new("s is nil") if s.nil?

    while true do # while more work
      if self.debug
        puts "execATN loop starting closure: #{s.configs}"
      end

      # As we move src->trg, src->trg, we keep track of the previous trg to
      # avoid looking up the DFA state again, which is expensive.
      # If the previous target was already part of the DFA, we might
      # be able to avoid doing a reach operation upon t. If s!=null,
      # it means that semantic predicates didn't prevent us from
      # creating a DFA state. Once we know s!=null, we check to see if
      # the DFA state has an edge already for t. If so, we can just reuse
      # its configuration set; there's no point in re-computing it.
      # This is kind of like doing DFA simulation within the ATN
      # simulation because DFA simulation is really just a way to avoid
      # computing reach/closure sets. Technically, once we know that
      # we have a previously added DFA state, we could jump over to
      # the DFA simulator. But, that would mean popping back and forth
      # a lot and making things more complicated algorithmically.
      # This optimization makes a lot of sense for loops within DFA.
      # A character will take us back to an existing DFA state
      # that already has lots of edges out of it. e.g., .* in comments.
      target = self.getExistingTargetState(s, t)
      if target.nil?
        target = self.computeTargetState(input, s, t)
      end
      break if target.equal? ATNSimulator::ERROR

      if target.isAcceptState
        self.captureSimState(self.prevAccept, input, target)
        break if t == Token::EOF
      end

      if t != Token::EOF
        self.consume(input)
        t = input.LA(1)
      end

      s = target # flip; current DFA target becomes new src/from state
    end

    self.failOrAccept(self.prevAccept, input, s.configs, t)
  end

  # Get an existing target state for an edge in the DFA. If the target state
  # for the edge has not yet been computed or is otherwise not available,
  # this method returns nil.
  #
  # @param s The current DFA state
  # @param t The next input symbol
  # @return The existing target DFA state for the given input symbol
  # +t+, or nil if the target state for this edge is not already cached
  def getExistingTargetState(s, t)
    if s.edges.nil? || t < LexerATNSimulator::MIN_DFA_EDGE || t > LexerATNSimulator::MAX_DFA_EDGE
      return nil
    end

    target = s.edges[t - LexerATNSimulator::MIN_DFA_EDGE]
    if self.debug && !target.nil?
      puts "reuse state #{s.stateNumber} edge to #{target.stateNumber}"
    end

    target
  end

  # Compute a target state for an edge in the DFA, and attempt to add the
  # computed state and corresponding edge to the DFA.
  #
  # @param input The input stream
  # @param s The current DFA state
  # @param t The next input symbol
  #
  # @return The computed target DFA state for the given input symbol
  # +t+. If +t+ does not lead to a valid DFA state, this method
  # returns ATNSimulator::ERROR.
  def computeTargetState(input, s, t)
    reach = OrderedATNConfigSet.new

    # if we don't find an existing DFA state
    # Fill reach starting from closure, following t transitions
    self.getReachableConfigSet(input, s.configs, reach, t)

    if reach.length == 0 # we got nowhere on t from s
      unless reach.hasSemanticContext
        # we got nowhere on t, don't throw out this knowledge; it'd
        # cause a failover from DFA later.
        self.addDFAEdge(s, t, ATNSimulator::ERROR)
      end
      # stop when we can't match any more char
      return ATNSimulator::ERROR
    end

    # Add an edge from s to target DFA found/created for reach
    self.addDFAEdge(s, t, nil, reach)
  end

  # Accepts the most recently recorded accept state, if +prevAccept+ holds
  # one, and returns its prediction. Otherwise returns Token::EOF when EOF is
  # the very first character of the token, and raises
  # LexerNoViableAltException in every other case.
  def failOrAccept(prevAccept, input, reach, t)
    # Test the same object that is used below (the parameter).
    if !prevAccept.dfaState.nil?
      lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor
      self.accept(input, lexerActionExecutor, self.startIndex, prevAccept.index, prevAccept.line, prevAccept.column)
      return prevAccept.dfaState.prediction
    end

    # if no accept and EOF is first char, return EOF
    if t == Token::EOF && input.index == self.startIndex
      return Token::EOF
    end
    raise LexerNoViableAltException.new(self.recog, input, self.startIndex, reach)
  end

  # Given a starting configuration set, figure out all ATN configurations
  # we can reach upon input +t+. Parameter +reach+ is a return parameter.
  def getReachableConfigSet(input, closure, reach, t)
    # this is used to skip processing for configs which have a lower priority
    # than a config that already reached an accept state for the same rule
    skipAlt = ATN::INVALID_ALT_NUMBER
    closure.each do |cfg|
      currentAltReachedAcceptState = (cfg.alt == skipAlt)
      next if currentAltReachedAcceptState && cfg.passedThroughNonGreedyDecision

      if self.debug
        puts "testing #{self.getTokenName(t)} at #{cfg.toString(self.recog, true)}"
      end

      cfg.state.transitions.each do |trans| # for each transition
        target = self.getReachableTarget(trans, t)
        next unless target

        lexerActionExecutor = cfg.lexerActionExecutor
        if lexerActionExecutor
          lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.index - self.startIndex)
        end
        treatEofAsEpsilon = (t == Token::EOF)
        config = LexerATNConfig.new(target, nil, nil, nil, lexerActionExecutor, cfg)
        if self.closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)
          # any remaining configs for this alt have a lower priority than
          # the one that just reached an accept state.
          skipAlt = cfg.alt
          break
        end
      end
    end
  end

  # Commits a match: repositions the input at +index+, restores the recorded
  # line/column, consumes one more character unless at EOF, and runs the
  # lexer actions when both an executor and a recognizer are present.
  def accept(input, lexerActionExecutor, start_index, index, _line, charPos)
    if self.debug
      puts "ACTION #{lexerActionExecutor}"
    end

    # seek to after last char in token
    input.seek(index)
    self.line = _line
    self.column = charPos
    if input.LA(1) != Token::EOF
      self.consume(input)
    end
    if lexerActionExecutor && self.recog
      lexerActionExecutor.execute(self.recog, input, start_index)
    end
  end

  # Returns the transition's target when it matches +t+, otherwise nil.
  def getReachableTarget(trans, t)
    trans.matches(t, 0, 0xFFFE) ? trans.target : nil
  end

  # Builds the initial configuration set for start state +p+: one closure per
  # outgoing transition, with alternatives numbered from 1.
  def computeStartState(input, p)
    initialContext = PredictionContext.EMPTY
    configs = OrderedATNConfigSet.new
    p.transitions.each_with_index do |trans, i|
      c = LexerATNConfig.new(trans.target, i + 1, initialContext)
      self.closure(input, c, configs, false, false, false)
    end
    configs
  end

  # Since the alternatives within any lexer decision are ordered by
  # preference, this method stops pursuing the closure as soon as an accept
  # state is reached. After the first accept state is reached by depth-first
  # search from +config+, all other (potentially reachable) states for
  # this rule would have a lower priority.
  #
  # @return true if an accept state is reached, otherwise false.
  def closure(input, config, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
    if self.debug
      puts "closure(#{config.toString(self.recog, true)})"
    end

    if config.state.kind_of? RuleStopState
      if self.debug
        if self.recog
          puts "closure at #{self.recog.getRuleNames[config.state.ruleIndex]} rule stop #{ config}"
        else
          puts "closure at rule stop #{ config}"
        end
      end

      if config.context.nil? || config.context.hasEmptyPath()
        if config.context.nil? || config.context.isEmpty()
          configs.add(config)
          return true
        else
          configs.add(LexerATNConfig.new(config.state, nil, PredictionContext.EMPTY, nil, nil, config))
          currentAltReachedAcceptState = true
        end
      end
      if config.context && !config.context.isEmpty()
        0.upto(config.context.length - 1) do |i|
          if config.context.getReturnState(i) != PredictionContext::EMPTY_RETURN_STATE
            newContext = config.context.getParent(i) # "pop" return state
            returnState = self.atn.states[config.context.getReturnState(i)]
            c = LexerATNConfig.new(returnState, nil, newContext, nil, nil, config)
            currentAltReachedAcceptState = self.closure(input, c, configs,
                                  currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
          end
        end
      end
      return currentAltReachedAcceptState
    end

    # optimization
    unless config.state.epsilonOnlyTransitions
      if !currentAltReachedAcceptState || !config.passedThroughNonGreedyDecision
        configs.add(config)
      end
    end

    config.state.transitions.each do |t|
      c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
      if c
        currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
      end
    end
    currentAltReachedAcceptState
  end

  # Returns the configuration reached by following transition +t+ without
  # consuming input, or nil when +t+ cannot be followed that way.
  #
  # side-effect: can alter configs.hasSemanticContext
  #
  # @raise [UnsupportedOperationException] for precedence transitions
  def getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
    c = nil
    if t.serializationType == Transition::RULE
      newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
      c = LexerATNConfig.new(t.target, nil, newContext, nil, nil, config)
    elsif t.serializationType == Transition::PRECEDENCE
      raise UnsupportedOperationException.new("Precedence predicates are not supported in lexers.")
    elsif t.serializationType == Transition::PREDICATE
      # Track traversing semantic predicates. If we traverse,
      # we cannot add a DFA state for this "reach" computation
      # because the DFA would not test the predicate again in the
      # future. Rather than creating collections of semantic predicates
      # like v3 and testing them on prediction, v4 will test them on the
      # fly all the time using the ATN not the DFA. This is slower but
      # semantically it's not used that often. One of the key elements to
      # this predicate mechanism is not adding DFA states that see
      # predicates immediately afterwards in the ATN. For example,
      #
      # a : ID {p1}? | ID {p2}? ;
      #
      # should create the start state for rule 'a' (to save start state
      # competition), but should not create target of ID state. The
      # collection of ATN states the following ID references includes
      # states reached by traversing predicates. Since this is when we
      # test them, we cannot cache the DFA state target of ID.
      if self.debug
        puts "EVAL rule #{t.ruleIndex}:#{t.predIndex}"
      end
      configs.hasSemanticContext = true
      if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative)
        # Must go through .new; calling LexerATNConfig(...) as a method
        # raises NoMethodError.
        c = LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
      end
    elsif t.serializationType == Transition::ACTION
      if config.context.nil? || config.context.hasEmptyPath()
        # execute actions anywhere in the start rule for a token.
        #
        # TODO: if the entry rule is invoked recursively, some
        # actions may be executed during the recursive call. The
        # problem can appear when hasEmptyPath() is true but
        # isEmpty() is false. In this case, the config needs to be
        # split into two contexts - one with just the empty path
        # and another with everything but the empty path.
        # Unfortunately, the current algorithm does not allow
        # getEpsilonTarget to return two configurations, so
        # additional modifications are needed before we can support
        # the split operation.
        lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
                                      self.atn.lexerActions[t.actionIndex])
        c = LexerATNConfig.new(t.target, nil, nil, nil, lexerActionExecutor, config)
      else
        # ignore actions in referenced rules
        c = LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
      end
    elsif t.serializationType == Transition::EPSILON
      c = LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
    elsif [Transition::ATOM, Transition::RANGE, Transition::SET].member? t.serializationType
      if treatEofAsEpsilon && t.matches(Token::EOF, 0, 0xFFFF)
        c = LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
      end
    end
    c
  end

  # Evaluate a predicate specified in the lexer.
  #
  # If +speculative+ is true, this method was called before {@link #consume}
  # for the matched character. It consumes that character before evaluating
  # the predicate so that position sensitive values properly reflect the
  # current lexer state, and afterwards restores +input+ and the simulator's
  # line/column to the original state.
  #
  # @param input The input stream.
  # @param ruleIndex The rule containing the predicate.
  # @param predIndex The index of the predicate within the rule.
  # @param speculative true if the current index in +input+ is
  # one character before the predicate's location.
  #
  # @return true if the specified predicate evaluates to true.
  def evaluatePredicate(input, ruleIndex, predIndex, speculative)
    # assume true if no recognizer was provided
    return true if self.recog.nil?

    unless speculative
      return self.recog.sempred(nil, ruleIndex, predIndex)
    end

    savedcolumn = self.column
    savedLine = self.line
    index = input.index
    marker = input.mark
    begin
      self.consume(input)
      return self.recog.sempred(nil, ruleIndex, predIndex)
    ensure
      self.column = savedcolumn
      self.line = savedLine
      input.seek(index)
      input.release(marker)
    end
  end

  # Records the current input index, line, column and +dfaState+ in
  # +settings+.
  def captureSimState(settings, input, dfaState)
    settings.index = input.index
    settings.line = self.line
    settings.column = self.column
    settings.dfaState = dfaState
  end

  # Connects +from_+ to a target state on symbol +tk+ and returns the target.
  # When +to+ is nil and +cfgs+ is given, the target is created (or found)
  # from +cfgs+ first. Only symbols within MIN_DFA_EDGE..MAX_DFA_EDGE get an
  # edge.
  def addDFAEdge(from_, tk, to=nil, cfgs=nil)
    if to.nil? && cfgs
      # leading to this call, ATNConfigSet.hasSemanticContext is used as a
      # marker indicating dynamic predicate evaluation makes this edge
      # dependent on the specific input sequence, so the static edge in the
      # DFA should be omitted. The target DFAState is still created since
      # execATN has the ability to resynchronize with the DFA state cache
      # following the predicate evaluation step.
      #
      # TJP notes: next time through the DFA, we see a pred again and eval.
      # If that gets us to a previously created (but dangling) DFA
      # state, we can continue in pure DFA mode from there.
      suppressEdge = cfgs.hasSemanticContext
      cfgs.hasSemanticContext = false

      to = self.addDFAState(cfgs)

      return to if suppressEdge
    end

    # Only track edges within the DFA bounds
    if tk < LexerATNSimulator::MIN_DFA_EDGE || tk > LexerATNSimulator::MAX_DFA_EDGE
      return to
    end

    if self.debug
      puts "EDGE #{from_} -> #{to} upon #{tk.chr}"
    end

    # The edge table is created on first use and grows on assignment.
    from_.edges = Array.new if from_.edges.nil?

    from_.edges[tk - LexerATNSimulator::MIN_DFA_EDGE] = to # connect

    to
  end

  # Add a new DFA state if there isn't one with this set of
  # configurations already. This method also detects the first
  # configuration containing an ATN rule stop state. Later, when
  # traversing the DFA, we will know which rule to accept.
  #
  # @return the existing or newly registered DFAState
  def addDFAState(configs)
    # the lexer evaluates predicates on-the-fly; by this point configs
    # should not contain any configurations with unevaluated predicates.
    proposed = DFAState.new(nil, configs)
    firstConfigWithRuleStopState = nil
    configs.each do |c|
      if c.state.kind_of? RuleStopState
        firstConfigWithRuleStopState = c
        break
      end
    end

    if firstConfigWithRuleStopState
      proposed.isAcceptState = true
      proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor
      proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]
    end

    dfa = self.decisionToDFA[self.mode]
    existing = dfa.states[proposed]
    return existing if existing

    newState = proposed

    newState.stateNumber = dfa.states.length
    configs.setReadonly(true)
    newState.configs = configs
    dfa.states[newState] = newState
    newState
  end

  # Returns the DFA stored for +mode+.
  def getDFA(mode)
    self.decisionToDFA[mode]
  end

  # Get the text matched so far for the current token.
  def getText(input)
    # index is first lookahead char, don't include.
    input.getText(self.startIndex, input.index - 1)
  end

  # Consumes one character from +input+, advancing the line counter on a
  # newline and the column counter on anything else.
  def consume(input)
    curChar = input.LA(1)
    if curChar == "\n".ord
      self.line = self.line + 1
      self.column = 0
    else
      self.column = self.column + 1
    end
    input.consume
  end

  # Returns a printable name for symbol +t+: "EOF" for -1, otherwise the
  # character in single quotes. pack("U") is used instead of Integer#chr,
  # which raises RangeError for code points above 255.
  def getTokenName(t)
    if t == -1
      "EOF"
    else
      "'#{[t].pack('U')}'"
    end
  end
end