antlr4 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +27 -0
  3. data/README.md +46 -0
  4. data/lib/antlr4.rb +262 -0
  5. data/lib/antlr4/BufferedTokenStream.rb +306 -0
  6. data/lib/antlr4/CommonTokenFactory.rb +53 -0
  7. data/lib/antlr4/CommonTokenStream.rb +56 -0
  8. data/lib/antlr4/FileStream.rb +14 -0
  9. data/lib/antlr4/InputStream.rb +82 -0
  10. data/lib/antlr4/IntervalSet.rb +341 -0
  11. data/lib/antlr4/LL1Analyzer.rb +177 -0
  12. data/lib/antlr4/Lexer.rb +335 -0
  13. data/lib/antlr4/ListTokenSource.rb +140 -0
  14. data/lib/antlr4/Parser.rb +562 -0
  15. data/lib/antlr4/ParserInterpreter.rb +149 -0
  16. data/lib/antlr4/ParserRuleContext.rb +162 -0
  17. data/lib/antlr4/PredictionContext.rb +690 -0
  18. data/lib/antlr4/Recognizer.rb +162 -0
  19. data/lib/antlr4/RuleContext.rb +226 -0
  20. data/lib/antlr4/Token.rb +124 -0
  21. data/lib/antlr4/TokenFactory.rb +3 -0
  22. data/lib/antlr4/TokenSource.rb +4 -0
  23. data/lib/antlr4/TokenStream.rb +3 -0
  24. data/lib/antlr4/TraceListener.rb +23 -0
  25. data/lib/antlr4/atn/ATN.rb +133 -0
  26. data/lib/antlr4/atn/ATNConfig.rb +146 -0
  27. data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
  28. data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
  29. data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
  30. data/lib/antlr4/atn/ATNSimulator.rb +43 -0
  31. data/lib/antlr4/atn/ATNState.rb +253 -0
  32. data/lib/antlr4/atn/ATNType.rb +22 -0
  33. data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
  34. data/lib/antlr4/atn/LexerAction.rb +311 -0
  35. data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
  36. data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
  37. data/lib/antlr4/atn/PredictionMode.rb +525 -0
  38. data/lib/antlr4/atn/SemanticContext.rb +355 -0
  39. data/lib/antlr4/atn/Transition.rb +297 -0
  40. data/lib/antlr4/base.rb +60 -0
  41. data/lib/antlr4/dfa/DFA.rb +128 -0
  42. data/lib/antlr4/dfa/DFASerializer.rb +77 -0
  43. data/lib/antlr4/dfa/DFAState.rb +133 -0
  44. data/lib/antlr4/error.rb +151 -0
  45. data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
  46. data/lib/antlr4/error/ErrorListener.rb +109 -0
  47. data/lib/antlr4/error/ErrorStrategy.rb +742 -0
  48. data/lib/antlr4/tree/Chunk.rb +31 -0
  49. data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
  50. data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
  51. data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
  52. data/lib/antlr4/tree/RuleTagToken.rb +39 -0
  53. data/lib/antlr4/tree/TokenTagToken.rb +38 -0
  54. data/lib/antlr4/tree/Tree.rb +204 -0
  55. data/lib/antlr4/tree/Trees.rb +111 -0
  56. data/lib/antlr4/version.rb +5 -0
  57. data/lib/antlr4/xpath/XPath.rb +354 -0
  58. data/lib/double_key_map.rb +78 -0
  59. data/lib/java_symbols.rb +24 -0
  60. data/lib/uuid.rb +87 -0
  61. data/test/test_intervalset.rb +664 -0
  62. data/test/test_tree.rb +140 -0
  63. data/test/test_uuid.rb +122 -0
  64. metadata +109 -0
@@ -0,0 +1,43 @@
1
+
2
# Common base for the ATN simulators. Holds the ATN being simulated and an
# optional cache of shared prediction contexts.
class ATNSimulator
  # Must distinguish between missing edge and edge we know leads nowhere.
  ERROR = DFAState.new(0x7FFFFFFF, ATNConfigSet.new)

  # The context cache maps all PredictionContext objects that are ==
  # to a single cached copy. This cache is shared across all contexts
  # in all ATNConfigs in all DFA states. We rebuild each ATNConfigSet
  # to use only cached nodes/graphs in addDFAState(). We don't want to
  # fill this during closure() since there are lots of contexts that
  # pop up but are not used ever again. It also greatly slows down closure().
  #
  # This cache makes a huge difference in memory and a little bit in speed.
  # For the Java grammar on java.*, it dropped the memory requirements
  # at the end from 25M to 16M. We don't store any of the full context
  # graphs in the DFA because they are limited to local context only,
  # but apparently there's a lot of repetition there as well. We optimize
  # the config contexts before storing the config set in the DFA states
  # by literally rebuilding them with cached subgraphs only.
  #
  # I tried a cache for use during closure operations, that was
  # whacked after each adaptivePredict(). It cost a little bit
  # more time I think and doesn't save on the overall footprint
  # so it's not worth the complexity.
  include PredictionContextFunctions

  attr_accessor :atn, :sharedContextCache

  # @param atn the ATN to simulate; must not be nil
  # @param sharedContextCache cache of prediction contexts, or nil to
  #   disable context caching
  # @raise [Exception] if atn is nil
  def initialize(atn, sharedContextCache)
    raise Exception.new("ATN is nil") if atn.nil?
    self.atn = atn
    self.sharedContextCache = sharedContextCache
  end

  # Returns the cached equivalent of +context+ as computed by
  # getCachedPredictionContext, or +context+ itself when no shared cache
  # was supplied.
  def getCachedContext(context)
    return context if self.sharedContextCache.nil?

    # Fresh per-call map handed to getCachedPredictionContext.
    visited = {}
    getCachedPredictionContext(context, self.sharedContextCache, visited)
  end
end
43
+
@@ -0,0 +1,253 @@
1
+
2
# from antlr4.atn.Transition import Transition
INITIAL_NUM_TRANSITIONS = 4

# Base class for the states of an ATN. A state records the ATN it belongs
# to, its state number, its serialization type, the index of its rule and
# the list of outgoing transitions.
class ATNState
  # constants for serialization
  INVALID_TYPE = 0
  BASIC = 1
  RULE_START = 2
  BLOCK_START = 3
  PLUS_BLOCK_START = 4
  STAR_BLOCK_START = 5
  TOKEN_START = 6
  RULE_STOP = 7
  BLOCK_END = 8
  STAR_LOOP_BACK = 9
  STAR_LOOP_ENTRY = 10
  PLUS_LOOP_BACK = 11
  LOOP_END = 12

  INVALID_STATE_NUMBER = -1

  # Display names, indexed by the serialization type constants above.
  # Shared by every state instead of being rebuilt for each instance.
  SERIALIZATION_NAMES = %w[
    INVALID
    BASIC
    RULE_START
    BLOCK_START
    PLUS_BLOCK_START
    STAR_BLOCK_START
    TOKEN_START
    RULE_STOP
    BLOCK_END
    STAR_LOOP_BACK
    STAR_LOOP_ENTRY
    PLUS_LOOP_BACK
    LOOP_END
  ].freeze

  attr_accessor :atn, :stateNumber, :stateType, :ruleIndex
  attr_accessor :epsilonOnlyTransitions, :transitions, :nextTokenWithinRule

  def initialize
    # Which ATN are we in?
    @atn = nil
    @stateNumber = ATNState::INVALID_STATE_NUMBER
    @stateType = nil
    @ruleIndex = 0 # at runtime, we don't have Rule objects
    @epsilonOnlyTransitions = false
    # Track the transitions emanating from this ATN state.
    @transitions = []
    # Used to cache lookahead during parsing, not used during construction
    @nextTokenWithinRule = nil
  end

  # @return [Array<String>] names indexed by serialization type
  def serializationNames
    SERIALIZATION_NAMES
  end

  # States hash by their state number.
  def hash
    self.stateNumber
  end

  # Two states are equal when both are ATNStates with the same state number.
  def ==(other)
    other.kind_of?(ATNState) && self.stateNumber == other.stateNumber
  end

  # Hash and Set look keys up with eql?; keep it consistent with == and hash
  # so that equal states are treated as the same key.
  def eql?(other)
    self == other
  end

  def onlyHasEpsilonTransitions
    self.epsilonOnlyTransitions
  end

  def isNonGreedyExitState
    false
  end

  def to_s
    self.stateNumber.to_s
  end

  def inspect
    "<ATNState #{self.stateNumber.to_s} >"
  end

  # Adds an outgoing transition and keeps epsilonOnlyTransitions up to date.
  #
  # @param trans the transition to add; must respond to isEpsilon
  # @param index position to insert at; -1 (the default) appends
  def addTransition(trans, index = -1)
    if self.transitions.empty?
      self.epsilonOnlyTransitions = trans.isEpsilon
    elsif self.epsilonOnlyTransitions != trans.isEpsilon
      self.epsilonOnlyTransitions = false
      # TODO System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber);
    end
    if index == -1
      self.transitions.push(trans)
    else
      self.transitions.insert(index, trans)
    end
  end
end

class BasicState < ATNState
  def initialize
    super()
    self.stateType = ATNState::BASIC
  end
end

# A state that carries a decision number and a non-greedy flag. It does not
# set a state type itself; its subclasses do.
class DecisionState < ATNState
  attr_accessor :decision, :nonGreedy

  def initialize
    super()
    self.decision = -1
    self.nonGreedy = false
  end
end

# The start of a regular {@code (...)} block.
class BlockStartState < DecisionState
  attr_accessor :endState

  def initialize
    super()
    self.endState = nil
  end
end

class BasicBlockStartState < BlockStartState
  def initialize
    super()
    self.stateType = ATNState::BLOCK_START
  end
end

# Terminal node of a simple {@code (a|b|c)} block.
class BlockEndState < ATNState
  attr_accessor :startState

  def initialize
    super()
    self.stateType = ATNState::BLOCK_END
    self.startState = nil
  end
end

# The last node in the ATN for a rule, unless that rule is the start symbol.
# In that case, there is one transition to EOF. Later, we might encode
# references to all calls to this rule to compute FOLLOW sets for
# error handling.
class RuleStopState < ATNState
  attr_accessor :stopState

  def initialize
    super()
    self.stateType = ATNState::RULE_STOP
  end
end

class RuleStartState < ATNState
  attr_accessor :stopState, :isPrecedenceRule

  def initialize
    super()
    self.stateType = ATNState::RULE_START
    self.stopState = nil
    self.isPrecedenceRule = false
  end
end

# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions:
# one to the loop back to start of the block and one to exit.
class PlusLoopbackState < DecisionState
  def initialize
    super()
    self.stateType = ATNState::PLUS_LOOP_BACK
  end
end

# Start of {@code (A|B|...)+} loop. Technically a decision state, but
# we don't use for code generation; somebody might need it, so I'm defining
# it for completeness. In reality, the {@link PlusLoopbackState} node is the
# real decision-making node for {@code A+}.
class PlusBlockStartState < BlockStartState
  attr_accessor :loopBackState

  def initialize
    super()
    self.stateType = ATNState::PLUS_BLOCK_START
    self.loopBackState = nil
  end
end

# The block that begins a closure loop.
class StarBlockStartState < BlockStartState
  def initialize
    super()
    self.stateType = ATNState::STAR_BLOCK_START
  end
end

class StarLoopbackState < ATNState
  def initialize
    super()
    self.stateType = ATNState::STAR_LOOP_BACK
  end
end

class StarLoopEntryState < DecisionState
  attr_accessor :loopBackState, :precedenceRuleDecision

  def initialize
    super()
    self.stateType = ATNState::STAR_LOOP_ENTRY
    self.loopBackState = nil
    # Indicates whether this state can benefit from a precedence DFA during SLL decision making.
    self.precedenceRuleDecision = nil
  end
end

# Mark the end of a * or + loop.
class LoopEndState < ATNState
  attr_accessor :loopBackState

  def initialize
    super()
    self.stateType = ATNState::LOOP_END
    self.loopBackState = nil
  end
end

# The Tokens rule start state linking to each lexer rule start state.
class TokensStartState < DecisionState
  def initialize
    super()
    self.stateType = ATNState::TOKEN_START
  end
end
@@ -0,0 +1,22 @@
1
+
2
# The kinds of ATN, represented as integer ordinals.
class ATNType
  LEXER = 0
  PARSER = 1

  # Method-style access to the LEXER ordinal (ATNType.LEXER).
  def self.LEXER
    ATNType::LEXER
  end

  # Method-style access to the PARSER ordinal (ATNType.PARSER).
  def self.PARSER
    ATNType::PARSER
  end

  # Validates an ordinal and returns the matching ATNType constant.
  #
  # @param i the ordinal to look up
  # @return [Integer] ATNType::LEXER or ATNType::PARSER
  # @raise [Exception] if i is neither ordinal
  def self.fromOrdinal(i)
    case i
    when ATNType::LEXER
      ATNType::LEXER
    when ATNType::PARSER
      ATNType::PARSER
    else
      raise Exception.new("ATNType: Unknown value:#{i} ")
    end
  end
end
@@ -0,0 +1,612 @@
1
# When we hit an accept state in either the DFA or the ATN, we
# have to notify the character stream to start buffering characters
# via {@link IntStream#mark} and record the current state. The current sim state
# includes the current index into the input, the current line,
# and current character position in that line. Note that the Lexer is
# tracking the starting line and character position of the token. These
# variables track the "state" of the simulator when it hits an accept state.
#
# We track these variables separately for the DFA and ATN simulation
# because the DFA simulation often has to fail over to the ATN
# simulation. If the ATN simulation fails, we need the DFA to fall
# back to its previously accepted state, if any. If the ATN succeeds,
# then the ATN does the accept and the DFA simulator that invoked it
# can simply return the predicted token type.
class SimState
  attr_accessor :index, :line, :column, :dfaState

  def initialize
    self.reset
  end

  # Restores the "nothing accepted yet" values: index and column -1,
  # line 0 and no DFA state.
  def reset
    @index = -1
    @line = 0
    @column = -1
    @dfaState = nil
  end
end
30
+
31
+
32
# Lexer simulator: matches one token at a time against the ATN for the
# current lexer mode, recording DFA states and edges as it goes so that
# previously seen edges can be reused (see getExistingTargetState).
class LexerATNSimulator < ATNSimulator
  #include JavaSymbols

  @@debug = false
  @@dfa_debug = false
  @@match_calls = 0

  # Class-level readers for the shared flags and counter. These read the
  # @@ class variables that the instance readers below also use.
  def self.debug; @@debug; end
  def self.dfa_debug; @@dfa_debug; end
  def self.match_calls; @@match_calls; end

  def debug; @@debug; end
  def dfa_debug; @@dfa_debug; end
  def match_calls; @@match_calls; end

  MIN_DFA_EDGE = 0
  MAX_DFA_EDGE = 127 # forces unicode to stay in ATN

  attr_accessor :decisionToDFA, :recog, :startIndex, :line, :column
  attr_accessor :mode, :prevAccept

  # @param _recog the recognizer (lexer) using this simulator; may be nil
  # @param _atn the ATN to simulate
  # @param decision_to_dfa DFAs indexed by lexer mode; must not be nil
  # @param shared_context_cache prediction context cache passed to ATNSimulator
  # @raise [Exception] if decision_to_dfa is nil
  def initialize(_recog, _atn, decision_to_dfa, shared_context_cache)
    super(_atn, shared_context_cache)

    if decision_to_dfa.nil? then
      raise Exception.new("Error: #{self.class} decisionToDFA is nil.")
    end
    @decisionToDFA = decision_to_dfa
    @recog = _recog
    # The current token's starting index into the character stream.
    # Shared across DFA to ATN simulation in case the ATN fails and the
    # DFA did not have a previous accept state. In this case, we use the
    # ATN-generated exception object.
    @startIndex = -1
    # line number 1..n within the input
    @line = 1
    # The index of the character relative to the beginning of the line 0..n-1
    @column = 0
    @mode = Lexer::DEFAULT_MODE
    # Used during DFA/ATN exec to record the most recent accept configuration info
    self.prevAccept = SimState.new()
  end

  # Copies position and mode from another simulator.
  def copyState(simulator)
    self.column = simulator.column
    self.line = simulator.line
    self.mode = simulator.mode
    self.startIndex = simulator.startIndex
  end

  # Matches one token starting at the current input position in the given
  # lexer mode and returns the value produced by matchATN/execATN.
  # The input mark taken here is always released.
  def match(input, mode)
    @@match_calls = @@match_calls + 1
    self.mode = mode
    mark = input.mark()
    begin
      self.startIndex = input.index
      self.prevAccept.reset()
      dfa = self.decisionToDFA[mode]
      type_check(dfa, DFA)
      if dfa and dfa.s0.nil? then
        return self.matchATN(input)
      else
        return self.execATN(input, dfa.s0)
      end
    ensure
      input.release(mark)
    end
  end

  # Resets position, mode and the recorded accept state to their initial values.
  def reset
    self.prevAccept.reset()
    @startIndex = -1
    @line = 1
    @column = 0
    @mode = Lexer::DEFAULT_MODE
  end

  def clearDFA()
    raise Exception.new("not implemented")
  end

  # Computes the start state for the current mode, records it as the DFA's
  # s0 (unless a predicate was traversed) and continues with execATN.
  def matchATN(input)
    startState = self.atn.modeToStartState[self.mode]

    if self.debug then
      puts "matchATN mode #{self.mode} start: #{startState}"
    end

    old_mode = self.mode
    s0_closure = self.computeStartState(input, startState)
    suppressEdge = s0_closure.hasSemanticContext
    s0_closure.hasSemanticContext = false

    nxt = self.addDFAState(s0_closure)
    if not suppressEdge then
      self.decisionToDFA[self.mode].s0 = nxt
    end

    predict = self.execATN(input, nxt)

    if self.debug then
      puts "DFA after matchATN: #{self.decisionToDFA[old_mode].toLexerString()}"
    end

    return predict
  end

  # Runs the simulation from DFA state ds0 until no further target state
  # exists or EOF is accepted, then delegates to failOrAccept.
  def execATN(input, ds0)
    if self.debug then
      puts "start state closure=#{ds0.configs.to_s}"
    end

    t = input.LA(1)
    s = ds0 # s is current/from DFA state

    raise Exception.new("s is nil") if s.nil?

    while true do # while more work
      if self.debug then
        puts "execATN loop starting closure: #{s.configs}"
      end

      # As we move src->trg, src->trg, we keep track of the previous trg to
      # avoid looking up the DFA state again, which is expensive.
      # If the previous target was already part of the DFA, we might
      # be able to avoid doing a reach operation upon t. If s!=null,
      # it means that semantic predicates didn't prevent us from
      # creating a DFA state. Once we know s!=null, we check to see if
      # the DFA state has an edge already for t. If so, we can just reuse
      # its configuration set; there's no point in re-computing it.
      # This is kind of like doing DFA simulation within the ATN
      # simulation because DFA simulation is really just a way to avoid
      # computing reach/closure sets. Technically, once we know that
      # we have a previously added DFA state, we could jump over to
      # the DFA simulator. But, that would mean popping back and forth
      # a lot and making things more complicated algorithmically.
      # This optimization makes a lot of sense for loops within DFA.
      # A character will take us back to an existing DFA state
      # that already has lots of edges out of it. e.g., .* in comments.
      target = self.getExistingTargetState(s, t)
      if target.nil? then
        target = self.computeTargetState(input, s, t)
      end
      break if target.equal? ATNSimulator::ERROR

      if target.isAcceptState
        self.captureSimState(self.prevAccept, input, target)
        if t == Token::EOF
          break
        end
      end

      if t != Token::EOF
        self.consume(input)
        t = input.LA(1)
      end

      s = target # flip; current DFA target becomes new src/from state
    end

    return self.failOrAccept(self.prevAccept, input, s.configs, t)
  end

  # Get an existing target state for an edge in the DFA. If the target state
  # for the edge has not yet been computed or is otherwise not available,
  # this method returns nil.
  #
  # @param s The current DFA state
  # @param t The next input symbol
  # @return The existing target DFA state for the given input symbol
  # t, or nil if the target state for this edge is not already cached
  def getExistingTargetState(s, t)
    if s.edges.nil? or t < LexerATNSimulator::MIN_DFA_EDGE or t > LexerATNSimulator::MAX_DFA_EDGE
      return nil
    end

    target = s.edges[t - LexerATNSimulator::MIN_DFA_EDGE]
    if self.debug and not target.nil?
      puts "reuse state #{s.stateNumber} edge to #{target.stateNumber}"
    end

    return target
  end

  # Compute a target state for an edge in the DFA, and attempt to add the
  # computed state and corresponding edge to the DFA.
  #
  # @param input The input stream
  # @param s The current DFA state
  # @param t The next input symbol
  #
  # @return The computed target DFA state for the given input symbol
  # t. If t does not lead to a valid DFA state, this method
  # returns ATNSimulator::ERROR.
  def computeTargetState(input, s, t)
    reach = OrderedATNConfigSet.new()

    # if we don't find an existing DFA state
    # Fill reach starting from closure, following t transitions
    self.getReachableConfigSet(input, s.configs, reach, t)

    if reach.length==0 # we got nowhere on t from s
      if not reach.hasSemanticContext
        # we got nowhere on t, don't throw out this knowledge; it'd
        # cause a failover from DFA later.
        self.addDFAEdge(s, t, ATNSimulator::ERROR)
      end
      # stop when we can't match any more char
      return ATNSimulator::ERROR
    end

    # Add an edge from s to target DFA found/created for reach
    return self.addDFAEdge(s, t, nil, reach)
  end

  # Accepts the most recently recorded accept state if there is one;
  # otherwise returns Token::EOF when EOF is the first character, or raises.
  #
  # @param prevAccept the SimState holding the last accept information
  # @param reach the configurations reported in the raised exception
  # @raise [LexerNoViableAltException] when nothing was accepted
  def failOrAccept(prevAccept, input, reach, t)
    if not prevAccept.dfaState.nil?
      lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor
      self.accept(input, lexerActionExecutor, self.startIndex, prevAccept.index, prevAccept.line, prevAccept.column)
      return prevAccept.dfaState.prediction
    else
      # if no accept and EOF is first char, return EOF
      if t==Token::EOF and input.index==self.startIndex
        return Token::EOF
      end
      raise LexerNoViableAltException.new(self.recog, input, self.startIndex, reach)
    end
  end

  # Given a starting configuration set, figure out all ATN configurations
  # we can reach upon input t. Parameter reach is a return parameter.
  def getReachableConfigSet(input, closure, reach, t)
    # this is used to skip processing for configs which have a lower priority
    # than a config that already reached an accept state for the same rule
    skipAlt = ATN::INVALID_ALT_NUMBER
    closure.each do |cfg|
      currentAltReachedAcceptState = ( cfg.alt == skipAlt )
      if currentAltReachedAcceptState and cfg.passedThroughNonGreedyDecision
        next
      end

      if self.debug
        puts "testing #{self.getTokenName(t)} at #{cfg.toString(self.recog, true)}"
      end

      cfg.state.transitions.each do |trans| # for each transition
        target = self.getReachableTarget(trans, t)
        if target
          lexerActionExecutor = cfg.lexerActionExecutor
          if lexerActionExecutor
            lexerActionExecutor = lexerActionExecutor.fixOffsetBeforeMatch(input.index - self.startIndex)
          end
          treatEofAsEpsilon = (t == Token::EOF)
          config = LexerATNConfig.new(target, nil, nil, nil, lexerActionExecutor, cfg)
          # NB: the explicit receiver is required; `closure` is also a parameter here.
          if self.closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)
            # any remaining configs for this alt have a lower priority than
            # the one that just reached an accept state.
            skipAlt = cfg.alt
            break
          end
        end
      end
    end
  end

  # Positions the input and simulator at the end of the accepted token and
  # runs the token's lexer actions, if any and if a recognizer is present.
  def accept(input, lexerActionExecutor, start_index, index, _line, charPos)
    if self.debug
      puts "ACTION #{lexerActionExecutor}"
    end

    # seek to after last char in token
    input.seek(index)
    self.line = _line
    self.column = charPos
    if input.LA(1) != Token::EOF
      self.consume(input)
    end
    if lexerActionExecutor and self.recog
      lexerActionExecutor.execute(self.recog, input, start_index)
    end
  end

  # Returns the transition's target if it matches symbol t, otherwise nil.
  def getReachableTarget(trans, t)
    if trans.matches(t, 0, 0xFFFE)
      return trans.target
    else
      return nil
    end
  end

  # Builds the initial configuration set for start state p: one config per
  # outgoing transition, with alternatives numbered from 1.
  def computeStartState(input, p)
    initialContext = PredictionContext.EMPTY
    configs = OrderedATNConfigSet.new()
    p.transitions.each_index do |i|
      target = p.transitions[i].target
      c = LexerATNConfig.new(target, i+1, initialContext)
      self.closure(input, c, configs, false, false, false)
    end
    return configs
  end

  # Since the alternatives within any lexer decision are ordered by
  # preference, this method stops pursuing the closure as soon as an accept
  # state is reached. After the first accept state is reached by depth-first
  # search from config, all other (potentially reachable) states for
  # this rule would have a lower priority.
  #
  # @return true if an accept state is reached, otherwise false.
  def closure(input, config, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
    if self.debug
      puts "closure(#{config.toString(self.recog, true)})"
    end

    if config.state.kind_of? RuleStopState
      if self.debug
        if self.recog
          puts "closure at #{self.recog.getRuleNames[config.state.ruleIndex]} rule stop #{ config}"
        else
          puts "closure at rule stop #{ config}"
        end
      end

      if config.context.nil? or config.context.hasEmptyPath()
        if config.context.nil? or config.context.isEmpty()
          configs.add(config)
          return true
        else
          configs.add(LexerATNConfig.new(config.state, nil,PredictionContext.EMPTY,nil,nil,config) )
          currentAltReachedAcceptState = true
        end
      end
      if config.context and not config.context.isEmpty() then
        0.upto(config.context.length - 1) do |i|
          if config.context.getReturnState(i) != PredictionContext::EMPTY_RETURN_STATE
            newContext = config.context.getParent(i) # "pop" return state
            returnState = self.atn.states[config.context.getReturnState(i)]
            c = LexerATNConfig.new(returnState,nil,newContext, nil, nil, config )
            currentAltReachedAcceptState = self.closure(input, c, configs,
                      currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
          end
        end
      end
      return currentAltReachedAcceptState
    end
    # optimization
    if not config.state.epsilonOnlyTransitions then
      if not currentAltReachedAcceptState or not config.passedThroughNonGreedyDecision
        configs.add(config)
      end
    end

    config.state.transitions.each do |t|
      c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
      if c then
        currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
      end
    end
    return currentAltReachedAcceptState
  end

  # Returns the configuration reached by following transition t without
  # consuming input, or nil if t cannot be followed that way.
  #
  # side-effect: can alter configs.hasSemanticContext
  def getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
    c = nil
    if t.serializationType==Transition::RULE then
      newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
      c = LexerATNConfig.new(t.target, nil, newContext, nil,nil, config)
    elsif t.serializationType==Transition::PRECEDENCE
      raise UnsupportedOperationException.new("Precedence predicates are not supported in lexers.")
    elsif t.serializationType==Transition::PREDICATE
      # Track traversing semantic predicates. If we traverse,
      # we cannot add a DFA state for this "reach" computation
      # because the DFA would not test the predicate again in the
      # future. Rather than creating collections of semantic predicates
      # like v3 and testing them on prediction, v4 will test them on the
      # fly all the time using the ATN not the DFA. This is slower but
      # semantically it's not used that often. One of the key elements to
      # this predicate mechanism is not adding DFA states that see
      # predicates immediately afterwards in the ATN. For example,

      # a : ID {p1}? | ID {p2}? ;

      # should create the start state for rule 'a' (to save start state
      # competition), but should not create target of ID state. The
      # collection of ATN states the following ID references includes
      # states reached by traversing predicates. Since this is when we
      # test them, we cannot cache the DFA state target of ID.
      if self.debug
        puts "EVAL rule #{t.ruleIndex}:#{t.predIndex}"
      end
      configs.hasSemanticContext = true
      if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative)
        # Must be constructed with .new; a bare LexerATNConfig(...) is a
        # call to an undefined method.
        c = LexerATNConfig.new(t.target,nil,nil,nil,nil, config)
      end
    elsif t.serializationType==Transition::ACTION
      if config.context.nil? or config.context.hasEmptyPath()
        # execute actions anywhere in the start rule for a token.
        #
        # TODO: if the entry rule is invoked recursively, some
        # actions may be executed during the recursive call. The
        # problem can appear when hasEmptyPath() is true but
        # isEmpty() is false. In this case, the config needs to be
        # split into two contexts - one with just the empty path
        # and another with everything but the empty path.
        # Unfortunately, the current algorithm does not allow
        # getEpsilonTarget to return two configurations, so
        # additional modifications are needed before we can support
        # the split operation.
        lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
                        self.atn.lexerActions[t.actionIndex])
        c = LexerATNConfig.new(t.target,nil,nil,nil, lexerActionExecutor, config)
      else
        # ignore actions in referenced rules
        c = LexerATNConfig.new(t.target,nil,nil,nil,nil, config)
      end
    elsif t.serializationType==Transition::EPSILON
      c = LexerATNConfig.new(t.target,nil,nil,nil,nil, config)
    elsif [ Transition::ATOM, Transition::RANGE, Transition::SET ].member? t.serializationType
      if treatEofAsEpsilon
        if t.matches(Token::EOF, 0, 0xFFFF)
          c = LexerATNConfig.new(t.target,nil,nil,nil,nil, config)
        end
      end
    end
    return c
  end

  # Evaluate a predicate specified in the lexer.
  #
  # If speculative is true, this method was called before
  # consume for the matched character. This method calls
  # consume before evaluating the predicate to ensure position
  # sensitive values, including Lexer#getText, Lexer#getLine,
  # and Lexer#getcolumn, properly reflect the current
  # lexer state. It restores input and the simulator
  # to the original state before returning (i.e. undoes the actions made by the
  # call to consume).
  #
  # @param input The input stream.
  # @param ruleIndex The rule containing the predicate.
  # @param predIndex The index of the predicate within the rule.
  # @param speculative true if the current index in input is
  # one character before the predicate's location.
  #
  # @return true if the specified predicate evaluates to true.
  def evaluatePredicate(input, ruleIndex, predIndex, speculative)
    # assume true if no recognizer was provided
    return true if self.recog.nil?

    if not speculative then
      return self.recog.sempred(nil, ruleIndex, predIndex)
    end

    savedcolumn = self.column
    savedLine = self.line
    index = input.index
    marker = input.mark()
    begin
      self.consume(input)
      return self.recog.sempred(nil, ruleIndex, predIndex)
    ensure
      self.column = savedcolumn
      self.line = savedLine
      input.seek(index)
      input.release(marker)
    end
  end

  # Records the current input index, line, column and DFA state in settings.
  def captureSimState(settings, input, dfaState)
    settings.index = input.index
    settings.line = self.line
    settings.column = self.column
    settings.dfaState = dfaState
  end

  # Connects from_ to a target state on symbol tk and returns the target.
  # When to is nil and cfgs is given, the target is found or created from
  # cfgs via addDFAState.
  def addDFAEdge(from_, tk, to=nil, cfgs=nil)
    if to.nil? and cfgs then
      # leading to this call, ATNConfigSet.hasSemanticContext is used as a
      # marker indicating dynamic predicate evaluation makes this edge
      # dependent on the specific input sequence, so the static edge in the
      # DFA should be omitted. The target DFAState is still created since
      # execATN has the ability to resynchronize with the DFA state cache
      # following the predicate evaluation step.
      #
      # TJP notes: next time through the DFA, we see a pred again and eval.
      # If that gets us to a previously created (but dangling) DFA
      # state, we can continue in pure DFA mode from there.
      suppressEdge = cfgs.hasSemanticContext
      cfgs.hasSemanticContext = false

      to = self.addDFAState(cfgs)

      if suppressEdge then
        return to
      end
    end
    # add the edge
    if tk < LexerATNSimulator::MIN_DFA_EDGE or tk > LexerATNSimulator::MAX_DFA_EDGE
      # Only track edges within the DFA bounds
      return to
    end

    if self.debug
      puts "EDGE #{from_} -> #{to} upon #{tk.chr}"
    end

    if from_.edges.nil?
      # make room for tokens 1..n and -1 masquerading as index 0
      # from_.edges = [nil] * (LexerATNSimulator::MAX_DFA_EDGE -
      #                        LexerATNSimulator::MIN_DFA_EDGE + 1)
      from_.edges = Array.new
    end

    from_.edges[tk - LexerATNSimulator::MIN_DFA_EDGE] = to # connect

    return to
  end

  # Add a new DFA state if there isn't one with this set of
  # configurations already. This method also detects the first
  # configuration containing an ATN rule stop state. Later, when
  # traversing the DFA, we will know which rule to accept.
  def addDFAState(configs) # -> DFAState:
    # the lexer evaluates predicates on-the-fly; by this point configs
    # should not contain any configurations with unevaluated predicates.
    # assert not configs.hasSemanticContext
    proposed = DFAState.new(nil,configs)
    firstConfigWithRuleStopState = nil
    configs.each do |c|
      if c.state.kind_of? RuleStopState then
        firstConfigWithRuleStopState = c
        break
      end
    end

    if firstConfigWithRuleStopState then
      proposed.isAcceptState = true
      proposed.lexerActionExecutor = firstConfigWithRuleStopState.lexerActionExecutor
      proposed.prediction = self.atn.ruleToTokenType[firstConfigWithRuleStopState.state.ruleIndex]
    end

    dfa = self.decisionToDFA[self.mode]
    existing = dfa.states[proposed]
    if existing then
      return existing
    end

    newState = proposed

    newState.stateNumber = dfa.states.length
    configs.setReadonly(true)
    newState.configs = configs
    dfa.states[newState] = newState
    return newState
  end

  def getDFA(mode)
    return self.decisionToDFA[mode]
  end

  # Get the text matched so far for the current token.
  def getText(input)
    # index is first lookahead char, don't include.
    return input.getText(self.startIndex, input.index-1)
  end

  # Consumes one symbol from input, advancing line on "\n" and column otherwise.
  def consume(input)
    curChar = input.LA(1)
    if curChar=="\n".ord then
      self.line = self.line + 1
      self.column = 0
    else
      self.column = self.column + 1
    end
    input.consume()
  end

  # Printable name for input symbol t, used in debug output.
  def getTokenName(t)
    if t==-1
      return "EOF"
    else
      # pack("U") encodes any code point as UTF-8; Integer#chr without an
      # encoding raises RangeError above 255.
      return "'#{[t].pack("U")}'"
    end
  end
end