antlr4 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +27 -0
- data/README.md +46 -0
- data/lib/antlr4.rb +262 -0
- data/lib/antlr4/BufferedTokenStream.rb +306 -0
- data/lib/antlr4/CommonTokenFactory.rb +53 -0
- data/lib/antlr4/CommonTokenStream.rb +56 -0
- data/lib/antlr4/FileStream.rb +14 -0
- data/lib/antlr4/InputStream.rb +82 -0
- data/lib/antlr4/IntervalSet.rb +341 -0
- data/lib/antlr4/LL1Analyzer.rb +177 -0
- data/lib/antlr4/Lexer.rb +335 -0
- data/lib/antlr4/ListTokenSource.rb +140 -0
- data/lib/antlr4/Parser.rb +562 -0
- data/lib/antlr4/ParserInterpreter.rb +149 -0
- data/lib/antlr4/ParserRuleContext.rb +162 -0
- data/lib/antlr4/PredictionContext.rb +690 -0
- data/lib/antlr4/Recognizer.rb +162 -0
- data/lib/antlr4/RuleContext.rb +226 -0
- data/lib/antlr4/Token.rb +124 -0
- data/lib/antlr4/TokenFactory.rb +3 -0
- data/lib/antlr4/TokenSource.rb +4 -0
- data/lib/antlr4/TokenStream.rb +3 -0
- data/lib/antlr4/TraceListener.rb +23 -0
- data/lib/antlr4/atn/ATN.rb +133 -0
- data/lib/antlr4/atn/ATNConfig.rb +146 -0
- data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
- data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
- data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
- data/lib/antlr4/atn/ATNSimulator.rb +43 -0
- data/lib/antlr4/atn/ATNState.rb +253 -0
- data/lib/antlr4/atn/ATNType.rb +22 -0
- data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
- data/lib/antlr4/atn/LexerAction.rb +311 -0
- data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
- data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
- data/lib/antlr4/atn/PredictionMode.rb +525 -0
- data/lib/antlr4/atn/SemanticContext.rb +355 -0
- data/lib/antlr4/atn/Transition.rb +297 -0
- data/lib/antlr4/base.rb +60 -0
- data/lib/antlr4/dfa/DFA.rb +128 -0
- data/lib/antlr4/dfa/DFASerializer.rb +77 -0
- data/lib/antlr4/dfa/DFAState.rb +133 -0
- data/lib/antlr4/error.rb +151 -0
- data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
- data/lib/antlr4/error/ErrorListener.rb +109 -0
- data/lib/antlr4/error/ErrorStrategy.rb +742 -0
- data/lib/antlr4/tree/Chunk.rb +31 -0
- data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
- data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
- data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
- data/lib/antlr4/tree/RuleTagToken.rb +39 -0
- data/lib/antlr4/tree/TokenTagToken.rb +38 -0
- data/lib/antlr4/tree/Tree.rb +204 -0
- data/lib/antlr4/tree/Trees.rb +111 -0
- data/lib/antlr4/version.rb +5 -0
- data/lib/antlr4/xpath/XPath.rb +354 -0
- data/lib/double_key_map.rb +78 -0
- data/lib/java_symbols.rb +24 -0
- data/lib/uuid.rb +87 -0
- data/test/test_intervalset.rb +664 -0
- data/test/test_tree.rb +140 -0
- data/test/test_uuid.rb +122 -0
- metadata +109 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
|
2
|
+
# Base class for ATN simulators (LexerATNSimulator below derives from it).
# Holds the ATN being simulated and an optional cache of shared
# PredictionContext objects.
class ATNSimulator

  # Sentinel DFA state: lets callers distinguish an edge that has not been
  # computed yet from an edge that is known to lead nowhere.
  ERROR = DFAState.new(0x7FFFFFFF, ATNConfigSet.new)

  # The context cache maps every PredictionContext that compares == to one
  # canonical cached copy. The cache is shared by all contexts of all
  # ATNConfigs in all DFA states. Each ATNConfigSet is rebuilt to use only
  # cached nodes/graphs in addDFAState(); the cache is deliberately NOT
  # filled during closure(), because many contexts appear there once and
  # are never used again, and filling it would slow closure() down a lot.
  #
  # The cache makes a large difference in memory and a small one in speed:
  # for the Java grammar on java.* it dropped the final memory requirement
  # from 25M to 16M. Full-context graphs are not stored in the DFA (they are
  # limited to local context), but there is apparently a lot of repetition
  # there too, so config contexts are rebuilt from cached subgraphs before
  # the config set is stored in a DFA state.
  #
  # A cache used only during closure operations and discarded after each
  # adaptivePredict() was tried as well; it cost a little more time and did
  # not reduce the overall footprint, so it was not worth the complexity.
  include PredictionContextFunctions

  attr_accessor :atn, :sharedContextCache

  # @param atn the ATN to simulate; must not be nil
  # @param sharedContextCache the shared context cache, or nil to disable
  #   context caching (see #getCachedContext)
  def initialize(atn, sharedContextCache)
    raise Exception, "ATN is nil" if atn.nil?

    @atn = atn
    @sharedContextCache = sharedContextCache
  end

  # Returns the cached equivalent of +context+, or +context+ itself when no
  # shared cache was supplied. The lookup is delegated to
  # getCachedPredictionContext (presumably mixed in by
  # PredictionContextFunctions) with a fresh, empty "visited" hash.
  def getCachedContext(context)
    cache = sharedContextCache
    return context if cache.nil?

    getCachedPredictionContext(context, cache, {})
  end
end
|
43
|
+
|
@@ -0,0 +1,253 @@
|
|
1
|
+
|
2
|
+
#from antlr4.atn.Transition import Transition
|
3
|
+
# NOTE(review): looks like an initial size hint for a state's transition list;
# none of the code visible in this listing reads it -- confirm before removing.
INITIAL_NUM_TRANSITIONS = 4
|
4
|
+
|
5
|
+
# A single state (node) of an ATN. Identity for comparison and hashing
# purposes is the state number.
class ATNState

  # constants for serialization
  INVALID_TYPE = 0
  BASIC = 1
  RULE_START = 2
  BLOCK_START = 3
  PLUS_BLOCK_START = 4
  STAR_BLOCK_START = 5
  TOKEN_START = 6
  RULE_STOP = 7
  BLOCK_END = 8
  STAR_LOOP_BACK = 9
  STAR_LOOP_ENTRY = 10
  PLUS_LOOP_BACK = 11
  LOOP_END = 12

  INVALID_STATE_NUMBER = -1

  # Display names, indexed by the state-type constants above. Shared and
  # frozen so that every state does not allocate its own copy.
  SERIALIZATION_NAMES = %w[
    INVALID
    BASIC
    RULE_START
    BLOCK_START
    PLUS_BLOCK_START
    STAR_BLOCK_START
    TOKEN_START
    RULE_STOP
    BLOCK_END
    STAR_LOOP_BACK
    STAR_LOOP_ENTRY
    PLUS_LOOP_BACK
    LOOP_END
  ].freeze

  attr_accessor :atn, :stateNumber, :stateType, :ruleIndex
  attr_accessor :epsilonOnlyTransitions, :transitions, :nextTokenWithinRule
  attr_reader :serializationNames

  def initialize
    # Which ATN are we in?
    @atn = nil
    @stateNumber = ATNState::INVALID_STATE_NUMBER
    @stateType = nil
    @ruleIndex = 0 # at runtime, we don't have Rule objects
    @epsilonOnlyTransitions = false
    # Track the transitions emanating from this ATN state.
    @transitions = []
    # Used to cache lookahead during parsing, not used during construction
    @nextTokenWithinRule = nil
    @serializationNames = SERIALIZATION_NAMES
  end

  # Hash code is the state number, so states that compare equal hash alike.
  def hash
    stateNumber
  end

  # Two states are equal when both are ATNStates with the same state number.
  def ==(other)
    other.kind_of?(ATNState) && stateNumber == other.stateNumber
  end

  # Hash and Set look keys up with #hash and #eql?; without this override
  # the custom #hash above had no effect and lookups fell back to object
  # identity.
  def eql?(other)
    self == other
  end

  def onlyHasEpsilonTransitions
    epsilonOnlyTransitions
  end

  def isNonGreedyExitState
    false
  end

  def to_s
    stateNumber.to_s
  end

  def inspect
    "<ATNState #{self.stateNumber.to_s} >"
  end

  # Adds +trans+ to this state's transitions: appended when +index+ is -1,
  # otherwise inserted at +index+. Also maintains epsilonOnlyTransitions,
  # which stays true only while every transition added so far is epsilon.
  def addTransition(trans, index=-1)
    if transitions.empty?
      self.epsilonOnlyTransitions = trans.isEpsilon
    elsif epsilonOnlyTransitions != trans.isEpsilon
      # Mixed epsilon / non-epsilon transitions on one state.
      # TODO System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber);
      self.epsilonOnlyTransitions = false
    end

    if index == -1
      transitions.push(trans)
    else
      transitions.insert(index, trans)
    end
  end
end
|
93
|
+
|
94
|
+
# ATN state whose stateType is ATNState::BASIC.
class BasicState < ATNState
  def initialize
    super
    @stateType = ATNState::BASIC
  end
end
|
101
|
+
|
102
|
+
# Common superclass of the decision-making states in this file. It does not
# assign a stateType itself; the subclasses do.
class DecisionState < ATNState

  attr_accessor :decision, :nonGreedy

  def initialize
    super
    @decision = -1 # no decision number assigned yet
    @nonGreedy = false
  end
end
|
112
|
+
# INVALID_TYPE = 0
|
113
|
+
# BASIC = 1
|
114
|
+
# RULE_START = 2
|
115
|
+
# BLOCK_START = 3
|
116
|
+
# PLUS_BLOCK_START = 4
|
117
|
+
# STAR_BLOCK_START = 5
|
118
|
+
# TOKEN_START = 6
|
119
|
+
# RULE_STOP = 7
|
120
|
+
# BLOCK_END = 8
|
121
|
+
# STAR_LOOP_BACK = 9
|
122
|
+
# STAR_LOOP_ENTRY = 10
|
123
|
+
# PLUS_LOOP_BACK = 11
|
124
|
+
# LOOP_END = 12
|
125
|
+
# The start of a regular {@code (...)} block.
|
126
|
+
class BlockStartState < DecisionState

  # The state paired with this block start; nil until assigned by the caller.
  attr_accessor :endState

  def initialize
    super
    @endState = nil
  end
end
|
134
|
+
|
135
|
+
# Block start state whose stateType is ATNState::BLOCK_START.
class BasicBlockStartState < BlockStartState

  def initialize
    super
    @stateType = ATNState::BLOCK_START
  end
end
|
142
|
+
|
143
|
+
# Terminal node of a simple {@code (a|b|c)} block.
|
144
|
+
class BlockEndState < ATNState

  # The state paired with this block end; nil until assigned by the caller.
  attr_accessor :startState

  def initialize
    super
    @stateType = ATNState::BLOCK_END
    @startState = nil
  end
end
|
153
|
+
|
154
|
+
# The last node in the ATN for a rule, unless that rule is the start symbol.
|
155
|
+
# In that case, there is one transition to EOF. Later, we might encode
|
156
|
+
# references to all calls to this rule to compute FOLLOW sets for
|
157
|
+
# error handling.
|
158
|
+
#
|
159
|
+
class RuleStopState < ATNState

  # NOTE(review): this accessor is declared but never initialised here, so
  # it reads as nil until a caller assigns it -- confirm it is still needed.
  attr_accessor :stopState

  def initialize
    super
    @stateType = ATNState::RULE_STOP
  end
end
|
167
|
+
|
168
|
+
# ATN state whose stateType is ATNState::RULE_START.
class RuleStartState < ATNState

  attr_accessor :stopState, :isPrecedenceRule

  def initialize
    super
    @stateType = ATNState::RULE_START
    @stopState = nil           # assigned later by the caller
    @isPrecedenceRule = false
  end
end
|
178
|
+
|
179
|
+
# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions:
|
180
|
+
# one to the loop back to start of the block and one to exit.
|
181
|
+
#
|
182
|
+
class PlusLoopbackState < DecisionState

  # Tags the state with ATNState::PLUS_LOOP_BACK.
  def initialize
    super
    @stateType = ATNState::PLUS_LOOP_BACK
  end
end
|
189
|
+
|
190
|
+
# Start of {@code (A|B|...)+} loop. Technically a decision state, but
|
191
|
+
# we don't use for code generation; somebody might need it, so I'm defining
|
192
|
+
# it for completeness. In reality, the {@link PlusLoopbackState} node is the
|
193
|
+
# real decision-making node for {@code A+}.
|
194
|
+
#
|
195
|
+
class PlusBlockStartState < BlockStartState

  # The associated loop-back state; nil until assigned by the caller.
  attr_accessor :loopBackState

  def initialize
    super
    @stateType = ATNState::PLUS_BLOCK_START
    @loopBackState = nil
  end
end
|
204
|
+
|
205
|
+
# The block that begins a closure loop.
|
206
|
+
class StarBlockStartState < BlockStartState

  # Tags the state with ATNState::STAR_BLOCK_START.
  def initialize
    super
    @stateType = ATNState::STAR_BLOCK_START
  end
end
|
213
|
+
|
214
|
+
# ATN state whose stateType is ATNState::STAR_LOOP_BACK.
class StarLoopbackState < ATNState

  def initialize
    super
    @stateType = ATNState::STAR_LOOP_BACK
  end
end
|
221
|
+
|
222
|
+
|
223
|
+
# Decision state whose stateType is ATNState::STAR_LOOP_ENTRY.
class StarLoopEntryState < DecisionState

  attr_accessor :loopBackState, :precedenceRuleDecision

  def initialize
    super
    @stateType = ATNState::STAR_LOOP_ENTRY
    @loopBackState = nil
    # Indicates whether this state can benefit from a precedence DFA during
    # SLL decision making. Starts out as nil.
    @precedenceRuleDecision = nil
  end
end
|
234
|
+
|
235
|
+
# Mark the end of a * or + loop.
|
236
|
+
class LoopEndState < ATNState

  # The associated loop-back state; nil until assigned by the caller.
  attr_accessor :loopBackState

  def initialize
    super
    @stateType = ATNState::LOOP_END
    @loopBackState = nil
  end
end
|
245
|
+
|
246
|
+
# The Tokens rule start state linking to each lexer rule start state
|
247
|
+
class TokensStartState < DecisionState

  # Tags the state with ATNState::TOKEN_START.
  def initialize
    super
    @stateType = ATNState::TOKEN_START
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
# Enumeration of the ATN kinds, represented as plain integers.
class ATNType
  LEXER = 0
  PARSER = 1

  class << self
    # Method-style access to the LEXER constant.
    def LEXER
      ATNType::LEXER
    end

    # Method-style access to the PARSER constant.
    def PARSER
      ATNType::PARSER
    end

    # Maps an ordinal to the matching ATN type constant.
    #
    # @param i the ordinal to look up
    # @return ATNType::LEXER or ATNType::PARSER
    # @raise [Exception] when +i+ matches neither constant
    def fromOrdinal(i)
      known = [ATNType::LEXER, ATNType::PARSER].find { |kind| kind === i }
      raise Exception.new("ATNType: Unknown value:#{i} ") if known.nil?

      known
    end
  end
end
|
@@ -0,0 +1,612 @@
|
|
1
|
+
# When we hit an accept state in either the DFA or the ATN, we
|
2
|
+
# have to notify the character stream to start buffering characters
|
3
|
+
# via {@link IntStream#mark} and record the current state. The current sim state
|
4
|
+
# includes the current index into the input, the current line,
|
5
|
+
# and current character position in that line. Note that the Lexer is
|
6
|
+
# tracking the starting line and character position of the token. These
|
7
|
+
# variables track the "state" of the simulator when it hits an accept state.
|
8
|
+
#
|
9
|
+
# <p>We track these variables separately for the DFA and ATN simulation
|
10
|
+
# because the DFA simulation often has to fail over to the ATN
|
11
|
+
# simulation. If the ATN simulation fails, we need the DFA to fall
|
12
|
+
# back to its previously accepted state, if any. If the ATN succeeds,
|
13
|
+
# then the ATN does the accept and the DFA simulator that invoked it
|
14
|
+
# can simply return the predicted token type.</p>
|
15
|
+
|
16
|
+
# Snapshot of the simulator position (input index, line, column) together
# with the DFA state that was reached at that position.
class SimState

  attr_accessor :index, :line, :column, :dfaState

  def initialize
    reset
  end

  # Returns every field to its "nothing recorded" value.
  def reset
    @index = @column = -1
    @line = 0
    @dfaState = nil
  end
end
|
30
|
+
|
31
|
+
|
32
|
+
class LexerATNSimulator < ATNSimulator
|
33
|
+
#include JavaSymbols
|
34
|
+
|
35
|
+
class << self
|
36
|
+
attr_reader :debug, :dfa_debug, :match_calls
|
37
|
+
end
|
38
|
+
@@debug = false
|
39
|
+
@@dfa_debug = false
|
40
|
+
@@match_calls = 0
|
41
|
+
def debug; @@debug ;end
|
42
|
+
def dfa_debug; @@dfa_debug ;end
|
43
|
+
def match_calls; @@match_calls ;end
|
44
|
+
|
45
|
+
MIN_DFA_EDGE = 0
|
46
|
+
MAX_DFA_EDGE = 127 # forces unicode to stay in ATN
|
47
|
+
|
48
|
+
attr_accessor :decisionToDFA, :recog, :startIndex, :line, :column
|
49
|
+
attr_accessor :mode, :prevAccept
|
50
|
+
|
51
|
+
# @param _recog the recognizer this simulator works for (other methods of
#   this class treat nil as "no recognizer")
# @param _atn the ATN to simulate (handed to the superclass)
# @param decision_to_dfa collection of DFAs, indexed by lexer mode in #match
# @param shared_context_cache handed to the superclass
# @raise [Exception] when +decision_to_dfa+ is nil
def initialize(_recog, _atn, decision_to_dfa, shared_context_cache)
  super(_atn, shared_context_cache)

  raise Exception, "Error: #{self.class} decisionToDFA is nil." if decision_to_dfa.nil?

  @recog = _recog
  @decisionToDFA = decision_to_dfa
  # The current token's starting index into the character stream.
  # Shared across DFA to ATN simulation in case the ATN fails and the
  # DFA did not have a previous accept state. In this case, we use the
  # ATN-generated exception object.
  @startIndex = -1
  # line number 1..n within the input
  @line = 1
  # The index of the character relative to the beginning of the line 0..n-1
  @column = 0
  @mode = Lexer::DEFAULT_MODE
  # Used during DFA/ATN exec to record the most recent accept configuration info
  @prevAccept = SimState.new
end
|
72
|
+
|
73
|
+
|
74
|
+
# Copies column, line, mode and startIndex from +simulator+ into this object.
def copyState(simulator)
  self.column, self.line, self.mode = simulator.column, simulator.line, simulator.mode
  self.startIndex = simulator.startIndex
end
|
80
|
+
# Matches one token starting at the current position of +input+ in lexer
# mode +mode+ and returns whatever matchATN / execATN returns.
# The stream mark taken on entry is always released, even on error.
def match(input, mode)
  @@match_calls += 1
  self.mode = mode
  marker = input.mark
  begin
    self.startIndex = input.index
    prevAccept.reset
    dfa = decisionToDFA[mode]
    type_check(dfa, DFA)
    # No start state recorded for this mode yet: build it through the ATN.
    (dfa && dfa.s0.nil?) ? matchATN(input) : execATN(input, dfa.s0)
  ensure
    input.release(marker)
  end
end
|
98
|
+
# Puts the simulator back into its initial state: clears the recorded accept
# state and restores start index, line, column and mode to their defaults.
def reset
  prevAccept.reset
  self.startIndex = -1
  self.line = 1
  self.column = 0
  self.mode = Lexer::DEFAULT_MODE
end
|
105
|
+
# Placeholder: clearing the DFA is not supported yet, so this always raises.
def clearDFA
  raise Exception, "not implemented"
end
|
108
|
+
# Builds the start DFA state for the current mode from the ATN, records it as
# the mode's s0 (unless predicates were seen while computing it), then runs
# the simulation from that state and returns its prediction.
def matchATN(input)
  start_state = atn.modeToStartState[mode]
  print "matchATN mode #{mode} start: #{start_state}" if debug

  mode_at_entry = mode
  start_closure = computeStartState(input, start_state)
  # Remember whether predicates were traversed, then clear the flag before
  # the closure is turned into a DFA state.
  had_predicates = start_closure.hasSemanticContext
  start_closure.hasSemanticContext = false

  first_state = addDFAState(start_closure)
  decisionToDFA[mode].s0 = first_state unless had_predicates

  prediction = execATN(input, first_state)
  print "DFA after matchATN: #{decisionToDFA[mode_at_entry].toLexerString()}" if debug

  prediction
end
|
133
|
+
# Runs the simulation from DFA state +ds0+, consuming input until no further
# target state can be reached (or an accept state is hit at EOF), then hands
# the outcome to failOrAccept.
#
# @param input the character stream
# @param ds0 the DFA state to start from; must not be nil
# @raise [Exception] when +ds0+ is nil
def execATN(input, ds0)
  puts "start state closure=#{ds0.configs.to_s}" if debug

  symbol = input.LA(1)
  current = ds0 # current/from DFA state

  raise Exception.new("s is nil") if current.nil?

  while true # while more work
    puts "execATN loop starting closure: #{current.configs}" if debug

    # As we move src->trg, src->trg, we keep track of the previous trg to
    # avoid looking up the DFA state again, which is expensive.
    # If the previous target was already part of the DFA, we might
    # be able to avoid doing a reach operation upon the symbol. If the
    # current state is not nil, semantic predicates didn't prevent us from
    # creating a DFA state. Once we know that, we check whether the DFA
    # state already has an edge for the symbol. If so, we can just reuse
    # its configuration set; there's no point in re-computing it.
    # This is kind of like doing DFA simulation within the ATN
    # simulation because DFA simulation is really just a way to avoid
    # computing reach/closure sets. Technically, once we know that
    # we have a previously added DFA state, we could jump over to
    # the DFA simulator. But, that would mean popping back and forth
    # a lot and making things more complicated algorithmically.
    # This optimization makes a lot of sense for loops within DFA.
    # A character will take us back to an existing DFA state
    # that already has lots of edges out of it. e.g., .* in comments.
    target = getExistingTargetState(current, symbol)
    target = computeTargetState(input, current, symbol) if target.nil?
    break if target.equal? ATNSimulator::ERROR

    if target.isAcceptState
      captureSimState(prevAccept, input, target)
      break if symbol == Token::EOF
    end

    unless symbol == Token::EOF
      consume(input)
      symbol = input.LA(1)
    end

    current = target # flip; current DFA target becomes new src/from state
  end

  failOrAccept(prevAccept, input, current.configs, symbol)
end
|
191
|
+
|
192
|
+
# Get an existing target state for an edge in the DFA. If the target state
|
193
|
+
# for the edge has not yet been computed or is otherwise not available,
|
194
|
+
# this method returns {@code null}.
|
195
|
+
#
|
196
|
+
# @param s The current DFA state
|
197
|
+
# @param t The next input symbol
|
198
|
+
# @return The existing target DFA state for the given input symbol
|
199
|
+
# {@code t}, or {@code null} if the target state for this edge is not
|
200
|
+
# already cached
|
201
|
+
def getExistingTargetState(s, t)
  # No edge table yet, or the symbol lies outside the range the DFA stores.
  return nil if s.edges.nil? || t < LexerATNSimulator::MIN_DFA_EDGE || t > LexerATNSimulator::MAX_DFA_EDGE

  cached = s.edges[t - LexerATNSimulator::MIN_DFA_EDGE]
  puts "reuse state #{s.stateNumber} edge to #{cached.stateNumber}" if debug && !cached.nil?

  cached
end
|
213
|
+
|
214
|
+
# Compute a target state for an edge in the DFA, and attempt to add the
|
215
|
+
# computed state and corresponding edge to the DFA.
|
216
|
+
#
|
217
|
+
# @param input The input stream
|
218
|
+
# @param s The current DFA state
|
219
|
+
# @param t The next input symbol
|
220
|
+
#
|
221
|
+
# @return The computed target DFA state for the given input symbol
|
222
|
+
# {@code t}. If {@code t} does not lead to a valid DFA state, this method
|
223
|
+
# returns {@link #ERROR}.
|
224
|
+
def computeTargetState(input, s, t)
  reach = OrderedATNConfigSet.new

  # No existing DFA state was found for this edge:
  # fill reach starting from the closure, following t transitions.
  getReachableConfigSet(input, s.configs, reach, t)

  # Something is reachable: add an edge from s to the DFA state
  # found/created for reach.
  return addDFAEdge(s, t, nil, reach) unless reach.length == 0

  # We got nowhere on t from s. Unless predicates were involved, don't throw
  # that knowledge away; it would cause a failover from the DFA later.
  addDFAEdge(s, t, ATNSimulator::ERROR) unless reach.hasSemanticContext

  # stop when we can't match any more char
  ATNSimulator::ERROR
end
|
244
|
+
# Finishes a match attempt: either accepts the most recently recorded accept
# state or reports that no alternative was viable.
#
# @param prevAccept the recorded accept info (dfaState, index, line, column)
# @param input the character stream
# @param reach the configuration set handed to the exception on failure
# @param t the current lookahead symbol
# @return the prediction of the accepted DFA state, or Token::EOF when
#   nothing was accepted and EOF is the very first symbol of the token
# @raise [LexerNoViableAltException] when nothing was accepted otherwise
def failOrAccept(prevAccept, input, reach, t)
  # Decide on the argument that is actually used below. The guard used to
  # look at self.prevAccept instead, which only worked because the single
  # caller passes that very object.
  unless prevAccept.dfaState.nil?
    lexerActionExecutor = prevAccept.dfaState.lexerActionExecutor
    self.accept(input, lexerActionExecutor, self.startIndex, prevAccept.index, prevAccept.line, prevAccept.column)
    return prevAccept.dfaState.prediction
  end

  # if no accept and EOF is first char, return EOF
  return Token::EOF if t == Token::EOF && input.index == self.startIndex

  raise LexerNoViableAltException.new(self.recog, input, self.startIndex, reach)
end
|
257
|
+
# Given a starting configuration set, figure out all ATN configurations
|
258
|
+
# we can reach upon input {@code t}. Parameter {@code reach} is a return
|
259
|
+
# parameter.
|
260
|
+
def getReachableConfigSet(input, closure, reach, t)
  # Alt whose remaining configs may be skipped: they have a lower priority
  # than a config that already reached an accept state for the same rule.
  skipAlt = ATN::INVALID_ALT_NUMBER

  closure.each do |cfg|
    alt_already_accepted = (cfg.alt == skipAlt)
    next if alt_already_accepted && cfg.passedThroughNonGreedyDecision

    puts "testing #{self.getTokenName(t)} at #{cfg.toString(self.recog, true)}" if debug

    cfg.state.transitions.each do |trans| # for each transition
      target = getReachableTarget(trans, t)
      next unless target

      executor = cfg.lexerActionExecutor
      executor = executor.fixOffsetBeforeMatch(input.index - startIndex) if executor

      treatEofAsEpsilon = (t == Token::EOF)
      successor = LexerATNConfig.new(target, nil, nil, nil, executor, cfg)
      next unless self.closure(input, successor, reach, alt_already_accepted, true, treatEofAsEpsilon)

      # any remaining configs for this alt have a lower priority than
      # the one that just reached an accept state.
      skipAlt = cfg.alt
      break
    end
  end
end
|
294
|
+
# Commits an accepted token: repositions +input+, restores the recorded line
# and column, consumes one symbol (unless at EOF) and finally runs the lexer
# actions when both an executor and a recognizer are present.
def accept(input, lexerActionExecutor, start_index, index, _line, charPos)
  puts "ACTION #{lexerActionExecutor}" if debug

  # seek to after last char in token
  input.seek(index)
  self.line = _line
  self.column = charPos
  consume(input) unless input.LA(1) == Token::EOF

  lexerActionExecutor.execute(recog, input, start_index) if lexerActionExecutor && recog
end
|
310
|
+
|
311
|
+
# Returns the target of +trans+ when the transition matches symbol +t+
# within the range 0..0xFFFE, otherwise nil.
def getReachableTarget(trans, t)
  trans.matches(t, 0, 0xFFFE) ? trans.target : nil
end
|
318
|
+
|
319
|
+
# Builds the initial configuration set for start state +p+: one config per
# outgoing transition (alternatives numbered from 1, empty context), each
# expanded through #closure.
def computeStartState(input, p)
  initialContext = PredictionContext.EMPTY
  configs = OrderedATNConfigSet.new
  p.transitions.each_with_index do |transition, i|
    seed = LexerATNConfig.new(transition.target, i + 1, initialContext)
    self.closure(input, seed, configs, false, false, false)
  end
  configs
end
|
329
|
+
|
330
|
+
# Since the alternatives within any lexer decision are ordered by
|
331
|
+
# preference, this method stops pursuing the closure as soon as an accept
|
332
|
+
# state is reached. After the first accept state is reached by depth-first
|
333
|
+
# search from {@code config}, all other (potentially reachable) states for
|
334
|
+
# this rule would have a lower priority.
|
335
|
+
#
|
336
|
+
# @return {@code true} if an accept state is reached, otherwise
|
337
|
+
# {@code false}.
|
338
|
+
def closure(input, config, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
  puts "closure(#{config.toString(self.recog, true)})" if debug

  if config.state.kind_of? RuleStopState
    if debug
      if recog
        puts "closure at #{self.recog.getRuleNames[config.state.ruleIndex]} rule stop #{ config}"
      else
        puts "closure at rule stop #{ config}"
      end
    end

    if config.context.nil? || config.context.hasEmptyPath
      if config.context.nil? || config.context.isEmpty
        # Nothing left to return to: this is an accept configuration.
        configs.add(config)
        return true
      end
      # The context has an empty path among others: record the accepting
      # variant and keep following the remaining return states below.
      configs.add(LexerATNConfig.new(config.state, nil, PredictionContext.EMPTY, nil, nil, config))
      currentAltReachedAcceptState = true
    end

    return currentAltReachedAcceptState if !config.context || config.context.isEmpty

    config.context.length.times do |i|
      return_state_number = config.context.getReturnState(i)
      next if return_state_number == PredictionContext::EMPTY_RETURN_STATE

      parent_context = config.context.getParent(i) # "pop" return state
      return_state = atn.states[return_state_number]
      popped = LexerATNConfig.new(return_state, nil, parent_context, nil, nil, config)
      currentAltReachedAcceptState = closure(input, popped, configs,
                                             currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
    end
    return currentAltReachedAcceptState
  end

  # optimization: configs sitting on epsilon-only states are never stored
  unless config.state.epsilonOnlyTransitions
    configs.add(config) unless currentAltReachedAcceptState && config.passedThroughNonGreedyDecision
  end

  config.state.transitions.each do |transition|
    successor = getEpsilonTarget(input, config, transition, configs, speculative, treatEofAsEpsilon)
    next unless successor

    currentAltReachedAcceptState = closure(input, successor, configs,
                                           currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
  end
  currentAltReachedAcceptState
end
|
390
|
+
# side-effect: can alter configs.hasSemanticContext
|
391
|
+
# Computes the configuration reached from +config+ over transition +t+
# without consuming input, or nil when +t+ cannot be followed that way.
#
# @param input the character stream (only used for predicate evaluation)
# @param config the configuration being expanded
# @param t the transition to follow
# @param configs the set being built; its hasSemanticContext flag is set
#   whenever a predicate transition is encountered
# @param speculative passed through to #evaluatePredicate
# @param treatEofAsEpsilon when true, ATOM/RANGE/SET transitions matching
#   EOF are followed as if they were epsilon transitions
# @return a new LexerATNConfig, or nil
# @raise [UnsupportedOperationException] on a precedence predicate
def getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
  case t.serializationType
  when Transition::RULE
    newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
    LexerATNConfig.new(t.target, nil, newContext, nil, nil, config)
  when Transition::PRECEDENCE
    raise UnsupportedOperationException.new("Precedence predicates are not supported in lexers.")
  when Transition::PREDICATE
    # Track traversing semantic predicates. If we traverse,
    # we cannot add a DFA state for this "reach" computation
    # because the DFA would not test the predicate again in the
    # future. Rather than creating collections of semantic predicates
    # like v3 and testing them on prediction, v4 will test them on the
    # fly all the time using the ATN not the DFA. This is slower but
    # semantically it's not used that often. One of the key elements to
    # this predicate mechanism is not adding DFA states that see
    # predicates immediately afterwards in the ATN. For example,
    #
    #   a : ID {p1}? | ID {p2}? ;
    #
    # should create the start state for rule 'a' (to save start state
    # competition), but should not create target of ID state. The
    # collection of ATN states the following ID references includes
    # states reached by traversing predicates. Since this is when we
    # test them, we cannot cache the DFA state target of ID.
    print "EVAL rule #{t.ruleIndex}:#{t.predIndex}" if debug
    configs.hasSemanticContext = true
    if evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative)
      # Fixed: the constructor call was missing `.new`, so every predicate
      # that evaluated to true raised NoMethodError.
      LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
    end
  when Transition::ACTION
    if config.context.nil? || config.context.hasEmptyPath
      # execute actions anywhere in the start rule for a token.
      #
      # TODO: if the entry rule is invoked recursively, some
      # actions may be executed during the recursive call. The
      # problem can appear when hasEmptyPath() is true but
      # isEmpty() is false. In this case, the config needs to be
      # split into two contexts - one with just the empty path
      # and another with everything but the empty path.
      # Unfortunately, the current algorithm does not allow
      # getEpsilonTarget to return two configurations, so
      # additional modifications are needed before we can support
      # the split operation.
      lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
                                                       atn.lexerActions[t.actionIndex])
      LexerATNConfig.new(t.target, nil, nil, nil, lexerActionExecutor, config)
    else
      # ignore actions in referenced rules
      LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
    end
  when Transition::EPSILON
    LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
  when Transition::ATOM, Transition::RANGE, Transition::SET
    if treatEofAsEpsilon && t.matches(Token::EOF, 0, 0xFFFF)
      LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
    end
  end
end
|
455
|
+
# Evaluate a predicate specified in the lexer.
|
456
|
+
#
|
457
|
+
# <p>If {@code speculative} is {@code true}, this method was called before
|
458
|
+
# {@link #consume} for the matched character. This method should call
|
459
|
+
# {@link #consume} before evaluating the predicate to ensure position
|
460
|
+
# sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine},
|
461
|
+
# and {@link Lexer#getcolumn}, properly reflect the current
|
462
|
+
# lexer state. This method should restore {@code input} and the simulator
|
463
|
+
# to the original state before returning (i.e. undo the actions made by the
|
464
|
+
# call to {@link #consume}.</p>
|
465
|
+
#
|
466
|
+
# @param input The input stream.
|
467
|
+
# @param ruleIndex The rule containing the predicate.
|
468
|
+
# @param predIndex The index of the predicate within the rule.
|
469
|
+
# @param speculative {@code true} if the current index in {@code input} is
|
470
|
+
# one character before the predicate's location.
|
471
|
+
#
|
472
|
+
# @return {@code true} if the specified predicate evaluates to
|
473
|
+
# {@code true}.
|
474
|
+
#/
|
475
|
+
def evaluatePredicate(input, ruleIndex, predIndex, speculative)
  # With no recognizer attached there is nobody to ask, so the predicate passes.
  return true if recog.nil?

  # Non-speculative call: evaluate the predicate without touching the input.
  return recog.sempred(nil, ruleIndex, predIndex) unless speculative

  # Speculative call: remember the simulator position and the input position,
  # step over one character, evaluate, then put everything back.
  prior_column = column
  prior_line = line
  prior_index = input.index
  mark = input.mark
  begin
    consume(input)
    recog.sempred(nil, ruleIndex, predIndex)
  ensure
    # Runs even if sempred raises, so the caller never sees a moved position.
    self.column = prior_column
    self.line = prior_line
    input.seek(prior_index)
    input.release(mark)
  end
end
|
497
|
+
# Copy the current position into +settings+: the input's index, this
# simulator's line and column, and the supplied DFA state.
def captureSimState(settings, input, dfaState)
  settings.index = input.index
  settings.line = line
  settings.column = column
  settings.dfaState = dfaState
end
|
503
|
+
|
504
|
+
# Record a DFA transition out of +from_+ on symbol +tk+ and return the
# target state.
#
# @param from_ source DFA state; its +edges+ array is created on demand
# @param tk    symbol value labelling the edge
# @param to    target state, or nil to derive it from +cfgs+
# @param cfgs  configuration set handed to addDFAState when +to+ is nil
# @return the target state (possibly nil when neither +to+ nor +cfgs+ is given)
def addDFAEdge(from_, tk, to=nil, cfgs=nil)
  if to.nil? && cfgs
    # hasSemanticContext on the config set marks an edge whose validity
    # depends on a predicate evaluated against the specific input, so no
    # static edge may be stored for it. The target state is still created
    # (after clearing the flag) so that a later pass which re-evaluates the
    # predicate can pick up this otherwise dangling state and continue in
    # pure DFA mode from there.
    skip_edge = cfgs.hasSemanticContext
    cfgs.hasSemanticContext = false

    to = addDFAState(cfgs)

    return to if skip_edge
  end

  lowest = LexerATNSimulator::MIN_DFA_EDGE
  # Symbols outside the DFA bounds are never cached as edges.
  return to if tk < lowest || tk > LexerATNSimulator::MAX_DFA_EDGE

  puts "EDGE #{from_} -> #{to} upon #{tk.chr}" if debug

  # The edge table starts empty and grows when an index is assigned,
  # instead of being preallocated for the whole MIN..MAX range.
  from_.edges = Array.new if from_.edges.nil?

  from_.edges[tk - lowest] = to # connect
  to
end
|
548
|
+
|
549
|
+
# Add a new DFA state if there isn't one with this set of
|
550
|
+
# configurations already. This method also detects the first
|
551
|
+
# configuration containing an ATN rule stop state. Later, when
|
552
|
+
# traversing the DFA, we will know which rule to accept.
|
553
|
+
def addDFAState(configs) # -> DFAState:
  # Predicates are evaluated on the fly by the lexer, so configs is expected
  # to carry no unevaluated predicates here (not asserted).
  candidate = DFAState.new(nil, configs)

  # Locate the first configuration sitting in a rule stop state, if any.
  accepting = nil
  configs.each do |cfg|
    next unless cfg.state.kind_of? RuleStopState
    accepting = cfg
    break
  end

  if accepting
    candidate.isAcceptState = true
    candidate.lexerActionExecutor = accepting.lexerActionExecutor
    candidate.prediction = atn.ruleToTokenType[accepting.state.ruleIndex]
  end

  dfa = decisionToDFA[mode]

  # Reuse an equal state already registered with this mode's DFA.
  known = dfa.states[candidate]
  return known if known

  # Otherwise number the candidate, freeze its configs and register it.
  candidate.stateNumber = dfa.states.length
  configs.setReadonly(true)
  candidate.configs = configs
  dfa.states[candidate] = candidate
  candidate
end
|
587
|
+
# Look up the DFA stored for +mode+ in decisionToDFA.
def getDFA(mode)
  decisionToDFA[mode]
end
|
590
|
+
# Get the text matched so far for the current token.
|
591
|
+
def getText(input)
  first = startIndex
  # input.index points at the first lookahead character, which is not part
  # of the match, so stop one position before it.
  last = input.index - 1
  input.getText(first, last)
end
|
595
|
+
# Advance +input+ by one character, updating this simulator's line and
# column first: a newline starts the next line at column 0, any other
# character moves one column to the right.
def consume(input)
  if input.LA(1) == "\n".ord
    self.line += 1
    self.column = 0
  else
    self.column += 1
  end
  input.consume
end
|
605
|
+
# Render the symbol +t+ for display: "EOF" for -1, otherwise the character
# with that code point wrapped in single quotes.
#
# The character is built with an explicit UTF-8 encoding. Integer#chr
# without an encoding raises RangeError above 255 and returns a binary
# string for 128..255, which broke naming of any non-Latin-1 character.
#
# @param t [Integer] -1 or a character code point
# @return [String] "EOF" or the quoted character
# @raise [RangeError] if +t+ is not a valid UTF-8 code point
def getTokenName(t)
  return "EOF" if t == -1

  "'#{t.chr(Encoding::UTF_8)}'"
end
|
612
|
+
end
|