antlr4 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +27 -0
- data/README.md +46 -0
- data/lib/antlr4.rb +262 -0
- data/lib/antlr4/BufferedTokenStream.rb +306 -0
- data/lib/antlr4/CommonTokenFactory.rb +53 -0
- data/lib/antlr4/CommonTokenStream.rb +56 -0
- data/lib/antlr4/FileStream.rb +14 -0
- data/lib/antlr4/InputStream.rb +82 -0
- data/lib/antlr4/IntervalSet.rb +341 -0
- data/lib/antlr4/LL1Analyzer.rb +177 -0
- data/lib/antlr4/Lexer.rb +335 -0
- data/lib/antlr4/ListTokenSource.rb +140 -0
- data/lib/antlr4/Parser.rb +562 -0
- data/lib/antlr4/ParserInterpreter.rb +149 -0
- data/lib/antlr4/ParserRuleContext.rb +162 -0
- data/lib/antlr4/PredictionContext.rb +690 -0
- data/lib/antlr4/Recognizer.rb +162 -0
- data/lib/antlr4/RuleContext.rb +226 -0
- data/lib/antlr4/Token.rb +124 -0
- data/lib/antlr4/TokenFactory.rb +3 -0
- data/lib/antlr4/TokenSource.rb +4 -0
- data/lib/antlr4/TokenStream.rb +3 -0
- data/lib/antlr4/TraceListener.rb +23 -0
- data/lib/antlr4/atn/ATN.rb +133 -0
- data/lib/antlr4/atn/ATNConfig.rb +146 -0
- data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
- data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
- data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
- data/lib/antlr4/atn/ATNSimulator.rb +43 -0
- data/lib/antlr4/atn/ATNState.rb +253 -0
- data/lib/antlr4/atn/ATNType.rb +22 -0
- data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
- data/lib/antlr4/atn/LexerAction.rb +311 -0
- data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
- data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
- data/lib/antlr4/atn/PredictionMode.rb +525 -0
- data/lib/antlr4/atn/SemanticContext.rb +355 -0
- data/lib/antlr4/atn/Transition.rb +297 -0
- data/lib/antlr4/base.rb +60 -0
- data/lib/antlr4/dfa/DFA.rb +128 -0
- data/lib/antlr4/dfa/DFASerializer.rb +77 -0
- data/lib/antlr4/dfa/DFAState.rb +133 -0
- data/lib/antlr4/error.rb +151 -0
- data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
- data/lib/antlr4/error/ErrorListener.rb +109 -0
- data/lib/antlr4/error/ErrorStrategy.rb +742 -0
- data/lib/antlr4/tree/Chunk.rb +31 -0
- data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
- data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
- data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
- data/lib/antlr4/tree/RuleTagToken.rb +39 -0
- data/lib/antlr4/tree/TokenTagToken.rb +38 -0
- data/lib/antlr4/tree/Tree.rb +204 -0
- data/lib/antlr4/tree/Trees.rb +111 -0
- data/lib/antlr4/version.rb +5 -0
- data/lib/antlr4/xpath/XPath.rb +354 -0
- data/lib/double_key_map.rb +78 -0
- data/lib/java_symbols.rb +24 -0
- data/lib/uuid.rb +87 -0
- data/test/test_intervalset.rb +664 -0
- data/test/test_tree.rb +140 -0
- data/test/test_uuid.rb +122 -0
- metadata +109 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
|
2
|
+
class ATNSimulator
  # Sentinel DFA state: lets callers tell "edge not computed yet" (nil)
  # apart from "edge is known to lead nowhere" (ERROR).
  ERROR = DFAState.new(0x7FFFFFFF, ATNConfigSet.new)

  # The context cache maps all PredictionContext objects that are == to a
  # single cached copy. It is shared across all contexts in all ATNConfigs
  # in all DFA states. Each ATNConfigSet is rebuilt to use only cached
  # nodes/graphs in addDFAState(). The cache is deliberately not filled
  # during closure(): many contexts pop up there and are never used again,
  # and filling it greatly slows closure() down.
  #
  # The cache makes a huge difference in memory and a little bit in speed.
  # For the Java grammar on java.*, it dropped the memory requirements at
  # the end from 25M to 16M. Full context graphs are not stored in the DFA
  # because they are limited to local context only, but there is apparently
  # a lot of repetition there as well. Config contexts are optimized before
  # the config set is stored in a DFA state by rebuilding them from cached
  # subgraphs only.
  #
  # A cache used only during closure operations and discarded after each
  # adaptivePredict() was tried; it cost a little more time and did not
  # reduce the overall footprint, so it was not worth the complexity.
  include PredictionContextFunctions

  attr_accessor :atn, :sharedContextCache

  # @param atn the ATN to simulate; must not be nil
  # @param sharedContextCache cache of prediction contexts, or nil to disable caching
  # @raise [Exception] when +atn+ is nil
  def initialize(atn, sharedContextCache)
    raise Exception, "ATN is nil" if atn.nil?

    self.atn = atn
    self.sharedContextCache = sharedContextCache
  end

  # Returns +context+ rebuilt through getCachedPredictionContext using the
  # shared cache, or +context+ itself when no shared cache is configured.
  def getCachedContext(context)
    cache = sharedContextCache
    return context if cache.nil?

    getCachedPredictionContext(context, cache, {})
  end
end
|
43
|
+
|
@@ -0,0 +1,253 @@
|
|
1
|
+
|
2
|
+
#from antlr4.atn.Transition import Transition
|
3
|
+
# NOTE(review): not referenced anywhere in the visible portion of this file;
# presumably carried over from the Python runtime this code was ported from --
# confirm against callers before removing.
INITIAL_NUM_TRANSITIONS = 4
|
4
|
+
|
5
|
+
class ATNState
  # constants for serialization
  INVALID_TYPE = 0
  BASIC = 1
  RULE_START = 2
  BLOCK_START = 3
  PLUS_BLOCK_START = 4
  STAR_BLOCK_START = 5
  TOKEN_START = 6
  RULE_STOP = 7
  BLOCK_END = 8
  STAR_LOOP_BACK = 9
  STAR_LOOP_ENTRY = 10
  PLUS_LOOP_BACK = 11
  LOOP_END = 12

  INVALID_STATE_NUMBER = -1

  attr_accessor :atn, :stateNumber, :stateType, :ruleIndex
  attr_accessor :epsilonOnlyTransitions, :transitions, :nextTokenWithinRule
  attr_reader :serializationNames

  def initialize()
    # Which ATN are we in?
    @atn = nil
    @stateNumber = ATNState::INVALID_STATE_NUMBER
    @stateType = nil
    @ruleIndex = 0 # at runtime, we don't have Rule objects
    @epsilonOnlyTransitions = false
    # Track the transitions emanating from this ATN state.
    @transitions = []
    # Used to cache lookahead during parsing, not used during construction
    @nextTokenWithinRule = nil
    # Human-readable names, indexed by the serialization constants above.
    @serializationNames = %w[
      INVALID
      BASIC
      RULE_START
      BLOCK_START
      PLUS_BLOCK_START
      STAR_BLOCK_START
      TOKEN_START
      RULE_STOP
      BLOCK_END
      STAR_LOOP_BACK
      STAR_LOOP_ENTRY
      PLUS_LOOP_BACK
      LOOP_END
    ]
  end

  # States hash by their state number so that equal states land in the same
  # Hash/Set bucket.
  def hash
    stateNumber
  end

  # Two states are equal when both are ATNStates with the same state number.
  # @return [Boolean]
  def ==(other)
    other.kind_of?(ATNState) && stateNumber == other.stateNumber
  end

  # Hash and Set compare keys with eql?, not ==. Without this alias the
  # custom #hash above had no effect and lookups fell back to object identity.
  alias eql? ==

  def onlyHasEpsilonTransitions
    epsilonOnlyTransitions
  end

  def isNonGreedyExitState
    false
  end

  def to_s
    stateNumber.to_s
  end

  def inspect
    "<ATNState #{stateNumber.to_s} >"
  end

  # Adds +trans+ to this state's transitions and keeps
  # epsilonOnlyTransitions up to date.
  #
  # @param trans the transition to add; must respond to isEpsilon
  # @param index position to insert at; -1 (the default) appends
  # @return the transitions array
  def addTransition(trans, index=-1)
    if transitions.empty?
      self.epsilonOnlyTransitions = trans.isEpsilon
    elsif epsilonOnlyTransitions != trans.isEpsilon
      # Mixed epsilon / non-epsilon transitions: the state is no longer epsilon-only.
      self.epsilonOnlyTransitions = false
      # TODO System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber);
    end

    if index == -1
      transitions.push(trans)
    else
      transitions.insert(index, trans)
    end
  end
end
|
93
|
+
|
94
|
+
class BasicState < ATNState
  # Plain state; only tags itself with the BASIC serialization type.
  def initialize
    super
    # self.stateNumber = ATNState::BASIC
    @stateType = ATNState::BASIC
  end
end
|
101
|
+
|
102
|
+
class DecisionState < ATNState
  attr_accessor :decision, :nonGreedy

  # Starts with no decision number assigned (-1) and greedy matching.
  def initialize
    super
    @decision = -1
    @nonGreedy = false
  end
end
|
112
|
+
# INVALID_TYPE = 0
|
113
|
+
# BASIC = 1
|
114
|
+
# RULE_START = 2
|
115
|
+
# BLOCK_START = 3
|
116
|
+
# PLUS_BLOCK_START = 4
|
117
|
+
# STAR_BLOCK_START = 5
|
118
|
+
# TOKEN_START = 6
|
119
|
+
# RULE_STOP = 7
|
120
|
+
# BLOCK_END = 8
|
121
|
+
# STAR_LOOP_BACK = 9
|
122
|
+
# STAR_LOOP_ENTRY = 10
|
123
|
+
# PLUS_LOOP_BACK = 11
|
124
|
+
# LOOP_END = 12
|
125
|
+
# The start of a regular {@code (...)} block.
|
126
|
+
class BlockStartState < DecisionState
  attr_accessor :endState

  # The matching end state is unset (nil) until assigned by the caller.
  def initialize
    super
    @endState = nil
  end
end
|
134
|
+
|
135
|
+
class BasicBlockStartState < BlockStartState
  # Tags the state with the BLOCK_START serialization type.
  def initialize
    super
    @stateType = ATNState::BLOCK_START
  end
end
|
142
|
+
|
143
|
+
# Terminal node of a simple {@code (a|b|c)} block.
|
144
|
+
class BlockEndState < ATNState
  attr_accessor :startState

  # Tags the state as BLOCK_END; the paired start state is unset (nil)
  # until assigned by the caller.
  def initialize
    super
    @stateType = ATNState::BLOCK_END
    @startState = nil
  end
end
|
153
|
+
|
154
|
+
# The last node in the ATN for a rule, unless that rule is the start symbol.
|
155
|
+
# In that case, there is one transition to EOF. Later, we might encode
|
156
|
+
# references to all calls to this rule to compute FOLLOW sets for
|
157
|
+
# error handling.
|
158
|
+
#
|
159
|
+
class RuleStopState < ATNState
  attr_accessor :stopState

  # Tags the state with the RULE_STOP serialization type.
  # stopState is declared but not initialised here.
  def initialize
    super
    @stateType = ATNState::RULE_STOP
  end
end
|
167
|
+
|
168
|
+
class RuleStartState < ATNState
  attr_accessor :stopState, :isPrecedenceRule

  # Tags the state as RULE_START, with no stop state linked yet and the
  # precedence-rule flag cleared.
  def initialize
    super
    @stateType = ATNState::RULE_START
    @stopState = nil
    @isPrecedenceRule = false
  end
end
|
178
|
+
|
179
|
+
# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions:
|
180
|
+
# one to the loop back to start of the block and one to exit.
|
181
|
+
#
|
182
|
+
class PlusLoopbackState < DecisionState
  # Tags the state with the PLUS_LOOP_BACK serialization type.
  def initialize
    super
    @stateType = ATNState::PLUS_LOOP_BACK
  end
end
|
189
|
+
|
190
|
+
# Start of {@code (A|B|...)+} loop. Technically a decision state, but
|
191
|
+
# we don't use for code generation; somebody might need it, so I'm defining
|
192
|
+
# it for completeness. In reality, the {@link PlusLoopbackState} node is the
|
193
|
+
# real decision-making node for {@code A+}.
|
194
|
+
#
|
195
|
+
class PlusBlockStartState < BlockStartState
  attr_accessor :loopBackState

  # Tags the state as PLUS_BLOCK_START; the loop-back state is unset (nil)
  # until assigned by the caller.
  def initialize
    super
    @stateType = ATNState::PLUS_BLOCK_START
    @loopBackState = nil
  end
end
|
204
|
+
|
205
|
+
# The block that begins a closure loop.
|
206
|
+
class StarBlockStartState < BlockStartState
  # Tags the state with the STAR_BLOCK_START serialization type.
  def initialize
    super
    @stateType = ATNState::STAR_BLOCK_START
  end
end
|
213
|
+
|
214
|
+
class StarLoopbackState < ATNState
  # Tags the state with the STAR_LOOP_BACK serialization type.
  def initialize
    super
    @stateType = ATNState::STAR_LOOP_BACK
  end
end
|
221
|
+
|
222
|
+
|
223
|
+
class StarLoopEntryState < DecisionState
  attr_accessor :loopBackState, :precedenceRuleDecision

  # Tags the state as STAR_LOOP_ENTRY with no loop-back state linked yet.
  def initialize
    super
    @stateType = ATNState::STAR_LOOP_ENTRY
    @loopBackState = nil
    # Indicates whether this state can benefit from a precedence DFA during
    # SLL decision making. Starts out nil (not yet determined).
    @precedenceRuleDecision = nil
  end
end
|
234
|
+
|
235
|
+
# Mark the end of a * or + loop.
|
236
|
+
class LoopEndState < ATNState
  attr_accessor :loopBackState

  # Tags the state as LOOP_END; the loop-back state is unset (nil) until
  # assigned by the caller.
  def initialize
    super
    @stateType = ATNState::LOOP_END
    @loopBackState = nil
  end
end
|
245
|
+
|
246
|
+
# The Tokens rule start state linking to each lexer rule start state.
|
247
|
+
class TokensStartState < DecisionState
  # Tags the state with the TOKEN_START serialization type.
  def initialize
    super
    @stateType = ATNState::TOKEN_START
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
class ATNType
  LEXER = 0
  PARSER = 1

  class << self
    # Method-style accessor mirroring the LEXER constant.
    def LEXER
      ATNType::LEXER
    end

    # Method-style accessor mirroring the PARSER constant.
    def PARSER
      ATNType::PARSER
    end

    # Maps a serialized ordinal back to the matching ATNType constant.
    #
    # @param i the ordinal value
    # @return [Integer] ATNType::LEXER or ATNType::PARSER
    # @raise [Exception] when +i+ matches neither constant
    def fromOrdinal(i)
      return ATNType::LEXER if ATNType::LEXER === i
      return ATNType::PARSER if ATNType::PARSER === i

      raise Exception, "ATNType: Unknown value:#{i} "
    end
  end
end
|
@@ -0,0 +1,612 @@
|
|
1
|
+
# When we hit an accept state in either the DFA or the ATN, we
|
2
|
+
# have to notify the character stream to start buffering characters
|
3
|
+
# via {@link IntStream#mark} and record the current state. The current sim state
|
4
|
+
# includes the current index into the input, the current line,
|
5
|
+
# and current character position in that line. Note that the Lexer is
|
6
|
+
# tracking the starting line and character position of the token. These
|
7
|
+
# variables track the "state" of the simulator when it hits an accept state.
|
8
|
+
#
|
9
|
+
# <p>We track these variables separately for the DFA and ATN simulation
|
10
|
+
# because the DFA simulation often has to fail over to the ATN
|
11
|
+
# simulation. If the ATN simulation fails, we need the DFA to fall
|
12
|
+
# back to its previously accepted state, if any. If the ATN succeeds,
|
13
|
+
# then the ATN does the accept and the DFA simulator that invoked it
|
14
|
+
# can simply return the predicted token type.</p>
|
15
|
+
|
16
|
+
class SimState
  attr_accessor :index, :line, :column, :dfaState

  # A new SimState starts out in the "nothing accepted yet" configuration.
  def initialize
    reset
  end

  # Clears the snapshot: index and column become -1, line becomes 0 and
  # no DFA state is recorded.
  def reset
    @index = @column = -1
    @line = 0
    @dfaState = nil
  end
end
|
30
|
+
|
31
|
+
|
32
|
+
class LexerATNSimulator < ATNSimulator
|
33
|
+
#include JavaSymbols
|
34
|
+
|
35
|
+
class << self
|
36
|
+
attr_reader :debug, :dfa_debug, :match_calls
|
37
|
+
end
|
38
|
+
@@debug = false
|
39
|
+
@@dfa_debug = false
|
40
|
+
@@match_calls = 0
|
41
|
+
def debug; @@debug ;end
|
42
|
+
def dfa_debug; @@dfa_debug ;end
|
43
|
+
def match_calls; @@match_calls ;end
|
44
|
+
|
45
|
+
MIN_DFA_EDGE = 0
|
46
|
+
MAX_DFA_EDGE = 127 # forces unicode to stay in ATN
|
47
|
+
|
48
|
+
attr_accessor :decisionToDFA, :recog, :startIndex, :line, :column
|
49
|
+
attr_accessor :mode, :prevAccept
|
50
|
+
|
51
|
+
# @param _recog the owning recognizer (stored as-is; may be nil)
# @param _atn the ATN, forwarded to the superclass constructor
# @param decision_to_dfa DFA table indexed by lexer mode; must not be nil
# @param shared_context_cache forwarded to the superclass constructor
# @raise [Exception] when +decision_to_dfa+ is nil
def initialize(_recog, _atn, decision_to_dfa, shared_context_cache)
  super(_atn, shared_context_cache)

  raise Exception, "Error: #{self.class} decisionToDFA is nil." if decision_to_dfa.nil?

  @decisionToDFA = decision_to_dfa
  @recog = _recog
  # The current token's starting index into the character stream.
  # Shared across DFA to ATN simulation in case the ATN fails and the
  # DFA did not have a previous accept state. In this case, we use the
  # ATN-generated exception object.
  @startIndex = -1
  # line number 1..n within the input
  @line = 1
  # The index of the character relative to the beginning of the line 0..n-1
  @column = 0
  @mode = Lexer::DEFAULT_MODE
  # Used during DFA/ATN exec to record the most recent accept configuration info
  self.prevAccept = SimState.new
end
|
72
|
+
|
73
|
+
|
74
|
+
# Copies the position-tracking state (column, line, mode, startIndex)
# from another simulator into this one.
def copyState(simulator)
  @column = simulator.column
  @line = simulator.line
  @mode = simulator.mode
  @startIndex = simulator.startIndex
end
|
80
|
+
# Matches one token starting at the current input position in the given
# lexer mode and returns what matchATN / execATN return.
# The input mark taken on entry is always released, even on error.
def match(input, mode)
  @@match_calls += 1
  self.mode = mode
  marker = input.mark
  begin
    self.startIndex = input.index
    prevAccept.reset
    dfa = decisionToDFA[mode]
    type_check(dfa, DFA)
    # No start state cached for this mode yet: build it via the ATN.
    # NOTE(review): if dfa were nil the next line falls through to dfa.s0
    # and fails; presumably type_check rejects nil first -- confirm.
    return matchATN(input) if dfa and dfa.s0.nil?

    execATN(input, dfa.s0)
  ensure
    input.release(marker)
  end
end
|
98
|
+
# Restores the simulator to its freshly-constructed position state:
# no previous accept, start index -1, line 1, column 0, default mode.
def reset
  prevAccept.reset
  @startIndex, @line, @column = -1, 1, 0
  @mode = Lexer::DEFAULT_MODE
end
|
105
|
+
# Placeholder: always raises.
# @raise [Exception] always, with message "not implemented"
def clearDFA()
  raise Exception, "not implemented"
end
|
108
|
+
# Computes the start-state closure for the current mode, registers it as a
# DFA state (and as the mode's s0 unless predicates were seen), then runs
# execATN from it.
#
# @return the result of execATN
def matchATN(input)
  startState = atn.modeToStartState[mode]
  print "matchATN mode #{self.mode} start: #{startState}" if debug

  # Remember the mode now so the debug dump below reports the DFA we started in.
  old_mode = mode
  s0_closure = computeStartState(input, startState)
  # hasSemanticContext doubles as a "do not cache this edge" marker;
  # read it, then clear it before the set is stored in a DFA state.
  suppressEdge = s0_closure.hasSemanticContext
  s0_closure.hasSemanticContext = false

  nxt = addDFAState(s0_closure)
  decisionToDFA[mode].s0 = nxt unless suppressEdge

  predict = execATN(input, nxt)

  print "DFA after matchATN: #{self.decisionToDFA[old_mode].toLexerString()}" if debug

  predict
end
|
133
|
+
# Walks the input from DFA state +ds0+, reusing cached DFA edges where they
# exist and computing them from the ATN otherwise, until no further state is
# reachable or EOF is hit in an accept state.
#
# @return the result of failOrAccept for the last accept state seen
# @raise [Exception] when +ds0+ is nil
def execATN(input, ds0)
  puts "start state closure=#{ds0.configs.to_s}" if debug

  t = input.LA(1)
  s = ds0 # s is current/from DFA state

  raise Exception, "s is nil" if s.nil?

  while true # while more work
    puts "execATN loop starting closure: #{s.configs}" if debug

    # As we move src->trg, src->trg, we keep track of the previous trg to
    # avoid looking up the DFA state again, which is expensive.
    # If the previous target was already part of the DFA, we might be able
    # to avoid doing a reach operation upon t. If the DFA state already has
    # an edge for t we simply reuse its configuration set; there is no
    # point in re-computing it. This is kind of like doing DFA simulation
    # within the ATN simulation, because DFA simulation is really just a way
    # to avoid computing reach/closure sets. Jumping over to a separate DFA
    # simulator would mean popping back and forth a lot and would complicate
    # the algorithm. The optimization pays off for loops within the DFA:
    # a character takes us back to an existing DFA state that already has
    # lots of edges out of it, e.g. .* in comments.
    target = getExistingTargetState(s, t)
    target = computeTargetState(input, s, t) if target.nil?

    # Identity check on purpose: ERROR is a shared sentinel object.
    break if target.equal? ATNSimulator::ERROR

    if target.isAcceptState
      captureSimState(prevAccept, input, target)
      break if t == Token::EOF
    end

    if t != Token::EOF
      consume(input)
      t = input.LA(1)
    end

    s = target # flip; current DFA target becomes new src/from state
  end

  failOrAccept(prevAccept, input, s.configs, t)
end
|
191
|
+
|
192
|
+
# Get an existing target state for an edge in the DFA. If the target state
|
193
|
+
# for the edge has not yet been computed or is otherwise not available,
|
194
|
+
# this method returns {@code null}.
|
195
|
+
#
|
196
|
+
# @param s The current DFA state
|
197
|
+
# @param t The next input symbol
|
198
|
+
# @return The existing target DFA state for the given input symbol
|
199
|
+
# {@code t}, or {@code null} if the target state for this edge is not
|
200
|
+
# already cached
|
201
|
+
# Looks up the cached DFA edge out of +s+ on symbol +t+.
# Returns nil when the state has no edge table, when +t+ lies outside
# MIN_DFA_EDGE..MAX_DFA_EDGE, or when the edge has not been filled in.
def getExistingTargetState(s, t)
  lowest = LexerATNSimulator::MIN_DFA_EDGE
  return nil if s.edges.nil? || t < lowest || t > LexerATNSimulator::MAX_DFA_EDGE

  target = s.edges[t - lowest]
  puts "reuse state #{s.stateNumber} edge to #{target.stateNumber}" if debug && !target.nil?
  target
end
|
213
|
+
|
214
|
+
# Compute a target state for an edge in the DFA, and attempt to add the
|
215
|
+
# computed state and corresponding edge to the DFA.
|
216
|
+
#
|
217
|
+
# @param input The input stream
|
218
|
+
# @param s The current DFA state
|
219
|
+
# @param t The next input symbol
|
220
|
+
#
|
221
|
+
# @return The computed target DFA state for the given input symbol
|
222
|
+
# {@code t}. If {@code t} does not lead to a valid DFA state, this method
|
223
|
+
# returns {@link #ERROR}.
|
224
|
+
# Computes the configurations reachable from +s+ on +t+ and records the
# outcome as a DFA edge. Returns ATNSimulator::ERROR when nothing is
# reachable, otherwise whatever addDFAEdge returns for the new target.
def computeTargetState(input, s, t)
  reach = OrderedATNConfigSet.new

  # if we don't find an existing DFA state
  # Fill reach starting from closure, following t transitions
  getReachableConfigSet(input, s.configs, reach, t)

  # Add an edge from s to target DFA found/created for reach
  return addDFAEdge(s, t, nil, reach) unless reach.length == 0

  # we got nowhere on t from s. Don't throw out this knowledge (it'd cause a
  # failover from DFA later) unless a predicate was involved.
  addDFAEdge(s, t, ATNSimulator::ERROR) unless reach.hasSemanticContext

  # stop when we can't match any more char
  ATNSimulator::ERROR
end
|
244
|
+
# Finishes a match attempt: rewinds to and accepts the last recorded accept
# state if there is one; otherwise returns EOF when EOF was the very first
# symbol, or raises.
#
# NOTE(review): the "was anything accepted?" check reads self.prevAccept while
# the rest uses the +prevAccept+ parameter; the only visible caller passes
# self.prevAccept, so the two coincide there.
#
# @return the accepted DFA state's prediction, or Token::EOF
# @raise [LexerNoViableAltException] when nothing was accepted
def failOrAccept(prevAccept, input, reach, t)
  if self.prevAccept.dfaState.nil?
    # if no accept and EOF is first char, return EOF
    return Token::EOF if t == Token::EOF && input.index == startIndex

    raise LexerNoViableAltException.new(recog, input, startIndex, reach)
  end

  executor = prevAccept.dfaState.lexerActionExecutor
  accept(input, executor, startIndex, prevAccept.index, prevAccept.line, prevAccept.column)
  prevAccept.dfaState.prediction
end
|
257
|
+
# Given a starting configuration set, figure out all ATN configurations
|
258
|
+
# we can reach upon input {@code t}. Parameter {@code reach} is a return
|
259
|
+
# parameter.
|
260
|
+
# Fills +reach+ (an out-parameter) with every configuration reachable from
# the configurations in +closure+ on input symbol +t+.
def getReachableConfigSet(input, closure, reach, t)
  # this is used to skip processing for configs which have a lower priority
  # than a config that already reached an accept state for the same rule
  skipAlt = ATN::INVALID_ALT_NUMBER

  closure.each do |cfg|
    currentAltReachedAcceptState = (cfg.alt == skipAlt)
    next if currentAltReachedAcceptState && cfg.passedThroughNonGreedyDecision

    puts "testing #{self.getTokenName(t)} at #{cfg.toString(self.recog, true)}" if debug

    cfg.state.transitions.each do |trans| # for each transition
      target = getReachableTarget(trans, t)
      next unless target

      executor = cfg.lexerActionExecutor
      executor = executor.fixOffsetBeforeMatch(input.index - startIndex) if executor

      treatEofAsEpsilon = (t == Token::EOF)
      config = LexerATNConfig.new(target, nil, nil, nil, executor, cfg)
      # Explicit receiver required: the +closure+ parameter shadows the method.
      next unless self.closure(input, config, reach, currentAltReachedAcceptState, true, treatEofAsEpsilon)

      # any remaining configs for this alt have a lower priority than
      # the one that just reached an accept state.
      skipAlt = cfg.alt
      break
    end
  end
end
|
294
|
+
# Commits an accepted token: repositions the input and the line/column
# counters to the recorded accept point, consumes one symbol unless at EOF,
# then runs the lexer actions when both an executor and a recognizer exist.
def accept(input, lexerActionExecutor, start_index, index, _line, charPos)
  puts "ACTION #{lexerActionExecutor}" if debug

  # seek to after last char in token
  input.seek(index)
  self.line = _line
  self.column = charPos
  consume(input) if input.LA(1) != Token::EOF

  lexerActionExecutor.execute(recog, input, start_index) if lexerActionExecutor && recog
end
|
310
|
+
|
311
|
+
# Returns the target of +trans+ when it matches symbol +t+ within the
# range 0..0xFFFE, otherwise nil.
def getReachableTarget(trans, t)
  trans.matches(t, 0, 0xFFFE) ? trans.target : nil
end
|
318
|
+
|
319
|
+
# Builds the initial configuration set for start state +p+: one closure per
# outgoing transition, with alternatives numbered from 1 in transition order.
def computeStartState(input, p)
  initialContext = PredictionContext.EMPTY
  configs = OrderedATNConfigSet.new
  p.transitions.each_index do |i|
    start_config = LexerATNConfig.new(p.transitions[i].target, i + 1, initialContext)
    closure(input, start_config, configs, false, false, false)
  end
  configs
end
|
329
|
+
|
330
|
+
# Since the alternatives within any lexer decision are ordered by
|
331
|
+
# preference, this method stops pursuing the closure as soon as an accept
|
332
|
+
# state is reached. After the first accept state is reached by depth-first
|
333
|
+
# search from {@code config}, all other (potentially reachable) states for
|
334
|
+
# this rule would have a lower priority.
|
335
|
+
#
|
336
|
+
# @return {@code true} if an accept state is reached, otherwise
|
337
|
+
# {@code false}.
|
338
|
+
# Adds +config+ and everything reachable from it via epsilon-like
# transitions to +configs+.
#
# @return true if an accept state was reached, otherwise the incoming
#   +currentAltReachedAcceptState+ as updated along the way
def closure(input, config, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
  puts "closure(#{config.toString(self.recog, true)})" if debug

  if config.state.kind_of? RuleStopState
    if debug
      if recog
        puts "closure at #{self.recog.getRuleNames[config.state.ruleIndex]} rule stop #{ config}"
      else
        puts "closure at rule stop #{ config}"
      end
    end

    ctx = config.context
    if ctx.nil? || ctx.hasEmptyPath
      if ctx.nil? || ctx.isEmpty
        # Nothing left on the call stack: this is an accept configuration.
        configs.add(config)
        return true
      end
      configs.add(LexerATNConfig.new(config.state, nil, PredictionContext.EMPTY, nil, nil, config))
      currentAltReachedAcceptState = true
    end

    if ctx && !ctx.isEmpty
      (0...ctx.length).each do |i|
        next unless ctx.getReturnState(i) != PredictionContext::EMPTY_RETURN_STATE

        newContext = ctx.getParent(i) # "pop" return state
        returnState = atn.states[ctx.getReturnState(i)]
        popped = LexerATNConfig.new(returnState, nil, newContext, nil, nil, config)
        currentAltReachedAcceptState = closure(input, popped, configs,
                                               currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
      end
    end
    return currentAltReachedAcceptState
  end

  # optimization: only states with at least one non-epsilon transition are
  # kept, and not once this alt has already accepted via a non-greedy path.
  unless config.state.epsilonOnlyTransitions
    configs.add(config) unless currentAltReachedAcceptState && config.passedThroughNonGreedyDecision
  end

  config.state.transitions.each do |t|
    nxt = getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
    next unless nxt

    currentAltReachedAcceptState = closure(input, nxt, configs, currentAltReachedAcceptState,
                                           speculative, treatEofAsEpsilon)
  end
  currentAltReachedAcceptState
end
|
390
|
+
# side-effect: can alter configs.hasSemanticContext
|
391
|
+
# Returns the configuration reached by following transition +t+ from
# +config+ without consuming input, or nil when +t+ cannot be followed
# that way.
#
# Side effect: sets configs.hasSemanticContext when a predicate transition
# is traversed.
#
# @param speculative forwarded to evaluatePredicate
# @param treatEofAsEpsilon when true, ATOM/RANGE/SET transitions that match
#   EOF are followed as if they were epsilon
# @raise [UnsupportedOperationException] on a precedence transition
def getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
  c = nil
  kind = t.serializationType
  if kind == Transition::RULE
    newContext = SingletonPredictionContext.create(config.context, t.followState.stateNumber)
    c = LexerATNConfig.new(t.target, nil, newContext, nil, nil, config)
  elsif kind == Transition::PRECEDENCE
    raise UnsupportedOperationException.new("Precedence predicates are not supported in lexers.")
  elsif kind == Transition::PREDICATE
    # Track traversing semantic predicates. If we traverse,
    # we cannot add a DFA state for this "reach" computation
    # because the DFA would not test the predicate again in the
    # future. Rather than creating collections of semantic predicates
    # like v3 and testing them on prediction, v4 will test them on the
    # fly all the time using the ATN not the DFA. This is slower but
    # semantically it's not used that often. One of the key elements to
    # this predicate mechanism is not adding DFA states that see
    # predicates immediately afterwards in the ATN. For example,
    #
    # a : ID {p1}? | ID {p2}? ;
    #
    # should create the start state for rule 'a' (to save start state
    # competition), but should not create target of ID state. The
    # collection of ATN states the following ID references includes
    # states reached by traversing predicates. Since this is when we
    # test them, we cannot cache the DFA state target of ID.
    print "EVAL rule #{t.ruleIndex}:#{t.predIndex}" if self.debug
    configs.hasSemanticContext = true
    if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative)
      # Fixed: the original called LexerATNConfig(...) without .new, which
      # raised NoMethodError whenever a predicate evaluated to true.
      c = LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
    end
  elsif kind == Transition::ACTION
    if config.context.nil? or config.context.hasEmptyPath()
      # execute actions anywhere in the start rule for a token.
      #
      # TODO: if the entry rule is invoked recursively, some
      # actions may be executed during the recursive call. The
      # problem can appear when hasEmptyPath() is true but
      # isEmpty() is false. In this case, the config needs to be
      # split into two contexts - one with just the empty path
      # and another with everything but the empty path.
      # Unfortunately, the current algorithm does not allow
      # getEpsilonTarget to return two configurations, so
      # additional modifications are needed before we can support
      # the split operation.
      lexerActionExecutor = LexerActionExecutor.append(config.lexerActionExecutor,
                                                       self.atn.lexerActions[t.actionIndex])
      c = LexerATNConfig.new(t.target, nil, nil, nil, lexerActionExecutor, config)
    else
      # ignore actions in referenced rules
      c = LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
    end
  elsif kind == Transition::EPSILON
    c = LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
  elsif [Transition::ATOM, Transition::RANGE, Transition::SET].member? kind
    if treatEofAsEpsilon && t.matches(Token::EOF, 0, 0xFFFF)
      c = LexerATNConfig.new(t.target, nil, nil, nil, nil, config)
    end
  end
  c
end
|
455
|
+
# Evaluate a predicate specified in the lexer.
|
456
|
+
#
|
457
|
+
# <p>If {@code speculative} is {@code true}, this method was called before
|
458
|
+
# {@link #consume} for the matched character. This method should call
|
459
|
+
# {@link #consume} before evaluating the predicate to ensure position
|
460
|
+
# sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine},
|
461
|
+
# and {@link Lexer#getcolumn}, properly reflect the current
|
462
|
+
# lexer state. This method should restore {@code input} and the simulator
|
463
|
+
# to the original state before returning (i.e. undo the actions made by the
|
464
|
+
# call to {@link #consume}.</p>
|
465
|
+
#
|
466
|
+
# @param input The input stream.
|
467
|
+
# @param ruleIndex The rule containing the predicate.
|
468
|
+
# @param predIndex The index of the predicate within the rule.
|
469
|
+
# @param speculative {@code true} if the current index in {@code input} is
|
470
|
+
# one character before the predicate's location.
|
471
|
+
#
|
472
|
+
# @return {@code true} if the specified predicate evaluates to
|
473
|
+
# {@code true}.
|
474
|
+
#/
|
475
|
+
def evaluatePredicate(input, ruleIndex, predIndex, speculative)
  # With no recognizer attached there is no predicate code to run, so the
  # predicate is treated as satisfied.
  return true if self.recog.nil?

  # Non-speculative call: evaluate directly, without touching the input.
  return self.recog.sempred(nil, ruleIndex, predIndex) unless speculative

  # Speculative call: snapshot everything consume() modifies, step over the
  # matched character, evaluate, then put it all back.
  column_before, line_before = self.column, self.line
  index_before = input.index
  mark = input.mark()
  begin
    self.consume(input)
    self.recog.sempred(nil, ruleIndex, predIndex)
  ensure
    # Runs whether sempred returned or raised.
    self.column = column_before
    self.line = line_before
    input.seek(index_before)
    input.release(mark)
  end
end
|
497
|
+
# Copies the current input index, this simulator's line and column, and the
# given DFA state into +settings+.
#
# @param settings object exposing index=, line=, column= and dfaState=
# @param input    stream whose current +index+ is recorded
# @param dfaState DFA state to store alongside the position
# @return the +dfaState+ argument (value of the final assignment)
def captureSimState(settings, input, dfaState)
  settings.index = input.index
  settings.line, settings.column = self.line, self.column
  settings.dfaState = dfaState
end
|
503
|
+
|
504
|
+
# Connects +from_+ to a target DFA state on input symbol +tk+.
#
# The target is either passed in as +to+, or, when +to+ is nil and +cfgs+ is
# given, obtained from addDFAState(cfgs).
#
# @param from_ source DFA state; its +edges+ table is created on first use
# @param tk    integer input symbol labelling the edge
# @param to    target DFA state, or nil to derive it from +cfgs+
# @param cfgs  configuration set used to build the target when +to+ is nil
# @return the target DFA state (whatever +to+ ends up being)
def addDFAEdge(from_, tk, to=nil, cfgs=nil)
  if to.nil? and cfgs
    # hasSemanticContext is used on the way here as a marker meaning that
    # dynamic predicate evaluation makes this edge depend on the specific
    # input sequence, so no static DFA edge should be recorded. The target
    # state is still created so that a later pass which evaluates the
    # predicate again can land on it and continue in pure DFA mode.
    edge_suppressed = cfgs.hasSemanticContext
    cfgs.hasSemanticContext = false

    to = self.addDFAState(cfgs)

    return to if edge_suppressed
  end

  lowest = LexerATNSimulator::MIN_DFA_EDGE
  highest = LexerATNSimulator::MAX_DFA_EDGE

  # Only symbols inside the DFA bounds get an edge.
  return to unless tk.between?(lowest, highest)

  puts "EDGE #{from_} -> #{to} upon #{tk.chr}" if self.debug

  # The edge table is indexed by (tk - MIN_DFA_EDGE). It starts empty and
  # Ruby grows it on assignment, so nothing is preallocated.
  from_.edges = [] if from_.edges.nil?
  from_.edges[tk - lowest] = to # connect

  to
end
|
548
|
+
|
549
|
+
# Add a new DFA state if there isn't one with this set of
|
550
|
+
# configurations already. This method also detects the first
|
551
|
+
# configuration containing an ATN rule stop state. Later, when
|
552
|
+
# traversing the DFA, we will know which rule to accept.
|
553
|
+
def addDFAState(configs) # -> DFAState:
  # The lexer evaluates predicates on the fly, so by this point configs is
  # not expected to hold configurations with unevaluated predicates.
  candidate = DFAState.new(nil, configs)

  # Find the first configuration sitting in a rule stop state, if any.
  stop_config = nil
  configs.each do |cfg|
    next unless cfg.state.kind_of? RuleStopState
    stop_config = cfg
    break
  end

  if stop_config
    # Mark the state as accepting and record which token type and lexer
    # actions go with the rule that stopped first.
    candidate.isAcceptState = true
    candidate.lexerActionExecutor = stop_config.lexerActionExecutor
    candidate.prediction = self.atn.ruleToTokenType[stop_config.state.ruleIndex]
  end

  dfa = self.decisionToDFA[self.mode]

  # Reuse a state already registered under an equal key.
  known = dfa.states[candidate]
  return known if known

  # Otherwise number the candidate, mark its configs read-only and register it.
  candidate.stateNumber = dfa.states.length
  configs.setReadonly(true)
  candidate.configs = configs
  dfa.states[candidate] = candidate
  candidate
end
|
587
|
+
# Returns the entry of decisionToDFA stored under +mode+.
#
# @param mode index/key into decisionToDFA
# @return whatever decisionToDFA[mode] holds
def getDFA(mode)
  dfas = self.decisionToDFA
  dfas[mode]
end
|
590
|
+
# Get the text matched so far for the current token.
|
591
|
+
def getText(input)
  # input.index is the first lookahead character, which is not part of the
  # match, so the matched span ends one position before it.
  last_matched = input.index - 1
  input.getText(self.startIndex, last_matched)
end
|
595
|
+
# Advances +input+ by one character while keeping this simulator's line and
# column counters in step with it.
#
# @param input stream answering LA(1) with the current character code and
#              consume() to advance
# @return the result of input.consume()
def consume(input)
  if input.LA(1) == "\n".ord
    # A newline starts a new line at column zero.
    self.line += 1
    self.column = 0
  else
    self.column += 1
  end
  input.consume()
end
|
605
|
+
# Returns a printable name for the character code +t+.
#
# @param t [Integer] a character code point, or -1 for end of input
# @return [String] "EOF" for -1, otherwise the character wrapped in single
#   quotes
# @raise [RangeError] if +t+ is not a valid UTF-8 code point
def getTokenName(t)
  return "EOF" if t == -1

  # Integer#chr without an encoding only covers 0..255 (and yields a binary
  # string for 128..255); anything larger raises RangeError. Asking for UTF-8
  # explicitly makes every Unicode code point printable.
  "'#{t.chr(Encoding::UTF_8)}'"
end
|
612
|
+
end
|