antlr4 0.9.2

This diff shows the content of publicly available package versions as released to one of the supported registries, and is provided for informational purposes only.
Files changed (64)
  1. checksums.yaml +7 -0
  2. data/LICENSE +27 -0
  3. data/README.md +46 -0
  4. data/lib/antlr4.rb +262 -0
  5. data/lib/antlr4/BufferedTokenStream.rb +306 -0
  6. data/lib/antlr4/CommonTokenFactory.rb +53 -0
  7. data/lib/antlr4/CommonTokenStream.rb +56 -0
  8. data/lib/antlr4/FileStream.rb +14 -0
  9. data/lib/antlr4/InputStream.rb +82 -0
  10. data/lib/antlr4/IntervalSet.rb +341 -0
  11. data/lib/antlr4/LL1Analyzer.rb +177 -0
  12. data/lib/antlr4/Lexer.rb +335 -0
  13. data/lib/antlr4/ListTokenSource.rb +140 -0
  14. data/lib/antlr4/Parser.rb +562 -0
  15. data/lib/antlr4/ParserInterpreter.rb +149 -0
  16. data/lib/antlr4/ParserRuleContext.rb +162 -0
  17. data/lib/antlr4/PredictionContext.rb +690 -0
  18. data/lib/antlr4/Recognizer.rb +162 -0
  19. data/lib/antlr4/RuleContext.rb +226 -0
  20. data/lib/antlr4/Token.rb +124 -0
  21. data/lib/antlr4/TokenFactory.rb +3 -0
  22. data/lib/antlr4/TokenSource.rb +4 -0
  23. data/lib/antlr4/TokenStream.rb +3 -0
  24. data/lib/antlr4/TraceListener.rb +23 -0
  25. data/lib/antlr4/atn/ATN.rb +133 -0
  26. data/lib/antlr4/atn/ATNConfig.rb +146 -0
  27. data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
  28. data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
  29. data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
  30. data/lib/antlr4/atn/ATNSimulator.rb +43 -0
  31. data/lib/antlr4/atn/ATNState.rb +253 -0
  32. data/lib/antlr4/atn/ATNType.rb +22 -0
  33. data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
  34. data/lib/antlr4/atn/LexerAction.rb +311 -0
  35. data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
  36. data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
  37. data/lib/antlr4/atn/PredictionMode.rb +525 -0
  38. data/lib/antlr4/atn/SemanticContext.rb +355 -0
  39. data/lib/antlr4/atn/Transition.rb +297 -0
  40. data/lib/antlr4/base.rb +60 -0
  41. data/lib/antlr4/dfa/DFA.rb +128 -0
  42. data/lib/antlr4/dfa/DFASerializer.rb +77 -0
  43. data/lib/antlr4/dfa/DFAState.rb +133 -0
  44. data/lib/antlr4/error.rb +151 -0
  45. data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
  46. data/lib/antlr4/error/ErrorListener.rb +109 -0
  47. data/lib/antlr4/error/ErrorStrategy.rb +742 -0
  48. data/lib/antlr4/tree/Chunk.rb +31 -0
  49. data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
  50. data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
  51. data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
  52. data/lib/antlr4/tree/RuleTagToken.rb +39 -0
  53. data/lib/antlr4/tree/TokenTagToken.rb +38 -0
  54. data/lib/antlr4/tree/Tree.rb +204 -0
  55. data/lib/antlr4/tree/Trees.rb +111 -0
  56. data/lib/antlr4/version.rb +5 -0
  57. data/lib/antlr4/xpath/XPath.rb +354 -0
  58. data/lib/double_key_map.rb +78 -0
  59. data/lib/java_symbols.rb +24 -0
  60. data/lib/uuid.rb +87 -0
  61. data/test/test_intervalset.rb +664 -0
  62. data/test/test_tree.rb +140 -0
  63. data/test/test_uuid.rb +122 -0
  64. metadata +109 -0
data/lib/antlr4/LL1Analyzer.rb
@@ -0,0 +1,177 @@
+
+ class LL1Analyzer
+     # Special value added to the lookahead sets to indicate that we hit
+     # a predicate during analysis if {@code seeThruPreds==false}.
+     HIT_PRED = Token::INVALID_TYPE
+
+     attr_accessor :atn
+
+     def initialize(atn)
+         @atn = atn
+     end
+
+     #*
+     # Calculates the SLL(1) expected lookahead set for each outgoing transition
+     # of an {@link ATNState}. The returned array has one element for each
+     # outgoing transition in {@code s}. If the closure from transition
+     # <em>i</em> leads to a semantic predicate before matching a symbol, the
+     # element at index <em>i</em> of the result will be {@code null}.
+     #
+     # @param s the ATN state
+     # @return the expected symbols for each outgoing transition of {@code s}.
+     #/
+     def getDecisionLookahead(s)
+         return nil if s.nil?
+
+         count = s.transitions.length()
+         look = Array.new
+         for alt in 0..count-1
+             look[alt] = Set.new()
+             lookBusy = Set.new()
+             seeThruPreds = false # fail to get lookahead upon pred
+             self._LOOK(s.transition(alt).target, nil, PredictionContext.EMPTY, \
+                        look[alt], lookBusy, Set.new(), seeThruPreds, false)
+             # Wipe out lookahead for this alternative if we found nothing
+             # or we hit a predicate when !seeThruPreds
+             if look[alt].length==0 or look[alt].member? LL1Analyzer::HIT_PRED then
+                 look[alt] = nil
+             end
+         end
+         return look
+     end
+
+     #*
+     # Compute set of tokens that can follow {@code s} in the ATN in the
+     # specified {@code ctx}.
+     #
+     # <p>If {@code ctx} is {@code null} and the end of the rule containing
+     # {@code s} is reached, {@link Token#EPSILON} is added to the result set.
+     # If {@code ctx} is not {@code null} and the end of the outermost rule is
+     # reached, {@link Token#EOF} is added to the result set.</p>
+     #
+     # @param s the ATN state
+     # @param stopState the ATN state to stop at. This can be a
+     # {@link BlockEndState} to detect epsilon paths through a closure.
+     # @param ctx the complete parser context, or {@code null} if the context
+     # should be ignored
+     #
+     # @return The set of tokens that can follow {@code s} in the ATN in the
+     # specified {@code ctx}.
+     #/
+     def LOOK(s, stopState=nil, ctx=nil)
+         r = IntervalSet.new()
+         seeThruPreds = true # ignore preds; get all lookahead
+         if not ctx.nil? then
+             lookContext = PredictionContextFromRuleContext.new(s.atn, ctx)
+         else
+             lookContext = nil
+         end
+         self._LOOK(s, stopState, lookContext, r, Set.new(), Set.new(), seeThruPreds, true)
+         return r
+     end
+
+     #*
+     # Compute set of tokens that can follow {@code s} in the ATN in the
+     # specified {@code ctx}.
+     #
+     # <p>If {@code ctx} is {@code null} and {@code stopState} or the end of the
+     # rule containing {@code s} is reached, {@link Token#EPSILON} is added to
+     # the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
+     # {@code true} and {@code stopState} or the end of the outermost rule is
+     # reached, {@link Token#EOF} is added to the result set.</p>
+     #
+     # @param s the ATN state.
+     # @param stopState the ATN state to stop at. This can be a
+     # {@link BlockEndState} to detect epsilon paths through a closure.
+     # @param ctx The outer context, or {@code null} if the outer context should
+     # not be used.
+     # @param look The result lookahead set.
+     # @param lookBusy A set used for preventing epsilon closures in the ATN
+     # from causing a stack overflow. Outside code should pass
+     # {@code Set.new} for this argument.
+     # @param calledRuleStack A set used for preventing left recursion in the
+     # ATN from causing a stack overflow. Outside code should pass
+     # {@code Set.new} for this argument.
+     # @param seeThruPreds {@code true} to treat semantic predicates as
+     # implicitly {@code true} and "see through them", otherwise {@code false}
+     # to treat semantic predicates as opaque and add {@link #HIT_PRED} to the
+     # result if one is encountered.
+     # @param addEOF Add {@link Token#EOF} to the result if the end of the
+     # outermost context is reached. This parameter has no effect if {@code ctx}
+     # is {@code null}.
+     #/
+     def _LOOK(s, stopState, ctx, look, lookBusy, \
+               calledRuleStack, seeThruPreds, addEOF)
+         c = ATNConfig.new(s, 0, ctx)
+
+         # stop if this config was already visited during the closure
+         return if lookBusy.member? c
+         lookBusy.add(c)
+
+         if s == stopState then
+             if ctx.nil? then
+                 look.addOne(Token::EPSILON)
+                 return
+             elsif ctx.isEmpty() and addEOF
+                 look.addOne(Token::EOF)
+                 return
+             end
+         end
+
+         if s.kind_of? RuleStopState then
+             if ctx.nil? then
+                 look.addOne(Token::EPSILON)
+                 return
+             elsif ctx.isEmpty() and addEOF
+                 look.addOne(Token::EOF)
+                 return
+             end
+             if PredictionContext.EMPTY != ctx
+                 # run thru all possible stack tops in ctx
+                 for i in 0..ctx.length-1 do
+                     returnState = self.atn.states[ctx.getReturnState(i)]
+                     removed = calledRuleStack.member? returnState.ruleIndex
+                     begin
+                         calledRuleStack.delete(returnState.ruleIndex)
+                         self._LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
+                     ensure
+                         calledRuleStack.add(returnState.ruleIndex) if removed
+                     end
+                 end
+                 return
+             end
+         end
+
+         for t in s.transitions do
+             if t.class == RuleTransition then
+                 next if calledRuleStack.member? t.target.ruleIndex
+
+                 newContext = SingletonPredictionContext.create(ctx, t.followState.stateNumber)
+                 begin
+                     calledRuleStack.add(t.target.ruleIndex)
+                     self._LOOK(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
+                 ensure
+                     calledRuleStack.delete(t.target.ruleIndex)
+                 end
+             elsif t.kind_of? AbstractPredicateTransition then
+                 if seeThruPreds
+                     self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
+                 else
+                     look.addOne(LL1Analyzer::HIT_PRED)
+                 end
+             elsif t.isEpsilon
+                 self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
+             elsif t.class == WildcardTransition
+                 look.addRange( Token::MIN_USER_TOKEN_TYPE..self.atn.maxTokenType )
+             else
+                 set = t.label
+                 if not set.nil? then
+                     if t.kind_of? NotSetTransition then
+                         set = set.complement(IntervalSet.of(Token::MIN_USER_TOKEN_TYPE, self.atn.maxTokenType))
+                     end
+                     look.addSet(set)
+                 end
+             end
+         end
+     end
+ end
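
In ANTLR runtimes, LOOK is typically reached through the ATN when computing the expected-token set at a state (e.g. for error reporting). A minimal usage sketch, assuming a recognizer that exposes `atn` and its current `state` as the classes in this gem do; the helper name `expected_tokens` is hypothetical, not part of the gem's API:

    # Hypothetical helper: the set of token types that can follow the
    # recognizer's current ATN state (predicates are seen through).
    def expected_tokens(recognizer, ctx)
        atn = recognizer.atn
        s = atn.states[recognizer.state]          # current ATN state
        LL1Analyzer.new(atn).LOOK(s, nil, ctx)    # returns an IntervalSet
    end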
data/lib/antlr4/Lexer.rb
@@ -0,0 +1,335 @@
+ # A lexer is a recognizer that draws input symbols from a character stream.
+ # Lexer grammars result in a subclass of this object. A Lexer object
+ # uses simplified match() and error recovery mechanisms in the interest
+ # of speed.
+
+ class Lexer < TokenSource
+
+     DEFAULT_MODE = 0
+     MORE = -2
+     SKIP = -3
+
+     DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL
+     HIDDEN = Token::HIDDEN_CHANNEL
+     MIN_CHAR_VALUE = "\u0000"
+     MAX_CHAR_VALUE = "\uFFFE"
+
+     attr_accessor :input, :factory, :tokenFactorySourcePair #, :interp
+     attr_accessor :token, :tokenStartCharIndex, :tokenStartLine, :tokenStartColumn
+     attr_accessor :hitEOF, :channel, :type, :modeStack, :mode, :text
+
+     def initialize(_input)
+         super()
+         @input = _input
+         @factory = CommonTokenFactory.DEFAULT
+         @tokenFactorySourcePair = [self, _input]
+
+         @interp = nil # child classes must populate this
+
+         # The goal of all lexer rules/methods is to create a token object.
+         # This is an instance variable as multiple rules may collaborate to
+         # create a single token. nextToken will return this object after
+         # matching lexer rule(s). If you subclass to allow multiple token
+         # emissions, then set this to the last token to be matched or
+         # something non-nil so that the auto token emit mechanism will not
+         # emit another token.
+         @token = nil
+
+         # What character index in the stream did the current token start at?
+         # Needed, for example, to get the text for the current token. Set at
+         # the start of nextToken.
+         @tokenStartCharIndex = -1
+
+         # The line on which the first character of the token resides#/
+         @tokenStartLine = -1
+
+         # The character position of the first character within the line#/
+         @tokenStartColumn = -1
+
+         # Once we see EOF on the char stream, the next token will be EOF.
+         # If you have DONE : EOF ; then you see DONE EOF.
+         @hitEOF = false
+
+         # The channel number for the current token#/
+         @channel = Token::DEFAULT_CHANNEL
+
+         # The token type for the current token#/
+         @type = Token::INVALID_TYPE
+
+         @modeStack = Array.new
+         @mode = Lexer::DEFAULT_MODE
+
+         # You can set the text for the current token to override what is in
+         # the input char buffer. Use the text= setter or set this instance
+         # variable directly.
+         #/
+         @text = nil
+     end
+
68
+ def reset
69
+ # wack Lexer state variables
70
+ if not self.input.nil? then
71
+ self.input.seek(0) # rewind the input
72
+ end
73
+ self.token = nil
74
+ self.type = Token::INVALID_TYPE
75
+ self.channel = Token::DEFAULT_CHANNEL
76
+ self.tokenStartCharIndex = -1
77
+ self.tokenStartColumn = -1
78
+ self.tokenStartLine = -1
79
+ self.text = nil
80
+
81
+ self.hitEOF = false
82
+ self.mode = Lexer::DEFAULT_MODE
83
+ self.modeStack = Array.new
84
+
85
+ self.interp.reset()
86
+ end
87
+
88
+ # Return a token from self source; i.e., match a token on the char
89
+ # stream.
90
+ def nextToken
91
+ if self.input.nil?
92
+ raise IllegalStateException.new("nextToken requires a non-null input stream.")
93
+ end
94
+
95
+ # Mark start location in char stream so unbuffered streams are
96
+ # guaranteed at least have text of current token
97
+ tokenStartMarker = self.input.mark()
98
+ begin
99
+ while true do
100
+ if self.hitEOF then
101
+ self.emitEOF()
102
+ return self.token
103
+ end
104
+ self.token = nil
105
+ self.channel = Token::DEFAULT_CHANNEL
106
+ self.tokenStartCharIndex = self.input.index
107
+ self.tokenStartColumn = self.interp.column
108
+ self.tokenStartLine = self.interp.line
109
+ self.text = nil
110
+ continueOuter = false
111
+ while true do
112
+ self.type = Token::INVALID_TYPE
113
+ ttype = Lexer::SKIP
114
+ begin
115
+ ttype = self.interp.match(self.input, self.mode)
116
+ rescue LexerNoViableAltException => e
117
+ self.notifyListeners(e) # report error
118
+ self.recover(e)
119
+ end
120
+ if self.input.LA(1)==Token::EOF then
121
+ self.hitEOF = true
122
+ end
123
+ if self.type == Token::INVALID_TYPE
124
+ self.type = ttype
125
+
126
+ end
127
+ if self.type == Lexer::SKIP
128
+ continueOuter = true
129
+ break
130
+ end
131
+ if self.type!= Lexer::MORE
132
+ break
133
+ end
134
+ end
135
+ next if continueOuter
136
+ self.emit() if self.token.nil?
137
+ return self.token
138
+ end
139
+ ensure
140
+ # make sure we release marker after match or
141
+ # unbuffered char stream will keep buffering
142
+ self.input.release(tokenStartMarker)
143
+ end
144
+ end
+
+     # Instruct the lexer to skip creating a token for the current lexer rule
+     # and look for another token. nextToken() knows to keep looking when
+     # a lexer rule finishes with token set to SKIP_TOKEN. Recall that
+     # if token==nil at the end of any token rule, it creates one for you
+     # and emits it.
+     #/
+     def skip
+         self.type = Lexer::SKIP
+     end
+
+     def more
+         self.type = Lexer::MORE
+     end
+
+     def pushMode(m)
+         if self.interp.debug then
+             puts "pushMode #{m}"
+         end
+         self.modeStack.push(self.mode)
+         self.mode = m
+     end
+
+     def popMode
+         if self.modeStack.empty? then
+             raise Exception.new("Empty Stack")
+         end
+         if self.interp.debug then
+             puts "popMode back to #{self.modeStack.slice(0,self.modeStack.length-1)}"
+         end
+         self.mode = self.modeStack.pop()
+         return self.mode
+     end
+
+     def inputStream
+         return self.input
+     end
+
+     # Set the char stream and reset the lexer#/
+     def inputStream=(input)
+         self.input = nil
+         self.tokenFactorySourcePair = [self, nil]
+         self.reset()
+         self.input = input
+         self.tokenFactorySourcePair = [self, self.input]
+     end
+
+     def sourceName
+         return self.input.sourceName
+     end
+
+     # By default does not support multiple emits per nextToken invocation
+     # for efficiency reasons. Subclass and override this method, nextToken,
+     # and getToken (to push tokens into a list and pull from that list
+     # rather than a single variable as this implementation does).
+     #/
+     def emitToken(token)
+         self.token = token
+     end
+
+     # The standard method called to automatically emit a token at the
+     # outermost lexical rule. The token object should point into the
+     # char buffer start..stop. If there is a text override in 'text',
+     # use that to set the token's text. Override this method to emit
+     # custom Token objects or provide a new factory.
+     #/
+     def emit
+         t = self.factory.create(self.tokenFactorySourcePair, self.type, self.text, self.channel,
+                                 self.tokenStartCharIndex, self.getCharIndex()-1,
+                                 self.tokenStartLine, self.tokenStartColumn)
+         self.emitToken(t)
+         return t
+     end
+
+     def emitEOF()
+         cpos = self.column
+         # The character position for EOF is one beyond the position of
+         # the previous token's last character
+         if not self.token.nil? then
+             n = self.token.stop - self.token.start + 1
+             cpos = self.token.column + n
+         end
+         eof = self.factory.create(self.tokenFactorySourcePair, Token::EOF, nil, Token::DEFAULT_CHANNEL,
+                                   self.input.index, self.input.index-1, self.line, cpos)
+         self.emitToken(eof)
+         return eof
+     end
+
+     def line
+         return self.interp.line
+     end
+
+     def line=(line)
+         self.interp.line = line
+     end
+
+     def column
+         return self.interp.column
+     end
+
+     def column=(column)
+         self.interp.column = column
+     end
+
+     # What is the index of the current character of lookahead?#/
+     def getCharIndex()
+         return self.input.index
+     end
+
+     # Return the text matched so far for the current token or any
+     # text override.
+     def text
+         if not @text.nil? then
+             @text
+         else
+             self.interp.getText(self.input)
+         end
+     end
+
+     # Set the complete text of this token; it wipes any previous
+     # changes to the text.
+     def text=(txt)
+         @text = txt
+     end
+
+     # Return a list of all Token objects in the input char stream.
+     # Forces a load of all tokens. Does not include the EOF token.
+     #/
+     def getAllTokens
+         tokens = Array.new
+         t = self.nextToken()
+         while t.type != Token::EOF do
+             tokens.push(t)
+             t = self.nextToken()
+         end
+         return tokens
+     end
+
+     def notifyListeners(e) # e is a LexerNoViableAltException
+         start = self.tokenStartCharIndex
+         stop = self.input.index
+         text = self.input.getText(start, stop)
+         msg = "token recognition error at: '#{self.getErrorDisplay(text)}'"
+         listener = self.getErrorListenerDispatch()
+         listener.syntaxError(self, nil, self.tokenStartLine, self.tokenStartColumn, msg, e)
+     end
+
+     def getErrorDisplay(s)
+         StringIO.open do |buf|
+             s.chars.each {|c| buf.write(self.getErrorDisplayForChar(c)) }
+             return buf.string()
+         end
+     end
+
+     def getErrorDisplayForChar(c)
+         begin
+             cc = c[0].ord
+         rescue ArgumentError
+             cc = "\ufffd".ord
+         end
+         if cc==Token::EOF then
+             return "<EOF>"
+         elsif c == "\n"
+             return "\\n"
+         elsif c == "\t"
+             return "\\t"
+         elsif c == "\r"
+             return "\\r"
+         else
+             return c
+         end
+     end
+
+     def getCharErrorDisplay(c)
+         return "'" + self.getErrorDisplayForChar(c) + "'"
+     end
+
+     # Lexers can normally match any char in its vocabulary after matching
+     # a token, so do the easy thing and just kill a character and hope
+     # it all works out. You can instead use the rule invocation stack
+     # to do sophisticated error recovery if you are in a fragment rule.
+     #/
+     def recover(re) # re is a RecognitionException
+         if self.input.LA(1) != Token::EOF then
+             if re.kind_of? LexerNoViableAltException then
+                 # skip a char and try again
+                 self.interp.consume(self.input)
+             else
+                 # TODO: Do we lose character or line position information?
+                 self.input.consume()
+             end
+         end
+     end
+
+     def getRuleNames
+         self.ruleNames
+     end
+ end
+
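To exercise the Lexer machinery above end to end, a minimal tokenizing sketch; `MyLexer` stands in for a generated lexer subclass (hypothetical), and the input text is arbitrary:

    # Tokenize a string and print each token; getAllTokens excludes EOF.
    input = InputStream.new("some input text")
    lexer = MyLexer.new(input)
    lexer.getAllTokens.each do |tok|
        puts "type=#{tok.type} text=#{tok.text.inspect}"
    end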