antlr4 0.9.2

Files changed (64)
  1. checksums.yaml +7 -0
  2. data/LICENSE +27 -0
  3. data/README.md +46 -0
  4. data/lib/antlr4.rb +262 -0
  5. data/lib/antlr4/BufferedTokenStream.rb +306 -0
  6. data/lib/antlr4/CommonTokenFactory.rb +53 -0
  7. data/lib/antlr4/CommonTokenStream.rb +56 -0
  8. data/lib/antlr4/FileStream.rb +14 -0
  9. data/lib/antlr4/InputStream.rb +82 -0
  10. data/lib/antlr4/IntervalSet.rb +341 -0
  11. data/lib/antlr4/LL1Analyzer.rb +177 -0
  12. data/lib/antlr4/Lexer.rb +335 -0
  13. data/lib/antlr4/ListTokenSource.rb +140 -0
  14. data/lib/antlr4/Parser.rb +562 -0
  15. data/lib/antlr4/ParserInterpreter.rb +149 -0
  16. data/lib/antlr4/ParserRuleContext.rb +162 -0
  17. data/lib/antlr4/PredictionContext.rb +690 -0
  18. data/lib/antlr4/Recognizer.rb +162 -0
  19. data/lib/antlr4/RuleContext.rb +226 -0
  20. data/lib/antlr4/Token.rb +124 -0
  21. data/lib/antlr4/TokenFactory.rb +3 -0
  22. data/lib/antlr4/TokenSource.rb +4 -0
  23. data/lib/antlr4/TokenStream.rb +3 -0
  24. data/lib/antlr4/TraceListener.rb +23 -0
  25. data/lib/antlr4/atn/ATN.rb +133 -0
  26. data/lib/antlr4/atn/ATNConfig.rb +146 -0
  27. data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
  28. data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
  29. data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
  30. data/lib/antlr4/atn/ATNSimulator.rb +43 -0
  31. data/lib/antlr4/atn/ATNState.rb +253 -0
  32. data/lib/antlr4/atn/ATNType.rb +22 -0
  33. data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
  34. data/lib/antlr4/atn/LexerAction.rb +311 -0
  35. data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
  36. data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
  37. data/lib/antlr4/atn/PredictionMode.rb +525 -0
  38. data/lib/antlr4/atn/SemanticContext.rb +355 -0
  39. data/lib/antlr4/atn/Transition.rb +297 -0
  40. data/lib/antlr4/base.rb +60 -0
  41. data/lib/antlr4/dfa/DFA.rb +128 -0
  42. data/lib/antlr4/dfa/DFASerializer.rb +77 -0
  43. data/lib/antlr4/dfa/DFAState.rb +133 -0
  44. data/lib/antlr4/error.rb +151 -0
  45. data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
  46. data/lib/antlr4/error/ErrorListener.rb +109 -0
  47. data/lib/antlr4/error/ErrorStrategy.rb +742 -0
  48. data/lib/antlr4/tree/Chunk.rb +31 -0
  49. data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
  50. data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
  51. data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
  52. data/lib/antlr4/tree/RuleTagToken.rb +39 -0
  53. data/lib/antlr4/tree/TokenTagToken.rb +38 -0
  54. data/lib/antlr4/tree/Tree.rb +204 -0
  55. data/lib/antlr4/tree/Trees.rb +111 -0
  56. data/lib/antlr4/version.rb +5 -0
  57. data/lib/antlr4/xpath/XPath.rb +354 -0
  58. data/lib/double_key_map.rb +78 -0
  59. data/lib/java_symbols.rb +24 -0
  60. data/lib/uuid.rb +87 -0
  61. data/test/test_intervalset.rb +664 -0
  62. data/test/test_tree.rb +140 -0
  63. data/test/test_uuid.rb +122 -0
  64. metadata +109 -0
data/lib/antlr4/LL1Analyzer.rb
@@ -0,0 +1,177 @@
+
+ class LL1Analyzer
+     # Special value added to the lookahead sets to indicate that we hit
+     # a predicate during analysis if {@code seeThruPreds==false}.
+     HIT_PRED = Token::INVALID_TYPE
+
+     attr_accessor :atn
+     def initialize(atn)
+         @atn = atn
+     end
+
+     #*
+     # Calculates the SLL(1) expected lookahead set for each outgoing transition
+     # of an {@link ATNState}. The returned array has one element for each
+     # outgoing transition in {@code s}. If the closure from transition
+     # <em>i</em> leads to a semantic predicate before matching a symbol, the
+     # element at index <em>i</em> of the result will be {@code null}.
+     #
+     # @param s the ATN state
+     # @return the expected symbols for each outgoing transition of {@code s}.
+     #/
+     def getDecisionLookahead(s)
+         return nil if s.nil?
+
+         count = s.transitions.length()
+         look = Array.new
+         for alt in 0..count-1
+             look[alt] = Set.new()
+             lookBusy = Set.new()
+             seeThruPreds = false # fail to get lookahead upon pred
+             self._LOOK(s.transition(alt).target, nil, PredictionContext.EMPTY,
+                        look[alt], lookBusy, Set.new(), seeThruPreds, false)
+             # Wipe out lookahead for this alternative if we found nothing
+             # or we had a predicate when we !seeThruPreds
+             if look[alt].length==0 or look[alt].member? LL1Analyzer::HIT_PRED then
+                 look[alt] = nil
+             end
+         end
+         return look
+     end
+
+     #*
+     # Compute set of tokens that can follow {@code s} in the ATN in the
+     # specified {@code ctx}.
+     #
+     # <p>If {@code ctx} is {@code null} and the end of the rule containing
+     # {@code s} is reached, {@link Token#EPSILON} is added to the result set.
+     # If {@code ctx} is not {@code null} and the end of the outermost rule is
+     # reached, {@link Token#EOF} is added to the result set.</p>
+     #
+     # @param s the ATN state
+     # @param stopState the ATN state to stop at. This can be a
+     # {@link BlockEndState} to detect epsilon paths through a closure.
+     # @param ctx the complete parser context, or {@code null} if the context
+     # should be ignored
+     #
+     # @return The set of tokens that can follow {@code s} in the ATN in the
+     # specified {@code ctx}.
+     #/
+     def LOOK(s, stopState=nil, ctx=nil)
+         r = IntervalSet.new()
+         seeThruPreds = true # ignore preds; get all lookahead
+         if not ctx.nil? then
+             lookContext = PredictionContextFromRuleContext.new(s.atn, ctx)
+         else
+             lookContext = nil
+         end
+         self._LOOK(s, stopState, lookContext, r, Set.new(), Set.new(), seeThruPreds, true)
+         return r
+     end
+
+     #*
+     # Compute set of tokens that can follow {@code s} in the ATN in the
+     # specified {@code ctx}.
+     #
+     # <p>If {@code ctx} is {@code null} and {@code stopState} or the end of the
+     # rule containing {@code s} is reached, {@link Token#EPSILON} is added to
+     # the result set. If {@code ctx} is not {@code null} and {@code addEOF} is
+     # {@code true} and {@code stopState} or the end of the outermost rule is
+     # reached, {@link Token#EOF} is added to the result set.</p>
+     #
+     # @param s the ATN state.
+     # @param stopState the ATN state to stop at. This can be a
+     # {@link BlockEndState} to detect epsilon paths through a closure.
+     # @param ctx The outer context, or {@code null} if the outer context should
+     # not be used.
+     # @param look The result lookahead set.
+     # @param lookBusy A set used for preventing epsilon closures in the ATN
+     # from causing a stack overflow. Outside code should pass
+     # {@code new HashSet<ATNConfig>} for this argument.
+     # @param calledRuleStack A set used for preventing left recursion in the
+     # ATN from causing a stack overflow. Outside code should pass
+     # {@code new BitSet()} for this argument.
+     # @param seeThruPreds {@code true} to treat semantic predicates as
+     # implicitly {@code true} and "see through them", otherwise {@code false}
+     # to treat semantic predicates as opaque and add {@link #HIT_PRED} to the
+     # result if one is encountered.
+     # @param addEOF Add {@link Token#EOF} to the result if the end of the
+     # outermost context is reached. This parameter has no effect if {@code ctx}
+     # is {@code null}.
+     #/
+     def _LOOK(s, stopState, ctx, look, lookBusy,
+               calledRuleStack, seeThruPreds, addEOF)
+         c = ATNConfig.new(s, 0, ctx)
+
+         return if lookBusy.member? c
+
+         lookBusy.add(c)
+
+         if s == stopState then
+             if ctx.nil? then
+                 look.addOne(Token::EPSILON)
+                 return
+             elsif ctx.isEmpty() and addEOF
+                 look.addOne(Token::EOF)
+                 return
+             end
+         end
+
+         if s.kind_of? RuleStopState then
+             if ctx.nil? then
+                 look.addOne(Token::EPSILON)
+                 return
+             elsif ctx.isEmpty() and addEOF
+                 look.addOne(Token::EOF)
+                 return
+             end
+             if PredictionContext.EMPTY != ctx
+                 # run thru all possible stack tops in ctx
+                 for i in 0..ctx.length-1 do
+                     returnState = self.atn.states[ctx.getReturnState(i)]
+                     removed = calledRuleStack.member? returnState.ruleIndex
+                     begin
+                         calledRuleStack.delete(returnState.ruleIndex)
+                         self._LOOK(returnState, stopState, ctx.getParent(i), look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
+                     ensure
+                         calledRuleStack.add(returnState.ruleIndex) if removed
+                     end
+                 end
+                 return
+             end
+         end
+         for t in s.transitions do
+             if t.class == RuleTransition then
+                 next if calledRuleStack.member? t.target.ruleIndex
+
+                 newContext = SingletonPredictionContext.create(ctx, t.followState.stateNumber)
+
+                 begin
+                     calledRuleStack.add(t.target.ruleIndex)
+                     self._LOOK(t.target, stopState, newContext, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
+                 ensure
+                     calledRuleStack.delete(t.target.ruleIndex)
+                 end
+             elsif t.kind_of? AbstractPredicateTransition then
+                 if seeThruPreds
+                     self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
+                 else
+                     look.addOne(LL1Analyzer::HIT_PRED)
+                 end
+             elsif t.isEpsilon
+                 self._LOOK(t.target, stopState, ctx, look, lookBusy, calledRuleStack, seeThruPreds, addEOF)
+             elsif t.class == WildcardTransition
+                 look.addRange( Token::MIN_USER_TOKEN_TYPE..self.atn.maxTokenType )
+             else
+                 set = t.label
+                 if not set.nil? then
+                     if t.kind_of? NotSetTransition then
+                         set = set.complement(IntervalSet.of(Token::MIN_USER_TOKEN_TYPE, self.atn.maxTokenType))
+                     end
+                     look.addSet(set)
+                 end
+             end
+         end
+     end
+ end
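
A minimal usage sketch of the analyzer (illustrative only: `atn` and `decision_state` are assumed to come from an already-deserialized recognizer, and in practice the runtime invokes these methods itself during prediction):

    # Sketch, not part of the gem: assumes `atn` holds a deserialized ATN
    # and `decision_state` is one of its decision states.
    analyzer = LL1Analyzer.new(atn)

    # Tokens that can follow a state, treating semantic predicates as true.
    follow = analyzer.LOOK(atn.states[0])

    # One lookahead set per outgoing transition of the decision state;
    # entries are nil where a predicate blocked the analysis.
    per_alt = analyzer.getDecisionLookahead(decision_state)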
data/lib/antlr4/Lexer.rb
@@ -0,0 +1,335 @@
+ # A lexer is a recognizer that draws input symbols from a character stream.
+ # Lexer grammars result in a subclass of this object. A Lexer object
+ # uses simplified match() and error recovery mechanisms in the interest
+ # of speed.
+
+ class Lexer < TokenSource
+
+     DEFAULT_MODE = 0
+     MORE = -2
+     SKIP = -3
+
+     DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL
+     HIDDEN = Token::HIDDEN_CHANNEL
+     MIN_CHAR_VALUE = "\u0000"
+     MAX_CHAR_VALUE = "\uFFFE"
+
+     attr_accessor :input, :factory, :tokenFactorySourcePair, :interp
+     attr_accessor :token, :tokenStartCharIndex, :tokenStartLine, :tokenStartColumn
+     attr_accessor :hitEOF, :channel, :type, :modeStack, :mode, :text
+
+     def initialize(_input)
+         super()
+         @input = _input
+         @factory = CommonTokenFactory.DEFAULT
+         @tokenFactorySourcePair = [self, _input]
+
+         @interp = nil # child classes must populate this
+
+         # The goal of all lexer rules/methods is to create a token object.
+         # This is an instance variable, as multiple rules may collaborate to
+         # create a single token. nextToken will return this object after
+         # matching lexer rule(s). If you subclass to allow multiple token
+         # emissions, then set this to the last token to be matched or
+         # something non-nil so that the auto token emit mechanism will not
+         # emit another token.
+         @token = nil
+
+         # What character index in the stream did the current token start at?
+         # Needed, for example, to get the text for the current token. Set at
+         # the start of nextToken.
+         @tokenStartCharIndex = -1
+
+         # The line on which the first character of the token resides#/
+         @tokenStartLine = -1
+
+         # The character position of the first character within the line#/
+         @tokenStartColumn = -1
+
+         # Once we see EOF on the char stream, the next token will be EOF.
+         # If you have DONE : EOF ; then you see DONE EOF.
+         @hitEOF = false
+
+         # The channel number for the current token#/
+         @channel = Token::DEFAULT_CHANNEL
+
+         # The token type for the current token#/
+         @type = Token::INVALID_TYPE
+
+         @modeStack = Array.new
+         @mode = Lexer::DEFAULT_MODE
+
+         # You can set the text for the current token to override what is in
+         # the input char buffer. Use setText() or set this instance variable
+         # directly.
+         #/
+         @text = nil
+     end
+
+     def reset
+         # wack Lexer state variables
+         if not self.input.nil? then
+             self.input.seek(0) # rewind the input
+         end
+         self.token = nil
+         self.type = Token::INVALID_TYPE
+         self.channel = Token::DEFAULT_CHANNEL
+         self.tokenStartCharIndex = -1
+         self.tokenStartColumn = -1
+         self.tokenStartLine = -1
+         self.text = nil
+
+         self.hitEOF = false
+         self.mode = Lexer::DEFAULT_MODE
+         self.modeStack = Array.new
+
+         self.interp.reset()
+     end
+
+     # Return a token from this source; i.e., match a token on the char
+     # stream.
+     def nextToken
+         if self.input.nil?
+             raise IllegalStateException.new("nextToken requires a non-null input stream.")
+         end
+
+         # Mark start location in char stream so unbuffered streams are
+         # guaranteed to at least have the text of the current token
+         tokenStartMarker = self.input.mark()
+         begin
+             while true do
+                 if self.hitEOF then
+                     self.emitEOF()
+                     return self.token
+                 end
+                 self.token = nil
+                 self.channel = Token::DEFAULT_CHANNEL
+                 self.tokenStartCharIndex = self.input.index
+                 self.tokenStartColumn = self.interp.column
+                 self.tokenStartLine = self.interp.line
+                 self.text = nil
+                 continueOuter = false
+                 while true do
+                     self.type = Token::INVALID_TYPE
+                     ttype = Lexer::SKIP
+                     begin
+                         ttype = self.interp.match(self.input, self.mode)
+                     rescue LexerNoViableAltException => e
+                         self.notifyListeners(e) # report error
+                         self.recover(e)
+                     end
+                     if self.input.LA(1)==Token::EOF then
+                         self.hitEOF = true
+                     end
+                     if self.type == Token::INVALID_TYPE
+                         self.type = ttype
+                     end
+                     if self.type == Lexer::SKIP
+                         continueOuter = true
+                         break
+                     end
+                     if self.type != Lexer::MORE
+                         break
+                     end
+                 end
+                 next if continueOuter
+                 self.emit() if self.token.nil?
+                 return self.token
+             end
+         ensure
+             # make sure we release the marker after the match or an
+             # unbuffered char stream will keep buffering
+             self.input.release(tokenStartMarker)
+         end
+     end
+
+     # Instruct the lexer to skip creating a token for the current lexer rule
+     # and look for another token. nextToken() knows to keep looking when
+     # a lexer rule finishes with token set to SKIP_TOKEN. Recall that
+     # if token==nil at the end of any token rule, it creates one for you
+     # and emits it.
+     #/
+     def skip
+         self.type = Lexer::SKIP
+     end
+
+     def more
+         self.type = Lexer::MORE
+     end
+
+     def pushMode(m)
+         if self.interp.debug then
+             puts "pushMode #{m}"
+         end
+         self.modeStack.push(self.mode)
+         self.mode = m
+     end
+
+     def popMode
+         if self.modeStack.empty? then
+             raise Exception.new("Empty Stack")
+         end
+         if self.interp.debug then
+             puts "popMode back to #{self.modeStack.slice(0, self.modeStack.length-1)}"
+         end
+         self.mode = self.modeStack.pop()
+         return self.mode
+     end
+
+     def inputStream
+         return self.input
+     end
+
+     # Set the char stream and reset the lexer#/
+     def inputStream=(input)
+         self.input = nil
+         self.tokenFactorySourcePair = [self, nil]
+         self.reset()
+         self.input = input
+         self.tokenFactorySourcePair = [self, self.input]
+     end
+
+     def sourceName
+         return self.input.sourceName
+     end
+
+     # By default, does not support multiple emits per nextToken invocation
+     # for efficiency reasons. Subclass and override this method, nextToken,
+     # and getToken (to push tokens into a list and pull from that list
+     # rather than a single variable as this implementation does).
+     #/
+     def emitToken(token)
+         self.token = token
+     end
+
+     # The standard method called to automatically emit a token at the
+     # outermost lexical rule. The token object should point into the
+     # char buffer start..stop. If there is a text override in 'text',
+     # use that to set the token's text. Override this method to emit
+     # custom Token objects or provide a new factory.
+     #/
+     def emit
+         t = self.factory.create(self.tokenFactorySourcePair, self.type, self.text, self.channel,
+                                 self.tokenStartCharIndex, self.getCharIndex()-1,
+                                 self.tokenStartLine, self.tokenStartColumn)
+         self.emitToken(t)
+         return t
+     end
+
+     def emitEOF()
+         cpos = self.column
+         # The character position for EOF is one beyond the position of
+         # the previous token's last character
+         if not self.token.nil? then
+             n = self.token.stop - self.token.start + 1
+             cpos = self.token.column + n
+         end
+         eof = self.factory.create(self.tokenFactorySourcePair, Token::EOF, nil, Token::DEFAULT_CHANNEL,
+                                   self.input.index, self.input.index-1, self.line, cpos)
+         self.emitToken(eof)
+         return eof
+     end
+
+     def line
+         return self.interp.line
+     end
+
+     def line=(line)
+         self.interp.line = line
+     end
+
+     def column
+         return self.interp.column
+     end
+
+     def column=(column)
+         self.interp.column = column
+     end
+
+     # What is the index of the current character of lookahead?#/
+     def getCharIndex()
+         return self.input.index
+     end
+
+     # Return the text matched so far for the current token, or any
+     # text override.
+     def text
+         if not @text.nil? then
+             @text
+         else
+             self.interp.getText(self.input)
+         end
+     end
+
+     # Set the complete text of this token; it wipes any previous
+     # changes to the text.
+     def text=(txt)
+         @text = txt
+     end
+
+     # Return a list of all Token objects in the input char stream.
+     # Forces a load of all tokens. Does not include the EOF token.
+     #/
+     def getAllTokens
+         tokens = Array.new
+         t = self.nextToken()
+         while t.type != Token::EOF do
+             tokens.push(t)
+             t = self.nextToken()
+         end
+         return tokens
+     end
+
+     def notifyListeners(e) # e: LexerNoViableAltException
+         start = self.tokenStartCharIndex
+         stop = self.input.index
+         text = self.input.getText(start, stop)
+         msg = "token recognition error at: '#{self.getErrorDisplay(text)}'"
+         listener = self.getErrorListenerDispatch()
+         listener.syntaxError(self, nil, self.tokenStartLine, self.tokenStartColumn, msg, e)
+     end
+
+     def getErrorDisplay(s)
+         StringIO.open do |buf|
+             s.chars.each {|c| buf.write(self.getErrorDisplayForChar(c)) }
+             return buf.string
+         end
+     end
+
+     def getErrorDisplayForChar(c)
+         begin
+             cc = c[0].ord
+         rescue ArgumentError
+             cc = "\ufffd".ord
+         end
+         if cc==Token::EOF then
+             return "<EOF>"
+         elsif c == "\n"
+             return "\\n"
+         elsif c == "\t"
+             return "\\t"
+         elsif c == "\r"
+             return "\\r"
+         else
+             return c
+         end
+     end
+
+     def getCharErrorDisplay(c)
+         return "'" + self.getErrorDisplayForChar(c) + "'"
+     end
+
+     # Lexers can normally match any char in their vocabulary after matching
+     # a token, so do the easy thing and just kill a character and hope
+     # it all works out. You can instead use the rule invocation stack
+     # to do sophisticated error recovery if you are in a fragment rule.
+     #/
+     def recover(re) # re: RecognitionException
+         if self.input.LA(1) != Token::EOF then
+             if re.kind_of? LexerNoViableAltException then
+                 # skip a char and try again
+                 self.interp.consume(self.input)
+             else
+                 # TODO: Do we lose character or line position information?
+                 self.input.consume()
+             end
+         end
+     end
+
+     def getRuleNames
+         self.ruleNames
+     end
+ end
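
A rough usage sketch of the lexer API above (illustrative only: `MyLexer` stands for a lexer class generated by the ANTLR tool against this runtime, and the InputStream constructor signature is assumed from data/lib/antlr4/InputStream.rb):

    require 'antlr4'

    # Sketch, not part of the gem: MyLexer is a hypothetical generated
    # subclass of Lexer with its own ATN-driven interpreter.
    input = InputStream.new("1 + 2")
    lexer = MyLexer.new(input)

    # getAllTokens pulls every token up to, but not including, EOF.
    lexer.getAllTokens.each do |tok|
        puts "#{tok.type}: #{lexer.getErrorDisplay(tok.text)}"
    end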