antlr4 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +27 -0
- data/README.md +46 -0
- data/lib/antlr4.rb +262 -0
- data/lib/antlr4/BufferedTokenStream.rb +306 -0
- data/lib/antlr4/CommonTokenFactory.rb +53 -0
- data/lib/antlr4/CommonTokenStream.rb +56 -0
- data/lib/antlr4/FileStream.rb +14 -0
- data/lib/antlr4/InputStream.rb +82 -0
- data/lib/antlr4/IntervalSet.rb +341 -0
- data/lib/antlr4/LL1Analyzer.rb +177 -0
- data/lib/antlr4/Lexer.rb +335 -0
- data/lib/antlr4/ListTokenSource.rb +140 -0
- data/lib/antlr4/Parser.rb +562 -0
- data/lib/antlr4/ParserInterpreter.rb +149 -0
- data/lib/antlr4/ParserRuleContext.rb +162 -0
- data/lib/antlr4/PredictionContext.rb +690 -0
- data/lib/antlr4/Recognizer.rb +162 -0
- data/lib/antlr4/RuleContext.rb +226 -0
- data/lib/antlr4/Token.rb +124 -0
- data/lib/antlr4/TokenFactory.rb +3 -0
- data/lib/antlr4/TokenSource.rb +4 -0
- data/lib/antlr4/TokenStream.rb +3 -0
- data/lib/antlr4/TraceListener.rb +23 -0
- data/lib/antlr4/atn/ATN.rb +133 -0
- data/lib/antlr4/atn/ATNConfig.rb +146 -0
- data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
- data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
- data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
- data/lib/antlr4/atn/ATNSimulator.rb +43 -0
- data/lib/antlr4/atn/ATNState.rb +253 -0
- data/lib/antlr4/atn/ATNType.rb +22 -0
- data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
- data/lib/antlr4/atn/LexerAction.rb +311 -0
- data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
- data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
- data/lib/antlr4/atn/PredictionMode.rb +525 -0
- data/lib/antlr4/atn/SemanticContext.rb +355 -0
- data/lib/antlr4/atn/Transition.rb +297 -0
- data/lib/antlr4/base.rb +60 -0
- data/lib/antlr4/dfa/DFA.rb +128 -0
- data/lib/antlr4/dfa/DFASerializer.rb +77 -0
- data/lib/antlr4/dfa/DFAState.rb +133 -0
- data/lib/antlr4/error.rb +151 -0
- data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
- data/lib/antlr4/error/ErrorListener.rb +109 -0
- data/lib/antlr4/error/ErrorStrategy.rb +742 -0
- data/lib/antlr4/tree/Chunk.rb +31 -0
- data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
- data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
- data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
- data/lib/antlr4/tree/RuleTagToken.rb +39 -0
- data/lib/antlr4/tree/TokenTagToken.rb +38 -0
- data/lib/antlr4/tree/Tree.rb +204 -0
- data/lib/antlr4/tree/Trees.rb +111 -0
- data/lib/antlr4/version.rb +5 -0
- data/lib/antlr4/xpath/XPath.rb +354 -0
- data/lib/double_key_map.rb +78 -0
- data/lib/java_symbols.rb +24 -0
- data/lib/uuid.rb +87 -0
- data/test/test_intervalset.rb +664 -0
- data/test/test_tree.rb +140 -0
- data/test/test_uuid.rb +122 -0
- metadata +109 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
|
2
|
+
# A Token representing an entire subtree matched by a parser rule tag
# (e.g. {@code <expr>}) inside a parse-tree pattern.
class RuleTagToken < Token

  # label:    the label associated with the rule tag, or nil if unlabeled.
  # ruleName: the name of the parser rule this rule tag matches.
  attr_accessor :label, :ruleName

  # Constructs a new instance of {@link RuleTagToken} with the specified rule
  # name, bypass token type, and label.
  #
  # @param ruleName The name of the parser rule this rule tag matches.
  # @param bypassTokenType The bypass token type assigned to the parser rule.
  # @param label The label associated with the rule tag, or {@code null} if
  #   the rule tag is unlabeled.
  #
  # @raise [ArgumentError] if {@code ruleName} is nil or empty.
  def initialize(ruleName, bypassTokenType, label=nil)
    if ruleName.nil? or ruleName.length == 0 then
      # The doc contract is Java's IllegalArgumentException; ArgumentError is
      # the idiomatic Ruby equivalent (was a bare Exception).
      raise ArgumentError.new("ruleName cannot be null or empty.")
    end
    self.source = nil
    self.type = bypassTokenType          # token type of the token
    self.channel = Token::DEFAULT_CHANNEL # the parser ignores everything not on DEFAULT_CHANNEL
    self.start = -1                      # optional; return -1 if not implemented.
    self.stop = -1                       # optional; return -1 if not implemented.
    self.tokenIndex = -1                 # from 0..n-1 of the token object in the input stream
    self.line = 0                        # line=1..n of the 1st character
    self.column = -1                     # beginning of the line at which it occurs, 0..n-1
    self.label = label
    self.ruleName = ruleName
    @text = getText()
  end

  # Returns the tag text: "<label:ruleName>" when labeled, "<ruleName>" otherwise.
  def getText()
    if self.label.nil? then
      "<#{@ruleName}>"
    else
      "<#{@label}:#{@ruleName}>"
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# A {@link Token} object representing a token of a particular type; e.g.,
|
2
|
+
# {@code <ID>}. These tokens are created for {@link TagChunk} chunks where the
|
3
|
+
# tag corresponds to a lexer rule or token type.
|
4
|
+
#
|
5
|
+
|
6
|
+
# A {@link Token} object representing a token of a particular type; e.g.,
# {@code <ID>}. These tokens are created for {@link TagChunk} chunks where the
# tag corresponds to a lexer rule or token type.
class TokenTagToken < CommonToken

  # tokenName: the token name; label: the label associated with the token
  # tag, or nil if the token tag is unlabeled.
  attr_accessor :tokenName, :label

  # Constructs a new instance of {@link TokenTagToken} with the specified
  # token name, type, and label.
  #
  # @param tokenName The token name.
  # @param type The token type.
  # @param label The label associated with the token tag, or {@code null} if
  #   the token tag is unlabeled.
  def initialize(tokenName, type, label=nil)
    super(type)
    self.tokenName = tokenName
    self.label = label
    @text = getText()
  end

  # <p>The implementation for {@link TokenTagToken} returns the token tag
  # formatted with {@code <} and {@code >} delimiters.</p>
  def getText()
    if self.label.nil?
      return "<" + self.tokenName + ">"
    else
      return "<" + self.label + ":" + self.tokenName + ">"
    end
  end

  # <p>The implementation for {@link TokenTagToken} returns a string of the
  # form {@code tokenName:type}.</p>
  def to_s
    # Was "#{self.tokenName}:#{self.class}", which printed the class name
    # rather than the token type promised by the documentation (and by the
    # upstream Java implementation, tokenName + ":" + type).
    "#{self.tokenName}:#{self.type}"
  end
end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
# The basic notion of a tree has a parent, a payload, and a list of children.
|
2
|
+
# It is the most abstract interface for all the trees used by ANTLR.
|
3
|
+
#
|
4
|
+
|
5
|
+
# The basic notion of a tree has a parent, a payload, and a list of children.
# It is the most abstract interface for all the trees used by ANTLR.
class Tree
end

# A tree whose nodes are associated with spans of the input.
class SyntaxTree < Tree
end

# Base marker for trees produced by parsing.
class ParseTree < SyntaxTree
end

# Marker for interior parse-tree nodes (rule invocations).
class RuleNode < ParseTree
end

# Marker for leaf parse-tree nodes (single tokens).
class TerminalNode < ParseTree
end

# Marker for leaves created during error recovery.
class ErrorNode < TerminalNode
end
|
22
|
+
|
23
|
+
|
24
|
+
# This interface defines the basic notion of a parse tree visitor. Generated
|
25
|
+
# visitors implement this interface and the {@code XVisitor} interface for
|
26
|
+
# grammar {@code X}.
|
27
|
+
# This interface defines the basic notion of a parse tree visitor. Generated
# visitors implement this interface and the {@code XVisitor} interface for
# grammar {@code X}. Every method here is a no-op returning nil; subclasses
# override the ones they care about.
class ParseTreeVisitor
  # Visit a parse tree, and return a user-defined result of the operation.
  #
  # @param tree The {@link ParseTree} to visit.
  # @return The result of visiting the parse tree.
  def visit(tree); end

  # Visit the children of a node, and return a user-defined result of the
  # operation.
  #
  # @param node The {@link RuleNode} whose children should be visited.
  # @return The result of visiting the children of the node.
  def visitChildren(node); end

  # Visit a terminal node, and return a user-defined result of the operation.
  #
  # @param node The {@link TerminalNode} to visit.
  # @return The result of visiting the node.
  def visitTerminal(node); end

  # Visit an error node, and return a user-defined result of the operation.
  #
  # @param node The {@link ErrorNode} to visit.
  # @return The result of visiting the node.
  def visitErrorNode(node); end
end
|
54
|
+
|
55
|
+
# Base listener with no-op callbacks; generated listeners subclass this and
# override the events they need.
class ParseTreeListener
  # Notification of a terminal (leaf) node during the walk.
  def visitTerminal(node); end

  # Notification of an error node during the walk.
  def visitErrorNode(node); end

  # Generic rule-entry notification (fired for every rule context).
  def enterEveryRule(ctx); end

  # Generic rule-exit notification (fired for every rule context).
  def exitEveryRule(ctx); end
end
|
68
|
+
|
69
|
+
# Mixin providing the shared leaf-node behaviour for TerminalNodeImpl and
# ErrorNodeImpl: a node wrapping a single token (@symbol) plus a pointer to
# its parent rule context (@parentCtx).
module NodeImpl

  def self.included(klass)
    klass.send(:include, NodeImpl::Methods)
  end

  module Methods
    # symbol:    the token this leaf node wraps.
    # parentCtx: the parent rule context, or nil for a detached node.
    # (attr_accessor replaces the original's four hand-written
    # getter/setter methods; behaviour is identical.)
    attr_accessor :symbol, :parentCtx

    def initialize(symbol)
      @parentCtx = nil
      @symbol = symbol
    end

    # Leaf nodes have no children.
    def getChild(i)
      nil
    end

    def getSymbol()
      self.symbol
    end

    def getParent()
      self.parentCtx
    end

    # The payload of a terminal node is its token.
    def getPayload()
      self.symbol
    end

    # Token-index interval covered by this node, as a two-element array
    # [start, stop]; both ends are the wrapped token's index.
    def getSourceInterval()
      return Antlr4::INVALID_INTERVAL if self.symbol.nil?
      tokenIndex = self.symbol.tokenIndex
      [tokenIndex, tokenIndex]
    end

    def getChildCount()
      0
    end

    # Dispatch to the terminal-node visitor callback.
    def accept(visitor)
      visitor.visitTerminal(self)
    end

    def text
      self.symbol.text
    end

    def getText()
      self.symbol.text
    end

    # "<EOF>" for the end-of-file token, otherwise the token's text.
    def to_s
      if self.symbol.type == Token::EOF then
        "<EOF>"
      else
        self.symbol.text
      end
    end
  end
end
|
144
|
+
|
145
|
+
# Concrete terminal (leaf) node; all behaviour comes from the NodeImpl mixin.
class TerminalNodeImpl < TerminalNode
  include NodeImpl
end
|
149
|
+
# Represents a token that was consumed during resynchronization
|
150
|
+
# rather than during a valid match operation. For example,
|
151
|
+
# we will create this kind of a node during single token insertion
|
152
|
+
# and deletion as well as during "consume until error recovery set"
|
153
|
+
# upon no viable alternative exceptions.
|
154
|
+
|
155
|
+
# Represents a token that was consumed during resynchronization rather than
# during a valid match operation. For example, we will create this kind of a
# node during single token insertion and deletion as well as during
# "consume until error recovery set" upon no viable alternative exceptions.
class ErrorNodeImpl < ErrorNode
  include NodeImpl

  # Error nodes dispatch to visitErrorNode instead of visitTerminal.
  def accept(visitor)
    visitor.visitErrorNode(self)
  end
end
|
162
|
+
|
163
|
+
|
164
|
+
# Walks a parse tree depth-first, notifying a ParseTreeListener of terminal
# and error nodes and of rule entry/exit.
class ParseTreeWalker
  # Shared walker instance, created lazily. Uses a class *instance*
  # variable instead of the original @@class variable (class variables are
  # shared across the whole inheritance tree).
  @default = nil
  def self.DEFAULT
    @default ||= new
  end

  # Depth-first walk of tree t, firing listener callbacks. Error and
  # terminal nodes are reported directly; rule nodes get enterRule, a
  # recursive walk of the children, then exitRule.
  def walk(listener, t)
    if t.kind_of? ErrorNode then
      listener.visitErrorNode(t)
      return
    elsif t.kind_of? TerminalNode then
      listener.visitTerminal(t)
      return
    end
    self.enterRule(listener, t)
    t.getChildren().each do |child|
      self.walk(listener, child)
    end
    self.exitRule(listener, t)
  end

  # The discovery of a rule node involves sending two events: the generic
  # {@link ParseTreeListener#enterEveryRule} and a {@link RuleContext}-specific
  # event. First we trigger the generic and then the rule-specific; we do
  # them in reverse order upon finishing the node.
  def enterRule(listener, r)
    ctx = r.getRuleContext()
    listener.enterEveryRule(ctx)
    ctx.enterRule(listener)
  end

  def exitRule(listener, r)
    ctx = r.getRuleContext()
    ctx.exitRule(listener)
    listener.exitEveryRule(ctx)
  end
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# A set of utility routines useful for all kinds of ANTLR trees.#
|
2
|
+
#from io import StringIO
|
3
|
+
#from antlr4.Token import Token
|
4
|
+
#from antlr4.Utils import escapeWhitespace
|
5
|
+
#from antlr4.tree.Tree import RuleNode, ErrorNode, TerminalNode, Tree, ParseTree
|
6
|
+
|
7
|
+
# A set of utility routines useful for all kinds of ANTLR trees.
class Trees

  # Print out a whole tree in LISP form. {@link #getNodeText} is used on the
  # node payloads to get the text for the nodes. Detects parse trees and
  # extracts data appropriately.
  def self.toStringTree(t, ruleNames=nil, recog=nil)
    if not recog.nil? then
      ruleNames = recog.ruleNames
    end
    # escapeWhitespace is a String extension — presumably added by
    # antlr4/base.rb; TODO confirm.
    s = getNodeText(t, ruleNames).escapeWhitespace(false)
    if t.getChildCount() == 0 then
      return s
    end
    StringIO.open do |buf|
      buf.write("(")
      buf.write(s)
      buf.write(' ')
      for i in 0..t.getChildCount()-1 do
        if i > 0 then
          buf.write(' ')
        end
        buf.write(toStringTree(t.getChild(i), ruleNames))
      end
      buf.write(")")
      return buf.string()
    end
  end

  # Text for a node: the rule name for rule nodes, the token text for
  # terminal nodes (when ruleNames/recog are supplied), else the payload's
  # string form.
  def self.getNodeText(t, ruleNames=nil, recog=nil)
    if not recog.nil? then
      ruleNames = recog.ruleNames
    end
    if not ruleNames.nil? then
      if t.kind_of? RuleNode then
        return ruleNames[t.getRuleContext().getRuleIndex()]
      elsif t.kind_of? ErrorNode then
        return t.to_s
      elsif t.kind_of? TerminalNode then
        if not t.symbol.nil? then
          return t.symbol.text
        end
      end
    end
    # no recog for rule names
    payload = t.getPayload()
    if payload.kind_of? Token then
      return payload.text
    end
    return t.getPayload().to_s
  end

  # Return ordered list of all children of this node
  def self.getChildren(t)
    return (0 .. t.getChildCount()-1).map{|i| t.getChild(i) }
  end

  # Return a list of all ancestors of this node. The first node of
  # the list is the root and the last is the parent of this node.
  def self.getAncestors(t)
    ancestors = []
    t = t.getParent()
    while not t.nil? do
      ancestors.unshift(t) # insert at start
      t = t.getParent()
    end
    return ancestors
  end

  def self.findAllTokenNodes(t, ttype)
    return findAllNodes(t, ttype, true)
  end

  def self.findAllRuleNodes(t, ruleIndex)
    return findAllNodes(t, ruleIndex, false)
  end

  # Fixed: removed the stray leading `cls` parameter (an artifact of the
  # Python classmethod this was ported from). Both in-file callers above
  # pass exactly (t, index, findTokens); with `cls` present every call
  # raised ArgumentError.
  def self.findAllNodes(t, index, findTokens)
    nodes = Array.new
    _findAllNodes(t, index, findTokens, nodes)
    return nodes
  end

  # Recursive helper: append matching token nodes (findTokens=true) or rule
  # contexts (findTokens=false) under t into nodes.
  def self._findAllNodes(t, index, findTokens, nodes)
    # check this node (the root) first
    if findTokens and t.kind_of? TerminalNode then
      nodes.push(t) if t.symbol.type == index
    elsif not findTokens and t.kind_of? ParserRuleContext then
      nodes.push(t) if t.ruleIndex == index
    end
    # check children
    for i in 0 .. t.getChildCount()-1
      self._findAllNodes(t.getChild(i), index, findTokens, nodes)
    end
  end

  # Return this node plus all of its descendants, pre-order.
  def self.descendants(t)
    nodes = Array.new
    nodes.push(t)
    for i in 0..t.getChildCount()-1
      nodes.concat(self.descendants(t.getChild(i)))
    end
    return nodes
  end
end
|
@@ -0,0 +1,354 @@
|
|
1
|
+
#
|
2
|
+
# Represent a subset of XPath XML path syntax for use in identifying nodes in
|
3
|
+
# parse trees.
|
4
|
+
#
|
5
|
+
# <p>
|
6
|
+
# Split path into words and separators {@code /} and {@code //} via ANTLR
|
7
|
+
# itself then walk path elements from left to right. At each separator-word
|
8
|
+
# pair, find set of nodes. Next stage uses those as work list.</p>
|
9
|
+
#
|
10
|
+
# <p>
|
11
|
+
# The basic interface is
|
12
|
+
# {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
|
13
|
+
# But that is just shorthand for:</p>
|
14
|
+
#
|
15
|
+
# <pre>
|
16
|
+
# {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
|
17
|
+
# return p.{@link #evaluate evaluate}(tree);
|
18
|
+
# </pre>
|
19
|
+
#
|
20
|
+
# <p>
|
21
|
+
# See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
|
22
|
+
# allows operators:</p>
|
23
|
+
#
|
24
|
+
# <dl>
|
25
|
+
# <dt>/</dt> <dd>root</dd>
|
26
|
+
# <dt>//</dt> <dd>anywhere</dd>
|
27
|
+
# <dt>!</dt> <dd>invert; this must appear directly after root or anywhere
|
28
|
+
# operator</dd>
|
29
|
+
# </dl>
|
30
|
+
#
|
31
|
+
# <p>
|
32
|
+
# and path elements:</p>
|
33
|
+
#
|
34
|
+
# <dl>
|
35
|
+
# <dt>ID</dt> <dd>token name</dd>
|
36
|
+
# <dt>'string'</dt> <dd>any string literal token from the grammar</dd>
|
37
|
+
# <dt>expr</dt> <dd>rule name</dd>
|
38
|
+
# <dt>*</dt> <dd>wildcard matching any node</dd>
|
39
|
+
# </dl>
|
40
|
+
#
|
41
|
+
# <p>
|
42
|
+
# Whitespace is not allowed.</p>
|
43
|
+
#
|
44
|
+
|
45
|
+
class XPathLexer < Lexer
|
46
|
+
include JavaSymbols
|
47
|
+
|
48
|
+
@@serializedATN = \
|
49
|
+
"\3\uacf5\uee8c\u4f5d\u8b0d\u4a45\u78bd\u1b2f\u3378\2\n\64\b\1\4\2\t\2" + \
|
50
|
+
"\4\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7\4\b\t\b\4\t\t\t\3\2\3\2\3\2\3" + \
|
51
|
+
"\3\3\3\3\4\3\4\3\5\3\5\3\6\3\6\7\6\37\n\6\f\6\16\6\"\13\6\3\6\3\6\3\7" + \
|
52
|
+
"\3\7\5\7(\n\7\3\b\3\b\3\t\3\t\7\t.\n\t\f\t\16\t\61\13\t\3\t\3\t\3/\n\3" + \
|
53
|
+
"\5\1\5\6\1\7\7\1\t\b\1\13\t\2\r\2\1\17\2\1\21\n\1\3\2\4\7\2\62;aa\u00b9" + \
|
54
|
+
"\u00b9\u0302\u0371\u2041\u2042\17\2C\\c|\u00c2\u00d8\u00da\u00f8\u00fa" + \
|
55
|
+
"\u0301\u0372\u037f\u0381\u2001\u200e\u200f\u2072\u2191\u2c02\u2ff1\u3003" + \
|
56
|
+
"\ud801\uf902\ufdd1\ufdf2\uffff\64\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2" + \
|
57
|
+
"\2\t\3\2\2\2\2\13\3\2\2\2\2\21\3\2\2\2\3\23\3\2\2\2\5\26\3\2\2\2\7\30" + \
|
58
|
+
"\3\2\2\2\t\32\3\2\2\2\13\34\3\2\2\2\r\'\3\2\2\2\17)\3\2\2\2\21+\3\2\2" + \
|
59
|
+
"\2\23\24\7\61\2\2\24\25\7\61\2\2\25\4\3\2\2\2\26\27\7\61\2\2\27\6\3\2" + \
|
60
|
+
"\2\2\30\31\7,\2\2\31\b\3\2\2\2\32\33\7#\2\2\33\n\3\2\2\2\34 \5\17\b\2" + \
|
61
|
+
"\35\37\5\r\7\2\36\35\3\2\2\2\37\"\3\2\2\2 \36\3\2\2\2 !\3\2\2\2!#\3\2" + \
|
62
|
+
"\2\2\" \3\2\2\2#$\b\6\2\2$\f\3\2\2\2%(\5\17\b\2&(\t\2\2\2\'%\3\2\2\2\'" + \
|
63
|
+
"&\3\2\2\2(\16\3\2\2\2)*\t\3\2\2*\20\3\2\2\2+/\7)\2\2,.\13\2\2\2-,\3\2" + \
|
64
|
+
"\2\2.\61\3\2\2\2/\60\3\2\2\2/-\3\2\2\2\60\62\3\2\2\2\61/\3\2\2\2\62\63" + \
|
65
|
+
"\7)\2\2\63\22\3\2\2\2\6\2 \'/"
|
66
|
+
|
67
|
+
TOKEN_REF=1
|
68
|
+
RULE_REF=2
|
69
|
+
ANYWHERE=3
|
70
|
+
ROOT=4
|
71
|
+
WILDCARD=5
|
72
|
+
BANG=6
|
73
|
+
ID=7
|
74
|
+
STRING=8
|
75
|
+
|
76
|
+
|
77
|
+
def initialize(input)
|
78
|
+
super(input)
|
79
|
+
self.modeNames = [ "DEFAULT_MODE" ]
|
80
|
+
self.tokenNames = ["<INVALID>", "TOKEN_REF", "RULE_REF", "'//'", "'/'", "'*'", "'!'", "ID", "STRING" ]
|
81
|
+
self.ruleNames = [ "ANYWHERE", "ROOT", "WILDCARD", "BANG", "ID", "NameChar", "NameStartChar", "STRING" ]
|
82
|
+
@ATN = ATNDeserializer.new.deserialize(@@serializedATN)
|
83
|
+
@interp = LexerATNSimulator.new(@ATN, @decisionToDFA, @sharedContextCache)
|
84
|
+
@grammarFileName = "XPathLexer.g4"
|
85
|
+
@decisionToDFA = @ATN.decisionToState.map{|s| DFA.new(s) }
|
86
|
+
@sharedContextCache = PredictionContextCache()
|
87
|
+
end
|
88
|
+
|
89
|
+
|
90
|
+
def action(localctx, ruleIndex, actionIndex)
|
91
|
+
if ruleIndex==4 then
|
92
|
+
self.ID_action(localctx, actionIndex)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def ID_action(localctx, actionIndex)
|
97
|
+
if actionIndex==0 then
|
98
|
+
if self.text[0].is_uppercase?
|
99
|
+
self.type = TOKEN_REF
|
100
|
+
else
|
101
|
+
self.type = RULE_REF
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
# Splits an xpath string into XPathElement objects and evaluates them
# against a parse tree. See the file header comment for the syntax.
class XPath

  WILDCARD = "*" # word not operator/separator
  NOT = "!" # word for invert operator
  def self.WILDCARD
    XPath::WILDCARD
  end
  def self.NOT
    XPath::NOT
  end

  # parser:   Parser used to resolve token/rule names in the path.
  # path:     the raw xpath string.
  # elements: the parsed list of XPathElement objects.
  # (The original assigned these via `self.parser = ...` etc. without
  # declaring any accessors, so initialize raised NoMethodError.)
  attr_accessor :parser, :path, :elements

  def initialize(parser, path)
    self.parser = parser
    self.path = path
    self.elements = self.split(path)
  end

  def recover(e)
    raise e
  end

  # Tokenize the path with XPathLexer and convert the token stream into a
  # list of XPathElement objects.
  def split(path)
    input = InputStream.new(path)
    lexer = XPathLexer.new(input)
    # Was `lexer.recover = recover`, which *called* recover with no args
    # (ArgumentError) instead of binding it. NOTE(review): assumes
    # Lexer#recover= stores a callable — confirm against Lexer.rb.
    lexer.recover = method(:recover)
    lexer.removeErrorListeners()
    lexer.addErrorListener(ErrorListener.new()) # XPathErrorListener does no more
    tokenStream = CommonTokenStream.new(lexer)
    begin
      tokenStream.fill()
    rescue LexerNoViableAltException => e
      pos = lexer.getColumn()
      msg = "Invalid tokens or characters at index #{pos} in path '#{path}'"
      ex = Exception.new(msg)
      ex.set_backtrace(e.backtrace)
      raise ex
    end

    tokens = tokenStream.getTokens()
    elements = Array.new
    n = tokens.length
    i = 0
    while i < n do
      el = tokens[i]
      next_token = nil
      if [XPathLexer::ROOT, XPathLexer::ANYWHERE].member? el.type then
        anywhere = el.type == XPathLexer::ANYWHERE
        i = i + 1
        next_token = tokens[i]
        invert = next_token.type == XPathLexer::BANG
        if invert then
          i = i + 1
          next_token = tokens[i]
        end
        pathElement = self.getXPathElement(next_token, anywhere)
        pathElement.invert = invert
        elements.push(pathElement)
        i = i + 1
      elsif [XPathLexer::TOKEN_REF, XPathLexer::RULE_REF, XPathLexer::WILDCARD].member? el.type then
        elements.push( self.getXPathElement(el, false) )
        i = i + 1
      elsif el.type == Token::EOF then
        break
      else
        raise Exception.new("Unknown path element #{el}")
      end
    end
    return elements
  end

  # Convert word like {@code *} or {@code ID} or {@code expr} to a path
  # element. {@code anywhere} is {@code true} if {@code //} precedes the
  # word.
  def getXPathElement(wordToken, anywhere)
    if wordToken.type == Token::EOF then
      raise Exception.new("Missing path element at end of path")
    end
    word = wordToken.text
    ttype = self.parser.getTokenType(word)
    ruleIndex = self.parser.getRuleIndex(word)

    if wordToken.type == XPathLexer::WILDCARD then
      if anywhere then
        return XPathWildcardAnywhereElement.new()
      else
        return XPathWildcardElement.new()
      end
    elsif [XPathLexer::TOKEN_REF, XPathLexer::STRING].member? wordToken.type
      if ttype == Token::INVALID_TYPE then
        raise Exception.new("#{word} at index #{wordToken.startIndex} isn't a valid token name")
      end
      if anywhere then
        return XPathTokenAnywhereElement.new(word, ttype)
      else
        return XPathTokenElement.new(word, ttype)
      end
    else
      if ruleIndex == -1 then
        # Was `raise Exception(...)` (NoMethodError — Exception is not a
        # method); also unified on `startIndex` to match the token branch
        # above. NOTE(review): confirm the Token accessor name in Token.rb.
        raise Exception.new("#{word} at index #{wordToken.startIndex} isn't a valid rule name")
      end
      if anywhere
        return XPathRuleAnywhereElement.new(word, ruleIndex)
      else
        return XPathRuleElement.new(word, ruleIndex)
      end
    end
  end

  # Convenience: parse and evaluate in one call.
  def findAll(tree, xpath, parser)
    p = XPath.new(parser, xpath)
    return p.evaluate(tree)
  end

  # Return a list of all nodes starting at {@code t} as root that satisfy the
  # path. The root {@code /} is relative to the node passed to
  # {@link #evaluate}.
  def evaluate(t)
    dummyRoot = ParserRuleContext.new()
    dummyRoot.children = [t] # don't set t's parent.
    work = [dummyRoot]
    for i in (0 .. (self.elements.length-1)) do
      next_token = Set.new()
      for node in work do
        if node.children.length > 0 then
          # only try to match next element if it has children
          # e.g., //func/*/stat might have a token node for which
          # we can't go looking for stat nodes.
          matching = self.elements[i].evaluate(node)
          # Set#union returns a NEW set without mutating the receiver; the
          # original discarded every match. merge mutates in place.
          next_token.merge(matching)
        end
      end
      # (removed the original's dead `i = i + 1`: the for-loop reassigns i
      # each iteration, so the increment had no effect.)
      work = next_token
    end
    return work
  end
end
|
245
|
+
# Base class for one element of a parsed xpath. nodeName is the word the
# element matches (token name, rule name, or wildcard); invert is true when
# the element was preceded by {@code !}.
class XPathElement

  # Fixed typo: was `attr_accessor :nodeNode`, which left initialize's
  # `self.nodeName = ...` with no writer (NoMethodError on construction).
  attr_accessor :nodeName, :invert

  def initialize(nodename)
    self.nodeName = nodename
    self.invert = false
  end

  # e.g. "XPathElement[expr]" or "XPathElement[!expr]" when inverted.
  def to_s
    c = "!" if self.invert
    return "#{self.class.to_s}[#{c}#{self.nodeName}]"
  end
end
|
258
|
+
#
|
259
|
+
# Either {@code ID} at start of path or {@code ...//ID} in middle of path.
|
260
|
+
#
|
261
|
+
# Either {@code ID} at start of path or {@code ...//ID} in middle of path.
class XPathRuleAnywhereElement < XPathElement

  attr_accessor :ruleIndex

  def initialize(rule_name, rule_index)
    super(rule_name)
    @ruleIndex = rule_index
  end

  # Collect every rule node anywhere under t whose rule index matches.
  def evaluate(t)
    Trees.findAllRuleNodes(t, ruleIndex)
  end
end
|
273
|
+
|
274
|
+
# Matches direct children of a node by rule index (supports inversion).
class XPathRuleElement < XPathRuleAnywhereElement

  def initialize(rulename, ruleindex)
    super(rulename, ruleindex)
  end

  # Return all direct children of t that are rule contexts whose rule index
  # matches (or does not match, when inverted).
  def evaluate(t)
    Trees.getChildren(t).select do |child|
      child.kind_of?(ParserRuleContext) &&
        ((child.ruleIndex == ruleIndex) == !invert)
    end
  end
end
|
292
|
+
# Matches token nodes of a given type anywhere in the subtree.
class XPathTokenAnywhereElement < XPathElement

  attr_accessor :tokenType

  def initialize(rulename, tokentype)
    super(rulename)
    @tokenType = tokentype
  end

  # Collect every token node anywhere under t whose type matches.
  def evaluate(t)
    Trees.findAllTokenNodes(t, tokenType)
  end
end
|
304
|
+
|
305
|
+
# Matches direct children of a node by token type (supports inversion).
class XPathTokenElement < XPathTokenAnywhereElement

  def initialize(rulename, tokentype)
    super(rulename, tokentype)
  end

  # Return all direct children of t that are terminal nodes whose token
  # type matches (or does not match, when inverted).
  def evaluate(t)
    Trees.getChildren(t).select do |child|
      child.kind_of?(TerminalNode) &&
        ((child.symbol.type == tokenType) == !invert)
    end
  end
end
|
324
|
+
|
325
|
+
# Wildcard preceded by {@code //}: matches every node in the subtree.
class XPathWildcardAnywhereElement < XPathElement

  def initialize()
    super(XPath::WILDCARD)
  end

  # All descendants of t; an inverted wildcard matches nothing.
  def evaluate(t)
    return [] if invert # !* is weird but valid (empty)
    Trees.descendants(t)
  end
end
|
339
|
+
|
340
|
+
# Plain wildcard: matches every direct child of a node.
class XPathWildcardElement < XPathElement

  def initialize()
    super(XPath::WILDCARD)
  end

  # All direct children of t; an inverted wildcard matches nothing.
  def evaluate(t)
    return [] if invert # !* is weird but valid (empty)
    Trees.getChildren(t)
  end
end
|