antlr4 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +27 -0
- data/README.md +46 -0
- data/lib/antlr4.rb +262 -0
- data/lib/antlr4/BufferedTokenStream.rb +306 -0
- data/lib/antlr4/CommonTokenFactory.rb +53 -0
- data/lib/antlr4/CommonTokenStream.rb +56 -0
- data/lib/antlr4/FileStream.rb +14 -0
- data/lib/antlr4/InputStream.rb +82 -0
- data/lib/antlr4/IntervalSet.rb +341 -0
- data/lib/antlr4/LL1Analyzer.rb +177 -0
- data/lib/antlr4/Lexer.rb +335 -0
- data/lib/antlr4/ListTokenSource.rb +140 -0
- data/lib/antlr4/Parser.rb +562 -0
- data/lib/antlr4/ParserInterpreter.rb +149 -0
- data/lib/antlr4/ParserRuleContext.rb +162 -0
- data/lib/antlr4/PredictionContext.rb +690 -0
- data/lib/antlr4/Recognizer.rb +162 -0
- data/lib/antlr4/RuleContext.rb +226 -0
- data/lib/antlr4/Token.rb +124 -0
- data/lib/antlr4/TokenFactory.rb +3 -0
- data/lib/antlr4/TokenSource.rb +4 -0
- data/lib/antlr4/TokenStream.rb +3 -0
- data/lib/antlr4/TraceListener.rb +23 -0
- data/lib/antlr4/atn/ATN.rb +133 -0
- data/lib/antlr4/atn/ATNConfig.rb +146 -0
- data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
- data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
- data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
- data/lib/antlr4/atn/ATNSimulator.rb +43 -0
- data/lib/antlr4/atn/ATNState.rb +253 -0
- data/lib/antlr4/atn/ATNType.rb +22 -0
- data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
- data/lib/antlr4/atn/LexerAction.rb +311 -0
- data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
- data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
- data/lib/antlr4/atn/PredictionMode.rb +525 -0
- data/lib/antlr4/atn/SemanticContext.rb +355 -0
- data/lib/antlr4/atn/Transition.rb +297 -0
- data/lib/antlr4/base.rb +60 -0
- data/lib/antlr4/dfa/DFA.rb +128 -0
- data/lib/antlr4/dfa/DFASerializer.rb +77 -0
- data/lib/antlr4/dfa/DFAState.rb +133 -0
- data/lib/antlr4/error.rb +151 -0
- data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
- data/lib/antlr4/error/ErrorListener.rb +109 -0
- data/lib/antlr4/error/ErrorStrategy.rb +742 -0
- data/lib/antlr4/tree/Chunk.rb +31 -0
- data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
- data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
- data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
- data/lib/antlr4/tree/RuleTagToken.rb +39 -0
- data/lib/antlr4/tree/TokenTagToken.rb +38 -0
- data/lib/antlr4/tree/Tree.rb +204 -0
- data/lib/antlr4/tree/Trees.rb +111 -0
- data/lib/antlr4/version.rb +5 -0
- data/lib/antlr4/xpath/XPath.rb +354 -0
- data/lib/double_key_map.rb +78 -0
- data/lib/java_symbols.rb +24 -0
- data/lib/uuid.rb +87 -0
- data/test/test_intervalset.rb +664 -0
- data/test/test_tree.rb +140 -0
- data/test/test_uuid.rb +122 -0
- metadata +109 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
# Common superclass of the pieces a tree pattern string is split into
# (see the TagChunk and TextChunk subclasses). It carries no behavior of its own.
class Chunk; end
|
3
|
+
|
4
|
+
class TagChunk < Chunk
  # A tag inside a tree pattern, with an optional label in front of it.
  #
  # tag   - the value stored as the tag.
  # label - the optional label; nil when the tag is unlabeled.
  attr_accessor :tag, :label

  # @param tag   value stored in #tag
  # @param label value stored in #label (defaults to nil)
  def initialize(tag, label=nil)
    self.tag, self.label = tag, label
  end

  # @return the tag itself when there is no label, otherwise the
  #   string "label:tag".
  def to_s
    return self.tag if self.label.nil?

    "#{self.label}:#{self.tag}"
  end
end
|
20
|
+
class TextChunk < Chunk
  # A run of literal text inside a tree pattern.
  attr_accessor :text

  # @param text value stored in #text
  def initialize(text)
    self.text = text
  end

  # @return the text wrapped in single quotes.
  def to_s
    quoted = "'#{ self.text }'"
    quoted
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# Represents the result of matching a {@link ParseTree} against a tree pattern.
|
2
|
+
|
3
|
+
class ParseTreeMatch
  # Result of matching a parse tree against a tree pattern.
  #
  # tree           - the parse tree that was matched against the pattern.
  # pattern        - the tree pattern.
  # labels         - Hash-like mapping from label names to arrays of the
  #                  parse tree nodes collected during matching; it must
  #                  respond to #fetch and #length.
  # mismatchedNode - the first node that failed to match, or nil when the
  #                  match succeeded.
  attr_accessor :tree, :pattern, :labels, :mismatchedNode

  # Builds a match result.
  #
  # @param tree the parse tree that was matched
  # @param pattern the tree pattern
  # @param labels mapping from label names to arrays of matched nodes
  # @param mismatchedNode first node that failed to match (nil on success)
  # @raise [ArgumentError] if tree, pattern or labels is nil
  def initialize(tree, pattern, labels, mismatchedNode)
    raise ArgumentError, "tree cannot be null" if tree.nil?
    raise ArgumentError, "pattern cannot be null" if pattern.nil?
    raise ArgumentError, "labels cannot be null" if labels.nil?

    self.tree = tree
    self.pattern = pattern
    self.labels = labels
    self.mismatchedNode = mismatchedNode
  end

  # Gets the last node associated with +label+.
  #
  # For pattern <id:ID>, get("id") returns the node matched for that ID.
  # When several nodes were recorded for the label only the last one is
  # returned.
  #
  # @param label the label to look up
  # @return the last node recorded for the label, or nil when the label is
  #   unknown or its node list is empty
  def get(label)
    # Hash has no #get (that is Python's dict.get); #fetch with a default
    # is the Ruby equivalent.
    parseTrees = self.labels.fetch(label, nil)
    return nil if parseTrees.nil? || parseTrees.empty?

    parseTrees[-1]
  end

  # Returns every node recorded for +label+.
  #
  # @param label the label to look up
  # @return the array stored for the label, or a new empty array when the
  #   label is unknown
  def getAll(label)
    self.labels.fetch(label) { Array.new }
  end

  # @return true when no mismatched node was recorded, false otherwise
  def succeeded
    self.mismatchedNode.nil?
  end

  # @return a summary such as "Match succeeded; found 2 labels"
  def to_s
    outcome = self.succeeded() ? "succeeded" : "failed"
    "Match #{outcome}; found #{self.labels.length} labels"
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# A pattern like {@code <ID> = <expr>;} converted to a {@link ParseTree} by
|
2
|
+
# {@link ParseTreePatternMatcher#compileTreePattern}.
|
3
|
+
#
|
4
|
+
#from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher
|
5
|
+
#from antlr4.tree.Tree import ParseTree
|
6
|
+
#from antlr4.xpath.XPath import XPath
|
7
|
+
|
8
|
+
|
9
|
+
class ParseTreePattern
  # A compiled tree pattern together with the matcher that produced it.
  #
  # matcher          - the matcher that created this pattern; it is asked
  #                    to perform every match and supplies the parser used
  #                    by #findAll.
  # pattern          - the tree pattern in concrete syntax form.
  # patternRuleIndex - the parser rule serving as the root of the pattern.
  # patternTree      - the tree pattern in parse tree form.
  attr_accessor :matcher, :patternRuleIndex, :pattern, :patternTree

  # @param matcher the matcher which created this tree pattern
  # @param pattern the tree pattern in concrete syntax form
  # @param patternRuleIndex the parser rule which is the root of the pattern
  # @param patternTree the tree pattern in parse tree form
  def initialize(matcher, pattern, patternRuleIndex, patternTree)
    self.matcher = matcher
    self.patternRuleIndex = patternRuleIndex
    self.pattern = pattern
    self.patternTree = patternTree
  end

  # Matches a specific parse tree against this tree pattern.
  #
  # @param tree the parse tree to match against this pattern
  # @return the match object produced by the matcher; call #succeeded on
  #   it to find out whether the match was successful
  def match(tree)
    m = self.matcher
    # ParseTreePatternMatcher exposes matchPattern(tree, pattern); it has no
    # #match method. Custom matcher objects that only offer #match are
    # still supported through the fallback.
    return m.matchPattern(tree, self) if m.respond_to?(:matchPattern)

    m.match(tree, self)
  end

  # Determines whether a parse tree matches this tree pattern.
  #
  # @param tree the parse tree to match against this pattern
  # @return the value of #succeeded on the match result
  def matches(tree)
    self.match(tree).succeeded()
  end

  # Finds nodes with XPath and matches each resulting subtree against this
  # pattern.
  #
  # @param tree the parse tree to search
  # @param xpath expression selecting the candidate nodes
  # @return array of the successful match objects, in subtree order;
  #   unsuccessful matches are omitted
  def findAll(tree, xpath)
    subtrees = XPath.findAll(tree, xpath, self.matcher.parser)
    subtrees.map { |t| self.match(t) }.select { |result| result.succeeded() }
  end
end
|
@@ -0,0 +1,334 @@
|
|
1
|
+
#
|
2
|
+
#from antlr4 import Lexer, CommonTokenStream, ParserRuleContext
|
3
|
+
#from antlr4.InputStream import InputStream
|
4
|
+
#from antlr4.ListTokenSource import ListTokenSource
|
5
|
+
#from antlr4.Token import Token
|
6
|
+
#from antlr4.error.ErrorStrategy import BailErrorStrategy
|
7
|
+
#from antlr4.error.Errors import RecognitionException, ParseCancellationException
|
8
|
+
#from antlr4.tree.Chunk import TagChunk, TextChunk
|
9
|
+
#from antlr4.tree.RuleTagToken import RuleTagToken
|
10
|
+
##from antlr4.tree.TokenTagToken import TokenTagToken
|
11
|
+
#from antlr4.tree.Tree import ParseTree, TerminalNode, RuleNode
|
12
|
+
#from antlr4.tree.ParseTreeMatch import ParseTreeMatch
|
13
|
+
# from antlr4.tree.ParseTreePattern import ParseTreePattern
|
14
|
+
|
15
|
+
# Raised by ParseTreePatternMatcher#compileTreePattern to wrap an unexpected
# error that occurred while parsing a tree pattern.
class CannotInvokeStartRule < Exception; end
|
17
|
+
|
18
|
+
# Raised by ParseTreePatternMatcher#compileTreePattern when tokens remain
# after the start rule has been parsed.
class StartRuleDoesNotConsumeFullPattern < Exception; end
|
20
|
+
|
21
|
+
class ParseTreePatternMatcher
  # Compiles tree patterns such as "<ID> = <expr>;" and matches them
  # against parse trees.
  #
  # lexer  - lexer used to tokenize the literal text of a pattern; its
  #          input stream is replaced while tokenizing.
  # parser - parser supplying the grammar file name, token names, rule
  #          names and the ATN with bypass alternatives.
  # start  - tag start delimiter (default "<").
  # stop   - tag stop delimiter (default ">").
  # escape - escape sequence placed before a delimiter to make it literal
  #          (default a single backslash).
  attr_accessor :lexer, :parser, :start, :stop, :escape

  # @param lexer lexer used to tokenize pattern text
  # @param parser parser describing the grammar
  def initialize(lexer, parser)
    self.lexer = lexer
    self.parser = parser
    self.start = "<"
    self.stop = ">"
    self.escape = "\\" # e.g., \< and \> must escape BOTH!
  end

  # Sets the delimiters used for marking rule and token tags within the
  # concrete syntax used by the tree pattern parser.
  #
  # @param start the start delimiter
  # @param stop the stop delimiter
  # @param escapeLeft the escape sequence for a start or stop delimiter
  # @raise [ArgumentError] if start or stop is nil or empty
  def setDelimiters(start, stop, escapeLeft)
    raise ArgumentError, "start cannot be null or empty" if start.nil? || start.empty?
    raise ArgumentError, "stop cannot be null or empty" if stop.nil? || stop.empty?

    self.start = start
    self.stop = stop
    self.escape = escapeLeft
  end

  # Does +pattern+ (a string), compiled as rule +patternRuleIndex+, match
  # +tree+?
  #
  # @return true when the tree matches, false otherwise
  def matchesRuleIndex(tree, pattern, patternRuleIndex)
    compiled = self.compileTreePattern(pattern, patternRuleIndex)
    self.matchesPattern(tree, compiled)
  end

  # Does the already compiled +pattern+ match +tree+?
  #
  # @param pattern object responding to #patternTree
  # @return true when the tree matches, false otherwise
  def matchesPattern(tree, pattern)
    self.matchImpl(tree, pattern.patternTree, Hash.new).nil?
  end

  # Compiles +pattern+ as rule +patternRuleIndex+, compares it against
  # +tree+ and returns a ParseTreeMatch holding the matched elements or the
  # node at which the match failed.
  def matchRuleIndex(tree, pattern, patternRuleIndex)
    compiled = self.compileTreePattern(pattern, patternRuleIndex)
    self.matchPattern(tree, compiled)
  end

  # Compares the already compiled +pattern+ against +tree+ and returns a
  # ParseTreeMatch holding the matched elements or the node at which the
  # match failed.
  def matchPattern(tree, pattern)
    labels = Hash.new
    mismatchedNode = self.matchImpl(tree, pattern.patternTree, labels)
    ParseTreeMatch.new(tree, pattern, labels, mismatchedNode)
  end

  # Compiles a pattern string into a ParseTreePattern for repeated use.
  #
  # @param pattern the pattern in concrete syntax
  # @param patternRuleIndex index of the rule used as the pattern's root
  # @return [ParseTreePattern]
  # @raise [CannotInvokeStartRule] when parsing fails with an unexpected error
  # @raise [StartRuleDoesNotConsumeFullPattern] when tokens remain after
  #   the start rule was parsed
  def compileTreePattern(pattern, patternRuleIndex)
    tokenList = self.tokenize(pattern)
    tokenSrc = ListTokenSource.new(tokenList)
    tokens = CommonTokenStream.new(tokenSrc)

    grammar = self.parser
    parserInterp = ParserInterpreter.new(grammar.grammarFileName, grammar.tokenNames,
                                         grammar.ruleNames,
                                         grammar.getATNWithBypassAlts(), tokens)
    tree = nil
    begin
      # NOTE(review): assumes ParserInterpreter responds to setErrorHandler - confirm.
      parserInterp.setErrorHandler(BailErrorStrategy.new)
      tree = parserInterp.parse(patternRuleIndex)
    rescue ParseCancellationException => e
      # Surface the underlying error when there is one; `raise nil` would
      # itself fail with a TypeError.
      raise(e.cause || e)
    rescue RecognitionException
      raise
    rescue SystemExit, SignalException, NoMemoryError
      # Never disguise interpreter-level conditions as a pattern error.
      raise
    rescue Exception => e
      raise CannotInvokeStartRule.new(e)
    end

    # Make sure tree pattern compilation checks for a complete parse
    if tokens.LA(1) != Token::EOF
      raise StartRuleDoesNotConsumeFullPattern.new()
    end

    ParseTreePattern.new(self, pattern, patternRuleIndex, tree)
  end

  # Recursively walks +tree+ against +patternTree+, filling +labels+.
  #
  # @param labels Hash receiving label => array-of-nodes entries
  # @return the first node in +tree+ which does not match the corresponding
  #   node in +patternTree+, or nil when the match was successful
  # @raise [ArgumentError] if tree or patternTree is nil
  def matchImpl(tree, patternTree, labels)
    raise ArgumentError, "tree cannot be null" if tree.nil?
    raise ArgumentError, "patternTree cannot be null" if patternTree.nil?

    # x and <ID>, x and y, or x and x; or could be mismatched types
    if tree.kind_of?(TerminalNode) && patternTree.kind_of?(TerminalNode)
      # tokens of different types can never match
      return tree unless tree.symbol.type == patternTree.symbol.type

      patternSymbol = patternTree.symbol
      if patternSymbol.kind_of?(TokenTagToken) # x and <ID>
        # track label->list-of-nodes for both token name and label (if any)
        self.map(labels, patternSymbol.tokenName, tree)
        self.map(labels, patternSymbol.label, tree) unless patternSymbol.label.nil?
        return nil
      end
      # x and x match; x and y do not
      return tree.getText() == patternTree.getText() ? nil : tree
    end

    if tree.kind_of?(ParserRuleContext) && patternTree.kind_of?(ParserRuleContext)
      # (expr ...) and <expr>
      ruleTagToken = self.getRuleTagToken(patternTree)
      unless ruleTagToken.nil?
        # NOTE(review): relies on the nodes responding to ruleContext - confirm.
        unless tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex
          return tree
        end
        # track label->list-of-nodes for both rule name and label (if any)
        self.map(labels, ruleTagToken.ruleName, tree)
        self.map(labels, ruleTagToken.label, tree) unless ruleTagToken.label.nil?
        return nil
      end

      # (expr ...) and (expr ...)
      childCount = tree.getChildCount()
      return tree unless childCount == patternTree.getChildCount()

      childCount.times do |i|
        childMatch = self.matchImpl(tree.getChild(i), patternTree.getChild(i), labels)
        return childMatch unless childMatch.nil?
      end
      return nil
    end

    # if nodes aren't both tokens or both rule nodes, can't match
    tree
  end

  # Appends +tree+ to the list stored under +label+ in +labels+, creating
  # the list on first use.
  #
  # @return the list stored under +label+
  def map(labels, label, tree)
    nodes = labels[label]
    nodes = Array.new if nodes.nil?
    nodes.push(tree)
    labels[label] = nodes
  end

  # Is +tree+ a (expr <expr>) subtree, i.e. a rule node whose only child is
  # a terminal carrying a RuleTagToken?
  #
  # @return that RuleTagToken, or nil
  def getRuleTagToken(tree)
    return nil unless tree.kind_of?(RuleNode)
    return nil unless tree.getChildCount() == 1 && tree.getChild(0).kind_of?(TerminalNode)

    symbol = tree.getChild(0).symbol
    symbol.kind_of?(RuleTagToken) ? symbol : nil
  end

  # Converts a pattern string into a token list: tags become TokenTagToken
  # or RuleTagToken instances, text is run through the lexer.
  #
  # @raise [ArgumentError] for an unknown token name, an unknown rule name,
  #   or a tag that starts with neither an upper- nor a lower-case letter
  def tokenize(pattern)
    # split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
    chunks = self.split(pattern)

    # create token stream from text and tags
    tokens = Array.new
    chunks.each do |chunk|
      if chunk.kind_of?(TagChunk)
        tagName = chunk.tag.to_s
        # add special rule token or conjure up new token from name
        if tagName =~ /\A[[:upper:]]/
          ttype = self.parser.getTokenType(chunk.tag)
          if ttype == Token::INVALID_TYPE
            raise ArgumentError, "Unknown token #{chunk.tag} in pattern: #{pattern}"
          end
          tokens.push(TokenTagToken.new(chunk.tag, ttype, chunk.label))
        elsif tagName =~ /\A[[:lower:]]/
          ruleIndex = self.parser.getRuleIndex(chunk.tag)
          if ruleIndex == -1
            raise ArgumentError, "Unknown rule #{chunk.tag} in pattern: #{pattern}"
          end
          ruleImaginaryTokenType = self.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex]
          tokens.push(RuleTagToken.new(chunk.tag, ruleImaginaryTokenType, chunk.label))
        else
          raise ArgumentError, "Invalid tag #{chunk.tag} in pattern: #{pattern}"
        end
      else
        self.lexer.setInputStream(InputStream.new(chunk.text))
        t = self.lexer.nextToken()
        while t.type != Token::EOF
          tokens.push(t)
          t = self.lexer.nextToken()
        end
      end
    end
    tokens
  end

  # Splits "<ID> = <e:expr> ;" into 4 chunks for tokenizing by #tokenize.
  #
  # @param pattern the pattern in concrete syntax
  # @return array of TagChunk and TextChunk objects in pattern order, with
  #   the escape sequence removed from the text chunks
  # @raise [ArgumentError] for unterminated tags, stop delimiters without a
  #   start, or delimiters that are out of order
  def split(pattern)
    pos = 0
    n = pattern.length
    chunks = Array.new
    escapedStart = self.escape + self.start
    escapedStop = self.escape + self.stop

    # find all start and stop indexes first, then collect
    starts = Array.new
    stops = Array.new
    while pos < n
      if pattern[pos, escapedStart.length] == escapedStart
        pos += escapedStart.length
      elsif pattern[pos, escapedStop.length] == escapedStop
        pos += escapedStop.length
      elsif pattern[pos, self.start.length] == self.start
        starts.push(pos)
        pos += self.start.length
      elsif pattern[pos, self.stop.length] == self.stop
        stops.push(pos)
        pos += self.stop.length
      else
        pos += 1
      end
    end

    nt = starts.length
    raise ArgumentError, "unterminated tag in pattern: #{pattern}" if nt > stops.length
    raise ArgumentError, "missing start tag in pattern: #{pattern}" if nt < stops.length

    nt.times do |i|
      if starts[i] >= stops[i]
        raise ArgumentError, "tag delimiters out of order in pattern: #{pattern}"
      end
    end

    # collect into chunks now
    chunks.push(TextChunk.new(pattern)) if nt == 0

    if nt > 0 && starts[0] > 0 # copy text up to first tag into chunks
      chunks.push(TextChunk.new(pattern[0...starts[0]]))
    end

    nt.times do |i|
      # copy inside of <tag>
      tag = pattern[(starts[i] + self.start.length)...stops[i]]
      ruleOrToken = tag
      label = nil
      colon = tag.index(':')
      unless colon.nil?
        label = tag[0...colon]
        ruleOrToken = tag[(colon + 1)..-1]
      end
      # TagChunk#initialize takes (tag, label)
      chunks.push(TagChunk.new(ruleOrToken, label))
      if i + 1 < nt
        # copy from end of <tag> to start of next
        text = pattern[(stops[i] + self.stop.length)...starts[i + 1]]
        chunks.push(TextChunk.new(text))
      end
    end

    if nt > 0
      afterLastTag = stops[nt - 1] + self.stop.length
      if afterLastTag < n # copy text from end of last tag to end
        chunks.push(TextChunk.new(pattern[afterLastTag..-1]))
      end
    end

    # strip out the escape sequences from text chunks but not tags
    chunks.map do |c|
      if c.kind_of?(TextChunk)
        unescaped = c.text.gsub(self.escape, "")
        unescaped.length < c.text.length ? TextChunk.new(unescaped) : c
      else
        c
      end
    end
  end
end
|