antlr4 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +27 -0
- data/README.md +46 -0
- data/lib/antlr4.rb +262 -0
- data/lib/antlr4/BufferedTokenStream.rb +306 -0
- data/lib/antlr4/CommonTokenFactory.rb +53 -0
- data/lib/antlr4/CommonTokenStream.rb +56 -0
- data/lib/antlr4/FileStream.rb +14 -0
- data/lib/antlr4/InputStream.rb +82 -0
- data/lib/antlr4/IntervalSet.rb +341 -0
- data/lib/antlr4/LL1Analyzer.rb +177 -0
- data/lib/antlr4/Lexer.rb +335 -0
- data/lib/antlr4/ListTokenSource.rb +140 -0
- data/lib/antlr4/Parser.rb +562 -0
- data/lib/antlr4/ParserInterpreter.rb +149 -0
- data/lib/antlr4/ParserRuleContext.rb +162 -0
- data/lib/antlr4/PredictionContext.rb +690 -0
- data/lib/antlr4/Recognizer.rb +162 -0
- data/lib/antlr4/RuleContext.rb +226 -0
- data/lib/antlr4/Token.rb +124 -0
- data/lib/antlr4/TokenFactory.rb +3 -0
- data/lib/antlr4/TokenSource.rb +4 -0
- data/lib/antlr4/TokenStream.rb +3 -0
- data/lib/antlr4/TraceListener.rb +23 -0
- data/lib/antlr4/atn/ATN.rb +133 -0
- data/lib/antlr4/atn/ATNConfig.rb +146 -0
- data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
- data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
- data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
- data/lib/antlr4/atn/ATNSimulator.rb +43 -0
- data/lib/antlr4/atn/ATNState.rb +253 -0
- data/lib/antlr4/atn/ATNType.rb +22 -0
- data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
- data/lib/antlr4/atn/LexerAction.rb +311 -0
- data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
- data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
- data/lib/antlr4/atn/PredictionMode.rb +525 -0
- data/lib/antlr4/atn/SemanticContext.rb +355 -0
- data/lib/antlr4/atn/Transition.rb +297 -0
- data/lib/antlr4/base.rb +60 -0
- data/lib/antlr4/dfa/DFA.rb +128 -0
- data/lib/antlr4/dfa/DFASerializer.rb +77 -0
- data/lib/antlr4/dfa/DFAState.rb +133 -0
- data/lib/antlr4/error.rb +151 -0
- data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
- data/lib/antlr4/error/ErrorListener.rb +109 -0
- data/lib/antlr4/error/ErrorStrategy.rb +742 -0
- data/lib/antlr4/tree/Chunk.rb +31 -0
- data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
- data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
- data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
- data/lib/antlr4/tree/RuleTagToken.rb +39 -0
- data/lib/antlr4/tree/TokenTagToken.rb +38 -0
- data/lib/antlr4/tree/Tree.rb +204 -0
- data/lib/antlr4/tree/Trees.rb +111 -0
- data/lib/antlr4/version.rb +5 -0
- data/lib/antlr4/xpath/XPath.rb +354 -0
- data/lib/double_key_map.rb +78 -0
- data/lib/java_symbols.rb +24 -0
- data/lib/uuid.rb +87 -0
- data/test/test_intervalset.rb +664 -0
- data/test/test_tree.rb +140 -0
- data/test/test_uuid.rb +122 -0
- metadata +109 -0
@@ -0,0 +1,31 @@
|
|
1
|
+
# Base class for the pieces a tree pattern string is split into.
# It carries no behaviour of its own; it only gives TagChunk and TextChunk
# a common ancestor.
class Chunk
end

# A tag found inside a tree pattern, holding the tag name and an optional
# label.
class TagChunk < Chunk

  attr_accessor :tag, :label

  # @param tag   the tag name
  # @param label optional label attached to the tag; nil when absent
  def initialize(tag, label=nil)
    self.tag = tag
    self.label = label
  end

  # @return [String] "label:tag" when a label is present, otherwise the tag.
  def to_s
    # to_s on the tag keeps the return value a String even when the tag was
    # set to something else (a String tag is returned unchanged).
    return self.tag.to_s if self.label.nil?
    "#{self.label}:#{self.tag}"
  end
end

# A run of literal text found inside a tree pattern.
class TextChunk < Chunk

  attr_accessor :text

  # @param text the literal text of this chunk
  def initialize(text)
    self.text = text
  end

  # @return [String] the text wrapped in single quotes.
  def to_s
    "'#{ self.text }'"
  end

end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# Represents the result of matching a {@link ParseTree} against a tree pattern.
class ParseTreeMatch
  # tree           - the parse tree that was matched against the pattern
  # pattern        - the parse tree pattern
  # labels         - Hash mapping label names to Arrays of the parse tree
  #                  nodes located by the matching process
  # mismatchedNode - the first node which failed to match, or nil when the
  #                  match succeeded
  attr_accessor :tree, :pattern, :labels, :mismatchedNode

  # Constructs a new instance from the specified parse tree and pattern.
  #
  # @param tree The parse tree to match against the pattern.
  # @param pattern The parse tree pattern.
  # @param labels A mapping from label names to collections of parse tree
  #  nodes located by the tree pattern matching process.
  # @param mismatchedNode The first node which failed to match the tree
  #  pattern during the matching process (nil on success).
  #
  # @raise [ArgumentError] if tree, pattern or labels is nil
  def initialize(tree, pattern, labels, mismatchedNode)
    raise ArgumentError, "tree cannot be null" if tree.nil?
    raise ArgumentError, "pattern cannot be null" if pattern.nil?
    raise ArgumentError, "labels cannot be null" if labels.nil?
    self.tree = tree
    self.pattern = pattern
    self.labels = labels
    self.mismatchedNode = mismatchedNode
  end

  # Get the last node associated with a specific label.
  #
  # For example, for pattern {@code <id:ID>}, {@code get("id")} returns the
  # node matched for that {@code ID}. If more than one node matched the
  # specified label, only the last is returned.
  #
  # @param label The label to check.
  # @return The last node recorded under the label, or nil if nothing was
  #  recorded for it.
  def get(label)
    # Hash#fetch with a default is the core-Ruby lookup-with-fallback; it does
    # not depend on a non-standard Hash#get being available.
    parseTrees = self.labels.fetch(label, nil)
    return nil if parseTrees.nil? || parseTrees.empty?
    parseTrees[-1]
  end

  # Return all nodes recorded under the specified label.
  #
  # @param label The label.
  # @return The Array stored for the label, or a new empty Array when the
  #  label is unknown.
  def getAll(label)
    self.labels.fetch(label) { Array.new }
  end

  # Gets a value indicating whether the match operation succeeded.
  #
  # @return true if no mismatched node was recorded; otherwise false.
  def succeeded
    self.mismatchedNode.nil?
  end

  # @return [String] e.g. "Match succeeded; found 2 labels".
  def to_s
    outcome = self.succeeded() ? "succeeded" : "failed"
    "Match #{outcome}; found #{self.labels.length} labels"
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# A pattern like {@code <ID> = <expr>;} converted to a {@link ParseTree} by
|
2
|
+
# {@link ParseTreePatternMatcher#compile(String, int)}.
|
3
|
+
#
|
4
|
+
#from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher
|
5
|
+
#from antlr4.tree.Tree import ParseTree
|
6
|
+
#from antlr4.xpath.XPath import XPath
|
7
|
+
|
8
|
+
|
9
|
+
class ParseTreePattern
  # A compiled tree pattern: the original pattern text, the rule index it was
  # compiled for, the resulting pattern tree and the matcher that built it.
  #
  # matcher          - the ParseTreePatternMatcher which created this pattern
  # pattern          - the tree pattern in concrete syntax form
  # patternRuleIndex - the parser rule which serves as the root of the pattern
  # patternTree      - the tree pattern in parse tree form
  attr_accessor :matcher, :patternRuleIndex, :pattern, :patternTree

  # Construct a new instance of the ParseTreePattern class.
  #
  # @param matcher The matcher which created this tree pattern.
  # @param pattern The tree pattern in concrete syntax form.
  # @param patternRuleIndex The parser rule which serves as the root of the
  #  tree pattern.
  # @param patternTree The tree pattern in parse tree form.
  def initialize(matcher, pattern, patternRuleIndex, patternTree)
    self.matcher = matcher
    self.patternRuleIndex = patternRuleIndex
    self.pattern = pattern
    self.patternTree = patternTree
  end

  # Match a specific parse tree against this tree pattern.
  #
  # @param tree The parse tree to match against this tree pattern.
  # @return The match object produced by the matcher; its succeeded method
  #  tells whether the match was successful.
  def match(tree)
    # The matcher's entry point for an already-compiled pattern is
    # matchPattern(tree, pattern); it defines no plain `match` method.
    self.matcher.matchPattern(tree, self)
  end

  # Determine whether or not a parse tree matches this tree pattern.
  #
  # @param tree The parse tree to match against this tree pattern.
  # @return true if tree is a match for the current tree pattern;
  #  otherwise false.
  def matches(tree)
    self.match(tree).succeeded()
  end

  # Find all nodes using XPath and then try to match those subtrees against
  # this tree pattern.
  #
  # @param tree The parse tree to search.
  # @param xpath An expression matching the nodes.
  # @return An Array of the successful match objects. Unsuccessful matches
  #  are omitted from the result, regardless of the reason for the failure.
  def findAll(tree, xpath)
    subtrees = XPath.findAll(tree, xpath, self.matcher.parser)
    subtrees.map { |subtree| self.match(subtree) }.select { |m| m.succeeded() }
  end
end
|
@@ -0,0 +1,334 @@
|
|
1
|
+
#
|
2
|
+
#from antlr4 import Lexer, CommonTokenStream, ParserRuleContext
|
3
|
+
#from antlr4.InputStream import InputStream
|
4
|
+
#from antlr4.ListTokenSource import ListTokenSource
|
5
|
+
#from antlr4.Token import Token
|
6
|
+
#from antlr4.error.ErrorStrategy import BailErrorStrategy
|
7
|
+
#from antlr4.error.Errors import RecognitionException, ParseCancellationException
|
8
|
+
#from antlr4.tree.Chunk import TagChunk, TextChunk
|
9
|
+
#from antlr4.tree.RuleTagToken import RuleTagToken
|
10
|
+
##from antlr4.tree.TokenTagToken import TokenTagToken
|
11
|
+
#from antlr4.tree.Tree import ParseTree, TerminalNode, RuleNode
|
12
|
+
#from antlr4.tree.ParseTreeMatch import ParseTreeMatch
|
13
|
+
# from antlr4.tree.ParseTreePattern import ParseTreePattern
|
14
|
+
|
15
|
+
# Raised by ParseTreePatternMatcher#compileTreePattern to wrap an unexpected
# error hit while parsing a pattern with the requested start rule.
# It derives directly from Exception, so a bare `rescue` does not catch it;
# callers must rescue it (or Exception) explicitly.
class CannotInvokeStartRule < Exception
end
|
17
|
+
|
18
|
+
# Raised by ParseTreePatternMatcher#compileTreePattern when tokens remain
# after the pattern has been parsed with the requested start rule.
# It derives directly from Exception, so a bare `rescue` does not catch it;
# callers must rescue it (or Exception) explicitly.
class StartRuleDoesNotConsumeFullPattern < Exception
end
|
20
|
+
|
21
|
+
class ParseTreePatternMatcher
  # Compiles tree pattern strings such as "<ID> = <expr>;" into pattern trees
  # and matches parse trees against them.
  #
  # lexer  - lexer used to tokenize the literal text of a pattern; its input
  #          stream is replaced while tokenizing
  # parser - parser used to look up token types, rule indexes, names and the
  #          ATN with bypass alternatives
  # start  - tag start delimiter (default "<")
  # stop   - tag stop delimiter (default ">")
  # escape - escape sequence placed before a delimiter (default "\")
  attr_accessor :lexer, :parser, :start, :stop, :escape

  # @param lexer  the lexer used for the text parts of a pattern
  # @param parser the parser the patterns are written for
  def initialize(lexer, parser)
    self.lexer = lexer
    self.parser = parser
    self.start = "<"
    self.stop = ">"
    self.escape = "\\" # e.g., \< and \> must escape BOTH!
  end

  # Set the delimiters used for marking rule and token tags within concrete
  # syntax used by the tree pattern parser.
  #
  # @param start The start delimiter.
  # @param stop The stop delimiter.
  # @param escapeLeft The escape sequence to use for escaping a start or
  #  stop delimiter.
  #
  # @raise [ArgumentError] if start or stop is nil or empty
  def setDelimiters(start, stop, escapeLeft)
    raise ArgumentError, "start cannot be null or empty" if start.nil? || start.empty?
    raise ArgumentError, "stop cannot be null or empty" if stop.nil? || stop.empty?
    self.start = start
    self.stop = stop
    self.escape = escapeLeft
  end

  # Does pattern, compiled as rule patternRuleIndex, match tree?
  #
  # @param pattern the pattern in string form
  # @return true when the whole tree matched
  def matchesRuleIndex(tree, pattern, patternRuleIndex)
    compiled = self.compileTreePattern(pattern, patternRuleIndex)
    self.matchesPattern(tree, compiled)
  end

  # Does the compiled pattern match tree? Takes a compiled pattern instead of
  # a string representation of a tree pattern.
  #
  # @return true when the whole tree matched
  def matchesPattern(tree, pattern)
    self.matchImpl(tree, pattern.patternTree, Hash.new).nil?
  end

  # Compare pattern, compiled as rule patternRuleIndex, against tree and
  # return a ParseTreeMatch that contains the matched elements, or the node
  # at which the match failed.
  def matchRuleIndex(tree, pattern, patternRuleIndex)
    compiled = self.compileTreePattern(pattern, patternRuleIndex)
    self.matchPattern(tree, compiled)
  end

  # Compare a compiled pattern against tree and return a ParseTreeMatch that
  # contains the matched elements, or the node at which the match failed.
  def matchPattern(tree, pattern)
    labels = Hash.new
    mismatchedNode = self.matchImpl(tree, pattern.patternTree, labels)
    ParseTreeMatch.new(tree, pattern, labels, mismatchedNode)
  end

  # For repeated use of a tree pattern, compile it to a ParseTreePattern
  # using this method.
  #
  # @param pattern the pattern in string form
  # @param patternRuleIndex index of the rule used to parse the pattern
  # @return [ParseTreePattern]
  # @raise [StartRuleDoesNotConsumeFullPattern] if tokens remain after parsing
  # @raise [CannotInvokeStartRule] wrapping any error other than a
  #  RecognitionException or ParseCancellationException raised by the parse
  def compileTreePattern(pattern, patternRuleIndex)
    tokenList = self.tokenize(pattern)
    tokenSrc = ListTokenSource.new(tokenList)
    tokens = CommonTokenStream.new(tokenSrc)

    recognizer = self.parser
    parserInterp = ParserInterpreter.new(recognizer.grammarFileName, recognizer.tokenNames,
                                         recognizer.ruleNames,
                                         recognizer.getATNWithBypassAlts(), tokens)
    tree = nil
    begin
      parserInterp.setErrorHandler(BailErrorStrategy.new)
      tree = parserInterp.parse(patternRuleIndex)
    rescue ParseCancellationException => e
      # Surface the underlying error; fall back to the cancellation itself
      # when no cause was recorded (raising nil would be a TypeError).
      raise(e.cause || e)
    rescue RecognitionException
      raise
    rescue Exception => e
      # Deliberately broad: every other failure is reported as
      # CannotInvokeStartRule, as before.
      raise CannotInvokeStartRule.new(e)
    end

    # Make sure tree pattern compilation checks for a complete parse
    if tokens.LA(1) != Token::EOF
      raise StartRuleDoesNotConsumeFullPattern.new()
    end

    ParseTreePattern.new(self, pattern, patternRuleIndex, tree)
  end

  # Recursively walk tree against patternTree, filling labels.
  #
  # @param labels Hash that receives label -> Array-of-nodes entries
  # @return the first node encountered in tree which does not match a
  #  corresponding node in patternTree, or nil if the match was successful.
  # @raise [ArgumentError] if tree or patternTree is nil
  def matchImpl(tree, patternTree, labels)
    raise ArgumentError, "tree cannot be null" if tree.nil?
    raise ArgumentError, "patternTree cannot be null" if patternTree.nil?

    # x and <ID>, x and y, or x and x; or could be mismatched types
    if tree.kind_of?(TerminalNode) && patternTree.kind_of?(TerminalNode)
      # tokens of different types can never match
      return tree unless tree.symbol.type == patternTree.symbol.type

      patternSymbol = patternTree.symbol
      if patternSymbol.kind_of?(TokenTagToken) # x and <ID>
        # track label->list-of-nodes for both token name and label (if any)
        self.map(labels, patternSymbol.tokenName, tree)
        self.map(labels, patternSymbol.label, tree) unless patternSymbol.label.nil?
        return nil
      end
      # x and x match; x and y do not
      return tree.getText() == patternTree.getText() ? nil : tree
    end

    if tree.kind_of?(ParserRuleContext) && patternTree.kind_of?(ParserRuleContext)
      # (expr ...) and <expr>
      ruleTagToken = self.getRuleTagToken(patternTree)
      unless ruleTagToken.nil?
        return tree unless tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex
        # track label->list-of-nodes for both rule name and label (if any)
        self.map(labels, ruleTagToken.ruleName, tree)
        self.map(labels, ruleTagToken.label, tree) unless ruleTagToken.label.nil?
        return nil
      end

      # (expr ...) and (expr ...)
      childCount = tree.getChildCount()
      return tree unless childCount == patternTree.getChildCount()

      (0...childCount).each do |i|
        childMatch = self.matchImpl(tree.getChild(i), patternTree.getChild(i), labels)
        return childMatch unless childMatch.nil?
      end
      return nil
    end

    # if nodes aren't both tokens or both rule nodes, can't match
    tree
  end

  # Append tree to the list stored under label, creating the list on first
  # use.
  #
  # @return [Array] the list now stored under label
  def map(labels, label, tree)
    nodes = labels[label]
    nodes = Array.new if nodes.nil?
    nodes.push(tree)
    labels[label] = nodes
  end

  # Is tree a (expr <expr>) subtree, i.e. a rule node whose only child is a
  # terminal carrying a RuleTagToken?
  #
  # @return that RuleTagToken, or nil otherwise
  def getRuleTagToken(tree)
    return nil unless tree.kind_of?(RuleNode)
    return nil unless tree.getChildCount() == 1 && tree.getChild(0).kind_of?(TerminalNode)
    symbol = tree.getChild(0).symbol
    symbol.kind_of?(RuleTagToken) ? symbol : nil
  end

  # Turn a pattern string into a list of tokens: tags become TokenTagToken /
  # RuleTagToken objects, literal text is run through the lexer.
  #
  # @raise [ArgumentError] for an unknown token, unknown rule or a tag that
  #  starts with neither an upper-case nor a lower-case letter
  def tokenize(pattern)
    # split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
    chunks = self.split(pattern)

    # create token stream from text and tags
    tokens = Array.new
    chunks.each do |chunk|
      if chunk.kind_of?(TagChunk)
        tag = chunk.tag.to_s
        # add special rule token or conjure up new token from name;
        # the case of the first character decides between token and rule
        if tag =~ /\A[[:upper:]]/
          ttype = self.parser.getTokenType(chunk.tag)
          if ttype == Token::INVALID_TYPE
            raise ArgumentError, "Unknown token #{chunk.tag} in pattern: #{pattern}"
          end
          tokens.push(TokenTagToken.new(chunk.tag, ttype, chunk.label))
        elsif tag =~ /\A[[:lower:]]/
          ruleIndex = self.parser.getRuleIndex(chunk.tag)
          if ruleIndex == -1
            raise ArgumentError, "Unknown rule #{chunk.tag} in pattern: #{pattern}"
          end
          ruleImaginaryTokenType = self.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex]
          tokens.push(RuleTagToken.new(chunk.tag, ruleImaginaryTokenType, chunk.label))
        else
          raise ArgumentError, "Invalid tag #{chunk.tag} in pattern: #{pattern}"
        end
      else
        self.lexer.setInputStream(InputStream.new(chunk.text))
        token = self.lexer.nextToken()
        while token.type != Token::EOF
          tokens.push(token)
          token = self.lexer.nextToken()
        end
      end
    end
    tokens
  end

  # Split {@code <ID> = <e:expr> ;} into 4 chunks for tokenizing by tokenize.
  #
  # @param pattern [String] the pattern text
  # @return [Array<Chunk>] TextChunk / TagChunk objects in pattern order
  # @raise [ArgumentError] for unbalanced or out-of-order tag delimiters
  def split(pattern)
    n = pattern.length
    escapedStart = self.escape + self.start
    escapedStop = self.escape + self.stop

    # find all start and stop indexes first, then collect
    starts = Array.new
    stops = Array.new
    pos = 0
    while pos < n
      # compare the slice at the cursor instead of searching the whole rest
      if pattern[pos, escapedStart.length] == escapedStart
        pos += escapedStart.length
      elsif pattern[pos, escapedStop.length] == escapedStop
        pos += escapedStop.length
      elsif pattern[pos, self.start.length] == self.start
        starts.push(pos)
        pos += self.start.length
      elsif pattern[pos, self.stop.length] == self.stop
        stops.push(pos)
        pos += self.stop.length
      else
        pos += 1
      end
    end

    nt = starts.length
    raise ArgumentError, "unterminated tag in pattern: #{pattern}" if nt > stops.length
    raise ArgumentError, "missing start tag in pattern: #{pattern}" if nt < stops.length
    starts.zip(stops).each do |tagStart, tagStop|
      if tagStart >= tagStop
        raise ArgumentError, "tag delimiters out of order in pattern: " + pattern
      end
    end

    # collect into chunks now
    chunks = Array.new
    chunks.push(TextChunk.new(pattern)) if nt == 0

    if nt > 0 && starts[0] > 0 # copy text up to first tag into chunks
      chunks.push(TextChunk.new(pattern[0...starts[0]]))
    end

    nt.times do |i|
      # copy inside of <tag>; exclusive ranges keep empty pieces empty
      tag = pattern[(starts[i] + self.start.length)...stops[i]]
      ruleOrToken = tag
      label = nil
      colon = tag.index(':')
      unless colon.nil?
        label = tag[0...colon]
        ruleOrToken = tag[(colon + 1)..-1]
      end
      # TagChunk takes (tag, label) in that order
      chunks.push(TagChunk.new(ruleOrToken, label))
      if i + 1 < nt
        # copy from end of <tag> to start of next
        text = pattern[(stops[i] + self.stop.length)...starts[i + 1]]
        chunks.push(TextChunk.new(text))
      end
    end

    if nt > 0
      afterLastTag = stops[nt - 1] + self.stop.length
      if afterLastTag < n # copy text from end of last tag to end
        chunks.push(TextChunk.new(pattern[afterLastTag..-1]))
      end
    end

    # strip out the escape sequences from text chunks but not tags
    chunks.map do |chunk|
      next chunk unless chunk.kind_of?(TextChunk)
      unescaped = chunk.text.gsub(self.escape, "")
      unescaped.length < chunk.text.length ? TextChunk.new(unescaped) : chunk
    end
  end
end
|