antlr4 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +27 -0
  3. data/README.md +46 -0
  4. data/lib/antlr4.rb +262 -0
  5. data/lib/antlr4/BufferedTokenStream.rb +306 -0
  6. data/lib/antlr4/CommonTokenFactory.rb +53 -0
  7. data/lib/antlr4/CommonTokenStream.rb +56 -0
  8. data/lib/antlr4/FileStream.rb +14 -0
  9. data/lib/antlr4/InputStream.rb +82 -0
  10. data/lib/antlr4/IntervalSet.rb +341 -0
  11. data/lib/antlr4/LL1Analyzer.rb +177 -0
  12. data/lib/antlr4/Lexer.rb +335 -0
  13. data/lib/antlr4/ListTokenSource.rb +140 -0
  14. data/lib/antlr4/Parser.rb +562 -0
  15. data/lib/antlr4/ParserInterpreter.rb +149 -0
  16. data/lib/antlr4/ParserRuleContext.rb +162 -0
  17. data/lib/antlr4/PredictionContext.rb +690 -0
  18. data/lib/antlr4/Recognizer.rb +162 -0
  19. data/lib/antlr4/RuleContext.rb +226 -0
  20. data/lib/antlr4/Token.rb +124 -0
  21. data/lib/antlr4/TokenFactory.rb +3 -0
  22. data/lib/antlr4/TokenSource.rb +4 -0
  23. data/lib/antlr4/TokenStream.rb +3 -0
  24. data/lib/antlr4/TraceListener.rb +23 -0
  25. data/lib/antlr4/atn/ATN.rb +133 -0
  26. data/lib/antlr4/atn/ATNConfig.rb +146 -0
  27. data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
  28. data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
  29. data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
  30. data/lib/antlr4/atn/ATNSimulator.rb +43 -0
  31. data/lib/antlr4/atn/ATNState.rb +253 -0
  32. data/lib/antlr4/atn/ATNType.rb +22 -0
  33. data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
  34. data/lib/antlr4/atn/LexerAction.rb +311 -0
  35. data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
  36. data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
  37. data/lib/antlr4/atn/PredictionMode.rb +525 -0
  38. data/lib/antlr4/atn/SemanticContext.rb +355 -0
  39. data/lib/antlr4/atn/Transition.rb +297 -0
  40. data/lib/antlr4/base.rb +60 -0
  41. data/lib/antlr4/dfa/DFA.rb +128 -0
  42. data/lib/antlr4/dfa/DFASerializer.rb +77 -0
  43. data/lib/antlr4/dfa/DFAState.rb +133 -0
  44. data/lib/antlr4/error.rb +151 -0
  45. data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
  46. data/lib/antlr4/error/ErrorListener.rb +109 -0
  47. data/lib/antlr4/error/ErrorStrategy.rb +742 -0
  48. data/lib/antlr4/tree/Chunk.rb +31 -0
  49. data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
  50. data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
  51. data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
  52. data/lib/antlr4/tree/RuleTagToken.rb +39 -0
  53. data/lib/antlr4/tree/TokenTagToken.rb +38 -0
  54. data/lib/antlr4/tree/Tree.rb +204 -0
  55. data/lib/antlr4/tree/Trees.rb +111 -0
  56. data/lib/antlr4/version.rb +5 -0
  57. data/lib/antlr4/xpath/XPath.rb +354 -0
  58. data/lib/double_key_map.rb +78 -0
  59. data/lib/java_symbols.rb +24 -0
  60. data/lib/uuid.rb +87 -0
  61. data/test/test_intervalset.rb +664 -0
  62. data/test/test_tree.rb +140 -0
  63. data/test/test_uuid.rb +122 -0
  64. metadata +109 -0
@@ -0,0 +1,31 @@
1
# Common superclass of the pieces a tree pattern string is split into
# (see TagChunk and TextChunk). It carries no state or behaviour of its own.
class Chunk
end
3
+
4
# A tag found inside a tree pattern: a tag name plus an optional label.
class TagChunk < Chunk
  attr_accessor :tag, :label

  # @param tag   the tag text
  # @param label optional label attached to the tag; nil when absent
  def initialize(tag, label = nil)
    self.tag = tag
    self.label = label
  end

  # @return the bare tag when there is no label, otherwise "label:tag"
  def to_s
    return tag if label.nil?

    "#{label}:#{tag}"
  end
end
20
# A run of literal text found in a tree pattern.
class TextChunk < Chunk
  attr_accessor :text

  # @param text the literal text of this chunk
  def initialize(text)
    self.text = text
  end

  # @return the text wrapped in single quotes
  def to_s
    "'#{text}'"
  end
end
@@ -0,0 +1,105 @@
1
# Represents the result of matching a {@link ParseTree} against a tree pattern.
class ParseTreeMatch
  attr_accessor :tree, :pattern, :labels, :mismatchedNode

  # Constructs a new instance of {@link ParseTreeMatch} from the specified
  # parse tree and pattern.
  #
  # @param tree The parse tree to match against the pattern.
  # @param pattern The parse tree pattern.
  # @param labels A mapping from label names to collections of
  # {@link ParseTree} objects located by the tree pattern matching process.
  # @param mismatchedNode The first node which failed to match the tree
  # pattern during the matching process (nil when the match succeeded).
  #
  # @raise [ArgumentError] if {@code tree}, {@code pattern} or {@code labels}
  # is nil. ArgumentError is still an Exception, so callers rescuing
  # Exception keep working.
  def initialize(tree, pattern, labels, mismatchedNode)
    raise ArgumentError, "tree cannot be null" if tree.nil?
    raise ArgumentError, "pattern cannot be null" if pattern.nil?
    raise ArgumentError, "labels cannot be null" if labels.nil?

    self.tree = tree
    self.pattern = pattern
    self.labels = labels
    self.mismatchedNode = mismatchedNode
  end

  # Get the last node associated with a specific {@code label}.
  #
  # <p>For example, for pattern {@code <id:ID>}, {@code get("id")} returns the
  # node matched for that {@code ID}. If more than one node matched the
  # specified label, only the last is returned.</p>
  #
  # @param label The label to check.
  #
  # @return The last entry stored under {@code label}, or nil if the label is
  # unknown or its list is empty.
  def get(label)
    # Hash#fetch with a default is the Ruby counterpart of Python's dict.get.
    parseTrees = labels.fetch(label, nil)
    return nil if parseTrees.nil? || parseTrees.empty?

    parseTrees[-1]
  end

  # Return all nodes recorded under the specified label.
  #
  # @param label The label.
  #
  # @return The collection stored under {@code label}; a new empty Array when
  # nothing was recorded for it.
  def getAll(label)
    labels.fetch(label) { [] }
  end

  # Gets a value indicating whether the match operation succeeded.
  #
  # @return true when no mismatched node was recorded; otherwise false.
  def succeeded
    mismatchedNode.nil?
  end

  # @return a summary such as "Match succeeded; found 2 labels"
  def to_s
    outcome = succeeded ? "succeeded" : "failed"
    "Match #{outcome}; found #{labels.length} labels"
  end
end
@@ -0,0 +1,70 @@
1
+ # A pattern like {@code <ID> = <expr>;} converted to a {@link ParseTree} by
2
+ # {@link ParseTreePatternMatcher#compile(String, int)}.
3
+ #
4
+ #from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher
5
+ #from antlr4.tree.Tree import ParseTree
6
+ #from antlr4.xpath.XPath import XPath
7
+
8
+
9
# A compiled tree pattern together with the matcher that produced it.
class ParseTreePattern
  attr_accessor :matcher, :patternRuleIndex, :pattern, :patternTree

  # Construct a new instance of the {@link ParseTreePattern} class.
  #
  # @param matcher The {@link ParseTreePatternMatcher} which created this
  # tree pattern.
  # @param pattern The tree pattern in concrete syntax form.
  # @param patternRuleIndex The parser rule which serves as the root of the
  # tree pattern.
  # @param patternTree The tree pattern in {@link ParseTree} form.
  def initialize(matcher, pattern, patternRuleIndex, patternTree)
    self.matcher = matcher
    self.patternRuleIndex = patternRuleIndex
    self.pattern = pattern
    self.patternTree = patternTree
  end

  # Match a specific parse tree against this tree pattern.
  #
  # @param tree The parse tree to match against this tree pattern.
  # @return The result of {@code matcher.matchPattern(tree, self)}; its
  # {@code succeeded} method tells whether the match was successful.
  def match(tree)
    # The matcher exposes this operation as matchPattern; it has no `match`.
    matcher.matchPattern(tree, self)
  end

  # Determine whether or not a parse tree matches this tree pattern.
  #
  # @param tree The parse tree to match against this tree pattern.
  # @return true if {@code tree} is a match for this pattern; otherwise false.
  def matches(tree)
    matcher.matchPattern(tree, self).succeeded
  end

  # Find all nodes using XPath and then try to match those subtrees against
  # this tree pattern.
  #
  # @param tree The {@link ParseTree} to match against this pattern.
  # @param xpath An expression matching the nodes
  #
  # @return A collection of match results for the subtrees that matched.
  # Unsuccessful matches are omitted from the result.
  def findAll(tree, xpath)
    subtrees = XPath.findAll(tree, xpath, matcher.parser)
    subtrees.map { |subtree| match(subtree) }.select(&:succeeded)
  end
end
@@ -0,0 +1,334 @@
1
+ #
2
+ #from antlr4 import Lexer, CommonTokenStream, ParserRuleContext
3
+ #from antlr4.InputStream import InputStream
4
+ #from antlr4.ListTokenSource import ListTokenSource
5
+ #from antlr4.Token import Token
6
+ #from antlr4.error.ErrorStrategy import BailErrorStrategy
7
+ #from antlr4.error.Errors import RecognitionException, ParseCancellationException
8
+ #from antlr4.tree.Chunk import TagChunk, TextChunk
9
+ #from antlr4.tree.RuleTagToken import RuleTagToken
10
+ ##from antlr4.tree.TokenTagToken import TokenTagToken
11
+ #from antlr4.tree.Tree import ParseTree, TerminalNode, RuleNode
12
+ #from antlr4.tree.ParseTreeMatch import ParseTreeMatch
13
+ # from antlr4.tree.ParseTreePattern import ParseTreePattern
14
+
15
# Raised by ParseTreePatternMatcher#compileTreePattern to wrap an unexpected
# error that occurred while parsing a tree pattern.
class CannotInvokeStartRule < Exception
end
17
+
18
# Raised by ParseTreePatternMatcher#compileTreePattern when tokens are left
# over after parsing a tree pattern.
class StartRuleDoesNotConsumeFullPattern < Exception
end
20
+
21
# Compiles tree pattern strings such as {@code <ID> = <expr>;} and matches
# parse trees against them.
class ParseTreePatternMatcher
  attr_accessor :lexer, :parser, :start, :stop, :escape

  # Constructs a {@link ParseTreePatternMatcher} from a {@link Lexer} and
  # {@link Parser} object. The lexer input stream is altered for tokenizing
  # the tree patterns. The parser is used as a convenient mechanism to get
  # the grammar name, plus token, rule names.
  def initialize(lexer, parser)
    self.lexer = lexer
    self.parser = parser
    self.start = "<"
    self.stop = ">"
    self.escape = "\\" # e.g., \< and \> must escape BOTH!
  end

  # Set the delimiters used for marking rule and token tags within concrete
  # syntax used by the tree pattern parser.
  #
  # @param start The start delimiter.
  # @param stop The stop delimiter.
  # @param escapeLeft The escape sequence to use for escaping a start or stop delimiter.
  #
  # @raise [ArgumentError] if {@code start} or {@code stop} is nil or empty.
  def setDelimiters(start, stop, escapeLeft)
    raise ArgumentError, "start cannot be null or empty" if start.nil? || start.empty?
    raise ArgumentError, "stop cannot be null or empty" if stop.nil? || stop.empty?

    self.start = start
    self.stop = stop
    self.escape = escapeLeft
  end

  # Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}?
  def matchesRuleIndex(tree, pattern, patternRuleIndex)
    compiled = compileTreePattern(pattern, patternRuleIndex)
    # The compiled-pattern predicate in this class is matchesPattern.
    matchesPattern(tree, compiled)
  end

  # Does the compiled {@code pattern} match {@code tree}? Pass in a compiled
  # pattern instead of a string representation of a tree pattern.
  def matchesPattern(tree, pattern)
    matchImpl(tree, pattern.patternTree, {}).nil?
  end

  # Compare {@code pattern} matched as rule {@code patternRuleIndex} against
  # {@code tree} and return a {@link ParseTreeMatch} object that contains the
  # matched elements, or the node at which the match failed.
  def matchRuleIndex(tree, pattern, patternRuleIndex)
    compiled = compileTreePattern(pattern, patternRuleIndex)
    matchPattern(tree, compiled)
  end

  # Compare {@code pattern} matched against {@code tree} and return a
  # {@link ParseTreeMatch} object that contains the matched elements, or the
  # node at which the match failed. Pass in a compiled pattern instead of a
  # string representation of a tree pattern.
  def matchPattern(tree, pattern)
    labels = {}
    mismatchedNode = matchImpl(tree, pattern.patternTree, labels)
    ParseTreeMatch.new(tree, pattern, labels, mismatchedNode)
  end

  # For repeated use of a tree pattern, compile it to a
  # {@link ParseTreePattern} using this method.
  #
  # @raise [CannotInvokeStartRule] wrapping any error other than a
  # ParseCancellationException or RecognitionException raised while parsing.
  # @raise [StartRuleDoesNotConsumeFullPattern] if tokens remain after the parse.
  def compileTreePattern(pattern, patternRuleIndex)
    tokenList = tokenize(pattern)
    tokenSrc = ListTokenSource.new(tokenList)
    tokens = CommonTokenStream.new(tokenSrc)

    p = parser
    parserInterp = ParserInterpreter.new(p.grammarFileName, p.tokenNames, p.ruleNames,
                                         p.getATNWithBypassAlts(), tokens)
    tree = nil
    begin
      parserInterp.setErrorHandler(BailErrorStrategy.new)
      tree = parserInterp.parse(patternRuleIndex)
    rescue ParseCancellationException => e
      # Surface the underlying error; fall back to e itself when no cause is
      # recorded, because `raise nil` would be a TypeError.
      raise(e.cause || e)
    rescue RecognitionException
      raise
    rescue Exception => e
      # Deliberately broad: the custom errors in this file derive from Exception.
      raise CannotInvokeStartRule.new(e)
    end

    # Make sure tree pattern compilation checks for a complete parse
    raise StartRuleDoesNotConsumeFullPattern.new if tokens.LA(1) != Token::EOF

    ParseTreePattern.new(self, pattern, patternRuleIndex, tree)
  end

  # Recursively walk {@code tree} against {@code patternTree}, filling
  # {@code labels}.
  #
  # @return the first node encountered in {@code tree} which does not match
  # a corresponding node in {@code patternTree}, or nil if the match
  # was successful.
  # @raise [ArgumentError] if {@code tree} or {@code patternTree} is nil.
  def matchImpl(tree, patternTree, labels)
    raise ArgumentError, "tree cannot be null" if tree.nil?
    raise ArgumentError, "patternTree cannot be null" if patternTree.nil?

    # x and <ID>, x and y, or x and x; or could be mismatched types
    if tree.kind_of?(TerminalNode) && patternTree.kind_of?(TerminalNode)
      # tokens of different types never match
      return tree unless tree.symbol.type == patternTree.symbol.type

      if patternTree.symbol.kind_of?(TokenTagToken) # x and <ID>
        tokenTagToken = patternTree.symbol
        # track label->list-of-nodes for both token name and label (if any)
        map(labels, tokenTagToken.tokenName, tree)
        map(labels, tokenTagToken.label, tree) unless tokenTagToken.label.nil?
        return nil
      end

      # x and x match; x and y do not
      return tree.getText() == patternTree.getText() ? nil : tree
    end

    if tree.kind_of?(ParserRuleContext) && patternTree.kind_of?(ParserRuleContext)
      # (expr ...) and <expr>
      ruleTagToken = getRuleTagToken(patternTree)
      unless ruleTagToken.nil?
        return tree unless tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex

        # track label->list-of-nodes for both rule name and label (if any)
        map(labels, ruleTagToken.ruleName, tree)
        map(labels, ruleTagToken.label, tree) unless ruleTagToken.label.nil?
        return nil
      end

      # (expr ...) and (expr ...)
      return tree if tree.getChildCount() != patternTree.getChildCount()

      tree.getChildCount().times do |i|
        childMatch = matchImpl(tree.getChild(i), patternTree.getChild(i), labels)
        return childMatch unless childMatch.nil?
      end
      return nil
    end

    # if nodes aren't both tokens or both rule nodes, can't match
    tree
  end

  # Append {@code tree} to the list stored under {@code label}, creating the
  # list on first use.
  #
  # @return the list stored under {@code label}
  def map(labels, label, tree)
    (labels[label] ||= []).push(tree)
  end

  # Is {@code tree} a {@code (expr <expr>)} subtree?
  #
  # @return the RuleTagToken of the single terminal child, or nil otherwise.
  def getRuleTagToken(tree)
    return nil unless tree.kind_of?(RuleNode)
    return nil unless tree.getChildCount() == 1 && tree.getChild(0).kind_of?(TerminalNode)

    symbol = tree.getChild(0).symbol
    symbol.kind_of?(RuleTagToken) ? symbol : nil
  end

  # Turn a pattern string into a token list: tag chunks become tag tokens and
  # text chunks are run through the lexer.
  #
  # @raise [ArgumentError] for an unknown token or rule name, or a tag that
  # starts with neither an upper-case nor a lower-case letter.
  def tokenize(pattern)
    # split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
    chunks = split(pattern)

    # create token stream from text and tags
    tokens = []
    chunks.each do |chunk|
      if chunk.kind_of?(TagChunk)
        tag = chunk.tag.to_s
        # add special rule token or conjure up new token from name
        if tag =~ /\A[[:upper:]]/
          ttype = parser.getTokenType(chunk.tag)
          if ttype == Token::INVALID_TYPE
            raise ArgumentError, "Unknown token #{chunk.tag} in pattern: #{pattern}"
          end
          tokens.push(TokenTagToken.new(chunk.tag, ttype, chunk.label))
        elsif tag =~ /\A[[:lower:]]/
          ruleIndex = parser.getRuleIndex(chunk.tag)
          if ruleIndex == -1
            raise ArgumentError, "Unknown rule #{chunk.tag} in pattern: #{pattern}"
          end
          ruleImaginaryTokenType = parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex]
          tokens.push(RuleTagToken.new(chunk.tag, ruleImaginaryTokenType, chunk.label))
        else
          raise ArgumentError, "Invalid tag #{chunk.tag} in pattern: #{pattern}"
        end
      else
        lexer.setInputStream(InputStream.new(chunk.text))
        t = lexer.nextToken()
        while t.type != Token::EOF
          tokens.push(t)
          t = lexer.nextToken()
        end
      end
    end
    tokens
  end

  # Split {@code <ID> = <e:expr> ;} into 4 chunks for tokenizing by {@link #tokenize}.
  #
  # @return an Array of TagChunk and TextChunk objects in pattern order, with
  # escape sequences removed from the text chunks.
  # @raise [ArgumentError] if the start and stop delimiters are unbalanced or
  # out of order.
  def split(pattern)
    n = pattern.length
    escapedStart = self.escape + self.start
    escapedStop = self.escape + self.stop

    # find all start and stop indexes first, then collect
    starts = []
    stops = []
    p = 0
    while p < n
      if delimiterAt?(pattern, escapedStart, p)
        p += escapedStart.length
      elsif delimiterAt?(pattern, escapedStop, p)
        p += escapedStop.length
      elsif delimiterAt?(pattern, self.start, p)
        starts.push(p)
        p += self.start.length
      elsif delimiterAt?(pattern, self.stop, p)
        stops.push(p)
        p += self.stop.length
      else
        p += 1
      end
    end

    nt = starts.length
    raise ArgumentError, "unterminated tag in pattern: #{pattern}" if nt > stops.length
    raise ArgumentError, "missing start tag in pattern: #{pattern}" if nt < stops.length

    starts.each_with_index do |from, i|
      if from >= stops[i]
        raise ArgumentError, "tag delimiters out of order in pattern: #{pattern}"
      end
    end

    # collect into chunks now
    chunks = []
    chunks.push(TextChunk.new(pattern)) if nt == 0

    # copy text up to first tag into chunks
    chunks.push(TextChunk.new(pattern[0...starts[0]])) if nt > 0 && starts[0] > 0

    nt.times do |i|
      # copy inside of <tag>
      tag = pattern[(starts[i] + self.start.length)...stops[i]]
      ruleOrToken = tag
      label = nil
      colon = tag.index(':') # nil (not -1) when there is no label separator
      if colon
        label = tag[0...colon]
        ruleOrToken = tag[(colon + 1)..-1]
      end
      # TagChunk takes the tag first and the optional label second
      chunks.push(TagChunk.new(ruleOrToken, label))
      if i + 1 < nt
        # copy from end of <tag> to start of next
        chunks.push(TextChunk.new(pattern[(stops[i] + self.stop.length)...starts[i + 1]]))
      end
    end

    if nt > 0
      afterLastTag = stops[nt - 1] + self.stop.length
      # copy text from end of last tag to end
      chunks.push(TextChunk.new(pattern[afterLastTag..-1])) if afterLastTag < n
    end

    # strip out the escape sequences from text chunks but not tags
    chunks.map do |c|
      next c unless c.kind_of?(TextChunk)

      unescaped = c.text.gsub(self.escape, "")
      unescaped.length < c.text.length ? TextChunk.new(unescaped) : c
    end
  end

  private

  # True when +token+ occurs in +text+ exactly at index +pos+.
  def delimiterAt?(text, token, pos)
    text[pos, token.length] == token
  end
end