antlr4 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +27 -0
  3. data/README.md +46 -0
  4. data/lib/antlr4.rb +262 -0
  5. data/lib/antlr4/BufferedTokenStream.rb +306 -0
  6. data/lib/antlr4/CommonTokenFactory.rb +53 -0
  7. data/lib/antlr4/CommonTokenStream.rb +56 -0
  8. data/lib/antlr4/FileStream.rb +14 -0
  9. data/lib/antlr4/InputStream.rb +82 -0
  10. data/lib/antlr4/IntervalSet.rb +341 -0
  11. data/lib/antlr4/LL1Analyzer.rb +177 -0
  12. data/lib/antlr4/Lexer.rb +335 -0
  13. data/lib/antlr4/ListTokenSource.rb +140 -0
  14. data/lib/antlr4/Parser.rb +562 -0
  15. data/lib/antlr4/ParserInterpreter.rb +149 -0
  16. data/lib/antlr4/ParserRuleContext.rb +162 -0
  17. data/lib/antlr4/PredictionContext.rb +690 -0
  18. data/lib/antlr4/Recognizer.rb +162 -0
  19. data/lib/antlr4/RuleContext.rb +226 -0
  20. data/lib/antlr4/Token.rb +124 -0
  21. data/lib/antlr4/TokenFactory.rb +3 -0
  22. data/lib/antlr4/TokenSource.rb +4 -0
  23. data/lib/antlr4/TokenStream.rb +3 -0
  24. data/lib/antlr4/TraceListener.rb +23 -0
  25. data/lib/antlr4/atn/ATN.rb +133 -0
  26. data/lib/antlr4/atn/ATNConfig.rb +146 -0
  27. data/lib/antlr4/atn/ATNConfigSet.rb +215 -0
  28. data/lib/antlr4/atn/ATNDeserializationOptions.rb +62 -0
  29. data/lib/antlr4/atn/ATNDeserializer.rb +604 -0
  30. data/lib/antlr4/atn/ATNSimulator.rb +43 -0
  31. data/lib/antlr4/atn/ATNState.rb +253 -0
  32. data/lib/antlr4/atn/ATNType.rb +22 -0
  33. data/lib/antlr4/atn/LexerATNSimulator.rb +612 -0
  34. data/lib/antlr4/atn/LexerAction.rb +311 -0
  35. data/lib/antlr4/atn/LexerActionExecutor.rb +134 -0
  36. data/lib/antlr4/atn/ParserATNSimulator.rb +1622 -0
  37. data/lib/antlr4/atn/PredictionMode.rb +525 -0
  38. data/lib/antlr4/atn/SemanticContext.rb +355 -0
  39. data/lib/antlr4/atn/Transition.rb +297 -0
  40. data/lib/antlr4/base.rb +60 -0
  41. data/lib/antlr4/dfa/DFA.rb +128 -0
  42. data/lib/antlr4/dfa/DFASerializer.rb +77 -0
  43. data/lib/antlr4/dfa/DFAState.rb +133 -0
  44. data/lib/antlr4/error.rb +151 -0
  45. data/lib/antlr4/error/DiagnosticErrorListener.rb +136 -0
  46. data/lib/antlr4/error/ErrorListener.rb +109 -0
  47. data/lib/antlr4/error/ErrorStrategy.rb +742 -0
  48. data/lib/antlr4/tree/Chunk.rb +31 -0
  49. data/lib/antlr4/tree/ParseTreeMatch.rb +105 -0
  50. data/lib/antlr4/tree/ParseTreePattern.rb +70 -0
  51. data/lib/antlr4/tree/ParseTreePatternMatcher.rb +334 -0
  52. data/lib/antlr4/tree/RuleTagToken.rb +39 -0
  53. data/lib/antlr4/tree/TokenTagToken.rb +38 -0
  54. data/lib/antlr4/tree/Tree.rb +204 -0
  55. data/lib/antlr4/tree/Trees.rb +111 -0
  56. data/lib/antlr4/version.rb +5 -0
  57. data/lib/antlr4/xpath/XPath.rb +354 -0
  58. data/lib/double_key_map.rb +78 -0
  59. data/lib/java_symbols.rb +24 -0
  60. data/lib/uuid.rb +87 -0
  61. data/test/test_intervalset.rb +664 -0
  62. data/test/test_tree.rb +140 -0
  63. data/test/test_uuid.rb +122 -0
  64. metadata +109 -0
@@ -0,0 +1,31 @@
1
# Common base type for the pieces a tree pattern string is split into.
class Chunk
end

# A tag piece of a pattern, written <tag> or <label:tag>.
class TagChunk < Chunk
  attr_accessor :tag, :label

  # tag   - the name carried by the tag
  # label - optional label attached to the tag; nil when the tag is unlabeled
  def initialize(tag, label = nil)
    @tag = tag
    @label = label
  end

  # Returns the tag itself when there is no label, otherwise "label:tag".
  def to_s
    return tag if label.nil?

    "#{label}:#{tag}"
  end
end

# A literal-text piece of a pattern.
class TextChunk < Chunk
  attr_accessor :text

  # text - the literal text of this chunk
  def initialize(text)
    @text = text
  end

  # Returns the text wrapped in single quotes.
  def to_s
    "'#{text}'"
  end
end
@@ -0,0 +1,105 @@
1
# Represents the result of matching a parse tree against a tree pattern.
class ParseTreeMatch
  attr_accessor :tree, :pattern, :labels, :mismatchedNode

  # Builds a match result from the matched tree and the pattern used.
  #
  # @param tree the parse tree that was matched against the pattern
  # @param pattern the parse tree pattern
  # @param labels a mapping from label names to arrays of parse tree nodes
  #   collected during matching
  # @param mismatchedNode the first node that failed to match, or nil when
  #   the match succeeded
  # @raise [ArgumentError] if tree, pattern or labels is nil
  def initialize(tree, pattern, labels, mismatchedNode)
    raise ArgumentError, "tree cannot be null" if tree.nil?
    raise ArgumentError, "pattern cannot be null" if pattern.nil?
    raise ArgumentError, "labels cannot be null" if labels.nil?

    self.tree = tree
    self.pattern = pattern
    self.labels = labels
    self.mismatchedNode = mismatchedNode
  end

  # Returns the last node recorded under +label+.
  #
  # For pattern <id:ID>, get("id") returns the node matched for that ID. When
  # several nodes were recorded under the label only the last one is returned.
  #
  # @param label the label to look up
  # @return the last node stored for the label, or nil when the label is
  #   unknown or has no nodes
  def get(label)
    # Hash has no Python-style #get; index lookup yields nil for a missing key.
    nodes = labels[label]
    return nil if nodes.nil? || nodes.empty?

    nodes.last
  end

  # Returns every node recorded under +label+.
  #
  # @param label the label to look up
  # @return the array stored for the label, or a new empty array when the
  #   label is unknown
  def getAll(label)
    labels[label] || []
  end

  # @return true when no mismatched node was recorded, false otherwise
  def succeeded
    mismatchedNode.nil?
  end

  # @return a summary such as "Match succeeded; found 2 labels"
  def to_s
    outcome = succeeded ? "succeeded" : "failed"
    "Match #{outcome}; found #{labels.length} labels"
  end
end
@@ -0,0 +1,70 @@
1
+ # A pattern like {@code <ID> = <expr>;} converted to a {@link ParseTree} by
2
+ # {@link ParseTreePatternMatcher#compileTreePattern}.
3
+ #
4
+ #from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher
5
+ #from antlr4.tree.Tree import ParseTree
6
+ #from antlr4.xpath.XPath import XPath
7
+
8
+
9
# A compiled tree pattern: the pattern text together with the parse tree it
# was converted to and the matcher that produced it.
class ParseTreePattern
  attr_accessor :matcher, :patternRuleIndex, :pattern, :patternTree

  # @param matcher the ParseTreePatternMatcher which created this pattern
  # @param pattern the tree pattern in concrete syntax form
  # @param patternRuleIndex the parser rule serving as the root of the pattern
  # @param patternTree the tree pattern in parse tree form
  def initialize(matcher, pattern, patternRuleIndex, patternTree)
    self.matcher = matcher
    self.patternRuleIndex = patternRuleIndex
    self.pattern = pattern
    self.patternTree = patternTree
  end

  # Matches a parse tree against this pattern.
  #
  # ParseTreePatternMatcher exposes the compiled-pattern match as
  # #matchPattern and defines no #match, so that method is preferred; a
  # matcher that only answers to #match is still supported.
  #
  # @param tree the parse tree to match against this pattern
  # @return the match result returned by the matcher; #succeeded on it tells
  #   whether the match was successful
  def match(tree)
    if matcher.respond_to?(:matchPattern)
      matcher.matchPattern(tree, self)
    else
      matcher.match(tree, self)
    end
  end

  # @param tree the parse tree to match against this pattern
  # @return the value of #succeeded on the match result for +tree+
  def matches(tree)
    match(tree).succeeded
  end

  # Finds nodes with XPath.findAll and matches each of them against this
  # pattern.
  #
  # @param tree the parse tree to search
  # @param xpath an expression selecting the candidate nodes
  # @return the match results that succeeded; failed matches are left out
  def findAll(tree, xpath)
    candidates = XPath.findAll(tree, xpath, matcher.parser)
    candidates.map { |subtree| match(subtree) }.select(&:succeeded)
  end
end
@@ -0,0 +1,334 @@
1
+ #
2
+ #from antlr4 import Lexer, CommonTokenStream, ParserRuleContext
3
+ #from antlr4.InputStream import InputStream
4
+ #from antlr4.ListTokenSource import ListTokenSource
5
+ #from antlr4.Token import Token
6
+ #from antlr4.error.ErrorStrategy import BailErrorStrategy
7
+ #from antlr4.error.Errors import RecognitionException, ParseCancellationException
8
+ #from antlr4.tree.Chunk import TagChunk, TextChunk
9
+ #from antlr4.tree.RuleTagToken import RuleTagToken
10
+ ##from antlr4.tree.TokenTagToken import TokenTagToken
11
+ #from antlr4.tree.Tree import ParseTree, TerminalNode, RuleNode
12
+ #from antlr4.tree.ParseTreeMatch import ParseTreeMatch
13
+ # from antlr4.tree.ParseTreePattern import ParseTreePattern
14
+
15
# Raised by ParseTreePatternMatcher#compileTreePattern to wrap an unexpected
# error hit while parsing a pattern from its start rule.
class CannotInvokeStartRule < Exception; end
17
+
18
# Raised by ParseTreePatternMatcher#compileTreePattern when tokens remain
# after the start rule has parsed the pattern.
class StartRuleDoesNotConsumeFullPattern < Exception; end
20
+
21
# Compiles tree patterns such as "<ID> = <expr>;" and matches parse trees
# against them.
#
# The lexer's input stream is replaced while tokenizing the literal text of a
# pattern. The parser supplies the grammar file name, token names, rule names
# and the ATN with bypass alternatives.
class ParseTreePatternMatcher
  attr_accessor :lexer, :parser, :start, :stop, :escape

  # @param lexer lexer used to tokenize the literal text of patterns
  # @param parser parser used to resolve token/rule names and to parse patterns
  def initialize(lexer, parser)
    self.lexer = lexer
    self.parser = parser
    self.start = "<"
    self.stop = ">"
    self.escape = "\\" # e.g., \< and \> must escape BOTH!
  end

  # Sets the delimiters marking rule and token tags inside a pattern.
  #
  # @param start the start delimiter
  # @param stop the stop delimiter
  # @param escapeLeft the escape sequence placed before a literal start or
  #   stop delimiter
  # @raise [ArgumentError] if start or stop is nil or empty
  def setDelimiters(start, stop, escapeLeft)
    raise ArgumentError, "start cannot be null or empty" if start.nil? || start.empty?
    raise ArgumentError, "stop cannot be null or empty" if stop.nil? || stop.empty?

    self.start = start
    self.stop = stop
    self.escape = escapeLeft
  end

  # Does +pattern+ (a string), compiled as rule +patternRuleIndex+, match +tree+?
  def matchesRuleIndex(tree, pattern, patternRuleIndex)
    matchesPattern(tree, compileTreePattern(pattern, patternRuleIndex))
  end

  # Does the already compiled +pattern+ match +tree+?
  #
  # @return true when matchImpl reports no mismatched node
  def matchesPattern(tree, pattern)
    matchImpl(tree, pattern.patternTree, {}).nil?
  end

  # Compiles +pattern+ (a string) as rule +patternRuleIndex+, matches it
  # against +tree+ and returns the ParseTreeMatch describing the result.
  def matchRuleIndex(tree, pattern, patternRuleIndex)
    matchPattern(tree, compileTreePattern(pattern, patternRuleIndex))
  end

  # Matches the already compiled +pattern+ against +tree+.
  #
  # @return a ParseTreeMatch holding the collected labels and the node at
  #   which the match failed (nil on success)
  def matchPattern(tree, pattern)
    labels = {}
    mismatched = matchImpl(tree, pattern.patternTree, labels)
    ParseTreeMatch.new(tree, pattern, labels, mismatched)
  end

  # Compiles a pattern string into a ParseTreePattern for repeated use.
  #
  # The pattern is tokenized, then parsed by a ParserInterpreter built from
  # the parser's ATN with bypass alternatives, starting at +patternRuleIndex+.
  #
  # @raise the cause of a ParseCancellationException (or the exception itself
  #   when it carries no cause)
  # @raise RecognitionException unchanged
  # @raise [CannotInvokeStartRule] wrapping any other error raised while parsing
  # @raise [StartRuleDoesNotConsumeFullPattern] if tokens remain after the parse
  def compileTreePattern(pattern, patternRuleIndex)
    tokens = CommonTokenStream.new(ListTokenSource.new(tokenize(pattern)))
    interp = ParserInterpreter.new(parser.grammarFileName, parser.tokenNames, parser.ruleNames,
                                   parser.getATNWithBypassAlts, tokens)
    tree = nil
    begin
      interp.setErrorHandler(BailErrorStrategy.new)
      tree = interp.parse(patternRuleIndex)
    rescue SystemExit, SignalException, NoMemoryError
      # Process-level conditions are not parse failures; let them through.
      raise
    rescue ParseCancellationException => e
      raise(e.cause || e)
    rescue RecognitionException
      raise
    rescue Exception => e
      # Exception (not StandardError) is rescued on purpose: error classes in
      # this file derive from Exception directly.
      raise CannotInvokeStartRule.new(e)
    end

    # Make sure tree pattern compilation checks for a complete parse
    raise StartRuleDoesNotConsumeFullPattern.new if tokens.LA(1) != Token::EOF

    ParseTreePattern.new(self, pattern, patternRuleIndex, tree)
  end

  # Recursively walks +tree+ against +patternTree+, recording matched nodes
  # in +labels+.
  #
  # @return the first node of +tree+ that does not match the corresponding
  #   node of +patternTree+, or nil when everything matched
  # @raise [ArgumentError] if tree or patternTree is nil
  def matchImpl(tree, patternTree, labels)
    raise ArgumentError, "tree cannot be null" if tree.nil?
    raise ArgumentError, "patternTree cannot be null" if patternTree.nil?

    # x and <ID>, x and y, or x and x; or could be mismatched types
    if tree.kind_of?(TerminalNode) && patternTree.kind_of?(TerminalNode)
      return matchTerminalNodes(tree, patternTree, labels)
    end

    if tree.kind_of?(ParserRuleContext) && patternTree.kind_of?(ParserRuleContext)
      return matchRuleNodes(tree, patternTree, labels)
    end

    # if nodes aren't both tokens or both rule nodes, can't match
    tree
  end

  # Appends +tree+ to the array stored under +label+ in +labels+, creating
  # the array on first use.
  #
  # @return the array stored under +label+
  def map(labels, label, tree)
    (labels[label] ||= []).push(tree)
  end

  # Is +tree+ a (expr <expr>) subtree, i.e. a rule node whose only child is a
  # terminal carrying a RuleTagToken?
  #
  # @return that RuleTagToken, or nil
  def getRuleTagToken(tree)
    return nil unless tree.kind_of?(RuleNode) && tree.getChildCount == 1

    child = tree.getChild(0)
    return nil unless child.kind_of?(TerminalNode) && child.symbol.kind_of?(RuleTagToken)

    child.symbol
  end

  # Converts a pattern string into a token list: each tag chunk becomes one
  # TokenTagToken or RuleTagToken, and each text chunk is run through the lexer.
  #
  # @raise [ArgumentError] for an unknown token name, unknown rule name or a
  #   tag starting with neither an upper- nor a lower-case letter
  def tokenize(pattern)
    tokens = []
    # split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
    split(pattern).each do |chunk|
      if chunk.kind_of?(TagChunk)
        tokens.push(tagTokenFor(chunk, pattern))
      else
        lexer.setInputStream(InputStream.new(chunk.text))
        token = lexer.nextToken
        while token.type != Token::EOF
          tokens.push(token)
          token = lexer.nextToken
        end
      end
    end
    tokens
  end

  # Splits "<ID> = <e:expr> ;" into 4 chunks for #tokenize: TagChunk and
  # TextChunk instances in pattern order. Escape sequences are stripped from
  # text chunks but not from tags.
  #
  # @raise [ArgumentError] for unbalanced or out-of-order tag delimiters
  def split(pattern)
    n = pattern.length
    esc = escape.to_s
    starts = []
    stops = []
    pos = 0
    at = ->(marker) { pattern[pos, marker.length] == marker }

    # find all start and stop indexes first, then collect
    while pos < n
      if !esc.empty? && at.call(esc + start)
        pos += esc.length + start.length
      elsif !esc.empty? && at.call(esc + stop)
        pos += esc.length + stop.length
      elsif at.call(start)
        starts.push(pos)
        pos += start.length
      elsif at.call(stop)
        stops.push(pos)
        pos += stop.length
      else
        pos += 1
      end
    end

    nt = starts.length
    raise ArgumentError, "unterminated tag in pattern: #{pattern}" if nt > stops.length
    raise ArgumentError, "missing start tag in pattern: #{pattern}" if nt < stops.length

    starts.zip(stops).each do |from, to|
      raise ArgumentError, "tag delimiters out of order in pattern: #{pattern}" if from >= to
    end

    return [unescapedTextChunk(pattern, esc)] if nt.zero?

    chunks = []
    # copy text up to first tag into chunks
    chunks.push(unescapedTextChunk(pattern[0...starts[0]], esc)) if starts[0] > 0

    nt.times do |i|
      # copy inside of <tag>
      tag = pattern[(starts[i] + start.length)...stops[i]]
      label = nil
      ruleOrToken = tag
      colon = tag.index(':')
      unless colon.nil?
        label = tag[0...colon]
        ruleOrToken = tag[(colon + 1)..-1]
      end
      # TagChunk#initialize takes (tag, label)
      chunks.push(TagChunk.new(ruleOrToken, label))

      next unless i + 1 < nt

      # copy from end of <tag> to start of next
      chunks.push(unescapedTextChunk(pattern[(stops[i] + stop.length)...starts[i + 1]], esc))
    end

    # copy text from end of last tag to end
    afterLastTag = stops[nt - 1] + stop.length
    chunks.push(unescapedTextChunk(pattern[afterLastTag..-1], esc)) if afterLastTag < n

    chunks
  end

  private

  # Matches two terminal nodes; returns nil on success, +tree+ on mismatch.
  def matchTerminalNodes(tree, patternTree, labels)
    # both must be tokens of the same type
    return tree unless tree.symbol.type == patternTree.symbol.type

    patternSymbol = patternTree.symbol
    if patternSymbol.kind_of?(TokenTagToken) # x and <ID>
      # track label->list-of-nodes for both token name and label (if any)
      map(labels, patternSymbol.tokenName, tree)
      map(labels, patternSymbol.label, tree) unless patternSymbol.label.nil?
      nil
    elsif tree.getText == patternTree.getText # x and x
      nil
    else # x and y
      tree
    end
  end

  # Matches two rule nodes; returns nil on success, the mismatched node otherwise.
  def matchRuleNodes(tree, patternTree, labels)
    # (expr ...) and <expr>
    ruleTagToken = getRuleTagToken(patternTree)
    unless ruleTagToken.nil?
      return tree unless tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex

      # track label->list-of-nodes for both rule name and label (if any)
      map(labels, ruleTagToken.ruleName, tree)
      map(labels, ruleTagToken.label, tree) unless ruleTagToken.label.nil?
      return nil
    end

    # (expr ...) and (expr ...)
    return tree if tree.getChildCount != patternTree.getChildCount

    tree.getChildCount.times do |i|
      childMismatch = matchImpl(tree.getChild(i), patternTree.getChild(i), labels)
      return childMismatch unless childMismatch.nil?
    end
    nil
  end

  # Builds the special token standing for a tag chunk: a TokenTagToken when
  # the tag starts with an upper-case letter, a RuleTagToken when it starts
  # with a lower-case letter.
  def tagTokenFor(chunk, pattern)
    tag = chunk.tag.to_s
    if tag =~ /\A[[:upper:]]/
      ttype = parser.getTokenType(chunk.tag)
      if ttype == Token::INVALID_TYPE
        raise ArgumentError, "Unknown token #{chunk.tag} in pattern: #{pattern}"
      end
      TokenTagToken.new(chunk.tag, ttype, chunk.label)
    elsif tag =~ /\A[[:lower:]]/
      ruleIndex = parser.getRuleIndex(chunk.tag)
      if ruleIndex == -1
        raise ArgumentError, "Unknown rule #{chunk.tag} in pattern: #{pattern}"
      end
      ruleImaginaryTokenType = parser.getATNWithBypassAlts.ruleToTokenType[ruleIndex]
      RuleTagToken.new(chunk.tag, ruleImaginaryTokenType, chunk.label)
    else
      raise ArgumentError, "Invalid tag #{chunk.tag} in pattern: #{pattern}"
    end
  end

  # Wraps +raw+ in a TextChunk after removing every occurrence of the escape
  # sequence +esc+ (nothing is removed when +esc+ is empty).
  def unescapedTextChunk(raw, esc)
    TextChunk.new(esc.empty? ? raw : raw.gsub(esc, ""))
  end
end