omlish 0.0.0.dev57__py3-none-any.whl → 0.0.0.dev58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omlish/__about__.py +2 -2
- omlish/antlr/__init__.py +0 -0
- omlish/antlr/_runtime/BufferedTokenStream.py +305 -0
- omlish/antlr/_runtime/CommonTokenFactory.py +64 -0
- omlish/antlr/_runtime/CommonTokenStream.py +90 -0
- omlish/antlr/_runtime/FileStream.py +30 -0
- omlish/antlr/_runtime/InputStream.py +90 -0
- omlish/antlr/_runtime/IntervalSet.py +183 -0
- omlish/antlr/_runtime/LL1Analyzer.py +176 -0
- omlish/antlr/_runtime/Lexer.py +332 -0
- omlish/antlr/_runtime/ListTokenSource.py +147 -0
- omlish/antlr/_runtime/Parser.py +583 -0
- omlish/antlr/_runtime/ParserInterpreter.py +173 -0
- omlish/antlr/_runtime/ParserRuleContext.py +189 -0
- omlish/antlr/_runtime/PredictionContext.py +632 -0
- omlish/antlr/_runtime/Recognizer.py +150 -0
- omlish/antlr/_runtime/RuleContext.py +230 -0
- omlish/antlr/_runtime/StdinStream.py +14 -0
- omlish/antlr/_runtime/Token.py +158 -0
- omlish/antlr/_runtime/TokenStreamRewriter.py +258 -0
- omlish/antlr/_runtime/Utils.py +36 -0
- omlish/antlr/_runtime/__init__.py +24 -0
- omlish/antlr/_runtime/_pygrun.py +174 -0
- omlish/antlr/_runtime/atn/ATN.py +135 -0
- omlish/antlr/_runtime/atn/ATNConfig.py +162 -0
- omlish/antlr/_runtime/atn/ATNConfigSet.py +215 -0
- omlish/antlr/_runtime/atn/ATNDeserializationOptions.py +27 -0
- omlish/antlr/_runtime/atn/ATNDeserializer.py +449 -0
- omlish/antlr/_runtime/atn/ATNSimulator.py +50 -0
- omlish/antlr/_runtime/atn/ATNState.py +267 -0
- omlish/antlr/_runtime/atn/ATNType.py +20 -0
- omlish/antlr/_runtime/atn/LexerATNSimulator.py +573 -0
- omlish/antlr/_runtime/atn/LexerAction.py +301 -0
- omlish/antlr/_runtime/atn/LexerActionExecutor.py +146 -0
- omlish/antlr/_runtime/atn/ParserATNSimulator.py +1664 -0
- omlish/antlr/_runtime/atn/PredictionMode.py +502 -0
- omlish/antlr/_runtime/atn/SemanticContext.py +333 -0
- omlish/antlr/_runtime/atn/Transition.py +271 -0
- omlish/antlr/_runtime/atn/__init__.py +4 -0
- omlish/antlr/_runtime/dfa/DFA.py +136 -0
- omlish/antlr/_runtime/dfa/DFASerializer.py +76 -0
- omlish/antlr/_runtime/dfa/DFAState.py +129 -0
- omlish/antlr/_runtime/dfa/__init__.py +4 -0
- omlish/antlr/_runtime/error/DiagnosticErrorListener.py +110 -0
- omlish/antlr/_runtime/error/ErrorListener.py +75 -0
- omlish/antlr/_runtime/error/ErrorStrategy.py +712 -0
- omlish/antlr/_runtime/error/Errors.py +176 -0
- omlish/antlr/_runtime/error/__init__.py +4 -0
- omlish/antlr/_runtime/tree/Chunk.py +33 -0
- omlish/antlr/_runtime/tree/ParseTreeMatch.py +121 -0
- omlish/antlr/_runtime/tree/ParseTreePattern.py +75 -0
- omlish/antlr/_runtime/tree/ParseTreePatternMatcher.py +377 -0
- omlish/antlr/_runtime/tree/RuleTagToken.py +53 -0
- omlish/antlr/_runtime/tree/TokenTagToken.py +50 -0
- omlish/antlr/_runtime/tree/Tree.py +194 -0
- omlish/antlr/_runtime/tree/Trees.py +114 -0
- omlish/antlr/_runtime/tree/__init__.py +2 -0
- omlish/antlr/_runtime/xpath/XPath.py +272 -0
- omlish/antlr/_runtime/xpath/XPathLexer.py +98 -0
- omlish/antlr/_runtime/xpath/__init__.py +4 -0
- {omlish-0.0.0.dev57.dist-info → omlish-0.0.0.dev58.dist-info}/METADATA +1 -1
- {omlish-0.0.0.dev57.dist-info → omlish-0.0.0.dev58.dist-info}/RECORD +66 -7
- {omlish-0.0.0.dev57.dist-info → omlish-0.0.0.dev58.dist-info}/LICENSE +0 -0
- {omlish-0.0.0.dev57.dist-info → omlish-0.0.0.dev58.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev57.dist-info → omlish-0.0.0.dev58.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev57.dist-info → omlish-0.0.0.dev58.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,377 @@
|
|
1
|
+
# type: ignore
|
2
|
+
# ruff: noqa
|
3
|
+
# flake8: noqa
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
6
|
+
# Use of this file is governed by the BSD 3-clause license that
|
7
|
+
# can be found in the LICENSE.txt file in the project root.
|
8
|
+
#
|
9
|
+
|
10
|
+
#
|
11
|
+
# A tree pattern matching mechanism for ANTLR {@link ParseTree}s.
|
12
|
+
#
|
13
|
+
# <p>Patterns are strings of source input text with special tags representing
|
14
|
+
# token or rule references such as:</p>
|
15
|
+
#
|
16
|
+
# <p>{@code <ID> = <expr>;}</p>
|
17
|
+
#
|
18
|
+
# <p>Given a pattern start rule such as {@code statement}, this object constructs
|
19
|
+
# a {@link ParseTree} with placeholders for the {@code ID} and {@code expr}
|
20
|
+
# subtree. Then the {@link #match} routines can compare an actual
|
21
|
+
# {@link ParseTree} from a parse with this pattern. Tag {@code <ID>} matches
|
22
|
+
# any {@code ID} token and tag {@code <expr>} references the result of the
|
23
|
+
# {@code expr} rule (generally an instance of {@code ExprContext}.</p>
|
24
|
+
#
|
25
|
+
# <p>Pattern {@code x = 0;} is a similar pattern that matches the same pattern
|
26
|
+
# except that it requires the identifier to be {@code x} and the expression to
|
27
|
+
# be {@code 0}.</p>
|
28
|
+
#
|
29
|
+
# <p>The {@link #matches} routines return {@code true} or {@code false} based
|
30
|
+
# upon a match for the tree rooted at the parameter sent in. The
|
31
|
+
# {@link #match} routines return a {@link ParseTreeMatch} object that
|
32
|
+
# contains the parse tree, the parse tree pattern, and a map from tag name to
|
33
|
+
# matched nodes (more below). A subtree that fails to match, returns with
|
34
|
+
# {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did not
|
35
|
+
# match.</p>
|
36
|
+
#
|
37
|
+
# <p>For efficiency, you can compile a tree pattern in string form to a
|
38
|
+
# {@link ParseTreePattern} object.</p>
|
39
|
+
#
|
40
|
+
# <p>See {@code TestParseTreeMatcher} for lots of examples.
|
41
|
+
# {@link ParseTreePattern} has two static helper methods:
|
42
|
+
# {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that
|
43
|
+
# are easy to use but not super efficient because they create new
|
44
|
+
# {@link ParseTreePatternMatcher} objects each time and have to compile the
|
45
|
+
# pattern in string form before using it.</p>
|
46
|
+
#
|
47
|
+
# <p>The lexer and parser that you pass into the {@link ParseTreePatternMatcher}
|
48
|
+
# constructor are used to parse the pattern in string form. The lexer converts
|
49
|
+
# the {@code <ID> = <expr>;} into a sequence of four tokens (assuming lexer
|
50
|
+
# throws out whitespace or puts it on a hidden channel). Be aware that the
|
51
|
+
# input stream is reset for the lexer (but not the parser; a
|
52
|
+
# {@link ParserInterpreter} is created to parse the input.). Any user-defined
|
53
|
+
# fields you have put into the lexer might get changed when this mechanism asks
|
54
|
+
# it to scan the pattern string.</p>
|
55
|
+
#
|
56
|
+
# <p>Normally a parser does not accept token {@code <expr>} as a valid
|
57
|
+
# {@code expr} but, from the parser passed in, we create a special version of
|
58
|
+
# the underlying grammar representation (an {@link ATN}) that allows imaginary
|
59
|
+
# tokens representing rules ({@code <expr>}) to match entire rules. We call
|
60
|
+
# these <em>bypass alternatives</em>.</p>
|
61
|
+
#
|
62
|
+
# <p>Delimiters are {@code <} and {@code >}, with {@code \} as the escape string
|
63
|
+
# by default, but you can set them to whatever you want using
|
64
|
+
# {@link #setDelimiters}. You must escape both start and stop strings
|
65
|
+
# {@code \<} and {@code \>}.</p>
|
66
|
+
#
|
67
|
+
from ..CommonTokenStream import CommonTokenStream
|
68
|
+
from ..InputStream import InputStream
|
69
|
+
from ..ParserRuleContext import ParserRuleContext
|
70
|
+
from ..Lexer import Lexer
|
71
|
+
from ..ListTokenSource import ListTokenSource
|
72
|
+
from ..Token import Token
|
73
|
+
from ..error.ErrorStrategy import BailErrorStrategy
|
74
|
+
from ..error.Errors import RecognitionException, ParseCancellationException
|
75
|
+
from .Chunk import TagChunk, TextChunk
|
76
|
+
from .RuleTagToken import RuleTagToken
|
77
|
+
from .TokenTagToken import TokenTagToken
|
78
|
+
from .Tree import ParseTree, TerminalNode, RuleNode
|
79
|
+
|
80
|
+
# need forward declaration: the real Parser and ParseTreePattern are imported
# lazily inside methods (to avoid circular imports); these placeholders only
# satisfy the annotations below at class-definition time.
Parser = None
ParseTreePattern = None
|
83
|
+
|
84
|
+
class CannotInvokeStartRule(Exception):
    """Raised when the pattern's start rule fails to parse for a reason other
    than a recognition error (those are re-raised as-is)."""

    def __init__(self, e:Exception):
        # Wrap the underlying failure so callers can inspect the original cause.
        super().__init__(e)
|
88
|
+
|
89
|
+
class StartRuleDoesNotConsumeFullPattern(Exception):
    """Raised when the start rule parses the pattern but leaves unconsumed
    tokens behind (i.e. the token stream is not at EOF afterwards)."""
|
92
|
+
|
93
|
+
|
94
|
+
class ParseTreePatternMatcher(object):
    """Matches ParseTree instances against tree patterns such as ``<ID> = <expr>;``.

    A pattern is compiled (via compileTreePattern) into a ParseTreePattern by
    tokenizing it with the supplied lexer and parsing it with a
    ParserInterpreter built from the parser's bypass-alternative ATN. Tag
    delimiters default to ``<`` / ``>`` with ``\\`` as the escape string; see
    setDelimiters().
    """
    __slots__ = ('lexer', 'parser', 'start', 'stop', 'escape')

    # Constructs a {@link ParseTreePatternMatcher} or from a {@link Lexer} and
    # {@link Parser} object. The lexer input stream is altered for tokenizing
    # the tree patterns. The parser is used as a convenient mechanism to get
    # the grammar name, plus token, rule names.
    def __init__(self, lexer:Lexer, parser:Parser):
        self.lexer = lexer
        self.parser = parser
        self.start = "<"
        self.stop = ">"
        self.escape = "\\" # e.g., \< and \> must escape BOTH!

    # Set the delimiters used for marking rule and token tags within concrete
    # syntax used by the tree pattern parser.
    #
    # @param start The start delimiter.
    # @param stop The stop delimiter.
    # @param escapeLeft The escape sequence to use for escaping a start or stop delimiter.
    #
    # @exception Exception if {@code start} or {@code stop} is {@code None} or empty.
    #
    def setDelimiters(self, start:str, stop:str, escapeLeft:str):
        if start is None or len(start)==0:
            raise Exception("start cannot be null or empty")
        if stop is None or len(stop)==0:
            raise Exception("stop cannot be null or empty")
        self.start = start
        self.stop = stop
        self.escape = escapeLeft

    # Does {@code pattern} matched as rule {@code patternRuleIndex} match {@code tree}?
    def matchesRuleIndex(self, tree:ParseTree, pattern:str, patternRuleIndex:int):
        p = self.compileTreePattern(pattern, patternRuleIndex)
        # BUGFIX: this previously called self.matches(...), a method that does
        # not exist on this class; the compiled-pattern predicate is
        # matchesPattern.
        return self.matchesPattern(tree, p)

    # Does {@code pattern} matched as rule patternRuleIndex match tree? Pass in a
    # compiled pattern instead of a string representation of a tree pattern.
    def matchesPattern(self, tree:ParseTree, pattern:ParseTreePattern):
        mismatchedNode = self.matchImpl(tree, pattern.patternTree, dict())
        return mismatchedNode is None

    # Compare {@code pattern} matched as rule {@code patternRuleIndex} against
    # {@code tree} and return a {@link ParseTreeMatch} object that contains the
    # matched elements, or the node at which the match failed.
    def matchRuleIndex(self, tree:ParseTree, pattern:str, patternRuleIndex:int):
        p = self.compileTreePattern(pattern, patternRuleIndex)
        return self.matchPattern(tree, p)

    # Compare {@code pattern} matched against {@code tree} and return a
    # {@link ParseTreeMatch} object that contains the matched elements, or the
    # node at which the match failed. Pass in a compiled pattern instead of a
    # string representation of a tree pattern.
    def matchPattern(self, tree:ParseTree, pattern:ParseTreePattern):
        labels = dict()
        mismatchedNode = self.matchImpl(tree, pattern.patternTree, labels)
        # imported here to avoid a circular import
        from .ParseTreeMatch import ParseTreeMatch
        return ParseTreeMatch(tree, pattern, labels, mismatchedNode)

    # For repeated use of a tree pattern, compile it to a
    # {@link ParseTreePattern} using this method.
    def compileTreePattern(self, pattern:str, patternRuleIndex:int):
        tokenList = self.tokenize(pattern)
        tokenSrc = ListTokenSource(tokenList)
        tokens = CommonTokenStream(tokenSrc)
        # imported here to avoid a circular import
        from ..ParserInterpreter import ParserInterpreter
        parserInterp = ParserInterpreter(self.parser.grammarFileName, self.parser.tokenNames,
                                self.parser.ruleNames, self.parser.getATNWithBypassAlts(),tokens)
        tree = None
        try:
            # Bail immediately on the first syntax error in the pattern itself.
            parserInterp.setErrorHandler(BailErrorStrategy())
            tree = parserInterp.parse(patternRuleIndex)
        except ParseCancellationException as e:
            # unwrap the recognition error that BailErrorStrategy wrapped
            raise e.cause
        except RecognitionException as e:
            raise e
        except Exception as e:
            raise CannotInvokeStartRule(e)

        # Make sure tree pattern compilation checks for a complete parse
        if tokens.LA(1)!=Token.EOF:
            raise StartRuleDoesNotConsumeFullPattern()

        # imported here to avoid a circular import
        from .ParseTreePattern import ParseTreePattern
        return ParseTreePattern(self, pattern, patternRuleIndex, tree)

    # Recursively walk {@code tree} against {@code patternTree}, filling
    # {@code labels} with tag-name -> list-of-nodes entries.
    #
    # @return the first node encountered in {@code tree} which does not match
    # a corresponding node in {@code patternTree}, or {@code None} if the match
    # was successful.
    def matchImpl(self, tree:ParseTree, patternTree:ParseTree, labels:dict):
        if tree is None:
            raise Exception("tree cannot be null")
        if patternTree is None:
            raise Exception("patternTree cannot be null")

        # x and <ID>, x and y, or x and x; or could be mismatched types
        if isinstance(tree, TerminalNode) and isinstance(patternTree, TerminalNode ):
            mismatchedNode = None
            # both are tokens and they have same type
            if tree.symbol.type == patternTree.symbol.type:
                if isinstance( patternTree.symbol, TokenTagToken ): # x and <ID>
                    tokenTagToken = patternTree.symbol
                    # track label->list-of-nodes for both token name and label (if any)
                    self.map(labels, tokenTagToken.tokenName, tree)
                    if tokenTagToken.label is not None:
                        self.map(labels, tokenTagToken.label, tree)
                elif tree.getText()==patternTree.getText():
                    # x and x
                    pass
                else:
                    # x and y
                    if mismatchedNode is None:
                        mismatchedNode = tree
            else:
                if mismatchedNode is None:
                    mismatchedNode = tree

            return mismatchedNode

        if isinstance(tree, ParserRuleContext) and isinstance(patternTree, ParserRuleContext):
            mismatchedNode = None
            # (expr ...) and <expr>
            ruleTagToken = self.getRuleTagToken(patternTree)
            if ruleTagToken is not None:
                if tree.ruleContext.ruleIndex == patternTree.ruleContext.ruleIndex:
                    # track label->list-of-nodes for both rule name and label (if any)
                    self.map(labels, ruleTagToken.ruleName, tree)
                    if ruleTagToken.label is not None:
                        self.map(labels, ruleTagToken.label, tree)
                else:
                    if mismatchedNode is None:
                        mismatchedNode = tree

                return mismatchedNode

            # (expr ...) and (expr ...): child counts must agree
            if tree.getChildCount()!=patternTree.getChildCount():
                if mismatchedNode is None:
                    mismatchedNode = tree
                return mismatchedNode

            n = tree.getChildCount()
            for i in range(0, n):
                childMatch = self.matchImpl(tree.getChild(i), patternTree.getChild(i), labels)
                if childMatch is not None:
                    return childMatch

            return mismatchedNode

        # if nodes aren't both tokens or both rule nodes, can't match
        return tree

    # Append {@code tree} to the list stored under {@code label}, creating the
    # list on first use.
    def map(self, labels, label, tree):
        v = labels.get(label, None)
        if v is None:
            v = list()
            labels[label] = v
        v.append(tree)

    # Is {@code tree} a {@code (expr <expr>)} subtree? Return its RuleTagToken
    # if so, else {@code None}.
    def getRuleTagToken(self, tree:ParseTree):
        if isinstance( tree, RuleNode ):
            if tree.getChildCount()==1 and isinstance(tree.getChild(0), TerminalNode ):
                c = tree.getChild(0)
                if isinstance( c.symbol, RuleTagToken ):
                    return c.symbol
        return None

    # Turn {@code pattern} into a token list: tag chunks become
    # TokenTagToken/RuleTagToken instances and text chunks are run through the
    # real lexer.
    def tokenize(self, pattern:str):
        # split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
        chunks = self.split(pattern)

        # create token stream from text and tags
        tokens = list()
        for chunk in chunks:
            if isinstance( chunk, TagChunk ):
                # add special rule token or conjure up new token from name
                if chunk.tag[0].isupper():
                    # uppercase initial => token reference
                    ttype = self.parser.getTokenType(chunk.tag)
                    if ttype==Token.INVALID_TYPE:
                        raise Exception("Unknown token " + str(chunk.tag) + " in pattern: " + pattern)
                    tokens.append(TokenTagToken(chunk.tag, ttype, chunk.label))
                elif chunk.tag[0].islower():
                    # lowercase initial => rule reference
                    ruleIndex = self.parser.getRuleIndex(chunk.tag)
                    if ruleIndex==-1:
                        raise Exception("Unknown rule " + str(chunk.tag) + " in pattern: " + pattern)
                    ruleImaginaryTokenType = self.parser.getATNWithBypassAlts().ruleToTokenType[ruleIndex]
                    tokens.append(RuleTagToken(chunk.tag, ruleImaginaryTokenType, chunk.label))
                else:
                    raise Exception("invalid tag: " + str(chunk.tag) + " in pattern: " + pattern)
            else:
                # raw text: lex it with the user's lexer
                self.lexer.setInputStream(InputStream(chunk.text))
                t = self.lexer.nextToken()
                while t.type!=Token.EOF:
                    tokens.append(t)
                    t = self.lexer.nextToken()
        return tokens

    # Split {@code <ID> = <e:expr> ;} into 4 chunks for tokenizing by {@link #tokenize}.
    def split(self, pattern:str):
        p = 0
        n = len(pattern)
        chunks = list()
        # find all start and stop indexes first, then collect
        starts = list()
        stops = list()
        while p < n :
            if p == pattern.find(self.escape + self.start, p):
                # escaped start delimiter: skip, not a tag
                p += len(self.escape) + len(self.start)
            elif p == pattern.find(self.escape + self.stop, p):
                # escaped stop delimiter: skip, not a tag
                p += len(self.escape) + len(self.stop)
            elif p == pattern.find(self.start, p):
                starts.append(p)
                p += len(self.start)
            elif p == pattern.find(self.stop, p):
                stops.append(p)
                p += len(self.stop)
            else:
                p += 1

        nt = len(starts)

        if nt > len(stops):
            raise Exception("unterminated tag in pattern: " + pattern)
        if nt < len(stops):
            raise Exception("missing start tag in pattern: " + pattern)

        for i in range(0, nt):
            if starts[i] >= stops[i]:
                raise Exception("tag delimiters out of order in pattern: " + pattern)

        # collect into chunks now
        if nt==0:
            chunks.append(TextChunk(pattern))

        if nt>0 and starts[0]>0: # copy text up to first tag into chunks
            text = pattern[0:starts[0]]
            # BUGFIX: was chunks.add(...) — Python lists have append, not add,
            # so any pattern with leading text raised AttributeError here.
            chunks.append(TextChunk(text))

        for i in range(0, nt):
            # copy inside of <tag>
            tag = pattern[starts[i] + len(self.start) : stops[i]]
            ruleOrToken = tag
            label = None
            colon = tag.find(':')
            if colon >= 0:
                # <label:tag> form
                label = tag[0:colon]
                ruleOrToken = tag[colon+1 : len(tag)]
            chunks.append(TagChunk(label, ruleOrToken))
            if i+1 < len(starts):
                # copy from end of <tag> to start of next
                text = pattern[stops[i] + len(self.stop) : starts[i + 1]]
                chunks.append(TextChunk(text))

        if nt > 0 :
            afterLastTag = stops[nt - 1] + len(self.stop)
            if afterLastTag < n : # copy text from end of last tag to end
                text = pattern[afterLastTag : n]
                chunks.append(TextChunk(text))

        # strip out the escape sequences from text chunks but not tags
        for i in range(0, len(chunks)):
            c = chunks[i]
            if isinstance( c, TextChunk ):
                unescaped = c.text.replace(self.escape, "")
                if len(unescaped) < len(c.text):
                    chunks[i] = TextChunk(unescaped)
        return chunks
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# type: ignore
|
2
|
+
# ruff: noqa
|
3
|
+
# flake8: noqa
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
6
|
+
# Use of this file is governed by the BSD 3-clause license that
|
7
|
+
# can be found in the LICENSE.txt file in the project root.
|
8
|
+
#
|
9
|
+
|
10
|
+
#
|
11
|
+
# A {@link Token} object representing an entire subtree matched by a parser
|
12
|
+
# rule; e.g., {@code <expr>}. These tokens are created for {@link TagChunk}
|
13
|
+
# chunks where the tag corresponds to a parser rule.
|
14
|
+
#
|
15
|
+
from ..Token import Token
|
16
|
+
|
17
|
+
|
18
|
+
class RuleTagToken(Token):
    """A Token representing an entire subtree matched by a parser rule, e.g.
    ``<expr>``. These tokens are created for TagChunk chunks where the tag
    corresponds to a parser rule."""
    __slots__ = ('label', 'ruleName')

    # Constructs a new instance of {@link RuleTagToken} with the specified rule
    # name, bypass token type, and label.
    #
    # @param ruleName The name of the parser rule this rule tag matches.
    # @param bypassTokenType The bypass token type assigned to the parser rule.
    # @param label The label associated with the rule tag, or {@code None} if
    # the rule tag is unlabeled.
    #
    # @exception Exception if {@code ruleName} is {@code None} or empty.
    def __init__(self, ruleName:str, bypassTokenType:int, label:str=None):
        if ruleName is None or len(ruleName)==0:
            raise Exception("ruleName cannot be null or empty.")
        self.source = None
        self.type = bypassTokenType # token type of the token
        self.channel = Token.DEFAULT_CHANNEL # The parser ignores everything not on DEFAULT_CHANNEL
        self.start = -1 # optional; return -1 if not implemented.
        self.stop = -1 # optional; return -1 if not implemented.
        self.tokenIndex = -1 # from 0..n-1 of the token object in the input stream
        self.line = 0 # line=1..n of the 1st character
        self.column = -1 # beginning of the line at which it occurs, 0..n-1
        self.label = label
        # BUGFIX: ruleName must be assigned before calling getText(), which
        # reads self.ruleName unconditionally; with the original ordering the
        # slot was still unset and every construction raised AttributeError.
        self.ruleName = ruleName
        self._text = self.getText() # text of the token.

    def getText(self):
        # Render the tag with < > delimiters, including the label when present.
        if self.label is None:
            return "<" + self.ruleName + ">"
        else:
            return "<" + self.label + ":" + self.ruleName + ">"
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# type: ignore
|
2
|
+
# ruff: noqa
|
3
|
+
# flake8: noqa
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
6
|
+
# Use of this file is governed by the BSD 3-clause license that
|
7
|
+
# can be found in the LICENSE.txt file in the project root.
|
8
|
+
#
|
9
|
+
|
10
|
+
#
|
11
|
+
# A {@link Token} object representing a token of a particular type; e.g.,
|
12
|
+
# {@code <ID>}. These tokens are created for {@link TagChunk} chunks where the
|
13
|
+
# tag corresponds to a lexer rule or token type.
|
14
|
+
#
|
15
|
+
from ..Token import CommonToken
|
16
|
+
|
17
|
+
|
18
|
+
class TokenTagToken(CommonToken):
    """A CommonToken representing a token of a particular type, e.g. ``<ID>``.
    These tokens are created for TagChunk chunks where the tag corresponds to
    a lexer rule or token type."""
    __slots__ = ('tokenName', 'label')

    # Constructs a new instance of {@link TokenTagToken} with the specified
    # token name, type, and label.
    #
    # @param tokenName The token name.
    # @param type The token type.
    # @param label The label associated with the token tag, or {@code None} if
    # the token tag is unlabeled.
    def __init__(self, tokenName:str, type:int, label:str=None):
        super().__init__(type=type)
        self.tokenName = tokenName
        self.label = label
        self._text = self.getText()

    def getText(self):
        # Render the tag with < > delimiters, including the label when present.
        if self.label is not None:
            return "<" + self.label + ":" + self.tokenName + ">"
        return "<" + self.tokenName + ">"

    def __str__(self):
        # {@code tokenName:type} form, used for debugging output.
        return self.tokenName + ":" + str(self.type)
|
@@ -0,0 +1,194 @@
|
|
1
|
+
# type: ignore
|
2
|
+
# ruff: noqa
|
3
|
+
# flake8: noqa
|
4
|
+
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
|
5
|
+
# Use of this file is governed by the BSD 3-clause license that
|
6
|
+
# can be found in the LICENSE.txt file in the project root.
|
7
|
+
#/
|
8
|
+
|
9
|
+
|
10
|
+
# The basic notion of a tree has a parent, a payload, and a list of children.
|
11
|
+
# It is the most abstract interface for all the trees used by ANTLR.
|
12
|
+
#/
|
13
|
+
from ..Token import Token
|
14
|
+
|
15
|
+
# Sentinel source interval for nodes with no backing token.
INVALID_INTERVAL = (-1, -2)


class Tree(object):
    """The most abstract notion of a tree: a parent, a payload, children."""


class SyntaxTree(Tree):
    """A tree whose nodes are tied to a region of the input."""


class ParseTree(SyntaxTree):
    """A syntax tree produced by a parse."""


class RuleNode(ParseTree):
    """An interior parse-tree node corresponding to a grammar rule."""


class TerminalNode(ParseTree):
    """A parse-tree leaf corresponding to a single token."""


class ErrorNode(TerminalNode):
    """A terminal node created during error recovery."""
|
34
|
+
|
35
|
+
class ParseTreeVisitor(object):
    """Generic parse-tree visitor. Subclasses override the per-node hooks;
    the default implementations fold children with aggregateResult()."""

    def visit(self, tree):
        """Dispatch to the node's accept() method."""
        return tree.accept(self)

    def visitChildren(self, node):
        """Visit each child in order, folding the results.

        Stops early as soon as shouldVisitNextChild() returns False and
        returns the aggregate accumulated so far.
        """
        aggregate = self.defaultResult()
        for index in range(node.getChildCount()):
            if not self.shouldVisitNextChild(node, aggregate):
                break
            child = node.getChild(index)
            aggregate = self.aggregateResult(aggregate, child.accept(self))
        return aggregate

    def visitTerminal(self, node):
        return self.defaultResult()

    def visitErrorNode(self, node):
        return self.defaultResult()

    def defaultResult(self):
        # Base value for the fold; None unless a subclass overrides it.
        return None

    def aggregateResult(self, aggregate, nextResult):
        # Default fold keeps only the most recent child's result.
        return nextResult

    def shouldVisitNextChild(self, node, currentResult):
        # Default: always keep going.
        return True
|
66
|
+
|
67
|
+
# Forward declaration: lets the ParserRuleContext annotations below evaluate
# at class-definition time without importing the real class (circular import);
# deleted again right after the class body.
ParserRuleContext = None

class ParseTreeListener(object):
    """No-op listener base class; subclasses override only the callbacks
    they care about."""

    def visitTerminal(self, node:TerminalNode):
        pass

    def visitErrorNode(self, node:ErrorNode):
        pass

    def enterEveryRule(self, ctx:ParserRuleContext):
        pass

    def exitEveryRule(self, ctx:ParserRuleContext):
        pass

# remove the placeholder so it does not leak from this module
del ParserRuleContext
|
84
|
+
|
85
|
+
class TerminalNodeImpl(TerminalNode):
    """Concrete parse-tree leaf wrapping a single Token."""
    __slots__ = ('parentCtx', 'symbol')

    def __init__(self, symbol:Token):
        self.parentCtx = None  # enclosing rule context; set when attached to a parent
        self.symbol = symbol
    def __setattr__(self, key, value):
        # NOTE(review): delegates straight to object.__setattr__ with no extra
        # logic — appears to be a no-op override; confirm its purpose before
        # removing.
        super().__setattr__(key, value)

    def getChild(self, i:int):
        # A terminal never has children.
        return None

    def getSymbol(self):
        return self.symbol

    def getParent(self):
        return self.parentCtx

    def getPayload(self):
        # The payload of a terminal node is its token.
        return self.symbol

    def getSourceInterval(self):
        # (tokenIndex, tokenIndex) for a real token; INVALID_INTERVAL when
        # there is no backing symbol.
        if self.symbol is None:
            return INVALID_INTERVAL
        tokenIndex = self.symbol.tokenIndex
        return (tokenIndex, tokenIndex)

    def getChildCount(self):
        return 0

    def accept(self, visitor:ParseTreeVisitor):
        return visitor.visitTerminal(self)

    def getText(self):
        return self.symbol.text

    def __str__(self):
        # EOF is rendered symbolically; other tokens use their text.
        if self.symbol.type == Token.EOF:
            return "<EOF>"
        else:
            return self.symbol.text
|
126
|
+
|
127
|
+
# Represents a token that was consumed during resynchronization
|
128
|
+
# rather than during a valid match operation. For example,
|
129
|
+
# we will create this kind of a node during single token insertion
|
130
|
+
# and deletion as well as during "consume until error recovery set"
|
131
|
+
# upon no viable alternative exceptions.
|
132
|
+
|
133
|
+
class ErrorNodeImpl(TerminalNodeImpl,ErrorNode):
    """Represents a token that was consumed during resynchronization rather
    than during a valid match operation; e.g. created during single-token
    insertion/deletion and "consume until error recovery set" handling."""

    def __init__(self, token:Token):
        super().__init__(token)

    def accept(self, visitor:ParseTreeVisitor):
        # Dispatch to the error-node hook rather than visitTerminal.
        return visitor.visitErrorNode(self)
|
140
|
+
|
141
|
+
|
142
|
+
class ParseTreeWalker(object):
    """Depth-first walker that fires ParseTreeListener events over a tree."""

    DEFAULT = None  # shared singleton instance, assigned after the class body

    def walk(self, listener:ParseTreeListener, t:ParseTree):
        """
        Performs a walk on the given parse tree starting at the root and going down recursively
        with depth-first search. On each node, {@link ParseTreeWalker#enterRule} is called before
        recursively walking down into child nodes, then
        {@link ParseTreeWalker#exitRule} is called after the recursive call to wind up.
        @param listener The listener used by the walker to process grammar rules
        @param t The parse tree to be walked on
        """
        # ErrorNode is checked first since it is itself a TerminalNode subclass.
        if isinstance(t, ErrorNode):
            listener.visitErrorNode(t)
        elif isinstance(t, TerminalNode):
            listener.visitTerminal(t)
        else:
            self.enterRule(listener, t)
            for child in t.getChildren():
                self.walk(listener, child)
            self.exitRule(listener, t)

    def enterRule(self, listener:ParseTreeListener, r:RuleNode):
        """
        Enters a grammar rule by first triggering the generic event {@link ParseTreeListener#enterEveryRule}
        then by triggering the event specific to the given parse tree node
        @param listener The listener responding to the trigger events
        @param r The grammar rule containing the rule context
        """
        ctx = r.getRuleContext()
        listener.enterEveryRule(ctx)
        ctx.enterRule(listener)

    def exitRule(self, listener:ParseTreeListener, r:RuleNode):
        """
        Exits a grammar rule by first triggering the event specific to the given parse tree node
        then by triggering the generic event {@link ParseTreeListener#exitEveryRule}
        — the mirror image of enterRule.
        @param listener The listener responding to the trigger events
        @param r The grammar rule containing the rule context
        """
        ctx = r.getRuleContext()
        ctx.exitRule(listener)
        listener.exitEveryRule(ctx)


ParseTreeWalker.DEFAULT = ParseTreeWalker()
|