omextra 0.0.0.dev437__py3-none-any.whl → 0.0.0.dev439__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omextra/formats/json/_antlr/JsonLexer.py +1 -1
- omextra/formats/json/_antlr/JsonListener.py +1 -1
- omextra/formats/json/_antlr/JsonParser.py +1 -1
- omextra/formats/json/_antlr/JsonVisitor.py +1 -1
- omextra/formats/json5/Json5.g4 +168 -0
- omextra/formats/json5/__init__.py +0 -0
- omextra/formats/json5/_antlr/Json5Lexer.py +354 -0
- omextra/formats/json5/_antlr/Json5Listener.py +79 -0
- omextra/formats/json5/_antlr/Json5Parser.py +617 -0
- omextra/formats/json5/_antlr/Json5Visitor.py +52 -0
- omextra/formats/json5/_antlr/__init__.py +0 -0
- omextra/formats/json5/parsing.py +101 -0
- omextra/specs/proto/_antlr/Protobuf3Lexer.py +1 -1
- omextra/specs/proto/_antlr/Protobuf3Listener.py +1 -1
- omextra/specs/proto/_antlr/Protobuf3Parser.py +1 -1
- omextra/specs/proto/_antlr/Protobuf3Visitor.py +1 -1
- omextra/specs/proto/parsing.py +2 -2
- omextra/sql/parsing/_antlr/MinisqlLexer.py +1 -1
- omextra/sql/parsing/_antlr/MinisqlListener.py +1 -1
- omextra/sql/parsing/_antlr/MinisqlParser.py +1 -1
- omextra/sql/parsing/_antlr/MinisqlVisitor.py +1 -1
- omextra/sql/parsing/parsing.py +3 -3
- omextra/text/antlr/__init__.py +3 -0
- omextra/text/antlr/_runtime/BufferedTokenStream.py +305 -0
- omextra/text/antlr/_runtime/CommonTokenFactory.py +64 -0
- omextra/text/antlr/_runtime/CommonTokenStream.py +90 -0
- omextra/text/antlr/_runtime/FileStream.py +30 -0
- omextra/text/antlr/_runtime/InputStream.py +90 -0
- omextra/text/antlr/_runtime/IntervalSet.py +183 -0
- omextra/text/antlr/_runtime/LICENSE.txt +28 -0
- omextra/text/antlr/_runtime/LL1Analyzer.py +176 -0
- omextra/text/antlr/_runtime/Lexer.py +332 -0
- omextra/text/antlr/_runtime/ListTokenSource.py +147 -0
- omextra/text/antlr/_runtime/Parser.py +583 -0
- omextra/text/antlr/_runtime/ParserInterpreter.py +173 -0
- omextra/text/antlr/_runtime/ParserRuleContext.py +189 -0
- omextra/text/antlr/_runtime/PredictionContext.py +632 -0
- omextra/text/antlr/_runtime/Recognizer.py +150 -0
- omextra/text/antlr/_runtime/RuleContext.py +230 -0
- omextra/text/antlr/_runtime/StdinStream.py +14 -0
- omextra/text/antlr/_runtime/Token.py +158 -0
- omextra/text/antlr/_runtime/TokenStreamRewriter.py +258 -0
- omextra/text/antlr/_runtime/Utils.py +36 -0
- omextra/text/antlr/_runtime/__init__.py +2 -0
- omextra/text/antlr/_runtime/_all.py +24 -0
- omextra/text/antlr/_runtime/_pygrun.py +174 -0
- omextra/text/antlr/_runtime/atn/ATN.py +135 -0
- omextra/text/antlr/_runtime/atn/ATNConfig.py +162 -0
- omextra/text/antlr/_runtime/atn/ATNConfigSet.py +215 -0
- omextra/text/antlr/_runtime/atn/ATNDeserializationOptions.py +27 -0
- omextra/text/antlr/_runtime/atn/ATNDeserializer.py +449 -0
- omextra/text/antlr/_runtime/atn/ATNSimulator.py +50 -0
- omextra/text/antlr/_runtime/atn/ATNState.py +267 -0
- omextra/text/antlr/_runtime/atn/ATNType.py +20 -0
- omextra/text/antlr/_runtime/atn/LexerATNSimulator.py +573 -0
- omextra/text/antlr/_runtime/atn/LexerAction.py +301 -0
- omextra/text/antlr/_runtime/atn/LexerActionExecutor.py +146 -0
- omextra/text/antlr/_runtime/atn/ParserATNSimulator.py +1664 -0
- omextra/text/antlr/_runtime/atn/PredictionMode.py +502 -0
- omextra/text/antlr/_runtime/atn/SemanticContext.py +333 -0
- omextra/text/antlr/_runtime/atn/Transition.py +271 -0
- omextra/text/antlr/_runtime/atn/__init__.py +4 -0
- omextra/text/antlr/_runtime/dfa/DFA.py +136 -0
- omextra/text/antlr/_runtime/dfa/DFASerializer.py +76 -0
- omextra/text/antlr/_runtime/dfa/DFAState.py +129 -0
- omextra/text/antlr/_runtime/dfa/__init__.py +4 -0
- omextra/text/antlr/_runtime/error/DiagnosticErrorListener.py +111 -0
- omextra/text/antlr/_runtime/error/ErrorListener.py +75 -0
- omextra/text/antlr/_runtime/error/ErrorStrategy.py +712 -0
- omextra/text/antlr/_runtime/error/Errors.py +176 -0
- omextra/text/antlr/_runtime/error/__init__.py +4 -0
- omextra/text/antlr/_runtime/tree/Chunk.py +33 -0
- omextra/text/antlr/_runtime/tree/ParseTreeMatch.py +121 -0
- omextra/text/antlr/_runtime/tree/ParseTreePattern.py +75 -0
- omextra/text/antlr/_runtime/tree/ParseTreePatternMatcher.py +377 -0
- omextra/text/antlr/_runtime/tree/RuleTagToken.py +53 -0
- omextra/text/antlr/_runtime/tree/TokenTagToken.py +50 -0
- omextra/text/antlr/_runtime/tree/Tree.py +194 -0
- omextra/text/antlr/_runtime/tree/Trees.py +114 -0
- omextra/text/antlr/_runtime/tree/__init__.py +2 -0
- omextra/text/antlr/_runtime/xpath/XPath.py +278 -0
- omextra/text/antlr/_runtime/xpath/XPathLexer.py +98 -0
- omextra/text/antlr/_runtime/xpath/__init__.py +4 -0
- omextra/text/antlr/cli/consts.py +1 -1
- omextra/text/antlr/delimit.py +110 -0
- omextra/text/antlr/dot.py +42 -0
- omextra/text/antlr/errors.py +14 -0
- omextra/text/antlr/input.py +96 -0
- omextra/text/antlr/parsing.py +55 -0
- omextra/text/antlr/runtime.py +102 -0
- omextra/text/antlr/utils.py +38 -0
- omextra-0.0.0.dev439.dist-info/METADATA +28 -0
- omextra-0.0.0.dev439.dist-info/RECORD +144 -0
- omextra-0.0.0.dev437.dist-info/METADATA +0 -73
- omextra-0.0.0.dev437.dist-info/RECORD +0 -69
- {omextra-0.0.0.dev437.dist-info → omextra-0.0.0.dev439.dist-info}/WHEEL +0 -0
- {omextra-0.0.0.dev437.dist-info → omextra-0.0.0.dev439.dist-info}/entry_points.txt +0 -0
- {omextra-0.0.0.dev437.dist-info → omextra-0.0.0.dev439.dist-info}/licenses/LICENSE +0 -0
- {omextra-0.0.0.dev437.dist-info → omextra-0.0.0.dev439.dist-info}/top_level.txt +0 -0
omextra/formats/json5/parsing.py
ADDED
@@ -0,0 +1,101 @@
+# ruff: noqa: N802 N803
+import typing as ta
+
+from omlish.formats.json5.errors import Json5Error
+from omlish.formats.json5.literals import LITERAL_VALUES
+from omlish.formats.json5.literals import parse_number_literal
+from omlish.formats.json5.literals import parse_string_literal
+
+from ...text import antlr
+from ._antlr.Json5Lexer import Json5Lexer  # type: ignore
+from ._antlr.Json5Parser import Json5Parser  # type: ignore
+from ._antlr.Json5Visitor import Json5Visitor  # type: ignore
+
+
+##
+
+
+class Json5ParseVisitor(antlr.parsing.StandardParseTreeVisitor, Json5Visitor):
+    def visitArr(self, ctx: Json5Parser.ArrContext):
+        return [self.visit(e) for e in ctx.value()]
+
+    def visitKey(self, ctx: Json5Parser.KeyContext):
+        if (s := ctx.STRING()) is not None:
+            return parse_string_literal(s.getText())
+
+        elif (i := ctx.IDENTIFIER()) is not None:
+            return parse_string_literal(''.join(['"', i.getText(), '"']))
+
+        elif (l := ctx.LITERAL()) is not None:
+            return LITERAL_VALUES[l.getText()]
+
+        elif (n := ctx.NUMERIC_LITERAL()) is not None:
+            return n.getText()
+
+        else:
+            raise RuntimeError(ctx)
+
+    def visitNumber(self, ctx: Json5Parser.NumberContext):
+        return parse_number_literal(ctx.getText())
+
+    def visitObj(self, ctx: Json5Parser.ObjContext):
+        dct: dict[ta.Any, ta.Any] = {}
+        for pair in ctx.pair():
+            key, value = self.visit(pair)
+            dct[key] = value
+        return dct
+
+    def visitPair(self, ctx: Json5Parser.PairContext):
+        key = self.visit(ctx.key())
+        value = self.visit(ctx.value())
+        return (key, value)
+
+    def visitValue(self, ctx: Json5Parser.ValueContext):
+        if (s := ctx.STRING()) is not None:
+            return parse_string_literal(s.getText())
+
+        elif (n := ctx.LITERAL()) is not None:
+            return LITERAL_VALUES[n.getText()]
+
+        else:
+            return super().visitChildren(ctx)
+
+
+def _make_parser(buf: str) -> Json5Parser:
+    return antlr.parsing.make_parser(
+        buf,
+        Json5Lexer,
+        Json5Parser,
+        silent_errors=True,
+    )
+
+
+def parse(buf: str) -> ta.Any:
+    try:
+        root = _make_parser(buf).json5()
+
+    except antlr.errors.ParseError as e:
+        raise Json5Error from e
+
+    if antlr.parsing.is_eof_context(root):
+        raise Json5Error('Empty input')
+
+    visitor = Json5ParseVisitor()
+    return visitor.visit(root)
+
+
+def parse_many(buf: str) -> ta.Iterator[ta.Any]:
+    try:
+        parser = _make_parser(buf)
+
+        while True:
+            if parser.getInputStream().LT(1).type == antlr.runtime.Token.EOF:
+                break
+
+            value = parser.value()
+
+            visitor = Json5ParseVisitor()
+            yield visitor.visit(value)
+
+    except antlr.errors.ParseError as e:
+        raise Json5Error from e
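For orientation, here is a minimal usage sketch of the new module (not part of the diff). It assumes the dev439 wheel is installed and that omextra.formats.json5.parsing is importable as laid out in the file list above; the sample inputs are invented.

# Sketch only: exercises the parse() and parse_many() entry points shown above.
from omextra.formats.json5 import parsing as json5

# parse() returns a single Python value built by Json5ParseVisitor,
# or raises Json5Error on a syntax error or empty input.
obj = json5.parse('{unquoted: "value", list: [1, 2, 3,]}')

# parse_many() lazily yields one value per top-level JSON5 value in the
# buffer, stopping at EOF.
for value in json5.parse_many('1 "two" {three: 3}'):
    print(value)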
omextra/specs/proto/parsing.py
CHANGED
@@ -2,9 +2,9 @@
 import typing as ta
 
 from omlish import check
-from omlish.text.antlr import runtime as antlr4
-from omlish.text.antlr.errors import SilentRaisingErrorListener
 
+from ...text.antlr import runtime as antlr4
+from ...text.antlr.errors import SilentRaisingErrorListener
 from . import nodes as no
 from ._antlr.Protobuf3Lexer import Protobuf3Lexer  # type: ignore
 from ._antlr.Protobuf3Parser import Protobuf3Parser  # type: ignore
omextra/sql/parsing/parsing.py
CHANGED
@@ -3,10 +3,10 @@ import typing as ta
 
 from omlish import check
 from omlish.sql import queries as no
-from omlish.text.antlr import runtime as antlr4
-from omlish.text.antlr.delimit import DelimitingLexer
-from omlish.text.antlr.errors import SilentRaisingErrorListener
 
+from ...text.antlr import runtime as antlr4
+from ...text.antlr.delimit import DelimitingLexer
+from ...text.antlr.errors import SilentRaisingErrorListener
 from ._antlr.MinisqlLexer import MinisqlLexer  # type: ignore
 from ._antlr.MinisqlParser import MinisqlParser  # type: ignore
 from ._antlr.MinisqlVisitor import MinisqlVisitor  # type: ignore
omextra/text/antlr/_runtime/BufferedTokenStream.py
ADDED
@@ -0,0 +1,305 @@
+# type: ignore
+# ruff: noqa
+# flake8: noqa
+#
+# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+# Use of this file is governed by the BSD 3-clause license that
+# can be found in the LICENSE.txt file in the project root.
+
+# This implementation of {@link TokenStream} loads tokens from a
+# {@link TokenSource} on-demand, and places the tokens in a buffer to provide
+# access to any previous token by index.
+#
+# <p>
+# This token stream ignores the value of {@link Token#getChannel}. If your
+# parser requires the token stream filter tokens to only those on a particular
+# channel, such as {@link Token#DEFAULT_CHANNEL} or
+# {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a
+# {@link CommonTokenStream}.</p>
+from io import StringIO
+from .Token import Token
+from .error.Errors import IllegalStateException
+
+# need forward declaration
+Lexer = None
+
+# this is just to keep meaningful parameter types to Parser
+class TokenStream(object):
+
+    pass
+
+
+class BufferedTokenStream(TokenStream):
+    __slots__ = ('tokenSource', 'tokens', 'index', 'fetchedEOF')
+
+    def __init__(self, tokenSource:Lexer):
+        # The {@link TokenSource} from which tokens for this stream are fetched.
+        self.tokenSource = tokenSource
+
+        # A collection of all tokens fetched from the token source. The list is
+        # considered a complete view of the input once {@link #fetchedEOF} is set
+        # to {@code true}.
+        self.tokens = []
+
+        # The index into {@link #tokens} of the current token (next token to
+        # {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be
+        # {@link #LT LT(1)}.
+        #
+        # <p>This field is set to -1 when the stream is first constructed or when
+        # {@link #setTokenSource} is called, indicating that the first token has
+        # not yet been fetched from the token source. For additional information,
+        # see the documentation of {@link IntStream} for a description of
+        # Initializing Methods.</p>
+        self.index = -1
+
+        # Indicates whether the {@link Token#EOF} token has been fetched from
+        # {@link #tokenSource} and added to {@link #tokens}. This field improves
+        # performance for the following cases:
+        #
+        # <ul>
+        # <li>{@link #consume}: The lookahead check in {@link #consume} to prevent
+        # consuming the EOF symbol is optimized by checking the values of
+        # {@link #fetchedEOF} and {@link #p} instead of calling {@link #LA}.</li>
+        # <li>{@link #fetch}: The check to prevent adding multiple EOF symbols into
+        # {@link #tokens} is trivial with this field.</li>
+        # <ul>
+        self.fetchedEOF = False
+
+    def mark(self):
+        return 0
+
+    def release(self, marker:int):
+        # no resources to release
+        pass
+
+    def reset(self):
+        self.seek(0)
+
+    def seek(self, index:int):
+        self.lazyInit()
+        self.index = self.adjustSeekIndex(index)
+
+    def get(self, index:int):
+        self.lazyInit()
+        return self.tokens[index]
+
+    def consume(self):
+        skipEofCheck = False
+        if self.index >= 0:
+            if self.fetchedEOF:
+                # the last token in tokens is EOF. skip check if p indexes any
+                # fetched token except the last.
+                skipEofCheck = self.index < len(self.tokens) - 1
+            else:
+                # no EOF token in tokens. skip check if p indexes a fetched token.
+                skipEofCheck = self.index < len(self.tokens)
+        else:
+            # not yet initialized
+            skipEofCheck = False
+
+        if not skipEofCheck and self.LA(1) == Token.EOF:
+            raise IllegalStateException("cannot consume EOF")
+
+        if self.sync(self.index + 1):
+            self.index = self.adjustSeekIndex(self.index + 1)
+
+    # Make sure index {@code i} in tokens has a token.
+    #
+    # @return {@code true} if a token is located at index {@code i}, otherwise
+    #    {@code false}.
+    # @see #get(int i)
+    #/
+    def sync(self, i:int):
+        n = i - len(self.tokens) + 1 # how many more elements we need?
+        if n > 0 :
+            fetched = self.fetch(n)
+            return fetched >= n
+        return True
+
+    # Add {@code n} elements to buffer.
+    #
+    # @return The actual number of elements added to the buffer.
+    #/
+    def fetch(self, n:int):
+        if self.fetchedEOF:
+            return 0
+        for i in range(0, n):
+            t = self.tokenSource.nextToken()
+            t.tokenIndex = len(self.tokens)
+            self.tokens.append(t)
+            if t.type==Token.EOF:
+                self.fetchedEOF = True
+                return i + 1
+        return n
+
+
+    # Get all tokens from start..stop inclusively#/
+    def getTokens(self, start:int, stop:int, types:set=None):
+        if start<0 or stop<0:
+            return None
+        self.lazyInit()
+        subset = []
+        if stop >= len(self.tokens):
+            stop = len(self.tokens)-1
+        for i in range(start, stop):
+            t = self.tokens[i]
+            if t.type==Token.EOF:
+                break
+            if types is None or t.type in types:
+                subset.append(t)
+        return subset
+
+    def LA(self, i:int):
+        return self.LT(i).type
+
+    def LB(self, k:int):
+        if (self.index-k) < 0:
+            return None
+        return self.tokens[self.index-k]
+
+    def LT(self, k:int):
+        self.lazyInit()
+        if k==0:
+            return None
+        if k < 0:
+            return self.LB(-k)
+        i = self.index + k - 1
+        self.sync(i)
+        if i >= len(self.tokens): # return EOF token
+            # EOF must be last token
+            return self.tokens[len(self.tokens)-1]
+        return self.tokens[i]
+
+    # Allowed derived classes to modify the behavior of operations which change
+    # the current stream position by adjusting the target token index of a seek
+    # operation. The default implementation simply returns {@code i}. If an
+    # exception is thrown in this method, the current stream index should not be
+    # changed.
+    #
+    # <p>For example, {@link CommonTokenStream} overrides this method to ensure that
+    # the seek target is always an on-channel token.</p>
+    #
+    # @param i The target token index.
+    # @return The adjusted target token index.
+
+    def adjustSeekIndex(self, i:int):
+        return i
+
+    def lazyInit(self):
+        if self.index == -1:
+            self.setup()
+
+    def setup(self):
+        self.sync(0)
+        self.index = self.adjustSeekIndex(0)
+
+    # Reset this token stream by setting its token source.#/
+    def setTokenSource(self, tokenSource:Lexer):
+        self.tokenSource = tokenSource
+        self.tokens = []
+        self.index = -1
+        self.fetchedEOF = False
+
+
+    # Given a starting index, return the index of the next token on channel.
+    # Return i if tokens[i] is on channel. Return the index of the EOF token
+    # if there are no tokens on channel between i and EOF.
+    #/
+    def nextTokenOnChannel(self, i:int, channel:int):
+        self.sync(i)
+        if i>=len(self.tokens):
+            return len(self.tokens) - 1
+        token = self.tokens[i]
+        while token.channel!=channel:
+            if token.type==Token.EOF:
+                return i
+            i += 1
+            self.sync(i)
+            token = self.tokens[i]
+        return i
+
+    # Given a starting index, return the index of the previous token on channel.
+    # Return i if tokens[i] is on channel. Return -1 if there are no tokens
+    # on channel between i and 0.
+    def previousTokenOnChannel(self, i:int, channel:int):
+        while i>=0 and self.tokens[i].channel!=channel:
+            i -= 1
+        return i
+
+    # Collect all tokens on specified channel to the right of
+    # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
+    # EOF. If channel is -1, find any non default channel token.
+    def getHiddenTokensToRight(self, tokenIndex:int, channel:int=-1):
+        self.lazyInit()
+        if tokenIndex<0 or tokenIndex>=len(self.tokens):
+            raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
+        from .Lexer import Lexer
+        nextOnChannel = self.nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL)
+        from_ = tokenIndex+1
+        # if none onchannel to right, nextOnChannel=-1 so set to = last token
+        to = (len(self.tokens)-1) if nextOnChannel==-1 else nextOnChannel
+        return self.filterForChannel(from_, to, channel)
+
+
+    # Collect all tokens on specified channel to the left of
+    # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
+    # If channel is -1, find any non default channel token.
+    def getHiddenTokensToLeft(self, tokenIndex:int, channel:int=-1):
+        self.lazyInit()
+        if tokenIndex<0 or tokenIndex>=len(self.tokens):
+            raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1))
+        from .Lexer import Lexer
+        prevOnChannel = self.previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL)
+        if prevOnChannel == tokenIndex - 1:
+            return None
+        # if none on channel to left, prevOnChannel=-1 then from=0
+        from_ = prevOnChannel+1
+        to = tokenIndex-1
+        return self.filterForChannel(from_, to, channel)
+
+
+    def filterForChannel(self, left:int, right:int, channel:int):
+        hidden = []
+        for i in range(left, right+1):
+            t = self.tokens[i]
+            if channel==-1:
+                from .Lexer import Lexer
+                if t.channel!= Lexer.DEFAULT_TOKEN_CHANNEL:
+                    hidden.append(t)
+            elif t.channel==channel:
+                hidden.append(t)
+        if len(hidden)==0:
+            return None
+        return hidden
+
+    def getSourceName(self):
+        return self.tokenSource.getSourceName()
+
+    # Get the text of all tokens in this buffer.#/
+    def getText(self, start:int=None, stop:int=None):
+        self.lazyInit()
+        self.fill()
+        if isinstance(start, Token):
+            start = start.tokenIndex
+        elif start is None:
+            start = 0
+        if isinstance(stop, Token):
+            stop = stop.tokenIndex
+        elif stop is None or stop >= len(self.tokens):
+            stop = len(self.tokens) - 1
+        if start < 0 or stop < 0 or stop < start:
+            return ""
+        with StringIO() as buf:
+            for i in range(start, stop+1):
+                t = self.tokens[i]
+                if t.type==Token.EOF:
+                    break
+                buf.write(t.text)
+            return buf.getvalue()
+
+
+    # Get all tokens from lexer until EOF#/
+    def fill(self):
+        self.lazyInit()
+        while self.fetch(1000)==1000:
+            pass
omextra/text/antlr/_runtime/CommonTokenFactory.py
ADDED
@@ -0,0 +1,64 @@
+# type: ignore
+# ruff: noqa
+# flake8: noqa
+#
+# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+# Use of this file is governed by the BSD 3-clause license that
+# can be found in the LICENSE.txt file in the project root.
+#
+
+#
+# This default implementation of {@link TokenFactory} creates
+# {@link CommonToken} objects.
+#
+from .Token import CommonToken
+
+class TokenFactory(object):
+
+    pass
+
+class CommonTokenFactory(TokenFactory):
+    __slots__ = 'copyText'
+
+    #
+    # The default {@link CommonTokenFactory} instance.
+    #
+    # <p>
+    # This token factory does not explicitly copy token text when constructing
+    # tokens.</p>
+    #
+    DEFAULT = None
+
+    def __init__(self, copyText:bool=False):
+        # Indicates whether {@link CommonToken#setText} should be called after
+        # constructing tokens to explicitly set the text. This is useful for cases
+        # where the input stream might not be able to provide arbitrary substrings
+        # of text from the input after the lexer creates a token (e.g. the
+        # implementation of {@link CharStream#getText} in
+        # {@link UnbufferedCharStream} throws an
+        # {@link UnsupportedOperationException}). Explicitly setting the token text
+        # allows {@link Token#getText} to be called at any time regardless of the
+        # input stream implementation.
+        #
+        # <p>
+        # The default value is {@code false} to avoid the performance and memory
+        # overhead of copying text for every token unless explicitly requested.</p>
+        #
+        self.copyText = copyText
+
+    def create(self, source, type:int, text:str, channel:int, start:int, stop:int, line:int, column:int):
+        t = CommonToken(source, type, channel, start, stop)
+        t.line = line
+        t.column = column
+        if text is not None:
+            t.text = text
+        elif self.copyText and source[1] is not None:
+            t.text = source[1].getText(start,stop)
+        return t
+
+    def createThin(self, type:int, text:str):
+        t = CommonToken(type=type)
+        t.text = text
+        return t
+
+CommonTokenFactory.DEFAULT = CommonTokenFactory()
omextra/text/antlr/_runtime/CommonTokenStream.py
ADDED
@@ -0,0 +1,90 @@
+# type: ignore
+# ruff: noqa
+# flake8: noqa
+#
+# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
+# Use of this file is governed by the BSD 3-clause license that
+# can be found in the LICENSE.txt file in the project root.
+#/
+
+#
+# This class extends {@link BufferedTokenStream} with functionality to filter
+# token streams to tokens on a particular channel (tokens where
+# {@link Token#getChannel} returns a particular value).
+#
+# <p>
+# This token stream provides access to all tokens by index or when calling
+# methods like {@link #getText}. The channel filtering is only used for code
+# accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and
+# {@link #LB}.</p>
+#
+# <p>
+# By default, tokens are placed on the default channel
+# ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the
+# {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to
+# call {@link Lexer#setChannel}.
+# </p>
+#
+# <p>
+# Note: lexer rules which use the {@code ->skip} lexer command or call
+# {@link Lexer#skip} do not produce tokens at all, so input text matched by
+# such a rule will not be available as part of the token stream, regardless of
+# channel.</p>
+#/
+
+from .BufferedTokenStream import BufferedTokenStream
+from .Lexer import Lexer
+from .Token import Token
+
+
+class CommonTokenStream(BufferedTokenStream):
+    __slots__ = 'channel'
+
+    def __init__(self, lexer:Lexer, channel:int=Token.DEFAULT_CHANNEL):
+        super().__init__(lexer)
+        self.channel = channel
+
+    def adjustSeekIndex(self, i:int):
+        return self.nextTokenOnChannel(i, self.channel)
+
+    def LB(self, k:int):
+        if k==0 or (self.index-k)<0:
+            return None
+        i = self.index
+        n = 1
+        # find k good tokens looking backwards
+        while n <= k:
+            # skip off-channel tokens
+            i = self.previousTokenOnChannel(i - 1, self.channel)
+            n += 1
+        if i < 0:
+            return None
+        return self.tokens[i]
+
+    def LT(self, k:int):
+        self.lazyInit()
+        if k == 0:
+            return None
+        if k < 0:
+            return self.LB(-k)
+        i = self.index
+        n = 1 # we know tokens[pos] is a good one
+        # find k good tokens
+        while n < k:
+            # skip off-channel tokens, but make sure to not look past EOF
+            if self.sync(i + 1):
+                i = self.nextTokenOnChannel(i + 1, self.channel)
+            n += 1
+        return self.tokens[i]
+
+    # Count EOF just once.#/
+    def getNumberOfOnChannelTokens(self):
+        n = 0
+        self.fill()
+        for i in range(0, len(self.tokens)):
+            t = self.tokens[i]
+            if t.channel==self.channel:
+                n += 1
+            if t.type==Token.EOF:
+                break
+        return n