omextra 0.0.0.dev497__py3-none-any.whl → 0.0.0.dev499__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omextra/text/abnf/__init__.py +45 -14
- omextra/text/abnf/_dataclasses.py +246 -0
- omextra/text/abnf/base.py +6 -279
- omextra/text/abnf/core.py +22 -10
- omextra/text/abnf/grammars.py +235 -0
- omextra/text/abnf/matches.py +145 -0
- omextra/text/abnf/meta.py +39 -17
- omextra/text/abnf/ops.py +67 -5
- omextra/text/abnf/opto.py +167 -64
- omextra/text/abnf/parsing.py +53 -5
- omextra/text/abnf/utils.py +38 -41
- omextra/text/abnf/visitors.py +1 -1
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/METADATA +2 -2
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/RECORD +18 -16
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/WHEEL +0 -0
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/entry_points.txt +0 -0
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/licenses/LICENSE +0 -0
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/top_level.txt +0 -0
omextra/text/abnf/core.py
CHANGED
|
@@ -3,13 +3,15 @@ https://datatracker.ietf.org/doc/html/rfc5234
|
|
|
3
3
|
"""
|
|
4
4
|
import typing as ta
|
|
5
5
|
|
|
6
|
-
from .
|
|
7
|
-
from .
|
|
6
|
+
from .grammars import Channel
|
|
7
|
+
from .grammars import Grammar
|
|
8
|
+
from .grammars import Rule
|
|
8
9
|
from .ops import concat
|
|
9
10
|
from .ops import either
|
|
10
11
|
from .ops import literal
|
|
11
12
|
from .ops import repeat
|
|
12
13
|
from .ops import rule
|
|
14
|
+
from .opto import optimize_grammar
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
##
|
|
@@ -23,6 +25,7 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
23
25
|
literal('\x41', '\x5a'),
|
|
24
26
|
literal('\x61', '\x7a'),
|
|
25
27
|
),
|
|
28
|
+
channel=Channel.CONTENT,
|
|
26
29
|
),
|
|
27
30
|
|
|
28
31
|
Rule(
|
|
@@ -31,11 +34,13 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
31
34
|
literal('0'),
|
|
32
35
|
literal('1'),
|
|
33
36
|
),
|
|
37
|
+
channel=Channel.CONTENT,
|
|
34
38
|
),
|
|
35
39
|
|
|
36
40
|
Rule(
|
|
37
41
|
'CHAR',
|
|
38
42
|
literal('\x01', '\x7f'),
|
|
43
|
+
channel=Channel.CONTENT,
|
|
39
44
|
),
|
|
40
45
|
|
|
41
46
|
Rule(
|
|
@@ -44,12 +49,13 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
44
49
|
literal('\x00', '\x1f'),
|
|
45
50
|
literal('\x7f', case_sensitive=True),
|
|
46
51
|
),
|
|
52
|
+
channel=Channel.CONTENT,
|
|
47
53
|
),
|
|
48
54
|
|
|
49
55
|
Rule(
|
|
50
56
|
'CR',
|
|
51
57
|
literal('\x0d', case_sensitive=True),
|
|
52
|
-
|
|
58
|
+
channel=Channel.SPACE,
|
|
53
59
|
),
|
|
54
60
|
|
|
55
61
|
Rule(
|
|
@@ -58,17 +64,19 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
58
64
|
rule('CR'),
|
|
59
65
|
rule('LF'),
|
|
60
66
|
),
|
|
61
|
-
|
|
67
|
+
channel=Channel.SPACE,
|
|
62
68
|
),
|
|
63
69
|
|
|
64
70
|
Rule(
|
|
65
71
|
'DIGIT',
|
|
66
72
|
literal('\x30', '\x39'),
|
|
73
|
+
channel=Channel.CONTENT,
|
|
67
74
|
),
|
|
68
75
|
|
|
69
76
|
Rule(
|
|
70
77
|
'DQUOTE',
|
|
71
78
|
literal('\x22', case_sensitive=True),
|
|
79
|
+
channel=Channel.CONTENT,
|
|
72
80
|
),
|
|
73
81
|
|
|
74
82
|
Rule(
|
|
@@ -82,18 +90,19 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
82
90
|
literal('E'),
|
|
83
91
|
literal('F'),
|
|
84
92
|
),
|
|
93
|
+
channel=Channel.CONTENT,
|
|
85
94
|
),
|
|
86
95
|
|
|
87
96
|
Rule(
|
|
88
97
|
'HTAB',
|
|
89
98
|
literal('\x09', case_sensitive=True),
|
|
90
|
-
|
|
99
|
+
channel=Channel.SPACE,
|
|
91
100
|
),
|
|
92
101
|
|
|
93
102
|
Rule(
|
|
94
103
|
'LF',
|
|
95
104
|
literal('\x0a', case_sensitive=True),
|
|
96
|
-
|
|
105
|
+
channel=Channel.SPACE,
|
|
97
106
|
),
|
|
98
107
|
|
|
99
108
|
Rule(
|
|
@@ -107,23 +116,25 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
107
116
|
),
|
|
108
117
|
),
|
|
109
118
|
),
|
|
110
|
-
|
|
119
|
+
channel=Channel.SPACE,
|
|
111
120
|
),
|
|
112
121
|
|
|
113
122
|
Rule(
|
|
114
123
|
'OCTET',
|
|
115
124
|
literal('\x00', '\xff'),
|
|
125
|
+
channel=Channel.CONTENT,
|
|
116
126
|
),
|
|
117
127
|
|
|
118
128
|
Rule(
|
|
119
129
|
'SP',
|
|
120
130
|
literal('\x20', case_sensitive=True),
|
|
121
|
-
|
|
131
|
+
channel=Channel.SPACE,
|
|
122
132
|
),
|
|
123
133
|
|
|
124
134
|
Rule(
|
|
125
135
|
'VCHAR',
|
|
126
136
|
literal('\x21', '\x7e'),
|
|
137
|
+
channel=Channel.CONTENT,
|
|
127
138
|
),
|
|
128
139
|
|
|
129
140
|
Rule(
|
|
@@ -132,10 +143,11 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
132
143
|
rule('SP'),
|
|
133
144
|
rule('HTAB'),
|
|
134
145
|
),
|
|
135
|
-
|
|
146
|
+
channel=Channel.SPACE,
|
|
136
147
|
),
|
|
137
148
|
|
|
138
149
|
]
|
|
139
150
|
|
|
140
151
|
|
|
141
|
-
|
|
152
|
+
RAW_CORE_GRAMMAR = Grammar(*CORE_RULES)
|
|
153
|
+
CORE_GRAMMAR = optimize_grammar(RAW_CORE_GRAMMAR)
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import enum
|
|
2
|
+
import typing as ta
|
|
3
|
+
|
|
4
|
+
from omlish import check
|
|
5
|
+
from omlish import lang
|
|
6
|
+
|
|
7
|
+
from .errors import AbnfError
|
|
8
|
+
from .errors import AbnfIncompleteParseError
|
|
9
|
+
from .matches import Match
|
|
10
|
+
from .matches import longest_match
|
|
11
|
+
from .ops import Op
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
with lang.auto_proxy_import(globals()):
|
|
15
|
+
from . import parsing
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
##
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@enum.unique
class Channel(enum.Enum):
    """Category tag carried by every ``Rule``.

    ``Rule`` uses ``STRUCTURE`` when no channel is given explicitly. The
    numeric values are the ones ``enum.auto()`` would assign (1, 2, 3, ...)
    in declaration order.
    """

    STRUCTURE = 1
    CONTENT = 2
    COMMENT = 3
    SPACE = 4
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Rule(lang.Final):
    """A named grammar rule: a name bound to an op and tagged with a channel.

    The casefolded form of the name is computed once at construction and
    exposed as ``name_f``.
    """

    def __init__(
        self,
        name: str,
        op: Op,
        *,
        channel: Channel = Channel.STRUCTURE,
    ) -> None:
        """Create a rule.

        Args:
            name: Rule name; passed through ``check.non_empty_str``.
            op: The op this rule stands for; must be an ``Op`` instance.
            channel: Channel tag for the rule, stored as given.
        """
        super().__init__()

        checked_name = check.non_empty_str(name)
        checked_op = check.isinstance(op, Op)

        self._name = checked_name
        self._op = checked_op
        self._channel = channel

        self._name_f = name.casefold()

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f'{cls_name}({self._name!r}, channel={self._channel.name})'

    def replace_op(self, op: Op) -> 'Rule':
        """Return a new rule with the same name and channel but the given op."""
        return Rule(self._name, op, channel=self._channel)

    @property
    def name(self) -> str:
        """The rule name as stored at construction."""
        return self._name

    @property
    def name_f(self) -> str:
        """The casefolded rule name."""
        return self._name_f

    @property
    def op(self) -> Op:
        """The op bound to this rule."""
        return self._op

    @property
    def channel(self) -> Channel:
        """The channel this rule is tagged with."""
        return self._channel
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
#
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class RulesCollection(lang.Final, ta.Collection[Rule]):
    """A collection of distinct rules indexed by name, casefolded name and op.

    Construction rejects (via ``check.not_in``) any rule that is already
    present, or whose name, casefolded name or op collides with a rule added
    earlier.
    """

    def __init__(self, *rules: ta.Union[Rule, 'RulesCollection']) -> None:
        """Build the collection.

        Args:
            *rules: Individual rules and/or existing collections; the members
                of a nested collection are added one by one.
        """
        super().__init__()

        rules_set: set[Rule] = set()
        rules_by_name: dict[str, Rule] = {}
        rules_by_name_f: dict[str, Rule] = {}
        rules_by_op: dict[Op, Rule] = {}

        def add(gr: Rule) -> None:
            check.isinstance(gr, Rule)

            # Every index must be collision-free before anything is stored.
            check.not_in(gr, rules_set)
            check.not_in(gr._name, rules_by_name)  # noqa
            check.not_in(gr._name_f, rules_by_name_f)  # noqa
            check.not_in(gr._op, rules_by_op)  # noqa

            rules_set.add(gr)
            rules_by_name[gr._name] = gr  # noqa
            rules_by_name_f[gr._name_f] = gr  # noqa
            rules_by_op[gr._op] = gr  # noqa

        for e in rules:
            if isinstance(e, RulesCollection):
                for c in e:
                    add(c)
            else:
                add(e)

        self._rules_set = rules_set
        self._rules_by_name: ta.Mapping[str, Rule] = rules_by_name
        self._rules_by_name_f: ta.Mapping[str, Rule] = rules_by_name_f
        self._rules_by_op: ta.Mapping[Op, Rule] = rules_by_op

    @property
    def rules_set(self) -> ta.AbstractSet[Rule]:
        """All rules as a set."""
        return self._rules_set

    @property
    def rules_by_name(self) -> ta.Mapping[str, Rule]:
        """Rules keyed by their exact name."""
        return self._rules_by_name

    @property
    def rules_by_name_f(self) -> ta.Mapping[str, Rule]:
        """Rules keyed by their casefolded name."""
        return self._rules_by_name_f

    @property
    def rules_by_op(self) -> ta.Mapping[Op, Rule]:
        """Rules keyed by their op."""
        return self._rules_by_op

    #

    def __len__(self) -> int:
        return len(self._rules_set)

    def __iter__(self) -> ta.Iterator[Rule]:
        # Iterate the name index rather than the set: it holds exactly the same
        # rules (each rule is added to every index together), and dicts
        # preserve insertion order, so iteration is reproducible between runs.
        return iter(self._rules_by_name.values())

    def __contains__(self, item: Rule) -> bool:  # type: ignore[override]
        return item in self._rules_set

    #

    def rule(self, name: str) -> Rule | None:
        """Look a rule up by name, ignoring case; return None if absent."""
        return self._rules_by_name_f.get(name.casefold())
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
##
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class Grammar(lang.Final):
    """A collection of rules together with an optional default root rule."""

    def __init__(
        self,
        *rules: Rule | RulesCollection,
        root: Rule | str | None = None,
    ) -> None:
        """Create a grammar.

        Args:
            *rules: Rules and/or rule collections. A single ``RulesCollection``
                argument is adopted as-is; anything else is handed to
                ``RulesCollection(...)``.
            root: Default root rule. A string is resolved against the
                casefolded rule names; a ``Rule`` (or None) is stored as given.

        Raises:
            KeyError: If ``root`` is a string that names no rule.
        """
        super().__init__()

        if len(rules) == 1 and isinstance(r0 := rules[0], RulesCollection):
            self._rules = r0
        else:
            self._rules = RulesCollection(*rules)

        if isinstance(root, str):
            root = self._rules.rules_by_name_f[root.casefold()]
        self._root = root

    @property
    def rules(self) -> RulesCollection:
        """The grammar's rules."""
        return self._rules

    @property
    def root(self) -> Rule | None:
        """The default root rule, if one was configured."""
        return self._root

    #

    def rule(self, name: str) -> Rule | None:
        """Look a rule up by name, ignoring case; return None if absent."""
        return self._rules.rule(name)

    def replace_rules(self, *rules: Rule) -> 'Grammar':
        """Return a grammar over ``rules``, keeping the current root by name.

        Returns ``self`` unchanged when the new rule set equals the current
        one. Otherwise the root is re-resolved by name in the new collection,
        so a new collection lacking that name raises ``KeyError``.
        """
        rc = RulesCollection(*rules)
        if rc.rules_set == self._rules.rules_set:
            return self

        return Grammar(
            rc,
            root=self._root.name if self._root is not None else None,
        )

    #

    def iter_parse(
        self,
        source: str,
        root: Rule | str | None = None,
        *,
        start: int = 0,
        debug: int = 0,
        **kwargs: ta.Any,
    ) -> ta.Iterator[Match]:
        """Iterate over the matches of ``root`` against ``source``.

        Args:
            source: Text to parse.
            root: Rule to start from. None selects the grammar's default root,
                a string is resolved against the casefolded rule names, and a
                ``Rule`` must be a member of this grammar.
            start: Offset handed to the parser as the starting position.
            debug: Forwarded to the parser.
            **kwargs: Forwarded to the parser.

        Raises:
            AbnfError: If no root is given and the grammar has no default root.
            KeyError: If ``root`` is a string that names no rule.
        """
        if root is None:
            if (root := self._root) is None:
                raise AbnfError('No root or default root specified')
        elif isinstance(root, str):
            root = self._rules.rules_by_name_f[root.casefold()]
        else:
            root = check.in_(check.isinstance(root, Rule), self._rules)

        return parsing._iter_parse(  # noqa
            self,
            source,
            root._op,  # noqa
            start,
            debug=debug,
            **kwargs,
        )

    def parse(
        self,
        source: str,
        root: Rule | str | None = None,
        *,
        start: int = 0,
        complete: bool = False,
        debug: int = 0,
        **kwargs: ta.Any,
    ) -> Match | None:
        """Return the longest match of ``root`` against ``source``, if any.

        ``root`` is forwarded unchanged to ``iter_parse`` and therefore accepts
        the same values (a ``Rule``, a rule name, or None for the default).

        Args:
            source: Text to parse.
            root: Rule to start from; see ``iter_parse``.
            start: Offset handed to the parser as the starting position.
            complete: When true, require the match to span exactly
                ``start`` to ``len(source)``.
            debug: Forwarded to the parser.
            **kwargs: Forwarded to the parser.

        Returns:
            The longest match, or None when nothing matched.

        Raises:
            AbnfIncompleteParseError: If ``complete`` is set and the longest
                match does not cover ``start`` to ``len(source)``.
        """
        if (match := longest_match(self.iter_parse(
            source,
            root,
            start=start,
            debug=debug,
            **kwargs,
        ))) is None:
            return None

        if complete and (match.start, match.end) != (start, len(source)):
            raise AbnfIncompleteParseError

        return match
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import itertools
|
|
3
|
+
import typing as ta
|
|
4
|
+
|
|
5
|
+
from omlish import lang
|
|
6
|
+
|
|
7
|
+
from .internal import Regex
|
|
8
|
+
from .ops import CaseInsensitiveStringLiteral
|
|
9
|
+
from .ops import Op
|
|
10
|
+
from .ops import RangeLiteral
|
|
11
|
+
from .ops import RuleRef
|
|
12
|
+
from .ops import StringLiteral
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
##
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@ta.final
|
|
19
|
+
class Match(ta.NamedTuple):
|
|
20
|
+
op: 'Op'
|
|
21
|
+
start: int
|
|
22
|
+
end: int
|
|
23
|
+
children: tuple['Match', ...]
|
|
24
|
+
|
|
25
|
+
@property
|
|
26
|
+
def length(self) -> int:
|
|
27
|
+
return self.end - self.start
|
|
28
|
+
|
|
29
|
+
#
|
|
30
|
+
|
|
31
|
+
def __repr__(self) -> str:
|
|
32
|
+
return (
|
|
33
|
+
f'{self.__class__.__name__}('
|
|
34
|
+
f'{self.op._match_repr()}, ' # noqa
|
|
35
|
+
f'{self.start}, {self.end}'
|
|
36
|
+
f'{f", {self.children!r}" if self.children else ""})'
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
def render_to(
|
|
40
|
+
self,
|
|
41
|
+
write: ta.Callable[[str], ta.Any],
|
|
42
|
+
*,
|
|
43
|
+
indent: int | None = None,
|
|
44
|
+
_depth: int = 0,
|
|
45
|
+
) -> None:
|
|
46
|
+
ix: str | None = (' ' * (indent * _depth)) if indent is not None else None
|
|
47
|
+
if ix:
|
|
48
|
+
write(ix)
|
|
49
|
+
|
|
50
|
+
o = self.op
|
|
51
|
+
|
|
52
|
+
if isinstance(o, (StringLiteral, CaseInsensitiveStringLiteral)):
|
|
53
|
+
write(f'literal<{self.start}-{self.end}>({o.value!r})')
|
|
54
|
+
|
|
55
|
+
elif isinstance(o, RangeLiteral):
|
|
56
|
+
write(f'literal<{self.start}-{self.end}>({o.value.lo!r}-{o.value.hi!r})')
|
|
57
|
+
|
|
58
|
+
elif isinstance(o, Regex):
|
|
59
|
+
write(f'regex<{self.start}-{self.end}>({o.pat.pattern!r})')
|
|
60
|
+
|
|
61
|
+
else:
|
|
62
|
+
write(f'{o.__class__.__name__.lower()}<{self.start}-{self.end}>')
|
|
63
|
+
|
|
64
|
+
if isinstance(o, RuleRef):
|
|
65
|
+
write(f':{o.name}')
|
|
66
|
+
|
|
67
|
+
if self.children:
|
|
68
|
+
write('(')
|
|
69
|
+
if ix is not None:
|
|
70
|
+
write('\n')
|
|
71
|
+
|
|
72
|
+
for i, c in enumerate(self.children):
|
|
73
|
+
if i and ix is None:
|
|
74
|
+
write(', ')
|
|
75
|
+
|
|
76
|
+
c.render_to(write, indent=indent, _depth=_depth + 1)
|
|
77
|
+
|
|
78
|
+
if ix is not None:
|
|
79
|
+
write(',\n')
|
|
80
|
+
|
|
81
|
+
if ix:
|
|
82
|
+
write(ix)
|
|
83
|
+
|
|
84
|
+
write(')')
|
|
85
|
+
|
|
86
|
+
def render(
|
|
87
|
+
self,
|
|
88
|
+
*,
|
|
89
|
+
indent: int | None = None,
|
|
90
|
+
) -> str:
|
|
91
|
+
sb = io.StringIO()
|
|
92
|
+
self.render_to(sb.write, indent=indent)
|
|
93
|
+
return sb.getvalue()
|
|
94
|
+
|
|
95
|
+
def __str__(self) -> str:
|
|
96
|
+
return self.render()
|
|
97
|
+
|
|
98
|
+
#
|
|
99
|
+
|
|
100
|
+
def replace_children(self, *children: 'Match') -> 'Match':
|
|
101
|
+
if lang.seqs_identical(children, self.children):
|
|
102
|
+
return self
|
|
103
|
+
|
|
104
|
+
return self._replace(children=children)
|
|
105
|
+
|
|
106
|
+
def map_children(self, fn: ta.Callable[['Match'], 'Match']) -> 'Match':
|
|
107
|
+
return self.replace_children(*map(fn, self.children))
|
|
108
|
+
|
|
109
|
+
def flat_map_children(self, fn: ta.Callable[['Match'], ta.Iterable['Match']]) -> 'Match':
|
|
110
|
+
return self.replace_children(*itertools.chain.from_iterable(map(fn, self.children)))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
##
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def longest_match(ms: ta.Iterable[Match]) -> Match | None:
    """Return the match with the greatest ``length``, or None if ``ms`` is empty.

    When several matches share the greatest length, the first one encountered
    is returned.
    """
    return max(ms, key=lambda cand: cand.length, default=None)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def filter_matches(
    fn: ta.Callable[[Match], bool],
    m: Match,
    *,
    keep_children: bool = False,
) -> Match:
    """Prune the descendants of ``m`` that ``fn`` rejects.

    ``m`` itself is always kept; ``fn`` is applied only to matches below it.
    A descendant accepted by ``fn`` is kept and its own children are pruned
    the same way. A rejected descendant is dropped along with its subtree,
    unless ``keep_children`` is set, in which case its children are examined
    in its place.

    Args:
        fn: Predicate deciding whether a match is kept.
        m: Root of the match tree to filter.
        keep_children: Whether children of a rejected match are considered
            in its place.

    Returns:
        The filtered tree rooted at ``m``.
    """

    def prune(node: Match) -> Match:
        return node.flat_map_children(expand)

    def expand(node: Match) -> ta.Iterable[Match]:
        if fn(node):
            return (prune(node),)

        if not keep_children:
            return ()

        return lang.flatten(expand(kid) for kid in node.children)

    return prune(m)
|
omextra/text/abnf/meta.py
CHANGED
|
@@ -7,12 +7,13 @@ from omlish import check
|
|
|
7
7
|
from omlish import dataclasses as dc
|
|
8
8
|
from omlish import lang
|
|
9
9
|
|
|
10
|
-
from .base import Grammar
|
|
11
|
-
from .base import Match
|
|
12
10
|
from .base import Op
|
|
13
|
-
from .base import Rule
|
|
14
11
|
from .core import CORE_RULES
|
|
15
12
|
from .errors import AbnfGrammarParseError
|
|
13
|
+
from .grammars import Channel
|
|
14
|
+
from .grammars import Grammar
|
|
15
|
+
from .grammars import Rule
|
|
16
|
+
from .matches import Match
|
|
16
17
|
from .ops import Repeat
|
|
17
18
|
from .ops import concat
|
|
18
19
|
from .ops import either
|
|
@@ -20,9 +21,10 @@ from .ops import literal
|
|
|
20
21
|
from .ops import option
|
|
21
22
|
from .ops import repeat
|
|
22
23
|
from .ops import rule
|
|
23
|
-
from .opto import
|
|
24
|
+
from .opto import optimize_grammar
|
|
25
|
+
from .utils import filter_match_channels
|
|
24
26
|
from .utils import fix_ws
|
|
25
|
-
from .utils import
|
|
27
|
+
from .utils import only_match_rules
|
|
26
28
|
from .visitors import RuleMatchVisitor
|
|
27
29
|
|
|
28
30
|
|
|
@@ -106,7 +108,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
106
108
|
rule('WSP'),
|
|
107
109
|
),
|
|
108
110
|
),
|
|
109
|
-
|
|
111
|
+
channel=Channel.SPACE,
|
|
110
112
|
),
|
|
111
113
|
|
|
112
114
|
Rule(
|
|
@@ -115,7 +117,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
115
117
|
rule('comment'),
|
|
116
118
|
rule('CRLF'),
|
|
117
119
|
),
|
|
118
|
-
|
|
120
|
+
channel=Channel.SPACE,
|
|
119
121
|
),
|
|
120
122
|
|
|
121
123
|
Rule(
|
|
@@ -130,6 +132,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
130
132
|
),
|
|
131
133
|
rule('CRLF'),
|
|
132
134
|
),
|
|
135
|
+
channel=Channel.COMMENT,
|
|
133
136
|
),
|
|
134
137
|
|
|
135
138
|
Rule(
|
|
@@ -410,12 +413,21 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
410
413
|
]
|
|
411
414
|
|
|
412
415
|
|
|
413
|
-
|
|
416
|
+
RAW_META_GRAMMAR = Grammar(
|
|
414
417
|
*CORE_RULES,
|
|
415
418
|
*META_GRAMMAR_RULES,
|
|
416
419
|
root='rulelist',
|
|
417
420
|
)
|
|
418
421
|
|
|
422
|
+
META_GRAMMAR = optimize_grammar(
|
|
423
|
+
RAW_META_GRAMMAR,
|
|
424
|
+
inline_channels=(
|
|
425
|
+
Channel.CONTENT,
|
|
426
|
+
Channel.COMMENT,
|
|
427
|
+
Channel.SPACE,
|
|
428
|
+
),
|
|
429
|
+
)
|
|
430
|
+
|
|
419
431
|
|
|
420
432
|
##
|
|
421
433
|
|
|
@@ -556,6 +568,9 @@ class MetaGrammarRuleMatchVisitor(RuleMatchVisitor[ta.Any]):
|
|
|
556
568
|
return self.QuotedString(self._source[m.start + 1:m.end - 1])
|
|
557
569
|
|
|
558
570
|
|
|
571
|
+
##
|
|
572
|
+
|
|
573
|
+
|
|
559
574
|
def parse_grammar(
|
|
560
575
|
source: str,
|
|
561
576
|
*,
|
|
@@ -566,14 +581,22 @@ def parse_grammar(
|
|
|
566
581
|
) -> Grammar:
|
|
567
582
|
source = fix_ws(source)
|
|
568
583
|
|
|
569
|
-
if (mg_m :=
|
|
570
|
-
META_GRAMMAR,
|
|
584
|
+
if (mg_m := META_GRAMMAR.parse(
|
|
571
585
|
source,
|
|
572
586
|
complete=True,
|
|
573
587
|
**kwargs,
|
|
574
588
|
)) is None:
|
|
575
589
|
raise AbnfGrammarParseError(source)
|
|
576
590
|
|
|
591
|
+
mg_m = only_match_rules(mg_m)
|
|
592
|
+
|
|
593
|
+
mg_m = filter_match_channels(
|
|
594
|
+
mg_m,
|
|
595
|
+
META_GRAMMAR,
|
|
596
|
+
keep=(Channel.STRUCTURE,),
|
|
597
|
+
keep_children=True,
|
|
598
|
+
)
|
|
599
|
+
|
|
577
600
|
check.isinstance(mg_m.op, Repeat)
|
|
578
601
|
|
|
579
602
|
mg_rmv = MetaGrammarRuleMatchVisitor(source)
|
|
@@ -582,14 +605,13 @@ def parse_grammar(
|
|
|
582
605
|
for gg_cm in mg_m.children
|
|
583
606
|
]
|
|
584
607
|
|
|
585
|
-
|
|
586
|
-
rules = [
|
|
587
|
-
r.replace_op(optimize_op(r.op))
|
|
588
|
-
for r in rules
|
|
589
|
-
]
|
|
590
|
-
|
|
591
|
-
return Grammar(
|
|
608
|
+
gram = Grammar(
|
|
592
609
|
*rules,
|
|
593
610
|
*(CORE_RULES if not no_core_rules else []),
|
|
594
611
|
root=root,
|
|
595
612
|
)
|
|
613
|
+
|
|
614
|
+
if not no_optimize:
|
|
615
|
+
gram = optimize_grammar(gram)
|
|
616
|
+
|
|
617
|
+
return gram
|