omextra 0.0.0.dev496__py3-none-any.whl → 0.0.0.dev498__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omextra/text/abnf/__init__.py +51 -18
- omextra/text/abnf/_dataclasses.py +246 -0
- omextra/text/abnf/base.py +21 -257
- omextra/text/abnf/core.py +22 -10
- omextra/text/abnf/grammars.py +235 -0
- omextra/text/abnf/internal.py +1 -1
- omextra/text/abnf/matches.py +145 -0
- omextra/text/abnf/meta.py +45 -12
- omextra/text/abnf/ops.py +76 -9
- omextra/text/abnf/opto.py +257 -0
- omextra/text/abnf/parsing.py +134 -20
- omextra/text/abnf/utils.py +38 -41
- omextra/text/abnf/visitors.py +1 -1
- {omextra-0.0.0.dev496.dist-info → omextra-0.0.0.dev498.dist-info}/METADATA +2 -2
- {omextra-0.0.0.dev496.dist-info → omextra-0.0.0.dev498.dist-info}/RECORD +19 -16
- {omextra-0.0.0.dev496.dist-info → omextra-0.0.0.dev498.dist-info}/WHEEL +0 -0
- {omextra-0.0.0.dev496.dist-info → omextra-0.0.0.dev498.dist-info}/entry_points.txt +0 -0
- {omextra-0.0.0.dev496.dist-info → omextra-0.0.0.dev498.dist-info}/licenses/LICENSE +0 -0
- {omextra-0.0.0.dev496.dist-info → omextra-0.0.0.dev498.dist-info}/top_level.txt +0 -0
omextra/text/abnf/core.py
CHANGED
|
@@ -3,13 +3,15 @@ https://datatracker.ietf.org/doc/html/rfc5234
|
|
|
3
3
|
"""
|
|
4
4
|
import typing as ta
|
|
5
5
|
|
|
6
|
-
from .
|
|
7
|
-
from .
|
|
6
|
+
from .grammars import Channel
|
|
7
|
+
from .grammars import Grammar
|
|
8
|
+
from .grammars import Rule
|
|
8
9
|
from .ops import concat
|
|
9
10
|
from .ops import either
|
|
10
11
|
from .ops import literal
|
|
11
12
|
from .ops import repeat
|
|
12
13
|
from .ops import rule
|
|
14
|
+
from .opto import optimize_grammar
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
##
|
|
@@ -23,6 +25,7 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
23
25
|
literal('\x41', '\x5a'),
|
|
24
26
|
literal('\x61', '\x7a'),
|
|
25
27
|
),
|
|
28
|
+
channel=Channel.CONTENT,
|
|
26
29
|
),
|
|
27
30
|
|
|
28
31
|
Rule(
|
|
@@ -31,11 +34,13 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
31
34
|
literal('0'),
|
|
32
35
|
literal('1'),
|
|
33
36
|
),
|
|
37
|
+
channel=Channel.CONTENT,
|
|
34
38
|
),
|
|
35
39
|
|
|
36
40
|
Rule(
|
|
37
41
|
'CHAR',
|
|
38
42
|
literal('\x01', '\x7f'),
|
|
43
|
+
channel=Channel.CONTENT,
|
|
39
44
|
),
|
|
40
45
|
|
|
41
46
|
Rule(
|
|
@@ -44,12 +49,13 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
44
49
|
literal('\x00', '\x1f'),
|
|
45
50
|
literal('\x7f', case_sensitive=True),
|
|
46
51
|
),
|
|
52
|
+
channel=Channel.CONTENT,
|
|
47
53
|
),
|
|
48
54
|
|
|
49
55
|
Rule(
|
|
50
56
|
'CR',
|
|
51
57
|
literal('\x0d', case_sensitive=True),
|
|
52
|
-
|
|
58
|
+
channel=Channel.SPACE,
|
|
53
59
|
),
|
|
54
60
|
|
|
55
61
|
Rule(
|
|
@@ -58,17 +64,19 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
58
64
|
rule('CR'),
|
|
59
65
|
rule('LF'),
|
|
60
66
|
),
|
|
61
|
-
|
|
67
|
+
channel=Channel.SPACE,
|
|
62
68
|
),
|
|
63
69
|
|
|
64
70
|
Rule(
|
|
65
71
|
'DIGIT',
|
|
66
72
|
literal('\x30', '\x39'),
|
|
73
|
+
channel=Channel.CONTENT,
|
|
67
74
|
),
|
|
68
75
|
|
|
69
76
|
Rule(
|
|
70
77
|
'DQUOTE',
|
|
71
78
|
literal('\x22', case_sensitive=True),
|
|
79
|
+
channel=Channel.CONTENT,
|
|
72
80
|
),
|
|
73
81
|
|
|
74
82
|
Rule(
|
|
@@ -82,18 +90,19 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
82
90
|
literal('E'),
|
|
83
91
|
literal('F'),
|
|
84
92
|
),
|
|
93
|
+
channel=Channel.CONTENT,
|
|
85
94
|
),
|
|
86
95
|
|
|
87
96
|
Rule(
|
|
88
97
|
'HTAB',
|
|
89
98
|
literal('\x09', case_sensitive=True),
|
|
90
|
-
|
|
99
|
+
channel=Channel.SPACE,
|
|
91
100
|
),
|
|
92
101
|
|
|
93
102
|
Rule(
|
|
94
103
|
'LF',
|
|
95
104
|
literal('\x0a', case_sensitive=True),
|
|
96
|
-
|
|
105
|
+
channel=Channel.SPACE,
|
|
97
106
|
),
|
|
98
107
|
|
|
99
108
|
Rule(
|
|
@@ -107,23 +116,25 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
107
116
|
),
|
|
108
117
|
),
|
|
109
118
|
),
|
|
110
|
-
|
|
119
|
+
channel=Channel.SPACE,
|
|
111
120
|
),
|
|
112
121
|
|
|
113
122
|
Rule(
|
|
114
123
|
'OCTET',
|
|
115
124
|
literal('\x00', '\xff'),
|
|
125
|
+
channel=Channel.CONTENT,
|
|
116
126
|
),
|
|
117
127
|
|
|
118
128
|
Rule(
|
|
119
129
|
'SP',
|
|
120
130
|
literal('\x20', case_sensitive=True),
|
|
121
|
-
|
|
131
|
+
channel=Channel.SPACE,
|
|
122
132
|
),
|
|
123
133
|
|
|
124
134
|
Rule(
|
|
125
135
|
'VCHAR',
|
|
126
136
|
literal('\x21', '\x7e'),
|
|
137
|
+
channel=Channel.CONTENT,
|
|
127
138
|
),
|
|
128
139
|
|
|
129
140
|
Rule(
|
|
@@ -132,10 +143,11 @@ CORE_RULES: ta.Sequence[Rule] = [
|
|
|
132
143
|
rule('SP'),
|
|
133
144
|
rule('HTAB'),
|
|
134
145
|
),
|
|
135
|
-
|
|
146
|
+
channel=Channel.SPACE,
|
|
136
147
|
),
|
|
137
148
|
|
|
138
149
|
]
|
|
139
150
|
|
|
140
151
|
|
|
141
|
-
|
|
152
|
+
RAW_CORE_GRAMMAR = Grammar(*CORE_RULES)
|
|
153
|
+
CORE_GRAMMAR = optimize_grammar(RAW_CORE_GRAMMAR)
|
|
omextra/text/abnf/grammars.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import enum
|
|
2
|
+
import typing as ta
|
|
3
|
+
|
|
4
|
+
from omlish import check
|
|
5
|
+
from omlish import lang
|
|
6
|
+
|
|
7
|
+
from .errors import AbnfError
|
|
8
|
+
from .errors import AbnfIncompleteParseError
|
|
9
|
+
from .matches import Match
|
|
10
|
+
from .matches import longest_match
|
|
11
|
+
from .ops import Op
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
with lang.auto_proxy_import(globals()):
|
|
15
|
+
from . import parsing
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
##
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Channel(enum.Enum):
    """Category tag attached to a ``Rule``.

    Member values are the integers ``enum.auto()`` would assign (1..4 in declaration order); only member identity is
    meant to be compared.
    """

    STRUCTURE = 1  # the default channel of a ``Rule``
    CONTENT = 2
    COMMENT = 3
    SPACE = 4
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Rule(lang.Final):
    """A named grammar rule: a name bound to an ``Op`` and tagged with a ``Channel``.

    The name is stored both verbatim and casefolded. State is exposed through getter-only properties, and
    ``replace_op`` builds a new rule instead of mutating this one.
    """

    def __init__(
            self,
            name: str,
            op: Op,
            *,
            channel: Channel = Channel.STRUCTURE,
    ) -> None:
        super().__init__()

        # Validate eagerly so a malformed rule fails at construction time.
        self._name = check.non_empty_str(name)
        self._op = check.isinstance(op, Op)
        self._channel = channel

        # Casefolded copy of the name, kept for case-insensitive lookups.
        self._name_f = name.casefold()

    #

    @property
    def name(self) -> str:
        """The rule name exactly as given."""

        return self._name

    @property
    def name_f(self) -> str:
        """The casefolded rule name."""

        return self._name_f

    @property
    def op(self) -> Op:
        """The op this rule is bound to."""

        return self._op

    @property
    def channel(self) -> Channel:
        """The channel this rule is tagged with."""

        return self._channel

    #

    def replace_op(self, op: Op) -> 'Rule':
        """Return a new rule with the same name and channel but bound to ``op``."""

        return Rule(self._name, op, channel=self._channel)

    def __repr__(self) -> str:
        cls_name = type(self).__name__
        return f'{cls_name}({self._name!r}, channel={self._channel.name})'
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
#
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class RulesCollection(lang.Final, ta.Collection[Rule]):
    """An immutable collection of ``Rule`` objects, indexed several ways.

    Rules are indexed by identity (set), by exact name, by casefolded name and by op. Construction rejects any rule
    that collides with an already-added one on any of those four keys. Nested ``RulesCollection`` arguments are
    flattened into the new collection.

    Iteration yields rules in the order they were added.
    """

    def __init__(self, *rules: ta.Union[Rule, 'RulesCollection']) -> None:
        super().__init__()

        rules_set: set[Rule] = set()
        rules_by_name: dict[str, Rule] = {}
        rules_by_name_f: dict[str, Rule] = {}
        rules_by_op: dict[Op, Rule] = {}

        def add(gr: Rule) -> None:
            check.isinstance(gr, Rule)

            # Every index must stay collision-free: same object, same name, same casefolded name or same op.
            check.not_in(gr, rules_set)
            check.not_in(gr._name, rules_by_name)  # noqa
            check.not_in(gr._name_f, rules_by_name_f)  # noqa
            check.not_in(gr._op, rules_by_op)  # noqa

            rules_set.add(gr)
            rules_by_name[gr._name] = gr  # noqa
            rules_by_name_f[gr._name_f] = gr  # noqa
            rules_by_op[gr._op] = gr  # noqa

        for e in rules:
            if isinstance(e, RulesCollection):
                for c in e:
                    add(c)
            else:
                add(e)

        self._rules_set = rules_set
        self._rules_by_name: ta.Mapping[str, Rule] = rules_by_name
        self._rules_by_name_f: ta.Mapping[str, Rule] = rules_by_name_f
        self._rules_by_op: ta.Mapping[Op, Rule] = rules_by_op

    @property
    def rules_set(self) -> ta.AbstractSet[Rule]:
        """All rules, as a set."""

        return self._rules_set

    @property
    def rules_by_name(self) -> ta.Mapping[str, Rule]:
        """Rules keyed by their exact name."""

        return self._rules_by_name

    @property
    def rules_by_name_f(self) -> ta.Mapping[str, Rule]:
        """Rules keyed by their casefolded name."""

        return self._rules_by_name_f

    @property
    def rules_by_op(self) -> ta.Mapping[Op, Rule]:
        """Rules keyed by their op."""

        return self._rules_by_op

    #

    def __len__(self) -> int:
        return len(self._rules_set)

    def __iter__(self) -> ta.Iterator[Rule]:
        # Iterate the name index rather than the set: it holds exactly the same rules (both are filled together in
        # `add`), but a dict preserves insertion order whereas set iteration order is unspecified and may differ
        # between runs.
        return iter(self._rules_by_name.values())

    def __contains__(self, item: Rule) -> bool:  # type: ignore[override]
        return item in self._rules_set

    #

    def rule(self, name: str) -> Rule | None:
        """Look a rule up by name, case-insensitively; return ``None`` when there is no such rule."""

        return self._rules_by_name_f.get(name.casefold())
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
##
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class Grammar(lang.Final):
    """A set of rules plus an optional default root rule to parse from.

    Args:
        *rules: Rules and/or rule collections. A single ``RulesCollection`` argument is adopted as-is; anything else
            is gathered into a new ``RulesCollection``.
        root: Default root rule, given as a ``Rule`` or as a rule name (resolved case-insensitively; an unknown name
            raises ``KeyError`` from the lookup). ``None`` means no default root.
    """

    def __init__(
            self,
            *rules: Rule | RulesCollection,
            root: Rule | str | None = None,
    ) -> None:
        super().__init__()

        if len(rules) == 1 and isinstance(r0 := rules[0], RulesCollection):
            # Reuse an already-built collection instead of re-indexing it.
            self._rules = r0
        else:
            self._rules = RulesCollection(*rules)

        if isinstance(root, str):
            root = self._rules.rules_by_name_f[root.casefold()]
        self._root = root

    @property
    def rules(self) -> RulesCollection:
        """The grammar's rule collection."""

        return self._rules

    @property
    def root(self) -> Rule | None:
        """The default root rule, or ``None`` when none was configured."""

        return self._root

    #

    def rule(self, name: str) -> Rule | None:
        """Look a rule up by name, case-insensitively; return ``None`` when there is no such rule."""

        return self._rules.rule(name)

    def replace_rules(self, *rules: Rule) -> 'Grammar':
        """Return a grammar over ``rules``, or ``self`` when they are the same set of rules as the current ones.

        The default root is carried over by name, so it is re-resolved against the new rules.
        """

        rc = RulesCollection(*rules)
        if rc.rules_set == self._rules.rules_set:
            return self

        return Grammar(
            rc,
            root=self._root.name if self._root is not None else None,
        )

    #

    def iter_parse(
            self,
            source: str,
            root: Rule | str | None = None,
            *,
            start: int = 0,
            debug: int = 0,
            **kwargs: ta.Any,
    ) -> ta.Iterator[Match]:
        """Return the iterator of matches produced by parsing ``source`` from ``start`` with the root rule's op.

        Args:
            source: Text to parse.
            root: Root rule or rule name; falls back to the grammar's default root when ``None``.
            start: Offset in ``source`` to start from.
            debug: Forwarded to the parsing backend.
            **kwargs: Forwarded to the parsing backend.

        Raises:
            AbnfError: If neither ``root`` nor a default root is available.
            KeyError: If ``root`` is a name that is not in the grammar.
        """

        if root is None:
            if (root := self._root) is None:
                raise AbnfError('No root or default root specified')
        else:
            if isinstance(root, str):
                root = self._rules.rules_by_name_f[root.casefold()]
            else:
                # An explicit Rule must belong to this grammar.
                root = check.in_(check.isinstance(root, Rule), self._rules)

        return parsing._iter_parse(  # noqa
            self,
            source,
            root._op,  # noqa
            start,
            debug=debug,
            **kwargs,
        )

    def parse(
            self,
            source: str,
            root: Rule | str | None = None,
            *,
            start: int = 0,
            complete: bool = False,
            debug: int = 0,
            **kwargs: ta.Any,
    ) -> Match | None:
        """Parse ``source`` and return the longest match, or ``None`` when nothing matched.

        Args:
            source: Text to parse.
            root: Root rule or rule name, passed straight through to ``iter_parse``.
            start: Offset in ``source`` to start from.
            complete: When true, require the match to span exactly from ``start`` to the end of ``source``.
            debug: Forwarded to ``iter_parse``.
            **kwargs: Forwarded to ``iter_parse``.

        Raises:
            AbnfIncompleteParseError: If ``complete`` is set and the longest match does not cover
                ``source[start:]`` entirely.
        """

        if (match := longest_match(self.iter_parse(
                source,
                root,
                start=start,
                debug=debug,
                **kwargs,
        ))) is None:
            return None

        if complete and (match.start, match.end) != (start, len(source)):
            raise AbnfIncompleteParseError

        return match
|
omextra/text/abnf/internal.py
CHANGED
omextra/text/abnf/matches.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import itertools
|
|
3
|
+
import typing as ta
|
|
4
|
+
|
|
5
|
+
from omlish import lang
|
|
6
|
+
|
|
7
|
+
from .internal import Regex
|
|
8
|
+
from .ops import CaseInsensitiveStringLiteral
|
|
9
|
+
from .ops import Op
|
|
10
|
+
from .ops import RangeLiteral
|
|
11
|
+
from .ops import RuleRef
|
|
12
|
+
from .ops import StringLiteral
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
##
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@ta.final
|
|
19
|
+
class Match(ta.NamedTuple):
|
|
20
|
+
op: 'Op'
|
|
21
|
+
start: int
|
|
22
|
+
end: int
|
|
23
|
+
children: tuple['Match', ...]
|
|
24
|
+
|
|
25
|
+
@property
|
|
26
|
+
def length(self) -> int:
|
|
27
|
+
return self.end - self.start
|
|
28
|
+
|
|
29
|
+
#
|
|
30
|
+
|
|
31
|
+
def __repr__(self) -> str:
|
|
32
|
+
return (
|
|
33
|
+
f'{self.__class__.__name__}('
|
|
34
|
+
f'{self.op._match_repr()}, ' # noqa
|
|
35
|
+
f'{self.start}, {self.end}'
|
|
36
|
+
f'{f", {self.children!r}" if self.children else ""})'
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
def render_to(
|
|
40
|
+
self,
|
|
41
|
+
write: ta.Callable[[str], ta.Any],
|
|
42
|
+
*,
|
|
43
|
+
indent: int | None = None,
|
|
44
|
+
_depth: int = 0,
|
|
45
|
+
) -> None:
|
|
46
|
+
ix: str | None = (' ' * (indent * _depth)) if indent is not None else None
|
|
47
|
+
if ix:
|
|
48
|
+
write(ix)
|
|
49
|
+
|
|
50
|
+
o = self.op
|
|
51
|
+
|
|
52
|
+
if isinstance(o, (StringLiteral, CaseInsensitiveStringLiteral)):
|
|
53
|
+
write(f'literal<{self.start}-{self.end}>({o.value!r})')
|
|
54
|
+
|
|
55
|
+
elif isinstance(o, RangeLiteral):
|
|
56
|
+
write(f'literal<{self.start}-{self.end}>({o.value.lo!r}-{o.value.hi!r})')
|
|
57
|
+
|
|
58
|
+
elif isinstance(o, Regex):
|
|
59
|
+
write(f'regex<{self.start}-{self.end}>({o.pat.pattern!r})')
|
|
60
|
+
|
|
61
|
+
else:
|
|
62
|
+
write(f'{o.__class__.__name__.lower()}<{self.start}-{self.end}>')
|
|
63
|
+
|
|
64
|
+
if isinstance(o, RuleRef):
|
|
65
|
+
write(f':{o.name}')
|
|
66
|
+
|
|
67
|
+
if self.children:
|
|
68
|
+
write('(')
|
|
69
|
+
if ix is not None:
|
|
70
|
+
write('\n')
|
|
71
|
+
|
|
72
|
+
for i, c in enumerate(self.children):
|
|
73
|
+
if i and ix is None:
|
|
74
|
+
write(', ')
|
|
75
|
+
|
|
76
|
+
c.render_to(write, indent=indent, _depth=_depth + 1)
|
|
77
|
+
|
|
78
|
+
if ix is not None:
|
|
79
|
+
write(',\n')
|
|
80
|
+
|
|
81
|
+
if ix:
|
|
82
|
+
write(ix)
|
|
83
|
+
|
|
84
|
+
write(')')
|
|
85
|
+
|
|
86
|
+
def render(
|
|
87
|
+
self,
|
|
88
|
+
*,
|
|
89
|
+
indent: int | None = None,
|
|
90
|
+
) -> str:
|
|
91
|
+
sb = io.StringIO()
|
|
92
|
+
self.render_to(sb.write, indent=indent)
|
|
93
|
+
return sb.getvalue()
|
|
94
|
+
|
|
95
|
+
def __str__(self) -> str:
|
|
96
|
+
return self.render()
|
|
97
|
+
|
|
98
|
+
#
|
|
99
|
+
|
|
100
|
+
def replace_children(self, *children: 'Match') -> 'Match':
|
|
101
|
+
if lang.seqs_identical(children, self.children):
|
|
102
|
+
return self
|
|
103
|
+
|
|
104
|
+
return self._replace(children=children)
|
|
105
|
+
|
|
106
|
+
def map_children(self, fn: ta.Callable[['Match'], 'Match']) -> 'Match':
|
|
107
|
+
return self.replace_children(*map(fn, self.children))
|
|
108
|
+
|
|
109
|
+
def flat_map_children(self, fn: ta.Callable[['Match'], ta.Iterable['Match']]) -> 'Match':
|
|
110
|
+
return self.replace_children(*itertools.chain.from_iterable(map(fn, self.children)))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
##
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def longest_match(ms: ta.Iterable[Match]) -> Match | None:
    """Return the match with the greatest ``length``, or ``None`` when ``ms`` is empty.

    On ties the earliest such match wins, since ``max`` returns the first maximal item.
    """

    return max(ms, key=lambda m: m.length, default=None)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def filter_matches(
        fn: ta.Callable[[Match], bool],
        m: Match,
        *,
        keep_children: bool = False,
) -> Match:
    """Return ``m`` with its descendants filtered by the predicate ``fn``.

    ``m`` itself is never tested against ``fn``. A descendant accepted by ``fn`` is kept and filtered recursively. A
    rejected descendant is dropped with its whole subtree, unless ``keep_children`` is set, in which case its own
    (recursively filtered) children take its place.
    """

    def rebuild(node: Match) -> Match:
        # Keep `node` and filter whatever hangs below it.
        return node.flat_map_children(select)

    def select(node: Match) -> ta.Iterable[Match]:
        if fn(node):
            return (rebuild(node),)

        if not keep_children:
            return ()

        # Splice the rejected node's surviving descendants into its parent.
        return lang.flatten(select(child) for child in node.children)

    return rebuild(m)
|
omextra/text/abnf/meta.py
CHANGED
|
@@ -7,12 +7,13 @@ from omlish import check
|
|
|
7
7
|
from omlish import dataclasses as dc
|
|
8
8
|
from omlish import lang
|
|
9
9
|
|
|
10
|
-
from .base import Grammar
|
|
11
|
-
from .base import Match
|
|
12
10
|
from .base import Op
|
|
13
|
-
from .base import Rule
|
|
14
11
|
from .core import CORE_RULES
|
|
15
12
|
from .errors import AbnfGrammarParseError
|
|
13
|
+
from .grammars import Channel
|
|
14
|
+
from .grammars import Grammar
|
|
15
|
+
from .grammars import Rule
|
|
16
|
+
from .matches import Match
|
|
16
17
|
from .ops import Repeat
|
|
17
18
|
from .ops import concat
|
|
18
19
|
from .ops import either
|
|
@@ -20,8 +21,10 @@ from .ops import literal
|
|
|
20
21
|
from .ops import option
|
|
21
22
|
from .ops import repeat
|
|
22
23
|
from .ops import rule
|
|
24
|
+
from .opto import optimize_grammar
|
|
25
|
+
from .utils import filter_match_channels
|
|
23
26
|
from .utils import fix_ws
|
|
24
|
-
from .utils import
|
|
27
|
+
from .utils import only_match_rules
|
|
25
28
|
from .visitors import RuleMatchVisitor
|
|
26
29
|
|
|
27
30
|
|
|
@@ -105,7 +108,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
105
108
|
rule('WSP'),
|
|
106
109
|
),
|
|
107
110
|
),
|
|
108
|
-
|
|
111
|
+
channel=Channel.SPACE,
|
|
109
112
|
),
|
|
110
113
|
|
|
111
114
|
Rule(
|
|
@@ -114,7 +117,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
114
117
|
rule('comment'),
|
|
115
118
|
rule('CRLF'),
|
|
116
119
|
),
|
|
117
|
-
|
|
120
|
+
channel=Channel.SPACE,
|
|
118
121
|
),
|
|
119
122
|
|
|
120
123
|
Rule(
|
|
@@ -129,6 +132,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
129
132
|
),
|
|
130
133
|
rule('CRLF'),
|
|
131
134
|
),
|
|
135
|
+
channel=Channel.COMMENT,
|
|
132
136
|
),
|
|
133
137
|
|
|
134
138
|
Rule(
|
|
@@ -409,12 +413,21 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
409
413
|
]
|
|
410
414
|
|
|
411
415
|
|
|
412
|
-
|
|
416
|
+
RAW_META_GRAMMAR = Grammar(
|
|
413
417
|
*CORE_RULES,
|
|
414
418
|
*META_GRAMMAR_RULES,
|
|
415
419
|
root='rulelist',
|
|
416
420
|
)
|
|
417
421
|
|
|
422
|
+
META_GRAMMAR = optimize_grammar(
|
|
423
|
+
RAW_META_GRAMMAR,
|
|
424
|
+
inline_channels=(
|
|
425
|
+
Channel.CONTENT,
|
|
426
|
+
Channel.COMMENT,
|
|
427
|
+
Channel.SPACE,
|
|
428
|
+
),
|
|
429
|
+
)
|
|
430
|
+
|
|
418
431
|
|
|
419
432
|
##
|
|
420
433
|
|
|
@@ -555,30 +568,50 @@ class MetaGrammarRuleMatchVisitor(RuleMatchVisitor[ta.Any]):
|
|
|
555
568
|
return self.QuotedString(self._source[m.start + 1:m.end - 1])
|
|
556
569
|
|
|
557
570
|
|
|
571
|
+
##
|
|
572
|
+
|
|
573
|
+
|
|
558
574
|
def parse_grammar(
|
|
559
575
|
source: str,
|
|
560
576
|
*,
|
|
561
|
-
no_core_rules: bool = False,
|
|
562
577
|
root: str | None = None,
|
|
578
|
+
no_core_rules: bool = False,
|
|
579
|
+
no_optimize: bool = False,
|
|
563
580
|
**kwargs: ta.Any,
|
|
564
581
|
) -> Grammar:
|
|
565
582
|
source = fix_ws(source)
|
|
566
583
|
|
|
567
|
-
if (mg_m :=
|
|
568
|
-
META_GRAMMAR,
|
|
584
|
+
if (mg_m := META_GRAMMAR.parse(
|
|
569
585
|
source,
|
|
570
586
|
complete=True,
|
|
571
587
|
**kwargs,
|
|
572
588
|
)) is None:
|
|
573
589
|
raise AbnfGrammarParseError(source)
|
|
574
590
|
|
|
591
|
+
mg_m = only_match_rules(mg_m)
|
|
592
|
+
|
|
593
|
+
mg_m = filter_match_channels(
|
|
594
|
+
mg_m,
|
|
595
|
+
META_GRAMMAR,
|
|
596
|
+
keep=(Channel.STRUCTURE,),
|
|
597
|
+
keep_children=True,
|
|
598
|
+
)
|
|
599
|
+
|
|
575
600
|
check.isinstance(mg_m.op, Repeat)
|
|
576
601
|
|
|
577
602
|
mg_rmv = MetaGrammarRuleMatchVisitor(source)
|
|
578
|
-
rules = [
|
|
603
|
+
rules = [
|
|
604
|
+
check.isinstance(mg_rmv.visit_match(gg_cm), Rule)
|
|
605
|
+
for gg_cm in mg_m.children
|
|
606
|
+
]
|
|
579
607
|
|
|
580
|
-
|
|
608
|
+
gram = Grammar(
|
|
581
609
|
*rules,
|
|
582
610
|
*(CORE_RULES if not no_core_rules else []),
|
|
583
611
|
root=root,
|
|
584
612
|
)
|
|
613
|
+
|
|
614
|
+
if not no_optimize:
|
|
615
|
+
gram = optimize_grammar(gram)
|
|
616
|
+
|
|
617
|
+
return gram
|