omextra 0.0.0.dev497__py3-none-any.whl → 0.0.0.dev499__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omextra/text/abnf/__init__.py +45 -14
- omextra/text/abnf/_dataclasses.py +246 -0
- omextra/text/abnf/base.py +6 -279
- omextra/text/abnf/core.py +22 -10
- omextra/text/abnf/grammars.py +235 -0
- omextra/text/abnf/matches.py +145 -0
- omextra/text/abnf/meta.py +39 -17
- omextra/text/abnf/ops.py +67 -5
- omextra/text/abnf/opto.py +167 -64
- omextra/text/abnf/parsing.py +53 -5
- omextra/text/abnf/utils.py +38 -41
- omextra/text/abnf/visitors.py +1 -1
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/METADATA +2 -2
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/RECORD +18 -16
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/WHEEL +0 -0
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/entry_points.txt +0 -0
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/licenses/LICENSE +0 -0
- {omextra-0.0.0.dev497.dist-info → omextra-0.0.0.dev499.dist-info}/top_level.txt +0 -0
omextra/text/abnf/ops.py
CHANGED
|
@@ -7,6 +7,7 @@ from omlish import lang
|
|
|
7
7
|
from .base import CompositeOp
|
|
8
8
|
from .base import LeafOp
|
|
9
9
|
from .base import Op
|
|
10
|
+
from .base import OpTuple
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
##
|
|
@@ -106,19 +107,51 @@ class Concat(CompositeOp, lang.Final):
|
|
|
106
107
|
def __init__(self, *children: Op) -> None:
    """
    Store the given child ops after validating them.

    Validation goes through the `check` helpers: more than one child must be given, every child must be an `Op`, and
    no two directly adjacent children may both be `Concat` instances.
    """

    super().__init__()

    check.arg(len(children) > 1)

    # Walk the children once, remembering the previous one so each adjacent pair can be inspected.
    prev: Op | None = None
    for cur in children:
        check.isinstance(cur, Op)
        if prev is not None:
            check.state(not (isinstance(cur, Concat) and isinstance(prev, Concat)))
        prev = cur

    self._children = children
|
|
112
116
|
|
|
113
117
|
@property
def children(self) -> OpTuple:
    """The child ops stored on this node, in the order they were given."""

    return self._children
|
|
116
120
|
|
|
117
121
|
def __repr__(self) -> str:
    """Render as `ClassName@<hex id>(<child reprs>)`; the id distinguishes otherwise identical-looking nodes."""

    cls_name = self.__class__.__name__
    child_reprs = ", ".join(repr(c) for c in self._children)
    return f'{cls_name}@{id(self):x}({child_reprs})'
|
|
119
123
|
|
|
124
|
+
def replace_children(self, *children: Op) -> Op:
    """
    Return a node equivalent to this one but with the given children.

    When the given children compare equal to the current ones this very instance is returned; otherwise the result is
    built through `concat`, so it is not necessarily a `Concat`.
    """

    unchanged = children == self._children
    return self if unchanged else concat(*children)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def concat(*children: Op) -> Op:
    """
    Build an op matching the given children in sequence, normalizing as needed.

    - A single child is returned as-is, with no wrapping `Concat`.
    - Runs of directly adjacent `Concat` children are merged into one `Concat` holding their combined children, because
      `Concat` itself rejects adjacent `Concat` children.
    - An empty argument list is rejected via `check.not_empty`.

    Returns:
        The single child, a merged `Concat`, or a `Concat` over the (possibly merged) children.
    """

    if len(children) == 1:
        return children[0]

    check.not_empty(children)

    # Each entry is either an op passed through untouched, or a plain list accumulating the children of a run of
    # adjacent Concat ops that will be turned back into a single Concat at the end.
    lst: list[Op | list[Op]] = []
    for c in children:
        if (
                lst and
                isinstance(c, Concat) and
                isinstance(ll := lst[-1], (Concat, list))
        ):
            if isinstance(ll, list):
                # Already accumulating a run - just extend it.
                ll.extend(c.children)
            else:
                # Previous entry is itself a Concat: start a run from *its children*. The Concat object is not
                # unpacked directly, as nothing here establishes that ops are iterable.
                lst[-1] = [*ll.children, *c.children]
        else:
            lst.append(c)

    if len(lst) == 1:
        # Everything collapsed into a single entry: avoid wrapping it in a redundant outer Concat.
        return Concat(*e) if isinstance(e := lst[0], list) else e

    return Concat(*[Concat(*e) if isinstance(e, list) else e for e in lst])
|
|
122
155
|
|
|
123
156
|
|
|
124
157
|
##
|
|
@@ -161,12 +194,19 @@ class Repeat(CompositeOp, lang.Final):
|
|
|
161
194
|
return self._child
|
|
162
195
|
|
|
163
196
|
@property
def children(self) -> OpTuple:
    """The repeated child op, wrapped in a one-element tuple."""

    only_child = self._child
    return (only_child,)
|
|
166
199
|
|
|
167
200
|
def __repr__(self) -> str:
    """Render as `ClassName@<hex id>(<times>, <child repr>)`."""

    cls_name = self.__class__.__name__
    return f'{cls_name}@{id(self):x}({self._times}, {self._child!r})'
|
|
169
202
|
|
|
203
|
+
def replace_children(self, *children: Op) -> Op:
    """
    Return a node equivalent to this one but repeating the given child.

    Exactly one child is expected (enforced through `check.single`). If it compares equal to the current child this
    very instance is returned; otherwise a new `Repeat` with the same times is built.
    """

    new_child = check.single(children)
    return self if new_child == self._child else Repeat(self._times, new_child)
|
|
209
|
+
|
|
170
210
|
|
|
171
211
|
@ta.overload
|
|
172
212
|
def repeat(child: Op) -> Repeat: # noqa
|
|
@@ -238,7 +278,7 @@ class Either(CompositeOp, lang.Final):
|
|
|
238
278
|
self._first_match = first_match
|
|
239
279
|
|
|
240
280
|
@property
def children(self) -> OpTuple:
    """The alternative child ops stored on this node."""

    return self._children
|
|
243
283
|
|
|
244
284
|
@property
|
|
@@ -252,6 +292,12 @@ class Either(CompositeOp, lang.Final):
|
|
|
252
292
|
f'{", first_match=True" if self._first_match else ""})'
|
|
253
293
|
)
|
|
254
294
|
|
|
295
|
+
def replace_children(self, *children: Op) -> Op:
    """
    Return a node equivalent to this one but with the given alternatives.

    If the given children compare equal to the current ones this very instance is returned; otherwise a new `Either`
    is built, carrying over this node's `first_match` setting.
    """

    unchanged = children == self._children
    return self if unchanged else Either(*children, first_match=self._first_match)
|
|
300
|
+
|
|
255
301
|
|
|
256
302
|
either = Either
|
|
257
303
|
|
|
@@ -265,12 +311,28 @@ class RuleRef(Op, lang.Final):
|
|
|
265
311
|
super().__init__()
|
|
266
312
|
|
|
267
313
|
self._name = check.non_empty_str(name)
|
|
314
|
+
|
|
268
315
|
self._name_f = name.casefold()
|
|
269
316
|
|
|
317
|
+
def coalesce(self, other: Op) -> Op:
    """
    Prefer this node over an equivalent `RuleRef`.

    Op nodes are compared by identity, and transformations signal "nothing changed" by returning the identical node
    instance. To keep that working, this returns `self` when `other` is a `RuleRef` with the same case-folded name,
    and returns `other` untouched in every other case.
    """

    same_rule = isinstance(other, RuleRef) and other.name_f == self.name_f
    return self if same_rule else other
|
|
327
|
+
|
|
270
328
|
@property
def name(self) -> str:
    """The referenced rule's name, exactly as it was given."""

    rule_name = self._name
    return rule_name
|
|
273
331
|
|
|
332
|
+
@property
def name_f(self) -> str:
    """The stored case-folded form of the rule name, used for case-insensitive comparison."""

    folded = self._name_f
    return folded
|
|
335
|
+
|
|
274
336
|
def __repr__(self) -> str:
    """Render as `ClassName@<hex id>(<name repr>)`."""

    cls_name = self.__class__.__name__
    return f'{cls_name}@{id(self):x}({self._name!r})'
|
|
276
338
|
|
omextra/text/abnf/opto.py
CHANGED
|
@@ -1,18 +1,24 @@
|
|
|
1
1
|
"""
|
|
2
2
|
TODO:
|
|
3
|
-
-
|
|
4
|
-
-
|
|
5
|
-
|
|
3
|
+
- origin tracking?
|
|
4
|
+
- minor opts:
|
|
5
|
+
- merge concat(range, range)
|
|
6
6
|
"""
|
|
7
|
+
import abc
|
|
7
8
|
import re
|
|
8
9
|
import typing as ta
|
|
9
10
|
|
|
10
11
|
from omlish import check
|
|
12
|
+
from omlish import dataclasses as dc
|
|
13
|
+
from omlish import lang
|
|
11
14
|
|
|
15
|
+
from .base import CompositeOp
|
|
12
16
|
from .base import Op
|
|
17
|
+
from .grammars import Channel
|
|
18
|
+
from .grammars import Grammar
|
|
19
|
+
from .grammars import Rule
|
|
13
20
|
from .internal import Regex
|
|
14
21
|
from .ops import CaseInsensitiveStringLiteral
|
|
15
|
-
from .ops import CompositeOp
|
|
16
22
|
from .ops import Concat
|
|
17
23
|
from .ops import Either
|
|
18
24
|
from .ops import RangeLiteral
|
|
@@ -24,42 +30,109 @@ from .ops import StringLiteral
|
|
|
24
30
|
##
|
|
25
31
|
|
|
26
32
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
33
|
+
@dc.dataclass(frozen=True)
class _RegexItem(lang.Abstract):
    """
    Intermediate representation of something that can be rendered as a regex pattern fragment.

    Subclasses provide `pat`; the classmethods here convert ops (or pass through existing items) into items.
    """

    @property
    @abc.abstractmethod
    def pat(self) -> str:
        """The regex pattern text for this item."""

        raise NotImplementedError

    @classmethod
    def of_op(cls, op: Op) -> ta.Optional['_RegexItem']:
        """Return the item for a leaf op that maps directly onto a regex, or None for any other op."""

        if isinstance(op, StringLiteral):
            return _StringLiteralRegexItem(op.value)

        if isinstance(op, CaseInsensitiveStringLiteral):
            return _CaseInsensitiveStringLiteralRegexItem(op.value)

        if isinstance(op, RangeLiteral):
            # Both bounds are escaped so they stay literal inside the character class.
            bounds = op.value
            return _RegexRegexItem(f'[{re.escape(bounds.lo)}-{re.escape(bounds.hi)}]')

        if isinstance(op, Regex):
            return _RegexRegexItem(op.pat.pattern)

        return None

    @classmethod
    def of(cls, obj: ta.Union['_RegexItem', Op, None]) -> ta.Optional['_RegexItem']:
        """
        Coerce `obj` to an item: None and existing items pass through, ops go through `of_op`.

        Raises:
            TypeError: if `obj` is none of the accepted kinds.
        """

        if obj is None:
            return None

        if isinstance(obj, _RegexItem):
            return obj

        if isinstance(obj, Op):
            return cls.of_op(obj)

        raise TypeError(obj)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dc.dataclass(frozen=True)
class _StringLiteralRegexItem(_RegexItem, lang.Final):
    """Regex item for literal text; its pattern is the text with regex metacharacters escaped."""

    s: str  # the raw literal text

    @property
    def pat(self) -> str:
        escaped = re.escape(self.s)
        return escaped
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dc.dataclass(frozen=True)
class _CaseInsensitiveStringLiteralRegexItem(_RegexItem, lang.Final):
    """Regex item for literal text matched ignoring case, via a scoped `(?i:...)` group around the escaped text."""

    s: str  # the raw literal text

    @property
    def pat(self) -> str:
        escaped = re.escape(self.s)
        return f'(?i:{escaped})'
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dc.dataclass(frozen=True)
class _RegexRegexItem(_RegexItem, lang.Final):
    """Regex item that already holds finished pattern text, returned verbatim."""

    ps: str  # the pattern text

    @property
    def pat(self) -> str:
        pattern_text = self.ps
        return pattern_text
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _regex_item_transform_op(op: Op) -> _RegexItem | None:
|
|
102
|
+
if isinstance(op, (StringLiteral, CaseInsensitiveStringLiteral, Regex)):
|
|
103
|
+
return None
|
|
33
104
|
|
|
34
105
|
elif isinstance(op, RangeLiteral):
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
return
|
|
106
|
+
# Unlike other leafs we eagerly transform RangeLiteral to a regex as it's probably faster than the python impl,
|
|
107
|
+
# even alone.
|
|
108
|
+
return _RegexItem.of_op(op)
|
|
38
109
|
|
|
39
110
|
elif isinstance(op, RuleRef):
|
|
40
111
|
return None
|
|
41
112
|
|
|
42
|
-
elif isinstance(op, Regex):
|
|
43
|
-
return op.pat.pattern
|
|
44
|
-
|
|
45
113
|
elif isinstance(op, Concat):
|
|
46
|
-
|
|
47
|
-
if
|
|
114
|
+
children = [_regex_item_transform_op(child) or _RegexItem.of(child) for child in op.children]
|
|
115
|
+
if all(ca is not None for ca in children):
|
|
116
|
+
return _RegexRegexItem(''.join(check.not_none(ca).pat for ca in children))
|
|
117
|
+
|
|
118
|
+
if not any(ca is not None for ca in children):
|
|
48
119
|
return None
|
|
49
|
-
|
|
120
|
+
|
|
121
|
+
# FIXME: merge adjacent
|
|
122
|
+
return None
|
|
50
123
|
|
|
51
124
|
elif isinstance(op, Repeat):
|
|
52
|
-
|
|
125
|
+
child = _RegexItem.of(_regex_item_transform_op(op.child))
|
|
126
|
+
if child is None:
|
|
53
127
|
return None
|
|
54
128
|
|
|
55
129
|
# Wrap the child pattern in a non-capturing group if needed to ensure correct quantification. A pattern needs
|
|
56
130
|
# wrapping if it contains multiple elements or operators (e.g., 'ab', 'a|b'). Single character classes [a-z] and
|
|
57
131
|
# single escaped chars don't need wrapping.
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
)
|
|
62
|
-
if needs_group:
|
|
132
|
+
if (
|
|
133
|
+
len(child_pat := child.pat) > 1 and
|
|
134
|
+
not (child_pat.startswith('[') and child_pat.endswith(']'))
|
|
135
|
+
):
|
|
63
136
|
child_pat = f'(?:{child_pat})'
|
|
64
137
|
|
|
65
138
|
times = op.times
|
|
@@ -76,7 +149,7 @@ def _build_op_regex_pat(op: Op, pats_by_op: ta.Mapping[Op, str | None]) -> str |
|
|
|
76
149
|
else:
|
|
77
150
|
quantifier = f'{{{times.min},{times.max}}}'
|
|
78
151
|
|
|
79
|
-
return child_pat + quantifier
|
|
152
|
+
return _RegexRegexItem(child_pat + quantifier)
|
|
80
153
|
|
|
81
154
|
elif isinstance(op, Either):
|
|
82
155
|
# Only convert Either if first_match is True, as regex alternation uses first-match semantics. ABNF Either with
|
|
@@ -84,71 +157,101 @@ def _build_op_regex_pat(op: Op, pats_by_op: ta.Mapping[Op, str | None]) -> str |
|
|
|
84
157
|
if not op.first_match:
|
|
85
158
|
return None
|
|
86
159
|
|
|
87
|
-
|
|
88
|
-
if
|
|
160
|
+
children = [_regex_item_transform_op(child) or _RegexItem.of(child) for child in op.children]
|
|
161
|
+
if all(ca is not None for ca in children):
|
|
162
|
+
# Build regex alternation. Use a capturing group for the alternation
|
|
163
|
+
return _RegexRegexItem(''.join([
|
|
164
|
+
'(',
|
|
165
|
+
'|'.join(check.not_none(ca).pat for ca in children),
|
|
166
|
+
')',
|
|
167
|
+
]))
|
|
168
|
+
|
|
169
|
+
if not any(ca is not None for ca in children):
|
|
89
170
|
return None
|
|
90
171
|
|
|
91
|
-
#
|
|
92
|
-
return
|
|
172
|
+
# FIXME: merge adjacent
|
|
173
|
+
return None
|
|
93
174
|
|
|
94
175
|
else:
|
|
95
176
|
raise TypeError(op)
|
|
96
177
|
|
|
97
178
|
|
|
98
|
-
def
|
|
99
|
-
|
|
179
|
+
def _regex_transform_op(op: Op) -> Op:
    """
    Replace `op` with a compiled `Regex` op when it can be expressed as a regex item.

    Returns `op` itself when `_regex_item_transform_op` yields nothing, preserving node identity.

    Raises:
        TypeError: if the item transform returns something that is neither None nor a `_RegexItem`.
    """

    item = _regex_item_transform_op(op)

    if item is None:
        return op

    if not isinstance(item, _RegexItem):
        raise TypeError(item)

    return Regex(re.compile(item.pat))
|
|
113
190
|
|
|
114
|
-
elif isinstance(op, Repeat):
|
|
115
|
-
new_child = _regex_transform_single_op(op.child, pats_by_op)
|
|
116
|
-
if new_child == op.child:
|
|
117
|
-
return op
|
|
118
191
|
|
|
119
|
-
|
|
192
|
+
##
|
|
120
193
|
|
|
121
|
-
elif isinstance(op, Either):
|
|
122
|
-
new_children = tuple(_regex_transform_single_op(child, pats_by_op) for child in op.children)
|
|
123
|
-
if new_children == op.children:
|
|
124
|
-
return op
|
|
125
194
|
|
|
126
|
-
|
|
195
|
+
def optimize_op(op: Op) -> Op:
    """Apply the op-level optimizations - currently just the regex transform - and return the resulting op."""

    return _regex_transform_op(op)
|
|
129
199
|
|
|
130
200
|
|
|
131
|
-
|
|
132
|
-
|
|
201
|
+
##
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _inline_rules(fn: ta.Callable[[Rule], bool], gram: Grammar) -> Grammar:
    """
    Return a grammar in which references to rules accepted by `fn` are replaced by those rules' ops.

    Inlining is recursive: the substituted op is itself rewritten. A reference is left in place when it names the rule
    currently being rewritten, when the grammar has no such rule, or when `fn` rejects the rule.

    Args:
        fn: Predicate deciding whether a given rule should be inlined at its reference sites.
        gram: The grammar to rewrite.

    Returns:
        The grammar produced by `gram.replace_rules` with every rule's op rewritten.
    """

    # Rebound by the loop at the bottom before each top-level rec_op call; read by rec_op through the closure.
    cur_rule: Rule

    # Memo of already-inlined rule ops, keyed by the rule's case-folded name so that references differing only in case
    # share one entry (the lookup and both stores previously used differently-sourced names).
    inlined_rules: dict[str, Op] = {}

    def rec_op(op: Op) -> Op:
        if isinstance(op, RuleRef):
            # Never inline a rule into itself.
            if op.name_f == cur_rule.name_f:
                return op

            if (r := gram.rule(op.name)) is None or not fn(r):
                return op

            key = r.name_f

            try:
                return inlined_rules[key]
            except KeyError:
                pass

            # Seed the memo with the reference itself before recursing: if the rule is reached again while it is being
            # inlined (a reference cycle), the reference is kept rather than recursing forever.
            inlined_rules[key] = op
            i_op = rec_op(r.op)
            inlined_rules[key] = i_op

            return op.coalesce(i_op)

        elif isinstance(op, CompositeOp):
            return op.replace_children(*map(rec_op, op.children))

        else:
            return op

    new_rules: list[Rule] = []
    for rule in gram.rules:
        cur_rule = rule
        new_rules.append(rule.replace_op(rec_op(rule.op)))

    return gram.replace_rules(*new_rules)
|
|
146
239
|
|
|
147
240
|
|
|
148
241
|
##
|
|
149
242
|
|
|
150
243
|
|
|
151
|
-
def
|
|
152
|
-
|
|
244
|
+
def optimize_grammar(
        gram: Grammar,
        *,
        inline_channels: ta.Container[Channel] | None = (Channel.SPACE,),
) -> Grammar:
    """
    Optimize every rule of a grammar.

    First, if `inline_channels` is truthy, rules whose channel is contained in it are inlined at their reference
    sites. Then `optimize_op` is applied to each rule's op.

    Args:
        gram: The grammar to optimize.
        inline_channels: Channels whose rules get inlined; a falsy value skips inlining.

    Returns:
        The rewritten grammar.
    """

    if inline_channels:
        def should_inline(rule: Rule) -> bool:
            return rule.channel in inline_channels

        gram = _inline_rules(should_inline, gram)

    optimized_rules = [
        rule.replace_op(optimize_op(rule.op))
        for rule in gram.rules
    ]

    return gram.replace_rules(*optimized_rules)
|
omextra/text/abnf/parsing.py
CHANGED
|
@@ -2,10 +2,12 @@ import typing as ta
|
|
|
2
2
|
|
|
3
3
|
from omlish import check
|
|
4
4
|
|
|
5
|
-
from .base import Grammar
|
|
6
|
-
from .base import Match
|
|
7
5
|
from .base import Op
|
|
6
|
+
from .grammars import Grammar
|
|
7
|
+
from .grammars import Rule
|
|
8
8
|
from .internal import Regex
|
|
9
|
+
from .matches import Match
|
|
10
|
+
from .matches import longest_match
|
|
9
11
|
from .ops import CaseInsensitiveStringLiteral
|
|
10
12
|
from .ops import Concat
|
|
11
13
|
from .ops import Either
|
|
@@ -35,6 +37,8 @@ class _Parser:
|
|
|
35
37
|
self._source = source
|
|
36
38
|
self._max_steps = max_steps
|
|
37
39
|
|
|
40
|
+
self._rules = self._grammar._rules # Noqa
|
|
41
|
+
|
|
38
42
|
self._dispatch: dict[type[Op], ta.Any] = {
|
|
39
43
|
StringLiteral: self._iter_parse_string_literal,
|
|
40
44
|
CaseInsensitiveStringLiteral: self._iter_parse_case_insensitive_string_literal,
|
|
@@ -67,6 +71,7 @@ class _Parser:
|
|
|
67
71
|
source = self._source[start] # noqa
|
|
68
72
|
except IndexError:
|
|
69
73
|
return
|
|
74
|
+
|
|
70
75
|
# ranges are always case-sensitive
|
|
71
76
|
if (value := op._value).lo <= source <= value.hi: # noqa
|
|
72
77
|
yield Match(op, start, start + 1, ())
|
|
@@ -160,7 +165,7 @@ class _Parser:
|
|
|
160
165
|
return
|
|
161
166
|
|
|
162
167
|
def _iter_parse_rule_ref(self, op: RuleRef, start: int) -> ta.Iterator[Match]:
    """
    Yield matches for a rule reference at `start`.

    The referenced rule's op is looked up by case-folded name and parsed; each resulting match is wrapped in a match
    for the reference itself, spanning the same range.
    """

    rule_op = self._rules._rules_by_name_f[op._name_f].op  # noqa
    for child_match in self.iter_parse(rule_op, start):
        yield Match(op, child_match.start, child_match.end, (child_match,))
|
|
166
171
|
|
|
@@ -225,7 +230,7 @@ class _DebugParser(_Parser):
|
|
|
225
230
|
ps = check.isinstance(op, RuleRef).name
|
|
226
231
|
else:
|
|
227
232
|
ps = self._op_str(op)
|
|
228
|
-
body = f'{start}:{self._source[start]!r} {ps}'
|
|
233
|
+
body = f'{start}:{self._source[start] if start < len(self._source) else ""!r} {ps}'
|
|
229
234
|
|
|
230
235
|
if self._level > 2:
|
|
231
236
|
self._write(f'{ws}+ {body}')
|
|
@@ -248,7 +253,7 @@ class _DebugParser(_Parser):
|
|
|
248
253
|
self._write(f'{ws}- {body}')
|
|
249
254
|
|
|
250
255
|
|
|
251
|
-
|
|
256
|
+
#
|
|
252
257
|
|
|
253
258
|
|
|
254
259
|
def _iter_parse(
|
|
@@ -276,3 +281,46 @@ def _iter_parse(
|
|
|
276
281
|
)
|
|
277
282
|
|
|
278
283
|
return parser.iter_parse(op, start)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
##
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def iter_parse(
        obj: Grammar | Rule | Op,
        src: str,
        *,
        root: str | None = None,
        start: int = 0,
) -> ta.Iterator[Match]:
    """
    Iterate over the matches of `src` against a grammar, a single rule, or a bare op.

    A `Rule` or `Op` is wrapped in a throwaway `Grammar` (a bare op becomes a rule named 'root'); in those cases `root`
    must not be given, which is enforced through `check.none`.

    Args:
        obj: What to parse with.
        src: The source text.
        root: Root rule name, only meaningful when `obj` is a `Grammar`.
        start: Offset in `src` to start parsing at.

    Raises:
        TypeError: if `obj` is not a `Grammar`, `Rule` or `Op`.
    """

    gram: Grammar

    if isinstance(obj, Grammar):
        gram = obj

    else:
        if not isinstance(obj, (Rule, Op)):
            raise TypeError(obj)

        check.none(root)

        if isinstance(obj, Rule):
            gram = Grammar(obj, root=obj)
        else:
            gram = Grammar(Rule('root', obj), root='root')

    return gram.iter_parse(src, root, start=start)
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def parse(
        obj: Grammar | Rule | Op,
        src: str,
        *,
        root: str | None = None,
        start: int = 0,
) -> Match | None:
    """
    Parse `src` and return what `longest_match` selects from the candidate matches, or None.

    Takes the same arguments as `iter_parse`, which produces the candidates.
    """

    candidates = iter_parse(obj, src, root=root, start=start)
    return longest_match(candidates)
|
omextra/text/abnf/utils.py
CHANGED
|
@@ -1,62 +1,59 @@
|
|
|
1
|
-
import itertools
|
|
2
1
|
import textwrap
|
|
3
2
|
import typing as ta
|
|
4
3
|
|
|
5
4
|
from omlish import check
|
|
6
5
|
|
|
7
|
-
from .
|
|
8
|
-
from .
|
|
6
|
+
from .grammars import Channel
|
|
7
|
+
from .grammars import Grammar
|
|
8
|
+
from .matches import Match
|
|
9
|
+
from .matches import filter_matches
|
|
9
10
|
from .ops import RuleRef
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
##
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
def
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
return rec(m)
|
|
16
|
+
def filter_match_channels(
        m: Match,
        g: Grammar,
        *,
        keep: ta.Container[Channel] | None = None,
        remove: ta.Container[Channel] | None = None,
        keep_children: bool = False,
) -> Match:
    """
    Filter a match tree by the channel of the rules its matches refer to.

    With neither `keep` nor `remove` given, `m` is returned untouched. Otherwise `filter_matches` is run with a
    predicate that accepts only `RuleRef` matches whose rule's channel is in `keep` (when given) and not in `remove`
    (when given).

    Args:
        m: Root of the match tree.
        g: Grammar used to resolve each referenced rule (the rule must exist - enforced through `check.not_none`).
        keep: Channels to retain, or None for no restriction.
        remove: Channels to drop, or None for no restriction.
        keep_children: Passed through to `filter_matches`.
    """

    if keep is None and remove is None:
        return m

    def accept(x: Match) -> bool:
        ref = x.op
        if not isinstance(ref, RuleRef):
            return False

        channel = check.not_none(g.rule(ref.name)).channel

        return (
            (keep is None or channel in keep) and
            not (remove is not None and channel in remove)
        )

    return filter_matches(accept, m, keep_children=keep_children)
|
|
35
46
|
|
|
36
47
|
|
|
37
48
|
#
|
|
38
49
|
|
|
39
50
|
|
|
40
|
-
def
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
**kwargs: ta.Any,
|
|
47
|
-
) -> Match | None:
|
|
48
|
-
if (match := grammar.parse(
|
|
49
|
-
source,
|
|
50
|
-
root,
|
|
51
|
-
start=start,
|
|
52
|
-
**kwargs,
|
|
53
|
-
)) is None:
|
|
54
|
-
return None
|
|
55
|
-
|
|
56
|
-
match = only_match_rules(match)
|
|
57
|
-
match = strip_insignificant_match_rules(match, grammar)
|
|
58
|
-
|
|
59
|
-
return match
|
|
51
|
+
def only_match_rules(m: Match) -> Match:
    """Run `filter_matches` over `m` accepting only matches whose op is a `RuleRef`, with `keep_children=True`."""

    def is_rule_match(x: Match) -> bool:
        return isinstance(x.op, RuleRef)

    return filter_matches(is_rule_match, m, keep_children=True)
|
|
60
57
|
|
|
61
58
|
|
|
62
59
|
##
|
omextra/text/abnf/visitors.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: omextra
|
|
3
|
-
Version: 0.0.0.
|
|
3
|
+
Version: 0.0.0.dev499
|
|
4
4
|
Summary: omextra
|
|
5
5
|
Author: wrmsr
|
|
6
6
|
License-Expression: BSD-3-Clause
|
|
@@ -14,7 +14,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
14
14
|
Requires-Python: >=3.13
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
16
16
|
License-File: LICENSE
|
|
17
|
-
Requires-Dist: omlish==0.0.0.
|
|
17
|
+
Requires-Dist: omlish==0.0.0.dev499
|
|
18
18
|
Dynamic: license-file
|
|
19
19
|
|
|
20
20
|
# Overview
|