omextra 0.0.0.dev497__py3-none-any.whl → 0.0.0.dev499__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
omextra/text/abnf/core.py CHANGED
@@ -3,13 +3,15 @@ https://datatracker.ietf.org/doc/html/rfc5234
3
3
  """
4
4
  import typing as ta
5
5
 
6
- from .base import Grammar
7
- from .base import Rule
6
+ from .grammars import Channel
7
+ from .grammars import Grammar
8
+ from .grammars import Rule
8
9
  from .ops import concat
9
10
  from .ops import either
10
11
  from .ops import literal
11
12
  from .ops import repeat
12
13
  from .ops import rule
14
+ from .opto import optimize_grammar
13
15
 
14
16
 
15
17
  ##
@@ -23,6 +25,7 @@ CORE_RULES: ta.Sequence[Rule] = [
23
25
  literal('\x41', '\x5a'),
24
26
  literal('\x61', '\x7a'),
25
27
  ),
28
+ channel=Channel.CONTENT,
26
29
  ),
27
30
 
28
31
  Rule(
@@ -31,11 +34,13 @@ CORE_RULES: ta.Sequence[Rule] = [
31
34
  literal('0'),
32
35
  literal('1'),
33
36
  ),
37
+ channel=Channel.CONTENT,
34
38
  ),
35
39
 
36
40
  Rule(
37
41
  'CHAR',
38
42
  literal('\x01', '\x7f'),
43
+ channel=Channel.CONTENT,
39
44
  ),
40
45
 
41
46
  Rule(
@@ -44,12 +49,13 @@ CORE_RULES: ta.Sequence[Rule] = [
44
49
  literal('\x00', '\x1f'),
45
50
  literal('\x7f', case_sensitive=True),
46
51
  ),
52
+ channel=Channel.CONTENT,
47
53
  ),
48
54
 
49
55
  Rule(
50
56
  'CR',
51
57
  literal('\x0d', case_sensitive=True),
52
- insignificant=True,
58
+ channel=Channel.SPACE,
53
59
  ),
54
60
 
55
61
  Rule(
@@ -58,17 +64,19 @@ CORE_RULES: ta.Sequence[Rule] = [
58
64
  rule('CR'),
59
65
  rule('LF'),
60
66
  ),
61
- insignificant=True,
67
+ channel=Channel.SPACE,
62
68
  ),
63
69
 
64
70
  Rule(
65
71
  'DIGIT',
66
72
  literal('\x30', '\x39'),
73
+ channel=Channel.CONTENT,
67
74
  ),
68
75
 
69
76
  Rule(
70
77
  'DQUOTE',
71
78
  literal('\x22', case_sensitive=True),
79
+ channel=Channel.CONTENT,
72
80
  ),
73
81
 
74
82
  Rule(
@@ -82,18 +90,19 @@ CORE_RULES: ta.Sequence[Rule] = [
82
90
  literal('E'),
83
91
  literal('F'),
84
92
  ),
93
+ channel=Channel.CONTENT,
85
94
  ),
86
95
 
87
96
  Rule(
88
97
  'HTAB',
89
98
  literal('\x09', case_sensitive=True),
90
- insignificant=True,
99
+ channel=Channel.SPACE,
91
100
  ),
92
101
 
93
102
  Rule(
94
103
  'LF',
95
104
  literal('\x0a', case_sensitive=True),
96
- insignificant=True,
105
+ channel=Channel.SPACE,
97
106
  ),
98
107
 
99
108
  Rule(
@@ -107,23 +116,25 @@ CORE_RULES: ta.Sequence[Rule] = [
107
116
  ),
108
117
  ),
109
118
  ),
110
- insignificant=True,
119
+ channel=Channel.SPACE,
111
120
  ),
112
121
 
113
122
  Rule(
114
123
  'OCTET',
115
124
  literal('\x00', '\xff'),
125
+ channel=Channel.CONTENT,
116
126
  ),
117
127
 
118
128
  Rule(
119
129
  'SP',
120
130
  literal('\x20', case_sensitive=True),
121
- insignificant=True,
131
+ channel=Channel.SPACE,
122
132
  ),
123
133
 
124
134
  Rule(
125
135
  'VCHAR',
126
136
  literal('\x21', '\x7e'),
137
+ channel=Channel.CONTENT,
127
138
  ),
128
139
 
129
140
  Rule(
@@ -132,10 +143,11 @@ CORE_RULES: ta.Sequence[Rule] = [
132
143
  rule('SP'),
133
144
  rule('HTAB'),
134
145
  ),
135
- insignificant=True,
146
+ channel=Channel.SPACE,
136
147
  ),
137
148
 
138
149
  ]
139
150
 
140
151
 
141
- CORE_GRAMMAR = Grammar(*CORE_RULES)
152
+ RAW_CORE_GRAMMAR = Grammar(*CORE_RULES)
153
+ CORE_GRAMMAR = optimize_grammar(RAW_CORE_GRAMMAR)
@@ -0,0 +1,235 @@
1
+ import enum
2
+ import typing as ta
3
+
4
+ from omlish import check
5
+ from omlish import lang
6
+
7
+ from .errors import AbnfError
8
+ from .errors import AbnfIncompleteParseError
9
+ from .matches import Match
10
+ from .matches import longest_match
11
+ from .ops import Op
12
+
13
+
14
+ with lang.auto_proxy_import(globals()):
15
+ from . import parsing
16
+
17
+
18
+ ##
19
+
20
+
21
+ class Channel(enum.Enum):
22
+ STRUCTURE = enum.auto()
23
+ CONTENT = enum.auto()
24
+ COMMENT = enum.auto()
25
+ SPACE = enum.auto()
26
+
27
+
28
+ class Rule(lang.Final):
29
+ def __init__(
30
+ self,
31
+ name: str,
32
+ op: Op,
33
+ *,
34
+ channel: Channel = Channel.STRUCTURE,
35
+ ) -> None:
36
+ super().__init__()
37
+
38
+ self._name = check.non_empty_str(name)
39
+ self._op = check.isinstance(op, Op)
40
+ self._channel = channel
41
+
42
+ self._name_f = name.casefold()
43
+
44
+ def __repr__(self) -> str:
45
+ return f'{self.__class__.__name__}({self._name!r}, channel={self._channel.name})'
46
+
47
+ def replace_op(self, op: Op) -> 'Rule':
48
+ return Rule(
49
+ self._name,
50
+ op,
51
+ channel=self._channel,
52
+ )
53
+
54
+ @property
55
+ def name(self) -> str:
56
+ return self._name
57
+
58
+ @property
59
+ def name_f(self) -> str:
60
+ return self._name_f
61
+
62
+ @property
63
+ def op(self) -> Op:
64
+ return self._op
65
+
66
+ @property
67
+ def channel(self) -> Channel:
68
+ return self._channel
69
+
70
+
71
+ #
72
+
73
+
74
class RulesCollection(lang.Final, ta.Collection[Rule]):
    """An immutable collection of Rules, indexed by name, casefolded name, and op.

    Accepts Rules or other RulesCollections (whose contents are spliced in).
    Duplicate rules — by identity, exact name, casefolded name, or op — are
    rejected via ``check``.
    """

    def __init__(self, *rules: ta.Union[Rule, 'RulesCollection']) -> None:
        super().__init__()

        # Splice nested collections into one flat sequence first.
        flat: list[Rule] = []
        for item in rules:
            if isinstance(item, RulesCollection):
                flat.extend(item)
            else:
                flat.append(item)

        rules_set: set[Rule] = set()
        rules_by_name: dict[str, Rule] = {}
        rules_by_name_f: dict[str, Rule] = {}
        rules_by_op: dict[Op, Rule] = {}

        for gr in flat:
            check.isinstance(gr, Rule)

            # Reject duplicates on every index before mutating any of them.
            check.not_in(gr, rules_set)
            check.not_in(gr._name, rules_by_name)  # noqa
            check.not_in(gr._name_f, rules_by_name_f)  # noqa
            check.not_in(gr._op, rules_by_op)  # noqa

            rules_set.add(gr)
            rules_by_name[gr._name] = gr  # noqa
            rules_by_name_f[gr._name_f] = gr  # noqa
            rules_by_op[gr._op] = gr  # noqa

        self._rules_set = rules_set
        self._rules_by_name: ta.Mapping[str, Rule] = rules_by_name
        self._rules_by_name_f: ta.Mapping[str, Rule] = rules_by_name_f
        self._rules_by_op: ta.Mapping[Op, Rule] = rules_by_op

    @property
    def rules_set(self) -> ta.AbstractSet[Rule]:
        return self._rules_set

    @property
    def rules_by_name(self) -> ta.Mapping[str, Rule]:
        return self._rules_by_name

    @property
    def rules_by_name_f(self) -> ta.Mapping[str, Rule]:
        return self._rules_by_name_f

    @property
    def rules_by_op(self) -> ta.Mapping[Op, Rule]:
        return self._rules_by_op

    #

    def __len__(self) -> int:
        return len(self._rules_set)

    def __iter__(self) -> ta.Iterator[Rule]:
        return iter(self._rules_set)

    def __contains__(self, item: Rule) -> bool:  # type: ignore[override]
        return item in self._rules_set

    #

    def rule(self, name: str) -> Rule | None:
        """Look up a rule by name, case-insensitively; None when absent."""

        return self._rules_by_name_f.get(name.casefold())
139
+
140
+
141
+ ##
142
+
143
+
144
class Grammar(lang.Final):
    """A collection of Rules plus an optional default root, with parse entry points.

    Parsing is delegated to ``parsing._iter_parse`` (imported lazily via an
    auto-proxy to avoid an import cycle).
    """

    def __init__(
        self,
        *rules: Rule | RulesCollection,
        root: Rule | str | None = None,
    ) -> None:
        super().__init__()

        # Reuse a sole RulesCollection argument as-is; otherwise build one.
        if len(rules) == 1 and isinstance(r0 := rules[0], RulesCollection):
            self._rules = r0
        else:
            self._rules = RulesCollection(*rules)

        # Root given by name is resolved case-insensitively against the rules.
        # NOTE(review): a root given as a Rule is not validated for membership
        # here (iter_parse does validate explicit Rule roots) — confirm intended.
        if isinstance(root, str):
            root = self._rules.rules_by_name_f[root.casefold()]
        self._root = root

    @property
    def rules(self) -> RulesCollection:
        return self._rules

    @property
    def root(self) -> Rule | None:
        return self._root

    #

    def rule(self, name: str) -> Rule | None:
        """Look up a rule by name, case-insensitively; None when absent."""

        return self._rules.rule(name)

    def replace_rules(self, *rules: Rule) -> 'Grammar':
        """Return a Grammar with the given rules, keeping the root (by name).

        Returns ``self`` unchanged when the new rule set equals the current one.
        """

        rc = RulesCollection(*rules)
        if rc.rules_set == self._rules.rules_set:
            return self

        return Grammar(
            rc,
            root=self._root.name if self._root is not None else None,
        )

    #

    def iter_parse(
        self,
        source: str,
        root: Rule | str | None = None,
        *,
        start: int = 0,
        debug: int = 0,
        **kwargs: ta.Any,
    ) -> ta.Iterator[Match]:
        """Yield candidate Matches of ``root`` (or the default root) at ``start``.

        Raises AbnfError when no root is given and the grammar has no default.
        An explicit Rule root must be a member of this grammar; a string root
        is resolved case-insensitively (KeyError when unknown).
        """

        if root is None:
            if (root := self._root) is None:
                raise AbnfError('No root or default root specified')
        else:
            if isinstance(root, str):
                root = self._rules.rules_by_name_f[root.casefold()]
            else:
                root = check.in_(check.isinstance(root, Rule), self._rules)

        return parsing._iter_parse(  # noqa
            self,
            source,
            root._op,  # noqa
            start,
            debug=debug,
            **kwargs,
        )

    def parse(
        self,
        source: str,
        root: str | None = None,  # forwarded to iter_parse, which also accepts a Rule
        *,
        start: int = 0,
        complete: bool = False,
        debug: int = 0,
        **kwargs: ta.Any,
    ) -> Match | None:
        """Return the longest Match of ``root`` at ``start``, or None.

        With ``complete=True``, raises AbnfIncompleteParseError unless the
        match spans exactly ``start`` through the end of ``source``.
        """

        if (match := longest_match(self.iter_parse(
            source,
            root,
            start=start,
            debug=debug,
            **kwargs,
        ))) is None:
            return None

        if complete and (match.start, match.end) != (start, len(source)):
            raise AbnfIncompleteParseError

        return match
@@ -0,0 +1,145 @@
1
+ import io
2
+ import itertools
3
+ import typing as ta
4
+
5
+ from omlish import lang
6
+
7
+ from .internal import Regex
8
+ from .ops import CaseInsensitiveStringLiteral
9
+ from .ops import Op
10
+ from .ops import RangeLiteral
11
+ from .ops import RuleRef
12
+ from .ops import StringLiteral
13
+
14
+
15
+ ##
16
+
17
+
18
@ta.final
class Match(ta.NamedTuple):
    """A span of source matched by an Op, with nested child matches."""

    op: 'Op'  # the op that produced this match
    start: int  # inclusive start offset into the source
    end: int  # exclusive end offset into the source
    children: tuple['Match', ...]  # sub-matches, in source order

    @property
    def length(self) -> int:
        """The number of source characters covered by this match."""

        return self.end - self.start

    #

    def __repr__(self) -> str:
        return (
            f'{self.__class__.__name__}('
            f'{self.op._match_repr()}, '  # noqa
            f'{self.start}, {self.end}'
            f'{f", {self.children!r}" if self.children else ""})'
        )

    def render_to(
        self,
        write: ta.Callable[[str], ta.Any],
        *,
        indent: int | None = None,
        _depth: int = 0,
    ) -> None:
        """Write a human-readable rendering of this match tree to ``write``.

        With ``indent=None`` the output is a single line; with an int indent the
        tree is rendered multi-line, ``indent * depth`` spaces per level.
        ``_depth`` is internal recursion state.
        """

        # Indentation prefix for this depth (None in single-line mode).
        ix: str | None = (' ' * (indent * _depth)) if indent is not None else None
        if ix:
            write(ix)

        o = self.op

        # Head: op-specific label plus the matched span.
        if isinstance(o, (StringLiteral, CaseInsensitiveStringLiteral)):
            write(f'literal<{self.start}-{self.end}>({o.value!r})')

        elif isinstance(o, RangeLiteral):
            write(f'literal<{self.start}-{self.end}>({o.value.lo!r}-{o.value.hi!r})')

        elif isinstance(o, Regex):
            write(f'regex<{self.start}-{self.end}>({o.pat.pattern!r})')

        else:
            write(f'{o.__class__.__name__.lower()}<{self.start}-{self.end}>')

        # Rule references additionally show the referenced rule name.
        if isinstance(o, RuleRef):
            write(f':{o.name}')

        if self.children:
            write('(')
            if ix is not None:
                write('\n')

            for i, c in enumerate(self.children):
                # Single-line mode separates siblings with ', '; multi-line
                # mode instead appends ',\n' after each child below.
                if i and ix is None:
                    write(', ')

                c.render_to(write, indent=indent, _depth=_depth + 1)

                if ix is not None:
                    write(',\n')

            if ix:
                write(ix)

            write(')')

    def render(
        self,
        *,
        indent: int | None = None,
    ) -> str:
        """Return the ``render_to`` output as a string."""

        sb = io.StringIO()
        self.render_to(sb.write, indent=indent)
        return sb.getvalue()

    def __str__(self) -> str:
        return self.render()

    #

    def replace_children(self, *children: 'Match') -> 'Match':
        """Return a copy with the given children; ``self`` when unchanged."""

        if lang.seqs_identical(children, self.children):
            return self

        return self._replace(children=children)

    def map_children(self, fn: ta.Callable[['Match'], 'Match']) -> 'Match':
        """Return a copy with each child replaced by ``fn(child)``."""

        return self.replace_children(*map(fn, self.children))

    def flat_map_children(self, fn: ta.Callable[['Match'], ta.Iterable['Match']]) -> 'Match':
        """Return a copy whose children are the concatenation of ``fn(child)``."""

        return self.replace_children(*itertools.chain.from_iterable(map(fn, self.children)))
111
+
112
+
113
+ ##
114
+
115
+
116
def longest_match(ms: ta.Iterable['Match']) -> 'Match | None':
    """Return the longest match in ``ms``, or None when ``ms`` is empty.

    Ties keep the earliest candidate, and the first candidate is always
    accepted — so a sole zero-length match is still returned.
    """

    best: ta.Optional['Match'] = None
    best_len = 0
    for cand in ms:
        if best is None or cand.length > best_len:
            best = cand
            best_len = cand.length
    return best
124
+
125
+
126
def filter_matches(
    fn: ta.Callable[['Match'], bool],
    m: 'Match',
    *,
    keep_children: bool = False,
) -> 'Match':
    """Prune the match tree ``m``, keeping only matches accepted by ``fn``.

    The root ``m`` itself is always kept. When ``keep_children`` is true, a
    rejected match is replaced by its (recursively filtered) accepted
    descendants; otherwise its entire subtree is dropped.
    """

    def keep(x: 'Match') -> 'Match':
        # Keep x itself, filtering its subtree.
        return x.flat_map_children(expand)

    def expand(x: 'Match') -> ta.Iterable['Match']:
        # Map one child to the sequence of matches that replace it.
        if fn(x):
            return (keep(x),)

        if keep_children:
            return lang.flatten(expand(c) for c in x.children)

        return ()

    return keep(m)
omextra/text/abnf/meta.py CHANGED
@@ -7,12 +7,13 @@ from omlish import check
7
7
  from omlish import dataclasses as dc
8
8
  from omlish import lang
9
9
 
10
- from .base import Grammar
11
- from .base import Match
12
10
  from .base import Op
13
- from .base import Rule
14
11
  from .core import CORE_RULES
15
12
  from .errors import AbnfGrammarParseError
13
+ from .grammars import Channel
14
+ from .grammars import Grammar
15
+ from .grammars import Rule
16
+ from .matches import Match
16
17
  from .ops import Repeat
17
18
  from .ops import concat
18
19
  from .ops import either
@@ -20,9 +21,10 @@ from .ops import literal
20
21
  from .ops import option
21
22
  from .ops import repeat
22
23
  from .ops import rule
23
- from .opto import optimize_op
24
+ from .opto import optimize_grammar
25
+ from .utils import filter_match_channels
24
26
  from .utils import fix_ws
25
- from .utils import parse_rules
27
+ from .utils import only_match_rules
26
28
  from .visitors import RuleMatchVisitor
27
29
 
28
30
 
@@ -106,7 +108,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
106
108
  rule('WSP'),
107
109
  ),
108
110
  ),
109
- insignificant=True,
111
+ channel=Channel.SPACE,
110
112
  ),
111
113
 
112
114
  Rule(
@@ -115,7 +117,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
115
117
  rule('comment'),
116
118
  rule('CRLF'),
117
119
  ),
118
- insignificant=True,
120
+ channel=Channel.SPACE,
119
121
  ),
120
122
 
121
123
  Rule(
@@ -130,6 +132,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
130
132
  ),
131
133
  rule('CRLF'),
132
134
  ),
135
+ channel=Channel.COMMENT,
133
136
  ),
134
137
 
135
138
  Rule(
@@ -410,12 +413,21 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
410
413
  ]
411
414
 
412
415
 
413
- META_GRAMMAR = Grammar(
416
+ RAW_META_GRAMMAR = Grammar(
414
417
  *CORE_RULES,
415
418
  *META_GRAMMAR_RULES,
416
419
  root='rulelist',
417
420
  )
418
421
 
422
+ META_GRAMMAR = optimize_grammar(
423
+ RAW_META_GRAMMAR,
424
+ inline_channels=(
425
+ Channel.CONTENT,
426
+ Channel.COMMENT,
427
+ Channel.SPACE,
428
+ ),
429
+ )
430
+
419
431
 
420
432
  ##
421
433
 
@@ -556,6 +568,9 @@ class MetaGrammarRuleMatchVisitor(RuleMatchVisitor[ta.Any]):
556
568
  return self.QuotedString(self._source[m.start + 1:m.end - 1])
557
569
 
558
570
 
571
+ ##
572
+
573
+
559
574
  def parse_grammar(
560
575
  source: str,
561
576
  *,
@@ -566,14 +581,22 @@ def parse_grammar(
566
581
  ) -> Grammar:
567
582
  source = fix_ws(source)
568
583
 
569
- if (mg_m := parse_rules(
570
- META_GRAMMAR,
584
+ if (mg_m := META_GRAMMAR.parse(
571
585
  source,
572
586
  complete=True,
573
587
  **kwargs,
574
588
  )) is None:
575
589
  raise AbnfGrammarParseError(source)
576
590
 
591
+ mg_m = only_match_rules(mg_m)
592
+
593
+ mg_m = filter_match_channels(
594
+ mg_m,
595
+ META_GRAMMAR,
596
+ keep=(Channel.STRUCTURE,),
597
+ keep_children=True,
598
+ )
599
+
577
600
  check.isinstance(mg_m.op, Repeat)
578
601
 
579
602
  mg_rmv = MetaGrammarRuleMatchVisitor(source)
@@ -582,14 +605,13 @@ def parse_grammar(
582
605
  for gg_cm in mg_m.children
583
606
  ]
584
607
 
585
- if not no_optimize:
586
- rules = [
587
- r.replace_op(optimize_op(r.op))
588
- for r in rules
589
- ]
590
-
591
- return Grammar(
608
+ gram = Grammar(
592
609
  *rules,
593
610
  *(CORE_RULES if not no_core_rules else []),
594
611
  root=root,
595
612
  )
613
+
614
+ if not no_optimize:
615
+ gram = optimize_grammar(gram)
616
+
617
+ return gram