omextra 0.0.0.dev471__py3-none-any.whl → 0.0.0.dev485__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ Copyright 2020 Charles Yeomans
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software
4
+ and associated documentation files (the "Software"), to deal in the Software without
5
+ restriction, including without limitation the rights to use, copy, modify, merge, publish,
6
+ distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
7
+ Software is furnished to do so, subject to the following conditions:
8
+
9
+ The above copyright notice and this permission notice shall be included in all copies or
10
+ substantial portions of the Software.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
13
+ BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
14
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
15
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,79 @@
1
+ """
2
+ Parser generator for ABNF grammars.
3
+
4
+ Originally based on library by Charles Yeomans (see LICENSE file):
5
+
6
+ https://github.com/declaresub/abnf/tree/561ced67c0a8afc869ad0de5b39dbe4f6e71b0d8/src/abnf
7
+
8
+ It has however been nearly entirely rewritten.
9
+
10
+ ====
11
+
12
+ TODO:
13
+ - cache lol
14
+ - get greedier
15
+ - match-powered optimizer
16
+ - greedily compile regexes
17
+ - error reporting
18
+ - codegen
19
+ """
20
+
21
+
22
+ from .base import ( # noqa
23
+ Match,
24
+ longest_match,
25
+
26
+ Parser,
27
+
28
+ Rule,
29
+ Grammar,
30
+
31
+ iter_parse,
32
+ parse,
33
+ )
34
+
35
+ from .core import ( # noqa
36
+ CORE_RULES,
37
+ )
38
+
39
+ from .errors import ( # noqa
40
+ AbnfError,
41
+ AbnfGrammarParseError,
42
+ )
43
+
44
+ from .meta import ( # noqa
45
+ META_GRAMMAR_RULES,
46
+ META_GRAMMAR,
47
+
48
+ parse_grammar,
49
+ )
50
+
51
+ from .parsers import ( # noqa
52
+ Literal,
53
+ StringLiteral,
54
+ CaseInsensitiveStringLiteral,
55
+ RangeLiteral,
56
+ literal,
57
+
58
+ Concat,
59
+ concat,
60
+
61
+ Repeat,
62
+ Option,
63
+ repeat,
64
+
65
+ Either,
66
+ either,
67
+
68
+ RuleRef,
69
+ rule,
70
+ )
71
+
72
+ from .utils import ( # noqa
73
+ strip_insignificant_match_rules,
74
+ only_match_rules,
75
+
76
+ parse_rules,
77
+
78
+ fix_grammar_ws,
79
+ )
@@ -0,0 +1,313 @@
1
+ import abc
2
+ import io
3
+ import itertools
4
+ import typing as ta
5
+
6
+ from omlish import check
7
+ from omlish import lang
8
+
9
+ from .errors import AbnfError
10
+
11
+
12
+ with lang.auto_proxy_import(globals()):
13
+ from . import parsers
14
+
15
+
16
+ ##
17
+
18
+
19
@ta.final
class Match(ta.NamedTuple):
    """A node in a parse tree: the parser that matched, its [start, end) source span, and child matches."""

    parser: 'Parser'  # the parser that produced this match
    start: int  # inclusive start offset into the source
    end: int  # exclusive end offset into the source
    children: tuple['Match', ...]  # sub-matches produced by composite parsers

    @property
    def length(self) -> int:
        """Number of source characters this match spans."""

        return self.end - self.start

    #

    def __repr__(self) -> str:
        # Uses the parser's short _match_repr tag rather than its full repr; children are shown
        # only when non-empty.
        return (
            f'{self.__class__.__name__}('
            f'{self.parser._match_repr()}, '  # noqa
            f'{self.start}, {self.end}'
            f'{f", {self.children!r}" if self.children else ""})'
        )

    def render_to(
        self,
        write: ta.Callable[[str], ta.Any],
        *,
        indent: int | None = None,
        _level: int = 0,
    ) -> None:
        """Write a human-readable rendering of this match tree via repeated calls to write.

        :param write: Receives successive string fragments (e.g. io.StringIO.write).
        :param indent: Spaces per nesting level; None renders the whole tree on one line.
        :param _level: Internal recursion depth - callers should not pass this.
        """

        ix: str | None = (' ' * (indent * _level)) if indent is not None else None
        if ix:
            write(ix)
        p = self.parser
        if isinstance(p, (parsers.StringLiteral, parsers.CaseInsensitiveStringLiteral)):
            write(f'literal<{self.start}-{self.end}>({p.value!r})')
        elif isinstance(p, parsers.RangeLiteral):
            write(f'literal<{self.start}-{self.end}>({p.value.lo!r}-{p.value.hi!r})')
        else:
            # Generic form: lowercased parser class name plus span, with the rule name appended
            # for rule references.
            write(f'{p.__class__.__name__.lower()}<{self.start}-{self.end}>')
            if isinstance(p, parsers.RuleRef):
                write(f':{p.name}')
        if self.children:
            write('(')
            if ix is not None:
                write('\n')
            for i, c in enumerate(self.children):
                if i and ix is None:
                    write(', ')  # one-line mode: comma-separate siblings
                c.render_to(write, indent=indent, _level=_level + 1)
                if ix is not None:
                    write(',\n')
            if ix:
                write(ix)
            write(')')

    def render(
        self,
        *,
        indent: int | None = None,
    ) -> str:
        """Return the render_to output as a string."""

        sb = io.StringIO()
        self.render_to(sb.write, indent=indent)
        return sb.getvalue()

    def __str__(self) -> str:
        return self.render()

    #

    def map_children(self, fn: ta.Callable[['Match'], 'Match']) -> 'Match':
        """Return a copy of this match with fn applied to each direct child."""

        return self._replace(children=tuple(map(fn, self.children)))

    def flat_map_children(self, fn: ta.Callable[['Match'], ta.Iterable['Match']]) -> 'Match':
        """Return a copy with each direct child replaced by the (possibly empty) matches fn yields."""

        return self._replace(children=tuple(itertools.chain.from_iterable(map(fn, self.children))))
92
+
93
+
94
def longest_match(ms: ta.Iterable[Match]) -> Match | None:
    """Return the longest match in ms, or None if ms is empty.

    Ties are broken in favor of the earliest match encountered (max returns the first maximal
    item).
    """

    return max(ms, key=lambda m: m.length, default=None)
102
+
103
+
104
+ ##
105
+
106
+
107
class Parser(lang.Abstract, lang.PackageSealed):
    """Abstract base for all parser combinators in this package (subclassing is package-sealed).

    Concrete parsers implement _iter_parse to yield every Match they can produce at a given
    start offset.
    """

    def _match_repr(self) -> str:
        # Short identity tag used by Match.__repr__ instead of a full (possibly recursive) repr.
        return f'{self.__class__.__name__}@{id(self)}'

    @abc.abstractmethod
    def _iter_parse(self, ctx: '_Context', start: int) -> ta.Iterator[Match]:
        """Yield each Match this parser produces against ctx.source starting at offset start."""

        raise NotImplementedError
114
+
115
+
116
+ ##
117
+
118
+
119
class Rule(lang.Final):
    """A named grammar production binding a name to a Parser.

    The name is preserved as given but also stored casefolded (name_f) for case-insensitive
    lookup, per ABNF convention.
    """

    def __init__(
        self,
        name: str,
        parser: Parser,
        *,
        insignificant: bool = False,
    ) -> None:
        super().__init__()

        self._name = check.non_empty_str(name)
        self._name_f = name.casefold()
        self._parser = check.isinstance(parser, Parser)
        self._insignificant = insignificant

    def __repr__(self) -> str:
        return f'{type(self).__name__}({self._name!r})'

    @property
    def name(self) -> str:
        """The rule's name, exactly as originally given."""
        return self._name

    @property
    def name_f(self) -> str:
        """The casefolded rule name, used for case-insensitive lookup."""
        return self._name_f

    @property
    def parser(self) -> Parser:
        """The parser implementing this rule."""
        return self._parser

    @property
    def insignificant(self) -> bool:
        """Whether this rule's matches are noise (e.g. whitespace) that tooling may strip."""
        return self._insignificant
152
+
153
+
154
class Grammar(lang.Final):
    """An immutable collection of Rules, indexed by name (exact and casefolded) and by parser.

    An optional root rule serves as the default entry point for parsing.
    """

    def __init__(
        self,
        *rules: Rule,
        root: Rule | str | None = None,
    ) -> None:
        """
        :param rules: The grammar's rules; rule objects, names, and parsers must all be unique.
        :param root: Default root, given directly or by (case-insensitive) name.
        """

        super().__init__()

        rules_set: set[Rule] = set()
        rules_by_name: dict[str, Rule] = {}
        rules_by_name_f: dict[str, Rule] = {}
        rules_by_parser: dict[Parser, Rule] = {}
        for gr in rules:
            # Reject duplicate rule objects, names (exact or casefolded), and parsers.
            check.not_in(gr, rules_set)
            check.not_in(gr._name, rules_by_name)  # noqa
            check.not_in(gr._name_f, rules_by_name_f)  # noqa
            check.not_in(gr._parser, rules_by_parser)  # noqa
            # BUGFIX: rules_set was never populated, leaving self._rules permanently empty - that
            # both disabled the duplicate-object check above and made iter_parse reject any Rule
            # passed directly as root (its membership check could never succeed).
            rules_set.add(gr)
            rules_by_name[gr._name] = gr  # noqa
            rules_by_name_f[gr._name_f] = gr  # noqa
            rules_by_parser[gr._parser] = gr  # noqa
        self._rules = rules_set
        self._rules_by_name: ta.Mapping[str, Rule] = rules_by_name
        self._rules_by_name_f: ta.Mapping[str, Rule] = rules_by_name_f
        self._rules_by_parser: ta.Mapping[Parser, Rule] = rules_by_parser

        if isinstance(root, str):
            # Resolve a by-name root case-insensitively; raises KeyError if absent.
            root = rules_by_name_f[root.casefold()]
        self._root = root

    @property
    def root(self) -> Rule | None:
        """The default root rule, if any."""
        return self._root

    def rule(self, name: str) -> Rule | None:
        """Look up a rule by case-insensitive name, returning None if absent."""
        return self._rules_by_name_f.get(name.casefold())

    def iter_parse(
        self,
        source: str,
        root: Rule | str | None = None,
        *,
        start: int = 0,
        debug: bool = False,
    ) -> ta.Iterator[Match]:
        """Yield candidate matches of root against source beginning at offset start.

        :param root: A Rule of this grammar or a (case-insensitive) rule name; defaults to the
            grammar's root.
        :param debug: When true, trace parser entry/exit to stdout.
        :raises AbnfError: If no root is given and the grammar has no default root.
        """

        if root is None:
            if (root := self._root) is None:
                raise AbnfError('No root or default root specified')
        elif isinstance(root, str):
            root = self._rules_by_name_f[root.casefold()]
        else:
            # A Rule passed directly must belong to this grammar.
            root = check.in_(check.isinstance(root, Rule), self._rules)

        ctx_cls: type[_Context] = _DebugContext if debug else _Context
        ctx = ctx_cls(self, source)

        return ctx.iter_parse(root._parser, start)  # noqa

    def parse(
        self,
        source: str,
        root: Rule | str | None = None,
        *,
        start: int = 0,
        debug: bool = False,
    ) -> Match | None:
        """Return the longest match of root against source, or None if nothing matched.

        Accepts the same root forms as iter_parse (annotation widened accordingly).
        """

        return longest_match(self.iter_parse(
            source,
            root,
            start=start,
            debug=debug,
        ))
230
+
231
+
232
+ ##
233
+
234
+
235
class _Context:
    """Per-parse state threaded through the combinators: the grammar and the source text."""

    def __init__(self, grammar: Grammar, source: str) -> None:
        super().__init__()

        self._grammar = grammar
        self._source = source

    @property
    def grammar(self) -> Grammar:
        """The grammar being parsed with."""
        return self._grammar

    @property
    def source(self) -> str:
        """The full source text being parsed."""
        return self._source

    def iter_parse(self, parser: Parser, start: int) -> ta.Iterator[Match]:
        """Delegate to the parser's protected _iter_parse with this context."""
        return parser._iter_parse(self, start)  # noqa
256
+
257
+
258
class _DebugContext(_Context):
    """A _Context that traces parser entry/exit to stdout, indented by recursion depth."""

    _level: int = 0  # current nesting depth (instance attribute once first incremented)

    def iter_parse(self, parser: Parser, start: int) -> ta.Iterator[Match]:
        print(f'{" " * self._level}enter: {parser=} {start=}')
        self._level += 1
        try:
            yield from super().iter_parse(parser, start)  # noqa
        finally:
            # Always restore depth and report exit, even if the parse is abandoned mid-stream.
            self._level -= 1
            print(f'{" " * self._level}exit: {parser=} {start=}')
271
+
272
+
273
+ ##
274
+
275
+
276
+ def iter_parse(
277
+ obj: Grammar | Rule | Parser,
278
+ src: str,
279
+ *,
280
+ root: str | None = None,
281
+ start: int = 0,
282
+ ) -> ta.Iterator[Match]:
283
+ if isinstance(obj, Grammar):
284
+ gram = obj
285
+ elif isinstance(obj, Rule):
286
+ check.none(root)
287
+ gram = Grammar(obj, root=obj)
288
+ elif isinstance(obj, Parser):
289
+ check.none(root)
290
+ gram = Grammar(Rule('root', obj), root='root')
291
+ else:
292
+ raise TypeError(obj)
293
+
294
+ return gram.iter_parse(
295
+ src,
296
+ root,
297
+ start=start,
298
+ )
299
+
300
+
301
def parse(
    obj: Grammar | Rule | Parser,
    src: str,
    *,
    root: str | None = None,
    start: int = 0,
) -> Match | None:
    """Parse src like iter_parse and return only the longest match, or None if none matched."""

    matches = iter_parse(obj, src, root=root, start=start)
    return longest_match(matches)
@@ -0,0 +1,141 @@
1
+ """
2
+ https://datatracker.ietf.org/doc/html/rfc5234
3
+ """
4
+ import typing as ta
5
+
6
+ from .base import Grammar
7
+ from .base import Rule
8
+ from .parsers import concat
9
+ from .parsers import either
10
+ from .parsers import literal
11
+ from .parsers import repeat
12
+ from .parsers import rule
13
+
14
+
15
+ ##
16
+
17
+
18
# The core rules of ABNF itself, per RFC 5234 appendix B.1. Whitespace / line-ending rules are
# marked insignificant=True, presumably so their matches can be stripped from parse trees (see
# .utils) - TODO confirm.
CORE_RULES: ta.Sequence[Rule] = [

    # ALPHA = %x41-5A / %x61-7A   ; A-Z / a-z
    Rule(
        'ALPHA',
        either(
            literal('\x41', '\x5a'),
            literal('\x61', '\x7a'),
        ),
    ),

    # BIT = "0" / "1"
    Rule(
        'BIT',
        either(
            literal('0'),
            literal('1'),
        ),
    ),

    # CHAR = %x01-7F   ; any 7-bit US-ASCII character, excluding NUL
    Rule(
        'CHAR',
        literal('\x01', '\x7f'),
    ),

    # CTL = %x00-1F / %x7F   ; controls
    Rule(
        'CTL',
        either(
            literal('\x00', '\x1f'),
            literal('\x7f', case_sensitive=True),
        ),
    ),

    # CR = %x0D   ; carriage return
    Rule(
        'CR',
        literal('\x0d', case_sensitive=True),
        insignificant=True,
    ),

    # CRLF = CR LF   ; Internet standard newline
    Rule(
        'CRLF',
        concat(
            rule('CR'),
            rule('LF'),
        ),
        insignificant=True,
    ),

    # DIGIT = %x30-39   ; 0-9
    Rule(
        'DIGIT',
        literal('\x30', '\x39'),
    ),

    # DQUOTE = %x22   ; " (double quote)
    Rule(
        'DQUOTE',
        literal('\x22', case_sensitive=True),
    ),

    # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
    # NOTE(review): the letter literals omit case_sensitive, so lowercase a-f also match if
    # literal() defaults to case-insensitive (as ABNF quoted strings are) - confirm in .parsers.
    Rule(
        'HEXDIG',
        either(
            rule('DIGIT'),
            literal('A'),
            literal('B'),
            literal('C'),
            literal('D'),
            literal('E'),
            literal('F'),
        ),
    ),

    # HTAB = %x09   ; horizontal tab
    Rule(
        'HTAB',
        literal('\x09', case_sensitive=True),
        insignificant=True,
    ),

    # LF = %x0A   ; linefeed
    Rule(
        'LF',
        literal('\x0a', case_sensitive=True),
        insignificant=True,
    ),

    # LWSP = *(WSP / CRLF WSP)   ; linear whitespace (RFC 5234 cautions against its use)
    Rule(
        'LWSP',
        repeat(
            either(
                rule('WSP'),
                concat(
                    rule('CRLF'),
                    rule('WSP'),
                ),
            ),
        ),
        insignificant=True,
    ),

    # OCTET = %x00-FF   ; 8 bits of data
    Rule(
        'OCTET',
        literal('\x00', '\xff'),
    ),

    # SP = %x20   ; space
    Rule(
        'SP',
        literal('\x20', case_sensitive=True),
        insignificant=True,
    ),

    # VCHAR = %x21-7E   ; visible (printing) characters
    Rule(
        'VCHAR',
        literal('\x21', '\x7e'),
    ),

    # WSP = SP / HTAB   ; whitespace
    Rule(
        'WSP',
        either(
            rule('SP'),
            rule('HTAB'),
        ),
        insignificant=True,
    ),

]


# A Grammar over just the core rules, with no default root.
CORE_GRAMMAR = Grammar(*CORE_RULES)
@@ -0,0 +1,10 @@
1
class AbnfError(Exception):
    """Base class for all errors raised by this ABNF package."""

    pass
3
+
4
+
5
class AbnfIncompleteParseError(AbnfError):
    """Raised when a parse succeeds but does not consume the entire input."""

    pass
7
+
8
+
9
class AbnfGrammarParseError(AbnfError):
    """Raised when an ABNF grammar definition itself fails to parse."""

    pass