omextra 0.0.0.dev471__py3-none-any.whl → 0.0.0.dev485__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,583 @@
1
+ """
2
+ https://datatracker.ietf.org/doc/html/rfc5234
3
+ """
4
+ import typing as ta
5
+
6
+ from omlish import check
7
+ from omlish import dataclasses as dc
8
+ from omlish import lang
9
+
10
+ from .base import Grammar
11
+ from .base import Match
12
+ from .base import Parser
13
+ from .base import Rule
14
+ from .core import CORE_RULES
15
+ from .errors import AbnfGrammarParseError
16
+ from .parsers import Repeat
17
+ from .parsers import concat
18
+ from .parsers import either
19
+ from .parsers import literal
20
+ from .parsers import option
21
+ from .parsers import repeat
22
+ from .parsers import rule
23
+ from .utils import fix_grammar_ws
24
+ from .utils import parse_rules
25
+ from .visitors import RuleVisitor
26
+
27
+
28
+ ##
29
+
30
+
31
# ABNF's own syntax expressed with this package's parser combinators. The productions follow RFC 5234 section 4, with the
# string productions taken from RFC 7405. Each entry is preceded by the production it encodes, written with alternatives
# in the order the code lists them.
META_GRAMMAR_RULES: ta.Sequence[Rule] = [

    # rulelist = 1*( rule / (*c-wsp c-nl) )
    Rule('rulelist', repeat(1, either(
        rule('rule'),
        concat(repeat(rule('c-wsp')), rule('c-nl')),
    ))),

    # rule = rulename defined-as elements c-nl
    Rule('rule', concat(
        rule('rulename'),
        rule('defined-as'),
        rule('elements'),
        rule('c-nl'),
    )),

    # rulename = ALPHA *(ALPHA / DIGIT / "-")
    Rule('rulename', concat(
        rule('ALPHA'),
        repeat(either(rule('ALPHA'), rule('DIGIT'), literal('-'))),
    )),

    # defined-as = *c-wsp ("=/" / "=") *c-wsp
    Rule('defined-as', concat(
        repeat(rule('c-wsp')),
        either(literal('=/'), literal('=')),
        repeat(rule('c-wsp')),
    )),

    # elements = alternation *c-wsp
    Rule('elements', concat(rule('alternation'), repeat(rule('c-wsp')))),

    # c-wsp = WSP / (c-nl WSP)
    Rule(
        'c-wsp',
        either(rule('WSP'), concat(rule('c-nl'), rule('WSP'))),
        insignificant=True,
    ),

    # c-nl = comment / CRLF
    Rule('c-nl', either(rule('comment'), rule('CRLF')), insignificant=True),

    # comment = ";" *(WSP / VCHAR) CRLF
    Rule('comment', concat(
        literal(';'),
        repeat(either(rule('WSP'), rule('VCHAR'))),
        rule('CRLF'),
    )),

    # alternation = concatenation *(*c-wsp "/" *c-wsp concatenation)
    Rule('alternation', concat(
        rule('concatenation'),
        repeat(concat(
            repeat(rule('c-wsp')),
            literal('/'),
            repeat(rule('c-wsp')),
            rule('concatenation'),
        )),
    )),

    # concatenation = repetition *(1*c-wsp repetition)
    Rule('concatenation', concat(
        rule('repetition'),
        repeat(concat(repeat(1, rule('c-wsp')), rule('repetition'))),
    )),

    # repetition = [repeat] element
    Rule('repetition', concat(option(rule('repeat')), rule('element'))),

    # repeat = (*DIGIT "*" *DIGIT) / 1*DIGIT
    Rule('repeat', either(
        concat(repeat(rule('DIGIT')), literal('*'), repeat(rule('DIGIT'))),
        repeat(1, rule('DIGIT')),
    )),

    # element = rulename / group / option / char-val / num-val / prose-val
    Rule('element', either(
        rule('rulename'),
        rule('group'),
        rule('option'),
        rule('char-val'),
        rule('num-val'),
        rule('prose-val'),
    )),

    # group = "(" *c-wsp alternation *c-wsp ")"
    Rule('group', concat(
        literal('('),
        repeat(rule('c-wsp')),
        rule('alternation'),
        repeat(rule('c-wsp')),
        literal(')'),
    )),

    # option = "[" *c-wsp alternation *c-wsp "]"
    Rule('option', concat(
        literal('['),
        repeat(rule('c-wsp')),
        rule('alternation'),
        repeat(rule('c-wsp')),
        literal(']'),
    )),

    # num-val = "%" (bin-val / dec-val / hex-val)
    Rule('num-val', concat(
        literal('%'),
        either(rule('bin-val'), rule('dec-val'), rule('hex-val')),
    )),

    # bin-val = "b" 1*BIT [ 1*("." 1*BIT) / ("-" 1*BIT) ]
    Rule('bin-val', concat(
        literal('b'),
        concat(
            repeat(1, rule('BIT')),
            option(either(
                repeat(1, concat(literal('.'), repeat(1, rule('BIT')))),
                concat(literal('-'), repeat(1, rule('BIT'))),
            )),
        ),
    )),

    # dec-val = "d" 1*DIGIT [ 1*("." 1*DIGIT) / ("-" 1*DIGIT) ]
    Rule('dec-val', concat(
        literal('d'),
        concat(
            repeat(1, rule('DIGIT')),
            option(either(
                repeat(1, concat(literal('.'), repeat(1, rule('DIGIT')))),
                concat(literal('-'), repeat(1, rule('DIGIT'))),
            )),
        ),
    )),

    # hex-val = "x" 1*HEXDIG [ 1*("." 1*HEXDIG) / ("-" 1*HEXDIG) ]
    Rule('hex-val', concat(
        literal('x'),
        concat(
            repeat(1, rule('HEXDIG')),
            option(either(
                repeat(1, concat(literal('.'), repeat(1, rule('HEXDIG')))),
                concat(literal('-'), repeat(1, rule('HEXDIG'))),
            )),
        ),
    )),

    # prose-val = "<" *(%x20-3D / %x3F-7E) ">"
    Rule('prose-val', concat(
        literal('<'),
        repeat(either(literal('\x20', '\x3d'), literal('\x3f', '\x7e'))),
        literal('>'),
    )),

    # definitions from RFC 7405

    # char-val = case-insensitive-string / case-sensitive-string
    Rule('char-val', either(
        rule('case-insensitive-string'),
        rule('case-sensitive-string'),
    )),

    # case-insensitive-string = [ "%i" ] quoted-string
    Rule('case-insensitive-string', concat(
        option(literal('%i')),
        rule('quoted-string'),
    )),

    # case-sensitive-string = "%s" quoted-string
    Rule('case-sensitive-string', concat(
        literal('%s'),
        rule('quoted-string'),
    )),

    # quoted-string = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
    Rule('quoted-string', concat(
        rule('DQUOTE'),
        repeat(either(literal('\x20', '\x21'), literal('\x23', '\x7e'))),
        rule('DQUOTE'),
    )),

]
409
+
410
+
411
# Grammar for ABNF source text itself: the core rules plus the meta rules, with 'rulelist' as the root rule.
META_GRAMMAR = Grammar(*CORE_RULES, *META_GRAMMAR_RULES, root='rulelist')
416
+
417
+
418
+ ##
419
+
420
+
421
class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
    """
    Turns a match tree obtained by parsing ABNF text into `Rule` / `Parser` objects.

    Each handler is registered against the name of one meta-grammar rule and receives the `Match` for that rule. Matched
    text is recovered by slicing the original source with the match's `start` / `end` offsets, so the visitor must be
    constructed with exactly the string that was parsed.

    NOTE(review): several handlers rely on `m.children` containing only the sub-matches they unpack (e.g. `check.single`)
    - this presumably depends on how the parse filters literal / insignificant matches; confirm against `parse_rules`.
    """

    def __init__(self, source: str) -> None:
        super().__init__()

        # The exact text the match offsets refer to.
        self._source = source

    @dc.dataclass(frozen=True)
    class RuleName(lang.Final):
        # Intermediate result of visiting a 'rulename' match: the raw rule name text.
        s: str

    @dc.dataclass(frozen=True)
    class QuotedString(lang.Final):
        # Intermediate result of visiting a 'quoted-string' match: the text between the double quotes.
        s: str

    @RuleVisitor.register('rule')
    def visit_rule_rule(self, m: Match) -> ta.Any:
        """Builds a `Rule` from a 'rule' match; expects exactly three children (name, defined-as, elements)."""

        rn_m, _, el_m = m.children
        rn = check.isinstance(self.visit_match(rn_m), self.RuleName).s
        el = self.visit_match(el_m)
        return Rule(rn, el)

    @RuleVisitor.register('rulename')
    def visit_rulename_rule(self, m: Match) -> ta.Any:
        """Wraps the matched source text in a `RuleName`."""

        return self.RuleName(self._source[m.start:m.end])

    @RuleVisitor.register('elements')
    def visit_elements_rule(self, m: Match) -> ta.Any:
        """Returns the result of visiting the single child of an 'elements' match."""

        return self.visit_match(check.single(m.children))

    @RuleVisitor.register('alternation')
    def visit_alternation_rule(self, m: Match) -> ta.Any:
        """Returns the lone child's result, or an `either` over all children when there is more than one."""

        if len(m.children) == 1:
            return self.visit_match(m.children[0])
        else:
            return either(*map(self.visit_match, m.children))

    @RuleVisitor.register('concatenation')
    def visit_concatenation_rule(self, m: Match) -> ta.Any:
        """Returns the lone child's result, or a `concat` over all children when there is more than one."""

        if len(m.children) == 1:
            return self.visit_match(m.children[0])
        else:
            return concat(*map(self.visit_match, m.children))

    @RuleVisitor.register('repetition')
    def visit_repetition_rule(self, m: Match) -> ta.Any:
        """
        Handles `[repeat] element`: with two children the element is wrapped in `repeat`, with one it is returned as-is.

        Raises:
            ValueError: if the match has neither one nor two children.
        """

        if len(m.children) == 2:
            ti_m, el_m = m.children
            ti = check.isinstance(self.visit_match(ti_m), Repeat.Times)
            el = self.visit_match(el_m)
            return repeat(ti, el)
        elif len(m.children) == 1:
            return self.visit_match(m.children[0])
        else:
            raise ValueError(m)

    @RuleVisitor.register('repeat')
    def visit_repeat_rule(self, m: Match) -> ta.Any:
        """
        Converts a repeat specifier into a `Repeat.Times`.

        Accepted forms are `N` (exactly N), `*` (no minimum given, no maximum), `N*` (at least N), `*M` (at most M) and
        `N*M`. An omitted minimum is treated as 0, per RFC 5234 section 3.6.
        """

        s = check.non_empty_str(self._source[m.start:m.end])

        lo_s, star, hi_s = s.partition('*')
        if not star:
            # Plain count: minimum and maximum are the same number.
            n = int(s)
            return Repeat.Times(n, n)

        # Only a single '*' is meaningful.
        check.state('*' not in hi_s)

        # An omitted minimum means 0 - previously '*M' failed here with int('').
        lo = int(lo_s) if lo_s else 0
        if hi_s:
            return Repeat.Times(lo, int(hi_s))
        else:
            return Repeat.Times(lo)

    @RuleVisitor.register('element')
    def visit_element_rule(self, m: Match) -> ta.Any:
        """
        Returns a parser for an 'element' match, turning a bare `RuleName` into a `rule` reference.

        Raises:
            TypeError: if the child's result is neither a `Parser` nor a `RuleName`.
        """

        c = self.visit_match(check.single(m.children))
        if isinstance(c, Parser):
            return c
        elif isinstance(c, self.RuleName):
            return rule(c.s)
        else:
            raise TypeError(c)

    @RuleVisitor.register('group')
    def visit_group_rule(self, m: Match) -> ta.Any:
        """Returns the result of visiting the single child of a 'group' match."""

        return self.visit_match(check.single(m.children))

    @RuleVisitor.register('option')
    def visit_option_rule(self, m: Match) -> ta.Any:
        """Wraps the single child's parser in `option`."""

        c = self.visit_match(check.single(m.children))
        return option(check.isinstance(c, Parser))

    @RuleVisitor.register('num-val')
    def visit_num_val_rule(self, m: Match) -> ta.Any:
        """Returns the result of visiting the single child of a 'num-val' match."""

        return self.visit_match(check.single(m.children))

    def _parse_num_val(self, s: str, base: int) -> Parser:
        """
        Builds a parser from the digits of a numeric value, i.e. the text after the base letter.

        Args:
            s: Digits in one of three shapes - `lo-hi` (a range), `a.b.c` (a sequence of characters) or a single number.
            base: Radix used to interpret each group of digits.
        """

        if '-' in s:
            check.not_in('.', s)
            lo, hi = [chr(int(p, base)) for p in s.split('-')]
            return literal(lo, hi)
        elif '.' in s:
            check.not_in('-', s)
            cs = [chr(int(p, base)) for p in s.split('.')]
            return concat(*[literal(c, c) for c in cs])
        else:
            c = chr(int(s, base))
            return literal(c, c)

    @RuleVisitor.register('bin-val')
    def visit_bin_val_rule(self, m: Match) -> ta.Any:
        """Parses a binary numeric value; the leading base letter is skipped."""

        return self._parse_num_val(self._source[m.start + 1:m.end], 2)

    @RuleVisitor.register('dec-val')
    def visit_dec_val_rule(self, m: Match) -> ta.Any:
        """Parses a decimal numeric value; the leading base letter is skipped."""

        return self._parse_num_val(self._source[m.start + 1:m.end], 10)

    @RuleVisitor.register('hex-val')
    def visit_hex_val_rule(self, m: Match) -> ta.Any:
        """Parses a hexadecimal numeric value; the leading base letter is skipped."""

        return self._parse_num_val(self._source[m.start + 1:m.end], 16)

    @RuleVisitor.register('char-val')
    def visit_char_val_rule(self, m: Match) -> ta.Any:
        """Returns the result of visiting the single child of a 'char-val' match."""

        return self.visit_match(check.single(m.children))

    @RuleVisitor.register('case-sensitive-string')
    def visit_case_sensitive_string_rule(self, m: Match) -> ta.Any:
        """Builds a case-sensitive `literal` from the single `QuotedString` child."""

        c = self.visit_match(check.single(m.children))
        return literal(check.isinstance(c, self.QuotedString).s, case_sensitive=True)

    @RuleVisitor.register('case-insensitive-string')
    def visit_case_insensitive_string_rule(self, m: Match) -> ta.Any:
        """Builds a case-insensitive `literal` from the single `QuotedString` child."""

        c = self.visit_match(check.single(m.children))
        return literal(check.isinstance(c, self.QuotedString).s, case_sensitive=False)

    @RuleVisitor.register('quoted-string')
    def visit_quoted_string_rule(self, m: Match) -> ta.Any:
        """Strips the surrounding double quotes; the quoted text must be non-empty (match longer than 2 characters)."""

        check.state(m.end - m.start > 2)
        check.state(self._source[m.start] == '"')
        check.state(self._source[m.end - 1] == '"')
        return self.QuotedString(self._source[m.start + 1:m.end - 1])
555
+
556
+
557
def parse_grammar(
        source: str,
        *,
        no_core_rules: bool = False,
        root: str | None = None,
        **kwargs: ta.Any,
) -> Grammar:
    """
    Parses ABNF source text into a `Grammar`.

    The text is first passed through `fix_grammar_ws`, then parsed in full against `META_GRAMMAR`, and the resulting
    matches are converted to rules by a `MetaGrammarRuleVisitor`.

    Args:
        source: ABNF grammar text.
        no_core_rules: When true, `CORE_RULES` are not appended to the parsed rules.
        root: Passed through as the resulting grammar's `root`.
        **kwargs: Forwarded to `parse_rules`.

    Raises:
        AbnfGrammarParseError: if `parse_rules` returns None; raised with the whitespace-fixed source.
    """

    fixed_source = fix_grammar_ws(source)

    rulelist_match = parse_rules(
        META_GRAMMAR,
        fixed_source,
        complete=True,
        **kwargs,
    )
    if rulelist_match is None:
        raise AbnfGrammarParseError(fixed_source)

    # The root 'rulelist' rule is a repetition, so the top-level match is expected to come from a Repeat parser.
    check.isinstance(rulelist_match.parser, Repeat)

    visitor = MetaGrammarRuleVisitor(fixed_source)
    parsed_rules = [visitor.visit_match(child) for child in rulelist_match.children]

    trailing_rules = [] if no_core_rules else CORE_RULES
    return Grammar(
        *parsed_rules,
        *trailing_rules,
        root=root,
    )