omextra 0.0.0.dev494__py3-none-any.whl → 0.0.0.dev496__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omextra/text/abnf/__init__.py +17 -4
- omextra/text/abnf/_dataclasses.py +438 -0
- omextra/text/abnf/base.py +49 -82
- omextra/text/abnf/core.py +5 -5
- omextra/text/abnf/docs/__init__.py +0 -0
- omextra/text/abnf/docs/rfc5234.txt +893 -0
- omextra/text/abnf/docs/rfc7405.txt +221 -0
- omextra/text/abnf/internal.py +32 -0
- omextra/text/abnf/meta.py +37 -36
- omextra/text/abnf/ops.py +276 -0
- omextra/text/abnf/parsing.py +212 -0
- omextra/text/abnf/utils.py +4 -9
- omextra/text/abnf/visitors.py +8 -8
- {omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/METADATA +2 -2
- {omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/RECORD +19 -13
- omextra/text/abnf/parsers.py +0 -343
- {omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/WHEEL +0 -0
- {omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/entry_points.txt +0 -0
- {omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/licenses/LICENSE +0 -0
- {omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Internet Engineering Task Force (IETF) P. Kyzivat
|
|
2
|
+
Request for Comments: 7405 December 2014
|
|
3
|
+
Updates: 5234
|
|
4
|
+
Category: Standards Track
|
|
5
|
+
ISSN: 2070-1721
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
Case-Sensitive String Support in ABNF
|
|
9
|
+
|
|
10
|
+
Abstract
|
|
11
|
+
|
|
12
|
+
This document extends the base definition of ABNF (Augmented Backus-
|
|
13
|
+
Naur Form) to include a way to specify US-ASCII string literals that
|
|
14
|
+
are matched in a case-sensitive manner.
|
|
15
|
+
|
|
16
|
+
Status of This Memo
|
|
17
|
+
|
|
18
|
+
This is an Internet Standards Track document.
|
|
19
|
+
|
|
20
|
+
This document is a product of the Internet Engineering Task Force
|
|
21
|
+
(IETF). It represents the consensus of the IETF community. It has
|
|
22
|
+
received public review and has been approved for publication by the
|
|
23
|
+
Internet Engineering Steering Group (IESG). Further information on
|
|
24
|
+
Internet Standards is available in Section 2 of RFC 5741.
|
|
25
|
+
|
|
26
|
+
Information about the current status of this document, any errata,
|
|
27
|
+
and how to provide feedback on it may be obtained at
|
|
28
|
+
http://www.rfc-editor.org/info/rfc7405.
|
|
29
|
+
|
|
30
|
+
Copyright Notice
|
|
31
|
+
|
|
32
|
+
Copyright (c) 2014 IETF Trust and the persons identified as the
|
|
33
|
+
document authors. All rights reserved.
|
|
34
|
+
|
|
35
|
+
This document is subject to BCP 78 and the IETF Trust's Legal
|
|
36
|
+
Provisions Relating to IETF Documents
|
|
37
|
+
(http://trustee.ietf.org/license-info) in effect on the date of
|
|
38
|
+
publication of this document. Please review these documents
|
|
39
|
+
carefully, as they describe your rights and restrictions with respect
|
|
40
|
+
to this document. Code Components extracted from this document must
|
|
41
|
+
include Simplified BSD License text as described in Section 4.e of
|
|
42
|
+
the Trust Legal Provisions and are provided without warranty as
|
|
43
|
+
described in the Simplified BSD License.
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
Kyzivat Standards Track [Page 1]
|
|
53
|
+
|
|
54
|
+
RFC 7405 Case-Sensitive String Support in ABNF December 2014
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
Table of Contents
|
|
58
|
+
|
|
59
|
+
1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 2
|
|
60
|
+
2. Updates to RFC 5234 . . . . . . . . . . . . . . . . . . . . . 2
|
|
61
|
+
2.1. Terminal Values - Literal Text Strings . . . . . . . . . 3
|
|
62
|
+
2.2. ABNF Definition of ABNF - char-val . . . . . . . . . . . 4
|
|
63
|
+
3. Security Considerations . . . . . . . . . . . . . . . . . . . 4
|
|
64
|
+
4. Normative References . . . . . . . . . . . . . . . . . . . . 4
|
|
65
|
+
Author's Address . . . . . . . . . . . . . . . . . . . . . . . . 4
|
|
66
|
+
|
|
67
|
+
1. Introduction
|
|
68
|
+
|
|
69
|
+
The base definition of ABNF (Augmented Backus-Naur Form) supports US-
|
|
70
|
+
ASCII string literals. The matching of these literals is done in a
|
|
71
|
+
case-insensitive manner. While this is often the desired behavior,
|
|
72
|
+
in some situations, case-sensitive matching of string literals is
|
|
73
|
+
needed. Literals for case-sensitive matching must be specified using
|
|
74
|
+
the numeric representation of those characters, which is inconvenient
|
|
75
|
+
and error prone both to write and read.
|
|
76
|
+
|
|
77
|
+
This document extends ABNF to have two different types of US-ASCII
|
|
78
|
+
string literals. One type is matched using case-sensitive matching,
|
|
79
|
+
while the other is matched using case-insensitive matching. These
|
|
80
|
+
types are denoted using type prefixes similar to the type prefixes
|
|
81
|
+
used with numeric values. If no prefix is used, then case-
|
|
82
|
+
insensitive matching is used (as is consistent with previous
|
|
83
|
+
behavior).
|
|
84
|
+
|
|
85
|
+
This document is structured as a set of changes to the full ABNF
|
|
86
|
+
specification [RFC5234].
|
|
87
|
+
|
|
88
|
+
2. Updates to RFC 5234
|
|
89
|
+
|
|
90
|
+
This document makes changes to two parts of [RFC5234]. The two
|
|
91
|
+
changes are as follows:
|
|
92
|
+
|
|
93
|
+
o Replace the last half of Section 2.3 of [RFC5234] (beginning with
|
|
94
|
+
"ABNF permits the specification of literal text strings") with the
|
|
95
|
+
contents of Section 2.1.
|
|
96
|
+
|
|
97
|
+
o Replace the <char-val> rule in Section 4 of [RFC5234] with the
|
|
98
|
+
contents of Section 2.2.
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
Kyzivat Standards Track [Page 2]
|
|
109
|
+
|
|
110
|
+
RFC 7405 Case-Sensitive String Support in ABNF December 2014
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
2.1. Terminal Values - Literal Text Strings
|
|
114
|
+
|
|
115
|
+
ABNF permits the specification of literal text strings directly,
|
|
116
|
+
enclosed in quotation marks. Hence:
|
|
117
|
+
|
|
118
|
+
command = "command string"
|
|
119
|
+
|
|
120
|
+
Literal text strings are interpreted as a concatenated set of
|
|
121
|
+
printable characters.
|
|
122
|
+
|
|
123
|
+
NOTE:
|
|
124
|
+
|
|
125
|
+
The character set for these strings is US-ASCII.
|
|
126
|
+
|
|
127
|
+
Literal text strings in ABNF may be either case sensitive or case
|
|
128
|
+
insensitive. The form of matching used with a literal text string is
|
|
129
|
+
denoted by a prefix to the quoted string. The following prefixes are
|
|
130
|
+
allowed:
|
|
131
|
+
|
|
132
|
+
%s = case-sensitive
|
|
133
|
+
%i = case-insensitive
|
|
134
|
+
|
|
135
|
+
To be consistent with prior implementations of ABNF, having no prefix
|
|
136
|
+
means that the string is case insensitive and is equivalent to having
|
|
137
|
+
the "%i" prefix.
|
|
138
|
+
|
|
139
|
+
Hence:
|
|
140
|
+
|
|
141
|
+
rulename = %i"aBc"
|
|
142
|
+
|
|
143
|
+
and:
|
|
144
|
+
|
|
145
|
+
rulename = "abc"
|
|
146
|
+
|
|
147
|
+
will both match "abc", "Abc", "aBc", "abC", "ABc", "aBC", "AbC", and
|
|
148
|
+
"ABC".
|
|
149
|
+
|
|
150
|
+
In contrast:
|
|
151
|
+
|
|
152
|
+
rulename = %s"aBc"
|
|
153
|
+
|
|
154
|
+
will match only "aBc" and will not match "abc", "Abc", "abC", "ABc",
|
|
155
|
+
"aBC", "AbC", or "ABC".
|
|
156
|
+
|
|
157
|
+
In the past, the numerical specification of individual characters was
|
|
158
|
+
used to define a case-sensitive rule.
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
Kyzivat Standards Track [Page 3]
|
|
165
|
+
|
|
166
|
+
RFC 7405 Case-Sensitive String Support in ABNF December 2014
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
For example:
|
|
170
|
+
|
|
171
|
+
rulename = %d97 %d98 %d99
|
|
172
|
+
|
|
173
|
+
or
|
|
174
|
+
|
|
175
|
+
rulename = %x61.62.63
|
|
176
|
+
|
|
177
|
+
will match only the string that comprises only the lowercase
|
|
178
|
+
characters, abc. Using a literal text string with a prefix has a
|
|
179
|
+
clear readability advantage over the old way.
|
|
180
|
+
|
|
181
|
+
2.2. ABNF Definition of ABNF - char-val
|
|
182
|
+
|
|
183
|
+
char-val = case-insensitive-string /
|
|
184
|
+
case-sensitive-string
|
|
185
|
+
|
|
186
|
+
case-insensitive-string =
|
|
187
|
+
[ "%i" ] quoted-string
|
|
188
|
+
|
|
189
|
+
case-sensitive-string =
|
|
190
|
+
"%s" quoted-string
|
|
191
|
+
|
|
192
|
+
quoted-string = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
|
|
193
|
+
; quoted string of SP and VCHAR
|
|
194
|
+
; without DQUOTE
|
|
195
|
+
|
|
196
|
+
3. Security Considerations
|
|
197
|
+
|
|
198
|
+
Security is truly believed to be irrelevant to this document.
|
|
199
|
+
|
|
200
|
+
4. Normative References
|
|
201
|
+
|
|
202
|
+
[RFC5234] Crocker, D. and P. Overell, "Augmented BNF for Syntax
|
|
203
|
+
Specifications: ABNF", STD 68, RFC 5234, January 2008,
|
|
204
|
+
<http://www.rfc-editor.org/info/rfc5234>.
|
|
205
|
+
|
|
206
|
+
Author's Address
|
|
207
|
+
|
|
208
|
+
Paul Kyzivat
|
|
209
|
+
Massachusetts
|
|
210
|
+
United States
|
|
211
|
+
|
|
212
|
+
EMail: pkyzivat@alum.mit.edu
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
Kyzivat Standards Track [Page 4]
|
|
221
|
+
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import typing as ta
|
|
3
|
+
|
|
4
|
+
from omlish import lang
|
|
5
|
+
|
|
6
|
+
from .ops import LeafOp
|
|
7
|
+
from .ops import Op
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
##
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class InternalOp(Op, lang.Abstract):
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
##
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@ta.final
|
|
21
|
+
class Regex(InternalOp, LeafOp, lang.Final):
|
|
22
|
+
def __init__(self, pat: re.Pattern) -> None:
|
|
23
|
+
super().__init__()
|
|
24
|
+
|
|
25
|
+
self._pat = pat
|
|
26
|
+
|
|
27
|
+
@property
|
|
28
|
+
def pat(self) -> re.Pattern:
|
|
29
|
+
return self._pat
|
|
30
|
+
|
|
31
|
+
def __repr__(self) -> str:
|
|
32
|
+
return f'{self.__class__.__name__}@{id(self):x}({self._pat!r})'
|
omextra/text/abnf/meta.py
CHANGED
|
@@ -9,20 +9,20 @@ from omlish import lang
|
|
|
9
9
|
|
|
10
10
|
from .base import Grammar
|
|
11
11
|
from .base import Match
|
|
12
|
-
from .base import
|
|
12
|
+
from .base import Op
|
|
13
13
|
from .base import Rule
|
|
14
14
|
from .core import CORE_RULES
|
|
15
15
|
from .errors import AbnfGrammarParseError
|
|
16
|
-
from .
|
|
17
|
-
from .
|
|
18
|
-
from .
|
|
19
|
-
from .
|
|
20
|
-
from .
|
|
21
|
-
from .
|
|
22
|
-
from .
|
|
23
|
-
from .utils import
|
|
16
|
+
from .ops import Repeat
|
|
17
|
+
from .ops import concat
|
|
18
|
+
from .ops import either
|
|
19
|
+
from .ops import literal
|
|
20
|
+
from .ops import option
|
|
21
|
+
from .ops import repeat
|
|
22
|
+
from .ops import rule
|
|
23
|
+
from .utils import fix_ws
|
|
24
24
|
from .utils import parse_rules
|
|
25
|
-
from .visitors import
|
|
25
|
+
from .visitors import RuleMatchVisitor
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
##
|
|
@@ -125,7 +125,8 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
|
|
|
125
125
|
either(
|
|
126
126
|
rule('WSP'),
|
|
127
127
|
rule('VCHAR'),
|
|
128
|
-
)
|
|
128
|
+
),
|
|
129
|
+
),
|
|
129
130
|
rule('CRLF'),
|
|
130
131
|
),
|
|
131
132
|
),
|
|
@@ -418,7 +419,7 @@ META_GRAMMAR = Grammar(
|
|
|
418
419
|
##
|
|
419
420
|
|
|
420
421
|
|
|
421
|
-
class
|
|
422
|
+
class MetaGrammarRuleMatchVisitor(RuleMatchVisitor[ta.Any]):
|
|
422
423
|
def __init__(self, source: str) -> None:
|
|
423
424
|
super().__init__()
|
|
424
425
|
|
|
@@ -432,36 +433,36 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
|
|
|
432
433
|
class QuotedString(lang.Final):
|
|
433
434
|
s: str
|
|
434
435
|
|
|
435
|
-
@
|
|
436
|
+
@RuleMatchVisitor.register('rule')
|
|
436
437
|
def visit_rule_rule(self, m: Match) -> ta.Any:
|
|
437
438
|
rn_m, _, el_m = m.children
|
|
438
439
|
rn = check.isinstance(self.visit_match(rn_m), self.RuleName).s
|
|
439
440
|
el = self.visit_match(el_m)
|
|
440
441
|
return Rule(rn, el)
|
|
441
442
|
|
|
442
|
-
@
|
|
443
|
+
@RuleMatchVisitor.register('rulename')
|
|
443
444
|
def visit_rulename_rule(self, m: Match) -> ta.Any:
|
|
444
445
|
return self.RuleName(self._source[m.start:m.end])
|
|
445
446
|
|
|
446
|
-
@
|
|
447
|
+
@RuleMatchVisitor.register('elements')
|
|
447
448
|
def visit_elements_rule(self, m: Match) -> ta.Any:
|
|
448
449
|
return self.visit_match(check.single(m.children))
|
|
449
450
|
|
|
450
|
-
@
|
|
451
|
+
@RuleMatchVisitor.register('alternation')
|
|
451
452
|
def visit_alternation_rule(self, m: Match) -> ta.Any:
|
|
452
453
|
if len(m.children) == 1:
|
|
453
454
|
return self.visit_match(m.children[0])
|
|
454
455
|
else:
|
|
455
456
|
return either(*map(self.visit_match, m.children))
|
|
456
457
|
|
|
457
|
-
@
|
|
458
|
+
@RuleMatchVisitor.register('concatenation')
|
|
458
459
|
def visit_concatenation_rule(self, m: Match) -> ta.Any:
|
|
459
460
|
if len(m.children) == 1:
|
|
460
461
|
return self.visit_match(m.children[0])
|
|
461
462
|
else:
|
|
462
463
|
return concat(*map(self.visit_match, m.children))
|
|
463
464
|
|
|
464
|
-
@
|
|
465
|
+
@RuleMatchVisitor.register('repetition')
|
|
465
466
|
def visit_repetition_rule(self, m: Match) -> ta.Any:
|
|
466
467
|
if len(m.children) == 2:
|
|
467
468
|
ti_m, el_m = m.children
|
|
@@ -473,7 +474,7 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
|
|
|
473
474
|
else:
|
|
474
475
|
raise ValueError(m)
|
|
475
476
|
|
|
476
|
-
@
|
|
477
|
+
@RuleMatchVisitor.register('repeat')
|
|
477
478
|
def visit_repeat_rule(self, m: Match) -> ta.Any:
|
|
478
479
|
s = check.non_empty_str(self._source[m.start:m.end])
|
|
479
480
|
if s == '*':
|
|
@@ -488,30 +489,30 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
|
|
|
488
489
|
else:
|
|
489
490
|
return Repeat.Times(n := int(s), n)
|
|
490
491
|
|
|
491
|
-
@
|
|
492
|
+
@RuleMatchVisitor.register('element')
|
|
492
493
|
def visit_element_rule(self, m: Match) -> ta.Any:
|
|
493
494
|
c = self.visit_match(check.single(m.children))
|
|
494
|
-
if isinstance(c,
|
|
495
|
+
if isinstance(c, Op):
|
|
495
496
|
return c
|
|
496
497
|
elif isinstance(c, self.RuleName):
|
|
497
498
|
return rule(c.s)
|
|
498
499
|
else:
|
|
499
500
|
raise TypeError(c)
|
|
500
501
|
|
|
501
|
-
@
|
|
502
|
+
@RuleMatchVisitor.register('group')
|
|
502
503
|
def visit_group_rule(self, m: Match) -> ta.Any:
|
|
503
504
|
return self.visit_match(check.single(m.children))
|
|
504
505
|
|
|
505
|
-
@
|
|
506
|
+
@RuleMatchVisitor.register('option')
|
|
506
507
|
def visit_option_rule(self, m: Match) -> ta.Any:
|
|
507
508
|
c = self.visit_match(check.single(m.children))
|
|
508
|
-
return option(check.isinstance(c,
|
|
509
|
+
return option(check.isinstance(c, Op))
|
|
509
510
|
|
|
510
|
-
@
|
|
511
|
+
@RuleMatchVisitor.register('num-val')
|
|
511
512
|
def visit_num_val_rule(self, m: Match) -> ta.Any:
|
|
512
513
|
return self.visit_match(check.single(m.children))
|
|
513
514
|
|
|
514
|
-
def _parse_num_val(self, s: str, base: int) ->
|
|
515
|
+
def _parse_num_val(self, s: str, base: int) -> Op:
|
|
515
516
|
if '-' in s:
|
|
516
517
|
check.not_in('.', s)
|
|
517
518
|
lo, hi = [chr(int(p, base)) for p in s.split('-')]
|
|
@@ -524,29 +525,29 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
|
|
|
524
525
|
c = chr(int(s, base))
|
|
525
526
|
return literal(c, c)
|
|
526
527
|
|
|
527
|
-
@
|
|
528
|
+
@RuleMatchVisitor.register('dec-val')
|
|
528
529
|
def visit_dec_val_rule(self, m: Match) -> ta.Any:
|
|
529
530
|
return self._parse_num_val(self._source[m.start + 1:m.end], 10)
|
|
530
531
|
|
|
531
|
-
@
|
|
532
|
+
@RuleMatchVisitor.register('hex-val')
|
|
532
533
|
def visit_hex_val_rule(self, m: Match) -> ta.Any:
|
|
533
534
|
return self._parse_num_val(self._source[m.start + 1:m.end], 16)
|
|
534
535
|
|
|
535
|
-
@
|
|
536
|
+
@RuleMatchVisitor.register('char-val')
|
|
536
537
|
def visit_char_val_rule(self, m: Match) -> ta.Any:
|
|
537
538
|
return self.visit_match(check.single(m.children))
|
|
538
539
|
|
|
539
|
-
@
|
|
540
|
+
@RuleMatchVisitor.register('case-sensitive-string')
|
|
540
541
|
def visit_case_sensitive_string_rule(self, m: Match) -> ta.Any:
|
|
541
542
|
c = self.visit_match(check.single(m.children))
|
|
542
543
|
return literal(check.isinstance(c, self.QuotedString).s, case_sensitive=True)
|
|
543
544
|
|
|
544
|
-
@
|
|
545
|
+
@RuleMatchVisitor.register('case-insensitive-string')
|
|
545
546
|
def visit_case_insensitive_string_rule(self, m: Match) -> ta.Any:
|
|
546
547
|
c = self.visit_match(check.single(m.children))
|
|
547
548
|
return literal(check.isinstance(c, self.QuotedString).s, case_sensitive=False)
|
|
548
549
|
|
|
549
|
-
@
|
|
550
|
+
@RuleMatchVisitor.register('quoted-string')
|
|
550
551
|
def visit_quoted_string_rule(self, m: Match) -> ta.Any:
|
|
551
552
|
check.state(m.end - m.start > 2)
|
|
552
553
|
check.state(self._source[m.start] == '"')
|
|
@@ -561,7 +562,7 @@ def parse_grammar(
|
|
|
561
562
|
root: str | None = None,
|
|
562
563
|
**kwargs: ta.Any,
|
|
563
564
|
) -> Grammar:
|
|
564
|
-
source =
|
|
565
|
+
source = fix_ws(source)
|
|
565
566
|
|
|
566
567
|
if (mg_m := parse_rules(
|
|
567
568
|
META_GRAMMAR,
|
|
@@ -571,10 +572,10 @@ def parse_grammar(
|
|
|
571
572
|
)) is None:
|
|
572
573
|
raise AbnfGrammarParseError(source)
|
|
573
574
|
|
|
574
|
-
check.isinstance(mg_m.
|
|
575
|
+
check.isinstance(mg_m.op, Repeat)
|
|
575
576
|
|
|
576
|
-
|
|
577
|
-
rules = [
|
|
577
|
+
mg_rmv = MetaGrammarRuleMatchVisitor(source)
|
|
578
|
+
rules = [mg_rmv.visit_match(gg_cm) for gg_cm in mg_m.children]
|
|
578
579
|
|
|
579
580
|
return Grammar(
|
|
580
581
|
*rules,
|