omextra 0.0.0.dev494__py3-none-any.whl → 0.0.0.dev496__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ Internet Engineering Task Force (IETF) P. Kyzivat
2
+ Request for Comments: 7405 December 2014
3
+ Updates: 5234
4
+ Category: Standards Track
5
+ ISSN: 2070-1721
6
+
7
+
8
+ Case-Sensitive String Support in ABNF
9
+
10
+ Abstract
11
+
12
+ This document extends the base definition of ABNF (Augmented Backus-
13
+ Naur Form) to include a way to specify US-ASCII string literals that
14
+ are matched in a case-sensitive manner.
15
+
16
+ Status of This Memo
17
+
18
+ This is an Internet Standards Track document.
19
+
20
+ This document is a product of the Internet Engineering Task Force
21
+ (IETF). It represents the consensus of the IETF community. It has
22
+ received public review and has been approved for publication by the
23
+ Internet Engineering Steering Group (IESG). Further information on
24
+ Internet Standards is available in Section 2 of RFC 5741.
25
+
26
+ Information about the current status of this document, any errata,
27
+ and how to provide feedback on it may be obtained at
28
+ http://www.rfc-editor.org/info/rfc7405.
29
+
30
+ Copyright Notice
31
+
32
+ Copyright (c) 2014 IETF Trust and the persons identified as the
33
+ document authors. All rights reserved.
34
+
35
+ This document is subject to BCP 78 and the IETF Trust's Legal
36
+ Provisions Relating to IETF Documents
37
+ (http://trustee.ietf.org/license-info) in effect on the date of
38
+ publication of this document. Please review these documents
39
+ carefully, as they describe your rights and restrictions with respect
40
+ to this document. Code Components extracted from this document must
41
+ include Simplified BSD License text as described in Section 4.e of
42
+ the Trust Legal Provisions and are provided without warranty as
43
+ described in the Simplified BSD License.
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+ Kyzivat Standards Track [Page 1]
53
+
54
+ RFC 7405 Case-Sensitive String Support in ABNF December 2014
55
+
56
+
57
+ Table of Contents
58
+
59
+ 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 2
60
+ 2. Updates to RFC 5234 . . . . . . . . . . . . . . . . . . . . . 2
61
+ 2.1. Terminal Values - Literal Text Strings . . . . . . . . . 3
62
+ 2.2. ABNF Definition of ABNF - char-val . . . . . . . . . . . 4
63
+ 3. Security Considerations . . . . . . . . . . . . . . . . . . . 4
64
+ 4. Normative References . . . . . . . . . . . . . . . . . . . . 4
65
+ Author's Address . . . . . . . . . . . . . . . . . . . . . . . . 4
66
+
67
+ 1. Introduction
68
+
69
+ The base definition of ABNF (Augmented Backus-Naur Form) supports US-
70
+ ASCII string literals. The matching of these literals is done in a
71
+ case-insensitive manner. While this is often the desired behavior,
72
+ in some situations, case-sensitive matching of string literals is
73
+ needed. Literals for case-sensitive matching must be specified using
74
+ the numeric representation of those characters, which is inconvenient
75
+ and error prone both to write and read.
76
+
77
+ This document extends ABNF to have two different types of US-ASCII
78
+ string literals. One type is matched using case-sensitive matching,
79
+ while the other is matched using case-insensitive matching. These
80
+ types are denoted using type prefixes similar to the type prefixes
81
+ used with numeric values. If no prefix is used, then case-
82
+ insensitive matching is used (as is consistent with previous
83
+ behavior).
84
+
85
+ This document is structured as a set of changes to the full ABNF
86
+ specification [RFC5234].
87
+
88
+ 2. Updates to RFC 5234
89
+
90
+ This document makes changes to two parts of [RFC5234]. The two
91
+ changes are as follows:
92
+
93
+ o Replace the last half of Section 2.3 of [RFC5234] (beginning with
94
+ "ABNF permits the specification of literal text strings") with the
95
+ contents of Section 2.1.
96
+
97
+ o Replace the <char-val> rule in Section 4 of [RFC5234] with the
98
+ contents of Section 2.2.
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+ Kyzivat Standards Track [Page 2]
109
+
110
+ RFC 7405 Case-Sensitive String Support in ABNF December 2014
111
+
112
+
113
+ 2.1. Terminal Values - Literal Text Strings
114
+
115
+ ABNF permits the specification of literal text strings directly,
116
+ enclosed in quotation marks. Hence:
117
+
118
+ command = "command string"
119
+
120
+ Literal text strings are interpreted as a concatenated set of
121
+ printable characters.
122
+
123
+ NOTE:
124
+
125
+ The character set for these strings is US-ASCII.
126
+
127
+ Literal text strings in ABNF may be either case sensitive or case
128
+ insensitive. The form of matching used with a literal text string is
129
+ denoted by a prefix to the quoted string. The following prefixes are
130
+ allowed:
131
+
132
+ %s = case-sensitive
133
+ %i = case-insensitive
134
+
135
+ To be consistent with prior implementations of ABNF, having no prefix
136
+ means that the string is case insensitive and is equivalent to having
137
+ the "%i" prefix.
138
+
139
+ Hence:
140
+
141
+ rulename = %i"aBc"
142
+
143
+ and:
144
+
145
+ rulename = "abc"
146
+
147
+ will both match "abc", "Abc", "aBc", "abC", "ABc", "aBC", "AbC", and
148
+ "ABC".
149
+
150
+ In contrast:
151
+
152
+ rulename = %s"aBc"
153
+
154
+ will match only "aBc" and will not match "abc", "Abc", "abC", "ABc",
155
+ "aBC", "AbC", or "ABC".
156
+
157
+ In the past, the numerical specification of individual characters was
158
+ used to define a case-sensitive rule.
159
+
160
+
161
+
162
+
163
+
164
+ Kyzivat Standards Track [Page 3]
165
+
166
+ RFC 7405 Case-Sensitive String Support in ABNF December 2014
167
+
168
+
169
+ For example:
170
+
171
+ rulename = %d97 %d98 %d99
172
+
173
+ or
174
+
175
+ rulename = %x61.62.63
176
+
177
+ will match only the string that comprises only the lowercase
178
+ characters, abc. Using a literal text string with a prefix has a
179
+ clear readability advantage over the old way.
180
+
181
+ 2.2. ABNF Definition of ABNF - char-val
182
+
183
+ char-val = case-insensitive-string /
184
+ case-sensitive-string
185
+
186
+ case-insensitive-string =
187
+ [ "%i" ] quoted-string
188
+
189
+ case-sensitive-string =
190
+ "%s" quoted-string
191
+
192
+ quoted-string = DQUOTE *(%x20-21 / %x23-7E) DQUOTE
193
+ ; quoted string of SP and VCHAR
194
+ ; without DQUOTE
195
+
196
+ 3. Security Considerations
197
+
198
+ Security is truly believed to be irrelevant to this document.
199
+
200
+ 4. Normative References
201
+
202
+ [RFC5234] Crocker, D. and P. Overell, "Augmented BNF for Syntax
203
+ Specifications: ABNF", STD 68, RFC 5234, January 2008,
204
+ <http://www.rfc-editor.org/info/rfc5234>.
205
+
206
+ Author's Address
207
+
208
+ Paul Kyzivat
209
+ Massachusetts
210
+ United States
211
+
212
+ EMail: pkyzivat@alum.mit.edu
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+ Kyzivat Standards Track [Page 4]
221
+
@@ -0,0 +1,32 @@
1
+ import re
2
+ import typing as ta
3
+
4
+ from omlish import lang
5
+
6
+ from .ops import LeafOp
7
+ from .ops import Op
8
+
9
+
10
+ ##
11
+
12
+
13
+ class InternalOp(Op, lang.Abstract):
14
+ pass
15
+
16
+
17
+ ##
18
+
19
+
20
+ @ta.final
21
+ class Regex(InternalOp, LeafOp, lang.Final):
22
+ def __init__(self, pat: re.Pattern) -> None:
23
+ super().__init__()
24
+
25
+ self._pat = pat
26
+
27
+ @property
28
+ def pat(self) -> re.Pattern:
29
+ return self._pat
30
+
31
+ def __repr__(self) -> str:
32
+ return f'{self.__class__.__name__}@{id(self):x}({self._pat!r})'
omextra/text/abnf/meta.py CHANGED
@@ -9,20 +9,20 @@ from omlish import lang
9
9
 
10
10
  from .base import Grammar
11
11
  from .base import Match
12
- from .base import Parser
12
+ from .base import Op
13
13
  from .base import Rule
14
14
  from .core import CORE_RULES
15
15
  from .errors import AbnfGrammarParseError
16
- from .parsers import Repeat
17
- from .parsers import concat
18
- from .parsers import either
19
- from .parsers import literal
20
- from .parsers import option
21
- from .parsers import repeat
22
- from .parsers import rule
23
- from .utils import fix_grammar_ws
16
+ from .ops import Repeat
17
+ from .ops import concat
18
+ from .ops import either
19
+ from .ops import literal
20
+ from .ops import option
21
+ from .ops import repeat
22
+ from .ops import rule
23
+ from .utils import fix_ws
24
24
  from .utils import parse_rules
25
- from .visitors import RuleVisitor
25
+ from .visitors import RuleMatchVisitor
26
26
 
27
27
 
28
28
  ##
@@ -125,7 +125,8 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
125
125
  either(
126
126
  rule('WSP'),
127
127
  rule('VCHAR'),
128
- )),
128
+ ),
129
+ ),
129
130
  rule('CRLF'),
130
131
  ),
131
132
  ),
@@ -418,7 +419,7 @@ META_GRAMMAR = Grammar(
418
419
  ##
419
420
 
420
421
 
421
- class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
422
+ class MetaGrammarRuleMatchVisitor(RuleMatchVisitor[ta.Any]):
422
423
  def __init__(self, source: str) -> None:
423
424
  super().__init__()
424
425
 
@@ -432,36 +433,36 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
432
433
  class QuotedString(lang.Final):
433
434
  s: str
434
435
 
435
- @RuleVisitor.register('rule')
436
+ @RuleMatchVisitor.register('rule')
436
437
  def visit_rule_rule(self, m: Match) -> ta.Any:
437
438
  rn_m, _, el_m = m.children
438
439
  rn = check.isinstance(self.visit_match(rn_m), self.RuleName).s
439
440
  el = self.visit_match(el_m)
440
441
  return Rule(rn, el)
441
442
 
442
- @RuleVisitor.register('rulename')
443
+ @RuleMatchVisitor.register('rulename')
443
444
  def visit_rulename_rule(self, m: Match) -> ta.Any:
444
445
  return self.RuleName(self._source[m.start:m.end])
445
446
 
446
- @RuleVisitor.register('elements')
447
+ @RuleMatchVisitor.register('elements')
447
448
  def visit_elements_rule(self, m: Match) -> ta.Any:
448
449
  return self.visit_match(check.single(m.children))
449
450
 
450
- @RuleVisitor.register('alternation')
451
+ @RuleMatchVisitor.register('alternation')
451
452
  def visit_alternation_rule(self, m: Match) -> ta.Any:
452
453
  if len(m.children) == 1:
453
454
  return self.visit_match(m.children[0])
454
455
  else:
455
456
  return either(*map(self.visit_match, m.children))
456
457
 
457
- @RuleVisitor.register('concatenation')
458
+ @RuleMatchVisitor.register('concatenation')
458
459
  def visit_concatenation_rule(self, m: Match) -> ta.Any:
459
460
  if len(m.children) == 1:
460
461
  return self.visit_match(m.children[0])
461
462
  else:
462
463
  return concat(*map(self.visit_match, m.children))
463
464
 
464
- @RuleVisitor.register('repetition')
465
+ @RuleMatchVisitor.register('repetition')
465
466
  def visit_repetition_rule(self, m: Match) -> ta.Any:
466
467
  if len(m.children) == 2:
467
468
  ti_m, el_m = m.children
@@ -473,7 +474,7 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
473
474
  else:
474
475
  raise ValueError(m)
475
476
 
476
- @RuleVisitor.register('repeat')
477
+ @RuleMatchVisitor.register('repeat')
477
478
  def visit_repeat_rule(self, m: Match) -> ta.Any:
478
479
  s = check.non_empty_str(self._source[m.start:m.end])
479
480
  if s == '*':
@@ -488,30 +489,30 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
488
489
  else:
489
490
  return Repeat.Times(n := int(s), n)
490
491
 
491
- @RuleVisitor.register('element')
492
+ @RuleMatchVisitor.register('element')
492
493
  def visit_element_rule(self, m: Match) -> ta.Any:
493
494
  c = self.visit_match(check.single(m.children))
494
- if isinstance(c, Parser):
495
+ if isinstance(c, Op):
495
496
  return c
496
497
  elif isinstance(c, self.RuleName):
497
498
  return rule(c.s)
498
499
  else:
499
500
  raise TypeError(c)
500
501
 
501
- @RuleVisitor.register('group')
502
+ @RuleMatchVisitor.register('group')
502
503
  def visit_group_rule(self, m: Match) -> ta.Any:
503
504
  return self.visit_match(check.single(m.children))
504
505
 
505
- @RuleVisitor.register('option')
506
+ @RuleMatchVisitor.register('option')
506
507
  def visit_option_rule(self, m: Match) -> ta.Any:
507
508
  c = self.visit_match(check.single(m.children))
508
- return option(check.isinstance(c, Parser))
509
+ return option(check.isinstance(c, Op))
509
510
 
510
- @RuleVisitor.register('num-val')
511
+ @RuleMatchVisitor.register('num-val')
511
512
  def visit_num_val_rule(self, m: Match) -> ta.Any:
512
513
  return self.visit_match(check.single(m.children))
513
514
 
514
- def _parse_num_val(self, s: str, base: int) -> Parser:
515
+ def _parse_num_val(self, s: str, base: int) -> Op:
515
516
  if '-' in s:
516
517
  check.not_in('.', s)
517
518
  lo, hi = [chr(int(p, base)) for p in s.split('-')]
@@ -524,29 +525,29 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
524
525
  c = chr(int(s, base))
525
526
  return literal(c, c)
526
527
 
527
- @RuleVisitor.register('dec-val')
528
+ @RuleMatchVisitor.register('dec-val')
528
529
  def visit_dec_val_rule(self, m: Match) -> ta.Any:
529
530
  return self._parse_num_val(self._source[m.start + 1:m.end], 10)
530
531
 
531
- @RuleVisitor.register('hex-val')
532
+ @RuleMatchVisitor.register('hex-val')
532
533
  def visit_hex_val_rule(self, m: Match) -> ta.Any:
533
534
  return self._parse_num_val(self._source[m.start + 1:m.end], 16)
534
535
 
535
- @RuleVisitor.register('char-val')
536
+ @RuleMatchVisitor.register('char-val')
536
537
  def visit_char_val_rule(self, m: Match) -> ta.Any:
537
538
  return self.visit_match(check.single(m.children))
538
539
 
539
- @RuleVisitor.register('case-sensitive-string')
540
+ @RuleMatchVisitor.register('case-sensitive-string')
540
541
  def visit_case_sensitive_string_rule(self, m: Match) -> ta.Any:
541
542
  c = self.visit_match(check.single(m.children))
542
543
  return literal(check.isinstance(c, self.QuotedString).s, case_sensitive=True)
543
544
 
544
- @RuleVisitor.register('case-insensitive-string')
545
+ @RuleMatchVisitor.register('case-insensitive-string')
545
546
  def visit_case_insensitive_string_rule(self, m: Match) -> ta.Any:
546
547
  c = self.visit_match(check.single(m.children))
547
548
  return literal(check.isinstance(c, self.QuotedString).s, case_sensitive=False)
548
549
 
549
- @RuleVisitor.register('quoted-string')
550
+ @RuleMatchVisitor.register('quoted-string')
550
551
  def visit_quoted_string_rule(self, m: Match) -> ta.Any:
551
552
  check.state(m.end - m.start > 2)
552
553
  check.state(self._source[m.start] == '"')
@@ -561,7 +562,7 @@ def parse_grammar(
561
562
  root: str | None = None,
562
563
  **kwargs: ta.Any,
563
564
  ) -> Grammar:
564
- source = fix_grammar_ws(source)
565
+ source = fix_ws(source)
565
566
 
566
567
  if (mg_m := parse_rules(
567
568
  META_GRAMMAR,
@@ -571,10 +572,10 @@ def parse_grammar(
571
572
  )) is None:
572
573
  raise AbnfGrammarParseError(source)
573
574
 
574
- check.isinstance(mg_m.parser, Repeat)
575
+ check.isinstance(mg_m.op, Repeat)
575
576
 
576
- mg_rv = MetaGrammarRuleVisitor(source)
577
- rules = [mg_rv.visit_match(gg_cm) for gg_cm in mg_m.children]
577
+ mg_rmv = MetaGrammarRuleMatchVisitor(source)
578
+ rules = [mg_rmv.visit_match(gg_cm) for gg_cm in mg_m.children]
578
579
 
579
580
  return Grammar(
580
581
  *rules,