PyPI - omextra - Versions diffs - 0.0.0.dev494__py3-none-any.whl → 0.0.0.dev496__py3-none-any.whl - Mend

omextra 0.0.0.dev494__py3-none-any.whl → 0.0.0.dev496__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

omextra/text/abnf/__init__.py +17 -4
omextra/text/abnf/_dataclasses.py +438 -0
omextra/text/abnf/base.py +49 -82
omextra/text/abnf/core.py +5 -5
omextra/text/abnf/docs/__init__.py +0 -0
omextra/text/abnf/docs/rfc5234.txt +893 -0
omextra/text/abnf/docs/rfc7405.txt +221 -0
omextra/text/abnf/internal.py +32 -0
omextra/text/abnf/meta.py +37 -36
omextra/text/abnf/ops.py +276 -0
omextra/text/abnf/parsing.py +212 -0
omextra/text/abnf/utils.py +4 -9
omextra/text/abnf/visitors.py +8 -8
{omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/METADATA +2 -2
{omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/RECORD +19 -13
omextra/text/abnf/parsers.py +0 -343
{omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/WHEEL +0 -0
{omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/entry_points.txt +0 -0
{omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/licenses/LICENSE +0 -0
{omextra-0.0.0.dev494.dist-info → omextra-0.0.0.dev496.dist-info}/top_level.txt +0 -0

omextra/text/abnf/docs/rfc7405.txt ADDED Viewed

@@ -0,0 +1,221 @@
+Internet Engineering Task Force (IETF)                        P. Kyzivat
+Request for Comments: 7405                                 December 2014
+Updates: 5234
+Category: Standards Track
+ISSN: 2070-1721
+                 Case-Sensitive String Support in ABNF
+Abstract
+   This document extends the base definition of ABNF (Augmented Backus-
+   Naur Form) to include a way to specify US-ASCII string literals that
+   are matched in a case-sensitive manner.
+Status of This Memo
+   This is an Internet Standards Track document.
+   This document is a product of the Internet Engineering Task Force
+   (IETF).  It represents the consensus of the IETF community.  It has
+   received public review and has been approved for publication by the
+   Internet Engineering Steering Group (IESG).  Further information on
+   Internet Standards is available in Section 2 of RFC 5741.
+   Information about the current status of this document, any errata,
+   and how to provide feedback on it may be obtained at
+   http://www.rfc-editor.org/info/rfc7405.
+Copyright Notice
+   Copyright (c) 2014 IETF Trust and the persons identified as the
+   document authors.  All rights reserved.
+   This document is subject to BCP 78 and the IETF Trust's Legal
+   Provisions Relating to IETF Documents
+   (http://trustee.ietf.org/license-info) in effect on the date of
+   publication of this document.  Please review these documents
+   carefully, as they describe your rights and restrictions with respect
+   to this document.  Code Components extracted from this document must
+   include Simplified BSD License text as described in Section 4.e of
+   the Trust Legal Provisions and are provided without warranty as
+   described in the Simplified BSD License.
+Kyzivat                      Standards Track                    [Page 1]
+RFC 7405          Case-Sensitive String Support in ABNF    December 2014
+Table of Contents
+   1.  Introduction  . . . . . . . . . . . . . . . . . . . . . . . .   2
+   2.  Updates to RFC 5234 . . . . . . . . . . . . . . . . . . . . .   2
+     2.1.  Terminal Values - Literal Text Strings  . . . . . . . . .   3
+     2.2.  ABNF Definition of ABNF - char-val  . . . . . . . . . . .   4
+   3.  Security Considerations . . . . . . . . . . . . . . . . . . .   4
+   4.  Normative References  . . . . . . . . . . . . . . . . . . . .   4
+   Author's Address  . . . . . . . . . . . . . . . . . . . . . . . .   4
+1.  Introduction
+   The base definition of ABNF (Augmented Backus-Naur Form) supports US-
+   ASCII string literals.  The matching of these literals is done in a
+   case-insensitive manner.  While this is often the desired behavior,
+   in some situations, case-sensitive matching of string literals is
+   needed.  Literals for case-sensitive matching must be specified using
+   the numeric representation of those characters, which is inconvenient
+   and error prone both to write and read.
+   This document extends ABNF to have two different types of US-ASCII
+   string literals.  One type is matched using case-sensitive matching,
+   while the other is matched using case-insensitive matching.  These
+   types are denoted using type prefixes similar to the type prefixes
+   used with numeric values.  If no prefix is used, then case-
+   insensitive matching is used (as is consistent with previous
+   behavior).
+   This document is structured as a set of changes to the full ABNF
+   specification [RFC5234].
+2.  Updates to RFC 5234
+   This document makes changes to two parts of [RFC5234].  The two
+   changes are as follows:
+   o  Replace the last half of Section 2.3 of [RFC5234] (beginning with
+      "ABNF permits the specification of literal text strings") with the
+      contents of Section 2.1.
+   o  Replace the <char-val> rule in Section 4 of [RFC5234] with the
+      contents of Section 2.2.
+Kyzivat                      Standards Track                    [Page 2]
+RFC 7405          Case-Sensitive String Support in ABNF    December 2014
+2.1.  Terminal Values - Literal Text Strings
+   ABNF permits the specification of literal text strings directly,
+   enclosed in quotation marks.  Hence:
+         command     =  "command string"
+   Literal text strings are interpreted as a concatenated set of
+   printable characters.
+   NOTE:
+   The character set for these strings is US-ASCII.
+   Literal text strings in ABNF may be either case sensitive or case
+   insensitive.  The form of matching used with a literal text string is
+   denoted by a prefix to the quoted string.  The following prefixes are
+   allowed:
+         %s          =  case-sensitive
+         %i          =  case-insensitive
+   To be consistent with prior implementations of ABNF, having no prefix
+   means that the string is case insensitive and is equivalent to having
+   the "%i" prefix.
+   Hence:
+         rulename = %i"aBc"
+   and:
+         rulename = "abc"
+   will both match "abc", "Abc", "aBc", "abC", "ABc", "aBC", "AbC", and
+   "ABC".
+   In contrast:
+         rulename = %s"aBc"
+   will match only "aBc" and will not match "abc", "Abc", "abC", "ABc",
+   "aBC", "AbC", or "ABC".
+   In the past, the numerical specification of individual characters was
+   used to define a case-sensitive rule.
+Kyzivat                      Standards Track                    [Page 3]
+RFC 7405          Case-Sensitive String Support in ABNF    December 2014
+   For example:
+         rulename    =  %d97 %d98 %d99
+   or
+         rulename    =  %x61.62.63
+   will match only the string that comprises only the lowercase
+   characters, abc.  Using a literal text string with a prefix has a
+   clear readability advantage over the old way.
+2.2.  ABNF Definition of ABNF - char-val
+         char-val       =  case-insensitive-string /
+                           case-sensitive-string
+         case-insensitive-string =
+                           [ "%i" ] quoted-string
+         case-sensitive-string =
+                           "%s" quoted-string
+         quoted-string  =  DQUOTE *(%x20-21 / %x23-7E) DQUOTE
+                                ; quoted string of SP and VCHAR
+                                ;  without DQUOTE
+3.  Security Considerations
+   Security is truly believed to be irrelevant to this document.
+4.  Normative References
+   [RFC5234]  Crocker, D. and P. Overell, "Augmented BNF for Syntax
+              Specifications: ABNF", STD 68, RFC 5234, January 2008,
+              <http://www.rfc-editor.org/info/rfc5234>.
+Author's Address
+   Paul Kyzivat
+   Massachusetts
+   United States
+   EMail: pkyzivat@alum.mit.edu
+Kyzivat                      Standards Track                    [Page 4]

omextra/text/abnf/internal.py ADDED Viewed

@@ -0,0 +1,32 @@
+import re
+import typing as ta
+from omlish import lang
+from .ops import LeafOp
+from .ops import Op
+##
+class InternalOp(Op, lang.Abstract):  # abstract Op subclass; the body adds no members of its own
+    pass
+##
+@ta.final
+class Regex(InternalOp, LeafOp, lang.Final):  # final InternalOp/LeafOp subclass wrapping one re.Pattern
+    def __init__(self, pat: re.Pattern) -> None:
+        super().__init__()
+        self._pat = pat  # stored privately; read back through the `pat` property below
+    @property
+    def pat(self) -> re.Pattern:  # getter only - no setter is defined in this class
+        return self._pat
+    def __repr__(self) -> str:
+        return f'{self.__class__.__name__}@{id(self):x}({self._pat!r})'  # class name, hex id(), pattern repr

omextra/text/abnf/meta.py CHANGED Viewed

@@ -9,20 +9,20 @@ from omlish import lang
 from .base import Grammar
 from .base import Match
-from .base import Parser
+from .base import Op
 from .base import Rule
 from .core import CORE_RULES
 from .errors import AbnfGrammarParseError
-from .parsers import Repeat
-from .parsers import concat
-from .parsers import either
-from .parsers import literal
-from .parsers import option
-from .parsers import repeat
-from .parsers import rule
-from .utils import fix_grammar_ws
+from .ops import Repeat
+from .ops import concat
+from .ops import either
+from .ops import literal
+from .ops import option
+from .ops import repeat
+from .ops import rule
+from .utils import fix_ws
 from .utils import parse_rules
-from .visitors import RuleVisitor
+from .visitors import RuleMatchVisitor
 ##
@@ -125,7 +125,8 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
                 either(
                     rule('WSP'),
                     rule('VCHAR'),
-                )),
+                ),
+            ),
             rule('CRLF'),
         ),
     ),
@@ -418,7 +419,7 @@ META_GRAMMAR = Grammar(
 ##
-class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
+class MetaGrammarRuleMatchVisitor(RuleMatchVisitor[ta.Any]):
     def __init__(self, source: str) -> None:
         super().__init__()
@@ -432,36 +433,36 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
     class QuotedString(lang.Final):
         s: str
-    @RuleVisitor.register('rule')
+    @RuleMatchVisitor.register('rule')
     def visit_rule_rule(self, m: Match) -> ta.Any:
         rn_m, _, el_m = m.children
         rn = check.isinstance(self.visit_match(rn_m), self.RuleName).s
         el = self.visit_match(el_m)
         return Rule(rn, el)
-    @RuleVisitor.register('rulename')
+    @RuleMatchVisitor.register('rulename')
     def visit_rulename_rule(self, m: Match) -> ta.Any:
         return self.RuleName(self._source[m.start:m.end])
-    @RuleVisitor.register('elements')
+    @RuleMatchVisitor.register('elements')
     def visit_elements_rule(self, m: Match) -> ta.Any:
         return self.visit_match(check.single(m.children))
-    @RuleVisitor.register('alternation')
+    @RuleMatchVisitor.register('alternation')
     def visit_alternation_rule(self, m: Match) -> ta.Any:
         if len(m.children) == 1:
             return self.visit_match(m.children[0])
         else:
             return either(*map(self.visit_match, m.children))
-    @RuleVisitor.register('concatenation')
+    @RuleMatchVisitor.register('concatenation')
     def visit_concatenation_rule(self, m: Match) -> ta.Any:
         if len(m.children) == 1:
             return self.visit_match(m.children[0])
         else:
             return concat(*map(self.visit_match, m.children))
-    @RuleVisitor.register('repetition')
+    @RuleMatchVisitor.register('repetition')
     def visit_repetition_rule(self, m: Match) -> ta.Any:
         if len(m.children) == 2:
             ti_m, el_m = m.children
@@ -473,7 +474,7 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
         else:
             raise ValueError(m)
-    @RuleVisitor.register('repeat')
+    @RuleMatchVisitor.register('repeat')
     def visit_repeat_rule(self, m: Match) -> ta.Any:
         s = check.non_empty_str(self._source[m.start:m.end])
         if s == '*':
@@ -488,30 +489,30 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
         else:
             return Repeat.Times(n := int(s), n)
-    @RuleVisitor.register('element')
+    @RuleMatchVisitor.register('element')
     def visit_element_rule(self, m: Match) -> ta.Any:
         c = self.visit_match(check.single(m.children))
-        if isinstance(c, Parser):
+        if isinstance(c, Op):
             return c
         elif isinstance(c, self.RuleName):
             return rule(c.s)
         else:
             raise TypeError(c)
-    @RuleVisitor.register('group')
+    @RuleMatchVisitor.register('group')
     def visit_group_rule(self, m: Match) -> ta.Any:
         return self.visit_match(check.single(m.children))
-    @RuleVisitor.register('option')
+    @RuleMatchVisitor.register('option')
     def visit_option_rule(self, m: Match) -> ta.Any:
         c = self.visit_match(check.single(m.children))
-        return option(check.isinstance(c, Parser))
+        return option(check.isinstance(c, Op))
-    @RuleVisitor.register('num-val')
+    @RuleMatchVisitor.register('num-val')
     def visit_num_val_rule(self, m: Match) -> ta.Any:
         return self.visit_match(check.single(m.children))
-    def _parse_num_val(self, s: str, base: int) -> Parser:
+    def _parse_num_val(self, s: str, base: int) -> Op:
         if '-' in s:
             check.not_in('.', s)
             lo, hi = [chr(int(p, base)) for p in s.split('-')]
@@ -524,29 +525,29 @@ class MetaGrammarRuleVisitor(RuleVisitor[ta.Any]):
             c = chr(int(s, base))
             return literal(c, c)
-    @RuleVisitor.register('dec-val')
+    @RuleMatchVisitor.register('dec-val')
     def visit_dec_val_rule(self, m: Match) -> ta.Any:
         return self._parse_num_val(self._source[m.start + 1:m.end], 10)
-    @RuleVisitor.register('hex-val')
+    @RuleMatchVisitor.register('hex-val')
     def visit_hex_val_rule(self, m: Match) -> ta.Any:
         return self._parse_num_val(self._source[m.start + 1:m.end], 16)
-    @RuleVisitor.register('char-val')
+    @RuleMatchVisitor.register('char-val')
     def visit_char_val_rule(self, m: Match) -> ta.Any:
         return self.visit_match(check.single(m.children))
-    @RuleVisitor.register('case-sensitive-string')
+    @RuleMatchVisitor.register('case-sensitive-string')
     def visit_case_sensitive_string_rule(self, m: Match) -> ta.Any:
         c = self.visit_match(check.single(m.children))
         return literal(check.isinstance(c, self.QuotedString).s, case_sensitive=True)
-    @RuleVisitor.register('case-insensitive-string')
+    @RuleMatchVisitor.register('case-insensitive-string')
     def visit_case_insensitive_string_rule(self, m: Match) -> ta.Any:
         c = self.visit_match(check.single(m.children))
         return literal(check.isinstance(c, self.QuotedString).s, case_sensitive=False)
-    @RuleVisitor.register('quoted-string')
+    @RuleMatchVisitor.register('quoted-string')
     def visit_quoted_string_rule(self, m: Match) -> ta.Any:
         check.state(m.end - m.start > 2)
         check.state(self._source[m.start] == '"')
@@ -561,7 +562,7 @@ def parse_grammar(
         root: str | None = None,
         **kwargs: ta.Any,
 ) -> Grammar:
-    source = fix_grammar_ws(source)
+    source = fix_ws(source)
     if (mg_m := parse_rules(
             META_GRAMMAR,
@@ -571,10 +572,10 @@ def parse_grammar(
     )) is None:
         raise AbnfGrammarParseError(source)
-    check.isinstance(mg_m.parser, Repeat)
+    check.isinstance(mg_m.op, Repeat)
-    mg_rv = MetaGrammarRuleVisitor(source)
-    rules = [mg_rv.visit_match(gg_cm) for gg_cm in mg_m.children]
+    mg_rmv = MetaGrammarRuleMatchVisitor(source)
+    rules = [mg_rmv.visit_match(gg_cm) for gg_cm in mg_m.children]
     return Grammar(
         *rules,

omextra 0.0.0.dev494__py3-none-any.whl → 0.0.0.dev496__py3-none-any.whl

omextra 0.0.0.dev494__py3-none-any.whl → 0.0.0.dev496__py3-none-any.whl