omextra 0.0.0.dev496__py3-none-any.whl → 0.0.0.dev498__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
omextra/text/abnf/core.py CHANGED
@@ -3,13 +3,15 @@ https://datatracker.ietf.org/doc/html/rfc5234
  """
  import typing as ta

- from .base import Grammar
- from .base import Rule
+ from .grammars import Channel
+ from .grammars import Grammar
+ from .grammars import Rule
  from .ops import concat
  from .ops import either
  from .ops import literal
  from .ops import repeat
  from .ops import rule
+ from .opto import optimize_grammar


  ##
@@ -23,6 +25,7 @@ CORE_RULES: ta.Sequence[Rule] = [
              literal('\x41', '\x5a'),
              literal('\x61', '\x7a'),
          ),
+         channel=Channel.CONTENT,
      ),

      Rule(
@@ -31,11 +34,13 @@ CORE_RULES: ta.Sequence[Rule] = [
              literal('0'),
              literal('1'),
          ),
+         channel=Channel.CONTENT,
      ),

      Rule(
          'CHAR',
          literal('\x01', '\x7f'),
+         channel=Channel.CONTENT,
      ),

      Rule(
@@ -44,12 +49,13 @@ CORE_RULES: ta.Sequence[Rule] = [
              literal('\x00', '\x1f'),
              literal('\x7f', case_sensitive=True),
          ),
+         channel=Channel.CONTENT,
      ),

      Rule(
          'CR',
          literal('\x0d', case_sensitive=True),
-         insignificant=True,
+         channel=Channel.SPACE,
      ),

      Rule(
@@ -58,17 +64,19 @@ CORE_RULES: ta.Sequence[Rule] = [
              rule('CR'),
              rule('LF'),
          ),
-         insignificant=True,
+         channel=Channel.SPACE,
      ),

      Rule(
          'DIGIT',
          literal('\x30', '\x39'),
+         channel=Channel.CONTENT,
      ),

      Rule(
          'DQUOTE',
          literal('\x22', case_sensitive=True),
+         channel=Channel.CONTENT,
      ),

      Rule(
@@ -82,18 +90,19 @@ CORE_RULES: ta.Sequence[Rule] = [
              literal('E'),
              literal('F'),
          ),
+         channel=Channel.CONTENT,
      ),

      Rule(
          'HTAB',
          literal('\x09', case_sensitive=True),
-         insignificant=True,
+         channel=Channel.SPACE,
      ),

      Rule(
          'LF',
          literal('\x0a', case_sensitive=True),
-         insignificant=True,
+         channel=Channel.SPACE,
      ),

      Rule(
@@ -107,23 +116,25 @@ CORE_RULES: ta.Sequence[Rule] = [
                  ),
              ),
          ),
-         insignificant=True,
+         channel=Channel.SPACE,
      ),

      Rule(
          'OCTET',
          literal('\x00', '\xff'),
+         channel=Channel.CONTENT,
      ),

      Rule(
          'SP',
          literal('\x20', case_sensitive=True),
-         insignificant=True,
+         channel=Channel.SPACE,
      ),

      Rule(
          'VCHAR',
          literal('\x21', '\x7e'),
+         channel=Channel.CONTENT,
      ),

      Rule(
@@ -132,10 +143,11 @@ CORE_RULES: ta.Sequence[Rule] = [
          rule('SP'),
          rule('HTAB'),
      ),
-     insignificant=True,
+     channel=Channel.SPACE,
  ),

  ]


- CORE_GRAMMAR = Grammar(*CORE_RULES)
+ RAW_CORE_GRAMMAR = Grammar(*CORE_RULES)
+ CORE_GRAMMAR = optimize_grammar(RAW_CORE_GRAMMAR)
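
For consumers of core.py, the visible changes are that the boolean insignificant=True flag on rules is replaced by a Channel enum, and that the exported CORE_GRAMMAR is now the optimized form of the new RAW_CORE_GRAMMAR. A minimal before/after sketch of rule construction, using only names that appear in this diff:

    from omextra.text.abnf.grammars import Channel
    from omextra.text.abnf.grammars import Rule
    from omextra.text.abnf.ops import literal

    # Formerly: Rule('SP', literal('\x20', case_sensitive=True), insignificant=True)
    sp = Rule('SP', literal('\x20', case_sensitive=True), channel=Channel.SPACE)

    # Content-bearing rules are now tagged explicitly instead of being left
    # on the default (STRUCTURE) channel.
    digit = Rule('DIGIT', literal('\x30', '\x39'), channel=Channel.CONTENT)
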
omextra/text/abnf/grammars.py ADDED
@@ -0,0 +1,235 @@
+ import enum
+ import typing as ta
+
+ from omlish import check
+ from omlish import lang
+
+ from .errors import AbnfError
+ from .errors import AbnfIncompleteParseError
+ from .matches import Match
+ from .matches import longest_match
+ from .ops import Op
+
+
+ with lang.auto_proxy_import(globals()):
+     from . import parsing
+
+
+ ##
+
+
+ class Channel(enum.Enum):
+     STRUCTURE = enum.auto()
+     CONTENT = enum.auto()
+     COMMENT = enum.auto()
+     SPACE = enum.auto()
+
+
+ class Rule(lang.Final):
+     def __init__(
+         self,
+         name: str,
+         op: Op,
+         *,
+         channel: Channel = Channel.STRUCTURE,
+     ) -> None:
+         super().__init__()
+
+         self._name = check.non_empty_str(name)
+         self._op = check.isinstance(op, Op)
+         self._channel = channel
+
+         self._name_f = name.casefold()
+
+     def __repr__(self) -> str:
+         return f'{self.__class__.__name__}({self._name!r}, channel={self._channel.name})'
+
+     def replace_op(self, op: Op) -> 'Rule':
+         return Rule(
+             self._name,
+             op,
+             channel=self._channel,
+         )
+
+     @property
+     def name(self) -> str:
+         return self._name
+
+     @property
+     def name_f(self) -> str:
+         return self._name_f
+
+     @property
+     def op(self) -> Op:
+         return self._op
+
+     @property
+     def channel(self) -> Channel:
+         return self._channel
+
+
+ #
+
+
+ class RulesCollection(lang.Final, ta.Collection[Rule]):
+     def __init__(self, *rules: ta.Union[Rule, 'RulesCollection']) -> None:
+         super().__init__()
+
+         rules_set: set[Rule] = set()
+         rules_by_name: dict[str, Rule] = {}
+         rules_by_name_f: dict[str, Rule] = {}
+         rules_by_op: dict[Op, Rule] = {}
+
+         def add(gr: Rule) -> None:
+             check.isinstance(gr, Rule)
+
+             check.not_in(gr, rules_set)
+             check.not_in(gr._name, rules_by_name)  # noqa
+             check.not_in(gr._name_f, rules_by_name_f)  # noqa
+             check.not_in(gr._op, rules_by_op)  # noqa
+
+             rules_set.add(gr)
+             rules_by_name[gr._name] = gr  # noqa
+             rules_by_name_f[gr._name_f] = gr  # noqa
+             rules_by_op[gr._op] = gr  # noqa
+
+         for e in rules:
+             if isinstance(e, RulesCollection):
+                 for c in e:
+                     add(c)
+             else:
+                 add(e)
+
+         self._rules_set = rules_set
+         self._rules_by_name: ta.Mapping[str, Rule] = rules_by_name
+         self._rules_by_name_f: ta.Mapping[str, Rule] = rules_by_name_f
+         self._rules_by_op: ta.Mapping[Op, Rule] = rules_by_op
+
+     @property
+     def rules_set(self) -> ta.AbstractSet[Rule]:
+         return self._rules_set
+
+     @property
+     def rules_by_name(self) -> ta.Mapping[str, Rule]:
+         return self._rules_by_name
+
+     @property
+     def rules_by_name_f(self) -> ta.Mapping[str, Rule]:
+         return self._rules_by_name_f
+
+     @property
+     def rules_by_op(self) -> ta.Mapping[Op, Rule]:
+         return self._rules_by_op
+
+     #
+
+     def __len__(self) -> int:
+         return len(self._rules_set)
+
+     def __iter__(self) -> ta.Iterator[Rule]:
+         return iter(self._rules_set)
+
+     def __contains__(self, item: Rule) -> bool:  # type: ignore[override]
+         return item in self._rules_set
+
+     #
+
+     def rule(self, name: str) -> Rule | None:
+         return self._rules_by_name_f.get(name.casefold())
+
+
+ ##
+
+
+ class Grammar(lang.Final):
+     def __init__(
+         self,
+         *rules: Rule | RulesCollection,
+         root: Rule | str | None = None,
+     ) -> None:
+         super().__init__()
+
+         if len(rules) == 1 and isinstance(r0 := rules[0], RulesCollection):
+             self._rules = r0
+         else:
+             self._rules = RulesCollection(*rules)
+
+         if isinstance(root, str):
+             root = self._rules.rules_by_name_f[root.casefold()]
+         self._root = root
+
+     @property
+     def rules(self) -> RulesCollection:
+         return self._rules
+
+     @property
+     def root(self) -> Rule | None:
+         return self._root
+
+     #
+
+     def rule(self, name: str) -> Rule | None:
+         return self._rules.rule(name)
+
+     def replace_rules(self, *rules: Rule) -> 'Grammar':
+         rc = RulesCollection(*rules)
+         if rc.rules_set == self._rules.rules_set:
+             return self
+
+         return Grammar(
+             rc,
+             root=self._root.name if self._root is not None else None,
+         )
+
+     #
+
+     def iter_parse(
+         self,
+         source: str,
+         root: Rule | str | None = None,
+         *,
+         start: int = 0,
+         debug: int = 0,
+         **kwargs: ta.Any,
+     ) -> ta.Iterator[Match]:
+         if root is None:
+             if (root := self._root) is None:
+                 raise AbnfError('No root or default root specified')
+         else:
+             if isinstance(root, str):
+                 root = self._rules.rules_by_name_f[root.casefold()]
+             else:
+                 root = check.in_(check.isinstance(root, Rule), self._rules)
+
+         return parsing._iter_parse(  # noqa
+             self,
+             source,
+             root._op,  # noqa
+             start,
+             debug=debug,
+             **kwargs,
+         )
+
+     def parse(
+         self,
+         source: str,
+         root: str | None = None,
+         *,
+         start: int = 0,
+         complete: bool = False,
+         debug: int = 0,
+         **kwargs: ta.Any,
+     ) -> Match | None:
+         if (match := longest_match(self.iter_parse(
+             source,
+             root,
+             start=start,
+             debug=debug,
+             **kwargs,
+         ))) is None:
+             return None
+
+         if complete and (match.start, match.end) != (start, len(source)):
+             raise AbnfIncompleteParseError
+
+         return match
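
Putting the new Grammar API together: parse resolves the root rule case-insensitively (falling back to the grammar's default root), keeps the longest candidate match, and with complete=True raises AbnfIncompleteParseError on a partial parse. A usage sketch against CORE_GRAMMAR from core.py above; the asserted span assumes the RFC 5234 semantics of the ALPHA rule:

    from omextra.text.abnf.core import CORE_GRAMMAR

    # Root lookup is casefolded, so 'alpha' resolves the 'ALPHA' rule.
    m = CORE_GRAMMAR.parse('A', root='alpha', complete=True)
    assert m is not None and (m.start, m.end) == (0, 1)

    # CORE_GRAMMAR is built without a default root, so omitting root here
    # would raise AbnfError.
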
omextra/text/abnf/internal.py CHANGED
@@ -29,4 +29,4 @@ class Regex(InternalOp, LeafOp, lang.Final):
          return self._pat

      def __repr__(self) -> str:
-         return f'{self.__class__.__name__}@{id(self):x}({self._pat!r})'
+         return f'{self.__class__.__name__}@{id(self):x}({self._pat.pattern!r})'
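
The __repr__ fix unwraps the compiled pattern: self._pat is a compiled re.Pattern, whose own repr nests a re.compile(...) wrapper, while .pattern is just the source string. The difference, illustrated with plain re (values hypothetical):

    import re

    pat = re.compile('[0-9a-f]')
    repr(pat)          # "re.compile('[0-9a-f]')"
    repr(pat.pattern)  # "'[0-9a-f]'"
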
omextra/text/abnf/matches.py ADDED
@@ -0,0 +1,145 @@
+ import io
+ import itertools
+ import typing as ta
+
+ from omlish import lang
+
+ from .internal import Regex
+ from .ops import CaseInsensitiveStringLiteral
+ from .ops import Op
+ from .ops import RangeLiteral
+ from .ops import RuleRef
+ from .ops import StringLiteral
+
+
+ ##
+
+
+ @ta.final
+ class Match(ta.NamedTuple):
+     op: 'Op'
+     start: int
+     end: int
+     children: tuple['Match', ...]
+
+     @property
+     def length(self) -> int:
+         return self.end - self.start
+
+     #
+
+     def __repr__(self) -> str:
+         return (
+             f'{self.__class__.__name__}('
+             f'{self.op._match_repr()}, '  # noqa
+             f'{self.start}, {self.end}'
+             f'{f", {self.children!r}" if self.children else ""})'
+         )
+
+     def render_to(
+         self,
+         write: ta.Callable[[str], ta.Any],
+         *,
+         indent: int | None = None,
+         _depth: int = 0,
+     ) -> None:
+         ix: str | None = (' ' * (indent * _depth)) if indent is not None else None
+         if ix:
+             write(ix)
+
+         o = self.op
+
+         if isinstance(o, (StringLiteral, CaseInsensitiveStringLiteral)):
+             write(f'literal<{self.start}-{self.end}>({o.value!r})')
+
+         elif isinstance(o, RangeLiteral):
+             write(f'literal<{self.start}-{self.end}>({o.value.lo!r}-{o.value.hi!r})')
+
+         elif isinstance(o, Regex):
+             write(f'regex<{self.start}-{self.end}>({o.pat.pattern!r})')
+
+         else:
+             write(f'{o.__class__.__name__.lower()}<{self.start}-{self.end}>')
+
+         if isinstance(o, RuleRef):
+             write(f':{o.name}')
+
+         if self.children:
+             write('(')
+             if ix is not None:
+                 write('\n')
+
+             for i, c in enumerate(self.children):
+                 if i and ix is None:
+                     write(', ')
+
+                 c.render_to(write, indent=indent, _depth=_depth + 1)
+
+                 if ix is not None:
+                     write(',\n')
+
+             if ix:
+                 write(ix)
+
+             write(')')
+
+     def render(
+         self,
+         *,
+         indent: int | None = None,
+     ) -> str:
+         sb = io.StringIO()
+         self.render_to(sb.write, indent=indent)
+         return sb.getvalue()
+
+     def __str__(self) -> str:
+         return self.render()
+
+     #
+
+     def replace_children(self, *children: 'Match') -> 'Match':
+         if lang.seqs_identical(children, self.children):
+             return self
+
+         return self._replace(children=children)
+
+     def map_children(self, fn: ta.Callable[['Match'], 'Match']) -> 'Match':
+         return self.replace_children(*map(fn, self.children))
+
+     def flat_map_children(self, fn: ta.Callable[['Match'], ta.Iterable['Match']]) -> 'Match':
+         return self.replace_children(*itertools.chain.from_iterable(map(fn, self.children)))
+
+
+ ##
+
+
+ def longest_match(ms: ta.Iterable[Match]) -> Match | None:
+     bm: Match | None = None
+     bl = 0
+     for m in ms:
+         l = m.length
+         if bm is None or l > bl:
+             bm, bl = m, l
+     return bm
+
+
+ def filter_matches(
+     fn: ta.Callable[[Match], bool],
+     m: Match,
+     *,
+     keep_children: bool = False,
+ ) -> Match:
+     def inner(x: Match) -> ta.Iterable[Match]:
+         if fn(x):
+             return (rec(x),)
+
+         elif keep_children:
+             return lang.flatten(inner(c) for c in x.children)
+
+         else:
+             return ()
+
+     def rec(c: Match) -> Match:
+         return c.flat_map_children(inner)
+
+     return rec(m)
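
longest_match folds an iterable of candidates down to the first one of maximal length (ties keep the earliest), and filter_matches prunes a match tree by predicate, with keep_children=True splicing a dropped node's surviving descendants into its place. A sketch pairing Grammar.iter_parse with these helpers; the exact rendered tree shape depends on how the grammar was optimized:

    from omextra.text.abnf.core import CORE_GRAMMAR
    from omextra.text.abnf.matches import longest_match

    ms = CORE_GRAMMAR.iter_parse('9', root='digit')
    m = longest_match(ms)  # first candidate of maximal length
    assert m is not None and m.length == 1
    print(m.render(indent=2))
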
omextra/text/abnf/meta.py CHANGED
@@ -7,12 +7,13 @@ from omlish import check
  from omlish import dataclasses as dc
  from omlish import lang

- from .base import Grammar
- from .base import Match
  from .base import Op
- from .base import Rule
  from .core import CORE_RULES
  from .errors import AbnfGrammarParseError
+ from .grammars import Channel
+ from .grammars import Grammar
+ from .grammars import Rule
+ from .matches import Match
  from .ops import Repeat
  from .ops import concat
  from .ops import either
@@ -20,8 +21,10 @@ from .ops import literal
  from .ops import option
  from .ops import repeat
  from .ops import rule
+ from .opto import optimize_grammar
+ from .utils import filter_match_channels
  from .utils import fix_ws
- from .utils import parse_rules
+ from .utils import only_match_rules
  from .visitors import RuleMatchVisitor


@@ -105,7 +108,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
                  rule('WSP'),
              ),
          ),
-         insignificant=True,
+         channel=Channel.SPACE,
      ),

      Rule(
@@ -114,7 +117,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
              rule('comment'),
              rule('CRLF'),
          ),
-         insignificant=True,
+         channel=Channel.SPACE,
      ),

      Rule(
@@ -129,6 +132,7 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
              ),
              rule('CRLF'),
          ),
+         channel=Channel.COMMENT,
      ),

      Rule(
@@ -409,12 +413,21 @@ META_GRAMMAR_RULES: ta.Sequence[Rule] = [
  ]


- META_GRAMMAR = Grammar(
+ RAW_META_GRAMMAR = Grammar(
      *CORE_RULES,
      *META_GRAMMAR_RULES,
      root='rulelist',
  )

+ META_GRAMMAR = optimize_grammar(
+     RAW_META_GRAMMAR,
+     inline_channels=(
+         Channel.CONTENT,
+         Channel.COMMENT,
+         Channel.SPACE,
+     ),
+ )
+

  ##

@@ -555,30 +568,50 @@ class MetaGrammarRuleMatchVisitor(RuleMatchVisitor[ta.Any]):
          return self.QuotedString(self._source[m.start + 1:m.end - 1])


+ ##
+
+
  def parse_grammar(
      source: str,
      *,
-     no_core_rules: bool = False,
      root: str | None = None,
+     no_core_rules: bool = False,
+     no_optimize: bool = False,
      **kwargs: ta.Any,
  ) -> Grammar:
      source = fix_ws(source)

-     if (mg_m := parse_rules(
-         META_GRAMMAR,
+     if (mg_m := META_GRAMMAR.parse(
          source,
          complete=True,
          **kwargs,
      )) is None:
          raise AbnfGrammarParseError(source)

+     mg_m = only_match_rules(mg_m)
+
+     mg_m = filter_match_channels(
+         mg_m,
+         META_GRAMMAR,
+         keep=(Channel.STRUCTURE,),
+         keep_children=True,
+     )
+
      check.isinstance(mg_m.op, Repeat)

      mg_rmv = MetaGrammarRuleMatchVisitor(source)
-     rules = [mg_rmv.visit_match(gg_cm) for gg_cm in mg_m.children]
+     rules = [
+         check.isinstance(mg_rmv.visit_match(gg_cm), Rule)
+         for gg_cm in mg_m.children
+     ]

-     return Grammar(
+     gram = Grammar(
          *rules,
          *(CORE_RULES if not no_core_rules else []),
          root=root,
      )
+
+     if not no_optimize:
+         gram = optimize_grammar(gram)
+
+     return gram
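
End to end, parse_grammar now parses ABNF source with the optimized meta-grammar, strips comment- and space-channel matches down to STRUCTURE before visiting, and optimizes the resulting grammar unless no_optimize=True. A usage sketch with a hypothetical one-rule grammar; RFC 5234 requires CRLF line endings, and fix_ws is assumed to tolerate already-normalized input:

    from omextra.text.abnf.meta import parse_grammar

    g = parse_grammar('greeting = "hello" SP "world"\r\n', root='greeting')

    # ABNF quoted strings are case-insensitive; core rules such as SP are
    # merged in unless no_core_rules=True.
    m = g.parse('hello world', complete=True)
    assert m is not None
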