PyPI - omlish - Versions diffs - 0.0.0.dev80__py3-none-any.whl → 0.0.0.dev82__py3-none-any.whl - Mend

omlish 0.0.0.dev80-py3-none-any.whl → 0.0.0.dev82-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of omlish might be problematic. Click here for more details.

Files changed (35)

omlish/__about__.py +4 -4
omlish/dataclasses/impl/__init__.py +8 -0
omlish/dataclasses/impl/params.py +3 -0
omlish/dataclasses/impl/slots.py +61 -7
omlish/formats/json/__init__.py +8 -1
omlish/formats/json/backends/__init__.py +7 -0
omlish/formats/json/backends/base.py +38 -0
omlish/formats/json/backends/default.py +10 -0
omlish/formats/json/backends/jiter.py +25 -0
omlish/formats/json/backends/orjson.py +46 -2
omlish/formats/json/backends/std.py +39 -0
omlish/formats/json/backends/ujson.py +49 -0
omlish/formats/json/cli.py +125 -31
omlish/formats/json/consts.py +22 -0
omlish/formats/json/encoding.py +17 -0
omlish/formats/json/json.py +9 -39
omlish/formats/json/render.py +49 -24
omlish/formats/json/stream/__init__.py +0 -0
omlish/formats/json/stream/build.py +113 -0
omlish/formats/json/stream/lex.py +285 -0
omlish/formats/json/stream/parse.py +244 -0
omlish/formats/json/stream/render.py +119 -0
omlish/genmachine.py +56 -10
omlish/lang/resources.py +6 -1
omlish/marshal/base.py +2 -0
omlish/marshal/newtypes.py +24 -0
omlish/marshal/standard.py +4 -0
omlish/reflect/__init__.py +1 -0
omlish/reflect/types.py +6 -1
{omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/METADATA +5 -5
{omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/RECORD +35 -24
{omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/LICENSE +0 -0
{omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/WHEEL +0 -0
{omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/entry_points.txt +0 -0
{omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/top_level.txt +0 -0

omlish/formats/json/encoding.py ADDED Viewed

@@ -0,0 +1,17 @@
+import json
+detect_encoding = json.detect_encoding
+def decodes(s: str | bytes | bytearray) -> str:
+    if isinstance(s, str):
+        if s.startswith('\ufeff'):
+            raise json.JSONDecodeError('Unexpected UTF-8 BOM (decode using utf-8-sig)', s, 0)
+        return s
+    elif isinstance(s, (bytes, bytearray)):
+        return s.decode(detect_encoding(s), 'surrogatepass')
+    else:
+        raise TypeError(f'the JSON object must be str, bytes or bytearray, not {s.__class__.__name__}')

omlish/formats/json/json.py CHANGED Viewed

@@ -1,47 +1,17 @@
-"""
-TODO:
- - backend abstr
- - streaming
-"""
-import functools
-import json as _json
-import typing as ta
+from .backends import DEFAULT_BACKED
 ##
-dump = _json.dump
-dumps = _json.dumps
+dump = DEFAULT_BACKED.dump
+dumps = DEFAULT_BACKED.dumps
-detect_encoding = _json.detect_encoding
+load = DEFAULT_BACKED.load
+loads = DEFAULT_BACKED.loads
-load = _json.load
-loads = _json.loads
+dump_pretty = DEFAULT_BACKED.dump_pretty
+dumps_pretty = DEFAULT_BACKED.dumps_pretty
-##
-PRETTY_INDENT = 2
-PRETTY_KWARGS: ta.Mapping[str, ta.Any] = dict(
-    indent=PRETTY_INDENT,
-)
-dump_pretty: ta.Callable[..., bytes] = functools.partial(dump, **PRETTY_KWARGS)  # type: ignore
-dumps_pretty: ta.Callable[..., str] = functools.partial(dumps, **PRETTY_KWARGS)
-##
-COMPACT_SEPARATORS = (',', ':')
-COMPACT_KWARGS: ta.Mapping[str, ta.Any] = dict(
-    indent=None,
-    separators=COMPACT_SEPARATORS,
-)
-dump_compact: ta.Callable[..., bytes] = functools.partial(dump, **COMPACT_KWARGS)  # type: ignore
-dumps_compact: ta.Callable[..., str] = functools.partial(dumps, **COMPACT_KWARGS)
+dump_compact = DEFAULT_BACKED.dump_compact
+dumps_compact = DEFAULT_BACKED.dumps_compact

omlish/formats/json/render.py CHANGED Viewed

@@ -1,40 +1,55 @@
+import abc
+import dataclasses as dc
 import enum
 import io
 import json
 import typing as ta
+from ... import lang
-class JsonRenderer:
+I = ta.TypeVar('I')
+class JsonRendererOut(ta.Protocol):
+    def write(self, s: str) -> ta.Any: ...
+class AbstractJsonRenderer(lang.Abstract, ta.Generic[I]):
     class State(enum.Enum):
         VALUE = enum.auto()
         KEY = enum.auto()
+    @dc.dataclass(frozen=True, kw_only=True)
+    class Options:
+        indent: int | str | None = None
+        separators: tuple[str, str] | None = None
+        sort_keys: bool = False
+        style: ta.Callable[[ta.Any, 'AbstractJsonRenderer.State'], tuple[str, str]] | None = None
     def __init__(
             self,
-            out: ta.TextIO,
-            *,
-            indent: int | str | None = None,
-            separators: tuple[str, str] | None = None,
-            sort_keys: bool = False,
-            style: ta.Callable[[ta.Any, State], tuple[str, str]] | None = None,
+            out: JsonRendererOut,
+            opts: Options = Options(),
     ) -> None:
         super().__init__()
         self._out = out
-        if isinstance(indent, (str, int)):
-            self._indent = (' ' * indent) if isinstance(indent, int) else indent
+        self._opts = opts
+        separators = opts.separators
+        if isinstance(opts.indent, (str, int)):
+            self._indent = (' ' * opts.indent) if isinstance(opts.indent, int) else opts.indent
             self._endl = '\n'
             if separators is None:
                 separators = (',', ': ')
-        elif indent is None:
+        elif opts.indent is None:
             self._indent = self._endl = ''
             if separators is None:
                 separators = (', ', ': ')
         else:
-            raise TypeError(indent)
+            raise TypeError(opts.indent)
         self._comma, self._colon = separators
-        self._sort_keys = sort_keys
-        self._style = style
         self._level = 0
@@ -54,9 +69,25 @@ class JsonRenderer:
             if self._level:
                 self._write(self._indent * self._level)
-    def _render(self, o: ta.Any, state: State = State.VALUE) -> None:
-        if self._style is not None:
-            pre, post = self._style(o, state)
+    @abc.abstractmethod
+    def render(self, i: I) -> None:
+        raise NotImplementedError
+    @classmethod
+    def render_str(cls, i: I, **kwargs: ta.Any) -> str:
+        out = io.StringIO()
+        cls(out, cls.Options(**kwargs)).render(i)
+        return out.getvalue()
+class JsonRenderer(AbstractJsonRenderer[ta.Any]):
+    def _render(
+            self,
+            o: ta.Any,
+            state: AbstractJsonRenderer.State = AbstractJsonRenderer.State.VALUE,
+    ) -> None:
+        if self._opts.style is not None:
+            pre, post = self._opts.style(o, state)
             self._write(pre)
         else:
             post = None
@@ -71,13 +102,13 @@ class JsonRenderer:
             self._write('{')
             self._level += 1
             items = list(o.items())
-            if self._sort_keys:
+            if self._opts.sort_keys:
                 items.sort(key=lambda t: t[0])
             for i, (k, v) in enumerate(items):
                 if i:
                     self._write(self._comma)
                 self._write_indent()
-                self._render(k, JsonRenderer.State.KEY)
+                self._render(k, AbstractJsonRenderer.State.KEY)
                 self._write(self._colon)
                 self._render(v)
             self._level -= 1
@@ -106,9 +137,3 @@ class JsonRenderer:
     def render(self, o: ta.Any) -> None:
         self._render(o)
-    @classmethod
-    def render_str(cls, o: ta.Any, **kwargs: ta.Any) -> str:
-        out = io.StringIO()
-        cls(out, **kwargs).render(o)
-        return out.getvalue()

omlish/formats/json/stream/__init__.py ADDED Viewed

File without changes

omlish/formats/json/stream/build.py ADDED Viewed

@@ -0,0 +1,113 @@
+import typing as ta
+from ....genmachine import GenMachine
+from .lex import SCALAR_VALUE_TYPES
+from .parse import BeginArray
+from .parse import BeginObject
+from .parse import EndArray
+from .parse import EndObject
+from .parse import JsonStreamObject
+from .parse import JsonStreamParserEvent
+from .parse import Key
+##
+class JsonObjectBuilder(GenMachine[JsonStreamParserEvent, ta.Any]):
+    """
+    Assembles Python values from a stream of JSON parser events.
+    Scalars, `BeginObject`/`Key`/`EndObject` and `BeginArray`/`EndArray` events are fed in one at a time; every time
+    a top-level value is completed it is yielded as a 1-tuple.
+    NOTE(review): `GenMachine` and `StateError` come from genmachine.py, which is not visible here - the exact
+    feeding/closing protocol should be confirmed there.
+    """
+    def __init__(
+            self,
+            *,
+            yield_object_lists: bool = False,
+    ) -> None:
+        # Stack of containers currently being built, plus pending object keys waiting for their value.
+        self._stack: list[JsonStreamObject | list | Key] = []
+        # When true, completed objects are emitted as the JsonStreamObject itself instead of being converted to dict.
+        self._yield_object_lists = yield_object_lists
+        super().__init__(self._do())
+    def _do(self):
+        """Generator body: receives one event per `yield None` and yields 1-tuples of completed top-level values."""
+        stk = self._stack
+        def emit_value(v):
+            # Route a finished value: returns (v,) when it is a top-level value, () when it was attached to a parent.
+            if not stk:
+                return (v,)
+            tv = stk[-1]
+            if isinstance(tv, Key):
+                # A pending key: pop it and store the (key, value) pair on the object that must sit beneath it.
+                stk.pop()
+                if not stk:
+                    raise self.StateError
+                tv2 = stk[-1]
+                if not isinstance(tv2, JsonStreamObject):
+                    raise self.StateError
+                tv2.append((tv.key, v))
+                return ()
+            elif isinstance(tv, list):
+                # NOTE(review): if JsonStreamObject subclasses list, a value arriving on an object without a
+                # preceding Key would also land here and be appended instead of raising - confirm in parse.py.
+                tv.append(v)
+                return ()
+            else:
+                raise self.StateError
+        while True:
+            try:
+                e = yield None
+            except GeneratorExit:
+                # Being closed while containers are still open means the event stream was truncated.
+                if stk:
+                    raise self.StateError from None
+                else:
+                    raise
+            #
+            if isinstance(e, SCALAR_VALUE_TYPES):
+                if t := emit_value(e):
+                    yield t
+                continue
+            #
+            elif e is BeginObject:
+                stk.append(JsonStreamObject())
+                continue
+            elif isinstance(e, Key):
+                # A key is only legal directly on top of an open object.
+                if not stk or not isinstance(stk[-1], JsonStreamObject):
+                    raise self.StateError
+                stk.append(e)
+                continue
+            elif e is EndObject:
+                tv: ta.Any
+                if not stk or not isinstance(tv := stk.pop(), JsonStreamObject):
+                    raise self.StateError
+                if not self._yield_object_lists:
+                    tv = dict(tv)
+                if t := emit_value(tv):
+                    yield t
+                continue
+            #
+            elif e is BeginArray:
+                stk.append([])
+                continue
+            elif e is EndArray:
+                if not stk or not isinstance(tv := stk.pop(), list):
+                    raise self.StateError
+                if t := emit_value(tv):
+                    yield t
+                continue
+            #
+            else:
+                # Anything that is not a scalar or one of the known structural events.
+                raise TypeError(e)

omlish/formats/json/stream/lex.py ADDED Viewed

@@ -0,0 +1,285 @@
+"""
+TODO:
+ - max buf size
+ - max recursion depth
+ - mark start pos of tokens, currently returning end
+"""
+import dataclasses as dc
+import io
+import json
+import re
+import typing as ta
+from .... import check
+from ....genmachine import GenMachine
+##
+# Kinds of tokens that carry a scalar JSON value.
+ValueTokenKind: ta.TypeAlias = ta.Literal[
+    'STRING',
+    'NUMBER',
+    'SPECIAL_NUMBER',
+    'BOOLEAN',
+    'NULL',
+]
+# Runtime set of the value-kind names, derived from the Literal above so the two cannot drift apart.
+# NOTE(review): check.isinstance presumably validates and returns its first argument - confirm in omlish.check.
+VALUE_TOKEN_KINDS = frozenset(check.isinstance(a, str) for a in ta.get_args(ValueTokenKind))
+# Kinds of structural (punctuation) tokens.
+ControlTokenKind: ta.TypeAlias = ta.Literal[
+    'LBRACE',
+    'RBRACE',
+    'LBRACKET',
+    'RBRACKET',
+    'COMMA',
+    'COLON',
+]
+TokenKind: ta.TypeAlias = ValueTokenKind | ControlTokenKind
+#
+# Python types a scalar token value may have (bool values are covered by int, of which bool is a subclass).
+ScalarValue: ta.TypeAlias = str | float | int | None
+# The same union as a tuple of runtime types, usable as the second argument of isinstance().
+SCALAR_VALUE_TYPES: tuple[type, ...] = tuple(
+    check.isinstance(e, type) if e is not None else type(None)
+    for e in ta.get_args(ScalarValue)
+)
+##
+class Token(ta.NamedTuple):
+    """A single lexed JSON token together with the lexer position at the time it was created."""
+    # Token category (one of the TokenKind literals).
+    kind: TokenKind
+    # Decoded value; for control tokens the lexer stores the punctuation character itself.
+    value: ScalarValue
+    # Source text of the token, or None when the lexer was created without include_raw.
+    raw: str | None
+    # Lexer counters (offset, line, column) captured when the token was built.
+    ofs: int
+    line: int
+    col: int
+    def __iter__(self):
+        # Deliberately disable iteration, and therefore tuple-unpacking, of tokens.
+        raise TypeError
+# Full-match pattern for a JSON number: optional minus, integer part without leading zeros, optional fraction,
+# optional exponent.
+# NOTE(review): on str patterns `\d` also matches non-ASCII Unicode digits unless re.ASCII is set - confirm whether
+# that leniency is intended.
+NUMBER_PAT = re.compile(r'-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?')
+# Single-character structural tokens and their kinds.
+CONTROL_TOKENS: ta.Mapping[str, TokenKind] = {
+    '{': 'LBRACE',
+    '}': 'RBRACE',
+    '[': 'LBRACKET',
+    ']': 'RBRACKET',
+    ',': 'COMMA',
+    ':': 'COLON',
+}
+# Keyword literals mapped to (kind, decoded value).
+# NOTE(review): the value annotation does not mention bool although True/False are stored here.
+CONST_TOKENS: ta.Mapping[str, tuple[TokenKind, str | float | None]] = {
+    'NaN': ('SPECIAL_NUMBER', float('nan')),
+    'Infinity': ('SPECIAL_NUMBER', float('inf')),
+    '-Infinity': ('SPECIAL_NUMBER', float('-inf')),
+    'true': ('BOOLEAN', True),
+    'false': ('BOOLEAN', False),
+    'null': ('NULL', None),
+}
+##
+# NOTE(review): frozen=True makes attribute assignment from Python code raise FrozenInstanceError; code that assigns
+# attributes on a caught exception instance would be affected - confirm this is acceptable.
+@dc.dataclass(frozen=True)
+class JsonLexError(Exception):
+    """Lexing failure carrying a message and the lexer position at which it was raised."""
+    # Human-readable description of the problem.
+    message: str
+    # Lexer counters (offset, line, column) at the time of the error.
+    ofs: int
+    line: int
+    col: int
+class JsonStreamLexer(GenMachine[str, Token]):
+    def __init__(
+            self,
+            *,
+            include_raw: bool = False,
+    ) -> None:
+        self._include_raw = include_raw
+        self._ofs = 0
+        self._line = 0
+        self._col = 0
+        self._buf = io.StringIO()
+        super().__init__(self._do_main())
+    def _char_in(self, c: str) -> str:
+        if c and len(c) != 1:
+            raise ValueError(c)
+        self._ofs += 1
+        if c == '\n':
+            self._line += 1
+            self._col = 0
+        else:
+            self._col += 1
+        return c
+    def _make_tok(
+            self,
+            kind: TokenKind,
+            value: ScalarValue,
+            raw: str,
+    ) -> ta.Sequence[Token]:
+        tok = Token(
+            kind,
+            value,
+            raw if self._include_raw else None,
+            self._ofs,
+            self._line,
+            self._col,
+        )
+        return (tok,)
+    def _flip_buf(self) -> str:
+        raw = self._buf.getvalue()
+        self._buf.seek(0)
+        self._buf.truncate()
+        return raw
+    def _raise(self, msg: str) -> ta.NoReturn:
+        raise JsonLexError(msg, self._ofs, self._line, self._col)
+    def _do_main(self):
+        while True:
+            c = self._char_in((yield None))  # noqa
+            if not c:
+                return None
+            if c.isspace():
+                continue
+            if c in CONTROL_TOKENS:
+                yield self._make_tok(CONTROL_TOKENS[c], c, c)
+                continue
+            if c == '"':
+                return self._do_string()
+            if c.isdigit() or c == '-':
+                return self._do_number(c)
+            if c in 'tfnIN':
+                return self._do_const(c)
+            self._raise(f'Unexpected character: {c}')
+    def _do_string(self):
+        self._buf.write('"')
+        last = None
+        while True:
+            try:
+                c = self._char_in((yield None))  # noqa
+            except GeneratorExit:
+                self._raise('Unexpected end of input')
+            if not c:
+                raise NotImplementedError
+            self._buf.write(c)
+            if c == '"' and last != '\\':
+                break
+            last = c
+        raw = self._flip_buf()
+        sv = json.loads(raw)
+        yield self._make_tok('STRING', sv, raw)
+        return self._do_main()
+    def _do_number(self, c: str):
+        self._buf.write(c)
+        while True:
+            try:
+                c = self._char_in((yield None))  # noqa
+            except GeneratorExit:
+                self._raise('Unexpected end of input')
+            if not c:
+                break
+            if not (c.isdigit() or c in '.eE+-'):
+                break
+            self._buf.write(c)
+        raw = self._flip_buf()
+        #
+        if not NUMBER_PAT.fullmatch(raw):
+            # Can only be -Infinity
+            if not c:
+                self._raise('Unexpected end of input')
+            raw += c
+            try:
+                for _ in range(7):
+                    raw += self._char_in((yield None))  # noqa
+            except GeneratorExit:
+                self._raise('Unexpected end of input')
+            if raw != '-Infinity':
+                self._raise(f'Invalid number format: {raw}')
+            tk, tv = CONST_TOKENS[raw]
+            yield self._make_tok(tk, tv, raw)
+            return self._do_main()
+        #
+        if '.' in raw or 'e' in raw or 'E' in raw:
+            nv = float(raw)
+        else:
+            nv = int(raw)
+        yield self._make_tok('NUMBER', nv, raw)
+        #
+        if not c:
+            return None
+        if c in CONTROL_TOKENS:
+            yield self._make_tok(CONTROL_TOKENS[c], c, c)
+        elif not c.isspace():
+            self._raise(f'Unexpected character after number: {c}')
+        return self._do_main()
+    def _do_const(self, c: str):
+        raw = c
+        while True:
+            try:
+                raw += self._char_in((yield None))  # noqa
+            except GeneratorExit:
+                self._raise('Unexpected end of input')
+            if raw in CONST_TOKENS:
+                break
+            if len(raw) > 8:  # None of the keywords are longer than 8 characters
+                self._raise(f'Invalid literal: {raw}')
+        tk, tv = CONST_TOKENS[raw]
+        yield self._make_tok(tk, tv, raw)
+        return self._do_main()

omlish 0.0.0.dev80__py3-none-any.whl → 0.0.0.dev82__py3-none-any.whl

Potentially problematic release.

omlish 0.0.0.dev80-py3-none-any.whl → 0.0.0.dev82-py3-none-any.whl