omlish 0.0.0.dev162__py3-none-any.whl → 0.0.0.dev164__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,139 @@
1
+ import contextlib
2
+ import importlib
3
+ import threading
4
+ import typing as ta
5
+
6
+ from .. import cached
7
+ from .. import check
8
+ from .. import lang
9
+ from .base import Codec
10
+ from .base import LazyLoadedCodec
11
+ from .standard import STANDARD_CODECS
12
+
13
+
14
+ if ta.TYPE_CHECKING:
15
+ from ..manifests import load as manifest_load
16
+ else:
17
+ manifest_load = lang.proxy_import('..manifests.load', __package__)
18
+
19
+
20
+ ##
21
+
22
+
23
class CodecRegistry:
    """Thread-safe registry mapping codec names and aliases to Codec instances.

    Codecs may be registered eagerly (``Codec``) or lazily (``LazyLoadedCodec``);
    lazy entries are imported and resolved on first lookup. Registration of
    standard codecs can itself be deferred via ``late_load_callbacks``, which
    run exactly once, on the first lookup-style operation.
    """

    def __init__(
            self,
            *,
            late_load_callbacks: ta.Iterable[ta.Callable[['CodecRegistry'], None]] | None = None,
    ) -> None:
        super().__init__()

        self._late_load_callbacks = late_load_callbacks

        # RLock: late-load callbacks and lazy resolution may re-enter
        # register()/lookup() while the lock is already held.
        self._lock = threading.RLock()
        self._by_name: dict[str, Codec | LazyLoadedCodec] = {}
        self._names_by_alias: dict[str, str] = {}
        self._names_by_cls: dict[type, list[str]] = {}

    def _late_load(self) -> None:
        # Run the deferred-registration callbacks once, then drop them so they
        # never fire again.
        if self._late_load_callbacks:
            for cb in self._late_load_callbacks:
                cb(self)
            self._late_load_callbacks = None

    @contextlib.contextmanager
    def _lock_and_load(self) -> ta.Iterator[None]:
        """Hold the lock and ensure late-load callbacks have run."""
        with self._lock:
            self._late_load()
            yield

    def _post_load(self, codec: Codec) -> None:
        # Index the (now concrete) codec under every class in its MRO so that
        # lookup_type() can find it by any of its base classes.
        for t in type(codec).__mro__:
            if t is not object:
                self._names_by_cls.setdefault(t, []).append(codec.name)

    def register(self, *codecs: Codec | LazyLoadedCodec) -> ta.Self:
        """Register codecs atomically; raises KeyError on any name/alias collision.

        Collisions are checked both against already-registered codecs and among
        the codecs passed in this call, before anything is inserted — so a
        failed call leaves the registry unmodified.
        """
        with self._lock:
            seen: set[str] = set()
            for codec in codecs:
                for n in {codec.name, *(codec.aliases or [])}:
                    # Fix: also reject duplicates *within* this batch; previously
                    # two codecs in one call could share a name, with the later
                    # silently overwriting the earlier.
                    if n in self._names_by_alias or n in seen:
                        raise KeyError(n)
                    seen.add(n)

            for codec in codecs:
                self._by_name[codec.name] = codec
                for n in {codec.name, *(codec.aliases or [])}:
                    self._names_by_alias[n] = codec.name
                if isinstance(codec, Codec):
                    self._post_load(codec)

        return self

    def lookup(self, name_or_alias: str) -> Codec:
        """Resolve a name or alias to a concrete Codec, importing lazily if needed.

        Raises KeyError if the name is unknown.
        """
        with self._lock_and_load():
            # Underscores are normalized to dashes, matching codec naming convention.
            name = self._names_by_alias[name_or_alias.replace('_', '-')]
            codec_or_lazy = self._by_name[name]

            if isinstance(codec_or_lazy, LazyLoadedCodec):
                mod = importlib.import_module(codec_or_lazy.mod_name)
                codec = check.isinstance(getattr(mod, codec_or_lazy.attr_name), Codec)
                # Cache the resolved codec so the import happens only once.
                self._by_name[name] = codec
                self._post_load(codec)
            else:
                codec = check.isinstance(codec_or_lazy, Codec)

            return codec

    def lookup_type(self, cls: type) -> list[Codec]:
        """Return all loaded codecs whose type is (a subclass of) *cls*.

        NOTE(review): lazily-registered codecs that have never been looked up
        are not yet indexed by class and so will not appear here.
        """
        with self._lock_and_load():
            return [self.lookup(n) for n in self._names_by_cls.get(cls, [])]

    def all(self) -> frozenset[str]:
        """Return the set of canonical (non-alias) registered codec names."""
        with self._lock_and_load():
            return frozenset(self._by_name)
93
+
94
+
95
+ ##
96
+
97
+
98
def _install_standard_codecs(registry: CodecRegistry) -> None:
    # Late-load callback: registers the eagerly-defined standard codecs.
    registry.register(*STANDARD_CODECS)
100
+
101
+
102
+ ##
103
+
104
+
105
@cached.function
def _build_manifest_lazy_loaded_codecs() -> ta.Sequence[LazyLoadedCodec]:
    # Scan this package's root distribution plus any discovered packages for
    # LazyLoadedCodec manifest entries. Cached so the manifest scan runs once.
    ldr = manifest_load.MANIFEST_LOADER
    pkgs = {__package__.split('.')[0], *ldr.discover()}
    mns = ldr.load(*pkgs, only=[LazyLoadedCodec])
    return [m.value for m in mns]
111
+
112
+
113
def _install_manifest_lazy_loaded_codecs(registry: CodecRegistry) -> None:
    # Late-load callback: registers codecs advertised via package manifests.
    registry.register(*_build_manifest_lazy_loaded_codecs())
115
+
116
+
117
+ ##
118
+
119
+
120
# Global default registry. Standard and manifest-discovered codecs are
# installed lazily, on first lookup, via the late-load callbacks.
REGISTRY = CodecRegistry(
    late_load_callbacks=[
        _install_standard_codecs,
        _install_manifest_lazy_loaded_codecs,
    ],
)

# Module-level conveniences bound to the global registry.
register = REGISTRY.register
lookup = REGISTRY.lookup
129
+
130
+
131
+ ##
132
+
133
+
134
def encode(i: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Encode *i* with the codec registered under *name*.

    Extra keyword arguments are forwarded to the codec's factory.
    """
    codec = lookup(name)
    return codec.new(**kwargs).encode(i)
136
+
137
+
138
def decode(o: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Decode *o* with the codec registered under *name*.

    Extra keyword arguments are forwarded to the codec's factory.
    """
    codec = lookup(name)
    return codec.new(**kwargs).decode(o)
@@ -0,0 +1,4 @@
1
+ from .base import Codec
2
+
3
+
4
# Codecs appended here (e.g. by make_text_encoding_codec(..., append_to=...))
# are registered eagerly by the registry's standard-codec late-load callback.
STANDARD_CODECS: list[Codec] = []
omlish/codecs/text.py ADDED
@@ -0,0 +1,217 @@
1
+ import codecs
2
+ import dataclasses as dc
3
+ import functools
4
+ import typing as ta
5
+
6
+ from omlish import check
7
+
8
+ from .base import Codec
9
+ from .base import ComboCodec
10
+ from .standard import STANDARD_CODECS
11
+
12
+
13
+ ##
14
+
15
+
16
# Allowed values for the stdlib codecs error-handler parameter.
TextEncodingErrors: ta.TypeAlias = ta.Literal[
    # Raise UnicodeError (or a subclass), this is the default. Implemented in strict_errors().
    'strict',

    # Ignore the malformed data and continue without further notice. Implemented in ignore_errors().
    'ignore',

    # Replace with a replacement marker. On encoding, use ? (ASCII character). On decoding, use � (U+FFFD, the official
    # REPLACEMENT CHARACTER). Implemented in replace_errors().
    'replace',

    # Replace with backslashed escape sequences. On encoding, use hexadecimal form of Unicode code point with formats
    # \xhh \uxxxx \Uxxxxxxxx. On decoding, use hexadecimal form of byte value with format \xhh. Implemented in
    # backslashreplace_errors().
    'backslashreplace',

    # On decoding, replace byte with individual surrogate code ranging from U+DC80 to U+DCFF. This code will then be
    # turned back into the same byte when the 'surrogateescape' error handler is used when encoding the data. (See PEP
    # 383 for more.)
    'surrogateescape',

    ##
    # The following error handlers are only applicable to encoding (within text encodings):

    # Replace with XML/HTML numeric character reference, which is a decimal form of Unicode code point with format
    # &#num;. Implemented in xmlcharrefreplace_errors().
    'xmlcharrefreplace',

    # Replace with \N{...} escape sequences, what appears in the braces is the Name property from Unicode Character
    # Database. Implemented in namereplace_errors().
    'namereplace',

    ##
    # In addition, the following error handler is specific to the given codecs:
    # utf-8, utf-16, utf-32, utf-16-be, utf-16-le, utf-32-be, utf-32-le

    # Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
    # treat the presence of surrogate code point in str as an error.
    'surrogatepass',
]
56
+
57
+
58
@dc.dataclass(frozen=True, kw_only=True)
class TextEncodingOptions:
    """Options applied to every text-encoding operation of a codec instance."""

    # Error-handling policy passed through to the stdlib codecs machinery.
    errors: TextEncodingErrors = 'strict'
61
+
62
+
63
+ ##
64
+
65
+
66
class TextEncodingComboCodec(ComboCodec[str, bytes]):
    """One-shot and incremental str<->bytes codec backed by a stdlib CodecInfo."""

    def __init__(
            self,
            info: codecs.CodecInfo,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> None:
        super().__init__()
        self._info = check.isinstance(info, codecs.CodecInfo)
        self._opts = check.isinstance(options, TextEncodingOptions)

    @classmethod
    def lookup(
            cls,
            name: str,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> 'TextEncodingComboCodec':
        """Build a codec from a registered text-encoding name (e.g. 'utf-8')."""
        return cls(codecs.lookup(name), options)

    def encode(self, i: str) -> bytes:
        o, _ = self._info.encode(i, self._opts.errors)
        return o

    def decode(self, o: bytes) -> str:
        i, _ = self._info.decode(o, self._opts.errors)
        return i

    def encode_incremental(self) -> ta.Generator[bytes | None, str, None]:
        """Generator-based incremental encoder: send str chunks, receive bytes.

        Yields None when a sent chunk produced no output. A falsy chunk
        (empty string or None) signals end-of-input and flushes the encoder.
        """
        x = self._info.incrementalencoder(self._opts.errors)
        i = yield None
        while True:
            if not i:
                break
            o = x.encode(i)
            i = yield o or None
        # Fix: the terminating chunk may be None, but IncrementalEncoder.encode
        # requires a str — substitute '' before the final flush.
        o = x.encode(i or '', final=True)
        yield o

    def decode_incremental(self) -> ta.Generator[str | None, bytes, None]:
        """Generator-based incremental decoder: send bytes chunks, receive str.

        Yields None when a sent chunk produced no output. A falsy chunk
        (empty bytes or None) signals end-of-input and flushes the decoder.
        """
        x = self._info.incrementaldecoder(self._opts.errors)
        i = yield None
        while True:
            if not i:
                break
            o = x.decode(i)
            i = yield o or None
        # Fix: substitute b'' for a None terminator before the final flush.
        o = x.decode(i or b'', final=True)
        yield o
113
+
114
+
115
+ ##
116
+
117
+
118
class TextEncodingCodec(Codec):
    """Marker Codec subclass for stdlib text encodings (ascii, utf-8, ...)."""

    pass
120
+
121
+
122
def normalize_text_encoding_name(s: str) -> str:
    """Canonicalize a text-encoding name: lowercased, underscores become dashes.

    Raises NameError if the name contains a space.
    """
    if ' ' in s:
        raise NameError(s)
    lowered = s.lower()
    return lowered.replace('_', '-')
126
+
127
+
128
def make_text_encoding_codec(
        name: str,
        aliases: ta.Collection[str] | None = None,
        *,
        append_to: ta.MutableSequence[Codec] | None = None,
) -> TextEncodingCodec:
    """Build a TextEncodingCodec for the stdlib encoding *name*.

    The name must already be in normalized form (lowercase, dashes). If
    *append_to* is given, the new codec is also appended to that sequence
    (typically STANDARD_CODECS).
    """
    codec = TextEncodingCodec(
        name=check.equal(name, normalize_text_encoding_name(name)),
        aliases=check.not_isinstance(aliases, str),

        input=str,
        output=bytes,

        # Both factories resolve the encoding lazily via codecs.lookup();
        # TextEncodingComboCodec serves the eager and incremental roles.
        new=functools.partial(TextEncodingComboCodec.lookup, name),
        new_incremental=functools.partial(TextEncodingComboCodec.lookup, name),
    )

    if append_to is not None:
        append_to.append(codec)

    return codec
149
+
150
+
151
+ ##
152
+
153
+
154
# Standard text-encoding codecs, registered into STANDARD_CODECS. Aliases
# mirror the stdlib codecs registry's alias sets for each encoding.

ASCII = make_text_encoding_codec(
    'ascii',
    ['646', 'us-ascii'],
    append_to=STANDARD_CODECS,
)

LATIN1 = make_text_encoding_codec(
    'latin-1',
    ['iso-8859-1', 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'l1'],
    append_to=STANDARD_CODECS,
)

UTF32 = make_text_encoding_codec(
    'utf-32',
    ['u32', 'utf32'],
    append_to=STANDARD_CODECS,
)

UTF32BE = make_text_encoding_codec(
    'utf-32-be',
    ['utf-32be'],
    append_to=STANDARD_CODECS,
)

UTF32LE = make_text_encoding_codec(
    'utf-32-le',
    ['utf-32le'],
    append_to=STANDARD_CODECS,
)

UTF16 = make_text_encoding_codec(
    'utf-16',
    ['u16', 'utf16'],
    append_to=STANDARD_CODECS,
)

UTF16BE = make_text_encoding_codec(
    'utf-16-be',
    ['utf-16be'],
    append_to=STANDARD_CODECS,
)

UTF16LE = make_text_encoding_codec(
    'utf-16-le',
    ['utf-16le'],
    append_to=STANDARD_CODECS,
)

UTF7 = make_text_encoding_codec(
    'utf-7',
    ['u7', 'unicode-1-1-utf-7'],
    append_to=STANDARD_CODECS,
)

UTF8 = make_text_encoding_codec(
    'utf-8',
    ['u8', 'utf', 'utf8', 'cp65001'],
    append_to=STANDARD_CODECS,
)

# No aliases for utf-8-sig.
UTF8SIG = make_text_encoding_codec(
    'utf-8-sig',
    append_to=STANDARD_CODECS,
)
omlish/formats/cbor.py ADDED
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_bytes_object_codec
5
+ from .codecs import make_object_lazy_loaded_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import cbor2
10
+ else:
11
+ cbor2 = lang.proxy_import('cbor2')
12
+
13
+
14
+ ##
15
+
16
+
17
def dump(obj: ta.Any) -> bytes:
    """Serialize *obj* to CBOR bytes (imports cbor2 on first use)."""
    return cbor2.dumps(obj)
19
+
20
+
21
def load(s: bytes) -> ta.Any:
    """Deserialize CBOR bytes *s* to a Python object."""
    return cbor2.loads(s)
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec plus a manifest-advertised lazy wrapper for registry discovery.
CBOR_CODEC = make_bytes_object_codec('cbor', dump, load)

# @omlish-manifest
_CBOR_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'CBOR_CODEC', CBOR_CODEC)
@@ -0,0 +1,93 @@
1
+ import typing as ta
2
+
3
+ from .. import codecs
4
+ from .. import reflect as rfl
5
+
6
+
7
+ ObjectCodecT = ta.TypeVar('ObjectCodecT', bound='ObjectCodec')
8
+
9
+
10
+ ##
11
+
12
+
13
class ObjectCodec(codecs.Codec):
    """Base marker for codecs serializing arbitrary objects to str or bytes."""

    pass
15
+
16
+
17
def make_object_codec(
        cls: type[ObjectCodecT],
        name: str,
        dumps: ta.Callable,
        loads: ta.Callable,
        *,
        input: rfl.Type = rfl.type_(ta.Any),  # noqa
        aliases: ta.Collection[str] | None = None,
) -> ObjectCodecT:
    """Construct an instance of ObjectCodec subclass *cls* wrapping *dumps*/*loads*."""

    def new() -> ta.Any:
        # Factory producing an eager fn-pair codec over the given callables.
        return codecs.FnPairEagerCodec.of(dumps, loads)

    return cls(
        name=name,
        aliases=aliases,

        input=input,
        output=bytes,

        new=new,
    )
35
+
36
+
37
+ ##
38
+
39
+
40
class BytesObjectCodec(ObjectCodec):
    """ObjectCodec whose serialized form is bytes."""

    pass
42
+
43
+
44
def make_bytes_object_codec(
        name: str,
        dumps: ta.Callable[[ta.Any], bytes],
        loads: ta.Callable[[bytes], ta.Any],
        **kwargs: ta.Any,
) -> BytesObjectCodec:
    """Convenience wrapper: build an ObjectCodec whose wire form is bytes."""
    return make_object_codec(BytesObjectCodec, name, dumps, loads, **kwargs)
57
+
58
+
59
+ ##
60
+
61
+
62
class StrObjectCodec(ObjectCodec):
    """ObjectCodec whose serialized form is str."""

    pass
64
+
65
+
66
def make_str_object_codec(
        name: str,
        dumps: ta.Callable[[ta.Any], str],
        loads: ta.Callable[[str], ta.Any],
        **kwargs: ta.Any,
) -> StrObjectCodec:
    """Convenience wrapper: build an ObjectCodec whose wire form is str."""
    return make_object_codec(StrObjectCodec, name, dumps, loads, **kwargs)
79
+
80
+
81
+ ##
82
+
83
+
84
def make_object_lazy_loaded_codec(
        mod_name: str,
        attr_name: str,
        codec: ObjectCodec,
) -> codecs.LazyLoadedCodec:
    """Wrap a module-level codec attribute for deferred loading via the registry."""
    return codecs.LazyLoadedCodec.new(mod_name, attr_name, codec)
@@ -0,0 +1,33 @@
1
+ from ..codecs import make_object_lazy_loaded_codec
2
+ from ..codecs import make_str_object_codec
3
+ from .json import dumps
4
+ from .json import dumps_compact
5
+ from .json import dumps_pretty
6
+ from .json import loads
7
+
8
+
9
+ ##
10
+
11
+
12
# Default, compact, and pretty JSON codecs; all three share the same loads.
# Each has a manifest-advertised lazy wrapper for registry discovery.

JSON_CODEC = make_str_object_codec('json', dumps, loads)

# @omlish-manifest
_JSON_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_CODEC', JSON_CODEC)


#


JSON_COMPACT_CODEC = make_str_object_codec('json-compact', dumps_compact, loads)

# @omlish-manifest
_JSON_COMPACT_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_COMPACT_CODEC', JSON_COMPACT_CODEC)


#


JSON_PRETTY_CODEC = make_str_object_codec('json-pretty', dumps_pretty, loads)

# @omlish-manifest
_JSON_PRETTY_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_PRETTY_CODEC', JSON_PRETTY_CODEC)
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_object_lazy_loaded_codec
5
+ from .codecs import make_str_object_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import json5
10
+ else:
11
+ json5 = lang.proxy_import('json5')
12
+
13
+
14
+ ##
15
+
16
+
17
def dumps(obj: ta.Any) -> str:
    """Serialize *obj* to a JSON5 string (imports json5 on first use)."""
    return json5.dumps(obj)
19
+
20
+
21
def loads(s: str) -> ta.Any:
    """Deserialize a JSON5 string *s* to a Python object."""
    return json5.loads(s)
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec plus a manifest-advertised lazy wrapper for registry discovery.
JSON5_CODEC = make_str_object_codec('json5', dumps, loads)

# @omlish-manifest
_JSON5_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON5_CODEC', JSON5_CODEC)
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_bytes_object_codec
5
+ from .codecs import make_object_lazy_loaded_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import pickle
10
+ else:
11
+ pickle = lang.proxy_import('pickle')
12
+
13
+
14
+ ##
15
+
16
+
17
def dump(obj: ta.Any) -> bytes:
    """Serialize *obj* to pickle bytes (imports pickle lazily)."""
    return pickle.dumps(obj)
19
+
20
+
21
def load(s: bytes) -> ta.Any:
    """Deserialize pickle bytes *s* to a Python object.

    SECURITY NOTE(review): pickle.loads executes arbitrary code embedded in
    the payload — never call this codec on untrusted data.
    """
    return pickle.loads(s)
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec plus a manifest-advertised lazy wrapper for registry discovery.
PICKLE_CODEC = make_bytes_object_codec('pickle', dump, load)

# @omlish-manifest
_PICKLE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'PICKLE_CODEC', PICKLE_CODEC)
omlish/formats/toml.py ADDED
@@ -0,0 +1,17 @@
1
+ import tomllib
2
+
3
+ from .codecs import make_object_lazy_loaded_codec
4
+ from .codecs import make_str_object_codec
5
+
6
+
7
+ ##
8
+
9
+
10
def _dumps(obj):
    # TOML serialization is intentionally unsupported (tomllib is read-only);
    # the codec is decode-only.
    raise TypeError('Unsupported')
12
+
13
+
14
# Decode-only codec (see _dumps) plus a manifest-advertised lazy wrapper.
TOML_CODEC = make_str_object_codec('toml', _dumps, tomllib.loads)

# @omlish-manifest
_TOML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'TOML_CODEC', TOML_CODEC)
omlish/formats/yaml.py CHANGED
@@ -14,6 +14,8 @@ import typing as ta
14
14
  from .. import check
15
15
  from .. import dataclasses as dc
16
16
  from .. import lang
17
+ from .codecs import make_object_lazy_loaded_codec
18
+ from .codecs import make_str_object_codec
17
19
 
18
20
 
19
21
  if ta.TYPE_CHECKING:
@@ -239,3 +241,19 @@ def full_load(stream): # noqa
239
241
 
240
242
  def full_load_all(stream): # noqa # noqa
241
243
  return load_all(stream, yaml.FullLoader)
244
+
245
+
246
+ ##
247
+
248
+
249
def dump(obj, **kwargs):
    """Serialize *obj* to a YAML string, forwarding **kwargs to yaml.dump."""
    return yaml.dump(obj, **kwargs)
251
+
252
+
253
+ ##
254
+
255
+
256
# Codec pairing full yaml.dump output with safe_load parsing, plus a
# manifest-advertised lazy wrapper for registry discovery.
YAML_CODEC = make_str_object_codec('yaml', dump, safe_load, aliases=['yml'])

# @omlish-manifest
_YAML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_CODEC', YAML_CODEC)
@@ -3,6 +3,8 @@ import typing as ta
3
3
 
4
4
  from ... import lang
5
5
  from .base import Compression
6
+ from .codecs import make_compression_codec
7
+ from .codecs import make_compression_lazy_loaded_codec
6
8
 
7
9
 
8
10
  if ta.TYPE_CHECKING:
@@ -11,8 +13,11 @@ else:
11
13
  brotli = lang.proxy_import('brotli')
12
14
 
13
15
 
16
+ ##
17
+
18
+
14
19
  @dc.dataclass(frozen=True, kw_only=True)
15
- class SnappyCompression(Compression):
20
+ class BrotliCompression(Compression):
16
21
  mode: int | None = None
17
22
  quality: int | None = None
18
23
  lgwin: int | None = None
@@ -31,3 +36,12 @@ class SnappyCompression(Compression):
31
36
  return brotli.decompress(
32
37
  d,
33
38
  )
39
+
40
+
41
+ ##
42
+
43
+
44
# Compression codec plus a manifest-advertised lazy wrapper for discovery.
BROTLI_CODEC = make_compression_codec('brotli', BrotliCompression)

# @omlish-manifest
_BROTLI_LAZY_CODEC = make_compression_lazy_loaded_codec(__name__, 'BROTLI_CODEC', BROTLI_CODEC)
omlish/io/compress/bz2.py CHANGED
@@ -9,6 +9,8 @@ from .adapters import CompressorObjectIncrementalAdapter
9
9
  from .adapters import DecompressorObjectIncrementalAdapter
10
10
  from .base import Compression
11
11
  from .base import IncrementalCompression
12
+ from .codecs import make_compression_codec
13
+ from .codecs import make_compression_lazy_loaded_codec
12
14
 
13
15
 
14
16
  if ta.TYPE_CHECKING:
@@ -17,6 +19,9 @@ else:
17
19
  bz2 = lang.proxy_import('bz2')
18
20
 
19
21
 
22
+ ##
23
+
24
+
20
25
  @dc.dataclass(frozen=True, kw_only=True)
21
26
  class Bz2Compression(Compression, IncrementalCompression):
22
27
  level: int = 9
@@ -45,3 +50,12 @@ class Bz2Compression(Compression, IncrementalCompression):
45
50
  bz2.BZ2Decompressor, # type: ignore
46
51
  trailing_error=OSError,
47
52
  )()
53
+
54
+
55
+ ##
56
+
57
+
58
# Compression codec plus a manifest-advertised lazy wrapper for discovery.
BZ2_CODEC = make_compression_codec('bz2', Bz2Compression)

# @omlish-manifest
_BZ2_LAZY_CODEC = make_compression_lazy_loaded_codec(__name__, 'BZ2_CODEC', BZ2_CODEC)