omlish 0.0.0.dev133__py3-none-any.whl → 0.0.0.dev177__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omlish/.manifests.json +265 -7
- omlish/__about__.py +5 -3
- omlish/antlr/_runtime/__init__.py +0 -22
- omlish/antlr/_runtime/_all.py +24 -0
- omlish/antlr/_runtime/atn/ParserATNSimulator.py +1 -1
- omlish/antlr/_runtime/dfa/DFASerializer.py +1 -1
- omlish/antlr/_runtime/error/DiagnosticErrorListener.py +2 -1
- omlish/antlr/_runtime/xpath/XPath.py +7 -1
- omlish/antlr/_runtime/xpath/XPathLexer.py +1 -1
- omlish/antlr/delimit.py +106 -0
- omlish/antlr/dot.py +31 -0
- omlish/antlr/errors.py +11 -0
- omlish/antlr/input.py +96 -0
- omlish/antlr/parsing.py +19 -0
- omlish/antlr/runtime.py +102 -0
- omlish/antlr/utils.py +38 -0
- omlish/argparse/all.py +45 -0
- omlish/{argparse.py → argparse/cli.py} +112 -107
- omlish/asyncs/__init__.py +0 -35
- omlish/asyncs/all.py +35 -0
- omlish/asyncs/asyncio/all.py +7 -0
- omlish/asyncs/asyncio/channels.py +40 -0
- omlish/asyncs/asyncio/streams.py +45 -0
- omlish/asyncs/asyncio/subprocesses.py +238 -0
- omlish/asyncs/asyncio/timeouts.py +16 -0
- omlish/asyncs/bluelet/LICENSE +6 -0
- omlish/asyncs/bluelet/all.py +67 -0
- omlish/asyncs/bluelet/api.py +23 -0
- omlish/asyncs/bluelet/core.py +178 -0
- omlish/asyncs/bluelet/events.py +78 -0
- omlish/asyncs/bluelet/files.py +80 -0
- omlish/asyncs/bluelet/runner.py +416 -0
- omlish/asyncs/bluelet/sockets.py +214 -0
- omlish/bootstrap/sys.py +3 -3
- omlish/cached.py +2 -2
- omlish/check.py +49 -460
- omlish/codecs/__init__.py +72 -0
- omlish/codecs/base.py +106 -0
- omlish/codecs/bytes.py +119 -0
- omlish/codecs/chain.py +23 -0
- omlish/codecs/funcs.py +39 -0
- omlish/codecs/registry.py +139 -0
- omlish/codecs/standard.py +4 -0
- omlish/codecs/text.py +217 -0
- omlish/collections/cache/impl.py +50 -57
- omlish/collections/coerce.py +1 -0
- omlish/collections/mappings.py +1 -1
- omlish/configs/flattening.py +1 -1
- omlish/defs.py +1 -1
- omlish/diag/_pycharm/runhack.py +8 -2
- omlish/diag/procfs.py +8 -8
- omlish/docker/__init__.py +0 -36
- omlish/docker/all.py +31 -0
- omlish/docker/consts.py +4 -0
- omlish/{lite/docker.py → docker/detect.py} +18 -0
- omlish/docker/{helpers.py → timebomb.py} +0 -21
- omlish/formats/cbor.py +31 -0
- omlish/formats/cloudpickle.py +31 -0
- omlish/formats/codecs.py +93 -0
- omlish/formats/json/codecs.py +29 -0
- omlish/formats/json/delimted.py +4 -0
- omlish/formats/json/stream/errors.py +2 -0
- omlish/formats/json/stream/lex.py +12 -6
- omlish/formats/json/stream/parse.py +38 -22
- omlish/formats/json5.py +31 -0
- omlish/formats/pickle.py +31 -0
- omlish/formats/repr.py +25 -0
- omlish/formats/toml.py +17 -0
- omlish/formats/yaml.py +25 -0
- omlish/funcs/__init__.py +0 -0
- omlish/{genmachine.py → funcs/genmachine.py} +5 -4
- omlish/{matchfns.py → funcs/match.py} +1 -1
- omlish/funcs/pairs.py +215 -0
- omlish/http/__init__.py +0 -48
- omlish/http/all.py +48 -0
- omlish/http/coro/__init__.py +0 -0
- omlish/{lite/fdio/corohttp.py → http/coro/fdio.py} +21 -19
- omlish/{lite/http/coroserver.py → http/coro/server.py} +20 -21
- omlish/{lite/http → http}/handlers.py +3 -2
- omlish/{lite/http → http}/parsing.py +1 -0
- omlish/http/sessions.py +1 -1
- omlish/{lite/http → http}/versions.py +1 -0
- omlish/inject/managed.py +2 -2
- omlish/io/__init__.py +0 -3
- omlish/{lite/io.py → io/buffers.py} +8 -9
- omlish/io/compress/__init__.py +9 -0
- omlish/io/compress/abc.py +104 -0
- omlish/io/compress/adapters.py +148 -0
- omlish/io/compress/base.py +24 -0
- omlish/io/compress/brotli.py +47 -0
- omlish/io/compress/bz2.py +61 -0
- omlish/io/compress/codecs.py +78 -0
- omlish/io/compress/gzip.py +350 -0
- omlish/io/compress/lz4.py +91 -0
- omlish/io/compress/lzma.py +81 -0
- omlish/io/compress/snappy.py +34 -0
- omlish/io/compress/zlib.py +74 -0
- omlish/io/compress/zstd.py +44 -0
- omlish/io/fdio/__init__.py +1 -0
- omlish/{lite → io}/fdio/handlers.py +5 -5
- omlish/{lite → io}/fdio/kqueue.py +8 -8
- omlish/{lite → io}/fdio/manager.py +7 -7
- omlish/{lite → io}/fdio/pollers.py +13 -13
- omlish/io/generators/__init__.py +56 -0
- omlish/io/generators/consts.py +1 -0
- omlish/io/generators/direct.py +13 -0
- omlish/io/generators/readers.py +189 -0
- omlish/io/generators/stepped.py +191 -0
- omlish/io/pyio.py +5 -2
- omlish/iterators/__init__.py +24 -0
- omlish/iterators/iterators.py +132 -0
- omlish/iterators/recipes.py +18 -0
- omlish/iterators/tools.py +96 -0
- omlish/iterators/unique.py +67 -0
- omlish/lang/__init__.py +13 -1
- omlish/lang/functions.py +11 -2
- omlish/lang/generators.py +243 -0
- omlish/lang/iterables.py +46 -49
- omlish/lang/maybes.py +4 -4
- omlish/lite/cached.py +39 -6
- omlish/lite/check.py +438 -75
- omlish/lite/contextmanagers.py +17 -4
- omlish/lite/dataclasses.py +42 -0
- omlish/lite/inject.py +28 -45
- omlish/lite/logs.py +0 -270
- omlish/lite/marshal.py +309 -144
- omlish/lite/pycharm.py +47 -0
- omlish/lite/reflect.py +33 -0
- omlish/lite/resources.py +8 -0
- omlish/lite/runtime.py +4 -4
- omlish/lite/shlex.py +12 -0
- omlish/lite/socketserver.py +2 -2
- omlish/lite/strings.py +31 -0
- omlish/logs/__init__.py +0 -32
- omlish/logs/{_abc.py → abc.py} +0 -1
- omlish/logs/all.py +37 -0
- omlish/logs/{formatters.py → color.py} +1 -2
- omlish/logs/configs.py +7 -38
- omlish/logs/filters.py +10 -0
- omlish/logs/handlers.py +4 -1
- omlish/logs/json.py +56 -0
- omlish/logs/proxy.py +99 -0
- omlish/logs/standard.py +128 -0
- omlish/logs/utils.py +2 -2
- omlish/manifests/__init__.py +2 -0
- omlish/manifests/load.py +209 -0
- omlish/manifests/types.py +17 -0
- omlish/marshal/base.py +1 -1
- omlish/marshal/factories.py +1 -1
- omlish/marshal/forbidden.py +1 -1
- omlish/marshal/iterables.py +1 -1
- omlish/marshal/literals.py +50 -0
- omlish/marshal/mappings.py +1 -1
- omlish/marshal/maybes.py +1 -1
- omlish/marshal/standard.py +5 -1
- omlish/marshal/unions.py +1 -1
- omlish/os/__init__.py +0 -0
- omlish/os/atomics.py +205 -0
- omlish/os/deathsig.py +23 -0
- omlish/{os.py → os/files.py} +0 -9
- omlish/{lite → os}/journald.py +2 -1
- omlish/os/linux.py +484 -0
- omlish/os/paths.py +36 -0
- omlish/{lite → os}/pidfile.py +1 -0
- omlish/os/sizes.py +9 -0
- omlish/reflect/__init__.py +3 -0
- omlish/reflect/subst.py +2 -1
- omlish/reflect/types.py +126 -44
- omlish/secrets/pwhash.py +1 -1
- omlish/secrets/subprocesses.py +3 -1
- omlish/specs/jsonrpc/marshal.py +1 -1
- omlish/specs/openapi/marshal.py +1 -1
- omlish/sql/alchemy/asyncs.py +1 -1
- omlish/sql/queries/__init__.py +9 -1
- omlish/sql/queries/building.py +3 -0
- omlish/sql/queries/exprs.py +10 -27
- omlish/sql/queries/idents.py +48 -10
- omlish/sql/queries/names.py +80 -13
- omlish/sql/queries/params.py +64 -0
- omlish/sql/queries/rendering.py +1 -1
- omlish/subprocesses.py +340 -0
- omlish/term.py +29 -14
- omlish/testing/pytest/marks.py +2 -2
- omlish/testing/pytest/plugins/asyncs.py +6 -1
- omlish/testing/pytest/plugins/logging.py +1 -1
- omlish/testing/pytest/plugins/switches.py +1 -1
- {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/METADATA +7 -5
- {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/RECORD +200 -117
- omlish/fnpairs.py +0 -496
- omlish/formats/json/cli/__main__.py +0 -11
- omlish/formats/json/cli/cli.py +0 -298
- omlish/formats/json/cli/formats.py +0 -71
- omlish/formats/json/cli/io.py +0 -74
- omlish/formats/json/cli/parsing.py +0 -82
- omlish/formats/json/cli/processing.py +0 -48
- omlish/formats/json/cli/rendering.py +0 -92
- omlish/iterators.py +0 -300
- omlish/lite/subprocesses.py +0 -130
- /omlish/{formats/json/cli → argparse}/__init__.py +0 -0
- /omlish/{lite/fdio → asyncs/asyncio}/__init__.py +0 -0
- /omlish/asyncs/{asyncio.py → asyncio/asyncio.py} +0 -0
- /omlish/{lite/http → asyncs/bluelet}/__init__.py +0 -0
- /omlish/collections/{_abc.py → abc.py} +0 -0
- /omlish/{fnpipes.py → funcs/pipes.py} +0 -0
- /omlish/io/{_abc.py → abc.py} +0 -0
- /omlish/sql/{_abc.py → abc.py} +0 -0
- {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/LICENSE +0 -0
- {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/top_level.txt +0 -0
omlish/codecs/base.py
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
"""
|
2
|
+
TODO:
|
3
|
+
- bytes-like - bytearray, memoryview
|
4
|
+
"""
|
5
|
+
import abc
|
6
|
+
import typing as ta
|
7
|
+
|
8
|
+
from omlish import check
|
9
|
+
from omlish import dataclasses as dc
|
10
|
+
from omlish import lang
|
11
|
+
from omlish import reflect as rfl
|
12
|
+
from omlish.funcs import pairs as fps
|
13
|
+
|
14
|
+
|
15
|
+
I = ta.TypeVar('I')
|
16
|
+
O = ta.TypeVar('O')
|
17
|
+
|
18
|
+
|
19
|
+
##
|
20
|
+
|
21
|
+
|
22
|
+
class EagerCodec(lang.Abstract, ta.Generic[I, O]):
    """Abstract one-shot codec: turns a whole `I` into an `O` and back again."""

    @abc.abstractmethod
    def encode(self, i: I) -> O:
        """Encode one complete input value."""
        raise NotImplementedError

    @abc.abstractmethod
    def decode(self, o: O) -> I:
        """Decode one complete output value back to its input form."""
        raise NotImplementedError

    def as_pair(self) -> fps.FnPair[I, O]:
        """Expose the bound `encode` / `decode` methods as a function pair."""
        forward, backward = self.encode, self.decode
        return fps.of(forward, backward)
|
33
|
+
|
34
|
+
|
35
|
+
class IncrementalCodec(lang.Abstract, ta.Generic[I, O]):
    """Abstract streaming codec whose two directions are exposed as generators."""

    @abc.abstractmethod
    def encode_incremental(self) -> ta.Generator[O | None, I, None]:
        """Return a generator that is sent `I` values and yields `O` values or None."""
        raise NotImplementedError

    @abc.abstractmethod
    def decode_incremental(self) -> ta.Generator[I | None, O, None]:
        """Return a generator that is sent `O` values and yields `I` values or None."""
        raise NotImplementedError
|
43
|
+
|
44
|
+
|
45
|
+
class ComboCodec(  # noqa
    EagerCodec[I, O],
    IncrementalCodec[I, O],
    lang.Abstract,
    ta.Generic[I, O],
):
    """Abstract base for codecs that implement both the eager and the incremental interface."""
|
52
|
+
|
53
|
+
|
54
|
+
##
|
55
|
+
|
56
|
+
|
57
|
+
def check_codec_name(s: str) -> str:
    """Validate a codec name or alias and hand it back unchanged.

    Three checks run in order: `check.non_empty_str`, no underscore anywhere in the name, and no
    leading or trailing whitespace (the name must equal its own `strip()`).
    """
    check.non_empty_str(s)
    check.not_in('_', s)
    check.equal(s.strip(), s)
    return s
|
62
|
+
|
63
|
+
|
64
|
+
##
|
65
|
+
|
66
|
+
|
67
|
+
@dc.dataclass(frozen=True, kw_only=True)
class Codec:
    """Frozen, keyword-only description of a codec: its names, its types, and its factories."""

    # Primary name, run through `check_codec_name`.
    name: str = dc.xfield(coerce=check_codec_name)

    # Optional alternate names. The coercer validates each one and rebuilds the collection as a
    # list; `lang.opt_fn` presumably lets None pass through untouched - TODO confirm.
    aliases: ta.Collection[str] | None = dc.xfield(
        coerce=lang.opt_fn(lambda s: [check_codec_name(a) for a in s]),  # type: ignore
        default=None,
    )

    # Types on either side of the codec, each run through `rfl.type_`.
    input: rfl.Type = dc.xfield(coerce=rfl.type_)
    output: rfl.Type = dc.xfield(coerce=rfl.type_)

    # NOTE(review): presumably the class of an options object understood by `new` - confirm.
    options: type | None = None

    # Factory for the eager implementation, plus an optional one for the incremental implementation.
    new: ta.Callable[..., EagerCodec]
    new_incremental: ta.Callable[..., IncrementalCodec] | None = None
|
82
|
+
|
83
|
+
|
84
|
+
##
|
85
|
+
|
86
|
+
|
87
|
+
@dc.dataclass(frozen=True, kw_only=True)
class LazyLoadedCodec:
    """Reference to a `Codec` held as a module attribute, carrying only the codec's names."""

    # Module and attribute under which the referenced codec object can be found.
    mod_name: str
    attr_name: str

    # Names copied from the referenced codec.
    name: str
    aliases: ta.Collection[str] | None = None

    @classmethod
    def new(
        cls,
        mod_name: str,
        attr_name: str,
        codec: Codec,
    ) -> 'LazyLoadedCodec':
        """Build a reference to `codec`, copying its name and aliases."""
        fields = dict(
            mod_name=mod_name,
            attr_name=attr_name,
            name=codec.name,
            aliases=codec.aliases,
        )
        return cls(**fields)
|
omlish/codecs/bytes.py
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
"""
|
2
|
+
TODO:
|
3
|
+
- options / kwargs
|
4
|
+
"""
|
5
|
+
import base64
|
6
|
+
import binascii
|
7
|
+
import typing as ta
|
8
|
+
|
9
|
+
from .. import check
|
10
|
+
from .base import Codec
|
11
|
+
from .funcs import FnPairEagerCodec
|
12
|
+
from .standard import STANDARD_CODECS
|
13
|
+
|
14
|
+
|
15
|
+
##
|
16
|
+
|
17
|
+
|
18
|
+
class BytesCodec(Codec):
    """Marker subclass of `Codec` for the bytes-to-bytes encodings defined in this module."""
|
20
|
+
|
21
|
+
|
22
|
+
def make_bytes_encoding_codec(
    name: str,
    aliases: ta.Collection[str] | None,
    encode: ta.Callable[[bytes], bytes],
    decode: ta.Callable[[bytes], bytes],
    *,
    append_to: ta.MutableSequence[Codec] | None = None,
) -> BytesCodec:
    """Build a `BytesCodec` around a pair of bytes -> bytes functions.

    Args:
        name: Primary codec name.
        aliases: Alternate names, or None. A bare `str` is rejected by `check.not_isinstance`.
        encode: Function used for the encode direction.
        decode: Function used for the decode direction.
        append_to: When not None, the new codec is also appended to this sequence.

    Returns:
        The newly constructed codec.
    """

    def new_codec() -> FnPairEagerCodec:
        # Factory takes no arguments; every call wraps the same two functions afresh.
        return FnPairEagerCodec.of(encode, decode)

    result = BytesCodec(
        name=name,
        aliases=check.not_isinstance(aliases, str),

        input=bytes,
        output=bytes,

        new=new_codec,
    )

    if append_to is None:
        return result

    append_to.append(result)
    return result
|
44
|
+
|
45
|
+
|
46
|
+
##
|
47
|
+
|
48
|
+
|
49
|
+
# Bytes-to-bytes codecs backed by the stdlib `base64` / `binascii` modules. Each one is also appended
# to STANDARD_CODECS.

ASCII85 = make_bytes_encoding_codec(
    name='ascii85',
    aliases=['a85'],
    encode=base64.a85encode,
    decode=base64.a85decode,
    append_to=STANDARD_CODECS,
)

BASE16 = make_bytes_encoding_codec(
    name='base16',
    aliases=['b16'],
    encode=base64.b16encode,
    decode=base64.b16decode,
    append_to=STANDARD_CODECS,
)

BASE32 = make_bytes_encoding_codec(
    name='base32',
    aliases=['b32'],
    encode=base64.b32encode,
    decode=base64.b32decode,
    append_to=STANDARD_CODECS,
)

BASE64 = make_bytes_encoding_codec(
    name='base64',
    aliases=['b64'],
    encode=base64.b64encode,
    decode=base64.b64decode,
    append_to=STANDARD_CODECS,
)

BASE85 = make_bytes_encoding_codec(
    name='base85',
    aliases=['b85'],
    encode=base64.b85encode,
    decode=base64.b85decode,
    append_to=STANDARD_CODECS,
)

BASE32_HEX = make_bytes_encoding_codec(
    name='base32-hex',
    aliases=['b32-hex'],
    encode=base64.b32hexencode,
    decode=base64.b32hexdecode,
    append_to=STANDARD_CODECS,
)

# NOTE(review): despite the '-hex' name this wraps the *standard* base64 alphabet functions - confirm
# that is intended.
BASE64_HEX = make_bytes_encoding_codec(
    name='base64-hex',
    aliases=['b64-hex'],
    encode=base64.standard_b64encode,
    decode=base64.standard_b64decode,
    append_to=STANDARD_CODECS,
)

BASE64_URLSAFE = make_bytes_encoding_codec(
    name='base64-urlsafe',
    aliases=['b64-urlsafe'],
    encode=base64.urlsafe_b64encode,
    decode=base64.urlsafe_b64decode,
    append_to=STANDARD_CODECS,
)

HEX = make_bytes_encoding_codec(
    name='hex',
    aliases=[],
    encode=binascii.b2a_hex,
    decode=binascii.a2b_hex,
    append_to=STANDARD_CODECS,
)
|
omlish/codecs/chain.py
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
import dataclasses as dc
|
2
|
+
import typing as ta
|
3
|
+
|
4
|
+
from .base import EagerCodec
|
5
|
+
|
6
|
+
|
7
|
+
@dc.dataclass(frozen=True)
class ChainEagerCodec(EagerCodec[ta.Any, ta.Any]):
    """Eager codec that composes a sequence of eager codecs."""

    # Member codecs, in encode order.
    codecs: ta.Sequence[EagerCodec]

    def encode(self, v: ta.Any) -> ta.Any:
        """Pass `v` through every member's `encode`, first to last."""
        current = v
        for codec in self.codecs:
            current = codec.encode(current)
        return current

    def decode(self, v: ta.Any) -> ta.Any:
        """Pass `v` through every member's `decode`, last to first."""
        current = v
        for codec in reversed(self.codecs):
            current = codec.decode(current)
        return current
|
20
|
+
|
21
|
+
|
22
|
+
def chain(*codecs: EagerCodec) -> ChainEagerCodec:
    """Compose the given eager codecs, in argument order, into a `ChainEagerCodec`."""
    return ChainEagerCodec(codecs=codecs)
|
omlish/codecs/funcs.py
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
import dataclasses as dc
|
2
|
+
import typing as ta
|
3
|
+
|
4
|
+
from ..funcs import pairs as fps
|
5
|
+
from .base import EagerCodec
|
6
|
+
|
7
|
+
|
8
|
+
I = ta.TypeVar('I')
|
9
|
+
O = ta.TypeVar('O')
|
10
|
+
|
11
|
+
|
12
|
+
@dc.dataclass(frozen=True)
class FnPairEagerCodec(EagerCodec[I, O]):
    """Eager codec that delegates to the two directions of an `FnPair`."""

    # Wrapped pair: `forward` is used for encoding, `backward` for decoding.
    fp: fps.FnPair[I, O]

    def encode(self, i: I) -> O:
        """Encode by calling the pair's `forward`."""
        pair = self.fp
        return pair.forward(i)

    def decode(self, o: O) -> I:
        """Decode by calling the pair's `backward`."""
        pair = self.fp
        return pair.backward(o)

    @classmethod
    def of(
        cls,
        encode: ta.Callable[[I], O],
        decode: ta.Callable[[O], I],
    ) -> 'FnPairEagerCodec[I, O]':
        """Alternate constructor taking the two functions directly."""
        pair = fps.of(encode, decode)
        return cls(fp=pair)
|
29
|
+
|
30
|
+
|
31
|
+
def of_pair(fp: fps.FnPair[I, O]) -> FnPairEagerCodec[I, O]:
    """Wrap an existing `FnPair` in a `FnPairEagerCodec`."""
    return FnPairEagerCodec(fp=fp)
|
33
|
+
|
34
|
+
|
35
|
+
def of(
    encode: ta.Callable[[I], O],
    decode: ta.Callable[[O], I],
) -> FnPairEagerCodec[I, O]:
    """Build a `FnPairEagerCodec` from separate encode and decode functions."""
    return FnPairEagerCodec.of(encode, decode)
|
omlish/codecs/registry.py
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
import contextlib
|
2
|
+
import importlib
|
3
|
+
import threading
|
4
|
+
import typing as ta
|
5
|
+
|
6
|
+
from .. import cached
|
7
|
+
from .. import check
|
8
|
+
from .. import lang
|
9
|
+
from .base import Codec
|
10
|
+
from .base import LazyLoadedCodec
|
11
|
+
from .standard import STANDARD_CODECS
|
12
|
+
|
13
|
+
|
14
|
+
if ta.TYPE_CHECKING:
|
15
|
+
from ..manifests import load as manifest_load
|
16
|
+
else:
|
17
|
+
manifest_load = lang.proxy_import('..manifests.load', __package__)
|
18
|
+
|
19
|
+
|
20
|
+
##
|
21
|
+
|
22
|
+
|
23
|
+
class CodecRegistry:
    """Registry mapping codec names and aliases to `Codec` objects.

    Entries may be full `Codec` instances or `LazyLoadedCodec` references, which are imported and
    swapped for the real codec the first time they are looked up. All state is guarded by a re-entrant
    lock. Optional late-load callbacks run once, on the first lookup-style call, to populate the
    registry.
    """

    def __init__(
        self,
        *,
        late_load_callbacks: ta.Iterable[ta.Callable[['CodecRegistry'], None]] | None = None,
    ) -> None:
        """Create an empty registry.

        Args:
            late_load_callbacks: Callables invoked with this registry on first lookup.
        """
        super().__init__()

        self._late_load_callbacks = late_load_callbacks

        self._lock = threading.RLock()
        # Primary name -> codec, or lazy reference not yet resolved.
        self._by_name: dict[str, Codec | LazyLoadedCodec] = {}
        # Every name and alias -> primary name.
        self._names_by_alias: dict[str, str] = {}
        # Class (each class in a loaded codec's MRO, except object) -> primary names.
        self._names_by_cls: dict[type, list[str]] = {}

    def _late_load(self) -> None:
        """Run the pending late-load callbacks, then clear them so they run only once."""
        if self._late_load_callbacks:
            for cb in self._late_load_callbacks:
                cb(self)
            self._late_load_callbacks = None

    @contextlib.contextmanager
    def _lock_and_load(self) -> ta.Iterator[None]:
        """Hold the lock for the duration of the block, after making sure late loading has happened."""
        with self._lock:
            self._late_load()
            yield

    def _post_load(self, codec: Codec) -> None:
        """Index a fully loaded codec under every class in its MRO other than `object`."""
        for t in type(codec).__mro__:
            if t is not object:
                self._names_by_cls.setdefault(t, []).append(codec.name)

    def register(self, *codecs: Codec | LazyLoadedCodec) -> ta.Self:
        """Register codecs and/or lazy references under their names and aliases.

        The whole batch is validated before anything is stored, so a failure leaves the registry
        unchanged.

        Returns:
            This registry, to allow chaining.

        Raises:
            KeyError: If a name or alias is already registered, or is claimed by more than one codec
                within this call.
        """
        with self._lock:
            # Names claimed by earlier codecs of this same batch. Without this, two codecs in one
            # call sharing a name or alias would both pass validation and the later one would
            # silently overwrite the earlier.
            claimed: set[str] = set()
            for codec in codecs:
                names = {codec.name, *(codec.aliases or [])}
                for n in names:
                    if n in self._names_by_alias or n in claimed:
                        raise KeyError(n)
                claimed |= names

            for codec in codecs:
                self._by_name[codec.name] = codec
                for n in {codec.name, *(codec.aliases or [])}:
                    self._names_by_alias[n] = codec.name
                # Lazy references are indexed by class only once they are resolved in `lookup`.
                if isinstance(codec, Codec):
                    self._post_load(codec)

        return self

    def lookup(self, name_or_alias: str) -> Codec:
        """Return the codec registered under a name or alias.

        Underscores in the argument are treated as hyphens. A lazy reference is resolved by importing
        its module and reading the named attribute, and the result replaces the reference.

        Raises:
            KeyError: If no codec is registered under the (normalized) name.
        """
        with self._lock_and_load():
            name = self._names_by_alias[name_or_alias.replace('_', '-')]
            codec_or_lazy = self._by_name[name]

            if isinstance(codec_or_lazy, LazyLoadedCodec):
                mod = importlib.import_module(codec_or_lazy.mod_name)
                codec = check.isinstance(getattr(mod, codec_or_lazy.attr_name), Codec)
                self._by_name[name] = codec
                self._post_load(codec)
            else:
                codec = check.isinstance(codec_or_lazy, Codec)

            return codec

    def lookup_type(self, cls: type) -> list[Codec]:
        """Return the loaded codecs indexed under `cls`; unresolved lazy references are not included."""
        with self._lock_and_load():
            return [self.lookup(n) for n in self._names_by_cls.get(cls, [])]

    def all(self) -> frozenset[str]:
        """Return the primary names of every registered codec, loaded or lazy."""
        with self._lock_and_load():
            return frozenset(self._by_name)
|
93
|
+
|
94
|
+
|
95
|
+
##
|
96
|
+
|
97
|
+
|
98
|
+
def _install_standard_codecs(registry: CodecRegistry) -> None:
    """Late-load callback: register everything currently in STANDARD_CODECS."""
    standard = tuple(STANDARD_CODECS)
    registry.register(*standard)
|
100
|
+
|
101
|
+
|
102
|
+
##
|
103
|
+
|
104
|
+
|
105
|
+
@cached.function
def _build_manifest_lazy_loaded_codecs() -> ta.Sequence[LazyLoadedCodec]:
    """Collect the `LazyLoadedCodec` values declared in package manifests.

    Manifests are loaded for this module's top-level package plus whatever the manifest loader
    discovers, restricted to `LazyLoadedCodec` entries.
    """
    loader = manifest_load.MANIFEST_LOADER
    root_package = __package__.split('.')[0]
    packages = {root_package, *loader.discover()}
    manifests = loader.load(*packages, only=[LazyLoadedCodec])
    return [manifest.value for manifest in manifests]
|
111
|
+
|
112
|
+
|
113
|
+
def _install_manifest_lazy_loaded_codecs(registry: CodecRegistry) -> None:
    """Late-load callback: register the lazy codec references found in manifests."""
    lazy_codecs = _build_manifest_lazy_loaded_codecs()
    registry.register(*lazy_codecs)
|
115
|
+
|
116
|
+
|
117
|
+
##
|
118
|
+
|
119
|
+
|
120
|
+
# Module-level registry. It is populated on first lookup by the two installer callbacks, in the
# order listed.
REGISTRY = CodecRegistry(
    late_load_callbacks=[
        _install_standard_codecs,
        _install_manifest_lazy_loaded_codecs,
    ],
)

# Module-level shortcuts bound to the registry above.
register = REGISTRY.register
lookup = REGISTRY.lookup
|
129
|
+
|
130
|
+
|
131
|
+
##
|
132
|
+
|
133
|
+
|
134
|
+
def encode(i: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Encode `i` with the codec registered as `name`; `kwargs` go to the codec's `new` factory."""
    codec = lookup(name)
    impl = codec.new(**kwargs)
    return impl.encode(i)
|
136
|
+
|
137
|
+
|
138
|
+
def decode(o: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Decode `o` with the codec registered as `name`; `kwargs` go to the codec's `new` factory."""
    codec = lookup(name)
    impl = codec.new(**kwargs)
    return impl.decode(o)
|
omlish/codecs/text.py
ADDED
@@ -0,0 +1,217 @@
|
|
1
|
+
import codecs
|
2
|
+
import dataclasses as dc
|
3
|
+
import functools
|
4
|
+
import typing as ta
|
5
|
+
|
6
|
+
from omlish import check
|
7
|
+
|
8
|
+
from .base import Codec
|
9
|
+
from .base import ComboCodec
|
10
|
+
from .standard import STANDARD_CODECS
|
11
|
+
|
12
|
+
|
13
|
+
##
|
14
|
+
|
15
|
+
|
16
|
+
TextEncodingErrors: ta.TypeAlias = ta.Literal[
|
17
|
+
# Raise UnicodeError (or a subclass), this is the default. Implemented in strict_errors().
|
18
|
+
'strict',
|
19
|
+
|
20
|
+
# Ignore the malformed data and continue without further notice. Implemented in ignore_errors().
|
21
|
+
'ignore',
|
22
|
+
|
23
|
+
# Replace with a replacement marker. On encoding, use ? (ASCII character). On decoding, use � (U+FFFD, the official
|
24
|
+
# REPLACEMENT CHARACTER). Implemented in replace_errors().
|
25
|
+
'replace',
|
26
|
+
|
27
|
+
# Replace with backslashed escape sequences. On encoding, use hexadecimal form of Unicode code point with formats
|
28
|
+
# \xhh \uxxxx \Uxxxxxxxx. On decoding, use hexadecimal form of byte value with format \xhh. Implemented in
|
29
|
+
# backslashreplace_errors().
|
30
|
+
'backslashreplace',
|
31
|
+
|
32
|
+
# On decoding, replace byte with individual surrogate code ranging from U+DC80 to U+DCFF. This code will then be
|
33
|
+
# turned back into the same byte when the 'surrogateescape' error handler is used when encoding the data. (See PEP
|
34
|
+
# 383 for more.)
|
35
|
+
'surrogateescape',
|
36
|
+
|
37
|
+
##
|
38
|
+
# The following error handlers are only applicable to encoding (within text encodings):
|
39
|
+
|
40
|
+
# Replace with XML/HTML numeric character reference, which is a decimal form of Unicode code point with format
|
41
|
+
# &#num;. Implemented in xmlcharrefreplace_errors().
|
42
|
+
'xmlcharrefreplace',
|
43
|
+
|
44
|
+
# Replace with \N{...} escape sequences, what appears in the braces is the Name property from Unicode Character
|
45
|
+
# Database. Implemented in namereplace_errors().
|
46
|
+
'namereplace',
|
47
|
+
|
48
|
+
##
|
49
|
+
# In addition, the following error handler is specific to the given codecs: utf-8, utf-16, utf-32, utf-16-be,
|
50
|
+
# utf-16-le, utf-32-be, utf-32-le
|
51
|
+
|
52
|
+
# Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
|
53
|
+
# treat the presence of surrogate code point in str as an error.
|
54
|
+
'surrogatepass',
|
55
|
+
]
|
56
|
+
|
57
|
+
|
58
|
+
@dc.dataclass(frozen=True, kw_only=True)
class TextEncodingOptions:
    """Immutable, keyword-only options for a text encoding codec."""

    # Error handler name passed to the underlying stdlib codec; defaults to 'strict'.
    errors: TextEncodingErrors = 'strict'
|
61
|
+
|
62
|
+
|
63
|
+
##
|
64
|
+
|
65
|
+
|
66
|
+
class TextEncodingComboCodec(ComboCodec[str, bytes]):
    """Eager and incremental str <-> bytes codec backed by a stdlib `codecs.CodecInfo`."""

    def __init__(
        self,
        info: codecs.CodecInfo,
        # Sharing one default instance is safe: TextEncodingOptions is a frozen dataclass.
        options: TextEncodingOptions = TextEncodingOptions(),
    ) -> None:
        """Wrap `info`, using `options.errors` as the error handler for every operation."""
        super().__init__()
        self._info = check.isinstance(info, codecs.CodecInfo)
        self._opts = check.isinstance(options, TextEncodingOptions)

    @classmethod
    def lookup(
        cls,
        name: str,
        options: TextEncodingOptions = TextEncodingOptions(),
    ) -> 'TextEncodingComboCodec':
        """Alternate constructor: resolve `name` through `codecs.lookup`."""
        return cls(codecs.lookup(name), options)

    def encode(self, i: str) -> bytes:
        """Encode a whole string in one call."""
        # The stdlib encoder returns (output, length consumed); only the output is wanted.
        o, _ = self._info.encode(i, self._opts.errors)
        return o

    def decode(self, o: bytes) -> str:
        """Decode a whole bytes object in one call."""
        i, _ = self._info.decode(o, self._opts.errors)
        return i

    def encode_incremental(self) -> ta.Generator[bytes | None, str, None]:
        """Generator form of encoding.

        After priming, send `str` chunks; each send yields the bytes produced for that chunk, or None
        when nothing was produced. Sending any falsy value ends the input: the encoder is finalized
        and its remaining output is yielded last.
        """
        x = self._info.incrementalencoder(self._opts.errors)
        i = yield None
        while i:
            i = yield x.encode(i) or None
        # The terminating value may be None (that is what a bare next() sends), which the stdlib
        # encoder rejects with TypeError - finalize with an empty string instead.
        yield x.encode(i or '', final=True)

    def decode_incremental(self) -> ta.Generator[str | None, bytes, None]:
        """Generator form of decoding.

        After priming, send `bytes` chunks; each send yields the text produced for that chunk, or None
        when nothing was produced. Sending any falsy value ends the input: the decoder is finalized
        and its remaining output is yielded last.
        """
        x = self._info.incrementaldecoder(self._opts.errors)
        i = yield None
        while i:
            i = yield x.decode(i) or None
        # As in encode_incremental: never hand None to the stdlib decoder.
        yield x.decode(i or b'', final=True)
|
113
|
+
|
114
|
+
|
115
|
+
##
|
116
|
+
|
117
|
+
|
118
|
+
class TextEncodingCodec(Codec):
    """Marker subclass of `Codec` for the str-to-bytes text encodings defined in this module."""
|
120
|
+
|
121
|
+
|
122
|
+
def normalize_text_encoding_name(s: str) -> str:
    """Return `s` lower-cased with every underscore turned into a hyphen.

    Raises:
        NameError: If `s` contains a space.
    """
    if s.count(' '):
        raise NameError(s)
    lowered = s.lower()
    return '-'.join(lowered.split('_'))
|
126
|
+
|
127
|
+
|
128
|
+
def make_text_encoding_codec(
    name: str,
    aliases: ta.Collection[str] | None = None,
    *,
    append_to: ta.MutableSequence[Codec] | None = None,
) -> TextEncodingCodec:
    """Build a `TextEncodingCodec` for the stdlib text encoding called `name`.

    Args:
        name: Encoding name; it must already be in the form `normalize_text_encoding_name` produces.
        aliases: Alternate names, or None. A bare `str` is rejected by `check.not_isinstance`.
        append_to: When not None, the new codec is also appended to this sequence.

    Returns:
        The newly constructed codec.
    """
    checked_name = check.equal(name, normalize_text_encoding_name(name))
    checked_aliases = check.not_isinstance(aliases, str)

    # Both factories resolve the encoding by name when called; the combo codec serves either role.
    eager_factory = functools.partial(TextEncodingComboCodec.lookup, name)
    incremental_factory = functools.partial(TextEncodingComboCodec.lookup, name)

    result = TextEncodingCodec(
        name=checked_name,
        aliases=checked_aliases,

        input=str,
        output=bytes,

        new=eager_factory,
        new_incremental=incremental_factory,
    )

    if append_to is None:
        return result

    append_to.append(result)
    return result
|
149
|
+
|
150
|
+
|
151
|
+
##
|
152
|
+
|
153
|
+
|
154
|
+
# Text encodings exposed as codecs. Each one is also appended to STANDARD_CODECS.

ASCII = make_text_encoding_codec(
    name='ascii',
    aliases=['646', 'us-ascii'],
    append_to=STANDARD_CODECS,
)

LATIN1 = make_text_encoding_codec(
    name='latin-1',
    aliases=['iso-8859-1', 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'l1'],
    append_to=STANDARD_CODECS,
)

UTF32 = make_text_encoding_codec(
    name='utf-32',
    aliases=['u32', 'utf32'],
    append_to=STANDARD_CODECS,
)

UTF32BE = make_text_encoding_codec(
    name='utf-32-be',
    aliases=['utf-32be'],
    append_to=STANDARD_CODECS,
)

UTF32LE = make_text_encoding_codec(
    name='utf-32-le',
    aliases=['utf-32le'],
    append_to=STANDARD_CODECS,
)

UTF16 = make_text_encoding_codec(
    name='utf-16',
    aliases=['u16', 'utf16'],
    append_to=STANDARD_CODECS,
)

UTF16BE = make_text_encoding_codec(
    name='utf-16-be',
    aliases=['utf-16be'],
    append_to=STANDARD_CODECS,
)

UTF16LE = make_text_encoding_codec(
    name='utf-16-le',
    aliases=['utf-16le'],
    append_to=STANDARD_CODECS,
)

UTF7 = make_text_encoding_codec(
    name='utf-7',
    aliases=['u7', 'unicode-1-1-utf-7'],
    append_to=STANDARD_CODECS,
)

UTF8 = make_text_encoding_codec(
    name='utf-8',
    aliases=['u8', 'utf', 'utf8', 'cp65001'],
    append_to=STANDARD_CODECS,
)

UTF8SIG = make_text_encoding_codec(
    name='utf-8-sig',
    append_to=STANDARD_CODECS,
)
|