omlish 0.0.0.dev132__py3-none-any.whl → 0.0.0.dev177__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- omlish/.manifests.json +265 -7
- omlish/__about__.py +7 -5
- omlish/antlr/_runtime/__init__.py +0 -22
- omlish/antlr/_runtime/_all.py +24 -0
- omlish/antlr/_runtime/atn/ParserATNSimulator.py +1 -1
- omlish/antlr/_runtime/dfa/DFASerializer.py +1 -1
- omlish/antlr/_runtime/error/DiagnosticErrorListener.py +2 -1
- omlish/antlr/_runtime/xpath/XPath.py +7 -1
- omlish/antlr/_runtime/xpath/XPathLexer.py +1 -1
- omlish/antlr/delimit.py +106 -0
- omlish/antlr/dot.py +31 -0
- omlish/antlr/errors.py +11 -0
- omlish/antlr/input.py +96 -0
- omlish/antlr/parsing.py +19 -0
- omlish/antlr/runtime.py +102 -0
- omlish/antlr/utils.py +38 -0
- omlish/argparse/all.py +45 -0
- omlish/{argparse.py → argparse/cli.py} +112 -107
- omlish/asyncs/__init__.py +0 -35
- omlish/asyncs/all.py +35 -0
- omlish/asyncs/asyncio/all.py +7 -0
- omlish/asyncs/asyncio/channels.py +40 -0
- omlish/asyncs/asyncio/streams.py +45 -0
- omlish/asyncs/asyncio/subprocesses.py +238 -0
- omlish/asyncs/asyncio/timeouts.py +16 -0
- omlish/asyncs/bluelet/LICENSE +6 -0
- omlish/asyncs/bluelet/all.py +67 -0
- omlish/asyncs/bluelet/api.py +23 -0
- omlish/asyncs/bluelet/core.py +178 -0
- omlish/asyncs/bluelet/events.py +78 -0
- omlish/asyncs/bluelet/files.py +80 -0
- omlish/asyncs/bluelet/runner.py +416 -0
- omlish/asyncs/bluelet/sockets.py +214 -0
- omlish/bootstrap/sys.py +3 -3
- omlish/cached.py +2 -2
- omlish/check.py +49 -460
- omlish/codecs/__init__.py +72 -0
- omlish/codecs/base.py +106 -0
- omlish/codecs/bytes.py +119 -0
- omlish/codecs/chain.py +23 -0
- omlish/codecs/funcs.py +39 -0
- omlish/codecs/registry.py +139 -0
- omlish/codecs/standard.py +4 -0
- omlish/codecs/text.py +217 -0
- omlish/collections/cache/impl.py +50 -57
- omlish/collections/coerce.py +1 -0
- omlish/collections/mappings.py +1 -1
- omlish/configs/flattening.py +1 -1
- omlish/defs.py +1 -1
- omlish/diag/_pycharm/runhack.py +8 -2
- omlish/diag/procfs.py +8 -8
- omlish/docker/__init__.py +0 -36
- omlish/docker/all.py +31 -0
- omlish/docker/consts.py +4 -0
- omlish/{lite/docker.py → docker/detect.py} +18 -0
- omlish/docker/{helpers.py → timebomb.py} +0 -21
- omlish/formats/cbor.py +31 -0
- omlish/formats/cloudpickle.py +31 -0
- omlish/formats/codecs.py +93 -0
- omlish/formats/json/codecs.py +29 -0
- omlish/formats/json/delimted.py +4 -0
- omlish/formats/json/stream/errors.py +2 -0
- omlish/formats/json/stream/lex.py +12 -6
- omlish/formats/json/stream/parse.py +38 -22
- omlish/formats/json5.py +31 -0
- omlish/formats/pickle.py +31 -0
- omlish/formats/repr.py +25 -0
- omlish/formats/toml.py +17 -0
- omlish/formats/yaml.py +25 -0
- omlish/funcs/__init__.py +0 -0
- omlish/{genmachine.py → funcs/genmachine.py} +5 -4
- omlish/{matchfns.py → funcs/match.py} +1 -1
- omlish/funcs/pairs.py +215 -0
- omlish/http/__init__.py +0 -48
- omlish/http/all.py +48 -0
- omlish/http/coro/__init__.py +0 -0
- omlish/{lite/fdio/corohttp.py → http/coro/fdio.py} +21 -19
- omlish/{lite/http/coroserver.py → http/coro/server.py} +20 -21
- omlish/{lite/http → http}/handlers.py +3 -2
- omlish/{lite/http → http}/parsing.py +1 -0
- omlish/http/sessions.py +1 -1
- omlish/{lite/http → http}/versions.py +1 -0
- omlish/inject/managed.py +2 -2
- omlish/io/__init__.py +0 -3
- omlish/{lite/io.py → io/buffers.py} +8 -9
- omlish/io/compress/__init__.py +9 -0
- omlish/io/compress/abc.py +104 -0
- omlish/io/compress/adapters.py +148 -0
- omlish/io/compress/base.py +24 -0
- omlish/io/compress/brotli.py +47 -0
- omlish/io/compress/bz2.py +61 -0
- omlish/io/compress/codecs.py +78 -0
- omlish/io/compress/gzip.py +350 -0
- omlish/io/compress/lz4.py +91 -0
- omlish/io/compress/lzma.py +81 -0
- omlish/io/compress/snappy.py +34 -0
- omlish/io/compress/zlib.py +74 -0
- omlish/io/compress/zstd.py +44 -0
- omlish/io/fdio/__init__.py +1 -0
- omlish/{lite → io}/fdio/handlers.py +5 -5
- omlish/{lite → io}/fdio/kqueue.py +8 -8
- omlish/{lite → io}/fdio/manager.py +7 -7
- omlish/{lite → io}/fdio/pollers.py +13 -13
- omlish/io/generators/__init__.py +56 -0
- omlish/io/generators/consts.py +1 -0
- omlish/io/generators/direct.py +13 -0
- omlish/io/generators/readers.py +189 -0
- omlish/io/generators/stepped.py +191 -0
- omlish/io/pyio.py +5 -2
- omlish/iterators/__init__.py +24 -0
- omlish/iterators/iterators.py +132 -0
- omlish/iterators/recipes.py +18 -0
- omlish/iterators/tools.py +96 -0
- omlish/iterators/unique.py +67 -0
- omlish/lang/__init__.py +13 -1
- omlish/lang/functions.py +11 -2
- omlish/lang/generators.py +243 -0
- omlish/lang/iterables.py +46 -49
- omlish/lang/maybes.py +4 -4
- omlish/lite/cached.py +39 -6
- omlish/lite/check.py +438 -75
- omlish/lite/contextmanagers.py +17 -4
- omlish/lite/dataclasses.py +42 -0
- omlish/lite/inject.py +28 -45
- omlish/lite/logs.py +0 -270
- omlish/lite/marshal.py +309 -144
- omlish/lite/pycharm.py +47 -0
- omlish/lite/reflect.py +33 -0
- omlish/lite/resources.py +8 -0
- omlish/lite/runtime.py +4 -4
- omlish/lite/shlex.py +12 -0
- omlish/lite/socketserver.py +2 -2
- omlish/lite/strings.py +31 -0
- omlish/logs/__init__.py +0 -32
- omlish/logs/{_abc.py → abc.py} +0 -1
- omlish/logs/all.py +37 -0
- omlish/logs/{formatters.py → color.py} +1 -2
- omlish/logs/configs.py +7 -38
- omlish/logs/filters.py +10 -0
- omlish/logs/handlers.py +4 -1
- omlish/logs/json.py +56 -0
- omlish/logs/proxy.py +99 -0
- omlish/logs/standard.py +128 -0
- omlish/logs/utils.py +2 -2
- omlish/manifests/__init__.py +2 -0
- omlish/manifests/load.py +209 -0
- omlish/manifests/types.py +17 -0
- omlish/marshal/base.py +1 -1
- omlish/marshal/factories.py +1 -1
- omlish/marshal/forbidden.py +1 -1
- omlish/marshal/iterables.py +1 -1
- omlish/marshal/literals.py +50 -0
- omlish/marshal/mappings.py +1 -1
- omlish/marshal/maybes.py +1 -1
- omlish/marshal/standard.py +5 -1
- omlish/marshal/unions.py +1 -1
- omlish/os/__init__.py +0 -0
- omlish/os/atomics.py +205 -0
- omlish/os/deathsig.py +23 -0
- omlish/{os.py → os/files.py} +0 -9
- omlish/{lite → os}/journald.py +2 -1
- omlish/os/linux.py +484 -0
- omlish/os/paths.py +36 -0
- omlish/{lite → os}/pidfile.py +1 -0
- omlish/os/sizes.py +9 -0
- omlish/reflect/__init__.py +3 -0
- omlish/reflect/subst.py +2 -1
- omlish/reflect/types.py +126 -44
- omlish/secrets/pwhash.py +1 -1
- omlish/secrets/subprocesses.py +3 -1
- omlish/specs/jsonrpc/marshal.py +1 -1
- omlish/specs/openapi/marshal.py +1 -1
- omlish/sql/alchemy/asyncs.py +1 -1
- omlish/sql/queries/__init__.py +9 -1
- omlish/sql/queries/building.py +3 -0
- omlish/sql/queries/exprs.py +10 -27
- omlish/sql/queries/idents.py +48 -10
- omlish/sql/queries/names.py +80 -13
- omlish/sql/queries/params.py +64 -0
- omlish/sql/queries/rendering.py +1 -1
- omlish/subprocesses.py +340 -0
- omlish/term.py +29 -14
- omlish/testing/pytest/marks.py +2 -2
- omlish/testing/pytest/plugins/asyncs.py +6 -1
- omlish/testing/pytest/plugins/logging.py +1 -1
- omlish/testing/pytest/plugins/switches.py +1 -1
- {omlish-0.0.0.dev132.dist-info → omlish-0.0.0.dev177.dist-info}/METADATA +13 -11
- {omlish-0.0.0.dev132.dist-info → omlish-0.0.0.dev177.dist-info}/RECORD +200 -117
- omlish/fnpairs.py +0 -496
- omlish/formats/json/cli/__main__.py +0 -11
- omlish/formats/json/cli/cli.py +0 -298
- omlish/formats/json/cli/formats.py +0 -71
- omlish/formats/json/cli/io.py +0 -74
- omlish/formats/json/cli/parsing.py +0 -82
- omlish/formats/json/cli/processing.py +0 -48
- omlish/formats/json/cli/rendering.py +0 -92
- omlish/iterators.py +0 -300
- omlish/lite/subprocesses.py +0 -130
- /omlish/{formats/json/cli → argparse}/__init__.py +0 -0
- /omlish/{lite/fdio → asyncs/asyncio}/__init__.py +0 -0
- /omlish/asyncs/{asyncio.py → asyncio/asyncio.py} +0 -0
- /omlish/{lite/http → asyncs/bluelet}/__init__.py +0 -0
- /omlish/collections/{_abc.py → abc.py} +0 -0
- /omlish/{fnpipes.py → funcs/pipes.py} +0 -0
- /omlish/io/{_abc.py → abc.py} +0 -0
- /omlish/sql/{_abc.py → abc.py} +0 -0
- {omlish-0.0.0.dev132.dist-info → omlish-0.0.0.dev177.dist-info}/LICENSE +0 -0
- {omlish-0.0.0.dev132.dist-info → omlish-0.0.0.dev177.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev132.dist-info → omlish-0.0.0.dev177.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev132.dist-info → omlish-0.0.0.dev177.dist-info}/top_level.txt +0 -0
omlish/codecs/base.py
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
"""
|
2
|
+
TODO:
|
3
|
+
- bytes-like - bytearray, memoryview
|
4
|
+
"""
|
5
|
+
import abc
|
6
|
+
import typing as ta
|
7
|
+
|
8
|
+
from omlish import check
|
9
|
+
from omlish import dataclasses as dc
|
10
|
+
from omlish import lang
|
11
|
+
from omlish import reflect as rfl
|
12
|
+
from omlish.funcs import pairs as fps
|
13
|
+
|
14
|
+
|
15
|
+
I = ta.TypeVar('I')
|
16
|
+
O = ta.TypeVar('O')
|
17
|
+
|
18
|
+
|
19
|
+
##
|
20
|
+
|
21
|
+
|
22
|
+
class EagerCodec(lang.Abstract, ta.Generic[I, O]):
    """Abstract codec that converts one complete value per call."""

    @abc.abstractmethod
    def encode(self, i: I) -> O:
        """Turn the input value ``i`` into its encoded (output) form."""

        raise NotImplementedError

    @abc.abstractmethod
    def decode(self, o: O) -> I:
        """Turn the encoded value ``o`` back into its input form."""

        raise NotImplementedError

    def as_pair(self) -> fps.FnPair[I, O]:
        """Expose this codec's bound ``encode`` / ``decode`` methods as a function pair built by ``fps.of``."""

        forward, backward = self.encode, self.decode
        return fps.of(forward, backward)
|
33
|
+
|
34
|
+
|
35
|
+
class IncrementalCodec(lang.Abstract, ta.Generic[I, O]):
    """Abstract codec that processes its data piecewise through generators."""

    @abc.abstractmethod
    def encode_incremental(self) -> ta.Generator[O | None, I, None]:
        """Return a generator that is sent input pieces (``I``) and yields encoded pieces (``O``) or ``None``."""

        raise NotImplementedError

    @abc.abstractmethod
    def decode_incremental(self) -> ta.Generator[I | None, O, None]:
        """Return a generator that is sent encoded pieces (``O``) and yields decoded pieces (``I``) or ``None``."""

        raise NotImplementedError
|
43
|
+
|
44
|
+
|
45
|
+
class ComboCodec(  # noqa
    EagerCodec[I, O],
    IncrementalCodec[I, O],
    lang.Abstract,
    ta.Generic[I, O],
):
    """Abstract base for codecs that provide both the eager and the incremental interfaces."""
|
52
|
+
|
53
|
+
|
54
|
+
##
|
55
|
+
|
56
|
+
|
57
|
+
def check_codec_name(s: str) -> str:
    """
    Validate a codec name (or alias) and hand it back unchanged.

    The value must be a non-empty string, must not contain an underscore, and must not carry leading or trailing
    whitespace. The individual validations are delegated to the ``check`` helpers.
    """

    check.non_empty_str(s)
    check.not_in('_', s)

    stripped = s.strip()
    check.equal(stripped, s)

    return s
|
62
|
+
|
63
|
+
|
64
|
+
##
|
65
|
+
|
66
|
+
|
67
|
+
@dc.dataclass(frozen=True, kw_only=True)
class Codec:
    """Immutable description of a codec: its names, its input/output types, and factories for implementations."""

    # Primary name, validated by check_codec_name.
    name: str = dc.xfield(coerce=check_codec_name)

    # Optional alternative names; when given, each one is validated like the primary name and stored as a list.
    aliases: ta.Collection[str] | None = dc.xfield(
        default=None,
        coerce=lang.opt_fn(lambda names: [check_codec_name(n) for n in names]),  # type: ignore
    )

    # Types on the decoded ('input') and encoded ('output') sides, coerced through rfl.type_.
    input: rfl.Type = dc.xfield(coerce=rfl.type_)
    output: rfl.Type = dc.xfield(coerce=rfl.type_)

    # Optional options class associated with the codec.
    options: type | None = None

    # Factory producing an eager implementation, and optionally one producing an incremental implementation.
    new: ta.Callable[..., EagerCodec]
    new_incremental: ta.Callable[..., IncrementalCodec] | None = None
|
82
|
+
|
83
|
+
|
84
|
+
##
|
85
|
+
|
86
|
+
|
87
|
+
@dc.dataclass(frozen=True, kw_only=True)
class LazyLoadedCodec:
    """Lightweight stand-in for a ``Codec``, recording the module and attribute names it can be fetched from."""

    mod_name: str
    attr_name: str
    name: str
    aliases: ta.Collection[str] | None = None

    @classmethod
    def new(
            cls,
            mod_name: str,
            attr_name: str,
            codec: Codec,
    ) -> 'LazyLoadedCodec':
        """Build a stand-in for ``codec``, copying over its name and aliases."""

        codec_name = codec.name
        codec_aliases = codec.aliases

        return cls(
            mod_name=mod_name,
            attr_name=attr_name,
            name=codec_name,
            aliases=codec_aliases,
        )
|
omlish/codecs/bytes.py
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
"""
|
2
|
+
TODO:
|
3
|
+
- options / kwargs
|
4
|
+
"""
|
5
|
+
import base64
|
6
|
+
import binascii
|
7
|
+
import typing as ta
|
8
|
+
|
9
|
+
from .. import check
|
10
|
+
from .base import Codec
|
11
|
+
from .funcs import FnPairEagerCodec
|
12
|
+
from .standard import STANDARD_CODECS
|
13
|
+
|
14
|
+
|
15
|
+
##
|
16
|
+
|
17
|
+
|
18
|
+
class BytesCodec(Codec):
    """``Codec`` subclass used for the bytes-to-bytes encodings created by this module."""
|
20
|
+
|
21
|
+
|
22
|
+
def make_bytes_encoding_codec(
        name: str,
        aliases: ta.Collection[str] | None,
        encode: ta.Callable[[bytes], bytes],
        decode: ta.Callable[[bytes], bytes],
        *,
        append_to: ta.MutableSequence[Codec] | None = None,
) -> BytesCodec:
    """
    Create a ``BytesCodec`` wrapping a pair of bytes-to-bytes functions.

    Args:
        name: Primary codec name.
        aliases: Optional alternative names; must not be a bare string.
        encode: Function used for encoding.
        decode: Function used for decoding.
        append_to: When given, the new codec is also appended to this sequence.

    Returns:
        The newly created codec.
    """

    # A bare str is itself a collection of characters, so it is rejected rather than treated as a list of aliases.
    checked_aliases = check.not_isinstance(aliases, str)

    def new_eager():
        return FnPairEagerCodec.of(encode, decode)

    result = BytesCodec(
        name=name,
        aliases=checked_aliases,

        input=bytes,
        output=bytes,

        new=new_eager,
    )

    if append_to is None:
        return result

    append_to.append(result)
    return result
|
44
|
+
|
45
|
+
|
46
|
+
##
|
47
|
+
|
48
|
+
|
49
|
+
# Binary-to-text encodings backed by the stdlib ``base64`` and ``binascii`` modules. Every codec created here is also
# appended to STANDARD_CODECS, in the order written.

ASCII85 = make_bytes_encoding_codec(
    'ascii85', ['a85'],
    base64.a85encode, base64.a85decode,
    append_to=STANDARD_CODECS,
)

BASE16 = make_bytes_encoding_codec(
    'base16', ['b16'],
    base64.b16encode, base64.b16decode,
    append_to=STANDARD_CODECS,
)

BASE32 = make_bytes_encoding_codec(
    'base32', ['b32'],
    base64.b32encode, base64.b32decode,
    append_to=STANDARD_CODECS,
)

BASE64 = make_bytes_encoding_codec(
    'base64', ['b64'],
    base64.b64encode, base64.b64decode,
    append_to=STANDARD_CODECS,
)

BASE85 = make_bytes_encoding_codec(
    'base85', ['b85'],
    base64.b85encode, base64.b85decode,
    append_to=STANDARD_CODECS,
)

BASE32_HEX = make_bytes_encoding_codec(
    'base32-hex', ['b32-hex'],
    base64.b32hexencode, base64.b32hexdecode,
    append_to=STANDARD_CODECS,
)

# NOTE(review): despite the 'hex' in its name this codec is wired to the standard-alphabet base64 helpers - confirm
# that this is intended.
BASE64_HEX = make_bytes_encoding_codec(
    'base64-hex', ['b64-hex'],
    base64.standard_b64encode, base64.standard_b64decode,
    append_to=STANDARD_CODECS,
)

BASE64_URLSAFE = make_bytes_encoding_codec(
    'base64-urlsafe', ['b64-urlsafe'],
    base64.urlsafe_b64encode, base64.urlsafe_b64decode,
    append_to=STANDARD_CODECS,
)

HEX = make_bytes_encoding_codec(
    'hex', [],
    binascii.b2a_hex, binascii.a2b_hex,
    append_to=STANDARD_CODECS,
)
|
omlish/codecs/chain.py
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
import dataclasses as dc
|
2
|
+
import typing as ta
|
3
|
+
|
4
|
+
from .base import EagerCodec
|
5
|
+
|
6
|
+
|
7
|
+
@dc.dataclass(frozen=True)
class ChainEagerCodec(EagerCodec[ta.Any, ta.Any]):
    """Eager codec that pipes a value through a sequence of eager codecs."""

    codecs: ta.Sequence[EagerCodec]

    def encode(self, v: ta.Any) -> ta.Any:
        """Encode ``v`` with each codec in order, feeding every result into the next codec."""

        result = v
        for codec in self.codecs:
            result = codec.encode(result)
        return result

    def decode(self, v: ta.Any) -> ta.Any:
        """Decode ``v`` with each codec in reverse order, undoing ``encode`` step by step."""

        result = v
        for codec in reversed(self.codecs):
            result = codec.decode(result)
        return result
|
20
|
+
|
21
|
+
|
22
|
+
def chain(*codecs: EagerCodec) -> ChainEagerCodec:
    """Combine the given eager codecs, in the order passed, into a single ``ChainEagerCodec``."""

    return ChainEagerCodec(codecs=codecs)
|
omlish/codecs/funcs.py
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
import dataclasses as dc
|
2
|
+
import typing as ta
|
3
|
+
|
4
|
+
from ..funcs import pairs as fps
|
5
|
+
from .base import EagerCodec
|
6
|
+
|
7
|
+
|
8
|
+
I = ta.TypeVar('I')
|
9
|
+
O = ta.TypeVar('O')
|
10
|
+
|
11
|
+
|
12
|
+
@dc.dataclass(frozen=True)
class FnPairEagerCodec(EagerCodec[I, O]):
    """Eager codec that delegates to the two directions of a function pair."""

    fp: fps.FnPair[I, O]

    def encode(self, i: I) -> O:
        """Encode ``i`` using the pair's ``forward`` function."""

        forward = self.fp.forward
        return forward(i)

    def decode(self, o: O) -> I:
        """Decode ``o`` using the pair's ``backward`` function."""

        backward = self.fp.backward
        return backward(o)

    @classmethod
    def of(
            cls,
            encode: ta.Callable[[I], O],
            decode: ta.Callable[[O], I],
    ) -> 'FnPairEagerCodec[I, O]':
        """Alternate constructor: build the function pair from two plain callables via ``fps.of``."""

        pair = fps.of(encode, decode)
        return cls(pair)
|
29
|
+
|
30
|
+
|
31
|
+
def of_pair(fp: fps.FnPair[I, O]) -> FnPairEagerCodec[I, O]:
    """Wrap an existing function pair in a ``FnPairEagerCodec``."""

    return FnPairEagerCodec(fp=fp)
|
33
|
+
|
34
|
+
|
35
|
+
def of(
        encode: ta.Callable[[I], O],
        decode: ta.Callable[[O], I],
) -> FnPairEagerCodec[I, O]:
    """Build a ``FnPairEagerCodec`` from separate encode and decode callables."""

    pair = fps.of(encode, decode)
    return FnPairEagerCodec(fp=pair)
|
omlish/codecs/registry.py
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
import contextlib
|
2
|
+
import importlib
|
3
|
+
import threading
|
4
|
+
import typing as ta
|
5
|
+
|
6
|
+
from .. import cached
|
7
|
+
from .. import check
|
8
|
+
from .. import lang
|
9
|
+
from .base import Codec
|
10
|
+
from .base import LazyLoadedCodec
|
11
|
+
from .standard import STANDARD_CODECS
|
12
|
+
|
13
|
+
|
14
|
+
if ta.TYPE_CHECKING:
|
15
|
+
from ..manifests import load as manifest_load
|
16
|
+
else:
|
17
|
+
manifest_load = lang.proxy_import('..manifests.load', __package__)
|
18
|
+
|
19
|
+
|
20
|
+
##
|
21
|
+
|
22
|
+
|
23
|
+
class CodecRegistry:
    """
    Registry of codecs addressable by primary name or alias.

    Entries are either concrete ``Codec`` instances or ``LazyLoadedCodec`` stand-ins; a stand-in is imported and
    replaced by the real codec the first time it is looked up. Optional late-load callbacks are run on the first
    lookup-style call (``lookup``, ``lookup_type``, ``all``) to populate the registry. All state is accessed under a
    re-entrant lock.
    """

    def __init__(
            self,
            *,
            late_load_callbacks: ta.Iterable[ta.Callable[['CodecRegistry'], None]] | None = None,
    ) -> None:
        super().__init__()

        self._late_load_callbacks = late_load_callbacks

        self._lock = threading.RLock()
        # Primary name -> codec (or its not-yet-loaded stand-in).
        self._by_name: dict[str, Codec | LazyLoadedCodec] = {}
        # Every primary name and alias -> primary name.
        self._names_by_alias: dict[str, str] = {}
        # Codec class (and each of its bases except object) -> names of the loaded codecs of that class.
        self._names_by_cls: dict[type, list[str]] = {}

    def _late_load(self) -> None:
        """Run the pending late-load callbacks, if any, then drop them so they are not run again."""

        if self._late_load_callbacks:
            for cb in self._late_load_callbacks:
                cb(self)
            # Only reached when every callback returned without raising.
            self._late_load_callbacks = None

    @contextlib.contextmanager
    def _lock_and_load(self) -> ta.Iterator[None]:
        """Hold the registry lock, making sure the late-load callbacks have run first."""

        with self._lock:
            self._late_load()
            yield

    def _post_load(self, codec: Codec) -> None:
        """Index a concrete codec under every class in its MRO except ``object``."""

        for t in type(codec).__mro__:
            if t is not object:
                self._names_by_cls.setdefault(t, []).append(codec.name)

    def register(self, *codecs: Codec | LazyLoadedCodec) -> ta.Self:
        """
        Register codecs (or lazy stand-ins) under their names and aliases.

        The whole batch is validated before anything is stored, so a conflict leaves the registry unchanged.

        Raises:
            KeyError: If a name or alias is already registered, or is claimed by more than one codec in this call.
        """

        with self._lock:
            # Names claimed so far by earlier codecs of this same batch. Without this, two codecs in one call sharing
            # a name or alias would pass validation and the later one would silently overwrite the earlier one.
            claimed: set[str] = set()
            for codec in codecs:
                for n in {codec.name, *(codec.aliases or [])}:
                    if n in self._names_by_alias or n in claimed:
                        raise KeyError(n)
                    claimed.add(n)

            for codec in codecs:
                self._by_name[codec.name] = codec
                for n in {codec.name, *(codec.aliases or [])}:
                    self._names_by_alias[n] = codec.name
                if isinstance(codec, Codec):
                    self._post_load(codec)

        return self

    def lookup(self, name_or_alias: str) -> Codec:
        """
        Return the codec registered under a name or alias, importing it first if it is a lazy stand-in.

        Underscores in ``name_or_alias`` are treated as hyphens.

        Raises:
            KeyError: If nothing is registered under the given name.
        """

        with self._lock_and_load():
            name = self._names_by_alias[name_or_alias.replace('_', '-')]
            codec_or_lazy = self._by_name[name]

            if isinstance(codec_or_lazy, LazyLoadedCodec):
                mod = importlib.import_module(codec_or_lazy.mod_name)
                codec = check.isinstance(getattr(mod, codec_or_lazy.attr_name), Codec)
                # Replace the stand-in so the import happens only once, and index the now-concrete codec by class.
                self._by_name[name] = codec
                self._post_load(codec)
            else:
                codec = check.isinstance(codec_or_lazy, Codec)

        return codec

    def lookup_type(self, cls: type) -> list[Codec]:
        """Return the codecs indexed under ``cls``; lazy stand-ins are only indexed once they have been loaded."""

        with self._lock_and_load():
            return [self.lookup(n) for n in self._names_by_cls.get(cls, [])]

    def all(self) -> frozenset[str]:
        """Return the primary names of every registered codec, loaded or not."""

        with self._lock_and_load():
            return frozenset(self._by_name)
|
93
|
+
|
94
|
+
|
95
|
+
##
|
96
|
+
|
97
|
+
|
98
|
+
def _install_standard_codecs(registry: CodecRegistry) -> None:
    """Late-load callback: register every codec collected in STANDARD_CODECS."""

    standard = tuple(STANDARD_CODECS)
    registry.register(*standard)
|
100
|
+
|
101
|
+
|
102
|
+
##
|
103
|
+
|
104
|
+
|
105
|
+
@cached.function
def _build_manifest_lazy_loaded_codecs() -> ta.Sequence[LazyLoadedCodec]:
    """
    Collect the ``LazyLoadedCodec`` manifest values of this package's root package and of every package the manifest
    loader discovers.
    """

    loader = manifest_load.MANIFEST_LOADER

    root_pkg = __package__.split('.')[0]
    pkgs = {root_pkg, *loader.discover()}

    manifests = loader.load(*pkgs, only=[LazyLoadedCodec])
    return [manifest.value for manifest in manifests]
|
111
|
+
|
112
|
+
|
113
|
+
def _install_manifest_lazy_loaded_codecs(registry: CodecRegistry) -> None:
    """Late-load callback: register the lazy codec stand-ins found through manifests."""

    lazy_codecs = _build_manifest_lazy_loaded_codecs()
    registry.register(*lazy_codecs)
|
115
|
+
|
116
|
+
|
117
|
+
##
|
118
|
+
|
119
|
+
|
120
|
+
# Module-level default registry. It is populated on first use: the standard codecs are installed first, then the
# manifest-declared lazy codecs.
REGISTRY = CodecRegistry(late_load_callbacks=[_install_standard_codecs, _install_manifest_lazy_loaded_codecs])

# Module-level shortcuts bound to the default registry.
register, lookup = REGISTRY.register, REGISTRY.lookup
|
129
|
+
|
130
|
+
|
131
|
+
##
|
132
|
+
|
133
|
+
|
134
|
+
def encode(i: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Encode ``i`` with a new eager codec built (using ``kwargs``) from the registered codec called ``name``."""

    codec = lookup(name)
    eager = codec.new(**kwargs)
    return eager.encode(i)
|
136
|
+
|
137
|
+
|
138
|
+
def decode(o: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Decode ``o`` with a new eager codec built (using ``kwargs``) from the registered codec called ``name``."""

    codec = lookup(name)
    eager = codec.new(**kwargs)
    return eager.decode(o)
|
omlish/codecs/text.py
ADDED
@@ -0,0 +1,217 @@
|
|
1
|
+
import codecs
|
2
|
+
import dataclasses as dc
|
3
|
+
import functools
|
4
|
+
import typing as ta
|
5
|
+
|
6
|
+
from omlish import check
|
7
|
+
|
8
|
+
from .base import Codec
|
9
|
+
from .base import ComboCodec
|
10
|
+
from .standard import STANDARD_CODECS
|
11
|
+
|
12
|
+
|
13
|
+
##
|
14
|
+
|
15
|
+
|
16
|
+
TextEncodingErrors: ta.TypeAlias = ta.Literal[
|
17
|
+
# Raise UnicodeError (or a subclass), this is the default. Implemented in strict_errors().
|
18
|
+
'strict',
|
19
|
+
|
20
|
+
# Ignore the malformed data and continue without further notice. Implemented in ignore_errors().
|
21
|
+
'ignore',
|
22
|
+
|
23
|
+
# Replace with a replacement marker. On encoding, use ? (ASCII character). On decoding, use � (U+FFFD, the official
|
24
|
+
# REPLACEMENT CHARACTER). Implemented in replace_errors().
|
25
|
+
'replace',
|
26
|
+
|
27
|
+
# Replace with backslashed escape sequences. On encoding, use hexadecimal form of Unicode code point with formats
|
28
|
+
# \xhh \uxxxx \Uxxxxxxxx. On decoding, use hexadecimal form of byte value with format \xhh. Implemented in
|
29
|
+
# backslashreplace_errors().
|
30
|
+
'backslashreplace',
|
31
|
+
|
32
|
+
# On decoding, replace byte with individual surrogate code ranging from U+DC80 to U+DCFF. This code will then be
|
33
|
+
# turned back into the same byte when the 'surrogateescape' error handler is used when encoding the data. (See PEP
|
34
|
+
# 383 for more.)
|
35
|
+
'surrogateescape',
|
36
|
+
|
37
|
+
##
|
38
|
+
# The following error handlers are only applicable to encoding (within text encodings):
|
39
|
+
|
40
|
+
# Replace with XML/HTML numeric character reference, which is a decimal form of Unicode code point with format
|
41
|
+
# &#num;. Implemented in xmlcharrefreplace_errors().
|
42
|
+
'xmlcharrefreplace',
|
43
|
+
|
44
|
+
# Replace with \N{...} escape sequences, what appears in the braces is the Name property from Unicode Character
|
45
|
+
# Database. Implemented in namereplace_errors().
|
46
|
+
'namereplace',
|
47
|
+
|
48
|
+
##
|
49
|
+
# In addition, the following error handler is specific to the given codecs: utf-8, utf-16, utf-32, utf-16-be,
|
50
|
+
# utf-16-le, utf-32-be, utf-32-le
|
51
|
+
|
52
|
+
# Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
|
53
|
+
# treat the presence of surrogate code point in str as an error.
|
54
|
+
'surrogatepass',
|
55
|
+
]
|
56
|
+
|
57
|
+
|
58
|
+
@dc.dataclass(frozen=True, kw_only=True)
class TextEncodingOptions:
    """Immutable options for a text encoding codec."""

    # Name of the error handler handed to the underlying codec calls.
    errors: TextEncodingErrors = 'strict'
|
61
|
+
|
62
|
+
|
63
|
+
##
|
64
|
+
|
65
|
+
|
66
|
+
class TextEncodingComboCodec(ComboCodec[str, bytes]):
    """Eager and incremental str <-> bytes codec backed by a stdlib ``codecs.CodecInfo``."""

    def __init__(
            self,
            info: codecs.CodecInfo,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> None:
        super().__init__()
        self._info = check.isinstance(info, codecs.CodecInfo)
        self._opts = check.isinstance(options, TextEncodingOptions)

    @classmethod
    def lookup(
            cls,
            name: str,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> 'TextEncodingComboCodec':
        """Build a codec for the stdlib encoding called ``name`` (resolved with ``codecs.lookup``)."""

        return cls(codecs.lookup(name), options)

    def encode(self, i: str) -> bytes:
        """Encode the whole string ``i`` using the configured error handler."""

        # The codec also returns the amount of input consumed, which is not needed here.
        o, _ = self._info.encode(i, self._opts.errors)
        return o

    def decode(self, o: bytes) -> str:
        """Decode the whole bytes value ``o`` using the configured error handler."""

        i, _ = self._info.decode(o, self._opts.errors)
        return i

    def encode_incremental(self) -> ta.Generator[bytes | None, str, None]:
        """
        Return a generator encoding text piece by piece.

        The generator first yields ``None``. Every non-empty string sent in yields the bytes produced for it, or
        ``None`` when the encoder produced nothing. Sending a falsy value ends the input: the encoder is flushed and
        its final output is yielded.
        """

        x = self._info.incrementalencoder(self._opts.errors)
        i = yield None
        while i:
            o = x.encode(i)
            i = yield o or None
        # The terminating value may be None (for example when the generator is advanced with a bare next()), which
        # the stdlib encoders reject, so the flush is always done with an explicit empty string.
        o = x.encode('', final=True)
        yield o

    def decode_incremental(self) -> ta.Generator[str | None, bytes, None]:
        """
        Return a generator decoding bytes piece by piece.

        The generator first yields ``None``. Every non-empty bytes value sent in yields the text produced for it, or
        ``None`` when the decoder produced nothing. Sending a falsy value ends the input: the decoder is flushed and
        its final output is yielded.
        """

        x = self._info.incrementaldecoder(self._opts.errors)
        i = yield None
        while i:
            o = x.decode(i)
            i = yield o or None
        # As in encode_incremental, flush with an explicit empty value instead of a possibly-None terminator.
        o = x.decode(b'', final=True)
        yield o
|
113
|
+
|
114
|
+
|
115
|
+
##
|
116
|
+
|
117
|
+
|
118
|
+
class TextEncodingCodec(Codec):
    """``Codec`` subclass used for the str-to-bytes text encodings created by this module."""
|
120
|
+
|
121
|
+
|
122
|
+
def normalize_text_encoding_name(s: str) -> str:
    """
    Return ``s`` lower-cased with every underscore replaced by a hyphen.

    Raises:
        NameError: If ``s`` contains a space character.
    """

    has_space = ' ' in s
    if has_space:
        raise NameError(s)

    lowered = s.lower()
    return lowered.replace('_', '-')
|
126
|
+
|
127
|
+
|
128
|
+
def make_text_encoding_codec(
        name: str,
        aliases: ta.Collection[str] | None = None,
        *,
        append_to: ta.MutableSequence[Codec] | None = None,
) -> TextEncodingCodec:
    """
    Create a ``TextEncodingCodec`` for the stdlib text encoding called ``name``.

    Args:
        name: Encoding name; must already be in the form produced by ``normalize_text_encoding_name``.
        aliases: Optional alternative names; must not be a bare string.
        append_to: When given, the new codec is also appended to this sequence.

    Returns:
        The newly created codec.
    """

    # The name has to be given in normalized form already.
    normalized = normalize_text_encoding_name(name)
    checked_name = check.equal(name, normalized)

    # A bare str is itself a collection of characters, so it is rejected rather than treated as a list of aliases.
    checked_aliases = check.not_isinstance(aliases, str)

    # The same implementation class serves both the eager and the incremental interface.
    eager_factory = functools.partial(TextEncodingComboCodec.lookup, name)
    incremental_factory = functools.partial(TextEncodingComboCodec.lookup, name)

    result = TextEncodingCodec(
        name=checked_name,
        aliases=checked_aliases,

        input=str,
        output=bytes,

        new=eager_factory,
        new_incremental=incremental_factory,
    )

    if append_to is None:
        return result

    append_to.append(result)
    return result
|
149
|
+
|
150
|
+
|
151
|
+
##
|
152
|
+
|
153
|
+
|
154
|
+
# Common stdlib text encodings. Every codec created here is also appended to STANDARD_CODECS, in the order written.

ASCII = make_text_encoding_codec('ascii', ['646', 'us-ascii'], append_to=STANDARD_CODECS)

LATIN1 = make_text_encoding_codec(
    'latin-1',
    ['iso-8859-1', 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'l1'],
    append_to=STANDARD_CODECS,
)

UTF32 = make_text_encoding_codec('utf-32', ['u32', 'utf32'], append_to=STANDARD_CODECS)

UTF32BE = make_text_encoding_codec('utf-32-be', ['utf-32be'], append_to=STANDARD_CODECS)

UTF32LE = make_text_encoding_codec('utf-32-le', ['utf-32le'], append_to=STANDARD_CODECS)

UTF16 = make_text_encoding_codec('utf-16', ['u16', 'utf16'], append_to=STANDARD_CODECS)

UTF16BE = make_text_encoding_codec('utf-16-be', ['utf-16be'], append_to=STANDARD_CODECS)

UTF16LE = make_text_encoding_codec('utf-16-le', ['utf-16le'], append_to=STANDARD_CODECS)

UTF7 = make_text_encoding_codec('utf-7', ['u7', 'unicode-1-1-utf-7'], append_to=STANDARD_CODECS)

UTF8 = make_text_encoding_codec('utf-8', ['u8', 'utf', 'utf8', 'cp65001'], append_to=STANDARD_CODECS)

# No aliases are declared for the BOM-signature variant.
UTF8SIG = make_text_encoding_codec('utf-8-sig', append_to=STANDARD_CODECS)
|