omlish 0.0.0.dev133__py3-none-any.whl → 0.0.0.dev177__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (210) hide show
  1. omlish/.manifests.json +265 -7
  2. omlish/__about__.py +5 -3
  3. omlish/antlr/_runtime/__init__.py +0 -22
  4. omlish/antlr/_runtime/_all.py +24 -0
  5. omlish/antlr/_runtime/atn/ParserATNSimulator.py +1 -1
  6. omlish/antlr/_runtime/dfa/DFASerializer.py +1 -1
  7. omlish/antlr/_runtime/error/DiagnosticErrorListener.py +2 -1
  8. omlish/antlr/_runtime/xpath/XPath.py +7 -1
  9. omlish/antlr/_runtime/xpath/XPathLexer.py +1 -1
  10. omlish/antlr/delimit.py +106 -0
  11. omlish/antlr/dot.py +31 -0
  12. omlish/antlr/errors.py +11 -0
  13. omlish/antlr/input.py +96 -0
  14. omlish/antlr/parsing.py +19 -0
  15. omlish/antlr/runtime.py +102 -0
  16. omlish/antlr/utils.py +38 -0
  17. omlish/argparse/all.py +45 -0
  18. omlish/{argparse.py → argparse/cli.py} +112 -107
  19. omlish/asyncs/__init__.py +0 -35
  20. omlish/asyncs/all.py +35 -0
  21. omlish/asyncs/asyncio/all.py +7 -0
  22. omlish/asyncs/asyncio/channels.py +40 -0
  23. omlish/asyncs/asyncio/streams.py +45 -0
  24. omlish/asyncs/asyncio/subprocesses.py +238 -0
  25. omlish/asyncs/asyncio/timeouts.py +16 -0
  26. omlish/asyncs/bluelet/LICENSE +6 -0
  27. omlish/asyncs/bluelet/all.py +67 -0
  28. omlish/asyncs/bluelet/api.py +23 -0
  29. omlish/asyncs/bluelet/core.py +178 -0
  30. omlish/asyncs/bluelet/events.py +78 -0
  31. omlish/asyncs/bluelet/files.py +80 -0
  32. omlish/asyncs/bluelet/runner.py +416 -0
  33. omlish/asyncs/bluelet/sockets.py +214 -0
  34. omlish/bootstrap/sys.py +3 -3
  35. omlish/cached.py +2 -2
  36. omlish/check.py +49 -460
  37. omlish/codecs/__init__.py +72 -0
  38. omlish/codecs/base.py +106 -0
  39. omlish/codecs/bytes.py +119 -0
  40. omlish/codecs/chain.py +23 -0
  41. omlish/codecs/funcs.py +39 -0
  42. omlish/codecs/registry.py +139 -0
  43. omlish/codecs/standard.py +4 -0
  44. omlish/codecs/text.py +217 -0
  45. omlish/collections/cache/impl.py +50 -57
  46. omlish/collections/coerce.py +1 -0
  47. omlish/collections/mappings.py +1 -1
  48. omlish/configs/flattening.py +1 -1
  49. omlish/defs.py +1 -1
  50. omlish/diag/_pycharm/runhack.py +8 -2
  51. omlish/diag/procfs.py +8 -8
  52. omlish/docker/__init__.py +0 -36
  53. omlish/docker/all.py +31 -0
  54. omlish/docker/consts.py +4 -0
  55. omlish/{lite/docker.py → docker/detect.py} +18 -0
  56. omlish/docker/{helpers.py → timebomb.py} +0 -21
  57. omlish/formats/cbor.py +31 -0
  58. omlish/formats/cloudpickle.py +31 -0
  59. omlish/formats/codecs.py +93 -0
  60. omlish/formats/json/codecs.py +29 -0
  61. omlish/formats/json/delimted.py +4 -0
  62. omlish/formats/json/stream/errors.py +2 -0
  63. omlish/formats/json/stream/lex.py +12 -6
  64. omlish/formats/json/stream/parse.py +38 -22
  65. omlish/formats/json5.py +31 -0
  66. omlish/formats/pickle.py +31 -0
  67. omlish/formats/repr.py +25 -0
  68. omlish/formats/toml.py +17 -0
  69. omlish/formats/yaml.py +25 -0
  70. omlish/funcs/__init__.py +0 -0
  71. omlish/{genmachine.py → funcs/genmachine.py} +5 -4
  72. omlish/{matchfns.py → funcs/match.py} +1 -1
  73. omlish/funcs/pairs.py +215 -0
  74. omlish/http/__init__.py +0 -48
  75. omlish/http/all.py +48 -0
  76. omlish/http/coro/__init__.py +0 -0
  77. omlish/{lite/fdio/corohttp.py → http/coro/fdio.py} +21 -19
  78. omlish/{lite/http/coroserver.py → http/coro/server.py} +20 -21
  79. omlish/{lite/http → http}/handlers.py +3 -2
  80. omlish/{lite/http → http}/parsing.py +1 -0
  81. omlish/http/sessions.py +1 -1
  82. omlish/{lite/http → http}/versions.py +1 -0
  83. omlish/inject/managed.py +2 -2
  84. omlish/io/__init__.py +0 -3
  85. omlish/{lite/io.py → io/buffers.py} +8 -9
  86. omlish/io/compress/__init__.py +9 -0
  87. omlish/io/compress/abc.py +104 -0
  88. omlish/io/compress/adapters.py +148 -0
  89. omlish/io/compress/base.py +24 -0
  90. omlish/io/compress/brotli.py +47 -0
  91. omlish/io/compress/bz2.py +61 -0
  92. omlish/io/compress/codecs.py +78 -0
  93. omlish/io/compress/gzip.py +350 -0
  94. omlish/io/compress/lz4.py +91 -0
  95. omlish/io/compress/lzma.py +81 -0
  96. omlish/io/compress/snappy.py +34 -0
  97. omlish/io/compress/zlib.py +74 -0
  98. omlish/io/compress/zstd.py +44 -0
  99. omlish/io/fdio/__init__.py +1 -0
  100. omlish/{lite → io}/fdio/handlers.py +5 -5
  101. omlish/{lite → io}/fdio/kqueue.py +8 -8
  102. omlish/{lite → io}/fdio/manager.py +7 -7
  103. omlish/{lite → io}/fdio/pollers.py +13 -13
  104. omlish/io/generators/__init__.py +56 -0
  105. omlish/io/generators/consts.py +1 -0
  106. omlish/io/generators/direct.py +13 -0
  107. omlish/io/generators/readers.py +189 -0
  108. omlish/io/generators/stepped.py +191 -0
  109. omlish/io/pyio.py +5 -2
  110. omlish/iterators/__init__.py +24 -0
  111. omlish/iterators/iterators.py +132 -0
  112. omlish/iterators/recipes.py +18 -0
  113. omlish/iterators/tools.py +96 -0
  114. omlish/iterators/unique.py +67 -0
  115. omlish/lang/__init__.py +13 -1
  116. omlish/lang/functions.py +11 -2
  117. omlish/lang/generators.py +243 -0
  118. omlish/lang/iterables.py +46 -49
  119. omlish/lang/maybes.py +4 -4
  120. omlish/lite/cached.py +39 -6
  121. omlish/lite/check.py +438 -75
  122. omlish/lite/contextmanagers.py +17 -4
  123. omlish/lite/dataclasses.py +42 -0
  124. omlish/lite/inject.py +28 -45
  125. omlish/lite/logs.py +0 -270
  126. omlish/lite/marshal.py +309 -144
  127. omlish/lite/pycharm.py +47 -0
  128. omlish/lite/reflect.py +33 -0
  129. omlish/lite/resources.py +8 -0
  130. omlish/lite/runtime.py +4 -4
  131. omlish/lite/shlex.py +12 -0
  132. omlish/lite/socketserver.py +2 -2
  133. omlish/lite/strings.py +31 -0
  134. omlish/logs/__init__.py +0 -32
  135. omlish/logs/{_abc.py → abc.py} +0 -1
  136. omlish/logs/all.py +37 -0
  137. omlish/logs/{formatters.py → color.py} +1 -2
  138. omlish/logs/configs.py +7 -38
  139. omlish/logs/filters.py +10 -0
  140. omlish/logs/handlers.py +4 -1
  141. omlish/logs/json.py +56 -0
  142. omlish/logs/proxy.py +99 -0
  143. omlish/logs/standard.py +128 -0
  144. omlish/logs/utils.py +2 -2
  145. omlish/manifests/__init__.py +2 -0
  146. omlish/manifests/load.py +209 -0
  147. omlish/manifests/types.py +17 -0
  148. omlish/marshal/base.py +1 -1
  149. omlish/marshal/factories.py +1 -1
  150. omlish/marshal/forbidden.py +1 -1
  151. omlish/marshal/iterables.py +1 -1
  152. omlish/marshal/literals.py +50 -0
  153. omlish/marshal/mappings.py +1 -1
  154. omlish/marshal/maybes.py +1 -1
  155. omlish/marshal/standard.py +5 -1
  156. omlish/marshal/unions.py +1 -1
  157. omlish/os/__init__.py +0 -0
  158. omlish/os/atomics.py +205 -0
  159. omlish/os/deathsig.py +23 -0
  160. omlish/{os.py → os/files.py} +0 -9
  161. omlish/{lite → os}/journald.py +2 -1
  162. omlish/os/linux.py +484 -0
  163. omlish/os/paths.py +36 -0
  164. omlish/{lite → os}/pidfile.py +1 -0
  165. omlish/os/sizes.py +9 -0
  166. omlish/reflect/__init__.py +3 -0
  167. omlish/reflect/subst.py +2 -1
  168. omlish/reflect/types.py +126 -44
  169. omlish/secrets/pwhash.py +1 -1
  170. omlish/secrets/subprocesses.py +3 -1
  171. omlish/specs/jsonrpc/marshal.py +1 -1
  172. omlish/specs/openapi/marshal.py +1 -1
  173. omlish/sql/alchemy/asyncs.py +1 -1
  174. omlish/sql/queries/__init__.py +9 -1
  175. omlish/sql/queries/building.py +3 -0
  176. omlish/sql/queries/exprs.py +10 -27
  177. omlish/sql/queries/idents.py +48 -10
  178. omlish/sql/queries/names.py +80 -13
  179. omlish/sql/queries/params.py +64 -0
  180. omlish/sql/queries/rendering.py +1 -1
  181. omlish/subprocesses.py +340 -0
  182. omlish/term.py +29 -14
  183. omlish/testing/pytest/marks.py +2 -2
  184. omlish/testing/pytest/plugins/asyncs.py +6 -1
  185. omlish/testing/pytest/plugins/logging.py +1 -1
  186. omlish/testing/pytest/plugins/switches.py +1 -1
  187. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/METADATA +7 -5
  188. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/RECORD +200 -117
  189. omlish/fnpairs.py +0 -496
  190. omlish/formats/json/cli/__main__.py +0 -11
  191. omlish/formats/json/cli/cli.py +0 -298
  192. omlish/formats/json/cli/formats.py +0 -71
  193. omlish/formats/json/cli/io.py +0 -74
  194. omlish/formats/json/cli/parsing.py +0 -82
  195. omlish/formats/json/cli/processing.py +0 -48
  196. omlish/formats/json/cli/rendering.py +0 -92
  197. omlish/iterators.py +0 -300
  198. omlish/lite/subprocesses.py +0 -130
  199. /omlish/{formats/json/cli → argparse}/__init__.py +0 -0
  200. /omlish/{lite/fdio → asyncs/asyncio}/__init__.py +0 -0
  201. /omlish/asyncs/{asyncio.py → asyncio/asyncio.py} +0 -0
  202. /omlish/{lite/http → asyncs/bluelet}/__init__.py +0 -0
  203. /omlish/collections/{_abc.py → abc.py} +0 -0
  204. /omlish/{fnpipes.py → funcs/pipes.py} +0 -0
  205. /omlish/io/{_abc.py → abc.py} +0 -0
  206. /omlish/sql/{_abc.py → abc.py} +0 -0
  207. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/LICENSE +0 -0
  208. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/WHEEL +0 -0
  209. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/entry_points.txt +0 -0
  210. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/top_level.txt +0 -0
omlish/codecs/base.py ADDED
@@ -0,0 +1,106 @@
1
+ """
2
+ TODO:
3
+ - bytes-like - bytearray, memoryview
4
+ """
5
+ import abc
6
+ import typing as ta
7
+
8
+ from omlish import check
9
+ from omlish import dataclasses as dc
10
+ from omlish import lang
11
+ from omlish import reflect as rfl
12
+ from omlish.funcs import pairs as fps
13
+
14
+
# Generic parameters used by the codec interfaces in this module: `encode` takes an `I` and produces an `O`, `decode`
# takes an `O` and produces an `I`.
I = ta.TypeVar('I')
O = ta.TypeVar('O')
17
+
18
+
19
+ ##
20
+
21
+
class EagerCodec(lang.Abstract, ta.Generic[I, O]):
    """
    Abstract interface for a codec that converts whole values in one call: `encode` maps an `I` to an `O` and `decode`
    maps an `O` back to an `I`.
    """

    @abc.abstractmethod
    def encode(self, i: I) -> O:
        """Convert the value `i` to its encoded form. Must be overridden by subclasses."""
        raise NotImplementedError

    @abc.abstractmethod
    def decode(self, o: O) -> I:
        """Convert the encoded value `o` back to its decoded form. Must be overridden by subclasses."""
        raise NotImplementedError

    def as_pair(self) -> fps.FnPair[I, O]:
        """Return an `FnPair` built by `fps.of` from this codec's bound `encode` and `decode` methods."""
        forward, backward = self.encode, self.decode
        return fps.of(forward, backward)
33
+
34
+
class IncrementalCodec(lang.Abstract, ta.Generic[I, O]):
    """
    Abstract interface for a codec driven piece by piece through generators.

    Each method returns a generator that is sent values of one side and yields values of the other side, or `None`.
    """

    @abc.abstractmethod
    def encode_incremental(self) -> ta.Generator[O | None, I, None]:
        """Return a generator that receives `I` values and yields `O` values or `None`. Must be overridden."""
        raise NotImplementedError

    @abc.abstractmethod
    def decode_incremental(self) -> ta.Generator[I | None, O, None]:
        """Return a generator that receives `O` values and yields `I` values or `None`. Must be overridden."""
        raise NotImplementedError
43
+
44
+
class ComboCodec(  # noqa
    EagerCodec[I, O],
    IncrementalCodec[I, O],
    lang.Abstract,
    ta.Generic[I, O],
):
    """Abstract base combining the eager (`EagerCodec`) and incremental (`IncrementalCodec`) codec interfaces."""
52
+
53
+
54
+ ##
55
+
56
+
def check_codec_name(s: str) -> str:
    """
    Validate a codec name or alias and hand it back unchanged.

    The value is passed, in order, through `check.non_empty_str`, `check.not_in('_', ...)` and a `check.equal`
    comparison against its own `strip()`-ed form - i.e. it is expected to be a non-empty string containing no
    underscores and carrying no leading or trailing whitespace.
    """
    check.non_empty_str(s)
    check.not_in('_', s)

    stripped = s.strip()
    check.equal(stripped, s)

    return s
62
+
63
+
64
+ ##
65
+
66
+
@dc.dataclass(frozen=True, kw_only=True)
class Codec:
    """Frozen, keyword-only description of a codec: its names, its value types and factories for codec instances."""

    # Primary name, coerced through check_codec_name.
    name: str = dc.xfield(coerce=check_codec_name)

    # Optional alternate names; the coercion rebuilds them as a list with every entry run through check_codec_name.
    # NOTE(review): lang.opt_fn presumably lets a None value pass through untouched - confirm.
    aliases: ta.Collection[str] | None = dc.xfield(
        coerce=lang.opt_fn(lambda names: [check_codec_name(n) for n in names]),  # type: ignore
        default=None,
    )

    # Types of the decoded and encoded sides, each coerced through rfl.type_.
    input: rfl.Type = dc.xfield(coerce=rfl.type_)
    output: rfl.Type = dc.xfield(coerce=rfl.type_)

    # Optional options type associated with the codec.
    options: type | None = None

    # Factory producing an eager codec instance, and an optional factory producing an incremental one.
    new: ta.Callable[..., EagerCodec]
    new_incremental: ta.Callable[..., IncrementalCodec] | None = None
82
+
83
+
84
+ ##
85
+
86
+
@dc.dataclass(frozen=True, kw_only=True)
class LazyLoadedCodec:
    """
    Lightweight reference to a `Codec`: records a module name and attribute name alongside the codec's name and
    aliases, without holding the codec itself.
    """

    mod_name: str
    attr_name: str
    name: str
    aliases: ta.Collection[str] | None = None

    @classmethod
    def new(cls, mod_name: str, attr_name: str, codec: Codec) -> 'LazyLoadedCodec':
        """Build a reference for `codec`, copying its `name` and `aliases` and storing the given location."""
        fields = dict(
            mod_name=mod_name,
            attr_name=attr_name,
            name=codec.name,
            aliases=codec.aliases,
        )
        return cls(**fields)
omlish/codecs/bytes.py ADDED
@@ -0,0 +1,119 @@
1
+ """
2
+ TODO:
3
+ - options / kwargs
4
+ """
5
+ import base64
6
+ import binascii
7
+ import typing as ta
8
+
9
+ from .. import check
10
+ from .base import Codec
11
+ from .funcs import FnPairEagerCodec
12
+ from .standard import STANDARD_CODECS
13
+
14
+
15
+ ##
16
+
17
+
class BytesCodec(Codec):
    """Marker subclass of `Codec` used for the bytes-to-bytes encodings defined in this module."""
20
+
21
+
def make_bytes_encoding_codec(
        name: str,
        aliases: ta.Collection[str] | None,
        encode: ta.Callable[[bytes], bytes],
        decode: ta.Callable[[bytes], bytes],
        *,
        append_to: ta.MutableSequence[Codec] | None = None,
) -> BytesCodec:
    """
    Build a `BytesCodec` description around a pair of bytes -> bytes functions.

    Args:
        name: Primary codec name.
        aliases: Alternate names, or None. Passed through `check.not_isinstance(..., str)` so a bare string is not
            accepted in place of a collection.
        encode: Function applied when encoding.
        decode: Function applied when decoding.
        append_to: If given, the new codec is also appended to this sequence.

    Returns:
        The newly built codec description.
    """

    def new_codec() -> FnPairEagerCodec:
        # Takes no arguments; a fresh eager codec is wrapped around the two functions on every call.
        return FnPairEagerCodec.of(encode, decode)

    result = BytesCodec(
        name=name,
        aliases=check.not_isinstance(aliases, str),
        input=bytes,
        output=bytes,
        new=new_codec,
    )

    if append_to is None:
        return result

    append_to.append(result)
    return result
44
+
45
+
46
+ ##
47
+
48
+
# Bytes-to-bytes encodings backed by the stdlib `base64` / `binascii` modules. Each one is appended to
# STANDARD_CODECS as it is created, in the order written here.

ASCII85 = make_bytes_encoding_codec(
    name='ascii85',
    aliases=['a85'],
    encode=base64.a85encode,
    decode=base64.a85decode,
    append_to=STANDARD_CODECS,
)

BASE16 = make_bytes_encoding_codec(
    name='base16',
    aliases=['b16'],
    encode=base64.b16encode,
    decode=base64.b16decode,
    append_to=STANDARD_CODECS,
)

BASE32 = make_bytes_encoding_codec(
    name='base32',
    aliases=['b32'],
    encode=base64.b32encode,
    decode=base64.b32decode,
    append_to=STANDARD_CODECS,
)

BASE64 = make_bytes_encoding_codec(
    name='base64',
    aliases=['b64'],
    encode=base64.b64encode,
    decode=base64.b64decode,
    append_to=STANDARD_CODECS,
)

BASE85 = make_bytes_encoding_codec(
    name='base85',
    aliases=['b85'],
    encode=base64.b85encode,
    decode=base64.b85decode,
    append_to=STANDARD_CODECS,
)

BASE32_HEX = make_bytes_encoding_codec(
    name='base32-hex',
    aliases=['b32-hex'],
    encode=base64.b32hexencode,
    decode=base64.b32hexdecode,
    append_to=STANDARD_CODECS,
)

# NOTE(review): registered as 'base64-hex' but wired to the standard-alphabet base64 functions - confirm intended.
BASE64_HEX = make_bytes_encoding_codec(
    name='base64-hex',
    aliases=['b64-hex'],
    encode=base64.standard_b64encode,
    decode=base64.standard_b64decode,
    append_to=STANDARD_CODECS,
)

BASE64_URLSAFE = make_bytes_encoding_codec(
    name='base64-urlsafe',
    aliases=['b64-urlsafe'],
    encode=base64.urlsafe_b64encode,
    decode=base64.urlsafe_b64decode,
    append_to=STANDARD_CODECS,
)

HEX = make_bytes_encoding_codec(
    name='hex',
    aliases=[],
    encode=binascii.b2a_hex,
    decode=binascii.a2b_hex,
    append_to=STANDARD_CODECS,
)
omlish/codecs/chain.py ADDED
@@ -0,0 +1,23 @@
1
+ import dataclasses as dc
2
+ import typing as ta
3
+
4
+ from .base import EagerCodec
5
+
6
+
@dc.dataclass(frozen=True)
class ChainEagerCodec(EagerCodec[ta.Any, ta.Any]):
    """Eager codec that composes a sequence of eager codecs into a single one."""

    # Codecs applied first-to-last when encoding and last-to-first when decoding.
    codecs: ta.Sequence[EagerCodec]

    def encode(self, v: ta.Any) -> ta.Any:
        """Pass `v` through every codec's `encode`, in sequence order, and return the final result."""
        current = v
        for codec in self.codecs:
            current = codec.encode(current)
        return current

    def decode(self, v: ta.Any) -> ta.Any:
        """Pass `v` through every codec's `decode`, in reverse sequence order, and return the final result."""
        current = v
        for codec in reversed(self.codecs):
            current = codec.decode(current)
        return current
20
+
21
+
def chain(*codecs: EagerCodec) -> ChainEagerCodec:
    """Combine the given eager codecs, in argument order, into one `ChainEagerCodec`."""
    return ChainEagerCodec(codecs=codecs)
omlish/codecs/funcs.py ADDED
@@ -0,0 +1,39 @@
1
+ import dataclasses as dc
2
+ import typing as ta
3
+
4
+ from ..funcs import pairs as fps
5
+ from .base import EagerCodec
6
+
7
+
# Generic parameters for the function-pair codec below: `I` is the type given to `encode`, `O` the type it returns.
I = ta.TypeVar('I')
O = ta.TypeVar('O')
10
+
11
+
@dc.dataclass(frozen=True)
class FnPairEagerCodec(EagerCodec[I, O]):
    """Eager codec that delegates to an `FnPair`: `encode` calls its `forward`, `decode` calls its `backward`."""

    fp: fps.FnPair[I, O]

    def encode(self, i: I) -> O:
        """Return the result of the pair's `forward` function applied to `i`."""
        pair = self.fp
        return pair.forward(i)

    def decode(self, o: O) -> I:
        """Return the result of the pair's `backward` function applied to `o`."""
        pair = self.fp
        return pair.backward(o)

    @classmethod
    def of(
            cls,
            encode: ta.Callable[[I], O],
            decode: ta.Callable[[O], I],
    ) -> 'FnPairEagerCodec[I, O]':
        """Alternate constructor: wrap two plain callables by first combining them with `fps.of`."""
        pair = fps.of(encode, decode)
        return cls(pair)
29
+
30
+
def of_pair(fp: fps.FnPair[I, O]) -> FnPairEagerCodec[I, O]:
    """Wrap an existing `FnPair` in a `FnPairEagerCodec`."""
    return FnPairEagerCodec(fp=fp)
33
+
34
+
def of(
        encode: ta.Callable[[I], O],
        decode: ta.Callable[[O], I],
) -> FnPairEagerCodec[I, O]:
    """Build a `FnPairEagerCodec` from separate `encode` and `decode` callables."""
    pair = fps.of(encode, decode)
    return FnPairEagerCodec(pair)
omlish/codecs/registry.py ADDED
@@ -0,0 +1,139 @@
1
+ import contextlib
2
+ import importlib
3
+ import threading
4
+ import typing as ta
5
+
6
+ from .. import cached
7
+ from .. import check
8
+ from .. import lang
9
+ from .base import Codec
10
+ from .base import LazyLoadedCodec
11
+ from .standard import STANDARD_CODECS
12
+
13
+
14
+ if ta.TYPE_CHECKING:
15
+ from ..manifests import load as manifest_load
16
+ else:
17
+ manifest_load = lang.proxy_import('..manifests.load', __package__)
18
+
19
+
20
+ ##
21
+
22
+
class CodecRegistry:
    """
    Registry mapping codec names and aliases to `Codec` objects.

    Entries are registered either as concrete `Codec`s or as `LazyLoadedCodec` references; a reference is imported and
    swapped for the real `Codec` the first time it is looked up. Optional "late load" callbacks are run once, the first
    time the registry is queried. All public operations run while holding a re-entrant lock.
    """

    def __init__(
            self,
            *,
            late_load_callbacks: ta.Iterable[ta.Callable[['CodecRegistry'], None]] | None = None,
    ) -> None:
        """
        Args:
            late_load_callbacks: Callables invoked with this registry on the first query (not on construction),
                typically to register further codecs.
        """
        super().__init__()

        self._late_load_callbacks = late_load_callbacks

        self._lock = threading.RLock()
        # Primary name -> codec (or not-yet-imported reference).
        self._by_name: dict[str, Codec | LazyLoadedCodec] = {}
        # Every name and alias -> primary name.
        self._names_by_alias: dict[str, str] = {}
        # Codec class (and each of its bases except object) -> primary names of concrete codecs of that class.
        self._names_by_cls: dict[type, list[str]] = {}

    def _late_load(self) -> None:
        """Run the pending late-load callbacks, if any, then drop them so they run only once."""
        if self._late_load_callbacks:
            for cb in self._late_load_callbacks:
                cb(self)
            self._late_load_callbacks = None

    @contextlib.contextmanager
    def _lock_and_load(self) -> ta.Iterator[None]:
        """Context manager holding the registry lock, after making sure late-load callbacks have been run."""
        with self._lock:
            self._late_load()
            yield

    def _post_load(self, codec: Codec) -> None:
        """Index a concrete codec's name under its class and every base class other than `object`."""
        for t in type(codec).__mro__:
            if t is not object:
                self._names_by_cls.setdefault(t, []).append(codec.name)

    def register(self, *codecs: Codec | LazyLoadedCodec) -> ta.Self:
        """
        Register one or more codecs (or lazy references) under their names and aliases.

        The whole batch is validated before anything is stored, so a conflict leaves the registry unchanged.

        Returns:
            This registry, to allow chaining.

        Raises:
            KeyError: if a name or alias is already registered, or is claimed by more than one codec in this call.
        """
        with self._lock:
            # Names claimed by earlier codecs of this same batch - without tracking these, two codecs in one call that
            # share a name or alias would silently overwrite each other.
            claimed: set[str] = set()
            for codec in codecs:
                names = {codec.name, *(codec.aliases or [])}
                for n in names:
                    if n in self._names_by_alias or n in claimed:
                        raise KeyError(n)
                claimed |= names

            for codec in codecs:
                self._by_name[codec.name] = codec
                for n in {codec.name, *(codec.aliases or [])}:
                    self._names_by_alias[n] = codec.name
                if isinstance(codec, Codec):
                    self._post_load(codec)

        return self

    def lookup(self, name_or_alias: str) -> Codec:
        """
        Return the codec registered under a name or alias, importing it first if it was registered lazily.

        Underscores in the given name are treated as hyphens.

        Raises:
            KeyError: if nothing is registered under the name.
        """
        with self._lock_and_load():
            name = self._names_by_alias[name_or_alias.replace('_', '-')]
            codec_or_lazy = self._by_name[name]

            if isinstance(codec_or_lazy, LazyLoadedCodec):
                # Resolve the reference and cache the real codec so the import happens only once.
                mod = importlib.import_module(codec_or_lazy.mod_name)
                codec = check.isinstance(getattr(mod, codec_or_lazy.attr_name), Codec)
                self._by_name[name] = codec
                self._post_load(codec)
            else:
                codec = check.isinstance(codec_or_lazy, Codec)

            return codec

    def lookup_type(self, cls: type) -> list[Codec]:
        """
        Return the codecs indexed under the class `cls`.

        Only concrete codecs are indexed by class, so lazily registered codecs that have not been looked up yet are
        not included.
        """
        with self._lock_and_load():
            return [self.lookup(n) for n in self._names_by_cls.get(cls, [])]

    def all(self) -> frozenset[str]:
        """Return the primary names (not aliases) of all registered codecs."""
        with self._lock_and_load():
            return frozenset(self._by_name)
93
+
94
+
95
+ ##
96
+
97
+
def _install_standard_codecs(registry: CodecRegistry) -> None:
    """Register every codec currently collected in STANDARD_CODECS with `registry`, in a single call."""
    standard = tuple(STANDARD_CODECS)
    registry.register(*standard)
100
+
101
+
102
+ ##
103
+
104
+
@cached.function
def _build_manifest_lazy_loaded_codecs() -> ta.Sequence[LazyLoadedCodec]:
    """
    Collect the `LazyLoadedCodec` values found in package manifests.

    Manifests are loaded, restricted to `LazyLoadedCodec`, for this module's top-level package plus whatever packages
    the manifest loader's `discover()` returns.
    """
    loader = manifest_load.MANIFEST_LOADER

    root_package = __package__.split('.')[0]
    packages = {root_package, *loader.discover()}

    manifests = loader.load(*packages, only=[LazyLoadedCodec])
    return [manifest.value for manifest in manifests]
111
+
112
+
def _install_manifest_lazy_loaded_codecs(registry: CodecRegistry) -> None:
    """Register the manifest-declared lazy codec references with `registry`, in a single call."""
    lazy_codecs = _build_manifest_lazy_loaded_codecs()
    registry.register(*lazy_codecs)
115
+
116
+
117
+ ##
118
+
119
+
# Module-level default registry. The standard and manifest-declared codecs are installed through late-load callbacks
# rather than at construction time.
REGISTRY = CodecRegistry(late_load_callbacks=[_install_standard_codecs, _install_manifest_lazy_loaded_codecs])

# Module-level shortcuts bound to the default registry.
register = REGISTRY.register
lookup = REGISTRY.lookup
129
+
130
+
131
+ ##
132
+
133
+
def encode(i: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Look up the codec called `name` in the default registry, build it with `new(**kwargs)` and encode `i`."""
    codec = lookup(name)
    impl = codec.new(**kwargs)
    return impl.encode(i)
136
+
137
+
def decode(o: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Look up the codec called `name` in the default registry, build it with `new(**kwargs)` and decode `o`."""
    codec = lookup(name)
    impl = codec.new(**kwargs)
    return impl.decode(o)
omlish/codecs/standard.py ADDED
@@ -0,0 +1,4 @@
1
+ from .base import Codec
2
+
3
+
# Shared, mutable list of codec descriptions; starts out empty and is appended to by code that defines codecs.
STANDARD_CODECS: list[Codec] = []
omlish/codecs/text.py ADDED
@@ -0,0 +1,217 @@
1
+ import codecs
2
+ import dataclasses as dc
3
+ import functools
4
+ import typing as ta
5
+
6
+ from omlish import check
7
+
8
+ from .base import Codec
9
+ from .base import ComboCodec
10
+ from .standard import STANDARD_CODECS
11
+
12
+
13
+ ##
14
+
15
+
16
+ TextEncodingErrors: ta.TypeAlias = ta.Literal[
17
+ # Raise UnicodeError (or a subclass), this is the default. Implemented in strict_errors().
18
+ 'strict',
19
+
20
+ # Ignore the malformed data and continue without further notice. Implemented in ignore_errors().
21
+ 'ignore',
22
+
23
+ # Replace with a replacement marker. On encoding, use ? (ASCII character). On decoding, use � (U+FFFD, the official
24
+ # REPLACEMENT CHARACTER). Implemented in replace_errors().
25
+ 'replace',
26
+
27
+ # Replace with backslashed escape sequences. On encoding, use hexadecimal form of Unicode code point with formats
28
+ # \xhh \uxxxx \Uxxxxxxxx. On decoding, use hexadecimal form of byte value with format \xhh. Implemented in
29
+ # backslashreplace_errors().
30
+ 'backslashreplace',
31
+
32
+ # On decoding, replace byte with individual surrogate code ranging from U+DC80 to U+DCFF. This code will then be
33
+ # turned back into the same byte when the 'surrogateescape' error handler is used when encoding the data. (See PEP
34
+ # 383 for more.)
35
+ 'surrogateescape',
36
+
37
+ ##
38
+ # The following error handlers are only applicable to encoding (within text encodings):
39
+
40
+ # Replace with XML/HTML numeric character reference, which is a decimal form of Unicode code point with format
41
+ # &#num;. Implemented in xmlcharrefreplace_errors().
42
+ 'xmlcharrefreplace',
43
+
44
+ # Replace with \N{...} escape sequences, what appears in the braces is the Name property from Unicode Character
45
+ # Database. Implemented in namereplace_errors().
46
+ 'namereplace',
47
+
48
+ ##
49
+ # In addition, the following error handler is specific to the given codecs: utf-8, utf-16, utf-32, utf-16-be,
50
+ # utf-16-le, utf-32-be, utf-32-le
51
+
52
+ # Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
53
+ # treat the presence of surrogate code point in str as an error.
54
+ 'surrogatepass',
55
+ ]
56
+
57
+
@dc.dataclass(frozen=True, kw_only=True)
class TextEncodingOptions:
    """Frozen, keyword-only options for a text encoding codec."""

    # Name of the error handler to use; defaults to 'strict'.
    errors: TextEncodingErrors = 'strict'
61
+
62
+
63
+ ##
64
+
65
+
class TextEncodingComboCodec(ComboCodec[str, bytes]):
    """
    Eager and incremental `str` <-> `bytes` codec backed by a standard-library `codecs.CodecInfo`.

    The error handler named by the options' `errors` field is passed to every encode / decode operation.
    """

    def __init__(
            self,
            info: codecs.CodecInfo,
            # Sharing one default instance is safe: TextEncodingOptions is a frozen dataclass.
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> None:
        """
        Args:
            info: The stdlib codec to delegate to.
            options: Error-handling options.
        """
        super().__init__()
        self._info = check.isinstance(info, codecs.CodecInfo)
        self._opts = check.isinstance(options, TextEncodingOptions)

    @classmethod
    def lookup(
            cls,
            name: str,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> 'TextEncodingComboCodec':
        """Alternate constructor: resolve `name` with `codecs.lookup` and wrap the resulting `CodecInfo`."""
        return cls(codecs.lookup(name), options)

    def encode(self, i: str) -> bytes:
        """Encode the whole string `i` to bytes."""
        o, _ = self._info.encode(i, self._opts.errors)
        return o

    def decode(self, o: bytes) -> str:
        """Decode the whole bytes value `o` to a string."""
        i, _ = self._info.decode(o, self._opts.errors)
        return i

    def encode_incremental(self) -> ta.Generator[bytes | None, str, None]:
        """
        Return a generator that encodes text chunk by chunk.

        After being primed (its first yield is `None`), the generator is sent `str` chunks and yields the bytes
        produced for each one, or `None` when a chunk produced no output. Sending a falsy value (an empty string or
        `None`) ends the input: the encoder is flushed and its remaining output is yielded last.
        """
        x = self._info.incrementalencoder(self._opts.errors)
        i = yield None
        while i:
            o = x.encode(i)
            i = yield o or None
        # Flush with an explicit empty chunk rather than the terminating value itself, which may be None and would be
        # rejected by the underlying encoder.
        o = x.encode('', final=True)
        yield o

    def decode_incremental(self) -> ta.Generator[str | None, bytes, None]:
        """
        Return a generator that decodes bytes chunk by chunk.

        After being primed (its first yield is `None`), the generator is sent `bytes` chunks and yields the text
        produced for each one, or `None` when a chunk produced no output. Sending a falsy value (empty bytes or `None`)
        ends the input: the decoder is flushed and its remaining output is yielded last.
        """
        x = self._info.incrementaldecoder(self._opts.errors)
        i = yield None
        while i:
            o = x.decode(i)
            i = yield o or None
        # Flush with an explicit empty chunk rather than the terminating value itself, which may be None and would be
        # rejected by the underlying decoder.
        o = x.decode(b'', final=True)
        yield o
113
+
114
+
115
+ ##
116
+
117
+
class TextEncodingCodec(Codec):
    """Marker subclass of `Codec` used for the text (`str` <-> `bytes`) encodings defined in this module."""
120
+
121
+
def normalize_text_encoding_name(s: str) -> str:
    """
    Return the normalized spelling of a text encoding name: lower-cased, with underscores turned into hyphens.

    Raises:
        NameError: if the name contains a space character.
    """
    has_space = s.find(' ') >= 0
    if has_space:
        raise NameError(s)

    lowered = s.lower()
    return lowered.replace('_', '-')
126
+
127
+
def make_text_encoding_codec(
        name: str,
        aliases: ta.Collection[str] | None = None,
        *,
        append_to: ta.MutableSequence[Codec] | None = None,
) -> TextEncodingCodec:
    """
    Build a `TextEncodingCodec` description for the text encoding called `name`.

    Args:
        name: Encoding name; passed through `check.equal` against its `normalize_text_encoding_name` form, so it is
            expected to already be normalized.
        aliases: Alternate names, or None. Passed through `check.not_isinstance(..., str)` so a bare string is not
            accepted in place of a collection.
        append_to: If given, the new codec is also appended to this sequence.

    Returns:
        The newly built codec description. Both of its factories call `TextEncodingComboCodec.lookup` with `name`
        pre-bound.
    """
    checked_name = check.equal(name, normalize_text_encoding_name(name))
    checked_aliases = check.not_isinstance(aliases, str)

    result = TextEncodingCodec(
        name=checked_name,
        aliases=checked_aliases,
        input=str,
        output=bytes,
        new=functools.partial(TextEncodingComboCodec.lookup, name),
        new_incremental=functools.partial(TextEncodingComboCodec.lookup, name),
    )

    if append_to is None:
        return result

    append_to.append(result)
    return result
149
+
150
+
151
+ ##
152
+
153
+
# Text encodings. Each one is appended to STANDARD_CODECS as it is created, in the order written here.

ASCII = make_text_encoding_codec(
    name='ascii',
    aliases=['646', 'us-ascii'],
    append_to=STANDARD_CODECS,
)

LATIN1 = make_text_encoding_codec(
    name='latin-1',
    aliases=['iso-8859-1', 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'l1'],
    append_to=STANDARD_CODECS,
)

UTF32 = make_text_encoding_codec(
    name='utf-32',
    aliases=['u32', 'utf32'],
    append_to=STANDARD_CODECS,
)

UTF32BE = make_text_encoding_codec(
    name='utf-32-be',
    aliases=['utf-32be'],
    append_to=STANDARD_CODECS,
)

UTF32LE = make_text_encoding_codec(
    name='utf-32-le',
    aliases=['utf-32le'],
    append_to=STANDARD_CODECS,
)

UTF16 = make_text_encoding_codec(
    name='utf-16',
    aliases=['u16', 'utf16'],
    append_to=STANDARD_CODECS,
)

UTF16BE = make_text_encoding_codec(
    name='utf-16-be',
    aliases=['utf-16be'],
    append_to=STANDARD_CODECS,
)

UTF16LE = make_text_encoding_codec(
    name='utf-16-le',
    aliases=['utf-16le'],
    append_to=STANDARD_CODECS,
)

UTF7 = make_text_encoding_codec(
    name='utf-7',
    aliases=['u7', 'unicode-1-1-utf-7'],
    append_to=STANDARD_CODECS,
)

UTF8 = make_text_encoding_codec(
    name='utf-8',
    aliases=['u8', 'utf', 'utf8', 'cp65001'],
    append_to=STANDARD_CODECS,
)

# No aliases are given for this one.
UTF8SIG = make_text_encoding_codec(
    name='utf-8-sig',
    append_to=STANDARD_CODECS,
)