omlish 0.0.0.dev133__py3-none-any.whl → 0.0.0.dev177__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. omlish/.manifests.json +265 -7
  2. omlish/__about__.py +5 -3
  3. omlish/antlr/_runtime/__init__.py +0 -22
  4. omlish/antlr/_runtime/_all.py +24 -0
  5. omlish/antlr/_runtime/atn/ParserATNSimulator.py +1 -1
  6. omlish/antlr/_runtime/dfa/DFASerializer.py +1 -1
  7. omlish/antlr/_runtime/error/DiagnosticErrorListener.py +2 -1
  8. omlish/antlr/_runtime/xpath/XPath.py +7 -1
  9. omlish/antlr/_runtime/xpath/XPathLexer.py +1 -1
  10. omlish/antlr/delimit.py +106 -0
  11. omlish/antlr/dot.py +31 -0
  12. omlish/antlr/errors.py +11 -0
  13. omlish/antlr/input.py +96 -0
  14. omlish/antlr/parsing.py +19 -0
  15. omlish/antlr/runtime.py +102 -0
  16. omlish/antlr/utils.py +38 -0
  17. omlish/argparse/all.py +45 -0
  18. omlish/{argparse.py → argparse/cli.py} +112 -107
  19. omlish/asyncs/__init__.py +0 -35
  20. omlish/asyncs/all.py +35 -0
  21. omlish/asyncs/asyncio/all.py +7 -0
  22. omlish/asyncs/asyncio/channels.py +40 -0
  23. omlish/asyncs/asyncio/streams.py +45 -0
  24. omlish/asyncs/asyncio/subprocesses.py +238 -0
  25. omlish/asyncs/asyncio/timeouts.py +16 -0
  26. omlish/asyncs/bluelet/LICENSE +6 -0
  27. omlish/asyncs/bluelet/all.py +67 -0
  28. omlish/asyncs/bluelet/api.py +23 -0
  29. omlish/asyncs/bluelet/core.py +178 -0
  30. omlish/asyncs/bluelet/events.py +78 -0
  31. omlish/asyncs/bluelet/files.py +80 -0
  32. omlish/asyncs/bluelet/runner.py +416 -0
  33. omlish/asyncs/bluelet/sockets.py +214 -0
  34. omlish/bootstrap/sys.py +3 -3
  35. omlish/cached.py +2 -2
  36. omlish/check.py +49 -460
  37. omlish/codecs/__init__.py +72 -0
  38. omlish/codecs/base.py +106 -0
  39. omlish/codecs/bytes.py +119 -0
  40. omlish/codecs/chain.py +23 -0
  41. omlish/codecs/funcs.py +39 -0
  42. omlish/codecs/registry.py +139 -0
  43. omlish/codecs/standard.py +4 -0
  44. omlish/codecs/text.py +217 -0
  45. omlish/collections/cache/impl.py +50 -57
  46. omlish/collections/coerce.py +1 -0
  47. omlish/collections/mappings.py +1 -1
  48. omlish/configs/flattening.py +1 -1
  49. omlish/defs.py +1 -1
  50. omlish/diag/_pycharm/runhack.py +8 -2
  51. omlish/diag/procfs.py +8 -8
  52. omlish/docker/__init__.py +0 -36
  53. omlish/docker/all.py +31 -0
  54. omlish/docker/consts.py +4 -0
  55. omlish/{lite/docker.py → docker/detect.py} +18 -0
  56. omlish/docker/{helpers.py → timebomb.py} +0 -21
  57. omlish/formats/cbor.py +31 -0
  58. omlish/formats/cloudpickle.py +31 -0
  59. omlish/formats/codecs.py +93 -0
  60. omlish/formats/json/codecs.py +29 -0
  61. omlish/formats/json/delimted.py +4 -0
  62. omlish/formats/json/stream/errors.py +2 -0
  63. omlish/formats/json/stream/lex.py +12 -6
  64. omlish/formats/json/stream/parse.py +38 -22
  65. omlish/formats/json5.py +31 -0
  66. omlish/formats/pickle.py +31 -0
  67. omlish/formats/repr.py +25 -0
  68. omlish/formats/toml.py +17 -0
  69. omlish/formats/yaml.py +25 -0
  70. omlish/funcs/__init__.py +0 -0
  71. omlish/{genmachine.py → funcs/genmachine.py} +5 -4
  72. omlish/{matchfns.py → funcs/match.py} +1 -1
  73. omlish/funcs/pairs.py +215 -0
  74. omlish/http/__init__.py +0 -48
  75. omlish/http/all.py +48 -0
  76. omlish/http/coro/__init__.py +0 -0
  77. omlish/{lite/fdio/corohttp.py → http/coro/fdio.py} +21 -19
  78. omlish/{lite/http/coroserver.py → http/coro/server.py} +20 -21
  79. omlish/{lite/http → http}/handlers.py +3 -2
  80. omlish/{lite/http → http}/parsing.py +1 -0
  81. omlish/http/sessions.py +1 -1
  82. omlish/{lite/http → http}/versions.py +1 -0
  83. omlish/inject/managed.py +2 -2
  84. omlish/io/__init__.py +0 -3
  85. omlish/{lite/io.py → io/buffers.py} +8 -9
  86. omlish/io/compress/__init__.py +9 -0
  87. omlish/io/compress/abc.py +104 -0
  88. omlish/io/compress/adapters.py +148 -0
  89. omlish/io/compress/base.py +24 -0
  90. omlish/io/compress/brotli.py +47 -0
  91. omlish/io/compress/bz2.py +61 -0
  92. omlish/io/compress/codecs.py +78 -0
  93. omlish/io/compress/gzip.py +350 -0
  94. omlish/io/compress/lz4.py +91 -0
  95. omlish/io/compress/lzma.py +81 -0
  96. omlish/io/compress/snappy.py +34 -0
  97. omlish/io/compress/zlib.py +74 -0
  98. omlish/io/compress/zstd.py +44 -0
  99. omlish/io/fdio/__init__.py +1 -0
  100. omlish/{lite → io}/fdio/handlers.py +5 -5
  101. omlish/{lite → io}/fdio/kqueue.py +8 -8
  102. omlish/{lite → io}/fdio/manager.py +7 -7
  103. omlish/{lite → io}/fdio/pollers.py +13 -13
  104. omlish/io/generators/__init__.py +56 -0
  105. omlish/io/generators/consts.py +1 -0
  106. omlish/io/generators/direct.py +13 -0
  107. omlish/io/generators/readers.py +189 -0
  108. omlish/io/generators/stepped.py +191 -0
  109. omlish/io/pyio.py +5 -2
  110. omlish/iterators/__init__.py +24 -0
  111. omlish/iterators/iterators.py +132 -0
  112. omlish/iterators/recipes.py +18 -0
  113. omlish/iterators/tools.py +96 -0
  114. omlish/iterators/unique.py +67 -0
  115. omlish/lang/__init__.py +13 -1
  116. omlish/lang/functions.py +11 -2
  117. omlish/lang/generators.py +243 -0
  118. omlish/lang/iterables.py +46 -49
  119. omlish/lang/maybes.py +4 -4
  120. omlish/lite/cached.py +39 -6
  121. omlish/lite/check.py +438 -75
  122. omlish/lite/contextmanagers.py +17 -4
  123. omlish/lite/dataclasses.py +42 -0
  124. omlish/lite/inject.py +28 -45
  125. omlish/lite/logs.py +0 -270
  126. omlish/lite/marshal.py +309 -144
  127. omlish/lite/pycharm.py +47 -0
  128. omlish/lite/reflect.py +33 -0
  129. omlish/lite/resources.py +8 -0
  130. omlish/lite/runtime.py +4 -4
  131. omlish/lite/shlex.py +12 -0
  132. omlish/lite/socketserver.py +2 -2
  133. omlish/lite/strings.py +31 -0
  134. omlish/logs/__init__.py +0 -32
  135. omlish/logs/{_abc.py → abc.py} +0 -1
  136. omlish/logs/all.py +37 -0
  137. omlish/logs/{formatters.py → color.py} +1 -2
  138. omlish/logs/configs.py +7 -38
  139. omlish/logs/filters.py +10 -0
  140. omlish/logs/handlers.py +4 -1
  141. omlish/logs/json.py +56 -0
  142. omlish/logs/proxy.py +99 -0
  143. omlish/logs/standard.py +128 -0
  144. omlish/logs/utils.py +2 -2
  145. omlish/manifests/__init__.py +2 -0
  146. omlish/manifests/load.py +209 -0
  147. omlish/manifests/types.py +17 -0
  148. omlish/marshal/base.py +1 -1
  149. omlish/marshal/factories.py +1 -1
  150. omlish/marshal/forbidden.py +1 -1
  151. omlish/marshal/iterables.py +1 -1
  152. omlish/marshal/literals.py +50 -0
  153. omlish/marshal/mappings.py +1 -1
  154. omlish/marshal/maybes.py +1 -1
  155. omlish/marshal/standard.py +5 -1
  156. omlish/marshal/unions.py +1 -1
  157. omlish/os/__init__.py +0 -0
  158. omlish/os/atomics.py +205 -0
  159. omlish/os/deathsig.py +23 -0
  160. omlish/{os.py → os/files.py} +0 -9
  161. omlish/{lite → os}/journald.py +2 -1
  162. omlish/os/linux.py +484 -0
  163. omlish/os/paths.py +36 -0
  164. omlish/{lite → os}/pidfile.py +1 -0
  165. omlish/os/sizes.py +9 -0
  166. omlish/reflect/__init__.py +3 -0
  167. omlish/reflect/subst.py +2 -1
  168. omlish/reflect/types.py +126 -44
  169. omlish/secrets/pwhash.py +1 -1
  170. omlish/secrets/subprocesses.py +3 -1
  171. omlish/specs/jsonrpc/marshal.py +1 -1
  172. omlish/specs/openapi/marshal.py +1 -1
  173. omlish/sql/alchemy/asyncs.py +1 -1
  174. omlish/sql/queries/__init__.py +9 -1
  175. omlish/sql/queries/building.py +3 -0
  176. omlish/sql/queries/exprs.py +10 -27
  177. omlish/sql/queries/idents.py +48 -10
  178. omlish/sql/queries/names.py +80 -13
  179. omlish/sql/queries/params.py +64 -0
  180. omlish/sql/queries/rendering.py +1 -1
  181. omlish/subprocesses.py +340 -0
  182. omlish/term.py +29 -14
  183. omlish/testing/pytest/marks.py +2 -2
  184. omlish/testing/pytest/plugins/asyncs.py +6 -1
  185. omlish/testing/pytest/plugins/logging.py +1 -1
  186. omlish/testing/pytest/plugins/switches.py +1 -1
  187. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/METADATA +7 -5
  188. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/RECORD +200 -117
  189. omlish/fnpairs.py +0 -496
  190. omlish/formats/json/cli/__main__.py +0 -11
  191. omlish/formats/json/cli/cli.py +0 -298
  192. omlish/formats/json/cli/formats.py +0 -71
  193. omlish/formats/json/cli/io.py +0 -74
  194. omlish/formats/json/cli/parsing.py +0 -82
  195. omlish/formats/json/cli/processing.py +0 -48
  196. omlish/formats/json/cli/rendering.py +0 -92
  197. omlish/iterators.py +0 -300
  198. omlish/lite/subprocesses.py +0 -130
  199. /omlish/{formats/json/cli → argparse}/__init__.py +0 -0
  200. /omlish/{lite/fdio → asyncs/asyncio}/__init__.py +0 -0
  201. /omlish/asyncs/{asyncio.py → asyncio/asyncio.py} +0 -0
  202. /omlish/{lite/http → asyncs/bluelet}/__init__.py +0 -0
  203. /omlish/collections/{_abc.py → abc.py} +0 -0
  204. /omlish/{fnpipes.py → funcs/pipes.py} +0 -0
  205. /omlish/io/{_abc.py → abc.py} +0 -0
  206. /omlish/sql/{_abc.py → abc.py} +0 -0
  207. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/LICENSE +0 -0
  208. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/WHEEL +0 -0
  209. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/entry_points.txt +0 -0
  210. {omlish-0.0.0.dev133.dist-info → omlish-0.0.0.dev177.dist-info}/top_level.txt +0 -0
omlish/codecs/base.py ADDED
@@ -0,0 +1,106 @@
1
+ """
2
+ TODO:
3
+ - bytes-like - bytearray, memoryview
4
+ """
5
+ import abc
6
+ import typing as ta
7
+
8
+ from omlish import check
9
+ from omlish import dataclasses as dc
10
+ from omlish import lang
11
+ from omlish import reflect as rfl
12
+ from omlish.funcs import pairs as fps
13
+
14
+
15
+ I = ta.TypeVar('I')
16
+ O = ta.TypeVar('O')
17
+
18
+
19
+ ##
20
+
21
+
22
class EagerCodec(lang.Abstract, ta.Generic[I, O]):
    """Abstract codec that converts a whole value per call: `I` to `O` on encode and `O` back to `I` on decode."""

    @abc.abstractmethod
    def encode(self, i: I) -> O:
        """Encode the input value `i` and return the encoded output."""
        raise NotImplementedError

    @abc.abstractmethod
    def decode(self, o: O) -> I:
        """Decode the output value `o` and return the corresponding input value."""
        raise NotImplementedError

    def as_pair(self) -> fps.FnPair[I, O]:
        """Return this codec's bound `encode` / `decode` methods wrapped by `fps.of`."""
        forward, backward = self.encode, self.decode
        return fps.of(forward, backward)
33
+
34
+
35
class IncrementalCodec(lang.Abstract, ta.Generic[I, O]):
    """Abstract codec whose encode / decode operations are exposed as generators fed via `send()`."""

    @abc.abstractmethod
    def encode_incremental(self) -> ta.Generator[O | None, I, None]:
        """Return a generator that is sent `I` values and yields `O` values (or `None`)."""
        raise NotImplementedError

    @abc.abstractmethod
    def decode_incremental(self) -> ta.Generator[I | None, O, None]:
        """Return a generator that is sent `O` values and yields `I` values (or `None`)."""
        raise NotImplementedError
43
+
44
+
45
class ComboCodec(  # noqa
    EagerCodec[I, O],
    IncrementalCodec[I, O],
    lang.Abstract,
    ta.Generic[I, O],
):
    """Abstract base for codecs offering both the eager and the incremental interfaces."""
52
+
53
+
54
+ ##
55
+
56
+
57
def check_codec_name(s: str) -> str:
    """
    Validate a codec name or alias and return it unchanged.

    The checks run in order: `s` must be a non-empty string, must not contain an underscore, and must carry no
    leading or trailing whitespace. Failures surface as whatever the `check` helpers raise.
    """
    check.non_empty_str(s)

    # Underscores are rejected outright; registered names use dashes instead.
    check.not_in('_', s)

    stripped = s.strip()
    check.equal(stripped, s)

    return s
62
+
63
+
64
+ ##
65
+
66
+
67
@dc.dataclass(frozen=True, kw_only=True)
class Codec:
    """Frozen, keyword-only descriptor of a codec: its names, its input / output types and its factories."""

    # Primary name, coerced through check_codec_name.
    name: str = dc.xfield(coerce=check_codec_name)

    # Optional alternative names; when coerced, each alias is run through check_codec_name and collected into a list.
    aliases: ta.Collection[str] | None = dc.xfield(
        coerce=lang.opt_fn(lambda s: [check_codec_name(a) for a in s]),  # type: ignore
        default=None,
    )

    # Types of the unencoded and encoded values, both coerced through rfl.type_.
    input: rfl.Type = dc.xfield(coerce=rfl.type_)
    output: rfl.Type = dc.xfield(coerce=rfl.type_)

    # Optional options type associated with the codec.
    options: type | None = None

    # Factory for an eager codec instance, plus an optional factory for an incremental one.
    new: ta.Callable[..., EagerCodec]
    new_incremental: ta.Callable[..., IncrementalCodec] | None = None
82
+
83
+
84
+ ##
85
+
86
+
87
@dc.dataclass(frozen=True, kw_only=True)
class LazyLoadedCodec:
    """Frozen, keyword-only reference to a `Codec` by module and attribute name, carrying its name and aliases."""

    # Module name and attribute name under which the referenced codec object is found.
    mod_name: str
    attr_name: str

    # Name and optional aliases copied from the referenced codec.
    name: str
    aliases: ta.Collection[str] | None = None

    @classmethod
    def new(cls, mod_name: str, attr_name: str, codec: Codec) -> 'LazyLoadedCodec':
        """Build a reference for `codec`, taking `name` and `aliases` from it and the location from the arguments."""
        return cls(mod_name=mod_name, attr_name=attr_name, name=codec.name, aliases=codec.aliases)
omlish/codecs/bytes.py ADDED
@@ -0,0 +1,119 @@
1
+ """
2
+ TODO:
3
+ - options / kwargs
4
+ """
5
+ import base64
6
+ import binascii
7
+ import typing as ta
8
+
9
+ from .. import check
10
+ from .base import Codec
11
+ from .funcs import FnPairEagerCodec
12
+ from .standard import STANDARD_CODECS
13
+
14
+
15
+ ##
16
+
17
+
18
class BytesCodec(Codec):
    """`Codec` subclass used for the bytes-to-bytes encodings built by this module; adds no members."""
20
+
21
+
22
def make_bytes_encoding_codec(
        name: str,
        aliases: ta.Collection[str] | None,
        encode: ta.Callable[[bytes], bytes],
        decode: ta.Callable[[bytes], bytes],
        *,
        append_to: ta.MutableSequence[Codec] | None = None,
) -> BytesCodec:
    """
    Build a `BytesCodec` whose eager codec wraps the given `encode` / `decode` callables.

    Args:
        name: Primary codec name.
        aliases: Optional collection of alternative names; a bare `str` is rejected by `check.not_isinstance`.
        encode: Callable mapping bytes to encoded bytes.
        decode: Callable mapping encoded bytes back to bytes.
        append_to: When given, the new codec is also appended to this sequence.

    Returns:
        The newly created codec.
    """
    # A single string would otherwise be accepted as a "collection" of one-character aliases.
    checked_aliases = check.not_isinstance(aliases, str)

    result = BytesCodec(
        name=name,
        aliases=checked_aliases,
        input=bytes,
        output=bytes,
        new=lambda: FnPairEagerCodec.of(encode, decode),
    )

    if append_to is None:
        return result

    append_to.append(result)
    return result
44
+
45
+
46
+ ##
47
+
48
+
49
# Standard bytes-to-bytes codecs. Each call also appends the codec to STANDARD_CODECS, in this order.

ASCII85 = make_bytes_encoding_codec('ascii85', ['a85'], base64.a85encode, base64.a85decode, append_to=STANDARD_CODECS)

BASE16 = make_bytes_encoding_codec('base16', ['b16'], base64.b16encode, base64.b16decode, append_to=STANDARD_CODECS)

BASE32 = make_bytes_encoding_codec('base32', ['b32'], base64.b32encode, base64.b32decode, append_to=STANDARD_CODECS)

BASE64 = make_bytes_encoding_codec('base64', ['b64'], base64.b64encode, base64.b64decode, append_to=STANDARD_CODECS)

BASE85 = make_bytes_encoding_codec('base85', ['b85'], base64.b85encode, base64.b85decode, append_to=STANDARD_CODECS)

BASE32_HEX = make_bytes_encoding_codec(
    'base32-hex', ['b32-hex'], base64.b32hexencode, base64.b32hexdecode, append_to=STANDARD_CODECS,
)

# NOTE(review): despite the name, this entry is backed by the standard base64 alphabet functions - confirm intent.
BASE64_HEX = make_bytes_encoding_codec(
    'base64-hex', ['b64-hex'], base64.standard_b64encode, base64.standard_b64decode, append_to=STANDARD_CODECS,
)

BASE64_URLSAFE = make_bytes_encoding_codec(
    'base64-urlsafe', ['b64-urlsafe'], base64.urlsafe_b64encode, base64.urlsafe_b64decode, append_to=STANDARD_CODECS,
)

HEX = make_bytes_encoding_codec('hex', [], binascii.b2a_hex, binascii.a2b_hex, append_to=STANDARD_CODECS)
omlish/codecs/chain.py ADDED
@@ -0,0 +1,23 @@
1
+ import dataclasses as dc
2
+ import typing as ta
3
+
4
+ from .base import EagerCodec
5
+
6
+
7
@dc.dataclass(frozen=True)
class ChainEagerCodec(EagerCodec[ta.Any, ta.Any]):
    """Eager codec that pipes a value through a sequence of codecs: in order on encode, in reverse on decode."""

    # Codecs applied first-to-last when encoding.
    codecs: ta.Sequence[EagerCodec]

    def encode(self, v: ta.Any) -> ta.Any:
        """Feed `v` through every codec's `encode`, first to last, and return the final result."""
        out = v
        for codec in self.codecs:
            out = codec.encode(out)
        return out

    def decode(self, v: ta.Any) -> ta.Any:
        """Feed `v` through every codec's `decode`, last to first, and return the final result."""
        out = v
        for codec in reversed(self.codecs):
            out = codec.decode(out)
        return out
20
+
21
+
22
def chain(*codecs: EagerCodec) -> ChainEagerCodec:
    """Return a `ChainEagerCodec` over the given codecs, stored as the positional-argument tuple."""
    chained = ChainEagerCodec(codecs)
    return chained
omlish/codecs/funcs.py ADDED
@@ -0,0 +1,39 @@
1
+ import dataclasses as dc
2
+ import typing as ta
3
+
4
+ from ..funcs import pairs as fps
5
+ from .base import EagerCodec
6
+
7
+
8
+ I = ta.TypeVar('I')
9
+ O = ta.TypeVar('O')
10
+
11
+
12
@dc.dataclass(frozen=True)
class FnPairEagerCodec(EagerCodec[I, O]):
    """Eager codec backed by an `fps.FnPair`: encode calls its `forward`, decode calls its `backward`."""

    # The wrapped function pair.
    fp: fps.FnPair[I, O]

    def encode(self, i: I) -> O:
        """Return `fp.forward(i)`."""
        pair = self.fp
        return pair.forward(i)

    def decode(self, o: O) -> I:
        """Return `fp.backward(o)`."""
        pair = self.fp
        return pair.backward(o)

    @classmethod
    def of(cls, encode: ta.Callable[[I], O], decode: ta.Callable[[O], I]) -> 'FnPairEagerCodec[I, O]':
        """Alternate constructor: combine two plain callables with `fps.of` and wrap the resulting pair."""
        pair = fps.of(encode, decode)
        return cls(pair)
29
+
30
+
31
def of_pair(fp: fps.FnPair[I, O]) -> FnPairEagerCodec[I, O]:
    """Wrap an existing function pair in a `FnPairEagerCodec`."""
    codec = FnPairEagerCodec(fp)
    return codec
33
+
34
+
35
def of(encode: ta.Callable[[I], O], decode: ta.Callable[[O], I]) -> FnPairEagerCodec[I, O]:
    """Build a `FnPairEagerCodec` from separate `encode` and `decode` callables, combined via `fps.of`."""
    pair = fps.of(encode, decode)
    return FnPairEagerCodec(pair)
omlish/codecs/registry.py ADDED
@@ -0,0 +1,139 @@
1
+ import contextlib
2
+ import importlib
3
+ import threading
4
+ import typing as ta
5
+
6
+ from .. import cached
7
+ from .. import check
8
+ from .. import lang
9
+ from .base import Codec
10
+ from .base import LazyLoadedCodec
11
+ from .standard import STANDARD_CODECS
12
+
13
+
14
+ if ta.TYPE_CHECKING:
15
+ from ..manifests import load as manifest_load
16
+ else:
17
+ manifest_load = lang.proxy_import('..manifests.load', __package__)
18
+
19
+
20
+ ##
21
+
22
+
23
class CodecRegistry:
    """
    Registry mapping codec names and aliases to `Codec` descriptors.

    Entries are either concrete `Codec` objects or `LazyLoadedCodec` references, which are imported and replaced by
    the real codec on first lookup. Mutations and lookups run under a re-entrant lock. Optional "late load" callbacks
    are run once, on the first lookup-style access, to populate the registry.
    """

    def __init__(
            self,
            *,
            late_load_callbacks: ta.Iterable[ta.Callable[['CodecRegistry'], None]] | None = None,
    ) -> None:
        """
        Args:
            late_load_callbacks: Callables invoked with this registry on the first `lookup` / `lookup_type` / `all`
                call, then discarded.
        """
        super().__init__()

        self._late_load_callbacks = late_load_callbacks

        self._lock = threading.RLock()
        # Primary name -> codec (or not-yet-imported lazy reference).
        self._by_name: dict[str, Codec | LazyLoadedCodec] = {}
        # Every name and alias -> primary name.
        self._names_by_alias: dict[str, str] = {}
        # Codec class (and each of its bases except object) -> primary names of loaded codecs of that class.
        self._names_by_cls: dict[type, list[str]] = {}

    def _late_load(self) -> None:
        """Run the pending late-load callbacks, if any, and then clear them so they run only once."""
        if self._late_load_callbacks:
            for cb in self._late_load_callbacks:
                cb(self)
            self._late_load_callbacks = None

    @contextlib.contextmanager
    def _lock_and_load(self) -> ta.Iterator[None]:
        """Context manager that holds the lock and ensures late-load callbacks have run before the body executes."""
        with self._lock:
            self._late_load()
            yield

    def _post_load(self, codec: Codec) -> None:
        """Index a concrete codec's name under its class and every base class other than `object`."""
        for t in type(codec).__mro__:
            if t is not object:
                self._names_by_cls.setdefault(t, []).append(codec.name)

    def register(self, *codecs: Codec | LazyLoadedCodec) -> ta.Self:
        """
        Register one or more codecs (or lazy references) under their names and aliases.

        The whole batch is validated before anything is stored, so a conflict leaves the registry unchanged.

        Raises:
            KeyError: If any name or alias is already registered, or is claimed by more than one codec in this call.

        Returns:
            This registry, to allow chaining.
        """
        with self._lock:
            # Names claimed so far by this batch. Without this, two codecs passed in the same call could share a name
            # or alias and the later one would silently overwrite the earlier one.
            claimed: set[str] = set()
            for codec in codecs:
                for n in {codec.name, *(codec.aliases or [])}:
                    if n in self._names_by_alias or n in claimed:
                        raise KeyError(n)
                    claimed.add(n)

            for codec in codecs:
                self._by_name[codec.name] = codec
                for n in {codec.name, *(codec.aliases or [])}:
                    self._names_by_alias[n] = codec.name
                # Lazy references are class-indexed later, once lookup() has imported them.
                if isinstance(codec, Codec):
                    self._post_load(codec)

        return self

    def lookup(self, name_or_alias: str) -> Codec:
        """
        Return the codec registered under the given name or alias, importing it first if it is a lazy reference.

        Underscores in the argument are replaced with dashes before the lookup.

        Raises:
            KeyError: If no codec is registered under the resulting name.
        """
        with self._lock_and_load():
            name = self._names_by_alias[name_or_alias.replace('_', '-')]
            codec_or_lazy = self._by_name[name]

            if isinstance(codec_or_lazy, LazyLoadedCodec):
                # Resolve the reference and cache the concrete codec so the import happens only once.
                mod = importlib.import_module(codec_or_lazy.mod_name)
                codec = check.isinstance(getattr(mod, codec_or_lazy.attr_name), Codec)
                self._by_name[name] = codec
                self._post_load(codec)
            else:
                codec = check.isinstance(codec_or_lazy, Codec)

            return codec

    def lookup_type(self, cls: type) -> list[Codec]:
        """Return the already-loaded codecs indexed under `cls`; lazy references not yet looked up are not included."""
        with self._lock_and_load():
            return [self.lookup(n) for n in self._names_by_cls.get(cls, [])]

    def all(self) -> frozenset[str]:
        """Return the primary names of all registered codecs, loaded or lazy."""
        with self._lock_and_load():
            return frozenset(self._by_name)
93
+
94
+
95
+ ##
96
+
97
+
98
def _install_standard_codecs(registry: CodecRegistry) -> None:
    """Late-load callback: register every codec currently listed in STANDARD_CODECS."""
    standard = STANDARD_CODECS
    registry.register(*standard)
100
+
101
+
102
+ ##
103
+
104
+
105
@cached.function
def _build_manifest_lazy_loaded_codecs() -> ta.Sequence[LazyLoadedCodec]:
    """
    Collect the `LazyLoadedCodec` values from manifests.

    Manifests are loaded for this module's top-level package plus whatever the manifest loader's `discover()`
    returns, restricted to `LazyLoadedCodec` entries.
    """
    loader = manifest_load.MANIFEST_LOADER
    root_pkg = __package__.split('.')[0]
    pkgs = {root_pkg, *loader.discover()}
    manifests = loader.load(*pkgs, only=[LazyLoadedCodec])
    return [m.value for m in manifests]
111
+
112
+
113
def _install_manifest_lazy_loaded_codecs(registry: CodecRegistry) -> None:
    """Late-load callback: register the lazy codec references gathered from manifests."""
    lazy_codecs = _build_manifest_lazy_loaded_codecs()
    registry.register(*lazy_codecs)
115
+
116
+
117
+ ##
118
+
119
+
120
# Module-level default registry. The standard codecs are installed first, then the manifest-declared lazy ones; both
# callbacks run on the registry's first lookup-style access.
REGISTRY = CodecRegistry(late_load_callbacks=[_install_standard_codecs, _install_manifest_lazy_loaded_codecs])

# Convenience aliases bound to the default registry.
register, lookup = REGISTRY.register, REGISTRY.lookup
129
+
130
+
131
+ ##
132
+
133
+
134
def encode(i: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Look up the codec `name` in the default registry, build it with `**kwargs`, and encode `i` with it."""
    codec = lookup(name)
    impl = codec.new(**kwargs)
    return impl.encode(i)
136
+
137
+
138
def decode(o: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Look up the codec `name` in the default registry, build it with `**kwargs`, and decode `o` with it."""
    codec = lookup(name)
    impl = codec.new(**kwargs)
    return impl.decode(o)
omlish/codecs/standard.py ADDED
@@ -0,0 +1,4 @@
1
+ from .base import Codec
2
+
3
+
4
+ STANDARD_CODECS: list[Codec] = []
omlish/codecs/text.py ADDED
@@ -0,0 +1,217 @@
1
+ import codecs
2
+ import dataclasses as dc
3
+ import functools
4
+ import typing as ta
5
+
6
+ from omlish import check
7
+
8
+ from .base import Codec
9
+ from .base import ComboCodec
10
+ from .standard import STANDARD_CODECS
11
+
12
+
13
+ ##
14
+
15
+
16
# Literal type of the error-handler names accepted as `TextEncodingOptions.errors`. The per-value descriptions below
# describe each handler's effect.
TextEncodingErrors: ta.TypeAlias = ta.Literal[
    # Raise UnicodeError (or a subclass), this is the default. Implemented in strict_errors().
    'strict',

    # Ignore the malformed data and continue without further notice. Implemented in ignore_errors().
    'ignore',

    # Replace with a replacement marker. On encoding, use ? (ASCII character). On decoding, use � (U+FFFD, the official
    # REPLACEMENT CHARACTER). Implemented in replace_errors().
    'replace',

    # Replace with backslashed escape sequences. On encoding, use hexadecimal form of Unicode code point with formats
    # \xhh \uxxxx \Uxxxxxxxx. On decoding, use hexadecimal form of byte value with format \xhh. Implemented in
    # backslashreplace_errors().
    'backslashreplace',

    # On decoding, replace byte with individual surrogate code ranging from U+DC80 to U+DCFF. This code will then be
    # turned back into the same byte when the 'surrogateescape' error handler is used when encoding the data. (See PEP
    # 383 for more.)
    'surrogateescape',

    ##
    # The following error handlers are only applicable to encoding (within text encodings):

    # Replace with XML/HTML numeric character reference, which is a decimal form of Unicode code point with format
    # &#num;. Implemented in xmlcharrefreplace_errors().
    'xmlcharrefreplace',

    # Replace with \N{...} escape sequences, what appears in the braces is the Name property from Unicode Character
    # Database. Implemented in namereplace_errors().
    'namereplace',

    ##
    # In addition, the following error handler is specific to the given codecs: utf-8, utf-16, utf-32, utf-16-be,
    # utf-16-le, utf-32-be, utf-32-le

    # Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
    # treat the presence of surrogate code point in str as an error.
    'surrogatepass',
]
56
+
57
+
58
@dc.dataclass(frozen=True, kw_only=True)
class TextEncodingOptions:
    """Frozen, keyword-only options for text encoding codecs."""

    # Error-handler name passed to the underlying encoder / decoder; defaults to 'strict'.
    errors: TextEncodingErrors = 'strict'
61
+
62
+
63
+ ##
64
+
65
+
66
class TextEncodingComboCodec(ComboCodec[str, bytes]):
    """Eager and incremental `str` <-> `bytes` codec backed by a stdlib `codecs.CodecInfo`."""

    def __init__(
            self,
            info: codecs.CodecInfo,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> None:
        """
        Args:
            info: The stdlib codec to delegate to; must be a `codecs.CodecInfo`.
            options: Encoding options; its `errors` value is passed to every encode / decode call.
        """
        super().__init__()
        self._info = check.isinstance(info, codecs.CodecInfo)
        self._opts = check.isinstance(options, TextEncodingOptions)

    @classmethod
    def lookup(
            cls,
            name: str,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> 'TextEncodingComboCodec':
        """Alternate constructor: resolve `name` with `codecs.lookup` (which raises `LookupError` if unknown)."""
        return cls(codecs.lookup(name), options)

    def encode(self, i: str) -> bytes:
        """Encode the whole string `i` in one call and return the bytes."""
        # CodecInfo.encode returns (output, length consumed); only the output is needed.
        o, _ = self._info.encode(i, self._opts.errors)
        return o

    def decode(self, o: bytes) -> str:
        """Decode the whole bytes object `o` in one call and return the string."""
        i, _ = self._info.decode(o, self._opts.errors)
        return i

    def encode_incremental(self) -> ta.Generator[bytes | None, str, None]:
        """
        Return a generator-based incremental encoder.

        The generator first yields `None` to be primed. Each non-empty `str` sent in yields the bytes produced for
        it, or `None` when no output is available yet. Sending a falsy value (`''` or `None`) flushes the encoder
        with `final=True` and yields the remaining bytes, after which the generator finishes.
        """
        x = self._info.incrementalencoder(self._opts.errors)
        i = yield None
        while i:
            o = x.encode(i)
            i = yield o or None
        # The terminator may be None (e.g. a plain next()); the stdlib encoders only accept str, so flush with ''.
        yield x.encode(i or '', final=True)

    def decode_incremental(self) -> ta.Generator[str | None, bytes, None]:
        """
        Return a generator-based incremental decoder.

        The generator first yields `None` to be primed. Each non-empty `bytes` sent in yields the text decoded so
        far, or `None` when no output is available yet. Sending a falsy value (`b''` or `None`) flushes the decoder
        with `final=True` and yields the remaining text, after which the generator finishes.
        """
        x = self._info.incrementaldecoder(self._opts.errors)
        i = yield None
        while i:
            o = x.decode(i)
            i = yield o or None
        # The terminator may be None (e.g. a plain next()); the stdlib decoders only accept bytes, so flush with b''.
        yield x.decode(i or b'', final=True)
113
+
114
+
115
+ ##
116
+
117
+
118
class TextEncodingCodec(Codec):
    """`Codec` subclass used for the text encodings built by this module; adds no members."""
120
+
121
+
122
def normalize_text_encoding_name(s: str) -> str:
    """
    Return the normalized form of a text encoding name: lower-cased, with underscores replaced by dashes.

    Raises:
        NameError: If the name contains a space character.
    """
    has_space = ' ' in s
    if has_space:
        raise NameError(s)

    lowered = s.lower()
    return lowered.replace('_', '-')
126
+
127
+
128
def make_text_encoding_codec(
        name: str,
        aliases: ta.Collection[str] | None = None,
        *,
        append_to: ta.MutableSequence[Codec] | None = None,
) -> TextEncodingCodec:
    """
    Build a `TextEncodingCodec` for the stdlib text encoding called `name`.

    Args:
        name: Encoding name; must already equal its `normalize_text_encoding_name` form.
        aliases: Optional collection of alternative names; a bare `str` is rejected by `check.not_isinstance`.
        append_to: When given, the new codec is also appended to this sequence.

    Returns:
        The newly created codec, whose eager and incremental factories are both `TextEncodingComboCodec.lookup`
        pre-bound to `name`.
    """
    # The name has to be supplied in normalized form; the check compares it against its own normalization.
    checked_name = check.equal(name, normalize_text_encoding_name(name))
    checked_aliases = check.not_isinstance(aliases, str)

    result = TextEncodingCodec(
        name=checked_name,
        aliases=checked_aliases,
        input=str,
        output=bytes,
        new=functools.partial(TextEncodingComboCodec.lookup, name),
        new_incremental=functools.partial(TextEncodingComboCodec.lookup, name),
    )

    if append_to is None:
        return result

    append_to.append(result)
    return result
149
+
150
+
151
+ ##
152
+
153
+
154
# Standard text encoding codecs. Each call also appends the codec to STANDARD_CODECS, in this order.

ASCII = make_text_encoding_codec('ascii', ['646', 'us-ascii'], append_to=STANDARD_CODECS)

LATIN1 = make_text_encoding_codec(
    'latin-1', ['iso-8859-1', 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'l1'], append_to=STANDARD_CODECS,
)

UTF32 = make_text_encoding_codec('utf-32', ['u32', 'utf32'], append_to=STANDARD_CODECS)

UTF32BE = make_text_encoding_codec('utf-32-be', ['utf-32be'], append_to=STANDARD_CODECS)

UTF32LE = make_text_encoding_codec('utf-32-le', ['utf-32le'], append_to=STANDARD_CODECS)

UTF16 = make_text_encoding_codec('utf-16', ['u16', 'utf16'], append_to=STANDARD_CODECS)

UTF16BE = make_text_encoding_codec('utf-16-be', ['utf-16be'], append_to=STANDARD_CODECS)

UTF16LE = make_text_encoding_codec('utf-16-le', ['utf-16le'], append_to=STANDARD_CODECS)

UTF7 = make_text_encoding_codec('utf-7', ['u7', 'unicode-1-1-utf-7'], append_to=STANDARD_CODECS)

UTF8 = make_text_encoding_codec('utf-8', ['u8', 'utf', 'utf8', 'cp65001'], append_to=STANDARD_CODECS)

# No aliases are declared for the BOM-signature variant.
UTF8SIG = make_text_encoding_codec('utf-8-sig', append_to=STANDARD_CODECS)