omlish 0.0.0.dev162__py3-none-any.whl → 0.0.0.dev164__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,139 @@
1
+ import contextlib
2
+ import importlib
3
+ import threading
4
+ import typing as ta
5
+
6
+ from .. import cached
7
+ from .. import check
8
+ from .. import lang
9
+ from .base import Codec
10
+ from .base import LazyLoadedCodec
11
+ from .standard import STANDARD_CODECS
12
+
13
+
14
+ if ta.TYPE_CHECKING:
15
+ from ..manifests import load as manifest_load
16
+ else:
17
+ manifest_load = lang.proxy_import('..manifests.load', __package__)
18
+
19
+
20
+ ##
21
+
22
+
23
class CodecRegistry:
    """Thread-safe registry mapping codec names and aliases to Codec instances.

    Codecs may be registered eagerly (``Codec``) or lazily (``LazyLoadedCodec``);
    lazy entries are imported and resolved on first lookup. Registration of
    standard codecs can itself be deferred via ``late_load_callbacks``, which
    run exactly once, on the first lookup-style operation.
    """

    def __init__(
            self,
            *,
            late_load_callbacks: ta.Iterable[ta.Callable[['CodecRegistry'], None]] | None = None,
    ) -> None:
        super().__init__()

        self._late_load_callbacks = late_load_callbacks

        # RLock: late-load callbacks and lazy resolution may re-enter
        # register()/lookup() while the lock is already held.
        self._lock = threading.RLock()
        self._by_name: dict[str, Codec | LazyLoadedCodec] = {}
        self._names_by_alias: dict[str, str] = {}
        self._names_by_cls: dict[type, list[str]] = {}

    def _late_load(self) -> None:
        # Run the deferred-registration callbacks once, then drop them so they
        # never fire again.
        if self._late_load_callbacks:
            for cb in self._late_load_callbacks:
                cb(self)
            self._late_load_callbacks = None

    @contextlib.contextmanager
    def _lock_and_load(self) -> ta.Iterator[None]:
        """Hold the lock and ensure late-load callbacks have run."""
        with self._lock:
            self._late_load()
            yield

    def _post_load(self, codec: Codec) -> None:
        # Index the (now concrete) codec under every class in its MRO so that
        # lookup_type() can find it by any of its base classes.
        for t in type(codec).__mro__:
            if t is not object:
                self._names_by_cls.setdefault(t, []).append(codec.name)

    def register(self, *codecs: Codec | LazyLoadedCodec) -> ta.Self:
        """Register codecs atomically; raises KeyError on any name/alias collision.

        Collisions are checked both against already-registered codecs and among
        the codecs passed in this call, before anything is inserted — so a
        failed call leaves the registry unmodified.
        """
        with self._lock:
            seen: set[str] = set()
            for codec in codecs:
                for n in {codec.name, *(codec.aliases or [])}:
                    # Fix: also reject duplicates *within* this batch; previously
                    # two codecs in one call could share a name, with the later
                    # silently overwriting the earlier.
                    if n in self._names_by_alias or n in seen:
                        raise KeyError(n)
                    seen.add(n)

            for codec in codecs:
                self._by_name[codec.name] = codec
                for n in {codec.name, *(codec.aliases or [])}:
                    self._names_by_alias[n] = codec.name
                if isinstance(codec, Codec):
                    self._post_load(codec)

        return self

    def lookup(self, name_or_alias: str) -> Codec:
        """Resolve a name or alias to a concrete Codec, importing lazily if needed.

        Raises KeyError if the name is unknown.
        """
        with self._lock_and_load():
            # Underscores are normalized to dashes, matching codec naming convention.
            name = self._names_by_alias[name_or_alias.replace('_', '-')]
            codec_or_lazy = self._by_name[name]

            if isinstance(codec_or_lazy, LazyLoadedCodec):
                mod = importlib.import_module(codec_or_lazy.mod_name)
                codec = check.isinstance(getattr(mod, codec_or_lazy.attr_name), Codec)
                # Cache the resolved codec so the import happens only once.
                self._by_name[name] = codec
                self._post_load(codec)
            else:
                codec = check.isinstance(codec_or_lazy, Codec)

            return codec

    def lookup_type(self, cls: type) -> list[Codec]:
        """Return all loaded codecs whose type is (a subclass of) *cls*.

        NOTE(review): lazily-registered codecs that have never been looked up
        are not yet indexed by class and so will not appear here.
        """
        with self._lock_and_load():
            return [self.lookup(n) for n in self._names_by_cls.get(cls, [])]

    def all(self) -> frozenset[str]:
        """Return the set of canonical (non-alias) registered codec names."""
        with self._lock_and_load():
            return frozenset(self._by_name)
93
+
94
+
95
+ ##
96
+
97
+
98
def _install_standard_codecs(registry: CodecRegistry) -> None:
    # Late-load callback: registers the eagerly-defined standard codecs.
    registry.register(*STANDARD_CODECS)
100
+
101
+
102
+ ##
103
+
104
+
105
@cached.function
def _build_manifest_lazy_loaded_codecs() -> ta.Sequence[LazyLoadedCodec]:
    # Scan this package's root distribution plus any discovered packages for
    # LazyLoadedCodec manifest entries. Cached so the manifest scan runs once.
    ldr = manifest_load.MANIFEST_LOADER
    pkgs = {__package__.split('.')[0], *ldr.discover()}
    mns = ldr.load(*pkgs, only=[LazyLoadedCodec])
    return [m.value for m in mns]
111
+
112
+
113
def _install_manifest_lazy_loaded_codecs(registry: CodecRegistry) -> None:
    # Late-load callback: registers codecs advertised via package manifests.
    registry.register(*_build_manifest_lazy_loaded_codecs())
115
+
116
+
117
+ ##
118
+
119
+
120
# Global default registry. Standard and manifest-discovered codecs are
# installed lazily, on first lookup, via the late-load callbacks.
REGISTRY = CodecRegistry(
    late_load_callbacks=[
        _install_standard_codecs,
        _install_manifest_lazy_loaded_codecs,
    ],
)

# Module-level conveniences bound to the global registry.
register = REGISTRY.register
lookup = REGISTRY.lookup
129
+
130
+
131
+ ##
132
+
133
+
134
def encode(i: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Encode *i* with the codec registered under *name*.

    Extra keyword arguments are forwarded to the codec's factory.
    """
    codec = lookup(name)
    return codec.new(**kwargs).encode(i)
136
+
137
+
138
def decode(o: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Decode *o* with the codec registered under *name*.

    Extra keyword arguments are forwarded to the codec's factory.
    """
    codec = lookup(name)
    return codec.new(**kwargs).decode(o)
@@ -0,0 +1,4 @@
1
+ from .base import Codec
2
+
3
+
4
# Codecs appended here (e.g. by make_text_encoding_codec(..., append_to=...))
# are registered eagerly by the registry's standard-codec late-load callback.
STANDARD_CODECS: list[Codec] = []
omlish/codecs/text.py ADDED
@@ -0,0 +1,217 @@
1
+ import codecs
2
+ import dataclasses as dc
3
+ import functools
4
+ import typing as ta
5
+
6
+ from omlish import check
7
+
8
+ from .base import Codec
9
+ from .base import ComboCodec
10
+ from .standard import STANDARD_CODECS
11
+
12
+
13
+ ##
14
+
15
+
16
# Allowed values for the stdlib codecs error-handler parameter.
TextEncodingErrors: ta.TypeAlias = ta.Literal[
    # Raise UnicodeError (or a subclass), this is the default. Implemented in strict_errors().
    'strict',

    # Ignore the malformed data and continue without further notice. Implemented in ignore_errors().
    'ignore',

    # Replace with a replacement marker. On encoding, use ? (ASCII character). On decoding, use � (U+FFFD, the official
    # REPLACEMENT CHARACTER). Implemented in replace_errors().
    'replace',

    # Replace with backslashed escape sequences. On encoding, use hexadecimal form of Unicode code point with formats
    # \xhh \uxxxx \Uxxxxxxxx. On decoding, use hexadecimal form of byte value with format \xhh. Implemented in
    # backslashreplace_errors().
    'backslashreplace',

    # On decoding, replace byte with individual surrogate code ranging from U+DC80 to U+DCFF. This code will then be
    # turned back into the same byte when the 'surrogateescape' error handler is used when encoding the data. (See PEP
    # 383 for more.)
    'surrogateescape',

    ##
    # The following error handlers are only applicable to encoding (within text encodings):

    # Replace with XML/HTML numeric character reference, which is a decimal form of Unicode code point with format
    # &#num;. Implemented in xmlcharrefreplace_errors().
    'xmlcharrefreplace',

    # Replace with \N{...} escape sequences, what appears in the braces is the Name property from Unicode Character
    # Database. Implemented in namereplace_errors().
    'namereplace',

    ##
    # In addition, the following error handler is specific to the given codecs:
    # utf-8, utf-16, utf-32, utf-16-be, utf-16-le, utf-32-be, utf-32-le

    # Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
    # treat the presence of surrogate code point in str as an error.
    'surrogatepass',
]
56
+
57
+
58
@dc.dataclass(frozen=True, kw_only=True)
class TextEncodingOptions:
    """Options applied to every text-encoding operation of a codec instance."""

    # Error-handling policy passed through to the stdlib codecs machinery.
    errors: TextEncodingErrors = 'strict'
61
+
62
+
63
+ ##
64
+
65
+
66
class TextEncodingComboCodec(ComboCodec[str, bytes]):
    """One-shot and incremental str<->bytes codec backed by a stdlib CodecInfo."""

    def __init__(
            self,
            info: codecs.CodecInfo,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> None:
        super().__init__()
        self._info = check.isinstance(info, codecs.CodecInfo)
        self._opts = check.isinstance(options, TextEncodingOptions)

    @classmethod
    def lookup(
            cls,
            name: str,
            options: TextEncodingOptions = TextEncodingOptions(),
    ) -> 'TextEncodingComboCodec':
        """Build a codec from a registered text-encoding name (e.g. 'utf-8')."""
        return cls(codecs.lookup(name), options)

    def encode(self, i: str) -> bytes:
        o, _ = self._info.encode(i, self._opts.errors)
        return o

    def decode(self, o: bytes) -> str:
        i, _ = self._info.decode(o, self._opts.errors)
        return i

    def encode_incremental(self) -> ta.Generator[bytes | None, str, None]:
        """Generator-based incremental encoder: send str chunks, receive bytes.

        Yields None when a sent chunk produced no output. A falsy chunk
        (empty string or None) signals end-of-input and flushes the encoder.
        """
        x = self._info.incrementalencoder(self._opts.errors)
        i = yield None
        while True:
            if not i:
                break
            o = x.encode(i)
            i = yield o or None
        # Fix: the terminating chunk may be None, but IncrementalEncoder.encode
        # requires a str — substitute '' before the final flush.
        o = x.encode(i or '', final=True)
        yield o

    def decode_incremental(self) -> ta.Generator[str | None, bytes, None]:
        """Generator-based incremental decoder: send bytes chunks, receive str.

        Yields None when a sent chunk produced no output. A falsy chunk
        (empty bytes or None) signals end-of-input and flushes the decoder.
        """
        x = self._info.incrementaldecoder(self._opts.errors)
        i = yield None
        while True:
            if not i:
                break
            o = x.decode(i)
            i = yield o or None
        # Fix: substitute b'' for a None terminator before the final flush.
        o = x.decode(i or b'', final=True)
        yield o
113
+
114
+
115
+ ##
116
+
117
+
118
class TextEncodingCodec(Codec):
    """Marker Codec subclass for stdlib text encodings (ascii, utf-8, ...)."""

    pass
120
+
121
+
122
def normalize_text_encoding_name(s: str) -> str:
    """Canonicalize a text-encoding name: lowercased, underscores become dashes.

    Raises NameError if the name contains a space.
    """
    if ' ' in s:
        raise NameError(s)
    lowered = s.lower()
    return lowered.replace('_', '-')
126
+
127
+
128
def make_text_encoding_codec(
        name: str,
        aliases: ta.Collection[str] | None = None,
        *,
        append_to: ta.MutableSequence[Codec] | None = None,
) -> TextEncodingCodec:
    """Build a TextEncodingCodec for the stdlib encoding *name*.

    The name must already be in normalized form (lowercase, dashes). If
    *append_to* is given, the new codec is also appended to that sequence
    (typically STANDARD_CODECS).
    """
    codec = TextEncodingCodec(
        name=check.equal(name, normalize_text_encoding_name(name)),
        aliases=check.not_isinstance(aliases, str),

        input=str,
        output=bytes,

        # Both factories resolve the encoding lazily via codecs.lookup();
        # TextEncodingComboCodec serves the eager and incremental roles.
        new=functools.partial(TextEncodingComboCodec.lookup, name),
        new_incremental=functools.partial(TextEncodingComboCodec.lookup, name),
    )

    if append_to is not None:
        append_to.append(codec)

    return codec
149
+
150
+
151
+ ##
152
+
153
+
154
# Standard text-encoding codecs, registered into STANDARD_CODECS. Aliases
# mirror the stdlib codecs registry's alias sets for each encoding.

ASCII = make_text_encoding_codec(
    'ascii',
    ['646', 'us-ascii'],
    append_to=STANDARD_CODECS,
)

LATIN1 = make_text_encoding_codec(
    'latin-1',
    ['iso-8859-1', 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'l1'],
    append_to=STANDARD_CODECS,
)

UTF32 = make_text_encoding_codec(
    'utf-32',
    ['u32', 'utf32'],
    append_to=STANDARD_CODECS,
)

UTF32BE = make_text_encoding_codec(
    'utf-32-be',
    ['utf-32be'],
    append_to=STANDARD_CODECS,
)

UTF32LE = make_text_encoding_codec(
    'utf-32-le',
    ['utf-32le'],
    append_to=STANDARD_CODECS,
)

UTF16 = make_text_encoding_codec(
    'utf-16',
    ['u16', 'utf16'],
    append_to=STANDARD_CODECS,
)

UTF16BE = make_text_encoding_codec(
    'utf-16-be',
    ['utf-16be'],
    append_to=STANDARD_CODECS,
)

UTF16LE = make_text_encoding_codec(
    'utf-16-le',
    ['utf-16le'],
    append_to=STANDARD_CODECS,
)

UTF7 = make_text_encoding_codec(
    'utf-7',
    ['u7', 'unicode-1-1-utf-7'],
    append_to=STANDARD_CODECS,
)

UTF8 = make_text_encoding_codec(
    'utf-8',
    ['u8', 'utf', 'utf8', 'cp65001'],
    append_to=STANDARD_CODECS,
)

# No aliases for utf-8-sig.
UTF8SIG = make_text_encoding_codec(
    'utf-8-sig',
    append_to=STANDARD_CODECS,
)
omlish/formats/cbor.py ADDED
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_bytes_object_codec
5
+ from .codecs import make_object_lazy_loaded_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import cbor2
10
+ else:
11
+ cbor2 = lang.proxy_import('cbor2')
12
+
13
+
14
+ ##
15
+
16
+
17
def dump(obj: ta.Any) -> bytes:
    """Serialize *obj* to CBOR bytes (imports cbor2 on first use)."""
    return cbor2.dumps(obj)
19
+
20
+
21
def load(s: bytes) -> ta.Any:
    """Deserialize CBOR bytes *s* to a Python object."""
    return cbor2.loads(s)
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec plus a manifest-advertised lazy wrapper for registry discovery.
CBOR_CODEC = make_bytes_object_codec('cbor', dump, load)

# @omlish-manifest
_CBOR_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'CBOR_CODEC', CBOR_CODEC)
@@ -0,0 +1,93 @@
1
+ import typing as ta
2
+
3
+ from .. import codecs
4
+ from .. import reflect as rfl
5
+
6
+
7
+ ObjectCodecT = ta.TypeVar('ObjectCodecT', bound='ObjectCodec')
8
+
9
+
10
+ ##
11
+
12
+
13
class ObjectCodec(codecs.Codec):
    """Base marker for codecs serializing arbitrary objects to str or bytes."""

    pass
15
+
16
+
17
def make_object_codec(
        cls: type[ObjectCodecT],
        name: str,
        dumps: ta.Callable,
        loads: ta.Callable,
        *,
        input: rfl.Type = rfl.type_(ta.Any),  # noqa
        aliases: ta.Collection[str] | None = None,
) -> ObjectCodecT:
    """Construct an instance of ObjectCodec subclass *cls* wrapping *dumps*/*loads*."""

    def new() -> ta.Any:
        # Factory producing an eager fn-pair codec over the given callables.
        return codecs.FnPairEagerCodec.of(dumps, loads)

    return cls(
        name=name,
        aliases=aliases,

        input=input,
        output=bytes,

        new=new,
    )
35
+
36
+
37
+ ##
38
+
39
+
40
class BytesObjectCodec(ObjectCodec):
    """ObjectCodec whose serialized form is bytes."""

    pass
42
+
43
+
44
def make_bytes_object_codec(
        name: str,
        dumps: ta.Callable[[ta.Any], bytes],
        loads: ta.Callable[[bytes], ta.Any],
        **kwargs: ta.Any,
) -> BytesObjectCodec:
    """Convenience wrapper: build an ObjectCodec whose wire form is bytes."""
    return make_object_codec(BytesObjectCodec, name, dumps, loads, **kwargs)
57
+
58
+
59
+ ##
60
+
61
+
62
class StrObjectCodec(ObjectCodec):
    """ObjectCodec whose serialized form is str."""

    pass
64
+
65
+
66
def make_str_object_codec(
        name: str,
        dumps: ta.Callable[[ta.Any], str],
        loads: ta.Callable[[str], ta.Any],
        **kwargs: ta.Any,
) -> StrObjectCodec:
    """Convenience wrapper: build an ObjectCodec whose wire form is str."""
    return make_object_codec(StrObjectCodec, name, dumps, loads, **kwargs)
79
+
80
+
81
+ ##
82
+
83
+
84
def make_object_lazy_loaded_codec(
        mod_name: str,
        attr_name: str,
        codec: ObjectCodec,
) -> codecs.LazyLoadedCodec:
    """Wrap a module-level codec attribute for deferred loading via the registry."""
    return codecs.LazyLoadedCodec.new(mod_name, attr_name, codec)
@@ -0,0 +1,33 @@
1
+ from ..codecs import make_object_lazy_loaded_codec
2
+ from ..codecs import make_str_object_codec
3
+ from .json import dumps
4
+ from .json import dumps_compact
5
+ from .json import dumps_pretty
6
+ from .json import loads
7
+
8
+
9
+ ##
10
+
11
+
12
# Default, compact, and pretty JSON codecs; all three share the same loads.
# Each has a manifest-advertised lazy wrapper for registry discovery.

JSON_CODEC = make_str_object_codec('json', dumps, loads)

# @omlish-manifest
_JSON_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_CODEC', JSON_CODEC)


#


JSON_COMPACT_CODEC = make_str_object_codec('json-compact', dumps_compact, loads)

# @omlish-manifest
_JSON_COMPACT_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_COMPACT_CODEC', JSON_COMPACT_CODEC)


#


JSON_PRETTY_CODEC = make_str_object_codec('json-pretty', dumps_pretty, loads)

# @omlish-manifest
_JSON_PRETTY_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_PRETTY_CODEC', JSON_PRETTY_CODEC)
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_object_lazy_loaded_codec
5
+ from .codecs import make_str_object_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import json5
10
+ else:
11
+ json5 = lang.proxy_import('json5')
12
+
13
+
14
+ ##
15
+
16
+
17
def dumps(obj: ta.Any) -> str:
    """Serialize *obj* to a JSON5 string (imports json5 on first use)."""
    return json5.dumps(obj)
19
+
20
+
21
def loads(s: str) -> ta.Any:
    """Deserialize a JSON5 string *s* to a Python object."""
    return json5.loads(s)
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec plus a manifest-advertised lazy wrapper for registry discovery.
JSON5_CODEC = make_str_object_codec('json5', dumps, loads)

# @omlish-manifest
_JSON5_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON5_CODEC', JSON5_CODEC)
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_bytes_object_codec
5
+ from .codecs import make_object_lazy_loaded_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import pickle
10
+ else:
11
+ pickle = lang.proxy_import('pickle')
12
+
13
+
14
+ ##
15
+
16
+
17
def dump(obj: ta.Any) -> bytes:
    """Serialize *obj* to pickle bytes (imports pickle lazily)."""
    return pickle.dumps(obj)
19
+
20
+
21
def load(s: bytes) -> ta.Any:
    """Deserialize pickle bytes *s* to a Python object.

    SECURITY NOTE(review): pickle.loads executes arbitrary code embedded in
    the payload — never call this codec on untrusted data.
    """
    return pickle.loads(s)
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec plus a manifest-advertised lazy wrapper for registry discovery.
PICKLE_CODEC = make_bytes_object_codec('pickle', dump, load)

# @omlish-manifest
_PICKLE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'PICKLE_CODEC', PICKLE_CODEC)
omlish/formats/toml.py ADDED
@@ -0,0 +1,17 @@
1
+ import tomllib
2
+
3
+ from .codecs import make_object_lazy_loaded_codec
4
+ from .codecs import make_str_object_codec
5
+
6
+
7
+ ##
8
+
9
+
10
def _dumps(obj):
    # TOML serialization is intentionally unsupported (tomllib is read-only);
    # the codec is decode-only.
    raise TypeError('Unsupported')
12
+
13
+
14
# Decode-only codec (see _dumps) plus a manifest-advertised lazy wrapper.
TOML_CODEC = make_str_object_codec('toml', _dumps, tomllib.loads)

# @omlish-manifest
_TOML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'TOML_CODEC', TOML_CODEC)
omlish/formats/yaml.py CHANGED
@@ -14,6 +14,8 @@ import typing as ta
14
14
  from .. import check
15
15
  from .. import dataclasses as dc
16
16
  from .. import lang
17
+ from .codecs import make_object_lazy_loaded_codec
18
+ from .codecs import make_str_object_codec
17
19
 
18
20
 
19
21
  if ta.TYPE_CHECKING:
@@ -239,3 +241,19 @@ def full_load(stream): # noqa
239
241
 
240
242
  def full_load_all(stream): # noqa # noqa
241
243
  return load_all(stream, yaml.FullLoader)
244
+
245
+
246
+ ##
247
+
248
+
249
def dump(obj, **kwargs):
    """Serialize *obj* to a YAML string, forwarding **kwargs to yaml.dump."""
    return yaml.dump(obj, **kwargs)
251
+
252
+
253
+ ##
254
+
255
+
256
# Codec pairing full yaml.dump output with safe_load parsing, plus a
# manifest-advertised lazy wrapper for registry discovery.
YAML_CODEC = make_str_object_codec('yaml', dump, safe_load, aliases=['yml'])

# @omlish-manifest
_YAML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_CODEC', YAML_CODEC)
@@ -3,6 +3,8 @@ import typing as ta
3
3
 
4
4
  from ... import lang
5
5
  from .base import Compression
6
+ from .codecs import make_compression_codec
7
+ from .codecs import make_compression_lazy_loaded_codec
6
8
 
7
9
 
8
10
  if ta.TYPE_CHECKING:
@@ -11,8 +13,11 @@ else:
11
13
  brotli = lang.proxy_import('brotli')
12
14
 
13
15
 
16
+ ##
17
+
18
+
14
19
  @dc.dataclass(frozen=True, kw_only=True)
15
- class SnappyCompression(Compression):
20
+ class BrotliCompression(Compression):
16
21
  mode: int | None = None
17
22
  quality: int | None = None
18
23
  lgwin: int | None = None
@@ -31,3 +36,12 @@ class SnappyCompression(Compression):
31
36
  return brotli.decompress(
32
37
  d,
33
38
  )
39
+
40
+
41
+ ##
42
+
43
+
44
# Compression codec plus a manifest-advertised lazy wrapper for discovery.
BROTLI_CODEC = make_compression_codec('brotli', BrotliCompression)

# @omlish-manifest
_BROTLI_LAZY_CODEC = make_compression_lazy_loaded_codec(__name__, 'BROTLI_CODEC', BROTLI_CODEC)
omlish/io/compress/bz2.py CHANGED
@@ -9,6 +9,8 @@ from .adapters import CompressorObjectIncrementalAdapter
9
9
  from .adapters import DecompressorObjectIncrementalAdapter
10
10
  from .base import Compression
11
11
  from .base import IncrementalCompression
12
+ from .codecs import make_compression_codec
13
+ from .codecs import make_compression_lazy_loaded_codec
12
14
 
13
15
 
14
16
  if ta.TYPE_CHECKING:
@@ -17,6 +19,9 @@ else:
17
19
  bz2 = lang.proxy_import('bz2')
18
20
 
19
21
 
22
+ ##
23
+
24
+
20
25
  @dc.dataclass(frozen=True, kw_only=True)
21
26
  class Bz2Compression(Compression, IncrementalCompression):
22
27
  level: int = 9
@@ -45,3 +50,12 @@ class Bz2Compression(Compression, IncrementalCompression):
45
50
  bz2.BZ2Decompressor, # type: ignore
46
51
  trailing_error=OSError,
47
52
  )()
53
+
54
+
55
+ ##
56
+
57
+
58
# Compression codec plus a manifest-advertised lazy wrapper for discovery.
BZ2_CODEC = make_compression_codec('bz2', Bz2Compression)

# @omlish-manifest
_BZ2_LAZY_CODEC = make_compression_lazy_loaded_codec(__name__, 'BZ2_CODEC', BZ2_CODEC)