omlish 0.0.0.dev162__py3-none-any.whl → 0.0.0.dev163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
1
+ import contextlib
2
+ import importlib
3
+ import threading
4
+ import typing as ta
5
+
6
+ from .. import cached
7
+ from .. import check
8
+ from .. import lang
9
+ from .base import Codec
10
+ from .base import LazyLoadedCodec
11
+ from .standard import STANDARD_CODECS
12
+
13
+
14
+ if ta.TYPE_CHECKING:
15
+ from ..manifests import load as manifest_load
16
+ else:
17
+ manifest_load = lang.proxy_import('..manifests.load', __package__)
18
+
19
+
20
+ ##
21
+
22
+
23
class CodecRegistry:
    """
    Thread-safe registry of codecs, addressable by canonical name or alias.

    Entries may be eager `Codec` instances or `LazyLoadedCodec` stubs; a stub is imported and swapped for the real
    codec on first lookup. `late_load_callbacks`, when given, run exactly once — under the lock, before the first
    lookup-style operation — so whole codec sets can themselves be installed on demand.
    """

    def __init__(
        self,
        *,
        late_load_callbacks: ta.Iterable[ta.Callable[['CodecRegistry'], None]] | None = None,
    ) -> None:
        super().__init__()

        self._late_load_callbacks = late_load_callbacks

        # RLock, not Lock: lookup_type() calls lookup() while already holding the lock.
        self._lock = threading.RLock()
        self._by_name: dict[str, Codec | LazyLoadedCodec] = {}
        self._names_by_alias: dict[str, str] = {}
        self._names_by_cls: dict[type, list[str]] = {}

    def _late_load(self) -> None:
        # Run the one-shot install callbacks, then clear them so they can never fire twice.
        if self._late_load_callbacks:
            for cb in self._late_load_callbacks:
                cb(self)
            self._late_load_callbacks = None

    @contextlib.contextmanager
    def _lock_and_load(self) -> ta.Iterator[None]:
        # Acquire the lock and ensure the late-load callbacks have run before the body executes.
        with self._lock:
            self._late_load()
            yield

    def _post_load(self, codec: Codec) -> None:
        # Index the (now fully loaded) codec under every class in its MRO, enabling lookup_type().
        for t in type(codec).__mro__:
            if t is not object:
                self._names_by_cls.setdefault(t, []).append(codec.name)

    def register(self, *codecs: Codec | LazyLoadedCodec) -> ta.Self:
        """
        Register codecs (eager or lazy), binding each codec's name and all of its aliases.

        Raises:
            KeyError: if any name or alias collides with a previously registered codec.
        """
        with self._lock:
            # Validate the whole batch before mutating anything.
            # NOTE(review): collisions *between* codecs of the same call are not caught here (only against prior
            # registrations) — the second loop would silently overwrite; confirm whether that should also raise.
            for codec in codecs:
                for n in {codec.name, *(codec.aliases or [])}:
                    if n in self._names_by_alias:
                        raise KeyError(n)

            for codec in codecs:
                self._by_name[codec.name] = codec
                for n in {codec.name, *(codec.aliases or [])}:
                    self._names_by_alias[n] = codec.name
                # Lazy stubs are only class-indexed once actually loaded (see lookup()).
                if isinstance(codec, Codec):
                    self._post_load(codec)

        return self

    def lookup(self, name_or_alias: str) -> Codec:
        """Resolve a name or alias (underscores normalized to dashes) to a loaded `Codec`."""
        with self._lock_and_load():
            name = self._names_by_alias[name_or_alias.replace('_', '-')]
            codec_or_lazy = self._by_name[name]

            if isinstance(codec_or_lazy, LazyLoadedCodec):
                # First access: import the declaring module, materialize the codec, and cache it over the stub.
                mod = importlib.import_module(codec_or_lazy.mod_name)
                codec = check.isinstance(getattr(mod, codec_or_lazy.attr_name), Codec)
                self._by_name[name] = codec
                self._post_load(codec)
            else:
                codec = check.isinstance(codec_or_lazy, Codec)

            return codec

    def lookup_type(self, cls: type) -> list[Codec]:
        """Return loaded codecs indexed under `cls`; lazily-registered codecs appear only after their first lookup()."""
        with self._lock_and_load():
            return [self.lookup(n) for n in self._names_by_cls.get(cls, [])]

    def all(self) -> frozenset[str]:
        """Return the set of registered canonical codec names (aliases excluded)."""
        with self._lock_and_load():
            return frozenset(self._by_name)
93
+
94
+
95
+ ##
96
+
97
+
98
def _install_standard_codecs(registry: CodecRegistry) -> None:
    """Late-load hook: install all eagerly-constructed standard codecs into `registry`."""
    standard = STANDARD_CODECS
    registry.register(*standard)
100
+
101
+
102
+ ##
103
+
104
+
105
@cached.function
def _build_manifest_lazy_loaded_codecs() -> ta.Sequence[LazyLoadedCodec]:
    """
    Collect `LazyLoadedCodec` manifest entries from this distribution's root package and every discovered package.

    Cached, so manifest discovery runs at most once per process.
    """
    loader = manifest_load.MANIFEST_LOADER
    root_pkg = __package__.split('.')[0]
    pkgs = {root_pkg, *loader.discover()}
    return [m.value for m in loader.load(*pkgs, only=[LazyLoadedCodec])]
111
+
112
+
113
def _install_manifest_lazy_loaded_codecs(registry: CodecRegistry) -> None:
    """Late-load hook: install codecs declared via package manifests."""
    lazy = _build_manifest_lazy_loaded_codecs()
    registry.register(*lazy)
115
+
116
+
117
+ ##
118
+
119
+
120
# Global default registry. Standard and manifest-discovered codecs are installed lazily — on first lookup — via the
# late-load callbacks rather than at import time.
REGISTRY = CodecRegistry(
    late_load_callbacks=[
        _install_standard_codecs,
        _install_manifest_lazy_loaded_codecs,
    ],
)

# Module-level conveniences bound to the global registry.
register = REGISTRY.register
lookup = REGISTRY.lookup
129
+
130
+
131
+ ##
132
+
133
+
134
def encode(i: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Encode `i` with the named codec from the global registry; `kwargs` go to the codec's factory."""
    codec = lookup(name)
    return codec.new(**kwargs).encode(i)
136
+
137
+
138
def decode(o: ta.Any, name: str, **kwargs: ta.Any) -> ta.Any:
    """Decode `o` with the named codec from the global registry; `kwargs` go to the codec's factory."""
    codec = lookup(name)
    return codec.new(**kwargs).decode(o)
@@ -0,0 +1,4 @@
1
from .base import Codec


# Eagerly-constructed codecs shipped with the library. Populated at import time by modules that pass
# `append_to=STANDARD_CODECS` (e.g. the text-encoding codecs); consumed by the registry's standard-codec install hook.
STANDARD_CODECS: list[Codec] = []
omlish/codecs/text.py ADDED
@@ -0,0 +1,217 @@
1
+ import codecs
2
+ import dataclasses as dc
3
+ import functools
4
+ import typing as ta
5
+
6
+ from omlish import check
7
+
8
+ from .base import Codec
9
+ from .base import ComboCodec
10
+ from .standard import STANDARD_CODECS
11
+
12
+
13
+ ##
14
+
15
+
16
# Error-handler names accepted by the `errors=` argument of the stdlib codec machinery; the per-value notes follow the
# stdlib `codecs` module documentation.
TextEncodingErrors: ta.TypeAlias = ta.Literal[
    # Raise UnicodeError (or a subclass), this is the default. Implemented in strict_errors().
    'strict',

    # Ignore the malformed data and continue without further notice. Implemented in ignore_errors().
    'ignore',

    # Replace with a replacement marker. On encoding, use ? (ASCII character). On decoding, use � (U+FFFD, the official
    # REPLACEMENT CHARACTER). Implemented in replace_errors().
    'replace',

    # Replace with backslashed escape sequences. On encoding, use hexadecimal form of Unicode code point with formats
    # \xhh \uxxxx \Uxxxxxxxx. On decoding, use hexadecimal form of byte value with format \xhh. Implemented in
    # backslashreplace_errors().
    'backslashreplace',

    # On decoding, replace byte with individual surrogate code ranging from U+DC80 to U+DCFF. This code will then be
    # turned back into the same byte when the 'surrogateescape' error handler is used when encoding the data. (See PEP
    # 383 for more.)
    'surrogateescape',

    ##
    # The following error handlers are only applicable to encoding (within text encodings):

    # Replace with XML/HTML numeric character reference, which is a decimal form of Unicode code point with format
    # &#num;. Implemented in xmlcharrefreplace_errors().
    'xmlcharrefreplace',

    # Replace with \N{...} escape sequences, what appears in the braces is the Name property from Unicode Character
    # Database. Implemented in namereplace_errors().
    'namereplace',

    ##
    # In addition, the following error handler is specific to the given codecs:
    # utf-8, utf-16, utf-32, utf-16-be, utf-16-le, utf-32-be, utf-32-le

    # Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
    # treat the presence of surrogate code point in str as an error.
    'surrogatepass',
]
56
+
57
+
58
@dc.dataclass(frozen=True, kw_only=True)
class TextEncodingOptions:
    """Immutable options applied to text encode/decode operations."""

    # Error-handler name passed through to the stdlib codec calls.
    errors: TextEncodingErrors = 'strict'
61
+
62
+
63
+ ##
64
+
65
+
66
class TextEncodingComboCodec(ComboCodec[str, bytes]):
    """
    Bridges a stdlib `codecs.CodecInfo` to the `ComboCodec` interface: one-shot str <-> bytes conversion plus
    incremental (generator-based) variants, all honoring the configured error handler.
    """

    def __init__(
        self,
        info: codecs.CodecInfo,
        options: TextEncodingOptions = TextEncodingOptions(),
    ) -> None:
        super().__init__()
        self._info = check.isinstance(info, codecs.CodecInfo)
        self._opts = check.isinstance(options, TextEncodingOptions)

    @classmethod
    def lookup(
        cls,
        name: str,
        options: TextEncodingOptions = TextEncodingOptions(),
    ) -> 'TextEncodingComboCodec':
        """Alternate constructor resolving `name` through the stdlib `codecs.lookup` registry."""
        return cls(codecs.lookup(name), options)

    def encode(self, i: str) -> bytes:
        # CodecInfo.encode returns (output, length-consumed); only the output is of interest here.
        o, _ = self._info.encode(i, self._opts.errors)
        return o

    def decode(self, o: bytes) -> str:
        i, _ = self._info.decode(o, self._opts.errors)
        return i

    def encode_incremental(self) -> ta.Generator[bytes | None, str, None]:
        """
        Generator protocol: yields None once to prime, then the caller send()s str chunks and receives encoded bytes
        (or None when the encoder buffered everything). A falsy chunk ends the stream and triggers the final flush.
        """
        x = self._info.incrementalencoder(self._opts.errors)
        i = yield None
        while True:
            if not i:
                break
            o = x.encode(i)
            i = yield o or None
        # NOTE(review): the terminating falsy value is passed straight to encode(..., final=True) — this assumes
        # callers finish with '' rather than None; confirm against the ComboCodec contract.
        o = x.encode(i, final=True)
        yield o

    def decode_incremental(self) -> ta.Generator[str | None, bytes, None]:
        """Mirror of encode_incremental for the bytes -> str direction."""
        x = self._info.incrementaldecoder(self._opts.errors)
        i = yield None
        while True:
            if not i:
                break
            o = x.decode(i)
            i = yield o or None
        o = x.decode(i, final=True)
        yield o
113
+
114
+
115
+ ##
116
+
117
+
118
class TextEncodingCodec(Codec):
    """Marker subclass of `Codec` for codecs backed by stdlib text encodings."""

    pass
120
+
121
+
122
def normalize_text_encoding_name(s: str) -> str:
    """
    Canonicalize a text-encoding name: lowercase, with underscores turned into dashes.

    Raises:
        NameError: if the name contains a space.
    """
    if ' ' in s:
        # NOTE(review): NameError is an odd choice for input validation (ValueError would be conventional), but it is
        # preserved for compatibility with existing callers.
        raise NameError(s)
    return s.replace('_', '-').lower()
126
+
127
+
128
def make_text_encoding_codec(
    name: str,
    aliases: ta.Collection[str] | None = None,
    *,
    append_to: ta.MutableSequence[Codec] | None = None,
) -> TextEncodingCodec:
    """
    Build a `TextEncodingCodec` wrapping the stdlib text encoding `name`.

    `name` must already be in normalized form (checked). When `append_to` is given the new codec is also appended to
    it — this is how the module-level codecs land in STANDARD_CODECS.
    """
    checked_name = check.equal(name, normalize_text_encoding_name(name))
    checked_aliases = check.not_isinstance(aliases, str)

    out = TextEncodingCodec(
        name=checked_name,
        aliases=checked_aliases,

        input=str,
        output=bytes,

        new=functools.partial(TextEncodingComboCodec.lookup, name),
        new_incremental=functools.partial(TextEncodingComboCodec.lookup, name),
    )

    if append_to is not None:
        append_to.append(out)

    return out
149
+
150
+
151
+ ##
152
+
153
+
154
# Codecs for the common stdlib text encodings; each is appended to STANDARD_CODECS as a construction side effect.
# NOTE(review): alias spellings appear to follow the stdlib `encodings.aliases` table (dash-normalized) — confirm
# when adding entries.

ASCII = make_text_encoding_codec(
    'ascii',
    ['646', 'us-ascii'],
    append_to=STANDARD_CODECS,
)

LATIN1 = make_text_encoding_codec(
    'latin-1',
    ['iso-8859-1', 'iso8859-1', '8859', 'cp819', 'latin', 'latin1', 'l1'],
    append_to=STANDARD_CODECS,
)

UTF32 = make_text_encoding_codec(
    'utf-32',
    ['u32', 'utf32'],
    append_to=STANDARD_CODECS,
)

UTF32BE = make_text_encoding_codec(
    'utf-32-be',
    ['utf-32be'],
    append_to=STANDARD_CODECS,
)

UTF32LE = make_text_encoding_codec(
    'utf-32-le',
    ['utf-32le'],
    append_to=STANDARD_CODECS,
)

UTF16 = make_text_encoding_codec(
    'utf-16',
    ['u16', 'utf16'],
    append_to=STANDARD_CODECS,
)

UTF16BE = make_text_encoding_codec(
    'utf-16-be',
    ['utf-16be'],
    append_to=STANDARD_CODECS,
)

UTF16LE = make_text_encoding_codec(
    'utf-16-le',
    ['utf-16le'],
    append_to=STANDARD_CODECS,
)

UTF7 = make_text_encoding_codec(
    'utf-7',
    ['u7', 'unicode-1-1-utf-7'],
    append_to=STANDARD_CODECS,
)

UTF8 = make_text_encoding_codec(
    'utf-8',
    ['u8', 'utf', 'utf8', 'cp65001'],
    append_to=STANDARD_CODECS,
)

# No aliases registered for utf-8-sig.
UTF8SIG = make_text_encoding_codec(
    'utf-8-sig',
    append_to=STANDARD_CODECS,
)
omlish/formats/cbor.py ADDED
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_bytes_object_codec
5
+ from .codecs import make_object_lazy_loaded_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import cbor2
10
+ else:
11
+ cbor2 = lang.proxy_import('cbor2')
12
+
13
+
14
+ ##
15
+
16
+
17
def dump(obj: ta.Any) -> bytes:
    """Serialize `obj` to CBOR bytes (delegates to the lazily-imported `cbor2` package)."""
    encoded = cbor2.dumps(obj)
    return encoded
19
+
20
+
21
def load(s: bytes) -> ta.Any:
    """Deserialize a Python object from CBOR bytes via the `cbor2` package."""
    decoded = cbor2.loads(s)
    return decoded
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec instance, usable directly; the lazy wrapper below lets the registry defer importing this module.
CBOR_CODEC = make_bytes_object_codec('cbor', dump, load)

# @omlish-manifest
_CBOR_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'CBOR_CODEC', CBOR_CODEC)
@@ -0,0 +1,93 @@
1
+ import typing as ta
2
+
3
+ from .. import codecs
4
+ from .. import reflect as rfl
5
+
6
+
7
+ ObjectCodecT = ta.TypeVar('ObjectCodecT', bound='ObjectCodec')
8
+
9
+
10
+ ##
11
+
12
+
13
class ObjectCodec(codecs.Codec):
    """Base for codecs that serialize arbitrary Python objects (input type defaults to Any)."""

    pass
15
+
16
+
17
def make_object_codec(
    cls: type[ObjectCodecT],
    name: str,
    dumps: ta.Callable,
    loads: ta.Callable,
    *,
    input: rfl.Type = rfl.type_(ta.Any),  # noqa
    aliases: ta.Collection[str] | None = None,
) -> ObjectCodecT:
    """
    Construct an object codec of type `cls` from a dumps/loads function pair.

    The codec's `new` factory builds a fresh eager fn-pair codec over the given functions on every call.
    """
    def new():
        return codecs.FnPairEagerCodec.of(dumps, loads)

    return cls(
        name=name,
        aliases=aliases,

        input=input,
        output=bytes,

        new=new,
    )
35
+
36
+
37
+ ##
38
+
39
+
40
class BytesObjectCodec(ObjectCodec):
    """Object codec whose serialized form is `bytes`."""

    pass
42
+
43
+
44
def make_bytes_object_codec(
    name: str,
    dumps: ta.Callable[[ta.Any], bytes],
    loads: ta.Callable[[bytes], ta.Any],
    **kwargs: ta.Any,
) -> BytesObjectCodec:
    """Construct a `BytesObjectCodec` (object <-> bytes) from a dumps/loads pair; `kwargs` forward on."""
    return make_object_codec(BytesObjectCodec, name, dumps, loads, **kwargs)
57
+
58
+
59
+ ##
60
+
61
+
62
class StrObjectCodec(ObjectCodec):
    """Object codec whose serialized form is `str`."""

    pass
64
+
65
+
66
def make_str_object_codec(
    name: str,
    dumps: ta.Callable[[ta.Any], str],
    loads: ta.Callable[[str], ta.Any],
    **kwargs: ta.Any,
) -> StrObjectCodec:
    """Construct a `StrObjectCodec` (object <-> str) from a dumps/loads pair; `kwargs` forward on."""
    return make_object_codec(StrObjectCodec, name, dumps, loads, **kwargs)
79
+
80
+
81
+ ##
82
+
83
+
84
def make_object_lazy_loaded_codec(
    mod_name: str,
    attr_name: str,
    codec: ObjectCodec,
) -> codecs.LazyLoadedCodec:
    """Wrap `codec` (defined as `attr_name` in module `mod_name`) as a lazily-loadable registry entry."""
    return codecs.LazyLoadedCodec.new(mod_name, attr_name, codec)
@@ -0,0 +1,33 @@
1
+ from ..codecs import make_object_lazy_loaded_codec
2
+ from ..codecs import make_str_object_codec
3
+ from .json import dumps
4
+ from .json import dumps_compact
5
+ from .json import dumps_pretty
6
+ from .json import loads
7
+
8
+
9
+ ##
10
+
11
+
12
# Default json style, built on `dumps`/`loads` from `.json`.
JSON_CODEC = make_str_object_codec('json', dumps, loads)

# @omlish-manifest
_JSON_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_CODEC', JSON_CODEC)


#


# Built on `dumps_compact` from `.json`; shares `loads` with the default codec.
JSON_COMPACT_CODEC = make_str_object_codec('json-compact', dumps_compact, loads)

# @omlish-manifest
_JSON_COMPACT_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_COMPACT_CODEC', JSON_COMPACT_CODEC)


#


# Built on `dumps_pretty` from `.json`; shares `loads` with the default codec.
JSON_PRETTY_CODEC = make_str_object_codec('json-pretty', dumps_pretty, loads)

# @omlish-manifest
_JSON_PRETTY_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_PRETTY_CODEC', JSON_PRETTY_CODEC)
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_object_lazy_loaded_codec
5
+ from .codecs import make_str_object_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import json5
10
+ else:
11
+ json5 = lang.proxy_import('json5')
12
+
13
+
14
+ ##
15
+
16
+
17
def dumps(obj: ta.Any) -> str:
    """Serialize `obj` to a JSON5 string (delegates to the lazily-imported `json5` package)."""
    text = json5.dumps(obj)
    return text
19
+
20
+
21
def loads(s: str) -> ta.Any:
    """Parse a JSON5 string into Python objects via the `json5` package."""
    obj = json5.loads(s)
    return obj
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec instance; the lazy wrapper below lets the registry defer importing this module (and json5).
JSON5_CODEC = make_str_object_codec('json5', dumps, loads)

# @omlish-manifest
_JSON5_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON5_CODEC', JSON5_CODEC)
@@ -0,0 +1,31 @@
1
+ import typing as ta
2
+
3
+ from .. import lang
4
+ from .codecs import make_bytes_object_codec
5
+ from .codecs import make_object_lazy_loaded_codec
6
+
7
+
8
+ if ta.TYPE_CHECKING:
9
+ import pickle
10
+ else:
11
+ pickle = lang.proxy_import('pickle')
12
+
13
+
14
+ ##
15
+
16
+
17
def dump(obj: ta.Any) -> bytes:
    """Serialize `obj` to bytes with `pickle.dumps` (default protocol)."""
    payload = pickle.dumps(obj)
    return payload
19
+
20
+
21
def load(s: bytes) -> ta.Any:
    """
    Deserialize an object from pickle bytes.

    SECURITY: pickle can execute arbitrary code — never call this on untrusted input.
    """
    obj = pickle.loads(s)
    return obj
23
+
24
+
25
+ ##
26
+
27
+
28
# Eager codec instance; the lazy wrapper below lets the registry defer importing this module.
# SECURITY: decoding runs pickle.loads — only use on trusted data.
PICKLE_CODEC = make_bytes_object_codec('pickle', dump, load)

# @omlish-manifest
_PICKLE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'PICKLE_CODEC', PICKLE_CODEC)
omlish/formats/toml.py ADDED
@@ -0,0 +1,17 @@
1
+ import tomllib
2
+
3
+ from .codecs import make_object_lazy_loaded_codec
4
+ from .codecs import make_str_object_codec
5
+
6
+
7
+ ##
8
+
9
+
10
+ def _dumps(obj):
11
+ raise TypeError('Unsupported')
12
+
13
+
14
# Read-only codec: decoding parses TOML via stdlib tomllib; encoding always raises (see _dumps).
TOML_CODEC = make_str_object_codec('toml', _dumps, tomllib.loads)

# @omlish-manifest
_TOML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'TOML_CODEC', TOML_CODEC)
omlish/formats/yaml.py CHANGED
@@ -14,6 +14,8 @@ import typing as ta
14
14
  from .. import check
15
15
  from .. import dataclasses as dc
16
16
  from .. import lang
17
+ from .codecs import make_object_lazy_loaded_codec
18
+ from .codecs import make_str_object_codec
17
19
 
18
20
 
19
21
  if ta.TYPE_CHECKING:
@@ -239,3 +241,19 @@ def full_load(stream): # noqa
239
241
 
240
242
  def full_load_all(stream): # noqa # noqa
241
243
  return load_all(stream, yaml.FullLoader)
244
+
245
+
246
+ ##
247
+
248
+
249
def dump(obj, **kwargs):
    """Serialize `obj` to YAML, forwarding `kwargs` to `yaml.dump`."""
    text = yaml.dump(obj, **kwargs)
    return text
251
+
252
+
253
+ ##
254
+
255
+
256
# Encodes with this module's `dump`, decodes with `safe_load`; 'yml' is accepted as an alias.
YAML_CODEC = make_str_object_codec('yaml', dump, safe_load, aliases=['yml'])

# @omlish-manifest
_YAML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_CODEC', YAML_CODEC)
@@ -3,6 +3,8 @@ import typing as ta
3
3
 
4
4
  from ... import lang
5
5
  from .base import Compression
6
+ from .codecs import make_compression_codec
7
+ from .codecs import make_compression_lazy_loaded_codec
6
8
 
7
9
 
8
10
  if ta.TYPE_CHECKING:
@@ -11,8 +13,11 @@ else:
11
13
  brotli = lang.proxy_import('brotli')
12
14
 
13
15
 
16
+ ##
17
+
18
+
14
19
  @dc.dataclass(frozen=True, kw_only=True)
15
- class SnappyCompression(Compression):
20
+ class BrotliCompression(Compression):
16
21
  mode: int | None = None
17
22
  quality: int | None = None
18
23
  lgwin: int | None = None
@@ -31,3 +36,12 @@ class SnappyCompression(Compression):
31
36
  return brotli.decompress(
32
37
  d,
33
38
  )
39
+
40
+
41
+ ##
42
+
43
+
44
# Eager compression-codec instance; the lazy wrapper below lets the registry defer importing this module.
BROTLI_CODEC = make_compression_codec('brotli', BrotliCompression)

# @omlish-manifest
_BROTLI_LAZY_CODEC = make_compression_lazy_loaded_codec(__name__, 'BROTLI_CODEC', BROTLI_CODEC)
omlish/io/compress/bz2.py CHANGED
@@ -9,6 +9,8 @@ from .adapters import CompressorObjectIncrementalAdapter
9
9
  from .adapters import DecompressorObjectIncrementalAdapter
10
10
  from .base import Compression
11
11
  from .base import IncrementalCompression
12
+ from .codecs import make_compression_codec
13
+ from .codecs import make_compression_lazy_loaded_codec
12
14
 
13
15
 
14
16
  if ta.TYPE_CHECKING:
@@ -17,6 +19,9 @@ else:
17
19
  bz2 = lang.proxy_import('bz2')
18
20
 
19
21
 
22
+ ##
23
+
24
+
20
25
  @dc.dataclass(frozen=True, kw_only=True)
21
26
  class Bz2Compression(Compression, IncrementalCompression):
22
27
  level: int = 9
@@ -45,3 +50,12 @@ class Bz2Compression(Compression, IncrementalCompression):
45
50
  bz2.BZ2Decompressor, # type: ignore
46
51
  trailing_error=OSError,
47
52
  )()
53
+
54
+
55
+ ##
56
+
57
+
58
# Eager compression-codec instance; the lazy wrapper below lets the registry defer importing this module.
BZ2_CODEC = make_compression_codec('bz2', Bz2Compression)

# @omlish-manifest
_BZ2_LAZY_CODEC = make_compression_lazy_loaded_codec(__name__, 'BZ2_CODEC', BZ2_CODEC)