omlish 0.0.0.dev164__py3-none-any.whl → 0.0.0.dev166__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- omlish/.manifests.json +30 -2
- omlish/__about__.py +2 -2
- omlish/codecs/__init__.py +3 -0
- omlish/codecs/base.py +4 -0
- omlish/codecs/funcs.py +11 -0
- omlish/codecs/text.py +2 -2
- omlish/formats/cloudpickle.py +31 -0
- omlish/formats/json/codecs.py +0 -4
- omlish/formats/json/delimted.py +4 -0
- omlish/formats/yaml.py +7 -0
- omlish/funcs/pairs.py +0 -281
- omlish/io/compress/codecs.py +20 -0
- omlish/io/generators/__init__.py +3 -0
- omlish/io/generators/stepped.py +19 -3
- omlish/iterators/__init__.py +24 -0
- omlish/iterators/iterators.py +132 -0
- omlish/iterators/recipes.py +18 -0
- omlish/iterators/tools.py +96 -0
- omlish/iterators/unique.py +67 -0
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/METADATA +1 -1
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/RECORD +25 -19
- omlish/iterators.py +0 -300
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/LICENSE +0 -0
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/top_level.txt +0 -0
omlish/.manifests.json
CHANGED
@@ -37,6 +37,20 @@
|
|
37
37
|
}
|
38
38
|
}
|
39
39
|
},
|
40
|
+
{
|
41
|
+
"module": ".formats.cloudpickle",
|
42
|
+
"attr": "_CLOUDPICKLE_LAZY_CODEC",
|
43
|
+
"file": "omlish/formats/cloudpickle.py",
|
44
|
+
"line": 30,
|
45
|
+
"value": {
|
46
|
+
"$.codecs.base.LazyLoadedCodec": {
|
47
|
+
"mod_name": "omlish.formats.cloudpickle",
|
48
|
+
"attr_name": "CLOUDPICKLE_CODEC",
|
49
|
+
"name": "cloudpickle",
|
50
|
+
"aliases": null
|
51
|
+
}
|
52
|
+
}
|
53
|
+
},
|
40
54
|
{
|
41
55
|
"module": ".formats.json.codecs",
|
42
56
|
"attr": "_JSON_LAZY_CODEC",
|
@@ -55,7 +69,7 @@
|
|
55
69
|
"module": ".formats.json.codecs",
|
56
70
|
"attr": "_JSON_COMPACT_LAZY_CODEC",
|
57
71
|
"file": "omlish/formats/json/codecs.py",
|
58
|
-
"line":
|
72
|
+
"line": 21,
|
59
73
|
"value": {
|
60
74
|
"$.codecs.base.LazyLoadedCodec": {
|
61
75
|
"mod_name": "omlish.formats.json.codecs",
|
@@ -69,7 +83,7 @@
|
|
69
83
|
"module": ".formats.json.codecs",
|
70
84
|
"attr": "_JSON_PRETTY_LAZY_CODEC",
|
71
85
|
"file": "omlish/formats/json/codecs.py",
|
72
|
-
"line":
|
86
|
+
"line": 28,
|
73
87
|
"value": {
|
74
88
|
"$.codecs.base.LazyLoadedCodec": {
|
75
89
|
"mod_name": "omlish.formats.json.codecs",
|
@@ -137,6 +151,20 @@
|
|
137
151
|
}
|
138
152
|
}
|
139
153
|
},
|
154
|
+
{
|
155
|
+
"module": ".formats.yaml",
|
156
|
+
"attr": "_YAML_UNSAFE_LAZY_CODEC",
|
157
|
+
"file": "omlish/formats/yaml.py",
|
158
|
+
"line": 265,
|
159
|
+
"value": {
|
160
|
+
"$.codecs.base.LazyLoadedCodec": {
|
161
|
+
"mod_name": "omlish.formats.yaml",
|
162
|
+
"attr_name": "YAML_UNSAFE_CODEC",
|
163
|
+
"name": "yaml-unsafe",
|
164
|
+
"aliases": null
|
165
|
+
}
|
166
|
+
}
|
167
|
+
},
|
140
168
|
{
|
141
169
|
"module": ".io.compress.brotli",
|
142
170
|
"attr": "_BROTLI_LAZY_CODEC",
|
omlish/__about__.py
CHANGED
omlish/codecs/__init__.py
CHANGED
omlish/codecs/base.py
CHANGED
@@ -9,6 +9,7 @@ from omlish import check
|
|
9
9
|
from omlish import dataclasses as dc
|
10
10
|
from omlish import lang
|
11
11
|
from omlish import reflect as rfl
|
12
|
+
from omlish.funcs import pairs as fps
|
12
13
|
|
13
14
|
|
14
15
|
I = ta.TypeVar('I')
|
@@ -27,6 +28,9 @@ class EagerCodec(lang.Abstract, ta.Generic[I, O]):
|
|
27
28
|
def decode(self, o: O) -> I:
|
28
29
|
raise NotImplementedError
|
29
30
|
|
31
|
+
def as_pair(self) -> fps.FnPair[I, O]:
|
32
|
+
return fps.of(self.encode, self.decode)
|
33
|
+
|
30
34
|
|
31
35
|
class IncrementalCodec(lang.Abstract, ta.Generic[I, O]):
|
32
36
|
@abc.abstractmethod
|
omlish/codecs/funcs.py
CHANGED
@@ -26,3 +26,14 @@ class FnPairEagerCodec(EagerCodec[I, O]):
|
|
26
26
|
decode: ta.Callable[[O], I],
|
27
27
|
) -> 'FnPairEagerCodec[I, O]':
|
28
28
|
return cls(fps.of(encode, decode))
|
29
|
+
|
30
|
+
|
31
|
+
def of_pair(fp: fps.FnPair[I, O]) -> FnPairEagerCodec[I, O]:
|
32
|
+
return FnPairEagerCodec(fp)
|
33
|
+
|
34
|
+
|
35
|
+
def of(
|
36
|
+
encode: ta.Callable[[I], O],
|
37
|
+
decode: ta.Callable[[O], I],
|
38
|
+
) -> FnPairEagerCodec[I, O]:
|
39
|
+
return FnPairEagerCodec(fps.of(encode, decode))
|
omlish/codecs/text.py
CHANGED
@@ -46,8 +46,8 @@ TextEncodingErrors: ta.TypeAlias = ta.Literal[
|
|
46
46
|
'namereplace',
|
47
47
|
|
48
48
|
##
|
49
|
-
# In addition, the following error handler is specific to the given codecs:
|
50
|
-
# utf-
|
49
|
+
# In addition, the following error handler is specific to the given codecs: utf-8, utf-16, utf-32, utf-16-be,
|
50
|
+
# utf-16-le, utf-32-be, utf-32-le
|
51
51
|
|
52
52
|
# Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
|
53
53
|
# treat the presence of surrogate code point in str as an error.
|
@@ -0,0 +1,31 @@
|
|
1
|
+
import typing as ta
|
2
|
+
|
3
|
+
from .. import lang
|
4
|
+
from .codecs import make_bytes_object_codec
|
5
|
+
from .codecs import make_object_lazy_loaded_codec
|
6
|
+
|
7
|
+
|
8
|
+
if ta.TYPE_CHECKING:
|
9
|
+
import cloudpickle
|
10
|
+
else:
|
11
|
+
cloudpickle = lang.proxy_import('cloudpickle')
|
12
|
+
|
13
|
+
|
14
|
+
##
|
15
|
+
|
16
|
+
|
17
|
+
def dump(obj: ta.Any) -> bytes:
|
18
|
+
return cloudpickle.dumps(obj)
|
19
|
+
|
20
|
+
|
21
|
+
def load(s: bytes) -> ta.Any:
|
22
|
+
return cloudpickle.loads(s)
|
23
|
+
|
24
|
+
|
25
|
+
##
|
26
|
+
|
27
|
+
|
28
|
+
CLOUDPICKLE_CODEC = make_bytes_object_codec('cloudpickle', dump, load)
|
29
|
+
|
30
|
+
# @omlish-manifest
|
31
|
+
_CLOUDPICKLE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'CLOUDPICKLE_CODEC', CLOUDPICKLE_CODEC)
|
omlish/formats/json/codecs.py
CHANGED
@@ -14,19 +14,15 @@ JSON_CODEC = make_str_object_codec('json', dumps, loads)
|
|
14
14
|
# @omlish-manifest
|
15
15
|
_JSON_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_CODEC', JSON_CODEC)
|
16
16
|
|
17
|
-
|
18
17
|
#
|
19
18
|
|
20
|
-
|
21
19
|
JSON_COMPACT_CODEC = make_str_object_codec('json-compact', dumps_compact, loads)
|
22
20
|
|
23
21
|
# @omlish-manifest
|
24
22
|
_JSON_COMPACT_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_COMPACT_CODEC', JSON_COMPACT_CODEC)
|
25
23
|
|
26
|
-
|
27
24
|
#
|
28
25
|
|
29
|
-
|
30
26
|
JSON_PRETTY_CODEC = make_str_object_codec('json-pretty', dumps_pretty, loads)
|
31
27
|
|
32
28
|
# @omlish-manifest
|
omlish/formats/yaml.py
CHANGED
@@ -257,3 +257,10 @@ YAML_CODEC = make_str_object_codec('yaml', dump, safe_load, aliases=['yml'])
|
|
257
257
|
|
258
258
|
# @omlish-manifest
|
259
259
|
_YAML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_CODEC', YAML_CODEC)
|
260
|
+
|
261
|
+
#
|
262
|
+
|
263
|
+
YAML_UNSAFE_CODEC = make_str_object_codec('yaml-unsafe', dump, full_load)
|
264
|
+
|
265
|
+
# @omlish-manifest
|
266
|
+
_YAML_UNSAFE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_UNSAFE_CODEC', YAML_UNSAFE_CODEC)
|
omlish/funcs/pairs.py
CHANGED
@@ -1,22 +1,4 @@
|
|
1
|
-
"""
|
2
|
-
TODO:
|
3
|
-
- objects
|
4
|
-
- csv
|
5
|
-
- csvloader
|
6
|
-
- wrapped (wait for usecase)
|
7
|
-
- streams / incremental
|
8
|
-
- fileobj -> fileobj?
|
9
|
-
- swap zstandard for zstd
|
10
|
-
|
11
|
-
Compression choice:
|
12
|
-
- lzma if-available minimal-space
|
13
|
-
- lz4 if-available read-heavy
|
14
|
-
- zstd if-available
|
15
|
-
- bz2 read-heavy (but no parallel decompress)
|
16
|
-
- gz
|
17
|
-
"""
|
18
1
|
import abc
|
19
|
-
import codecs
|
20
2
|
import dataclasses as dc
|
21
3
|
import typing as ta
|
22
4
|
|
@@ -24,40 +6,9 @@ from .. import lang
|
|
24
6
|
|
25
7
|
|
26
8
|
if ta.TYPE_CHECKING:
|
27
|
-
import bz2 as _bz2
|
28
|
-
import gzip as _gzip
|
29
|
-
import lzma as _lzma
|
30
|
-
import pickle as _pickle
|
31
9
|
import struct as _struct
|
32
|
-
import tomllib as _tomllib
|
33
|
-
|
34
|
-
import cbor2 as _cbor2
|
35
|
-
import cloudpickle as _cloudpickle
|
36
|
-
import json5 as _json5
|
37
|
-
import lz4.frame as _lz4_frame
|
38
|
-
import snappy as _snappy
|
39
|
-
import yaml as _yaml
|
40
|
-
import zstandard as _zstandard
|
41
|
-
|
42
|
-
from ..formats import json as _json
|
43
|
-
|
44
10
|
else:
|
45
|
-
_bz2 = lang.proxy_import('bz2')
|
46
|
-
_gzip = lang.proxy_import('gzip')
|
47
|
-
_lzma = lang.proxy_import('lzma')
|
48
|
-
_pickle = lang.proxy_import('pickle')
|
49
11
|
_struct = lang.proxy_import('struct')
|
50
|
-
_tomllib = lang.proxy_import('tomllib')
|
51
|
-
|
52
|
-
_cbor2 = lang.proxy_import('cbor2')
|
53
|
-
_cloudpickle = lang.proxy_import('cloudpickle')
|
54
|
-
_json5 = lang.proxy_import('json5')
|
55
|
-
_lz4_frame = lang.proxy_import('lz4.frame')
|
56
|
-
_snappy = lang.proxy_import('snappy')
|
57
|
-
_yaml = lang.proxy_import('yaml')
|
58
|
-
_zstandard = lang.proxy_import('zstandard')
|
59
|
-
|
60
|
-
_json = lang.proxy_import('..formats.json', __package__)
|
61
12
|
|
62
13
|
|
63
14
|
##
|
@@ -211,36 +162,6 @@ def compose(*ps):
|
|
211
162
|
##
|
212
163
|
|
213
164
|
|
214
|
-
@dc.dataclass(frozen=True)
|
215
|
-
class Text(FnPair[str, bytes]):
|
216
|
-
ci: codecs.CodecInfo
|
217
|
-
encode_errors: str = dc.field(default='strict', kw_only=True)
|
218
|
-
decode_errors: str = dc.field(default='strict', kw_only=True)
|
219
|
-
|
220
|
-
def forward(self, f: str) -> bytes:
|
221
|
-
# Python ignores the returned length:
|
222
|
-
# https://github.com/python/cpython/blob/7431c3799efbd06ed03ee70b64420f45e83b3667/Python/codecs.c#L424
|
223
|
-
t, _ = self.ci.encode(f, self.encode_errors)
|
224
|
-
return t
|
225
|
-
|
226
|
-
def backward(self, t: bytes) -> str:
|
227
|
-
f, _ = self.ci.decode(t, self.decode_errors)
|
228
|
-
return f
|
229
|
-
|
230
|
-
|
231
|
-
def text(name: str, *, encode_errors: str = 'strict', decode_errors: str = 'strict') -> Text:
|
232
|
-
ci = codecs.lookup(name)
|
233
|
-
if not ci._is_text_encoding: # noqa
|
234
|
-
raise TypeError(f'must be text codec: {name}')
|
235
|
-
return Text(ci, encode_errors=encode_errors, decode_errors=decode_errors)
|
236
|
-
|
237
|
-
|
238
|
-
UTF8 = text('utf-8')
|
239
|
-
|
240
|
-
|
241
|
-
#
|
242
|
-
|
243
|
-
|
244
165
|
@dc.dataclass(frozen=True)
|
245
166
|
class Optional(FnPair[F | None, T | None]):
|
246
167
|
fp: FnPair[F, T]
|
@@ -263,99 +184,6 @@ class Lines(FnPair[ta.Sequence[str], str]):
|
|
263
184
|
##
|
264
185
|
|
265
186
|
|
266
|
-
_EXTENSION_REGISTRY: dict[str, type[FnPair]] = {}
|
267
|
-
|
268
|
-
|
269
|
-
def _register_extension(*ss):
|
270
|
-
def inner(cls):
|
271
|
-
for s in ss:
|
272
|
-
if s in _EXTENSION_REGISTRY:
|
273
|
-
raise KeyError(s)
|
274
|
-
_EXTENSION_REGISTRY[s] = cls
|
275
|
-
return cls
|
276
|
-
return inner
|
277
|
-
|
278
|
-
|
279
|
-
def get_for_extension(ext: str) -> FnPair:
|
280
|
-
return compose(*[_EXTENSION_REGISTRY[p]() for p in ext.split('.')])
|
281
|
-
|
282
|
-
|
283
|
-
##
|
284
|
-
|
285
|
-
|
286
|
-
class Compression(FnPair[bytes, bytes], abc.ABC):
|
287
|
-
pass
|
288
|
-
|
289
|
-
|
290
|
-
@_register_extension('bz2')
|
291
|
-
@dc.dataclass(frozen=True)
|
292
|
-
class Bz2(Compression):
|
293
|
-
compresslevel: int = 9
|
294
|
-
|
295
|
-
def forward(self, f: bytes) -> bytes:
|
296
|
-
return _bz2.compress(f, compresslevel=self.compresslevel)
|
297
|
-
|
298
|
-
def backward(self, t: bytes) -> bytes:
|
299
|
-
return _bz2.decompress(t)
|
300
|
-
|
301
|
-
|
302
|
-
@_register_extension('gz')
|
303
|
-
@dc.dataclass(frozen=True)
|
304
|
-
class Gzip(Compression):
|
305
|
-
compresslevel: int = 9
|
306
|
-
|
307
|
-
def forward(self, f: bytes) -> bytes:
|
308
|
-
return _gzip.compress(f, compresslevel=self.compresslevel)
|
309
|
-
|
310
|
-
def backward(self, t: bytes) -> bytes:
|
311
|
-
return _gzip.decompress(t)
|
312
|
-
|
313
|
-
|
314
|
-
@_register_extension('lzma')
|
315
|
-
class Lzma(Compression):
|
316
|
-
def forward(self, f: bytes) -> bytes:
|
317
|
-
return _lzma.compress(f)
|
318
|
-
|
319
|
-
def backward(self, t: bytes) -> bytes:
|
320
|
-
return _lzma.decompress(t)
|
321
|
-
|
322
|
-
|
323
|
-
#
|
324
|
-
|
325
|
-
|
326
|
-
@_register_extension('lz4')
|
327
|
-
@dc.dataclass(frozen=True)
|
328
|
-
class Lz4(Compression):
|
329
|
-
compression_level: int = 0
|
330
|
-
|
331
|
-
def forward(self, f: bytes) -> bytes:
|
332
|
-
return _lz4_frame.compress(f, compression_level=self.compression_level)
|
333
|
-
|
334
|
-
def backward(self, t: bytes) -> bytes:
|
335
|
-
return _lz4_frame.decompress(t)
|
336
|
-
|
337
|
-
|
338
|
-
@_register_extension('snappy')
|
339
|
-
class Snappy(Compression):
|
340
|
-
def forward(self, f: bytes) -> bytes:
|
341
|
-
return _snappy.compress(f)
|
342
|
-
|
343
|
-
def backward(self, t: bytes) -> bytes:
|
344
|
-
return _snappy.decompress(t)
|
345
|
-
|
346
|
-
|
347
|
-
@_register_extension('zstd')
|
348
|
-
class Zstd(Compression):
|
349
|
-
def forward(self, f: bytes) -> bytes:
|
350
|
-
return _zstandard.compress(f)
|
351
|
-
|
352
|
-
def backward(self, t: bytes) -> bytes:
|
353
|
-
return _zstandard.decompress(t)
|
354
|
-
|
355
|
-
|
356
|
-
##
|
357
|
-
|
358
|
-
|
359
187
|
@dc.dataclass(frozen=True)
|
360
188
|
class Struct(FnPair[tuple, bytes]):
|
361
189
|
fmt: str
|
@@ -385,112 +213,3 @@ class ObjectStr_(Object_[str], lang.Abstract): # noqa
|
|
385
213
|
|
386
214
|
class ObjectBytes_(Object_[bytes], lang.Abstract): # noqa
|
387
215
|
pass
|
388
|
-
|
389
|
-
|
390
|
-
#
|
391
|
-
|
392
|
-
|
393
|
-
@_register_extension('pkl')
|
394
|
-
@dc.dataclass(frozen=True)
|
395
|
-
class Pickle(ObjectBytes_):
|
396
|
-
protocol: int | None = None
|
397
|
-
|
398
|
-
def forward(self, f: ta.Any) -> bytes:
|
399
|
-
return _pickle.dumps(f, protocol=self.protocol)
|
400
|
-
|
401
|
-
def backward(self, t: bytes) -> ta.Any:
|
402
|
-
return _pickle.loads(t)
|
403
|
-
|
404
|
-
|
405
|
-
class _Json(ObjectStr_, lang.Abstract): # noqa
|
406
|
-
def backward(self, t: str) -> ta.Any:
|
407
|
-
return _json.loads(t)
|
408
|
-
|
409
|
-
|
410
|
-
@_register_extension('json')
|
411
|
-
class Json(_Json):
|
412
|
-
def forward(self, f: ta.Any) -> str:
|
413
|
-
return _json.dumps(f)
|
414
|
-
|
415
|
-
|
416
|
-
class JsonPretty(_Json):
|
417
|
-
def forward(self, f: ta.Any) -> str:
|
418
|
-
return _json.dumps_pretty(f)
|
419
|
-
|
420
|
-
|
421
|
-
class JsonCompact(_Json):
|
422
|
-
def forward(self, f: ta.Any) -> str:
|
423
|
-
return _json.dumps_compact(f)
|
424
|
-
|
425
|
-
|
426
|
-
JSON = Json()
|
427
|
-
PRETTY_JSON = JsonPretty()
|
428
|
-
COMPACT_JSON = JsonCompact()
|
429
|
-
|
430
|
-
|
431
|
-
@_register_extension('jsonl')
|
432
|
-
class JsonLines(FnPair[ta.Sequence[ta.Any], str]):
|
433
|
-
def forward(self, f: ta.Sequence[ta.Any]) -> str:
|
434
|
-
return '\n'.join(_json.dumps(e) for e in f)
|
435
|
-
|
436
|
-
def backward(self, t: str) -> ta.Sequence[ta.Any]:
|
437
|
-
return [_json.loads(l) for l in t.splitlines()]
|
438
|
-
|
439
|
-
|
440
|
-
@_register_extension('toml')
|
441
|
-
class Toml(ObjectStr_):
|
442
|
-
def forward(self, f: ta.Any) -> str:
|
443
|
-
raise NotImplementedError
|
444
|
-
|
445
|
-
def backward(self, t: str) -> ta.Any:
|
446
|
-
return _tomllib.loads(t)
|
447
|
-
|
448
|
-
|
449
|
-
#
|
450
|
-
|
451
|
-
|
452
|
-
@_register_extension('cbor')
|
453
|
-
class Cbor(ObjectBytes_):
|
454
|
-
def forward(self, f: ta.Any) -> bytes:
|
455
|
-
return _cbor2.dumps(f)
|
456
|
-
|
457
|
-
def backward(self, t: bytes) -> ta.Any:
|
458
|
-
return _cbor2.loads(t)
|
459
|
-
|
460
|
-
|
461
|
-
@_register_extension('clpkl')
|
462
|
-
@dc.dataclass(frozen=True)
|
463
|
-
class Cloudpickle(ObjectBytes_):
|
464
|
-
protocol: int | None = None
|
465
|
-
|
466
|
-
def forward(self, f: ta.Any) -> bytes:
|
467
|
-
return _cloudpickle.dumps(f, protocol=self.protocol)
|
468
|
-
|
469
|
-
def backward(self, t: bytes) -> ta.Any:
|
470
|
-
return _cloudpickle.loads(t)
|
471
|
-
|
472
|
-
|
473
|
-
@_register_extension('json5')
|
474
|
-
class Json5(ObjectStr_):
|
475
|
-
def forward(self, f: ta.Any) -> str:
|
476
|
-
return _json5.dumps(f)
|
477
|
-
|
478
|
-
def backward(self, t: str) -> ta.Any:
|
479
|
-
return _json5.loads(t)
|
480
|
-
|
481
|
-
|
482
|
-
@_register_extension('yml', 'yaml')
|
483
|
-
class Yaml(ObjectStr_):
|
484
|
-
def forward(self, f: ta.Any) -> str:
|
485
|
-
return _yaml.dump(f)
|
486
|
-
|
487
|
-
def backward(self, t: str) -> ta.Any:
|
488
|
-
return _yaml.safe_load(t)
|
489
|
-
|
490
|
-
|
491
|
-
class YamlUnsafe(ObjectStr_):
|
492
|
-
def forward(self, f: ta.Any) -> str:
|
493
|
-
return _yaml.dump(f)
|
494
|
-
|
495
|
-
def backward(self, t: str) -> ta.Any:
|
496
|
-
return _yaml.load(t, _yaml.FullLoader)
|
omlish/io/compress/codecs.py
CHANGED
@@ -2,7 +2,9 @@ import dataclasses as dc
|
|
2
2
|
import typing as ta
|
3
3
|
|
4
4
|
from ... import codecs
|
5
|
+
from ..generators import buffer_bytes_stepped_reader_generator
|
5
6
|
from .base import Compression
|
7
|
+
from .base import IncrementalCompression
|
6
8
|
|
7
9
|
|
8
10
|
##
|
@@ -22,6 +24,20 @@ class CompressionEagerCodec(codecs.EagerCodec[bytes, bytes]):
|
|
22
24
|
##
|
23
25
|
|
24
26
|
|
27
|
+
@dc.dataclass(frozen=True)
|
28
|
+
class CompressionIncrementalCodec(codecs.IncrementalCodec[bytes, bytes]):
|
29
|
+
compression: IncrementalCompression
|
30
|
+
|
31
|
+
def encode_incremental(self) -> ta.Generator[bytes | None, bytes, None]:
|
32
|
+
return self.compression.compress_incremental()
|
33
|
+
|
34
|
+
def decode_incremental(self) -> ta.Generator[bytes | None, bytes, None]:
|
35
|
+
return buffer_bytes_stepped_reader_generator(self.compression.decompress_incremental())
|
36
|
+
|
37
|
+
|
38
|
+
##
|
39
|
+
|
40
|
+
|
25
41
|
class CompressionCodec(codecs.Codec):
|
26
42
|
pass
|
27
43
|
|
@@ -40,6 +56,10 @@ def make_compression_codec(
|
|
40
56
|
output=bytes,
|
41
57
|
|
42
58
|
new=lambda *args, **kwargs: CompressionEagerCodec(cls(*args, **kwargs)),
|
59
|
+
|
60
|
+
new_incremental=(
|
61
|
+
lambda *args, **kwargs: CompressionIncrementalCodec(cls(*args, **kwargs)) # noqa
|
62
|
+
) if issubclass(cls, IncrementalCompression) else None,
|
43
63
|
)
|
44
64
|
|
45
65
|
|
omlish/io/generators/__init__.py
CHANGED
omlish/io/generators/stepped.py
CHANGED
@@ -151,18 +151,32 @@ def read_into_str_stepped_generator(
|
|
151
151
|
def buffer_bytes_stepped_reader_generator(g: BytesSteppedReaderGenerator) -> BytesSteppedGenerator:
|
152
152
|
o = g.send(None)
|
153
153
|
buf: ta.Any = None
|
154
|
+
eof = False
|
154
155
|
|
155
156
|
while True:
|
157
|
+
if eof:
|
158
|
+
raise EOFError
|
159
|
+
|
156
160
|
if not buf:
|
157
161
|
buf = check.isinstance((yield None), bytes)
|
162
|
+
if not buf:
|
163
|
+
eof = True
|
158
164
|
|
159
|
-
if o is None
|
165
|
+
if o is None:
|
160
166
|
i = buf
|
167
|
+
buf = None
|
168
|
+
|
161
169
|
elif isinstance(o, int):
|
162
|
-
|
163
|
-
|
170
|
+
while len(buf) < o:
|
171
|
+
more = check.isinstance((yield None), bytes)
|
172
|
+
if not more:
|
173
|
+
raise EOFError
|
174
|
+
# FIXME: lol - share guts with readers
|
175
|
+
buf += more
|
176
|
+
|
164
177
|
i = buf[:o]
|
165
178
|
buf = buf[o:]
|
179
|
+
|
166
180
|
else:
|
167
181
|
raise TypeError(o)
|
168
182
|
|
@@ -171,5 +185,7 @@ def buffer_bytes_stepped_reader_generator(g: BytesSteppedReaderGenerator) -> Byt
|
|
171
185
|
i = None
|
172
186
|
if isinstance(o, bytes):
|
173
187
|
check.none((yield o))
|
188
|
+
if not o:
|
189
|
+
return
|
174
190
|
else:
|
175
191
|
break
|
@@ -0,0 +1,24 @@
|
|
1
|
+
from .iterators import ( # noqa
|
2
|
+
PeekIterator,
|
3
|
+
PrefetchIterator,
|
4
|
+
ProxyIterator,
|
5
|
+
RetainIterator,
|
6
|
+
)
|
7
|
+
|
8
|
+
from .recipes import ( # noqa
|
9
|
+
sliding_window,
|
10
|
+
)
|
11
|
+
|
12
|
+
from .tools import ( # noqa
|
13
|
+
chunk,
|
14
|
+
expand_indexed_pairs,
|
15
|
+
merge_on,
|
16
|
+
take,
|
17
|
+
unzip,
|
18
|
+
)
|
19
|
+
|
20
|
+
from .unique import ( # noqa
|
21
|
+
UniqueItem,
|
22
|
+
UniqueIterator,
|
23
|
+
UniqueStats,
|
24
|
+
)
|