omlish 0.0.0.dev164__py3-none-any.whl → 0.0.0.dev166__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omlish/.manifests.json +30 -2
- omlish/__about__.py +2 -2
- omlish/codecs/__init__.py +3 -0
- omlish/codecs/base.py +4 -0
- omlish/codecs/funcs.py +11 -0
- omlish/codecs/text.py +2 -2
- omlish/formats/cloudpickle.py +31 -0
- omlish/formats/json/codecs.py +0 -4
- omlish/formats/json/delimted.py +4 -0
- omlish/formats/yaml.py +7 -0
- omlish/funcs/pairs.py +0 -281
- omlish/io/compress/codecs.py +20 -0
- omlish/io/generators/__init__.py +3 -0
- omlish/io/generators/stepped.py +19 -3
- omlish/iterators/__init__.py +24 -0
- omlish/iterators/iterators.py +132 -0
- omlish/iterators/recipes.py +18 -0
- omlish/iterators/tools.py +96 -0
- omlish/iterators/unique.py +67 -0
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/METADATA +1 -1
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/RECORD +25 -19
- omlish/iterators.py +0 -300
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/LICENSE +0 -0
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev164.dist-info → omlish-0.0.0.dev166.dist-info}/top_level.txt +0 -0
omlish/.manifests.json
CHANGED
@@ -37,6 +37,20 @@
             }
         }
     },
+    {
+        "module": ".formats.cloudpickle",
+        "attr": "_CLOUDPICKLE_LAZY_CODEC",
+        "file": "omlish/formats/cloudpickle.py",
+        "line": 30,
+        "value": {
+            "$.codecs.base.LazyLoadedCodec": {
+                "mod_name": "omlish.formats.cloudpickle",
+                "attr_name": "CLOUDPICKLE_CODEC",
+                "name": "cloudpickle",
+                "aliases": null
+            }
+        }
+    },
     {
         "module": ".formats.json.codecs",
         "attr": "_JSON_LAZY_CODEC",
@@ -55,7 +69,7 @@
         "module": ".formats.json.codecs",
         "attr": "_JSON_COMPACT_LAZY_CODEC",
         "file": "omlish/formats/json/codecs.py",
-        "line":
+        "line": 21,
         "value": {
             "$.codecs.base.LazyLoadedCodec": {
                 "mod_name": "omlish.formats.json.codecs",
@@ -69,7 +83,7 @@
         "module": ".formats.json.codecs",
         "attr": "_JSON_PRETTY_LAZY_CODEC",
         "file": "omlish/formats/json/codecs.py",
-        "line":
+        "line": 28,
         "value": {
             "$.codecs.base.LazyLoadedCodec": {
                 "mod_name": "omlish.formats.json.codecs",
@@ -137,6 +151,20 @@
             }
         }
     },
+    {
+        "module": ".formats.yaml",
+        "attr": "_YAML_UNSAFE_LAZY_CODEC",
+        "file": "omlish/formats/yaml.py",
+        "line": 265,
+        "value": {
+            "$.codecs.base.LazyLoadedCodec": {
+                "mod_name": "omlish.formats.yaml",
+                "attr_name": "YAML_UNSAFE_CODEC",
+                "name": "yaml-unsafe",
+                "aliases": null
+            }
+        }
+    },
     {
         "module": ".io.compress.brotli",
         "attr": "_BROTLI_LAZY_CODEC",
omlish/__about__.py
CHANGED
omlish/codecs/__init__.py
CHANGED
omlish/codecs/base.py
CHANGED
@@ -9,6 +9,7 @@ from omlish import check
 from omlish import dataclasses as dc
 from omlish import lang
 from omlish import reflect as rfl
+from omlish.funcs import pairs as fps


 I = ta.TypeVar('I')
@@ -27,6 +28,9 @@ class EagerCodec(lang.Abstract, ta.Generic[I, O]):
     def decode(self, o: O) -> I:
         raise NotImplementedError

+    def as_pair(self) -> fps.FnPair[I, O]:
+        return fps.of(self.encode, self.decode)
+

 class IncrementalCodec(lang.Abstract, ta.Generic[I, O]):
     @abc.abstractmethod
omlish/codecs/funcs.py
CHANGED
@@ -26,3 +26,14 @@ class FnPairEagerCodec(EagerCodec[I, O]):
         decode: ta.Callable[[O], I],
     ) -> 'FnPairEagerCodec[I, O]':
         return cls(fps.of(encode, decode))
+
+
+def of_pair(fp: fps.FnPair[I, O]) -> FnPairEagerCodec[I, O]:
+    return FnPairEagerCodec(fp)
+
+
+def of(
+    encode: ta.Callable[[I], O],
+    decode: ta.Callable[[O], I],
+) -> FnPairEagerCodec[I, O]:
+    return FnPairEagerCodec(fps.of(encode, decode))
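Taken together with the new EagerCodec.as_pair() above, these helpers make the codec/FnPair bridge two-way. A minimal sketch, assuming FnPairEagerCodec delegates encode/decode to the wrapped pair's forward/backward, as its name suggests:

import json

from omlish.codecs import funcs

codec = funcs.of(json.dumps, json.loads)  # wrap two plain callables as an eager codec
assert codec.decode(codec.encode({'a': 1})) == {'a': 1}

fp = codec.as_pair()  # and back again: an fps.FnPair whose forward/backward are encode/decode
assert fp.backward(fp.forward({'a': 1})) == {'a': 1}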
omlish/codecs/text.py
CHANGED
@@ -46,8 +46,8 @@ TextEncodingErrors: ta.TypeAlias = ta.Literal[
     'namereplace',

     ##
-    # In addition, the following error handler is specific to the given codecs:
-    # utf-8, utf-16, utf-32, utf-16-be, utf-16-le, utf-32-be, utf-32-le
+    # In addition, the following error handler is specific to the given codecs: utf-8, utf-16, utf-32, utf-16-be,
+    # utf-16-le, utf-32-be, utf-32-le

     # Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
     # treat the presence of surrogate code point in str as an error.
omlish/formats/cloudpickle.py
ADDED
@@ -0,0 +1,31 @@
+import typing as ta
+
+from .. import lang
+from .codecs import make_bytes_object_codec
+from .codecs import make_object_lazy_loaded_codec
+
+
+if ta.TYPE_CHECKING:
+    import cloudpickle
+else:
+    cloudpickle = lang.proxy_import('cloudpickle')
+
+
+##
+
+
+def dump(obj: ta.Any) -> bytes:
+    return cloudpickle.dumps(obj)
+
+
+def load(s: bytes) -> ta.Any:
+    return cloudpickle.loads(s)
+
+
+##
+
+
+CLOUDPICKLE_CODEC = make_bytes_object_codec('cloudpickle', dump, load)
+
+# @omlish-manifest
+_CLOUDPICKLE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'CLOUDPICKLE_CODEC', CLOUDPICKLE_CODEC)
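The module-level dump/load above are thin wrappers over cloudpickle.dumps/loads, so the new codec covers objects the stdlib pickle rejects. A small usage sketch (assumes the optional cloudpickle dependency is installed):

from omlish.formats import cloudpickle as ocp

fn = lambda x: x + 1  # lambdas are not serializable by stdlib pickle
blob = ocp.dump(fn)   # bytes, via cloudpickle.dumps
assert ocp.load(blob)(1) == 2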
omlish/formats/json/codecs.py
CHANGED
@@ -14,19 +14,15 @@ JSON_CODEC = make_str_object_codec('json', dumps, loads)
 # @omlish-manifest
 _JSON_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_CODEC', JSON_CODEC)

-
 #

-
 JSON_COMPACT_CODEC = make_str_object_codec('json-compact', dumps_compact, loads)

 # @omlish-manifest
 _JSON_COMPACT_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_COMPACT_CODEC', JSON_COMPACT_CODEC)

-
 #

-
 JSON_PRETTY_CODEC = make_str_object_codec('json-pretty', dumps_pretty, loads)

 # @omlish-manifest
omlish/formats/yaml.py
CHANGED
@@ -257,3 +257,10 @@ YAML_CODEC = make_str_object_codec('yaml', dump, safe_load, aliases=['yml'])

 # @omlish-manifest
 _YAML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_CODEC', YAML_CODEC)
+
+#
+
+YAML_UNSAFE_CODEC = make_str_object_codec('yaml-unsafe', dump, full_load)
+
+# @omlish-manifest
+_YAML_UNSAFE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_UNSAFE_CODEC', YAML_UNSAFE_CODEC)
omlish/funcs/pairs.py
CHANGED
@@ -1,22 +1,4 @@
-"""
-TODO:
- - objects
- - csv
- - csvloader
- - wrapped (wait for usecase)
- - streams / incremental
- - fileobj -> fileobj?
- - swap zstandard for zstd
-
-Compression choice:
- - lzma if-available minimal-space
- - lz4 if-available read-heavy
- - zstd if-available
- - bz2 read-heavy (but no parallel decompress)
- - gz
-"""
 import abc
-import codecs
 import dataclasses as dc
 import typing as ta

@@ -24,40 +6,9 @@ from .. import lang


 if ta.TYPE_CHECKING:
-    import bz2 as _bz2
-    import gzip as _gzip
-    import lzma as _lzma
-    import pickle as _pickle
     import struct as _struct
-    import tomllib as _tomllib
-
-    import cbor2 as _cbor2
-    import cloudpickle as _cloudpickle
-    import json5 as _json5
-    import lz4.frame as _lz4_frame
-    import snappy as _snappy
-    import yaml as _yaml
-    import zstandard as _zstandard
-
-    from ..formats import json as _json
-
 else:
-    _bz2 = lang.proxy_import('bz2')
-    _gzip = lang.proxy_import('gzip')
-    _lzma = lang.proxy_import('lzma')
-    _pickle = lang.proxy_import('pickle')
     _struct = lang.proxy_import('struct')
-    _tomllib = lang.proxy_import('tomllib')
-
-    _cbor2 = lang.proxy_import('cbor2')
-    _cloudpickle = lang.proxy_import('cloudpickle')
-    _json5 = lang.proxy_import('json5')
-    _lz4_frame = lang.proxy_import('lz4.frame')
-    _snappy = lang.proxy_import('snappy')
-    _yaml = lang.proxy_import('yaml')
-    _zstandard = lang.proxy_import('zstandard')
-
-    _json = lang.proxy_import('..formats.json', __package__)


 ##
@@ -211,36 +162,6 @@ def compose(*ps):
 ##


-@dc.dataclass(frozen=True)
-class Text(FnPair[str, bytes]):
-    ci: codecs.CodecInfo
-    encode_errors: str = dc.field(default='strict', kw_only=True)
-    decode_errors: str = dc.field(default='strict', kw_only=True)
-
-    def forward(self, f: str) -> bytes:
-        # Python ignores the returned length:
-        # https://github.com/python/cpython/blob/7431c3799efbd06ed03ee70b64420f45e83b3667/Python/codecs.c#L424
-        t, _ = self.ci.encode(f, self.encode_errors)
-        return t
-
-    def backward(self, t: bytes) -> str:
-        f, _ = self.ci.decode(t, self.decode_errors)
-        return f
-
-
-def text(name: str, *, encode_errors: str = 'strict', decode_errors: str = 'strict') -> Text:
-    ci = codecs.lookup(name)
-    if not ci._is_text_encoding:  # noqa
-        raise TypeError(f'must be text codec: {name}')
-    return Text(ci, encode_errors=encode_errors, decode_errors=decode_errors)
-
-
-UTF8 = text('utf-8')
-
-
-#
-
-
 @dc.dataclass(frozen=True)
 class Optional(FnPair[F | None, T | None]):
     fp: FnPair[F, T]
@@ -263,99 +184,6 @@ class Lines(FnPair[ta.Sequence[str], str]):
 ##


-_EXTENSION_REGISTRY: dict[str, type[FnPair]] = {}
-
-
-def _register_extension(*ss):
-    def inner(cls):
-        for s in ss:
-            if s in _EXTENSION_REGISTRY:
-                raise KeyError(s)
-            _EXTENSION_REGISTRY[s] = cls
-        return cls
-    return inner
-
-
-def get_for_extension(ext: str) -> FnPair:
-    return compose(*[_EXTENSION_REGISTRY[p]() for p in ext.split('.')])
-
-
-##
-
-
-class Compression(FnPair[bytes, bytes], abc.ABC):
-    pass
-
-
-@_register_extension('bz2')
-@dc.dataclass(frozen=True)
-class Bz2(Compression):
-    compresslevel: int = 9
-
-    def forward(self, f: bytes) -> bytes:
-        return _bz2.compress(f, compresslevel=self.compresslevel)
-
-    def backward(self, t: bytes) -> bytes:
-        return _bz2.decompress(t)
-
-
-@_register_extension('gz')
-@dc.dataclass(frozen=True)
-class Gzip(Compression):
-    compresslevel: int = 9
-
-    def forward(self, f: bytes) -> bytes:
-        return _gzip.compress(f, compresslevel=self.compresslevel)
-
-    def backward(self, t: bytes) -> bytes:
-        return _gzip.decompress(t)
-
-
-@_register_extension('lzma')
-class Lzma(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _lzma.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _lzma.decompress(t)
-
-
-#
-
-
-@_register_extension('lz4')
-@dc.dataclass(frozen=True)
-class Lz4(Compression):
-    compression_level: int = 0
-
-    def forward(self, f: bytes) -> bytes:
-        return _lz4_frame.compress(f, compression_level=self.compression_level)
-
-    def backward(self, t: bytes) -> bytes:
-        return _lz4_frame.decompress(t)
-
-
-@_register_extension('snappy')
-class Snappy(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _snappy.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _snappy.decompress(t)
-
-
-@_register_extension('zstd')
-class Zstd(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _zstandard.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _zstandard.decompress(t)
-
-
-##
-
-
 @dc.dataclass(frozen=True)
 class Struct(FnPair[tuple, bytes]):
     fmt: str
@@ -385,112 +213,3 @@ class ObjectStr_(Object_[str], lang.Abstract):  # noqa

 class ObjectBytes_(Object_[bytes], lang.Abstract):  # noqa
     pass
-
-
-#
-
-
-@_register_extension('pkl')
-@dc.dataclass(frozen=True)
-class Pickle(ObjectBytes_):
-    protocol: int | None = None
-
-    def forward(self, f: ta.Any) -> bytes:
-        return _pickle.dumps(f, protocol=self.protocol)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _pickle.loads(t)
-
-
-class _Json(ObjectStr_, lang.Abstract):  # noqa
-    def backward(self, t: str) -> ta.Any:
-        return _json.loads(t)
-
-
-@_register_extension('json')
-class Json(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps(f)
-
-
-class JsonPretty(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps_pretty(f)
-
-
-class JsonCompact(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps_compact(f)
-
-
-JSON = Json()
-PRETTY_JSON = JsonPretty()
-COMPACT_JSON = JsonCompact()
-
-
-@_register_extension('jsonl')
-class JsonLines(FnPair[ta.Sequence[ta.Any], str]):
-    def forward(self, f: ta.Sequence[ta.Any]) -> str:
-        return '\n'.join(_json.dumps(e) for e in f)
-
-    def backward(self, t: str) -> ta.Sequence[ta.Any]:
-        return [_json.loads(l) for l in t.splitlines()]
-
-
-@_register_extension('toml')
-class Toml(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        raise NotImplementedError
-
-    def backward(self, t: str) -> ta.Any:
-        return _tomllib.loads(t)
-
-
-#
-
-
-@_register_extension('cbor')
-class Cbor(ObjectBytes_):
-    def forward(self, f: ta.Any) -> bytes:
-        return _cbor2.dumps(f)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _cbor2.loads(t)
-
-
-@_register_extension('clpkl')
-@dc.dataclass(frozen=True)
-class Cloudpickle(ObjectBytes_):
-    protocol: int | None = None
-
-    def forward(self, f: ta.Any) -> bytes:
-        return _cloudpickle.dumps(f, protocol=self.protocol)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _cloudpickle.loads(t)
-
-
-@_register_extension('json5')
-class Json5(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _json5.dumps(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _json5.loads(t)
-
-
-@_register_extension('yml', 'yaml')
-class Yaml(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _yaml.dump(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _yaml.safe_load(t)
-
-
-class YamlUnsafe(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _yaml.dump(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _yaml.load(t, _yaml.FullLoader)
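With the extension registry and the serialization/compression pairs gone from this module (the codecs subsystem now covers that ground, e.g. the new cloudpickle codec above), an equivalent pair can still be assembled ad hoc where one is needed. A sketch, assuming fps.of(forward, backward) builds an anonymous FnPair as it does in codecs/funcs.py:

import bz2

from omlish.funcs import pairs as fps

# Roughly what the removed fps.Bz2() provided, rebuilt from the stdlib.
bz2_pair = fps.of(bz2.compress, bz2.decompress)
assert bz2_pair.backward(bz2_pair.forward(b'payload')) == b'payload'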
omlish/io/compress/codecs.py
CHANGED
@@ -2,7 +2,9 @@ import dataclasses as dc
 import typing as ta

 from ... import codecs
+from ..generators import buffer_bytes_stepped_reader_generator
 from .base import Compression
+from .base import IncrementalCompression


 ##
@@ -22,6 +24,20 @@ class CompressionEagerCodec(codecs.EagerCodec[bytes, bytes]):
 ##


+@dc.dataclass(frozen=True)
+class CompressionIncrementalCodec(codecs.IncrementalCodec[bytes, bytes]):
+    compression: IncrementalCompression
+
+    def encode_incremental(self) -> ta.Generator[bytes | None, bytes, None]:
+        return self.compression.compress_incremental()
+
+    def decode_incremental(self) -> ta.Generator[bytes | None, bytes, None]:
+        return buffer_bytes_stepped_reader_generator(self.compression.decompress_incremental())
+
+
+##
+
+
 class CompressionCodec(codecs.Codec):
     pass

@@ -40,6 +56,10 @@ def make_compression_codec(
         output=bytes,

         new=lambda *args, **kwargs: CompressionEagerCodec(cls(*args, **kwargs)),
+
+        new_incremental=(
+            lambda *args, **kwargs: CompressionIncrementalCodec(cls(*args, **kwargs))  # noqa
+        ) if issubclass(cls, IncrementalCompression) else None,
     )
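A minimal sketch of consuming an incremental codec by hand, assuming a CompressionIncrementalCodec instance `codec` as defined above. Per the stepped-generator protocol (see omlish/io/generators/stepped.py below), the generator yields None when it wants input and bytes when it has output; the caller sends None back to acknowledge each output chunk and sends b'' to signal end of input:

def run_incremental(gen, chunks):
    out = []
    it = iter(chunks)
    o = gen.send(None)  # prime the generator
    try:
        while True:
            if o is None:
                o = gen.send(next(it, b''))  # feed input; b'' once exhausted
            else:
                out.append(o)
                o = gen.send(None)  # acknowledge the emitted chunk
    except StopIteration:
        pass
    return b''.join(out)

# e.g. (hypothetical): run_incremental(codec.encode_incremental(), [b'abc', b'def'])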
omlish/io/generators/__init__.py
CHANGED
omlish/io/generators/stepped.py
CHANGED
@@ -151,18 +151,32 @@ def read_into_str_stepped_generator(
 def buffer_bytes_stepped_reader_generator(g: BytesSteppedReaderGenerator) -> BytesSteppedGenerator:
     o = g.send(None)
     buf: ta.Any = None
+    eof = False

     while True:
+        if eof:
+            raise EOFError
+
         if not buf:
             buf = check.isinstance((yield None), bytes)
+            if not buf:
+                eof = True

-        if o is None
+        if o is None:
             i = buf
+            buf = None
+
         elif isinstance(o, int):
-
-
+            while len(buf) < o:
+                more = check.isinstance((yield None), bytes)
+                if not more:
+                    raise EOFError
+                # FIXME: lol - share guts with readers
+                buf += more
+
             i = buf[:o]
             buf = buf[o:]
+
         else:
             raise TypeError(o)

@@ -171,5 +185,7 @@ def buffer_bytes_stepped_reader_generator(g: BytesSteppedReaderGenerator) -> BytesSteppedGenerator:
             i = None
             if isinstance(o, bytes):
                 check.none((yield o))
+                if not o:
+                    return
             else:
                 break
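For context: the reader generators this adapter wraps yield read requests rather than plain None yields. None means "any amount", an int means "exactly that many bytes", and a bytes yield is an output chunk; the adapter buffers caller-supplied chunks to satisfy exact-size reads (the new inner while loop) and now raises EOFError when input runs dry mid-request, with an empty send marking end of input. A toy reader, protocol inferred from the adapter above:

def frame_reader():
    # Toy BytesSteppedReaderGenerator: 1-byte length prefix, then payload.
    while True:
        hdr = yield 1            # request exactly one byte
        payload = yield hdr[0]   # request exactly hdr[0] bytes (assume > 0)
        yield payload            # emit the decoded frame as an output chunk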
omlish/iterators/__init__.py
ADDED
@@ -0,0 +1,24 @@
+from .iterators import (  # noqa
+    PeekIterator,
+    PrefetchIterator,
+    ProxyIterator,
+    RetainIterator,
+)
+
+from .recipes import (  # noqa
+    sliding_window,
+)
+
+from .tools import (  # noqa
+    chunk,
+    expand_indexed_pairs,
+    merge_on,
+    take,
+    unzip,
+)
+
+from .unique import (  # noqa
+    UniqueItem,
+    UniqueIterator,
+    UniqueStats,
+)
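Since the single-module omlish/iterators.py is removed in the same release (see the file list above), these re-exports preserve the old flat import surface. Assuming the old module exported these same names, as the split suggests, code like the following is unaffected:

from omlish.iterators import PeekIterator     # now from .iterators
from omlish.iterators import sliding_window   # now from .recipes
from omlish.iterators import take             # now from .tools
from omlish.iterators import UniqueIterator   # now from .unique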