omlish 0.0.0.dev164__py3-none-any.whl → 0.0.0.dev166__py3-none-any.whl

omlish/.manifests.json CHANGED
@@ -37,6 +37,20 @@
       }
     }
   },
+  {
+    "module": ".formats.cloudpickle",
+    "attr": "_CLOUDPICKLE_LAZY_CODEC",
+    "file": "omlish/formats/cloudpickle.py",
+    "line": 30,
+    "value": {
+      "$.codecs.base.LazyLoadedCodec": {
+        "mod_name": "omlish.formats.cloudpickle",
+        "attr_name": "CLOUDPICKLE_CODEC",
+        "name": "cloudpickle",
+        "aliases": null
+      }
+    }
+  },
   {
     "module": ".formats.json.codecs",
     "attr": "_JSON_LAZY_CODEC",
@@ -55,7 +69,7 @@
     "module": ".formats.json.codecs",
     "attr": "_JSON_COMPACT_LAZY_CODEC",
     "file": "omlish/formats/json/codecs.py",
-    "line": 23,
+    "line": 21,
     "value": {
       "$.codecs.base.LazyLoadedCodec": {
         "mod_name": "omlish.formats.json.codecs",
@@ -69,7 +83,7 @@
     "module": ".formats.json.codecs",
     "attr": "_JSON_PRETTY_LAZY_CODEC",
    "file": "omlish/formats/json/codecs.py",
-    "line": 32,
+    "line": 28,
     "value": {
       "$.codecs.base.LazyLoadedCodec": {
         "mod_name": "omlish.formats.json.codecs",
@@ -137,6 +151,20 @@
       }
     }
   },
+  {
+    "module": ".formats.yaml",
+    "attr": "_YAML_UNSAFE_LAZY_CODEC",
+    "file": "omlish/formats/yaml.py",
+    "line": 265,
+    "value": {
+      "$.codecs.base.LazyLoadedCodec": {
+        "mod_name": "omlish.formats.yaml",
+        "attr_name": "YAML_UNSAFE_CODEC",
+        "name": "yaml-unsafe",
+        "aliases": null
+      }
+    }
+  },
   {
     "module": ".io.compress.brotli",
     "attr": "_BROTLI_LAZY_CODEC",
omlish/__about__.py CHANGED
@@ -1,5 +1,5 @@
-__version__ = '0.0.0.dev164'
-__revision__ = '72b4cfb1086b384ec55cc221069d5bb2be6b3c10'
+__version__ = '0.0.0.dev166'
+__revision__ = 'e832ee32347c3f4c51e8ead2186def228e3aac1c'
 
 
 #
omlish/codecs/__init__.py CHANGED
@@ -30,6 +30,9 @@ from .chain import ( # noqa
 
 from .funcs import (  # noqa
     FnPairEagerCodec,
+
+    of_pair,
+    of,
 )
 
 from .registry import (  # noqa
omlish/codecs/base.py CHANGED
@@ -9,6 +9,7 @@ from omlish import check
 from omlish import dataclasses as dc
 from omlish import lang
 from omlish import reflect as rfl
+from omlish.funcs import pairs as fps
 
 
 I = ta.TypeVar('I')
@@ -27,6 +28,9 @@ class EagerCodec(lang.Abstract, ta.Generic[I, O]):
     def decode(self, o: O) -> I:
         raise NotImplementedError
 
+    def as_pair(self) -> fps.FnPair[I, O]:
+        return fps.of(self.encode, self.decode)
+
 
 class IncrementalCodec(lang.Abstract, ta.Generic[I, O]):
     @abc.abstractmethod
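
The new as_pair() bridge lets any eager codec be used where an fps.FnPair is expected. A minimal sketch (assuming FnPair exposes the forward/backward methods seen elsewhere in this diff, and using the of() helper added to omlish/codecs/funcs.py below):

    from omlish import codecs

    utf8 = codecs.of(lambda s: s.encode('utf-8'), lambda b: b.decode('utf-8'))
    pair = utf8.as_pair()            # fps.FnPair[str, bytes]
    assert pair.forward('hi') == b'hi'
    assert pair.backward(b'hi') == 'hi'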
omlish/codecs/funcs.py CHANGED
@@ -26,3 +26,14 @@ class FnPairEagerCodec(EagerCodec[I, O]):
             decode: ta.Callable[[O], I],
     ) -> 'FnPairEagerCodec[I, O]':
         return cls(fps.of(encode, decode))
+
+
+def of_pair(fp: fps.FnPair[I, O]) -> FnPairEagerCodec[I, O]:
+    return FnPairEagerCodec(fp)
+
+
+def of(
+        encode: ta.Callable[[I], O],
+        decode: ta.Callable[[O], I],
+) -> FnPairEagerCodec[I, O]:
+    return FnPairEagerCodec(fps.of(encode, decode))
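
of_pair() is the inverse bridge: it wraps an existing fps.FnPair as an eager codec. A hedged sketch (that FnPairEagerCodec delegates encode/decode to the pair's forward/backward is assumed from context):

    from omlish import codecs
    from omlish.funcs import pairs as fps

    pair = fps.of(bytes.hex, bytes.fromhex)
    c = codecs.of_pair(pair)         # FnPairEagerCodec[bytes, str]
    assert c.encode(b'hi') == '6869'
    assert c.decode('6869') == b'hi'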
omlish/codecs/text.py CHANGED
@@ -46,8 +46,8 @@ TextEncodingErrors: ta.TypeAlias = ta.Literal[
     'namereplace',
 
     ##
-    # In addition, the following error handler is specific to the given codecs:
-    # utf-8, utf-16, utf-32, utf-16-be, utf-16-le, utf-32-be, utf-32-le
+    # In addition, the following error handler is specific to the given codecs: utf-8, utf-16, utf-32, utf-16-be,
+    # utf-16-le, utf-32-be, utf-32-le
 
     # Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
     # treat the presence of surrogate code point in str as an error.
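
The error handler those comments describe is 'surrogatepass', which lets the utf-8/utf-16/utf-32 family round-trip lone surrogates that 'strict' rejects (standard Python behavior, shown here only for illustration):

    '\ud800'.encode('utf-8', 'surrogatepass')         # b'\xed\xa0\x80'
    b'\xed\xa0\x80'.decode('utf-8', 'surrogatepass')  # '\ud800'
    '\ud800'.encode('utf-8')                          # raises UnicodeEncodeError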
omlish/formats/cloudpickle.py ADDED
@@ -0,0 +1,31 @@
+import typing as ta
+
+from .. import lang
+from .codecs import make_bytes_object_codec
+from .codecs import make_object_lazy_loaded_codec
+
+
+if ta.TYPE_CHECKING:
+    import cloudpickle
+else:
+    cloudpickle = lang.proxy_import('cloudpickle')
+
+
+##
+
+
+def dump(obj: ta.Any) -> bytes:
+    return cloudpickle.dumps(obj)
+
+
+def load(s: bytes) -> ta.Any:
+    return cloudpickle.loads(s)
+
+
+##
+
+
+CLOUDPICKLE_CODEC = make_bytes_object_codec('cloudpickle', dump, load)
+
+# @omlish-manifest
+_CLOUDPICKLE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'CLOUDPICKLE_CODEC', CLOUDPICKLE_CODEC)
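
Unlike the stdlib pickle, cloudpickle can serialize objects such as lambdas and locally defined functions. A small round-trip sketch (requires the third-party cloudpickle package at call time, per the proxy import above):

    from omlish.formats import cloudpickle as ocp

    blob = ocp.dump(lambda x: x + 1)   # plain pickle.dumps would raise here
    f = ocp.load(blob)
    assert f(2) == 3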
omlish/formats/json/codecs.py CHANGED
@@ -14,19 +14,15 @@ JSON_CODEC = make_str_object_codec('json', dumps, loads)
 # @omlish-manifest
 _JSON_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_CODEC', JSON_CODEC)
 
-
 #
 
-
 JSON_COMPACT_CODEC = make_str_object_codec('json-compact', dumps_compact, loads)
 
 # @omlish-manifest
 _JSON_COMPACT_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_COMPACT_CODEC', JSON_COMPACT_CODEC)
 
-
 #
 
-
 JSON_PRETTY_CODEC = make_str_object_codec('json-pretty', dumps_pretty, loads)
 
 # @omlish-manifest
@@ -0,0 +1,4 @@
+"""
+TODO:
+ - jsonl codec
+"""
omlish/formats/yaml.py CHANGED
@@ -257,3 +257,10 @@ YAML_CODEC = make_str_object_codec('yaml', dump, safe_load, aliases=['yml'])
 
 # @omlish-manifest
 _YAML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_CODEC', YAML_CODEC)
+
+#
+
+YAML_UNSAFE_CODEC = make_str_object_codec('yaml-unsafe', dump, full_load)
+
+# @omlish-manifest
+_YAML_UNSAFE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_UNSAFE_CODEC', YAML_UNSAFE_CODEC)
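
The 'unsafe' in the name reflects the decode side: the new codec pairs the same dump with PyYAML's full_load rather than safe_load. A minimal round-trip sketch using the module-level names visible in this diff:

    from omlish.formats import yaml as oyaml   # requires PyYAML

    s = oyaml.dump({'a': [1, 2]})
    assert oyaml.full_load(s) == {'a': [1, 2]}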
omlish/funcs/pairs.py CHANGED
@@ -1,22 +1,4 @@
-"""
-TODO:
- - objects
- - csv
- - csvloader
- - wrapped (wait for usecase)
- - streams / incremental
- - fileobj -> fileobj?
- - swap zstandard for zstd
-
-Compression choice:
- - lzma if-available minimal-space
- - lz4 if-available read-heavy
- - zstd if-available
- - bz2 read-heavy (but no parallel decompress)
- - gz
-"""
 import abc
-import codecs
 import dataclasses as dc
 import typing as ta
 
@@ -24,40 +6,9 @@ from .. import lang
 
 
 if ta.TYPE_CHECKING:
-    import bz2 as _bz2
-    import gzip as _gzip
-    import lzma as _lzma
-    import pickle as _pickle
     import struct as _struct
-    import tomllib as _tomllib
-
-    import cbor2 as _cbor2
-    import cloudpickle as _cloudpickle
-    import json5 as _json5
-    import lz4.frame as _lz4_frame
-    import snappy as _snappy
-    import yaml as _yaml
-    import zstandard as _zstandard
-
-    from ..formats import json as _json
-
 else:
-    _bz2 = lang.proxy_import('bz2')
-    _gzip = lang.proxy_import('gzip')
-    _lzma = lang.proxy_import('lzma')
-    _pickle = lang.proxy_import('pickle')
     _struct = lang.proxy_import('struct')
-    _tomllib = lang.proxy_import('tomllib')
-
-    _cbor2 = lang.proxy_import('cbor2')
-    _cloudpickle = lang.proxy_import('cloudpickle')
-    _json5 = lang.proxy_import('json5')
-    _lz4_frame = lang.proxy_import('lz4.frame')
-    _snappy = lang.proxy_import('snappy')
-    _yaml = lang.proxy_import('yaml')
-    _zstandard = lang.proxy_import('zstandard')
-
-    _json = lang.proxy_import('..formats.json', __package__)
 
 
 ##
@@ -211,36 +162,6 @@ def compose(*ps):
 ##
 
 
-@dc.dataclass(frozen=True)
-class Text(FnPair[str, bytes]):
-    ci: codecs.CodecInfo
-    encode_errors: str = dc.field(default='strict', kw_only=True)
-    decode_errors: str = dc.field(default='strict', kw_only=True)
-
-    def forward(self, f: str) -> bytes:
-        # Python ignores the returned length:
-        # https://github.com/python/cpython/blob/7431c3799efbd06ed03ee70b64420f45e83b3667/Python/codecs.c#L424
-        t, _ = self.ci.encode(f, self.encode_errors)
-        return t
-
-    def backward(self, t: bytes) -> str:
-        f, _ = self.ci.decode(t, self.decode_errors)
-        return f
-
-
-def text(name: str, *, encode_errors: str = 'strict', decode_errors: str = 'strict') -> Text:
-    ci = codecs.lookup(name)
-    if not ci._is_text_encoding:  # noqa
-        raise TypeError(f'must be text codec: {name}')
-    return Text(ci, encode_errors=encode_errors, decode_errors=decode_errors)
-
-
-UTF8 = text('utf-8')
-
-
-#
-
-
 @dc.dataclass(frozen=True)
 class Optional(FnPair[F | None, T | None]):
     fp: FnPair[F, T]
@@ -263,99 +184,6 @@ class Lines(FnPair[ta.Sequence[str], str]):
 ##
 
 
-_EXTENSION_REGISTRY: dict[str, type[FnPair]] = {}
-
-
-def _register_extension(*ss):
-    def inner(cls):
-        for s in ss:
-            if s in _EXTENSION_REGISTRY:
-                raise KeyError(s)
-            _EXTENSION_REGISTRY[s] = cls
-        return cls
-    return inner
-
-
-def get_for_extension(ext: str) -> FnPair:
-    return compose(*[_EXTENSION_REGISTRY[p]() for p in ext.split('.')])
-
-
-##
-
-
-class Compression(FnPair[bytes, bytes], abc.ABC):
-    pass
-
-
-@_register_extension('bz2')
-@dc.dataclass(frozen=True)
-class Bz2(Compression):
-    compresslevel: int = 9
-
-    def forward(self, f: bytes) -> bytes:
-        return _bz2.compress(f, compresslevel=self.compresslevel)
-
-    def backward(self, t: bytes) -> bytes:
-        return _bz2.decompress(t)
-
-
-@_register_extension('gz')
-@dc.dataclass(frozen=True)
-class Gzip(Compression):
-    compresslevel: int = 9
-
-    def forward(self, f: bytes) -> bytes:
-        return _gzip.compress(f, compresslevel=self.compresslevel)
-
-    def backward(self, t: bytes) -> bytes:
-        return _gzip.decompress(t)
-
-
-@_register_extension('lzma')
-class Lzma(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _lzma.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _lzma.decompress(t)
-
-
-#
-
-
-@_register_extension('lz4')
-@dc.dataclass(frozen=True)
-class Lz4(Compression):
-    compression_level: int = 0
-
-    def forward(self, f: bytes) -> bytes:
-        return _lz4_frame.compress(f, compression_level=self.compression_level)
-
-    def backward(self, t: bytes) -> bytes:
-        return _lz4_frame.decompress(t)
-
-
-@_register_extension('snappy')
-class Snappy(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _snappy.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _snappy.decompress(t)
-
-
-@_register_extension('zstd')
-class Zstd(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _zstandard.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _zstandard.decompress(t)
-
-
-##
-
-
 @dc.dataclass(frozen=True)
 class Struct(FnPair[tuple, bytes]):
     fmt: str
@@ -385,112 +213,3 @@ class ObjectStr_(Object_[str], lang.Abstract): # noqa
 
 class ObjectBytes_(Object_[bytes], lang.Abstract):  # noqa
     pass
-
-
-#
-
-
-@_register_extension('pkl')
-@dc.dataclass(frozen=True)
-class Pickle(ObjectBytes_):
-    protocol: int | None = None
-
-    def forward(self, f: ta.Any) -> bytes:
-        return _pickle.dumps(f, protocol=self.protocol)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _pickle.loads(t)
-
-
-class _Json(ObjectStr_, lang.Abstract):  # noqa
-    def backward(self, t: str) -> ta.Any:
-        return _json.loads(t)
-
-
-@_register_extension('json')
-class Json(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps(f)
-
-
-class JsonPretty(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps_pretty(f)
-
-
-class JsonCompact(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps_compact(f)
-
-
-JSON = Json()
-PRETTY_JSON = JsonPretty()
-COMPACT_JSON = JsonCompact()
-
-
-@_register_extension('jsonl')
-class JsonLines(FnPair[ta.Sequence[ta.Any], str]):
-    def forward(self, f: ta.Sequence[ta.Any]) -> str:
-        return '\n'.join(_json.dumps(e) for e in f)
-
-    def backward(self, t: str) -> ta.Sequence[ta.Any]:
-        return [_json.loads(l) for l in t.splitlines()]
-
-
-@_register_extension('toml')
-class Toml(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        raise NotImplementedError
-
-    def backward(self, t: str) -> ta.Any:
-        return _tomllib.loads(t)
-
-
-#
-
-
-@_register_extension('cbor')
-class Cbor(ObjectBytes_):
-    def forward(self, f: ta.Any) -> bytes:
-        return _cbor2.dumps(f)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _cbor2.loads(t)
-
-
-@_register_extension('clpkl')
-@dc.dataclass(frozen=True)
-class Cloudpickle(ObjectBytes_):
-    protocol: int | None = None
-
-    def forward(self, f: ta.Any) -> bytes:
-        return _cloudpickle.dumps(f, protocol=self.protocol)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _cloudpickle.loads(t)
-
-
-@_register_extension('json5')
-class Json5(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _json5.dumps(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _json5.loads(t)
-
-
-@_register_extension('yml', 'yaml')
-class Yaml(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _yaml.dump(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _yaml.safe_load(t)
-
-
-class YamlUnsafe(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _yaml.dump(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _yaml.load(t, _yaml.FullLoader)
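
With the format- and compression-specific pairs relocated (cloudpickle to omlish/formats/cloudpickle.py above, compression into omlish/io/compress), the module keeps its composable core. A sketch of what remains, assuming compose() chains forward in order and backward in reverse:

    from omlish.funcs import pairs as fps

    enc = fps.of(lambda s: s.encode('utf-8'), lambda b: b.decode('utf-8'))
    hexed = fps.of(bytes.hex, bytes.fromhex)

    p = fps.compose(enc, hexed)      # str -> bytes -> hex str
    assert p.forward('hi') == '6869'
    assert p.backward('6869') == 'hi'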
@@ -2,7 +2,9 @@ import dataclasses as dc
 import typing as ta
 
 from ... import codecs
+from ..generators import buffer_bytes_stepped_reader_generator
 from .base import Compression
+from .base import IncrementalCompression
 
 
 ##
@@ -22,6 +24,20 @@ class CompressionEagerCodec(codecs.EagerCodec[bytes, bytes]):
 ##
 
 
+@dc.dataclass(frozen=True)
+class CompressionIncrementalCodec(codecs.IncrementalCodec[bytes, bytes]):
+    compression: IncrementalCompression
+
+    def encode_incremental(self) -> ta.Generator[bytes | None, bytes, None]:
+        return self.compression.compress_incremental()
+
+    def decode_incremental(self) -> ta.Generator[bytes | None, bytes, None]:
+        return buffer_bytes_stepped_reader_generator(self.compression.decompress_incremental())
+
+
+##
+
+
 class CompressionCodec(codecs.Codec):
     pass
 
@@ -40,6 +56,10 @@ def make_compression_codec(
         output=bytes,
 
         new=lambda *args, **kwargs: CompressionEagerCodec(cls(*args, **kwargs)),
+
+        new_incremental=(
+            lambda *args, **kwargs: CompressionIncrementalCodec(cls(*args, **kwargs))  # noqa
+        ) if issubclass(cls, IncrementalCompression) else None,
     )
 
 
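The stepped-generator protocol these codecs expose can be inferred from buffer_bytes_stepped_reader_generator below: the generator yields None to request more input and yields bytes to emit output (acknowledged by sending None back), with b'' marking end-of-input. A hedged driver sketch built on that inference, not an omlish API:

    def drive(g, chunks):
        out = bytearray()
        o = g.send(None)                 # prime; first yield is a request or output
        try:
            for c in [*chunks, b'']:     # b'' signals end-of-input
                while o is not None:     # drain pending output first
                    out += o
                    o = g.send(None)
                o = g.send(c)
            while o is not None:
                out += o
                o = g.send(None)
        except StopIteration:
            pass
        return bytes(out)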
@@ -50,4 +50,7 @@ from .stepped import ( # noqa
     joined_str_stepped_generator,
 
     read_into_bytes_stepped_generator,
+    read_into_str_stepped_generator,
+
+    buffer_bytes_stepped_reader_generator,
 )
@@ -151,18 +151,32 @@ def read_into_str_stepped_generator(
 def buffer_bytes_stepped_reader_generator(g: BytesSteppedReaderGenerator) -> BytesSteppedGenerator:
     o = g.send(None)
     buf: ta.Any = None
+    eof = False
 
     while True:
+        if eof:
+            raise EOFError
+
         if not buf:
             buf = check.isinstance((yield None), bytes)
+            if not buf:
+                eof = True
 
-        if o is None or not buf:
+        if o is None:
             i = buf
+            buf = None
+
         elif isinstance(o, int):
-            if len(buf) < o:
-                raise NotImplementedError
+            while len(buf) < o:
+                more = check.isinstance((yield None), bytes)
+                if not more:
+                    raise EOFError
+                # FIXME: lol - share guts with readers
+                buf += more
+
             i = buf[:o]
             buf = buf[o:]
+
         else:
             raise TypeError(o)
 
@@ -171,5 +185,7 @@ def buffer_bytes_stepped_reader_generator(g: BytesSteppedReaderGenerator) -> Byt
         i = None
         if isinstance(o, bytes):
             check.none((yield o))
+            if not o:
+                return
         else:
             break
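
The key fix is in the int-request branch: where the old code raised NotImplementedError if the buffer could not yet satisfy an exact-size read, the generator now keeps yielding None to accumulate input until it can. A self-contained toy of that buffering contract (a simplified model, not the omlish API):

    import typing as ta

    def reader() -> ta.Generator[int, bytes, None]:
        header = yield 2          # request exactly 2 bytes
        body = yield header[0]    # first header byte gives the body length
        print(header, body)

    def feed(g: ta.Generator[int, bytes, None], chunks: ta.Iterable[bytes]) -> None:
        buf = b''
        it = iter(chunks)
        n = g.send(None)                 # prime; get the first size request
        try:
            while True:
                while len(buf) < n:      # buffer until the request is satisfiable
                    buf += next(it)      # StopIteration here = truncated input
                n, buf = g.send(buf[:n]), buf[n:]
        except StopIteration:
            pass

    feed(reader(), [b'\x03', b'Xab', b'c'])  # prints b'\x03X' b'abc'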
@@ -0,0 +1,24 @@
+from .iterators import (  # noqa
+    PeekIterator,
+    PrefetchIterator,
+    ProxyIterator,
+    RetainIterator,
+)
+
+from .recipes import (  # noqa
+    sliding_window,
+)
+
+from .tools import (  # noqa
+    chunk,
+    expand_indexed_pairs,
+    merge_on,
+    take,
+    unzip,
+)
+
+from .unique import (  # noqa
+    UniqueItem,
+    UniqueIterator,
+    UniqueStats,
+)
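
A brief usage sketch, assuming this is the package __init__ for omlish.iterators and that the re-exports keep the usual itertools-recipe semantics their names suggest:

    from omlish import iterators as its

    assert list(its.sliding_window([1, 2, 3, 4], 2)) == [(1, 2), (2, 3), (3, 4)]
    assert list(its.take(2, iter('abcdef'))) == ['a', 'b']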