omlish 0.0.0.dev164__py3-none-any.whl → 0.0.0.dev166__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
omlish/.manifests.json CHANGED
@@ -37,6 +37,20 @@
       }
     }
   },
+  {
+    "module": ".formats.cloudpickle",
+    "attr": "_CLOUDPICKLE_LAZY_CODEC",
+    "file": "omlish/formats/cloudpickle.py",
+    "line": 30,
+    "value": {
+      "$.codecs.base.LazyLoadedCodec": {
+        "mod_name": "omlish.formats.cloudpickle",
+        "attr_name": "CLOUDPICKLE_CODEC",
+        "name": "cloudpickle",
+        "aliases": null
+      }
+    }
+  },
   {
     "module": ".formats.json.codecs",
     "attr": "_JSON_LAZY_CODEC",
@@ -55,7 +69,7 @@
     "module": ".formats.json.codecs",
     "attr": "_JSON_COMPACT_LAZY_CODEC",
     "file": "omlish/formats/json/codecs.py",
-    "line": 23,
+    "line": 21,
     "value": {
       "$.codecs.base.LazyLoadedCodec": {
         "mod_name": "omlish.formats.json.codecs",
@@ -69,7 +83,7 @@
     "module": ".formats.json.codecs",
     "attr": "_JSON_PRETTY_LAZY_CODEC",
     "file": "omlish/formats/json/codecs.py",
-    "line": 32,
+    "line": 28,
     "value": {
       "$.codecs.base.LazyLoadedCodec": {
         "mod_name": "omlish.formats.json.codecs",
@@ -137,6 +151,20 @@
       }
     }
   },
+  {
+    "module": ".formats.yaml",
+    "attr": "_YAML_UNSAFE_LAZY_CODEC",
+    "file": "omlish/formats/yaml.py",
+    "line": 265,
+    "value": {
+      "$.codecs.base.LazyLoadedCodec": {
+        "mod_name": "omlish.formats.yaml",
+        "attr_name": "YAML_UNSAFE_CODEC",
+        "name": "yaml-unsafe",
+        "aliases": null
+      }
+    }
+  },
   {
     "module": ".io.compress.brotli",
     "attr": "_BROTLI_LAZY_CODEC",
omlish/__about__.py CHANGED
@@ -1,5 +1,5 @@
-__version__ = '0.0.0.dev164'
-__revision__ = '72b4cfb1086b384ec55cc221069d5bb2be6b3c10'
+__version__ = '0.0.0.dev166'
+__revision__ = 'e832ee32347c3f4c51e8ead2186def228e3aac1c'
 
 
 #
omlish/codecs/__init__.py CHANGED
@@ -30,6 +30,9 @@ from .chain import ( # noqa
 
 from .funcs import ( # noqa
     FnPairEagerCodec,
+
+    of_pair,
+    of,
 )
 
 from .registry import ( # noqa
omlish/codecs/base.py CHANGED
@@ -9,6 +9,7 @@ from omlish import check
 from omlish import dataclasses as dc
 from omlish import lang
 from omlish import reflect as rfl
+from omlish.funcs import pairs as fps
 
 
 I = ta.TypeVar('I')
@@ -27,6 +28,9 @@ class EagerCodec(lang.Abstract, ta.Generic[I, O]):
     def decode(self, o: O) -> I:
         raise NotImplementedError
 
+    def as_pair(self) -> fps.FnPair[I, O]:
+        return fps.of(self.encode, self.decode)
+
 
 class IncrementalCodec(lang.Abstract, ta.Generic[I, O]):
     @abc.abstractmethod
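
Note: as_pair() adapts any eager codec to the omlish.funcs.pairs world. A minimal sketch of the round trip; the Utf8Codec subclass here is illustrative, not part of the package, and EagerCodec is imported from omlish.codecs.base in case it is not re-exported at the package root:

    from omlish.codecs.base import EagerCodec

    class Utf8Codec(EagerCodec[str, bytes]):
        def encode(self, i: str) -> bytes:
            return i.encode('utf-8')

        def decode(self, o: bytes) -> str:
            return o.decode('utf-8')

    fp = Utf8Codec().as_pair()  # an omlish.funcs.pairs.FnPair[str, bytes]
    assert fp.backward(fp.forward('hi')) == 'hi'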
omlish/codecs/funcs.py CHANGED
@@ -26,3 +26,14 @@ class FnPairEagerCodec(EagerCodec[I, O]):
         decode: ta.Callable[[O], I],
     ) -> 'FnPairEagerCodec[I, O]':
         return cls(fps.of(encode, decode))
+
+
+def of_pair(fp: fps.FnPair[I, O]) -> FnPairEagerCodec[I, O]:
+    return FnPairEagerCodec(fp)
+
+
+def of(
+    encode: ta.Callable[[I], O],
+    decode: ta.Callable[[O], I],
+) -> FnPairEagerCodec[I, O]:
+    return FnPairEagerCodec(fps.of(encode, decode))
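
Note: of_pair() and of() are module-level conveniences over FnPairEagerCodec.of, and the __init__.py change above re-exports them. A minimal usage sketch, assuming FnPairEagerCodec delegates encode/decode to the wrapped pair:

    from omlish import codecs

    hexish = codecs.of(bytes.hex, bytes.fromhex)  # FnPairEagerCodec[bytes, str]
    assert hexish.decode(hexish.encode(b'\x00\xff')) == b'\x00\xff'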
omlish/codecs/text.py CHANGED
@@ -46,8 +46,8 @@ TextEncodingErrors: ta.TypeAlias = ta.Literal[
     'namereplace',
 
     ##
-    # In addition, the following error handler is specific to the given codecs:
-    # utf-8, utf-16, utf-32, utf-16-be, utf-16-le, utf-32-be, utf-32-le
+    # In addition, the following error handler is specific to the given codecs: utf-8, utf-16, utf-32, utf-16-be,
+    # utf-16-le, utf-32-be, utf-32-le
 
     # Allow encoding and decoding surrogate code point (U+D800 - U+DFFF) as normal code point. Otherwise these codecs
     # treat the presence of surrogate code point in str as an error.
omlish/formats/cloudpickle.py ADDED
@@ -0,0 +1,31 @@
+import typing as ta
+
+from .. import lang
+from .codecs import make_bytes_object_codec
+from .codecs import make_object_lazy_loaded_codec
+
+
+if ta.TYPE_CHECKING:
+    import cloudpickle
+else:
+    cloudpickle = lang.proxy_import('cloudpickle')
+
+
+##
+
+
+def dump(obj: ta.Any) -> bytes:
+    return cloudpickle.dumps(obj)
+
+
+def load(s: bytes) -> ta.Any:
+    return cloudpickle.loads(s)
+
+
+##
+
+
+CLOUDPICKLE_CODEC = make_bytes_object_codec('cloudpickle', dump, load)
+
+# @omlish-manifest
+_CLOUDPICKLE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'CLOUDPICKLE_CODEC', CLOUDPICKLE_CODEC)
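
Note: cloudpickle extends pickle to objects like lambdas and locally defined functions, which is what the new codec is for. A usage sketch (requires the optional cloudpickle dependency; the proxy import defers loading it until first use):

    from omlish.formats import cloudpickle as ocp

    blob = ocp.dump(lambda x: x + 1)  # plain pickle would reject a lambda
    fn = ocp.load(blob)
    assert fn(1) == 2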
omlish/formats/json/codecs.py CHANGED
@@ -14,19 +14,15 @@ JSON_CODEC = make_str_object_codec('json', dumps, loads)
 # @omlish-manifest
 _JSON_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_CODEC', JSON_CODEC)
 
-
 #
 
-
 JSON_COMPACT_CODEC = make_str_object_codec('json-compact', dumps_compact, loads)
 
 # @omlish-manifest
 _JSON_COMPACT_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'JSON_COMPACT_CODEC', JSON_COMPACT_CODEC)
 
-
 #
 
-
 JSON_PRETTY_CODEC = make_str_object_codec('json-pretty', dumps_pretty, loads)
 
 # @omlish-manifest
@@ -0,0 +1,4 @@
+"""
+TODO:
+ - jsonl codec
+"""
omlish/formats/yaml.py CHANGED
@@ -257,3 +257,10 @@ YAML_CODEC = make_str_object_codec('yaml', dump, safe_load, aliases=['yml'])
 
 # @omlish-manifest
 _YAML_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_CODEC', YAML_CODEC)
+
+#
+
+YAML_UNSAFE_CODEC = make_str_object_codec('yaml-unsafe', dump, full_load)
+
+# @omlish-manifest
+_YAML_UNSAFE_LAZY_CODEC = make_object_lazy_loaded_codec(__name__, 'YAML_UNSAFE_CODEC', YAML_UNSAFE_CODEC)
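
Note: the new yaml-unsafe codec reuses the same dump but decodes with full_load (yaml's FullLoader), which resolves more of the YAML tag space than safe_load and should only be fed trusted input. A sketch, assuming pyyaml is installed:

    from omlish.formats import yaml as oy

    doc = oy.dump({'a': [1, 2, 3]})
    assert oy.full_load(doc) == {'a': [1, 2, 3]}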
omlish/funcs/pairs.py CHANGED
@@ -1,22 +1,4 @@
-"""
-TODO:
- - objects
- - csv
- - csvloader
- - wrapped (wait for usecase)
- - streams / incremental
- - fileobj -> fileobj?
- - swap zstandard for zstd
-
-Compression choice:
- - lzma if-available minimal-space
- - lz4 if-available read-heavy
- - zstd if-available
- - bz2 read-heavy (but no parallel decompress)
- - gz
-"""
 import abc
-import codecs
 import dataclasses as dc
 import typing as ta
 
@@ -24,40 +6,9 @@ from .. import lang
 
 
 if ta.TYPE_CHECKING:
-    import bz2 as _bz2
-    import gzip as _gzip
-    import lzma as _lzma
-    import pickle as _pickle
     import struct as _struct
-    import tomllib as _tomllib
-
-    import cbor2 as _cbor2
-    import cloudpickle as _cloudpickle
-    import json5 as _json5
-    import lz4.frame as _lz4_frame
-    import snappy as _snappy
-    import yaml as _yaml
-    import zstandard as _zstandard
-
-    from ..formats import json as _json
-
 else:
-    _bz2 = lang.proxy_import('bz2')
-    _gzip = lang.proxy_import('gzip')
-    _lzma = lang.proxy_import('lzma')
-    _pickle = lang.proxy_import('pickle')
     _struct = lang.proxy_import('struct')
-    _tomllib = lang.proxy_import('tomllib')
-
-    _cbor2 = lang.proxy_import('cbor2')
-    _cloudpickle = lang.proxy_import('cloudpickle')
-    _json5 = lang.proxy_import('json5')
-    _lz4_frame = lang.proxy_import('lz4.frame')
-    _snappy = lang.proxy_import('snappy')
-    _yaml = lang.proxy_import('yaml')
-    _zstandard = lang.proxy_import('zstandard')
-
-    _json = lang.proxy_import('..formats.json', __package__)
 
 
 ##
@@ -211,36 +162,6 @@ def compose(*ps):
 ##
 
 
-@dc.dataclass(frozen=True)
-class Text(FnPair[str, bytes]):
-    ci: codecs.CodecInfo
-    encode_errors: str = dc.field(default='strict', kw_only=True)
-    decode_errors: str = dc.field(default='strict', kw_only=True)
-
-    def forward(self, f: str) -> bytes:
-        # Python ignores the returned length:
-        # https://github.com/python/cpython/blob/7431c3799efbd06ed03ee70b64420f45e83b3667/Python/codecs.c#L424
-        t, _ = self.ci.encode(f, self.encode_errors)
-        return t
-
-    def backward(self, t: bytes) -> str:
-        f, _ = self.ci.decode(t, self.decode_errors)
-        return f
-
-
-def text(name: str, *, encode_errors: str = 'strict', decode_errors: str = 'strict') -> Text:
-    ci = codecs.lookup(name)
-    if not ci._is_text_encoding: # noqa
-        raise TypeError(f'must be text codec: {name}')
-    return Text(ci, encode_errors=encode_errors, decode_errors=decode_errors)
-
-
-UTF8 = text('utf-8')
-
-
-#
-
-
 @dc.dataclass(frozen=True)
 class Optional(FnPair[F | None, T | None]):
     fp: FnPair[F, T]
@@ -263,99 +184,6 @@ class Lines(FnPair[ta.Sequence[str], str]):
 ##
 
 
-_EXTENSION_REGISTRY: dict[str, type[FnPair]] = {}
-
-
-def _register_extension(*ss):
-    def inner(cls):
-        for s in ss:
-            if s in _EXTENSION_REGISTRY:
-                raise KeyError(s)
-            _EXTENSION_REGISTRY[s] = cls
-        return cls
-    return inner
-
-
-def get_for_extension(ext: str) -> FnPair:
-    return compose(*[_EXTENSION_REGISTRY[p]() for p in ext.split('.')])
-
-
-##
-
-
-class Compression(FnPair[bytes, bytes], abc.ABC):
-    pass
-
-
-@_register_extension('bz2')
-@dc.dataclass(frozen=True)
-class Bz2(Compression):
-    compresslevel: int = 9
-
-    def forward(self, f: bytes) -> bytes:
-        return _bz2.compress(f, compresslevel=self.compresslevel)
-
-    def backward(self, t: bytes) -> bytes:
-        return _bz2.decompress(t)
-
-
-@_register_extension('gz')
-@dc.dataclass(frozen=True)
-class Gzip(Compression):
-    compresslevel: int = 9
-
-    def forward(self, f: bytes) -> bytes:
-        return _gzip.compress(f, compresslevel=self.compresslevel)
-
-    def backward(self, t: bytes) -> bytes:
-        return _gzip.decompress(t)
-
-
-@_register_extension('lzma')
-class Lzma(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _lzma.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _lzma.decompress(t)
-
-
-#
-
-
-@_register_extension('lz4')
-@dc.dataclass(frozen=True)
-class Lz4(Compression):
-    compression_level: int = 0
-
-    def forward(self, f: bytes) -> bytes:
-        return _lz4_frame.compress(f, compression_level=self.compression_level)
-
-    def backward(self, t: bytes) -> bytes:
-        return _lz4_frame.decompress(t)
-
-
-@_register_extension('snappy')
-class Snappy(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _snappy.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _snappy.decompress(t)
-
-
-@_register_extension('zstd')
-class Zstd(Compression):
-    def forward(self, f: bytes) -> bytes:
-        return _zstandard.compress(f)
-
-    def backward(self, t: bytes) -> bytes:
-        return _zstandard.decompress(t)
-
-
-##
-
-
 @dc.dataclass(frozen=True)
 class Struct(FnPair[tuple, bytes]):
     fmt: str
@@ -385,112 +213,3 @@ class ObjectStr_(Object_[str], lang.Abstract): # noqa
 
 class ObjectBytes_(Object_[bytes], lang.Abstract): # noqa
     pass
-
-
-#
-
-
-@_register_extension('pkl')
-@dc.dataclass(frozen=True)
-class Pickle(ObjectBytes_):
-    protocol: int | None = None
-
-    def forward(self, f: ta.Any) -> bytes:
-        return _pickle.dumps(f, protocol=self.protocol)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _pickle.loads(t)
-
-
-class _Json(ObjectStr_, lang.Abstract): # noqa
-    def backward(self, t: str) -> ta.Any:
-        return _json.loads(t)
-
-
-@_register_extension('json')
-class Json(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps(f)
-
-
-class JsonPretty(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps_pretty(f)
-
-
-class JsonCompact(_Json):
-    def forward(self, f: ta.Any) -> str:
-        return _json.dumps_compact(f)
-
-
-JSON = Json()
-PRETTY_JSON = JsonPretty()
-COMPACT_JSON = JsonCompact()
-
-
-@_register_extension('jsonl')
-class JsonLines(FnPair[ta.Sequence[ta.Any], str]):
-    def forward(self, f: ta.Sequence[ta.Any]) -> str:
-        return '\n'.join(_json.dumps(e) for e in f)
-
-    def backward(self, t: str) -> ta.Sequence[ta.Any]:
-        return [_json.loads(l) for l in t.splitlines()]
-
-
-@_register_extension('toml')
-class Toml(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        raise NotImplementedError
-
-    def backward(self, t: str) -> ta.Any:
-        return _tomllib.loads(t)
-
-
-#
-
-
-@_register_extension('cbor')
-class Cbor(ObjectBytes_):
-    def forward(self, f: ta.Any) -> bytes:
-        return _cbor2.dumps(f)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _cbor2.loads(t)
-
-
-@_register_extension('clpkl')
-@dc.dataclass(frozen=True)
-class Cloudpickle(ObjectBytes_):
-    protocol: int | None = None
-
-    def forward(self, f: ta.Any) -> bytes:
-        return _cloudpickle.dumps(f, protocol=self.protocol)
-
-    def backward(self, t: bytes) -> ta.Any:
-        return _cloudpickle.loads(t)
-
-
-@_register_extension('json5')
-class Json5(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _json5.dumps(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _json5.loads(t)
-
-
-@_register_extension('yml', 'yaml')
-class Yaml(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _yaml.dump(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _yaml.safe_load(t)
-
-
-class YamlUnsafe(ObjectStr_):
-    def forward(self, f: ta.Any) -> str:
-        return _yaml.dump(f)
-
-    def backward(self, t: str) -> ta.Any:
-        return _yaml.load(t, _yaml.FullLoader)
@@ -2,7 +2,9 @@ import dataclasses as dc
 import typing as ta
 
 from ... import codecs
+from ..generators import buffer_bytes_stepped_reader_generator
 from .base import Compression
+from .base import IncrementalCompression
 
 
 ##
@@ -22,6 +24,20 @@ class CompressionEagerCodec(codecs.EagerCodec[bytes, bytes]):
 ##
 
 
+@dc.dataclass(frozen=True)
+class CompressionIncrementalCodec(codecs.IncrementalCodec[bytes, bytes]):
+    compression: IncrementalCompression
+
+    def encode_incremental(self) -> ta.Generator[bytes | None, bytes, None]:
+        return self.compression.compress_incremental()
+
+    def decode_incremental(self) -> ta.Generator[bytes | None, bytes, None]:
+        return buffer_bytes_stepped_reader_generator(self.compression.decompress_incremental())
+
+
+##
+
+
 class CompressionCodec(codecs.Codec):
     pass
 
@@ -40,6 +56,10 @@ def make_compression_codec(
         output=bytes,
 
         new=lambda *args, **kwargs: CompressionEagerCodec(cls(*args, **kwargs)),
+
+        new_incremental=(
+            lambda *args, **kwargs: CompressionIncrementalCodec(cls(*args, **kwargs)) # noqa
+        ) if issubclass(cls, IncrementalCompression) else None,
     )
 
 
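Note: encode_incremental()/decode_incremental() return stepped generators that yield None when they want input and yield bytes when they have output. A rough driver sketch under that assumption; the b'' end-of-input sentinel is inferred from the buffering code further below, so treat this as illustrative only:

    def pump(gen, chunks):
        out = []
        feed = iter([*chunks, b''])  # b'' signals end of input
        o = gen.send(None)  # prime the generator; get its first yield
        try:
            while True:
                if o is None:
                    o = gen.send(next(feed))  # generator wants more input
                else:
                    out.append(o)
                    o = gen.send(None)  # acknowledge emitted output
        except StopIteration:
            pass
        return b''.join(out)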
@@ -50,4 +50,7 @@ from .stepped import ( # noqa
     joined_str_stepped_generator,
 
     read_into_bytes_stepped_generator,
+    read_into_str_stepped_generator,
+
+    buffer_bytes_stepped_reader_generator,
 )
@@ -151,18 +151,32 @@ def read_into_str_stepped_generator(
 def buffer_bytes_stepped_reader_generator(g: BytesSteppedReaderGenerator) -> BytesSteppedGenerator:
     o = g.send(None)
     buf: ta.Any = None
+    eof = False
 
     while True:
+        if eof:
+            raise EOFError
+
         if not buf:
             buf = check.isinstance((yield None), bytes)
+            if not buf:
+                eof = True
 
-        if o is None or not buf:
+        if o is None:
             i = buf
+            buf = None
+
         elif isinstance(o, int):
-            if len(buf) < o:
-                raise NotImplementedError
+            while len(buf) < o:
+                more = check.isinstance((yield None), bytes)
+                if not more:
+                    raise EOFError
+                # FIXME: lol - share guts with readers
+                buf += more
+
             i = buf[:o]
             buf = buf[o:]
+
         else:
             raise TypeError(o)
 
@@ -171,5 +185,7 @@ def buffer_bytes_stepped_reader_generator(g: BytesSteppedReaderGenerator) -> BytesSteppedGenerator:
         i = None
         if isinstance(o, bytes):
             check.none((yield o))
+            if not o:
+                return
         else:
             break
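
Note: the rewrite replaces the old raise NotImplementedError on short reads with real buffering: an int request now accumulates sends until enough bytes are available, and an empty send marks EOF. An illustrative trace; the toy reader below is hypothetical, with its yield-int-request / yield-bytes-output protocol inferred from the wrapper above:

    from omlish.io.generators import buffer_bytes_stepped_reader_generator

    def toy_reader():
        data = yield 4  # request exactly 4 bytes
        yield data      # emit them as output
        yield b''       # then end the stream

    g = buffer_bytes_stepped_reader_generator(toy_reader())
    g.send(None)                     # prime; generator asks for input
    assert g.send(b'ab') is None     # short chunk is buffered, more needed
    assert g.send(b'cd') == b'abcd'  # request satisfied across two sends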
@@ -0,0 +1,24 @@
+from .iterators import ( # noqa
+    PeekIterator,
+    PrefetchIterator,
+    ProxyIterator,
+    RetainIterator,
+)
+
+from .recipes import ( # noqa
+    sliding_window,
+)
+
+from .tools import ( # noqa
+    chunk,
+    expand_indexed_pairs,
+    merge_on,
+    take,
+    unzip,
+)
+
+from .unique import ( # noqa
+    UniqueItem,
+    UniqueIterator,
+    UniqueStats,
+)