omlish 0.0.0.dev80__py3-none-any.whl → 0.0.0.dev82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. omlish/__about__.py +4 -4
  2. omlish/dataclasses/impl/__init__.py +8 -0
  3. omlish/dataclasses/impl/params.py +3 -0
  4. omlish/dataclasses/impl/slots.py +61 -7
  5. omlish/formats/json/__init__.py +8 -1
  6. omlish/formats/json/backends/__init__.py +7 -0
  7. omlish/formats/json/backends/base.py +38 -0
  8. omlish/formats/json/backends/default.py +10 -0
  9. omlish/formats/json/backends/jiter.py +25 -0
  10. omlish/formats/json/backends/orjson.py +46 -2
  11. omlish/formats/json/backends/std.py +39 -0
  12. omlish/formats/json/backends/ujson.py +49 -0
  13. omlish/formats/json/cli.py +125 -31
  14. omlish/formats/json/consts.py +22 -0
  15. omlish/formats/json/encoding.py +17 -0
  16. omlish/formats/json/json.py +9 -39
  17. omlish/formats/json/render.py +49 -24
  18. omlish/formats/json/stream/__init__.py +0 -0
  19. omlish/formats/json/stream/build.py +113 -0
  20. omlish/formats/json/stream/lex.py +285 -0
  21. omlish/formats/json/stream/parse.py +244 -0
  22. omlish/formats/json/stream/render.py +119 -0
  23. omlish/genmachine.py +56 -10
  24. omlish/lang/resources.py +6 -1
  25. omlish/marshal/base.py +2 -0
  26. omlish/marshal/newtypes.py +24 -0
  27. omlish/marshal/standard.py +4 -0
  28. omlish/reflect/__init__.py +1 -0
  29. omlish/reflect/types.py +6 -1
  30. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/METADATA +5 -5
  31. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/RECORD +35 -24
  32. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/LICENSE +0 -0
  33. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/WHEEL +0 -0
  34. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/entry_points.txt +0 -0
  35. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,17 @@
1
+ import json
2
+
3
+
4
+ detect_encoding = json.detect_encoding
5
+
6
+
7
+ def decodes(s: str | bytes | bytearray) -> str:
8
+ if isinstance(s, str):
9
+ if s.startswith('\ufeff'):
10
+ raise json.JSONDecodeError('Unexpected UTF-8 BOM (decode using utf-8-sig)', s, 0)
11
+ return s
12
+
13
+ elif isinstance(s, (bytes, bytearray)):
14
+ return s.decode(detect_encoding(s), 'surrogatepass')
15
+
16
+ else:
17
+ raise TypeError(f'the JSON object must be str, bytes or bytearray, not {s.__class__.__name__}')
@@ -1,47 +1,17 @@
1
- """
2
- TODO:
3
- - backend abstr
4
- - streaming
5
- """
6
- import functools
7
- import json as _json
8
- import typing as ta
1
+ from .backends import DEFAULT_BACKED
9
2
 
10
3
 
11
4
  ##
12
5
 
13
6
 
14
- dump = _json.dump
15
- dumps = _json.dumps
7
+ dump = DEFAULT_BACKED.dump
8
+ dumps = DEFAULT_BACKED.dumps
16
9
 
17
- detect_encoding = _json.detect_encoding
10
+ load = DEFAULT_BACKED.load
11
+ loads = DEFAULT_BACKED.loads
18
12
 
19
- load = _json.load
20
- loads = _json.loads
13
+ dump_pretty = DEFAULT_BACKED.dump_pretty
14
+ dumps_pretty = DEFAULT_BACKED.dumps_pretty
21
15
 
22
-
23
- ##
24
-
25
-
26
- PRETTY_INDENT = 2
27
-
28
- PRETTY_KWARGS: ta.Mapping[str, ta.Any] = dict(
29
- indent=PRETTY_INDENT,
30
- )
31
-
32
- dump_pretty: ta.Callable[..., bytes] = functools.partial(dump, **PRETTY_KWARGS) # type: ignore
33
- dumps_pretty: ta.Callable[..., str] = functools.partial(dumps, **PRETTY_KWARGS)
34
-
35
-
36
- ##
37
-
38
-
39
- COMPACT_SEPARATORS = (',', ':')
40
-
41
- COMPACT_KWARGS: ta.Mapping[str, ta.Any] = dict(
42
- indent=None,
43
- separators=COMPACT_SEPARATORS,
44
- )
45
-
46
- dump_compact: ta.Callable[..., bytes] = functools.partial(dump, **COMPACT_KWARGS) # type: ignore
47
- dumps_compact: ta.Callable[..., str] = functools.partial(dumps, **COMPACT_KWARGS)
16
+ dump_compact = DEFAULT_BACKED.dump_compact
17
+ dumps_compact = DEFAULT_BACKED.dumps_compact
@@ -1,40 +1,55 @@
1
+ import abc
2
+ import dataclasses as dc
1
3
  import enum
2
4
  import io
3
5
  import json
4
6
  import typing as ta
5
7
 
8
+ from ... import lang
6
9
 
7
- class JsonRenderer:
10
+
11
+ I = ta.TypeVar('I')
12
+
13
+
14
+ class JsonRendererOut(ta.Protocol):
15
+ def write(self, s: str) -> ta.Any: ...
16
+
17
+
18
+ class AbstractJsonRenderer(lang.Abstract, ta.Generic[I]):
8
19
  class State(enum.Enum):
9
20
  VALUE = enum.auto()
10
21
  KEY = enum.auto()
11
22
 
23
+ @dc.dataclass(frozen=True, kw_only=True)
24
+ class Options:
25
+ indent: int | str | None = None
26
+ separators: tuple[str, str] | None = None
27
+ sort_keys: bool = False
28
+ style: ta.Callable[[ta.Any, 'AbstractJsonRenderer.State'], tuple[str, str]] | None = None
29
+
12
30
  def __init__(
13
31
  self,
14
- out: ta.TextIO,
15
- *,
16
- indent: int | str | None = None,
17
- separators: tuple[str, str] | None = None,
18
- sort_keys: bool = False,
19
- style: ta.Callable[[ta.Any, State], tuple[str, str]] | None = None,
32
+ out: JsonRendererOut,
33
+ opts: Options = Options(),
20
34
  ) -> None:
21
35
  super().__init__()
22
36
 
23
37
  self._out = out
24
- if isinstance(indent, (str, int)):
25
- self._indent = (' ' * indent) if isinstance(indent, int) else indent
38
+ self._opts = opts
39
+
40
+ separators = opts.separators
41
+ if isinstance(opts.indent, (str, int)):
42
+ self._indent = (' ' * opts.indent) if isinstance(opts.indent, int) else opts.indent
26
43
  self._endl = '\n'
27
44
  if separators is None:
28
45
  separators = (',', ': ')
29
- elif indent is None:
46
+ elif opts.indent is None:
30
47
  self._indent = self._endl = ''
31
48
  if separators is None:
32
49
  separators = (', ', ': ')
33
50
  else:
34
- raise TypeError(indent)
51
+ raise TypeError(opts.indent)
35
52
  self._comma, self._colon = separators
36
- self._sort_keys = sort_keys
37
- self._style = style
38
53
 
39
54
  self._level = 0
40
55
 
@@ -54,9 +69,25 @@ class JsonRenderer:
54
69
  if self._level:
55
70
  self._write(self._indent * self._level)
56
71
 
57
- def _render(self, o: ta.Any, state: State = State.VALUE) -> None:
58
- if self._style is not None:
59
- pre, post = self._style(o, state)
72
+ @abc.abstractmethod
73
+ def render(self, i: I) -> None:
74
+ raise NotImplementedError
75
+
76
+ @classmethod
77
+ def render_str(cls, i: I, **kwargs: ta.Any) -> str:
78
+ out = io.StringIO()
79
+ cls(out, cls.Options(**kwargs)).render(i)
80
+ return out.getvalue()
81
+
82
+
83
+ class JsonRenderer(AbstractJsonRenderer[ta.Any]):
84
+ def _render(
85
+ self,
86
+ o: ta.Any,
87
+ state: AbstractJsonRenderer.State = AbstractJsonRenderer.State.VALUE,
88
+ ) -> None:
89
+ if self._opts.style is not None:
90
+ pre, post = self._opts.style(o, state)
60
91
  self._write(pre)
61
92
  else:
62
93
  post = None
@@ -71,13 +102,13 @@ class JsonRenderer:
71
102
  self._write('{')
72
103
  self._level += 1
73
104
  items = list(o.items())
74
- if self._sort_keys:
105
+ if self._opts.sort_keys:
75
106
  items.sort(key=lambda t: t[0])
76
107
  for i, (k, v) in enumerate(items):
77
108
  if i:
78
109
  self._write(self._comma)
79
110
  self._write_indent()
80
- self._render(k, JsonRenderer.State.KEY)
111
+ self._render(k, AbstractJsonRenderer.State.KEY)
81
112
  self._write(self._colon)
82
113
  self._render(v)
83
114
  self._level -= 1
@@ -106,9 +137,3 @@ class JsonRenderer:
106
137
 
107
138
  def render(self, o: ta.Any) -> None:
108
139
  self._render(o)
109
-
110
- @classmethod
111
- def render_str(cls, o: ta.Any, **kwargs: ta.Any) -> str:
112
- out = io.StringIO()
113
- cls(out, **kwargs).render(o)
114
- return out.getvalue()
File without changes
@@ -0,0 +1,113 @@
1
+ import typing as ta
2
+
3
+ from ....genmachine import GenMachine
4
+ from .lex import SCALAR_VALUE_TYPES
5
+ from .parse import BeginArray
6
+ from .parse import BeginObject
7
+ from .parse import EndArray
8
+ from .parse import EndObject
9
+ from .parse import JsonStreamObject
10
+ from .parse import JsonStreamParserEvent
11
+ from .parse import Key
12
+
13
+
14
+ ##
15
+
16
+
17
class JsonObjectBuilder(GenMachine[JsonStreamParserEvent, ta.Any]):
    """
    Assembles a stream of parser events into complete Python values.

    Events are received one at a time through the generator returned by ``_do``. Open containers are tracked on an
    explicit stack; a value is only yielded (as a 1-tuple) once it completes with nothing left open beneath it.
    Nested values are attached to their enclosing container instead.

    Args:
        yield_object_lists: when true, finished objects are left as the ``JsonStreamObject`` they were accumulated
            in rather than being converted with ``dict()``.
    """

    def __init__(
        self,
        *,
        yield_object_lists: bool = False,
    ) -> None:
        self._stack: list[JsonStreamObject | list | Key] = []
        self._yield_object_lists = yield_object_lists

        super().__init__(self._do())

    def _do(self):
        """
        Event loop of the builder.

        Raises ``self.StateError`` for events that do not fit the current stack (or when the generator is closed
        while containers are still open), and ``TypeError`` for events of an unrecognized kind.
        """

        stack = self._stack

        def emit_value(value):
            # Returns a 1-tuple when `value` is a finished top-level value, otherwise attaches it to the enclosing
            # container and returns an empty tuple.
            if not stack:
                return (value,)

            top = stack[-1]

            # NOTE: the Key check intentionally precedes the list check; the order mirrors the original dispatch.
            if isinstance(top, Key):
                # A pending key: pair it with the value and store the pair on the object beneath it.
                stack.pop()
                if not stack:
                    raise self.StateError

                owner = stack[-1]
                if not isinstance(owner, JsonStreamObject):
                    raise self.StateError

                owner.append((top.key, value))
                return ()

            if isinstance(top, list):
                top.append(value)
                return ()

            raise self.StateError

        while True:
            try:
                event = yield None
            except GeneratorExit:
                # Closing with nothing open is a normal shutdown; closing mid-value is an error.
                if not stack:
                    raise
                raise self.StateError from None

            if isinstance(event, SCALAR_VALUE_TYPES):
                out = emit_value(event)
                if out:
                    yield out

            elif event is BeginObject:
                stack.append(JsonStreamObject())

            elif isinstance(event, Key):
                if not (stack and isinstance(stack[-1], JsonStreamObject)):
                    raise self.StateError

                stack.append(event)

            elif event is EndObject:
                if not stack:
                    raise self.StateError

                finished: ta.Any = stack.pop()
                if not isinstance(finished, JsonStreamObject):
                    raise self.StateError

                if not self._yield_object_lists:
                    finished = dict(finished)

                out = emit_value(finished)
                if out:
                    yield out

            elif event is BeginArray:
                stack.append([])

            elif event is EndArray:
                if not stack:
                    raise self.StateError

                items = stack.pop()
                if not isinstance(items, list):
                    raise self.StateError

                out = emit_value(items)
                if out:
                    yield out

            else:
                raise TypeError(event)
@@ -0,0 +1,285 @@
1
+ """
2
+ TODO:
3
+ - max buf size
4
+ - max recursion depth
5
+ - mark start pos of tokens, currently returning end
6
+ """
7
+ import dataclasses as dc
8
+ import io
9
+ import json
10
+ import re
11
+ import typing as ta
12
+
13
+ from .... import check
14
+ from ....genmachine import GenMachine
15
+
16
+
17
+ ##
18
+
19
+
20
# Kinds of tokens that carry a scalar value.
ValueTokenKind: ta.TypeAlias = ta.Literal[
    'STRING',
    'NUMBER',

    'SPECIAL_NUMBER',
    'BOOLEAN',
    'NULL',
]

# Runtime set of the ValueTokenKind literal strings, derived from the alias itself via ta.get_args.
# NOTE(review): check.isinstance is a project helper - presumably it validates the type and returns its argument.
VALUE_TOKEN_KINDS = frozenset(check.isinstance(a, str) for a in ta.get_args(ValueTokenKind))

# Kinds of punctuation tokens.
ControlTokenKind: ta.TypeAlias = ta.Literal[
    'LBRACE',
    'RBRACE',
    'LBRACKET',
    'RBRACKET',
    'COMMA',
    'COLON',
]

# Any token kind, value-bearing or punctuation.
TokenKind: ta.TypeAlias = ValueTokenKind | ControlTokenKind

#

# Python types a token's value may have.
ScalarValue: ta.TypeAlias = str | float | int | None

# The members of ScalarValue as a tuple of classes, suitable as the second argument of isinstance(). A literal None
# member is mapped to NoneType.
SCALAR_VALUE_TYPES: tuple[type, ...] = tuple(
    check.isinstance(e, type) if e is not None else type(None)
    for e in ta.get_args(ScalarValue)
)
50
+
51
+
52
+ ##
53
+
54
+
55
class Token(ta.NamedTuple):
    """
    A single lexed JSON token.

    Iteration is deliberately disabled: ``__iter__`` raises ``TypeError``, so a token cannot be iterated or
    tuple-unpacked and its fields must be accessed by name or index.
    """

    # Token category.
    kind: TokenKind
    # Decoded value of the token.
    value: ScalarValue
    # Source text of the token; the lexer only fills this in when built with include_raw=True, otherwise None.
    raw: str | None

    # Lexer position (character offset, line, column) at the moment the token was created.
    ofs: int
    line: int
    col: int

    def __iter__(self):
        raise TypeError
66
+
67
+
68
# Full-match pattern for a numeric literal: optional minus sign, an integer part without leading zeros, an optional
# fraction and an optional exponent.
NUMBER_PAT = re.compile(r'-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?')

# Single-character punctuation and the token kind each one produces.
CONTROL_TOKENS: ta.Mapping[str, TokenKind] = {
    '{': 'LBRACE',
    '}': 'RBRACE',
    '[': 'LBRACKET',
    ']': 'RBRACKET',
    ',': 'COMMA',
    ':': 'COLON',
}

# Keyword literals mapped to their (token kind, decoded value) pair.
CONST_TOKENS: ta.Mapping[str, tuple[TokenKind, str | float | None]] = {
    'NaN': ('SPECIAL_NUMBER', float('nan')),
    'Infinity': ('SPECIAL_NUMBER', float('inf')),
    '-Infinity': ('SPECIAL_NUMBER', float('-inf')),

    'true': ('BOOLEAN', True),
    'false': ('BOOLEAN', False),
    'null': ('NULL', None),
}
88
+
89
+
90
+ ##
91
+
92
+
93
# NOTE(review): frozen=True also blocks Python-level attribute assignment on instances (e.g.
# `exc.__traceback__ = ...`) - confirm no caller relies on mutating a caught error.
@dc.dataclass(frozen=True)
class JsonLexError(Exception):
    """Error raised by the lexer, carrying a message and the lexer position at which it was raised."""

    # Human-readable description of the problem.
    message: str

    # Lexer position (character offset, line, column) when the error was raised.
    ofs: int
    line: int
    col: int
100
+
101
+
102
class JsonStreamLexer(GenMachine[str, Token]):
    """
    Incremental JSON lexer fed one character at a time.

    Each ``_do_*`` method is a generator implementing one lexer state: ``yield None`` receives the next input
    character, yielding the result of ``_make_tok`` emits a token, and returning another state generator hands
    control to that state. An empty string as input is treated as end of input.

    NOTE(review): the exact driving protocol lives in ``GenMachine``, which is not visible here - the description
    above is inferred from how the states are written.

    Args:
        include_raw: when true, emitted tokens carry their source text in ``Token.raw``; otherwise ``raw`` is None.

    Raises (while lexing):
        JsonLexError: on unexpected characters, malformed numbers / literals, or input ending mid-token.
        ValueError: if an input item is longer than one character.
    """

    def __init__(
        self,
        *,
        include_raw: bool = False,
    ) -> None:
        self._include_raw = include_raw

        self._ofs = 0
        self._line = 0
        self._col = 0

        self._buf = io.StringIO()

        super().__init__(self._do_main())

    def _char_in(self, c: str) -> str:
        """Validate one input item, advance the offset / line / column counters, and return the item unchanged."""

        if c and len(c) != 1:
            raise ValueError(c)

        self._ofs += 1

        if c == '\n':
            self._line += 1
            self._col = 0
        else:
            self._col += 1

        return c

    def _make_tok(
        self,
        kind: TokenKind,
        value: ScalarValue,
        raw: str,
    ) -> ta.Sequence[Token]:
        """Build a token stamped with the current lexer position and return it wrapped in a 1-tuple."""

        tok = Token(
            kind,
            value,
            raw if self._include_raw else None,
            self._ofs,
            self._line,
            self._col,
        )
        return (tok,)

    def _flip_buf(self) -> str:
        """Return the accumulated buffer contents and reset the buffer to empty."""

        raw = self._buf.getvalue()
        self._buf.seek(0)
        self._buf.truncate()
        return raw

    def _raise(self, msg: str) -> ta.NoReturn:
        """Raise a JsonLexError carrying the current lexer position."""

        raise JsonLexError(msg, self._ofs, self._line, self._col)

    def _do_main(self):
        """Top-level state: skip whitespace, emit punctuation, and dispatch to the string / number / literal states."""

        while True:
            c = self._char_in((yield None))  # noqa

            if not c:
                return None

            if c.isspace():
                continue

            if c in CONTROL_TOKENS:
                yield self._make_tok(CONTROL_TOKENS[c], c, c)
                continue

            if c == '"':
                return self._do_string()

            if c.isdigit() or c == '-':
                return self._do_number(c)

            if c in 'tfnIN':
                return self._do_const(c)

            self._raise(f'Unexpected character: {c}')

    def _do_string(self):
        """
        String state: accumulate characters up to the closing quote, then decode the literal with ``json.loads``.

        Raises JsonLexError if the input ends (generator closed, or empty-string input) before the closing quote.
        """

        self._buf.write('"')

        # Whether the current character is escaped by a preceding backslash. Tracking this as state (rather than
        # only looking at the previous character) lets an escaped backslash directly before the closing quote, as
        # in "a\\", terminate the string correctly.
        escaped = False
        while True:
            try:
                c = self._char_in((yield None))  # noqa
            except GeneratorExit:
                self._raise('Unexpected end of input')

            if not c:
                self._raise('Unexpected end of input')

            self._buf.write(c)
            if escaped:
                escaped = False
            elif c == '\\':
                escaped = True
            elif c == '"':
                break

        raw = self._flip_buf()
        sv = json.loads(raw)
        yield self._make_tok('STRING', sv, raw)

        return self._do_main()

    def _do_number(self, c: str):
        """
        Number state: accumulate number characters, validate them against NUMBER_PAT and emit a NUMBER token.

        The character that ends the number has already been consumed, so it is handled here: punctuation is emitted
        as its own token, whitespace is dropped, anything else is an error. Text that fails NUMBER_PAT is only
        accepted if, together with the following characters, it spells ``-Infinity``.
        """

        self._buf.write(c)

        while True:
            try:
                c = self._char_in((yield None))  # noqa
            except GeneratorExit:
                self._raise('Unexpected end of input')

            if not c:
                break

            if not (c.isdigit() or c in '.eE+-'):
                break
            self._buf.write(c)

        raw = self._flip_buf()

        #

        if not NUMBER_PAT.fullmatch(raw):
            # Can only be -Infinity

            if not c:
                self._raise('Unexpected end of input')

            raw += c
            try:
                for _ in range(7):
                    raw += self._char_in((yield None))  # noqa
            except GeneratorExit:
                self._raise('Unexpected end of input')

            if raw != '-Infinity':
                self._raise(f'Invalid number format: {raw}')

            tk, tv = CONST_TOKENS[raw]
            yield self._make_tok(tk, tv, raw)

            return self._do_main()

        #

        if '.' in raw or 'e' in raw or 'E' in raw:
            nv = float(raw)
        else:
            nv = int(raw)
        yield self._make_tok('NUMBER', nv, raw)

        #

        if not c:
            return None

        if c in CONTROL_TOKENS:
            yield self._make_tok(CONTROL_TOKENS[c], c, c)

        elif not c.isspace():
            self._raise(f'Unexpected character after number: {c}')

        return self._do_main()

    def _do_const(self, c: str):
        """Literal state: accumulate characters until they spell a CONST_TOKENS keyword, then emit its token."""

        raw = c
        while True:
            try:
                raw += self._char_in((yield None))  # noqa
            except GeneratorExit:
                self._raise('Unexpected end of input')

            if raw in CONST_TOKENS:
                break

            if len(raw) > 8:  # None of the keywords are longer than 8 characters
                self._raise(f'Invalid literal: {raw}')

        tk, tv = CONST_TOKENS[raw]
        yield self._make_tok(tk, tv, raw)

        return self._do_main()