omlish 0.0.0.dev80__py3-none-any.whl → 0.0.0.dev82__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. omlish/__about__.py +4 -4
  2. omlish/dataclasses/impl/__init__.py +8 -0
  3. omlish/dataclasses/impl/params.py +3 -0
  4. omlish/dataclasses/impl/slots.py +61 -7
  5. omlish/formats/json/__init__.py +8 -1
  6. omlish/formats/json/backends/__init__.py +7 -0
  7. omlish/formats/json/backends/base.py +38 -0
  8. omlish/formats/json/backends/default.py +10 -0
  9. omlish/formats/json/backends/jiter.py +25 -0
  10. omlish/formats/json/backends/orjson.py +46 -2
  11. omlish/formats/json/backends/std.py +39 -0
  12. omlish/formats/json/backends/ujson.py +49 -0
  13. omlish/formats/json/cli.py +125 -31
  14. omlish/formats/json/consts.py +22 -0
  15. omlish/formats/json/encoding.py +17 -0
  16. omlish/formats/json/json.py +9 -39
  17. omlish/formats/json/render.py +49 -24
  18. omlish/formats/json/stream/__init__.py +0 -0
  19. omlish/formats/json/stream/build.py +113 -0
  20. omlish/formats/json/stream/lex.py +285 -0
  21. omlish/formats/json/stream/parse.py +244 -0
  22. omlish/formats/json/stream/render.py +119 -0
  23. omlish/genmachine.py +56 -10
  24. omlish/lang/resources.py +6 -1
  25. omlish/marshal/base.py +2 -0
  26. omlish/marshal/newtypes.py +24 -0
  27. omlish/marshal/standard.py +4 -0
  28. omlish/reflect/__init__.py +1 -0
  29. omlish/reflect/types.py +6 -1
  30. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/METADATA +5 -5
  31. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/RECORD +35 -24
  32. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/LICENSE +0 -0
  33. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/WHEEL +0 -0
  34. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/entry_points.txt +0 -0
  35. {omlish-0.0.0.dev80.dist-info → omlish-0.0.0.dev82.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,17 @@
1
+ import json
2
+
3
+
4
+ detect_encoding = json.detect_encoding
5
+
6
+
7
+ def decodes(s: str | bytes | bytearray) -> str:
8
+ if isinstance(s, str):
9
+ if s.startswith('\ufeff'):
10
+ raise json.JSONDecodeError('Unexpected UTF-8 BOM (decode using utf-8-sig)', s, 0)
11
+ return s
12
+
13
+ elif isinstance(s, (bytes, bytearray)):
14
+ return s.decode(detect_encoding(s), 'surrogatepass')
15
+
16
+ else:
17
+ raise TypeError(f'the JSON object must be str, bytes or bytearray, not {s.__class__.__name__}')
@@ -1,47 +1,17 @@
1
- """
2
- TODO:
3
- - backend abstr
4
- - streaming
5
- """
6
- import functools
7
- import json as _json
8
- import typing as ta
1
+ from .backends import DEFAULT_BACKED
9
2
 
10
3
 
11
4
  ##
12
5
 
13
6
 
14
- dump = _json.dump
15
- dumps = _json.dumps
7
+ dump = DEFAULT_BACKED.dump
8
+ dumps = DEFAULT_BACKED.dumps
16
9
 
17
- detect_encoding = _json.detect_encoding
10
+ load = DEFAULT_BACKED.load
11
+ loads = DEFAULT_BACKED.loads
18
12
 
19
- load = _json.load
20
- loads = _json.loads
13
+ dump_pretty = DEFAULT_BACKED.dump_pretty
14
+ dumps_pretty = DEFAULT_BACKED.dumps_pretty
21
15
 
22
-
23
- ##
24
-
25
-
26
- PRETTY_INDENT = 2
27
-
28
- PRETTY_KWARGS: ta.Mapping[str, ta.Any] = dict(
29
- indent=PRETTY_INDENT,
30
- )
31
-
32
- dump_pretty: ta.Callable[..., bytes] = functools.partial(dump, **PRETTY_KWARGS) # type: ignore
33
- dumps_pretty: ta.Callable[..., str] = functools.partial(dumps, **PRETTY_KWARGS)
34
-
35
-
36
- ##
37
-
38
-
39
- COMPACT_SEPARATORS = (',', ':')
40
-
41
- COMPACT_KWARGS: ta.Mapping[str, ta.Any] = dict(
42
- indent=None,
43
- separators=COMPACT_SEPARATORS,
44
- )
45
-
46
- dump_compact: ta.Callable[..., bytes] = functools.partial(dump, **COMPACT_KWARGS) # type: ignore
47
- dumps_compact: ta.Callable[..., str] = functools.partial(dumps, **COMPACT_KWARGS)
16
+ dump_compact = DEFAULT_BACKED.dump_compact
17
+ dumps_compact = DEFAULT_BACKED.dumps_compact
@@ -1,40 +1,55 @@
1
+ import abc
2
+ import dataclasses as dc
1
3
  import enum
2
4
  import io
3
5
  import json
4
6
  import typing as ta
5
7
 
8
+ from ... import lang
6
9
 
7
- class JsonRenderer:
10
+
11
+ I = ta.TypeVar('I')
12
+
13
+
14
+ class JsonRendererOut(ta.Protocol):
15
+ def write(self, s: str) -> ta.Any: ...
16
+
17
+
18
+ class AbstractJsonRenderer(lang.Abstract, ta.Generic[I]):
8
19
  class State(enum.Enum):
9
20
  VALUE = enum.auto()
10
21
  KEY = enum.auto()
11
22
 
23
+ @dc.dataclass(frozen=True, kw_only=True)
24
+ class Options:
25
+ indent: int | str | None = None
26
+ separators: tuple[str, str] | None = None
27
+ sort_keys: bool = False
28
+ style: ta.Callable[[ta.Any, 'AbstractJsonRenderer.State'], tuple[str, str]] | None = None
29
+
12
30
  def __init__(
13
31
  self,
14
- out: ta.TextIO,
15
- *,
16
- indent: int | str | None = None,
17
- separators: tuple[str, str] | None = None,
18
- sort_keys: bool = False,
19
- style: ta.Callable[[ta.Any, State], tuple[str, str]] | None = None,
32
+ out: JsonRendererOut,
33
+ opts: Options = Options(),
20
34
  ) -> None:
21
35
  super().__init__()
22
36
 
23
37
  self._out = out
24
- if isinstance(indent, (str, int)):
25
- self._indent = (' ' * indent) if isinstance(indent, int) else indent
38
+ self._opts = opts
39
+
40
+ separators = opts.separators
41
+ if isinstance(opts.indent, (str, int)):
42
+ self._indent = (' ' * opts.indent) if isinstance(opts.indent, int) else opts.indent
26
43
  self._endl = '\n'
27
44
  if separators is None:
28
45
  separators = (',', ': ')
29
- elif indent is None:
46
+ elif opts.indent is None:
30
47
  self._indent = self._endl = ''
31
48
  if separators is None:
32
49
  separators = (', ', ': ')
33
50
  else:
34
- raise TypeError(indent)
51
+ raise TypeError(opts.indent)
35
52
  self._comma, self._colon = separators
36
- self._sort_keys = sort_keys
37
- self._style = style
38
53
 
39
54
  self._level = 0
40
55
 
@@ -54,9 +69,25 @@ class JsonRenderer:
54
69
  if self._level:
55
70
  self._write(self._indent * self._level)
56
71
 
57
- def _render(self, o: ta.Any, state: State = State.VALUE) -> None:
58
- if self._style is not None:
59
- pre, post = self._style(o, state)
72
+ @abc.abstractmethod
73
+ def render(self, i: I) -> None:
74
+ raise NotImplementedError
75
+
76
+ @classmethod
77
+ def render_str(cls, i: I, **kwargs: ta.Any) -> str:
78
+ out = io.StringIO()
79
+ cls(out, cls.Options(**kwargs)).render(i)
80
+ return out.getvalue()
81
+
82
+
83
+ class JsonRenderer(AbstractJsonRenderer[ta.Any]):
84
+ def _render(
85
+ self,
86
+ o: ta.Any,
87
+ state: AbstractJsonRenderer.State = AbstractJsonRenderer.State.VALUE,
88
+ ) -> None:
89
+ if self._opts.style is not None:
90
+ pre, post = self._opts.style(o, state)
60
91
  self._write(pre)
61
92
  else:
62
93
  post = None
@@ -71,13 +102,13 @@ class JsonRenderer:
71
102
  self._write('{')
72
103
  self._level += 1
73
104
  items = list(o.items())
74
- if self._sort_keys:
105
+ if self._opts.sort_keys:
75
106
  items.sort(key=lambda t: t[0])
76
107
  for i, (k, v) in enumerate(items):
77
108
  if i:
78
109
  self._write(self._comma)
79
110
  self._write_indent()
80
- self._render(k, JsonRenderer.State.KEY)
111
+ self._render(k, AbstractJsonRenderer.State.KEY)
81
112
  self._write(self._colon)
82
113
  self._render(v)
83
114
  self._level -= 1
@@ -106,9 +137,3 @@ class JsonRenderer:
106
137
 
107
138
  def render(self, o: ta.Any) -> None:
108
139
  self._render(o)
109
-
110
- @classmethod
111
- def render_str(cls, o: ta.Any, **kwargs: ta.Any) -> str:
112
- out = io.StringIO()
113
- cls(out, **kwargs).render(o)
114
- return out.getvalue()
File without changes
@@ -0,0 +1,113 @@
1
+ import typing as ta
2
+
3
+ from ....genmachine import GenMachine
4
+ from .lex import SCALAR_VALUE_TYPES
5
+ from .parse import BeginArray
6
+ from .parse import BeginObject
7
+ from .parse import EndArray
8
+ from .parse import EndObject
9
+ from .parse import JsonStreamObject
10
+ from .parse import JsonStreamParserEvent
11
+ from .parse import Key
12
+
13
+
14
+ ##
15
+
16
+
17
class JsonObjectBuilder(GenMachine[JsonStreamParserEvent, ta.Any]):
    """
    Assembles Python values from a stream of JSON parser events.

    Scalars, ``BeginObject``/``Key``/``EndObject`` and ``BeginArray``/``EndArray`` events are folded into nested
    containers held on an internal stack. Whenever a value completes with nothing left on the stack it is yielded as a
    1-tuple. Objects are converted with ``dict(...)`` unless ``yield_object_lists`` is set, in which case the
    accumulated ``JsonStreamObject`` of ``(key, value)`` pairs is kept as-is.

    NOTE(review): the input/output protocol (``yield None`` to receive an event, yield a tuple to emit results) is
    presumably dictated by ``GenMachine``, which is defined outside this file - confirm there.
    """

    def __init__(
            self,
            *,
            yield_object_lists: bool = False,
    ) -> None:
        self._stack: list[JsonStreamObject | list | Key] = []
        self._yield_object_lists = yield_object_lists

        super().__init__(self._do())

    def _do(self):
        stack = self._stack

        def attach(value):
            # Place a completed value into its parent container. Returns a 1-tuple when the value is top-level (to be
            # yielded by the caller) and an empty tuple when it was absorbed by a parent.
            if not stack:
                return (value,)

            top = stack[-1]

            if isinstance(top, Key):
                # A pending key: pair it with the value and store the pair on the object beneath it.
                stack.pop()
                if not stack:
                    raise self.StateError

                parent = stack[-1]
                if not isinstance(parent, JsonStreamObject):
                    raise self.StateError

                parent.append((top.key, value))
                return ()

            if isinstance(top, list):
                top.append(value)
                return ()

            raise self.StateError

        while True:
            try:
                event = yield None
            except GeneratorExit:
                # Being closed with containers still open means the event stream was truncated.
                if not stack:
                    raise
                raise self.StateError from None

            #

            if isinstance(event, SCALAR_VALUE_TYPES):
                completed = event

            #

            elif event is BeginObject:
                stack.append(JsonStreamObject())
                continue

            elif isinstance(event, Key):
                # Keys are only legal directly inside an object.
                if not stack or not isinstance(stack[-1], JsonStreamObject):
                    raise self.StateError

                stack.append(event)
                continue

            elif event is EndObject:
                if not stack:
                    raise self.StateError

                completed = stack.pop()
                if not isinstance(completed, JsonStreamObject):
                    raise self.StateError

                if not self._yield_object_lists:
                    completed = dict(completed)

            #

            elif event is BeginArray:
                stack.append([])
                continue

            elif event is EndArray:
                if not stack:
                    raise self.StateError

                completed = stack.pop()
                if not isinstance(completed, list):
                    raise self.StateError

            #

            else:
                raise TypeError(event)

            # Common tail for every event that completes a value.
            out = attach(completed)
            if out:
                yield out
@@ -0,0 +1,285 @@
1
+ """
2
+ TODO:
3
+ - max buf size
4
+ - max recursion depth
5
+ - mark start pos of tokens, currently returning end
6
+ """
7
+ import dataclasses as dc
8
+ import io
9
+ import json
10
+ import re
11
+ import typing as ta
12
+
13
+ from .... import check
14
+ from ....genmachine import GenMachine
15
+
16
+
17
+ ##
18
+
19
+
20
+ ValueTokenKind: ta.TypeAlias = ta.Literal[
21
+ 'STRING',
22
+ 'NUMBER',
23
+
24
+ 'SPECIAL_NUMBER',
25
+ 'BOOLEAN',
26
+ 'NULL',
27
+ ]
28
+
29
+ VALUE_TOKEN_KINDS = frozenset(check.isinstance(a, str) for a in ta.get_args(ValueTokenKind))
30
+
31
+ ControlTokenKind: ta.TypeAlias = ta.Literal[
32
+ 'LBRACE',
33
+ 'RBRACE',
34
+ 'LBRACKET',
35
+ 'RBRACKET',
36
+ 'COMMA',
37
+ 'COLON',
38
+ ]
39
+
40
+ TokenKind: ta.TypeAlias = ValueTokenKind | ControlTokenKind
41
+
42
+ #
43
+
44
+ ScalarValue: ta.TypeAlias = str | float | int | None
45
+
46
+ SCALAR_VALUE_TYPES: tuple[type, ...] = tuple(
47
+ check.isinstance(e, type) if e is not None else type(None)
48
+ for e in ta.get_args(ScalarValue)
49
+ )
50
+
51
+
52
+ ##
53
+
54
+
55
class Token(ta.NamedTuple):
    # What was lexed: one of the value kinds or one of the control (punctuation) kinds.
    kind: TokenKind
    # The token's Python value.
    value: ScalarValue
    # Source text of the token, or None.
    raw: str | None

    # Position fields - character offset, line and column.
    ofs: int
    line: int
    col: int

    def __iter__(self):
        # Iteration, and therefore tuple-unpacking, of a token is deliberately refused.
        raise TypeError
66
+
67
+
68
+ NUMBER_PAT = re.compile(r'-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?')
69
+
70
+ CONTROL_TOKENS: ta.Mapping[str, TokenKind] = {
71
+ '{': 'LBRACE',
72
+ '}': 'RBRACE',
73
+ '[': 'LBRACKET',
74
+ ']': 'RBRACKET',
75
+ ',': 'COMMA',
76
+ ':': 'COLON',
77
+ }
78
+
79
+ CONST_TOKENS: ta.Mapping[str, tuple[TokenKind, str | float | None]] = {
80
+ 'NaN': ('SPECIAL_NUMBER', float('nan')),
81
+ 'Infinity': ('SPECIAL_NUMBER', float('inf')),
82
+ '-Infinity': ('SPECIAL_NUMBER', float('-inf')),
83
+
84
+ 'true': ('BOOLEAN', True),
85
+ 'false': ('BOOLEAN', False),
86
+ 'null': ('NULL', None),
87
+ }
88
+
89
+
90
+ ##
91
+
92
+
93
# NOTE: deliberately not ``frozen=True``. A frozen dataclass replaces ``__setattr__`` with one that raises
# ``FrozenInstanceError`` for every attribute, including the ones Python code legitimately assigns on exception
# instances (e.g. ``exc.__traceback__ = tb``, which ``contextlib`` does when an exception propagates through a
# ``@contextmanager``). ``unsafe_hash=True`` keeps the field-based ``__eq__`` / ``__hash__`` the frozen variant had.
@dc.dataclass(unsafe_hash=True)
class JsonLexError(Exception):
    """
    Error raised by the JSON lexer for malformed or truncated input.

    Carries a human-readable message along with the lexer's character offset, line and column at the point the error
    was raised.
    """

    message: str

    ofs: int
    line: int
    col: int
100
+
101
+
102
class JsonStreamLexer(GenMachine[str, Token]):
    """
    Character-at-a-time JSON lexer.

    Input is fed one character per step; an empty string marks the end of input. Output is ``Token`` instances, each
    stamped with the lexer's offset / line / column at the moment the token was completed (i.e. its end position).

    Each ``_do_*`` method is a generator state: ``yield None`` receives the next character, yielding a tuple emits
    tokens, and returning another ``_do_*`` generator hands control to that state.

    NOTE(review): that state-switching protocol is presumably implemented by ``GenMachine``, which is defined outside
    this file - confirm there.

    Args:
        include_raw: When true, tokens carry their source text in ``Token.raw``; otherwise ``raw`` is ``None``.

    Raises:
        JsonLexError: On unexpected characters, malformed numbers / literals, or truncated input.
    """

    def __init__(
            self,
            *,
            include_raw: bool = False,
    ) -> None:
        self._include_raw = include_raw

        self._ofs = 0
        self._line = 0
        self._col = 0

        # Accumulates the characters of the string / number token currently being lexed.
        self._buf = io.StringIO()

        super().__init__(self._do_main())

    def _char_in(self, c: str) -> str:
        """Validate one input step (a single character, or '' for end of input) and advance the position counters."""

        if c and len(c) != 1:
            raise ValueError(c)

        self._ofs += 1

        if c == '\n':
            self._line += 1
            self._col = 0
        else:
            self._col += 1

        return c

    def _make_tok(
            self,
            kind: TokenKind,
            value: ScalarValue,
            raw: str,
    ) -> ta.Sequence[Token]:
        """Build a token at the current position, returned as a 1-tuple ready to be yielded."""

        tok = Token(
            kind,
            value,
            raw if self._include_raw else None,
            self._ofs,
            self._line,
            self._col,
        )
        return (tok,)

    def _flip_buf(self) -> str:
        """Return the buffered text and reset the buffer for the next token."""

        raw = self._buf.getvalue()
        self._buf.seek(0)
        self._buf.truncate()
        return raw

    def _raise(self, msg: str) -> ta.NoReturn:
        """Raise a ``JsonLexError`` carrying the current position."""

        raise JsonLexError(msg, self._ofs, self._line, self._col)

    def _do_main(self):
        """Top-level state: skip whitespace, emit punctuation, and dispatch to the string / number / literal states."""

        while True:
            c = self._char_in((yield None))  # noqa

            # Empty input marks the end of the stream.
            if not c:
                return None

            if c.isspace():
                continue

            if c in CONTROL_TOKENS:
                yield self._make_tok(CONTROL_TOKENS[c], c, c)
                continue

            if c == '"':
                return self._do_string()

            if c.isdigit() or c == '-':
                return self._do_number(c)

            # First letters of true / false / null / Infinity / NaN.
            if c in 'tfnIN':
                return self._do_const(c)

            self._raise(f'Unexpected character: {c}')

    def _do_string(self):
        """String state: buffer characters up to the closing quote, then decode the literal with ``json.loads``."""

        self._buf.write('"')

        # True while the previous character was an unconsumed backslash. Tracking escape *state* (rather than just
        # comparing against the previous character) is required so that the quote following an escaped backslash, as
        # in "a\\", is correctly recognized as the end of the string.
        escaped = False
        while True:
            try:
                c = self._char_in((yield None))  # noqa
            except GeneratorExit:
                self._raise('Unexpected end of input')

            # End of input inside a string literal is a lex error, same as in the other states.
            if not c:
                self._raise('Unexpected end of input')

            self._buf.write(c)
            if escaped:
                escaped = False
            elif c == '\\':
                escaped = True
            elif c == '"':
                break

        raw = self._flip_buf()
        sv = json.loads(raw)
        yield self._make_tok('STRING', sv, raw)

        return self._do_main()

    def _do_number(self, c: str):
        """Number state: buffer number-like characters, validate against ``NUMBER_PAT``, and handle ``-Infinity``."""

        self._buf.write(c)

        while True:
            try:
                c = self._char_in((yield None))  # noqa
            except GeneratorExit:
                self._raise('Unexpected end of input')

            if not c:
                break

            if not (c.isdigit() or c in '.eE+-'):
                break
            self._buf.write(c)

        raw = self._flip_buf()

        #

        if not NUMBER_PAT.fullmatch(raw):
            # Can only be -Infinity

            if not c:
                self._raise('Unexpected end of input')

            # `c` is the first non-number character already consumed; 7 more characters complete the 9 of '-Infinity'.
            raw += c
            try:
                for _ in range(7):
                    raw += self._char_in((yield None))  # noqa
            except GeneratorExit:
                self._raise('Unexpected end of input')

            if raw != '-Infinity':
                self._raise(f'Invalid number format: {raw}')

            tk, tv = CONST_TOKENS[raw]
            yield self._make_tok(tk, tv, raw)

            return self._do_main()

        #

        if '.' in raw or 'e' in raw or 'E' in raw:
            nv = float(raw)
        else:
            nv = int(raw)
        yield self._make_tok('NUMBER', nv, raw)

        #

        # The character that terminated the number has already been consumed, so it must be handled here.
        if not c:
            return None

        if c in CONTROL_TOKENS:
            yield self._make_tok(CONTROL_TOKENS[c], c, c)

        elif not c.isspace():
            self._raise(f'Unexpected character after number: {c}')

        return self._do_main()

    def _do_const(self, c: str):
        """Literal state: accumulate characters until they spell one of the ``CONST_TOKENS`` keywords."""

        raw = c
        while True:
            try:
                raw += self._char_in((yield None))  # noqa
            except GeneratorExit:
                self._raise('Unexpected end of input')

            if raw in CONST_TOKENS:
                break

            if len(raw) > 8:  # None of the keywords are longer than 8 characters
                self._raise(f'Invalid literal: {raw}')

        tk, tv = CONST_TOKENS[raw]
        yield self._make_tok(tk, tv, raw)

        return self._do_main()