omlish 0.0.0.dev81__py3-none-any.whl → 0.0.0.dev82__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. omlish/__about__.py +2 -2
  2. omlish/dataclasses/impl/__init__.py +8 -0
  3. omlish/dataclasses/impl/params.py +3 -0
  4. omlish/dataclasses/impl/slots.py +61 -7
  5. omlish/formats/json/__init__.py +8 -1
  6. omlish/formats/json/backends/__init__.py +7 -0
  7. omlish/formats/json/backends/base.py +38 -0
  8. omlish/formats/json/backends/default.py +10 -0
  9. omlish/formats/json/backends/jiter.py +25 -0
  10. omlish/formats/json/backends/orjson.py +46 -2
  11. omlish/formats/json/backends/std.py +39 -0
  12. omlish/formats/json/backends/ujson.py +49 -0
  13. omlish/formats/json/cli.py +36 -6
  14. omlish/formats/json/consts.py +22 -0
  15. omlish/formats/json/encoding.py +17 -0
  16. omlish/formats/json/json.py +9 -39
  17. omlish/formats/json/render.py +49 -28
  18. omlish/formats/json/stream/__init__.py +0 -0
  19. omlish/formats/json/stream/build.py +113 -0
  20. omlish/formats/json/{stream.py → stream/lex.py} +68 -172
  21. omlish/formats/json/stream/parse.py +244 -0
  22. omlish/formats/json/stream/render.py +119 -0
  23. omlish/genmachine.py +14 -2
  24. omlish/marshal/base.py +2 -0
  25. omlish/marshal/newtypes.py +24 -0
  26. omlish/marshal/standard.py +4 -0
  27. omlish/reflect/__init__.py +1 -0
  28. omlish/reflect/types.py +6 -1
  29. {omlish-0.0.0.dev81.dist-info → omlish-0.0.0.dev82.dist-info}/METADATA +1 -1
  30. {omlish-0.0.0.dev81.dist-info → omlish-0.0.0.dev82.dist-info}/RECORD +34 -24
  31. {omlish-0.0.0.dev81.dist-info → omlish-0.0.0.dev82.dist-info}/LICENSE +0 -0
  32. {omlish-0.0.0.dev81.dist-info → omlish-0.0.0.dev82.dist-info}/WHEEL +0 -0
  33. {omlish-0.0.0.dev81.dist-info → omlish-0.0.0.dev82.dist-info}/entry_points.txt +0 -0
  34. {omlish-0.0.0.dev81.dist-info → omlish-0.0.0.dev82.dist-info}/top_level.txt +0 -0
@@ -1,44 +1,55 @@
1
- """
2
- TODO:
3
- - genmachine...
4
- """
1
+ import abc
2
+ import dataclasses as dc
5
3
  import enum
6
4
  import io
7
5
  import json
8
6
  import typing as ta
9
7
 
8
+ from ... import lang
10
9
 
11
- class JsonRenderer:
10
+
11
+ I = ta.TypeVar('I')
12
+
13
+
14
+ class JsonRendererOut(ta.Protocol):
15
+ def write(self, s: str) -> ta.Any: ...
16
+
17
+
18
+ class AbstractJsonRenderer(lang.Abstract, ta.Generic[I]):
12
19
  class State(enum.Enum):
13
20
  VALUE = enum.auto()
14
21
  KEY = enum.auto()
15
22
 
23
+ @dc.dataclass(frozen=True, kw_only=True)
24
+ class Options:
25
+ indent: int | str | None = None
26
+ separators: tuple[str, str] | None = None
27
+ sort_keys: bool = False
28
+ style: ta.Callable[[ta.Any, 'AbstractJsonRenderer.State'], tuple[str, str]] | None = None
29
+
16
30
  def __init__(
17
31
  self,
18
- out: ta.TextIO,
19
- *,
20
- indent: int | str | None = None,
21
- separators: tuple[str, str] | None = None,
22
- sort_keys: bool = False,
23
- style: ta.Callable[[ta.Any, State], tuple[str, str]] | None = None,
32
+ out: JsonRendererOut,
33
+ opts: Options = Options(),
24
34
  ) -> None:
25
35
  super().__init__()
26
36
 
27
37
  self._out = out
28
- if isinstance(indent, (str, int)):
29
- self._indent = (' ' * indent) if isinstance(indent, int) else indent
38
+ self._opts = opts
39
+
40
+ separators = opts.separators
41
+ if isinstance(opts.indent, (str, int)):
42
+ self._indent = (' ' * opts.indent) if isinstance(opts.indent, int) else opts.indent
30
43
  self._endl = '\n'
31
44
  if separators is None:
32
45
  separators = (',', ': ')
33
- elif indent is None:
46
+ elif opts.indent is None:
34
47
  self._indent = self._endl = ''
35
48
  if separators is None:
36
49
  separators = (', ', ': ')
37
50
  else:
38
- raise TypeError(indent)
51
+ raise TypeError(opts.indent)
39
52
  self._comma, self._colon = separators
40
- self._sort_keys = sort_keys
41
- self._style = style
42
53
 
43
54
  self._level = 0
44
55
 
@@ -58,9 +69,25 @@ class JsonRenderer:
58
69
  if self._level:
59
70
  self._write(self._indent * self._level)
60
71
 
61
- def _render(self, o: ta.Any, state: State = State.VALUE) -> None:
62
- if self._style is not None:
63
- pre, post = self._style(o, state)
72
+ @abc.abstractmethod
73
+ def render(self, i: I) -> None:
74
+ raise NotImplementedError
75
+
76
+ @classmethod
77
+ def render_str(cls, i: I, **kwargs: ta.Any) -> str:
78
+ out = io.StringIO()
79
+ cls(out, cls.Options(**kwargs)).render(i)
80
+ return out.getvalue()
81
+
82
+
83
+ class JsonRenderer(AbstractJsonRenderer[ta.Any]):
84
+ def _render(
85
+ self,
86
+ o: ta.Any,
87
+ state: AbstractJsonRenderer.State = AbstractJsonRenderer.State.VALUE,
88
+ ) -> None:
89
+ if self._opts.style is not None:
90
+ pre, post = self._opts.style(o, state)
64
91
  self._write(pre)
65
92
  else:
66
93
  post = None
@@ -75,13 +102,13 @@ class JsonRenderer:
75
102
  self._write('{')
76
103
  self._level += 1
77
104
  items = list(o.items())
78
- if self._sort_keys:
105
+ if self._opts.sort_keys:
79
106
  items.sort(key=lambda t: t[0])
80
107
  for i, (k, v) in enumerate(items):
81
108
  if i:
82
109
  self._write(self._comma)
83
110
  self._write_indent()
84
- self._render(k, JsonRenderer.State.KEY)
111
+ self._render(k, AbstractJsonRenderer.State.KEY)
85
112
  self._write(self._colon)
86
113
  self._render(v)
87
114
  self._level -= 1
@@ -110,9 +137,3 @@ class JsonRenderer:
110
137
 
111
138
  def render(self, o: ta.Any) -> None:
112
139
  self._render(o)
113
-
114
- @classmethod
115
- def render_str(cls, o: ta.Any, **kwargs: ta.Any) -> str:
116
- out = io.StringIO()
117
- cls(out, **kwargs).render(o)
118
- return out.getvalue()
File without changes
@@ -0,0 +1,113 @@
1
+ import typing as ta
2
+
3
+ from ....genmachine import GenMachine
4
+ from .lex import SCALAR_VALUE_TYPES
5
+ from .parse import BeginArray
6
+ from .parse import BeginObject
7
+ from .parse import EndArray
8
+ from .parse import EndObject
9
+ from .parse import JsonStreamObject
10
+ from .parse import JsonStreamParserEvent
11
+ from .parse import Key
12
+
13
+
14
+ ##
15
+
16
+
17
+ class JsonObjectBuilder(GenMachine[JsonStreamParserEvent, ta.Any]):
18
+ def __init__(
19
+ self,
20
+ *,
21
+ yield_object_lists: bool = False,
22
+ ) -> None:
23
+ self._stack: list[JsonStreamObject | list | Key] = []
24
+ self._yield_object_lists = yield_object_lists
25
+
26
+ super().__init__(self._do())
27
+
28
+ def _do(self):
29
+ stk = self._stack
30
+
31
+ def emit_value(v):
32
+ if not stk:
33
+ return (v,)
34
+
35
+ tv = stk[-1]
36
+ if isinstance(tv, Key):
37
+ stk.pop()
38
+ if not stk:
39
+ raise self.StateError
40
+
41
+ tv2 = stk[-1]
42
+ if not isinstance(tv2, JsonStreamObject):
43
+ raise self.StateError
44
+
45
+ tv2.append((tv.key, v))
46
+ return ()
47
+
48
+ elif isinstance(tv, list):
49
+ tv.append(v)
50
+ return ()
51
+
52
+ else:
53
+ raise self.StateError
54
+
55
+ while True:
56
+ try:
57
+ e = yield None
58
+ except GeneratorExit:
59
+ if stk:
60
+ raise self.StateError from None
61
+ else:
62
+ raise
63
+
64
+ #
65
+
66
+ if isinstance(e, SCALAR_VALUE_TYPES):
67
+ if t := emit_value(e):
68
+ yield t
69
+ continue
70
+
71
+ #
72
+
73
+ elif e is BeginObject:
74
+ stk.append(JsonStreamObject())
75
+ continue
76
+
77
+ elif isinstance(e, Key):
78
+ if not stk or not isinstance(stk[-1], JsonStreamObject):
79
+ raise self.StateError
80
+
81
+ stk.append(e)
82
+ continue
83
+
84
+ elif e is EndObject:
85
+ tv: ta.Any
86
+ if not stk or not isinstance(tv := stk.pop(), JsonStreamObject):
87
+ raise self.StateError
88
+
89
+ if not self._yield_object_lists:
90
+ tv = dict(tv)
91
+
92
+ if t := emit_value(tv):
93
+ yield t
94
+ continue
95
+
96
+ #
97
+
98
+ elif e is BeginArray:
99
+ stk.append([])
100
+ continue
101
+
102
+ elif e is EndArray:
103
+ if not stk or not isinstance(tv := stk.pop(), list):
104
+ raise self.StateError
105
+
106
+ if t := emit_value(tv):
107
+ yield t
108
+ continue
109
+
110
+ #
111
+
112
+ else:
113
+ raise TypeError(e)
@@ -1,11 +1,20 @@
1
+ """
2
+ TODO:
3
+ - max buf size
4
+ - max recursion depth
5
+ - mark start pos of tokens, currently returning end
6
+ """
1
7
  import dataclasses as dc
2
8
  import io
3
9
  import json
4
10
  import re
5
11
  import typing as ta
6
12
 
7
- from ... import check
8
- from ...genmachine import GenMachine
13
+ from .... import check
14
+ from ....genmachine import GenMachine
15
+
16
+
17
+ ##
9
18
 
10
19
 
11
20
  ValueTokenKind: ta.TypeAlias = ta.Literal[
@@ -17,6 +26,8 @@ ValueTokenKind: ta.TypeAlias = ta.Literal[
17
26
  'NULL',
18
27
  ]
19
28
 
29
+ VALUE_TOKEN_KINDS = frozenset(check.isinstance(a, str) for a in ta.get_args(ValueTokenKind))
30
+
20
31
  ControlTokenKind: ta.TypeAlias = ta.Literal[
21
32
  'LBRACE',
22
33
  'RBRACE',
@@ -28,13 +39,23 @@ ControlTokenKind: ta.TypeAlias = ta.Literal[
28
39
 
29
40
  TokenKind: ta.TypeAlias = ValueTokenKind | ControlTokenKind
30
41
 
31
- TokenValue: ta.TypeAlias = str | float | int | None
42
+ #
43
+
44
+ ScalarValue: ta.TypeAlias = str | float | int | None
45
+
46
+ SCALAR_VALUE_TYPES: tuple[type, ...] = tuple(
47
+ check.isinstance(e, type) if e is not None else type(None)
48
+ for e in ta.get_args(ScalarValue)
49
+ )
50
+
51
+
52
+ ##
32
53
 
33
54
 
34
55
  class Token(ta.NamedTuple):
35
56
  kind: TokenKind
36
- value: TokenValue
37
- raw: str
57
+ value: ScalarValue
58
+ raw: str | None
38
59
 
39
60
  ofs: int
40
61
  line: int
@@ -46,8 +67,6 @@ class Token(ta.NamedTuple):
46
67
 
47
68
  NUMBER_PAT = re.compile(r'-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?')
48
69
 
49
- VALUE_TOKEN_KINDS = frozenset(check.isinstance(a, str) for a in ta.get_args(ValueTokenKind))
50
-
51
70
  CONTROL_TOKENS: ta.Mapping[str, TokenKind] = {
52
71
  '{': 'LBRACE',
53
72
  '}': 'RBRACE',
@@ -68,6 +87,9 @@ CONST_TOKENS: ta.Mapping[str, tuple[TokenKind, str | float | None]] = {
68
87
  }
69
88
 
70
89
 
90
+ ##
91
+
92
+
71
93
  @dc.dataclass(frozen=True)
72
94
  class JsonLexError(Exception):
73
95
  message: str
@@ -78,7 +100,13 @@ class JsonLexError(Exception):
78
100
 
79
101
 
80
102
  class JsonStreamLexer(GenMachine[str, Token]):
81
- def __init__(self) -> None:
103
+ def __init__(
104
+ self,
105
+ *,
106
+ include_raw: bool = False,
107
+ ) -> None:
108
+ self._include_raw = include_raw
109
+
82
110
  self._ofs = 0
83
111
  self._line = 0
84
112
  self._col = 0
@@ -88,7 +116,7 @@ class JsonStreamLexer(GenMachine[str, Token]):
88
116
  super().__init__(self._do_main())
89
117
 
90
118
  def _char_in(self, c: str) -> str:
91
- if len(c) != 1:
119
+ if c and len(c) != 1:
92
120
  raise ValueError(c)
93
121
 
94
122
  self._ofs += 1
@@ -104,13 +132,13 @@ class JsonStreamLexer(GenMachine[str, Token]):
104
132
  def _make_tok(
105
133
  self,
106
134
  kind: TokenKind,
107
- value: TokenValue,
135
+ value: ScalarValue,
108
136
  raw: str,
109
137
  ) -> ta.Sequence[Token]:
110
138
  tok = Token(
111
139
  kind,
112
140
  value,
113
- raw,
141
+ raw if self._include_raw else None,
114
142
  self._ofs,
115
143
  self._line,
116
144
  self._col,
@@ -130,6 +158,9 @@ class JsonStreamLexer(GenMachine[str, Token]):
130
158
  while True:
131
159
  c = self._char_in((yield None)) # noqa
132
160
 
161
+ if not c:
162
+ return None
163
+
133
164
  if c.isspace():
134
165
  continue
135
166
 
@@ -158,6 +189,9 @@ class JsonStreamLexer(GenMachine[str, Token]):
158
189
  except GeneratorExit:
159
190
  self._raise('Unexpected end of input')
160
191
 
192
+ if not c:
193
+ raise NotImplementedError
194
+
161
195
  self._buf.write(c)
162
196
  if c == '"' and last != '\\':
163
197
  break
@@ -178,12 +212,23 @@ class JsonStreamLexer(GenMachine[str, Token]):
178
212
  except GeneratorExit:
179
213
  self._raise('Unexpected end of input')
180
214
 
215
+ if not c:
216
+ break
217
+
181
218
  if not (c.isdigit() or c in '.eE+-'):
182
219
  break
183
220
  self._buf.write(c)
184
221
 
185
222
  raw = self._flip_buf()
223
+
224
+ #
225
+
186
226
  if not NUMBER_PAT.fullmatch(raw):
227
+ # Can only be -Infinity
228
+
229
+ if not c:
230
+ self._raise('Unexpected end of input')
231
+
187
232
  raw += c
188
233
  try:
189
234
  for _ in range(7):
@@ -199,11 +244,22 @@ class JsonStreamLexer(GenMachine[str, Token]):
199
244
 
200
245
  return self._do_main()
201
246
 
202
- nv = float(raw) if '.' in raw or 'e' in raw or 'E' in raw else int(raw)
247
+ #
248
+
249
+ if '.' in raw or 'e' in raw or 'E' in raw:
250
+ nv = float(raw)
251
+ else:
252
+ nv = int(raw)
203
253
  yield self._make_tok('NUMBER', nv, raw)
204
254
 
255
+ #
256
+
257
+ if not c:
258
+ return None
259
+
205
260
  if c in CONTROL_TOKENS:
206
261
  yield self._make_tok(CONTROL_TOKENS[c], c, c)
262
+
207
263
  elif not c.isspace():
208
264
  self._raise(f'Unexpected character after number: {c}')
209
265
 
@@ -227,163 +283,3 @@ class JsonStreamLexer(GenMachine[str, Token]):
227
283
  yield self._make_tok(tk, tv, raw)
228
284
 
229
285
  return self._do_main()
230
-
231
-
232
- class JsonStreamObject(list):
233
- def __repr__(self) -> str:
234
- return f'{self.__class__.__name__}({super().__repr__()})'
235
-
236
-
237
- class JsonStreamValueBuilder(GenMachine[Token, ta.Any]):
238
- def __init__(
239
- self,
240
- *,
241
- yield_object_lists: bool = False,
242
- ) -> None:
243
- super().__init__(self._do_value())
244
-
245
- self._yield_object_lists = yield_object_lists
246
-
247
- self._stack: list[
248
- tuple[ta.Literal['OBJECT'], JsonStreamObject] |
249
- tuple[ta.Literal['PAIR'], str] |
250
- tuple[ta.Literal['ARRAY'], list]
251
- ] = []
252
-
253
- #
254
-
255
- def _emit_value(self, v):
256
- if not self._stack:
257
- return ((v,), self._do_value())
258
-
259
- tt, tv = self._stack[-1]
260
- if tt == 'PAIR':
261
- self._stack.pop()
262
- if not self._stack:
263
- raise self.StateError
264
-
265
- tt2, tv2 = self._stack[-1]
266
- if tt2 == 'OBJECT':
267
- tv2.append((tv, v)) # type: ignore
268
- return ((), self._do_after_pair())
269
-
270
- else:
271
- raise self.StateError
272
-
273
- elif tt == 'ARRAY':
274
- tv.append(v) # type: ignore
275
- return ((), self._do_after_element())
276
-
277
- else:
278
- raise self.StateError
279
-
280
- #
281
-
282
- def _do_value(self):
283
- try:
284
- tok = yield None
285
- except GeneratorExit:
286
- if self._stack:
287
- raise self.StateError from None
288
- else:
289
- raise
290
-
291
- if tok.kind in VALUE_TOKEN_KINDS:
292
- y, r = self._emit_value(tok.value)
293
- yield y
294
- return r
295
-
296
- elif tok.kind == 'LBRACE':
297
- return self._do_object()
298
-
299
- elif tok.kind == 'LBRACKET':
300
- return self._do_array()
301
-
302
- else:
303
- raise self.StateError
304
-
305
- #
306
-
307
- def _do_object(self):
308
- self._stack.append(('OBJECT', JsonStreamObject()))
309
- return self._do_object_body()
310
-
311
- def _do_object_body(self):
312
- try:
313
- tok = yield None
314
- except GeneratorExit:
315
- raise self.StateError from None
316
-
317
- if tok.kind == 'STRING':
318
- k = tok.value
319
-
320
- try:
321
- tok = yield None
322
- except GeneratorExit:
323
- raise self.StateError from None
324
- if tok.kind != 'COLON':
325
- raise self.StateError
326
-
327
- self._stack.append(('PAIR', k))
328
- return self._do_value()
329
-
330
- else:
331
- raise self.StateError
332
-
333
- def _do_after_pair(self):
334
- try:
335
- tok = yield None
336
- except GeneratorExit:
337
- raise self.StateError from None
338
-
339
- if tok.kind == 'COMMA':
340
- return self._do_object_body()
341
-
342
- elif tok.kind == 'RBRACE':
343
- if not self._stack:
344
- raise self.StateError
345
-
346
- tv: ta.Any
347
- tt, tv = self._stack.pop()
348
- if tt != 'OBJECT':
349
- raise self.StateError
350
-
351
- if not self._yield_object_lists:
352
- tv = dict(tv)
353
-
354
- y, r = self._emit_value(tv)
355
- yield y
356
- return r
357
-
358
- else:
359
- raise self.StateError
360
-
361
- #
362
-
363
- def _do_array(self):
364
- self._stack.append(('ARRAY', []))
365
- return self._do_value()
366
-
367
- def _do_after_element(self):
368
- try:
369
- tok = yield None
370
- except GeneratorExit:
371
- raise self.StateError from None
372
-
373
- if tok.kind == 'COMMA':
374
- return self._do_value()
375
-
376
- elif tok.kind == 'RBRACKET':
377
- if not self._stack:
378
- raise self.StateError
379
-
380
- tt, tv = self._stack.pop()
381
- if tt != 'ARRAY':
382
- raise self.StateError
383
-
384
- y, r = self._emit_value(tv)
385
- yield y
386
- return r
387
-
388
- else:
389
- raise self.StateError