omlish 0.0.0.dev437__py3-none-any.whl → 0.0.0.dev438__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (89)
  1. omlish/__about__.py +2 -2
  2. omlish/formats/json/stream/__init__.py +3 -1
  3. omlish/formats/json/stream/lexing.py +187 -42
  4. omlish/formats/json/stream/parsing.py +27 -5
  5. omlish/formats/json/stream/utils.py +106 -33
  6. omlish/formats/json5/literals.py +7 -4
  7. omlish/formats/json5/parsing.py +33 -79
  8. omlish/formats/json5/stream.py +45 -50
  9. omlish/http/all.py +59 -53
  10. {omlish-0.0.0.dev437.dist-info → omlish-0.0.0.dev438.dist-info}/METADATA +1 -1
  11. {omlish-0.0.0.dev437.dist-info → omlish-0.0.0.dev438.dist-info}/RECORD +15 -89
  12. omlish/formats/json5/Json5.g4 +0 -168
  13. omlish/formats/json5/_antlr/Json5Lexer.py +0 -354
  14. omlish/formats/json5/_antlr/Json5Listener.py +0 -79
  15. omlish/formats/json5/_antlr/Json5Parser.py +0 -617
  16. omlish/formats/json5/_antlr/Json5Visitor.py +0 -52
  17. omlish/formats/json5/_antlr/__init__.py +0 -0
  18. omlish/text/antlr/__init__.py +0 -3
  19. omlish/text/antlr/_runtime/BufferedTokenStream.py +0 -305
  20. omlish/text/antlr/_runtime/CommonTokenFactory.py +0 -64
  21. omlish/text/antlr/_runtime/CommonTokenStream.py +0 -90
  22. omlish/text/antlr/_runtime/FileStream.py +0 -30
  23. omlish/text/antlr/_runtime/InputStream.py +0 -90
  24. omlish/text/antlr/_runtime/IntervalSet.py +0 -183
  25. omlish/text/antlr/_runtime/LICENSE.txt +0 -28
  26. omlish/text/antlr/_runtime/LL1Analyzer.py +0 -176
  27. omlish/text/antlr/_runtime/Lexer.py +0 -332
  28. omlish/text/antlr/_runtime/ListTokenSource.py +0 -147
  29. omlish/text/antlr/_runtime/Parser.py +0 -583
  30. omlish/text/antlr/_runtime/ParserInterpreter.py +0 -173
  31. omlish/text/antlr/_runtime/ParserRuleContext.py +0 -189
  32. omlish/text/antlr/_runtime/PredictionContext.py +0 -632
  33. omlish/text/antlr/_runtime/Recognizer.py +0 -150
  34. omlish/text/antlr/_runtime/RuleContext.py +0 -230
  35. omlish/text/antlr/_runtime/StdinStream.py +0 -14
  36. omlish/text/antlr/_runtime/Token.py +0 -158
  37. omlish/text/antlr/_runtime/TokenStreamRewriter.py +0 -258
  38. omlish/text/antlr/_runtime/Utils.py +0 -36
  39. omlish/text/antlr/_runtime/__init__.py +0 -2
  40. omlish/text/antlr/_runtime/_all.py +0 -24
  41. omlish/text/antlr/_runtime/_pygrun.py +0 -174
  42. omlish/text/antlr/_runtime/atn/ATN.py +0 -135
  43. omlish/text/antlr/_runtime/atn/ATNConfig.py +0 -162
  44. omlish/text/antlr/_runtime/atn/ATNConfigSet.py +0 -215
  45. omlish/text/antlr/_runtime/atn/ATNDeserializationOptions.py +0 -27
  46. omlish/text/antlr/_runtime/atn/ATNDeserializer.py +0 -449
  47. omlish/text/antlr/_runtime/atn/ATNSimulator.py +0 -50
  48. omlish/text/antlr/_runtime/atn/ATNState.py +0 -267
  49. omlish/text/antlr/_runtime/atn/ATNType.py +0 -20
  50. omlish/text/antlr/_runtime/atn/LexerATNSimulator.py +0 -573
  51. omlish/text/antlr/_runtime/atn/LexerAction.py +0 -301
  52. omlish/text/antlr/_runtime/atn/LexerActionExecutor.py +0 -146
  53. omlish/text/antlr/_runtime/atn/ParserATNSimulator.py +0 -1664
  54. omlish/text/antlr/_runtime/atn/PredictionMode.py +0 -502
  55. omlish/text/antlr/_runtime/atn/SemanticContext.py +0 -333
  56. omlish/text/antlr/_runtime/atn/Transition.py +0 -271
  57. omlish/text/antlr/_runtime/atn/__init__.py +0 -4
  58. omlish/text/antlr/_runtime/dfa/DFA.py +0 -136
  59. omlish/text/antlr/_runtime/dfa/DFASerializer.py +0 -76
  60. omlish/text/antlr/_runtime/dfa/DFAState.py +0 -129
  61. omlish/text/antlr/_runtime/dfa/__init__.py +0 -4
  62. omlish/text/antlr/_runtime/error/DiagnosticErrorListener.py +0 -111
  63. omlish/text/antlr/_runtime/error/ErrorListener.py +0 -75
  64. omlish/text/antlr/_runtime/error/ErrorStrategy.py +0 -712
  65. omlish/text/antlr/_runtime/error/Errors.py +0 -176
  66. omlish/text/antlr/_runtime/error/__init__.py +0 -4
  67. omlish/text/antlr/_runtime/tree/Chunk.py +0 -33
  68. omlish/text/antlr/_runtime/tree/ParseTreeMatch.py +0 -121
  69. omlish/text/antlr/_runtime/tree/ParseTreePattern.py +0 -75
  70. omlish/text/antlr/_runtime/tree/ParseTreePatternMatcher.py +0 -377
  71. omlish/text/antlr/_runtime/tree/RuleTagToken.py +0 -53
  72. omlish/text/antlr/_runtime/tree/TokenTagToken.py +0 -50
  73. omlish/text/antlr/_runtime/tree/Tree.py +0 -194
  74. omlish/text/antlr/_runtime/tree/Trees.py +0 -114
  75. omlish/text/antlr/_runtime/tree/__init__.py +0 -2
  76. omlish/text/antlr/_runtime/xpath/XPath.py +0 -278
  77. omlish/text/antlr/_runtime/xpath/XPathLexer.py +0 -98
  78. omlish/text/antlr/_runtime/xpath/__init__.py +0 -4
  79. omlish/text/antlr/delimit.py +0 -109
  80. omlish/text/antlr/dot.py +0 -41
  81. omlish/text/antlr/errors.py +0 -14
  82. omlish/text/antlr/input.py +0 -96
  83. omlish/text/antlr/parsing.py +0 -54
  84. omlish/text/antlr/runtime.py +0 -102
  85. omlish/text/antlr/utils.py +0 -38
  86. {omlish-0.0.0.dev437.dist-info → omlish-0.0.0.dev438.dist-info}/WHEEL +0 -0
  87. {omlish-0.0.0.dev437.dist-info → omlish-0.0.0.dev438.dist-info}/entry_points.txt +0 -0
  88. {omlish-0.0.0.dev437.dist-info → omlish-0.0.0.dev438.dist-info}/licenses/LICENSE +0 -0
  89. {omlish-0.0.0.dev437.dist-info → omlish-0.0.0.dev438.dist-info}/top_level.txt +0 -0
omlish/__about__.py CHANGED
@@ -1,5 +1,5 @@
-__version__ = '0.0.0.dev437'
-__revision__ = '6e7eba22d4b3eb0dc36e24d1612b485ffa1e3c00'
+__version__ = '0.0.0.dev438'
+__revision__ = '5451232ca0015e84e38bd53d3c368a9d659ea8ec'
 
 
 #
omlish/formats/json/stream/__init__.py CHANGED
@@ -8,6 +8,7 @@ from .errors import ( # noqa
 
 
 from .lexing import (  # noqa
+    IdentTokenKind,
     ValueTokenKind,
     VALUE_TOKEN_KINDS,
     ControlTokenKind,
@@ -22,7 +23,7 @@ from .lexing import ( # noqa
     Token,
 
     CONTROL_TOKENS,
-    CONST_TOKENS,
+    CONST_IDENT_VALUES,
 
     JsonStreamLexError,
     JsonStreamLexer,
@@ -54,4 +55,5 @@ from .utils import ( # noqa
 
     stream_parse_values,
     stream_parse_one_value,
+    stream_parse_exactly_one_value,
 )
omlish/formats/json/stream/lexing.py CHANGED
@@ -13,20 +13,23 @@ import re
 import typing as ta
 
 from .... import check
+from .... import lang
 from ....funcs.genmachine import GenMachine
 from .errors import JsonStreamError
 
 
+with lang.auto_proxy_import(globals()):
+    import unicodedata
+
+
 ##
 
 
+IdentTokenKind: ta.TypeAlias = ta.Literal['IDENT']
+
 ValueTokenKind: ta.TypeAlias = ta.Literal[
     'STRING',
     'NUMBER',
-
-    'SPECIAL_NUMBER',
-    'BOOLEAN',
-    'NULL',
 ]
 
 VALUE_TOKEN_KINDS = frozenset(check.isinstance(a, str) for a in ta.get_args(ValueTokenKind))
@@ -45,6 +48,7 @@ SpaceTokenKind: ta.TypeAlias = ta.Literal['SPACE']
 CommentTokenKind: ta.TypeAlias = ta.Literal['COMMENT']
 
 TokenKind: ta.TypeAlias = ta.Union[  # noqa
+    IdentTokenKind,
     ValueTokenKind,
     ControlTokenKind,
     SpaceTokenKind,
@@ -93,16 +97,19 @@ CONTROL_TOKENS: ta.Mapping[str, TokenKind] = {
     ':': 'COLON',
 }
 
-CONST_TOKENS: ta.Mapping[str, tuple[TokenKind, str | float | None]] = {
-    'NaN': ('SPECIAL_NUMBER', float('nan')),
-    'Infinity': ('SPECIAL_NUMBER', float('inf')),
-    '-Infinity': ('SPECIAL_NUMBER', float('-inf')),
+CONST_IDENT_VALUES: ta.Mapping[str, str | float | None] = {
+    'NaN': float('nan'),
+    '-NaN': float('-nan'),  # distinguished in parsing even if indistinguishable in value
+    'Infinity': float('inf'),
+    '-Infinity': float('-inf'),
 
-    'true': ('BOOLEAN', True),
-    'false': ('BOOLEAN', False),
-    'null': ('NULL', None),
+    'true': True,
+    'false': False,
+    'null': None,
 }
 
+MAX_CONST_IDENT_LEN = max(map(len, CONST_IDENT_VALUES))
+
 
 ##
 
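Note: the old CONST_TOKENS table paired each keyword with its own token kind ('SPECIAL_NUMBER', 'BOOLEAN', 'NULL'); the new CONST_IDENT_VALUES maps raw spellings straight to Python values, and the lexer now emits a single IDENT kind, leaving interpretation to the parser. MAX_CONST_IDENT_LEN also replaces the old hard-coded limit of 8, accounting for the new signed spellings. A standalone sketch of what the new table implies (restated here for illustration, not imported from the package):

    import math

    CONST_IDENT_VALUES = {
        'NaN': float('nan'),
        '-NaN': float('-nan'),  # a distinct key, even though its value is just another NaN
        'Infinity': float('inf'),
        '-Infinity': float('-inf'),
        'true': True,
        'false': False,
        'null': None,
    }

    MAX_CONST_IDENT_LEN = max(map(len, CONST_IDENT_VALUES))  # iterates the keys

    assert MAX_CONST_IDENT_LEN == len('-Infinity') == 9
    assert math.isnan(CONST_IDENT_VALUES['NaN'])
    assert CONST_IDENT_VALUES['null'] is None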
@@ -119,21 +126,39 @@ class JsonStreamLexer(GenMachine[str, Token]):
         self,
         *,
         include_raw: bool = False,
+
+        allow_extended_space: bool = False,
         include_space: bool = False,
+
         allow_comments: bool = False,
         include_comments: bool = False,
+
         allow_single_quotes: bool = False,
         string_literal_parser: ta.Callable[[str], str] | None = None,
+
+        allow_extended_number_literals: bool = False,
+        number_literal_parser: ta.Callable[[str], ta.Any] | None = None,
+
+        allow_extended_idents: bool = False,
     ) -> None:
         self._include_raw = include_raw
+
+        self._allow_extended_space = allow_extended_space
         self._include_space = include_space
+
         self._allow_comments = allow_comments
         self._include_comments = include_comments
+
         self._allow_single_quotes = allow_single_quotes
         if string_literal_parser is None:
-            string_literal_parser = json.loads
+            string_literal_parser = json.loads  # noqa
         self._string_literal_parser = string_literal_parser
 
+        self._allow_extended_number_literals = allow_extended_number_literals
+        self._number_literal_parser = number_literal_parser
+
+        self._allow_extended_idents = allow_extended_idents
+
         self._ofs = 0
         self._line = 1
         self._col = 0
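Taken together, the new keyword arguments let one lexer serve both strict JSON and JSON5-style input. A hypothetical construction using only the flag names from the signature above (how omlish/formats/json5/stream.py actually wires them is not shown in this diff):

    from omlish.formats.json.stream import JsonStreamLexer

    lex = JsonStreamLexer(
        allow_extended_space=True,            # JSON5 whitespace set (NBSP, BOM, Zs, ...)
        allow_comments=True,                  # // and /* ... */ comments
        allow_single_quotes=True,             # 'single-quoted' strings
        allow_extended_number_literals=True,  # hex, leading '.', leading '+', signed NaN/Infinity
        allow_extended_idents=True,           # unquoted ECMAScript-style identifiers
    )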
@@ -199,7 +224,33 @@ class JsonStreamLexer(GenMachine[str, Token]):
             if not c:
                 return None
 
-            if c.isspace():
+            if c.isspace() or (self._allow_extended_space and c in (
+                '\u0009'
+                '\u000A'
+                '\u000B'
+                '\u000C'
+                '\u000D'
+                '\u0020'
+                '\u00A0'
+                '\u2028'
+                '\u2029'
+                '\uFEFF'
+                '\u1680'
+                '\u2000'
+                '\u2001'
+                '\u2002'
+                '\u2003'
+                '\u2004'
+                '\u2005'
+                '\u2006'
+                '\u2007'
+                '\u2008'
+                '\u2009'
+                '\u200A'
+                '\u202F'
+                '\u205F'
+                '\u3000'
+            )):
                 if self._include_space:
                     yield self._make_tok('SPACE', c, c, self.pos)
                 continue
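Most of the code points listed above are already matched by the c.isspace() check; the notable exception is U+FEFF (zero-width no-break space / BOM), which JSON5 treats as whitespace but Python does not:

    assert '\u00A0'.isspace()      # NBSP: plain c.isspace() already matches it
    assert '\u2028'.isspace()      # LINE SEPARATOR: likewise
    assert not '\uFEFF'.isspace()  # BOM: matched only via the extended set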
@@ -211,16 +262,18 @@
             if c == '"' or (self._allow_single_quotes and c == "'"):
                 return self._do_string(c)
 
-            if c.isdigit() or c == '-':
+            if c.isdigit() or c == '-' or (self._allow_extended_number_literals and c in '.+'):
                 return self._do_number(c)
 
+            if self._allow_comments and c == '/':
+                return self._do_comment()
+
+            if self._allow_extended_idents:
+                return self._do_extended_ident(c)
+
             if c in 'tfnIN':
                 return self._do_const(c)
 
-            if self._allow_comments and c == '/':
-                yield from self._do_comment()
-                continue
-
             self._raise(f'Unexpected character: {c}')
 
     def _do_string(self, q: str):
@@ -269,7 +322,7 @@
             if not c:
                 break
 
-            if not (c.isdigit() or c in '.eE+-'):
+            if not (c.isdigit() or c in '.eE+-' or (self._allow_extended_number_literals and c in 'xXabcdefABCDEF')):
                 break
             self._buf.write(c)
 
@@ -277,33 +330,58 @@
 
         #
 
-        if not NUMBER_PAT.fullmatch(raw):
-            # Can only be -Infinity
+        if self._allow_extended_number_literals:
+            p = 1 if raw[0] in '+-' else 0
+            if (len(raw) - p) > 1 and raw[p] == '0' and raw[p + 1].isdigit():
+                self._raise('Invalid number literal')
 
-            if not c:
-                self._raise('Unexpected end of input')
+        if raw == '-' or (self._allow_extended_number_literals and raw == '+'):
+            for svs in [
+                'Infinity',
+                *(['NaN'] if self._allow_extended_number_literals else []),
+            ]:
+                if c != svs[0]:
+                    continue
 
-            raw += c
-            try:
-                for _ in range(7):
-                    raw += self._char_in((yield None))  # noqa
-            except GeneratorExit:
-                self._raise('Unexpected end of input')
+                if not c:
+                    self._raise('Unexpected end of input')
 
-            if raw != '-Infinity':
-                self._raise(f'Invalid number format: {raw}')
+                raw += c
+                try:
+                    for _ in range(len(svs) - 1):
+                        c = self._char_in((yield None))  # noqa
+                        if not c:
+                            break
+                        raw += c
+                except GeneratorExit:
+                    self._raise('Unexpected end of input')
+
+                if raw[1:] != svs:
+                    self._raise(f'Invalid number format: {raw}')
 
-            tk, tv = CONST_TOKENS[raw]
-            yield self._make_tok(tk, tv, raw, pos)
+                if raw[0] == '+':
+                    raw = raw[1:]
 
-            return self._do_main()
+                yield self._make_tok('IDENT', raw, raw, pos)
+
+                return self._do_main()
 
         #
 
-        if '.' in raw or 'e' in raw or 'E' in raw:
-            nv = float(raw)
+        nv: ta.Any
+
+        if (np := self._number_literal_parser) is not None:
+            nv = np(raw)
+
         else:
-            nv = int(raw)
+            if not NUMBER_PAT.fullmatch(raw):
+                self._raise(f'Invalid number format: {raw}')
+
+            if '.' in raw or 'e' in raw or 'E' in raw:
+                nv = float(raw)
+            else:
+                nv = int(raw)
+
         yield self._make_tok('NUMBER', nv, raw, pos)
 
         #
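The new leading-zero guard rejects octal-looking literals while still admitting '0', '0.5', and '0x...' forms, since '.' and 'x' are not digits. The check, extracted as a standalone predicate for illustration (the real code raises via self._raise):

    def has_illegal_leading_zero(raw: str) -> bool:
        p = 1 if raw[0] in '+-' else 0
        return (len(raw) - p) > 1 and raw[p] == '0' and raw[p + 1].isdigit()

    assert has_illegal_leading_zero('012')
    assert has_illegal_leading_zero('-07')
    assert not has_illegal_leading_zero('0')
    assert not has_illegal_leading_zero('0x1F')  # 'x' is not a digit
    assert not has_illegal_leading_zero('0.5')   # '.' is not a digit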
@@ -322,17 +400,79 @@
             except GeneratorExit:
                 self._raise('Unexpected end of input')
 
-            if raw in CONST_TOKENS:
+            if raw in CONST_IDENT_VALUES:
                 break
 
-            if len(raw) > 8:  # None of the keywords are longer than 8 characters
+            if len(raw) > MAX_CONST_IDENT_LEN:
                 self._raise(f'Invalid literal: {raw}')
 
-        tk, tv = CONST_TOKENS[raw]
-        yield self._make_tok(tk, tv, raw, pos)
+        yield self._make_tok('IDENT', raw, raw, pos)
 
         return self._do_main()
 
+    def _do_unicode_escape(self):
+        try:
+            c = self._char_in((yield None))  # noqa
+        except GeneratorExit:
+            self._raise('Unexpected end of input')
+
+        if c != 'u':
+            self._raise('Illegal identifier escape')
+
+        ux = []
+        for _ in range(4):
+            try:
+                c = self._char_in((yield None))  # noqa
+            except GeneratorExit:
+                self._raise('Unexpected end of input')
+
+            if c not in '0123456789abcdefABCDEF':
+                self._raise('Illegal identifier escape')
+
+            ux.append(c)
+
+        return chr(int(''.join(ux), 16))
+
+    def _do_extended_ident(self, c: str):
+        check.state(self._buf.tell() == 0)
+
+        if c == '\\':
+            c = yield from self._do_unicode_escape()
+
+        elif not (c in '$_' or unicodedata.category(c).startswith('L')):
+            self._raise('Illegal identifier start')
+
+        self._buf.write(c)
+
+        pos = self.pos
+
+        while True:
+            try:
+                c = self._char_in((yield None))  # noqa
+            except GeneratorExit:
+                self._raise('Unexpected end of input')
+
+            if c == '\\':
+                c = yield from self._do_unicode_escape()
+                self._buf.write(c)
+                continue
+
+            if not c:
+                break
+
+            if c not in '$_\u200C\u200D':
+                uc = unicodedata.category(c)
+                if not (uc.startswith(('L', 'M', 'N')) or uc == 'Pc'):
+                    break
+
+            self._buf.write(c)
+
+        raw = self._flip_buf()
+
+        yield self._make_tok('IDENT', raw, raw, pos)
+
+        return self._do_main(c)
+
     def _do_comment(self):
         check.state(self._buf.tell() == 0)
 
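The new _do_extended_ident follows ECMAScript IdentifierName rules: an identifier starts with '$', '_', a Unicode letter (category L*), or a \uXXXX escape, and continues with those plus combining marks (M*), numbers (N*), connector punctuation (Pc), and ZWNJ/ZWJ. A standalone restatement of the category checks (illustrative, not imported from the package):

    import unicodedata

    def is_ident_start(c: str) -> bool:
        return c in '$_' or unicodedata.category(c).startswith('L')

    def is_ident_continue(c: str) -> bool:
        if c in '$_\u200C\u200D':  # ZWNJ and ZWJ are allowed mid-identifier
            return True
        uc = unicodedata.category(c)
        return uc.startswith(('L', 'M', 'N')) or uc == 'Pc'

    assert is_ident_start('π') and not is_ident_start('9')
    assert is_ident_continue('9') and is_ident_continue('_')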
@@ -349,7 +489,7 @@
                 except GeneratorExit:
                     self._raise('Unexpected end of input')
 
-                if ic == '\n':
+                if not ic or ic == '\n':
                     break
 
             if self._include_comments:
@@ -360,6 +500,9 @@
                 raw = f'//{cmt}\n' if self._include_raw else None
                 yield self._make_tok('COMMENT', cmt, raw, pos)
 
+            if not ic:
+                return
+
         elif oc == '*':
             lc: str | None = None
             while True:
@@ -382,3 +525,5 @@
 
         else:
             self._raise(f'Unexpected character after comment start: {oc}')
+
+        return self._do_main()
omlish/formats/json/stream/parsing.py CHANGED
@@ -4,6 +4,7 @@ import typing as ta
 from .... import lang
 from ....funcs.genmachine import GenMachine
 from .errors import JsonStreamError
+from .lexing import CONST_IDENT_VALUES
 from .lexing import SCALAR_VALUE_TYPES
 from .lexing import VALUE_TOKEN_KINDS
 from .lexing import Position
@@ -95,11 +96,21 @@ class JsonStreamObject(list):
 
 
 class JsonStreamParser(GenMachine[Token, JsonStreamParserEvent]):
-    def __init__(self) -> None:
-        super().__init__(self._do_value())
+    def __init__(
+        self,
+        *,
+        allow_trailing_commas: bool = False,
+
+        allow_extended_idents: bool = False,
+    ) -> None:
+        self._allow_trailing_commas = allow_trailing_commas
+
+        self._allow_extended_idents = allow_extended_idents
 
         self._stack: list[ta.Literal['OBJECT', 'KEY', 'ARRAY']] = []
 
+        super().__init__(self._do_value())
+
     #
 
     def _next_tok(self):
@@ -144,12 +155,23 @@
                 raise JsonStreamParseError('Expected value') from None
             else:
                 raise
+        # except Exception as e:
+        #     raise
 
         if tok.kind in VALUE_TOKEN_KINDS:
             y, r = self._emit_event(tok.value)
             yield y
             return r
 
+        elif tok.kind == 'IDENT':
+            try:
+                cv = CONST_IDENT_VALUES[tok.value]
+            except KeyError:
+                raise JsonStreamParseError('Expected value', tok.pos) from None
+            y, r = self._emit_event(cv)
+            yield y
+            return r
+
         elif tok.kind == 'LBRACE':
             y, r = self._emit_begin_object()
             yield y
@@ -193,7 +215,7 @@
             except GeneratorExit:
                 raise JsonStreamParseError('Expected object body') from None
 
-            if tok.kind == 'STRING':
+            if tok.kind == 'STRING' or (self._allow_trailing_commas and tok.kind == 'IDENT'):
                 k = tok.value
 
                 try:
@@ -225,7 +247,7 @@
                 raise JsonStreamParseError('Expected continuation') from None
 
             if tok.kind == 'COMMA':
-                return self._do_object_body(must_be_present=True)
+                return self._do_object_body(must_be_present=not self._allow_trailing_commas)
 
             elif tok.kind == 'RBRACE':
                 y, r = self._emit_end_object()
@@ -258,7 +280,7 @@
                 raise JsonStreamParseError('Expected continuation') from None
 
             if tok.kind == 'COMMA':
-                return self._do_value(must_be_present=True)
+                return self._do_value(must_be_present=not self._allow_trailing_commas)
 
             elif tok.kind == 'RBRACKET':
                 y, r = self._emit_end_array()
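With allow_trailing_commas=True, a COMMA continuation re-enters the object or array body with must_be_present=False, so a closing RBRACE or RBRACKET may follow immediately. A hypothetical construction using the flags from the new signature (token feeding elided):

    from omlish.formats.json.stream.parsing import JsonStreamParser

    parser = JsonStreamParser(
        allow_trailing_commas=True,  # accepts [1, 2,] and {"a": 1,}
        allow_extended_idents=True,  # accepts IDENT tokens produced by the extended lexer
    )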
omlish/formats/json/stream/utils.py CHANGED
@@ -23,63 +23,136 @@ TODO:
  - Names and values separated by = or => instead of :.
  - Name/value pairs separated by ; instead of ,.
 """
-import dataclasses as dc
+import itertools
 import typing as ta
 
 from .... import lang
 from .building import JsonValueBuilder
+from .errors import JsonStreamError
 from .lexing import JsonStreamLexer
+from .lexing import Token
 from .parsing import JsonStreamParser
+from .parsing import JsonStreamParserEvent
 
 
 ##
 
 
-@dc.dataclass(kw_only=True)
 class JsonStreamValueParser(lang.ExitStacked):
-    include_raw: bool = False
-    yield_object_lists: bool = False
+    class Machinery(ta.NamedTuple):
+        lex: JsonStreamLexer
+        parse: JsonStreamParser
+        build: JsonValueBuilder
 
-    json5: bool = False
+    def __init__(self, m: Machinery) -> None:
+        super().__init__()
 
-    #
+        self._m = m
 
-    _lex: JsonStreamLexer = dc.field(init=False)
-    _parse: JsonStreamParser = dc.field(init=False)
-    _build: JsonValueBuilder = dc.field(init=False)
+    #
 
     def _enter_contexts(self) -> None:
-        self._lex = JsonStreamLexer(
-            include_raw=self.include_raw,
-            allow_comments=self.json5,
-            allow_single_quotes=self.json5,
-        )
+        self._enter_context(self._m.lex)
+        self._enter_context(self._m.parse)
+
+    def feed(self, i: ta.Iterable[str]) -> ta.Iterator[ta.Any]:
+        for c in i:
+            for t in self._m.lex(c):
+                for e in self._m.parse(t):
+                    for v in self._m.build(e):  # noqa
+                        yield v
+
+    #
+
+    @classmethod
+    def parse_values(
+        cls,
+        m: Machinery,
+        i: ta.Iterable[str],
+    ) -> ta.Iterator[ta.Any]:
+        with cls(m) as p:
+            yield from p.feed(itertools.chain(i, ['']))
+
+    @classmethod
+    def parse_one_value(
+        cls,
+        m: Machinery,
+        i: ta.Iterable[str],
+    ) -> ta.Any:
+        with cls(m) as p:
+            return next(p.feed(itertools.chain(i, [''])))
+
+    @classmethod
+    def parse_exactly_one_value(
+        cls,
+        m: Machinery,
+        i: ta.Iterable[str],
+    ) -> ta.Any:
+        r: ta.Any
+        r = not_set = object()
+        with cls(m) as p:
+            for v in p.feed(itertools.chain(i, [''])):
+                if r is not_set:
+                    r = v
+                else:
+                    raise JsonStreamError('Unexpected input')
+        if r is not_set:
+            raise JsonStreamError('No value')
+        return r
+
+
+##
+
 
-        self._parse = JsonStreamParser()
+class DebugJsonStreamValueParser(JsonStreamValueParser):
+    def __init__(self, m: JsonStreamValueParser.Machinery) -> None:
+        super().__init__(m)
 
-        self._build = JsonValueBuilder(
-            yield_object_lists=self.yield_object_lists,
-        )
+        self._chars: list[str] = []
+        self._tokens: list[Token] = []
+        self._events: list[JsonStreamParserEvent] = []
+        self._values: list[ta.Any] = []
 
     def feed(self, i: ta.Iterable[str]) -> ta.Iterator[ta.Any]:
        for c in i:
-            for t in self._lex(c):
-                for e in self._parse(t):
-                    for v in self._build(e):  # noqa
+            self._chars.append(c)
+            for t in self._m.lex(c):
+                self._tokens.append(t)
+                for e in self._m.parse(t):
+                    self._events.append(e)
+                    for v in self._m.build(e):
+                        self._values.append(v)
                         yield v
 
 
-def stream_parse_values(
-    i: ta.Iterable[str],
-    **kwargs: ta.Any,
-) -> ta.Generator[ta.Any]:
-    with JsonStreamValueParser(**kwargs) as p:
-        yield from p.feed(i)
+##
+
+
+def make_machinery(
+    *,
+    include_raw: bool = False,
+    yield_object_lists: bool = False,
+) -> JsonStreamValueParser.Machinery:
+    return JsonStreamValueParser.Machinery(
+        JsonStreamLexer(
+            include_raw=include_raw,
+        ),
+
+        JsonStreamParser(),
+
+        JsonValueBuilder(
+            yield_object_lists=yield_object_lists,
+        ),
+    )
+
+
+def stream_parse_values(i: ta.Iterable[str], **kwargs: ta.Any) -> ta.Iterator[ta.Any]:
+    return JsonStreamValueParser.parse_values(make_machinery(**kwargs), i)
+
+
+def stream_parse_one_value(i: ta.Iterable[str], **kwargs: ta.Any) -> ta.Any:
+    return JsonStreamValueParser.parse_one_value(make_machinery(**kwargs), i)
 
 
-def stream_parse_one_value(
-    i: ta.Iterable[str],
-    **kwargs: ta.Any,
-) -> ta.Any:
-    with JsonStreamValueParser(**kwargs) as p:
-        return next(p.feed(i))
+def stream_parse_exactly_one_value(i: ta.Iterable[str], **kwargs: ta.Any) -> ta.Any:
+    return JsonStreamValueParser.parse_exactly_one_value(make_machinery(**kwargs), i)
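A sketch of the expected behavior of the three module-level helpers, per the signatures above (unverified here; a plain str works as the Iterable[str] input, and the '' chained on by the classmethods flushes the machines at end of input):

    from omlish.formats.json.stream.utils import (
        stream_parse_values,
        stream_parse_one_value,
        stream_parse_exactly_one_value,
    )

    vals = list(stream_parse_values('[1, 2] true'))  # -> [[1, 2], True]
    one = stream_parse_one_value('{"a": 1}')         # -> {'a': 1}, stopping after the first value
    only = stream_parse_exactly_one_value('42')      # -> 42
    # stream_parse_exactly_one_value('1 2') raises JsonStreamError('Unexpected input')
    # stream_parse_exactly_one_value('')    raises JsonStreamError('No value')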
omlish/formats/json5/literals.py CHANGED
@@ -124,7 +124,10 @@ def parse_string_literal(s: str) -> str:
 def parse_number_literal(s: str) -> int | float:
     s = s.lower()
 
-    if 'x' in s:
-        return int(s, 16)
-    else:
-        return float(s)
+    try:
+        if 'x' in s:
+            return int(s, 16)
+        else:
+            return float(s)
+    except ValueError as e:
+        raise Json5Error from e
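Previously a malformed JSON5 number literal escaped as a bare ValueError; it now surfaces as the module's own Json5Error. Expected behavior per the code above (a sketch):

    from omlish.formats.json5.literals import parse_number_literal

    parse_number_literal('0x1A')      # -> 26 (int(s, 16) accepts the '0x' prefix)
    parse_number_literal('1.5e2')     # -> 150.0
    parse_number_literal('Infinity')  # -> inf (float() accepts 'infinity' after s.lower())
    # parse_number_literal('0xZZ')    # raises Json5Error, chained from the ValueError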