omlish 0.0.0.dev267__py3-none-any.whl → 0.0.0.dev269__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. omlish/__about__.py +2 -2
  2. omlish/dataclasses/__init__.py +7 -0
  3. omlish/dataclasses/impl/metaclass.py +1 -0
  4. omlish/dataclasses/impl/simple.py +1 -1
  5. omlish/formats/toml/parser.py +593 -594
  6. omlish/lang/classes/abstract.py +3 -0
  7. omlish/lang/comparison.py +3 -0
  8. omlish/lang/datetimes.py +3 -0
  9. omlish/lang/generators.py +2 -2
  10. omlish/lang/maybes.py +3 -0
  11. omlish/lang/resolving.py +3 -0
  12. omlish/lang/sys.py +3 -0
  13. omlish/marshal/objects/dataclasses.py +1 -1
  14. omlish/marshal/polymorphism/metadata.py +9 -3
  15. omlish/sql/alchemy/__init__.py +31 -0
  16. omlish/sql/alchemy/apiadapter.py +121 -0
  17. omlish/sql/api/__init__.py +39 -0
  18. omlish/sql/api/base.py +1 -0
  19. omlish/sql/parsing/parsing.py +1 -1
  20. omlish/sql/queries/__init__.py +4 -0
  21. omlish/sql/queries/base.py +113 -2
  22. omlish/sql/queries/exprs.py +15 -2
  23. omlish/sql/queries/inserts.py +2 -1
  24. omlish/sql/queries/marshal.py +7 -1
  25. omlish/sql/queries/params.py +3 -2
  26. omlish/sql/queries/rendering.py +16 -4
  27. omlish/sql/queries/selects.py +17 -2
  28. omlish/testing/testing.py +0 -1
  29. {omlish-0.0.0.dev267.dist-info → omlish-0.0.0.dev269.dist-info}/METADATA +1 -1
  30. {omlish-0.0.0.dev267.dist-info → omlish-0.0.0.dev269.dist-info}/RECORD +34 -33
  31. {omlish-0.0.0.dev267.dist-info → omlish-0.0.0.dev269.dist-info}/WHEEL +0 -0
  32. {omlish-0.0.0.dev267.dist-info → omlish-0.0.0.dev269.dist-info}/entry_points.txt +0 -0
  33. {omlish-0.0.0.dev267.dist-info → omlish-0.0.0.dev269.dist-info}/licenses/LICENSE +0 -0
  34. {omlish-0.0.0.dev267.dist-info → omlish-0.0.0.dev269.dist-info}/top_level.txt +0 -0
@@ -57,75 +57,6 @@ TomlPos = int # ta.TypeAlias
57
57
  ##
58
58
 
59
59
 
60
- _TOML_TIME_RE_STR = r'([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?'
61
-
62
- TOML_RE_NUMBER = re.compile(
63
- r"""
64
- 0
65
- (?:
66
- x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
67
- |
68
- b[01](?:_?[01])* # bin
69
- |
70
- o[0-7](?:_?[0-7])* # oct
71
- )
72
- |
73
- [+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
74
- (?P<floatpart>
75
- (?:\.[0-9](?:_?[0-9])*)? # optional fractional part
76
- (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
77
- )
78
- """,
79
- flags=re.VERBOSE,
80
- )
81
- TOML_RE_LOCALTIME = re.compile(_TOML_TIME_RE_STR)
82
- TOML_RE_DATETIME = re.compile(
83
- rf"""
84
- ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
85
- (?:
86
- [Tt ]
87
- {_TOML_TIME_RE_STR}
88
- (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
89
- )?
90
- """,
91
- flags=re.VERBOSE,
92
- )
93
-
94
-
95
- def toml_match_to_datetime(match: re.Match) -> ta.Union[datetime.datetime, datetime.date]:
96
- """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
97
-
98
- Raises ValueError if the match does not correspond to a valid date or datetime.
99
- """
100
- (
101
- year_str,
102
- month_str,
103
- day_str,
104
- hour_str,
105
- minute_str,
106
- sec_str,
107
- micros_str,
108
- zulu_time,
109
- offset_sign_str,
110
- offset_hour_str,
111
- offset_minute_str,
112
- ) = match.groups()
113
- year, month, day = int(year_str), int(month_str), int(day_str)
114
- if hour_str is None:
115
- return datetime.date(year, month, day)
116
- hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
117
- micros = int(micros_str.ljust(6, '0')) if micros_str else 0
118
- if offset_sign_str:
119
- tz: ta.Optional[datetime.tzinfo] = toml_cached_tz(
120
- offset_hour_str, offset_minute_str, offset_sign_str,
121
- )
122
- elif zulu_time:
123
- tz = datetime.UTC
124
- else: # local date-time
125
- tz = None
126
- return datetime.datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
127
-
128
-
129
60
  @functools.lru_cache() # noqa
130
61
  def toml_cached_tz(hour_str: str, minute_str: str, sign_str: str) -> datetime.timezone:
131
62
  sign = 1 if sign_str == '+' else -1
@@ -137,47 +68,25 @@ def toml_cached_tz(hour_str: str, minute_str: str, sign_str: str) -> datetime.ti
137
68
  )
138
69
 
139
70
 
140
- def toml_match_to_localtime(match: re.Match) -> datetime.time:
141
- hour_str, minute_str, sec_str, micros_str = match.groups()
142
- micros = int(micros_str.ljust(6, '0')) if micros_str else 0
143
- return datetime.time(int(hour_str), int(minute_str), int(sec_str), micros)
144
-
145
-
146
- def toml_match_to_number(match: re.Match, parse_float: TomlParseFloat) -> ta.Any:
147
- if match.group('floatpart'):
148
- return parse_float(match.group())
149
- return int(match.group(), 0)
150
-
151
-
152
- TOML_ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
153
-
154
- # Neither of these sets include quotation mark or backslash. They are currently handled as separate cases in the parser
155
- # functions.
156
- TOML_ILLEGAL_BASIC_STR_CHARS = TOML_ASCII_CTRL - frozenset('\t')
157
- TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS = TOML_ASCII_CTRL - frozenset('\t\n')
71
+ def toml_make_safe_parse_float(parse_float: TomlParseFloat) -> TomlParseFloat:
72
+ """
73
+ A decorator to make `parse_float` safe.
158
74
 
159
- TOML_ILLEGAL_LITERAL_STR_CHARS = TOML_ILLEGAL_BASIC_STR_CHARS
160
- TOML_ILLEGAL_MULTILINE_LITERAL_STR_CHARS = TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS
75
+ `parse_float` must not return dicts or lists, because these types would be mixed with parsed TOML tables and arrays,
76
+ thus confusing the parser. The returned decorated callable raises `ValueError` instead of returning illegal types.
77
+ """
161
78
 
162
- TOML_ILLEGAL_COMMENT_CHARS = TOML_ILLEGAL_BASIC_STR_CHARS
79
+ # The default `float` callable never returns illegal types. Optimize it.
80
+ if parse_float is float:
81
+ return float
163
82
 
164
- TOML_WS = frozenset(' \t')
165
- TOML_WS_AND_NEWLINE = TOML_WS | frozenset('\n')
166
- TOML_BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + '-_')
167
- TOML_KEY_INITIAL_CHARS = TOML_BARE_KEY_CHARS | frozenset("\"'")
168
- TOML_HEXDIGIT_CHARS = frozenset(string.hexdigits)
83
+ def safe_parse_float(float_str: str) -> ta.Any:
84
+ float_value = parse_float(float_str)
85
+ if isinstance(float_value, (dict, list)):
86
+ raise ValueError('parse_float must not return dicts or lists') # noqa
87
+ return float_value
169
88
 
170
- TOML_BASIC_STR_ESCAPE_REPLACEMENTS = types.MappingProxyType(
171
- {
172
- '\\b': '\u0008', # backspace
173
- '\\t': '\u0009', # tab
174
- '\\n': '\u000A', # linefeed
175
- '\\f': '\u000C', # form feed
176
- '\\r': '\u000D', # carriage return
177
- '\\"': '\u0022', # quote
178
- '\\\\': '\u005C', # backslash
179
- },
180
- )
89
+ return safe_parse_float
181
90
 
182
91
 
183
92
  class TomlDecodeError(ValueError):
@@ -202,63 +111,15 @@ def toml_loads(s: str, /, *, parse_float: TomlParseFloat = float) -> ta.Dict[str
202
111
  src = s.replace('\r\n', '\n')
203
112
  except (AttributeError, TypeError):
204
113
  raise TypeError(f"Expected str object, not '{type(s).__qualname__}'") from None
205
- pos = 0
206
- out = TomlOutput(TomlNestedDict(), TomlFlags())
207
- header: TomlKey = ()
114
+
208
115
  parse_float = toml_make_safe_parse_float(parse_float)
209
116
 
210
- # Parse one statement at a time (typically means one line in TOML source)
211
- while True:
212
- # 1. Skip line leading whitespace
213
- pos = toml_skip_chars(src, pos, TOML_WS)
214
-
215
- # 2. Parse rules. Expect one of the following:
216
- # - end of file
217
- # - end of line
218
- # - comment
219
- # - key/value pair
220
- # - append dict to list (and move to its namespace)
221
- # - create dict (and move to its namespace)
222
- # Skip trailing whitespace when applicable.
223
- try:
224
- char = src[pos]
225
- except IndexError:
226
- break
227
- if char == '\n':
228
- pos += 1
229
- continue
230
- if char in TOML_KEY_INITIAL_CHARS:
231
- pos = toml_key_value_rule(src, pos, out, header, parse_float)
232
- pos = toml_skip_chars(src, pos, TOML_WS)
233
- elif char == '[':
234
- try:
235
- second_char: ta.Optional[str] = src[pos + 1]
236
- except IndexError:
237
- second_char = None
238
- out.flags.finalize_pending()
239
- if second_char == '[':
240
- pos, header = toml_create_list_rule(src, pos, out)
241
- else:
242
- pos, header = toml_create_dict_rule(src, pos, out)
243
- pos = toml_skip_chars(src, pos, TOML_WS)
244
- elif char != '#':
245
- raise toml_suffixed_err(src, pos, 'Invalid statement')
246
-
247
- # 3. Skip comment
248
- pos = toml_skip_comment(src, pos)
249
-
250
- # 4. Expect end of line or end of file
251
- try:
252
- char = src[pos]
253
- except IndexError:
254
- break
255
- if char != '\n':
256
- raise toml_suffixed_err(
257
- src, pos, 'Expected newline or end of document after a statement',
258
- )
259
- pos += 1
117
+ parser = TomlParser(
118
+ src,
119
+ parse_float=parse_float,
120
+ )
260
121
 
261
- return out.data.dict
122
+ return parser.parse()
262
123
 
263
124
 
264
125
  class TomlFlags:
@@ -270,6 +131,8 @@ class TomlFlags:
270
131
  EXPLICIT_NEST = 1
271
132
 
272
133
  def __init__(self) -> None:
134
+ super().__init__()
135
+
273
136
  self._flags: ta.Dict[str, dict] = {}
274
137
  self._pending_flags: ta.Set[ta.Tuple[TomlKey, int]] = set()
275
138
 
@@ -320,6 +183,8 @@ class TomlFlags:
320
183
 
321
184
  class TomlNestedDict:
322
185
  def __init__(self) -> None:
186
+ super().__init__()
187
+
323
188
  # The parsed content of the TOML document
324
189
  self.dict: ta.Dict[str, ta.Any] = {}
325
190
 
@@ -352,476 +217,610 @@ class TomlNestedDict:
352
217
  cont[last_key] = [{}]
353
218
 
354
219
 
355
- class TomlOutput(ta.NamedTuple):
356
- data: TomlNestedDict
357
- flags: TomlFlags
220
+ class TomlParser:
221
+ def __init__(
222
+ self,
223
+ src: str,
224
+ *,
225
+ parse_float: TomlParseFloat = float,
226
+ ) -> None:
227
+ super().__init__()
358
228
 
229
+ self.src = src
359
230
 
360
- def toml_skip_chars(src: str, pos: TomlPos, chars: ta.Iterable[str]) -> TomlPos:
361
- try:
362
- while src[pos] in chars:
363
- pos += 1
364
- except IndexError:
365
- pass
366
- return pos
367
-
368
-
369
- def toml_skip_until(
370
- src: str,
371
- pos: TomlPos,
372
- expect: str,
373
- *,
374
- error_on: ta.FrozenSet[str],
375
- error_on_eof: bool,
376
- ) -> TomlPos:
377
- try:
378
- new_pos = src.index(expect, pos)
379
- except ValueError:
380
- new_pos = len(src)
381
- if error_on_eof:
382
- raise toml_suffixed_err(src, new_pos, f'Expected {expect!r}') from None
231
+ self.parse_float = parse_float
383
232
 
384
- if not error_on.isdisjoint(src[pos:new_pos]):
385
- while src[pos] not in error_on:
386
- pos += 1
387
- raise toml_suffixed_err(src, pos, f'Found invalid character {src[pos]!r}')
388
- return new_pos
233
+ self.data = TomlNestedDict()
234
+ self.flags = TomlFlags()
235
+ self.pos = 0
389
236
 
237
+ ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
390
238
 
391
- def toml_skip_comment(src: str, pos: TomlPos) -> TomlPos:
392
- try:
393
- char: ta.Optional[str] = src[pos]
394
- except IndexError:
395
- char = None
396
- if char == '#':
397
- return toml_skip_until(
398
- src, pos + 1, '\n', error_on=TOML_ILLEGAL_COMMENT_CHARS, error_on_eof=False,
399
- )
400
- return pos
239
+ # Neither of these sets include quotation mark or backslash. They are currently handled as separate cases in the
240
+ # parser functions.
241
+ ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset('\t')
242
+ ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset('\t\n')
243
+
244
+ ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
245
+ ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
401
246
 
247
+ ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
402
248
 
403
- def toml_skip_comments_and_array_ws(src: str, pos: TomlPos) -> TomlPos:
404
- while True:
405
- pos_before_skip = pos
406
- pos = toml_skip_chars(src, pos, TOML_WS_AND_NEWLINE)
407
- pos = toml_skip_comment(src, pos)
408
- if pos == pos_before_skip:
409
- return pos
249
+ WS = frozenset(' \t')
250
+ WS_AND_NEWLINE = WS | frozenset('\n')
251
+ BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + '-_')
252
+ KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
253
+ HEXDIGIT_CHARS = frozenset(string.hexdigits)
410
254
 
255
+ BASIC_STR_ESCAPE_REPLACEMENTS = types.MappingProxyType({
256
+ '\\b': '\u0008', # backspace
257
+ '\\t': '\u0009', # tab
258
+ '\\n': '\u000A', # linefeed
259
+ '\\f': '\u000C', # form feed
260
+ '\\r': '\u000D', # carriage return
261
+ '\\"': '\u0022', # quote
262
+ '\\\\': '\u005C', # backslash
263
+ })
264
+
265
+ def parse(self) -> ta.Dict[str, ta.Any]: # noqa: C901
266
+ header: TomlKey = ()
267
+
268
+ # Parse one statement at a time (typically means one line in TOML source)
269
+ while True:
270
+ # 1. Skip line leading whitespace
271
+ self.skip_chars(self.WS)
272
+
273
+ # 2. Parse rules. Expect one of the following:
274
+ # - end of file
275
+ # - end of line
276
+ # - comment
277
+ # - key/value pair
278
+ # - append dict to list (and move to its namespace)
279
+ # - create dict (and move to its namespace)
280
+ # Skip trailing whitespace when applicable.
281
+ try:
282
+ char = self.src[self.pos]
283
+ except IndexError:
284
+ break
285
+ if char == '\n':
286
+ self.pos += 1
287
+ continue
288
+ if char in self.KEY_INITIAL_CHARS:
289
+ self.key_value_rule(header)
290
+ self.skip_chars(self.WS)
291
+ elif char == '[':
292
+ try:
293
+ second_char: ta.Optional[str] = self.src[self.pos + 1]
294
+ except IndexError:
295
+ second_char = None
296
+ self.flags.finalize_pending()
297
+ if second_char == '[':
298
+ header = self.create_list_rule()
299
+ else:
300
+ header = self.create_dict_rule()
301
+ self.skip_chars(self.WS)
302
+ elif char != '#':
303
+ raise self.suffixed_err('Invalid statement')
304
+
305
+ # 3. Skip comment
306
+ self.skip_comment()
307
+
308
+ # 4. Expect end of line or end of file
309
+ try:
310
+ char = self.src[self.pos]
311
+ except IndexError:
312
+ break
313
+ if char != '\n':
314
+ raise self.suffixed_err('Expected newline or end of document after a statement')
315
+ self.pos += 1
411
316
 
412
- def toml_create_dict_rule(src: str, pos: TomlPos, out: TomlOutput) -> ta.Tuple[TomlPos, TomlKey]:
413
- pos += 1 # Skip "["
414
- pos = toml_skip_chars(src, pos, TOML_WS)
415
- pos, key = toml_parse_key(src, pos)
317
+ return self.data.dict
416
318
 
417
- if out.flags.is_(key, TomlFlags.EXPLICIT_NEST) or out.flags.is_(key, TomlFlags.FROZEN):
418
- raise toml_suffixed_err(src, pos, f'Cannot declare {key} twice')
419
- out.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
420
- try:
421
- out.data.get_or_create_nest(key)
422
- except KeyError:
423
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
424
-
425
- if not src.startswith(']', pos):
426
- raise toml_suffixed_err(src, pos, "Expected ']' at the end of a table declaration")
427
- return pos + 1, key
428
-
429
-
430
- def toml_create_list_rule(src: str, pos: TomlPos, out: TomlOutput) -> ta.Tuple[TomlPos, TomlKey]:
431
- pos += 2 # Skip "[["
432
- pos = toml_skip_chars(src, pos, TOML_WS)
433
- pos, key = toml_parse_key(src, pos)
434
-
435
- if out.flags.is_(key, TomlFlags.FROZEN):
436
- raise toml_suffixed_err(src, pos, f'Cannot mutate immutable namespace {key}')
437
- # Free the namespace now that it points to another empty list item...
438
- out.flags.unset_all(key)
439
- # ...but this key precisely is still prohibited from table declaration
440
- out.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
441
- try:
442
- out.data.append_nest_to_list(key)
443
- except KeyError:
444
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
445
-
446
- if not src.startswith(']]', pos):
447
- raise toml_suffixed_err(src, pos, "Expected ']]' at the end of an array declaration")
448
- return pos + 2, key
449
-
450
-
451
- def toml_key_value_rule(
452
- src: str,
453
- pos: TomlPos,
454
- out: TomlOutput,
455
- header: TomlKey,
456
- parse_float: TomlParseFloat,
457
- ) -> TomlPos:
458
- pos, key, value = toml_parse_key_value_pair(src, pos, parse_float)
459
- key_parent, key_stem = key[:-1], key[-1]
460
- abs_key_parent = header + key_parent
461
-
462
- relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
463
- for cont_key in relative_path_cont_keys:
464
- # Check that dotted key syntax does not redefine an existing table
465
- if out.flags.is_(cont_key, TomlFlags.EXPLICIT_NEST):
466
- raise toml_suffixed_err(src, pos, f'Cannot redefine namespace {cont_key}')
467
- # Containers in the relative path can't be opened with the table syntax or dotted key/value syntax in following
468
- # table sections.
469
- out.flags.add_pending(cont_key, TomlFlags.EXPLICIT_NEST)
470
-
471
- if out.flags.is_(abs_key_parent, TomlFlags.FROZEN):
472
- raise toml_suffixed_err(
473
- src,
474
- pos,
475
- f'Cannot mutate immutable namespace {abs_key_parent}',
476
- )
319
+ def skip_chars(self, chars: ta.Iterable[str]) -> None:
320
+ try:
321
+ while self.src[self.pos] in chars:
322
+ self.pos += 1
323
+ except IndexError:
324
+ pass
477
325
 
478
- try:
479
- nest = out.data.get_or_create_nest(abs_key_parent)
480
- except KeyError:
481
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
482
- if key_stem in nest:
483
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value')
484
- # Mark inline table and array namespaces recursively immutable
485
- if isinstance(value, (dict, list)):
486
- out.flags.set(header + key, TomlFlags.FROZEN, recursive=True)
487
- nest[key_stem] = value
488
- return pos
489
-
490
-
491
- def toml_parse_key_value_pair(
492
- src: str,
493
- pos: TomlPos,
494
- parse_float: TomlParseFloat,
495
- ) -> ta.Tuple[TomlPos, TomlKey, ta.Any]:
496
- pos, key = toml_parse_key(src, pos)
497
- try:
498
- char: ta.Optional[str] = src[pos]
499
- except IndexError:
500
- char = None
501
- if char != '=':
502
- raise toml_suffixed_err(src, pos, "Expected '=' after a key in a key/value pair")
503
- pos += 1
504
- pos = toml_skip_chars(src, pos, TOML_WS)
505
- pos, value = toml_parse_value(src, pos, parse_float)
506
- return pos, key, value
507
-
508
-
509
- def toml_parse_key(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, TomlKey]:
510
- pos, key_part = toml_parse_key_part(src, pos)
511
- key: TomlKey = (key_part,)
512
- pos = toml_skip_chars(src, pos, TOML_WS)
513
- while True:
326
+ def skip_until(
327
+ self,
328
+ expect: str,
329
+ *,
330
+ error_on: ta.FrozenSet[str],
331
+ error_on_eof: bool,
332
+ ) -> None:
514
333
  try:
515
- char: ta.Optional[str] = src[pos]
334
+ new_pos = self.src.index(expect, self.pos)
335
+ except ValueError:
336
+ new_pos = len(self.src)
337
+ if error_on_eof:
338
+ raise self.suffixed_err(f'Expected {expect!r}', pos=new_pos) from None
339
+
340
+ if not error_on.isdisjoint(self.src[self.pos:new_pos]):
341
+ while self.src[self.pos] not in error_on:
342
+ self.pos += 1
343
+ raise self.suffixed_err(f'Found invalid character {self.src[self.pos]!r}')
344
+ self.pos = new_pos
345
+
346
+ def skip_comment(self) -> None:
347
+ try:
348
+ char: ta.Optional[str] = self.src[self.pos]
516
349
  except IndexError:
517
350
  char = None
518
- if char != '.':
519
- return pos, key
520
- pos += 1
521
- pos = toml_skip_chars(src, pos, TOML_WS)
522
- pos, key_part = toml_parse_key_part(src, pos)
523
- key += (key_part,)
524
- pos = toml_skip_chars(src, pos, TOML_WS)
351
+ if char == '#':
352
+ self.pos += 1
353
+ self.skip_until(
354
+ '\n',
355
+ error_on=self.ILLEGAL_COMMENT_CHARS,
356
+ error_on_eof=False,
357
+ )
525
358
 
359
+ def skip_comments_and_array_ws(self) -> None:
360
+ while True:
361
+ pos_before_skip = self.pos
362
+ self.skip_chars(self.WS_AND_NEWLINE)
363
+ self.skip_comment()
364
+ if self.pos == pos_before_skip:
365
+ return
526
366
 
527
- def toml_parse_key_part(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
528
- try:
529
- char: ta.Optional[str] = src[pos]
530
- except IndexError:
531
- char = None
532
- if char in TOML_BARE_KEY_CHARS:
533
- start_pos = pos
534
- pos = toml_skip_chars(src, pos, TOML_BARE_KEY_CHARS)
535
- return pos, src[start_pos:pos]
536
- if char == "'":
537
- return toml_parse_literal_str(src, pos)
538
- if char == '"':
539
- return toml_parse_one_line_basic_str(src, pos)
540
- raise toml_suffixed_err(src, pos, 'Invalid initial character for a key part')
541
-
542
-
543
- def toml_parse_one_line_basic_str(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
544
- pos += 1
545
- return toml_parse_basic_str(src, pos, multiline=False)
546
-
547
-
548
- def toml_parse_array(src: str, pos: TomlPos, parse_float: TomlParseFloat) -> ta.Tuple[TomlPos, list]:
549
- pos += 1
550
- array: list = []
551
-
552
- pos = toml_skip_comments_and_array_ws(src, pos)
553
- if src.startswith(']', pos):
554
- return pos + 1, array
555
- while True:
556
- pos, val = toml_parse_value(src, pos, parse_float)
557
- array.append(val)
558
- pos = toml_skip_comments_and_array_ws(src, pos)
559
-
560
- c = src[pos:pos + 1]
561
- if c == ']':
562
- return pos + 1, array
563
- if c != ',':
564
- raise toml_suffixed_err(src, pos, 'Unclosed array')
565
- pos += 1
566
-
567
- pos = toml_skip_comments_and_array_ws(src, pos)
568
- if src.startswith(']', pos):
569
- return pos + 1, array
570
-
571
-
572
- def toml_parse_inline_table(src: str, pos: TomlPos, parse_float: TomlParseFloat) -> ta.Tuple[TomlPos, dict]:
573
- pos += 1
574
- nested_dict = TomlNestedDict()
575
- flags = TomlFlags()
576
-
577
- pos = toml_skip_chars(src, pos, TOML_WS)
578
- if src.startswith('}', pos):
579
- return pos + 1, nested_dict.dict
580
- while True:
581
- pos, key, value = toml_parse_key_value_pair(src, pos, parse_float)
582
- key_parent, key_stem = key[:-1], key[-1]
583
- if flags.is_(key, TomlFlags.FROZEN):
584
- raise toml_suffixed_err(src, pos, f'Cannot mutate immutable namespace {key}')
367
+ def create_dict_rule(self) -> TomlKey:
368
+ self.pos += 1 # Skip "["
369
+ self.skip_chars(self.WS)
370
+ key = self.parse_key()
371
+
372
+ if self.flags.is_(key, TomlFlags.EXPLICIT_NEST) or self.flags.is_(key, TomlFlags.FROZEN):
373
+ raise self.suffixed_err(f'Cannot declare {key} twice')
374
+ self.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
585
375
  try:
586
- nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
376
+ self.data.get_or_create_nest(key)
587
377
  except KeyError:
588
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
589
- if key_stem in nest:
590
- raise toml_suffixed_err(src, pos, f'Duplicate inline table key {key_stem!r}')
591
- nest[key_stem] = value
592
- pos = toml_skip_chars(src, pos, TOML_WS)
593
- c = src[pos:pos + 1]
594
- if c == '}':
595
- return pos + 1, nested_dict.dict
596
- if c != ',':
597
- raise toml_suffixed_err(src, pos, 'Unclosed inline table')
598
- if isinstance(value, (dict, list)):
599
- flags.set(key, TomlFlags.FROZEN, recursive=True)
600
- pos += 1
601
- pos = toml_skip_chars(src, pos, TOML_WS)
602
-
603
-
604
- def toml_parse_basic_str_escape(
605
- src: str,
606
- pos: TomlPos,
607
- *,
608
- multiline: bool = False,
609
- ) -> ta.Tuple[TomlPos, str]:
610
- escape_id = src[pos:pos + 2]
611
- pos += 2
612
- if multiline and escape_id in {'\\ ', '\\\t', '\\\n'}:
613
- # Skip whitespace until next non-whitespace character or end of the doc. Error if non-whitespace is found before
614
- # newline.
615
- if escape_id != '\\\n':
616
- pos = toml_skip_chars(src, pos, TOML_WS)
617
- try:
618
- char = src[pos]
619
- except IndexError:
620
- return pos, ''
621
- if char != '\n':
622
- raise toml_suffixed_err(src, pos, "Unescaped '\\' in a string")
623
- pos += 1
624
- pos = toml_skip_chars(src, pos, TOML_WS_AND_NEWLINE)
625
- return pos, ''
626
- if escape_id == '\\u':
627
- return toml_parse_hex_char(src, pos, 4)
628
- if escape_id == '\\U':
629
- return toml_parse_hex_char(src, pos, 8)
630
- try:
631
- return pos, TOML_BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
632
- except KeyError:
633
- raise toml_suffixed_err(src, pos, "Unescaped '\\' in a string") from None
378
+ raise self.suffixed_err('Cannot overwrite a value') from None
379
+
380
+ if not self.src.startswith(']', self.pos):
381
+ raise self.suffixed_err("Expected ']' at the end of a table declaration")
382
+ self.pos += 1
383
+ return key
384
+
385
+ def create_list_rule(self) -> TomlKey:
386
+ self.pos += 2 # Skip "[["
387
+ self.skip_chars(self.WS)
388
+ key = self.parse_key()
389
+
390
+ if self.flags.is_(key, TomlFlags.FROZEN):
391
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {key}')
392
+ # Free the namespace now that it points to another empty list item...
393
+ self.flags.unset_all(key)
394
+ # ...but this key precisely is still prohibited from table declaration
395
+ self.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
396
+ try:
397
+ self.data.append_nest_to_list(key)
398
+ except KeyError:
399
+ raise self.suffixed_err('Cannot overwrite a value') from None
634
400
 
401
+ if not self.src.startswith(']]', self.pos):
402
+ raise self.suffixed_err("Expected ']]' at the end of an array declaration")
403
+ self.pos += 2
404
+ return key
635
405
 
636
- def toml_parse_basic_str_escape_multiline(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
637
- return toml_parse_basic_str_escape(src, pos, multiline=True)
406
+ def key_value_rule(self, header: TomlKey) -> None:
407
+ key, value = self.parse_key_value_pair()
408
+ key_parent, key_stem = key[:-1], key[-1]
409
+ abs_key_parent = header + key_parent
638
410
 
411
+ relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
412
+ for cont_key in relative_path_cont_keys:
413
+ # Check that dotted key syntax does not redefine an existing table
414
+ if self.flags.is_(cont_key, TomlFlags.EXPLICIT_NEST):
415
+ raise self.suffixed_err(f'Cannot redefine namespace {cont_key}')
416
+ # Containers in the relative path can't be opened with the table syntax or dotted key/value syntax in
417
+ # following table sections.
418
+ self.flags.add_pending(cont_key, TomlFlags.EXPLICIT_NEST)
639
419
 
640
- def toml_parse_hex_char(src: str, pos: TomlPos, hex_len: int) -> ta.Tuple[TomlPos, str]:
641
- hex_str = src[pos:pos + hex_len]
642
- if len(hex_str) != hex_len or not TOML_HEXDIGIT_CHARS.issuperset(hex_str):
643
- raise toml_suffixed_err(src, pos, 'Invalid hex value')
644
- pos += hex_len
645
- hex_int = int(hex_str, 16)
646
- if not toml_is_unicode_scalar_value(hex_int):
647
- raise toml_suffixed_err(src, pos, 'Escaped character is not a Unicode scalar value')
648
- return pos, chr(hex_int)
420
+ if self.flags.is_(abs_key_parent, TomlFlags.FROZEN):
421
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {abs_key_parent}')
649
422
 
423
+ try:
424
+ nest = self.data.get_or_create_nest(abs_key_parent)
425
+ except KeyError:
426
+ raise self.suffixed_err('Cannot overwrite a value') from None
427
+ if key_stem in nest:
428
+ raise self.suffixed_err('Cannot overwrite a value')
429
+ # Mark inline table and array namespaces recursively immutable
430
+ if isinstance(value, (dict, list)):
431
+ self.flags.set(header + key, TomlFlags.FROZEN, recursive=True)
432
+ nest[key_stem] = value
650
433
 
651
- def toml_parse_literal_str(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
652
- pos += 1 # Skip starting apostrophe
653
- start_pos = pos
654
- pos = toml_skip_until(
655
- src, pos, "'", error_on=TOML_ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True,
656
- )
657
- return pos + 1, src[start_pos:pos] # Skip ending apostrophe
658
-
659
-
660
- def toml_parse_multiline_str(src: str, pos: TomlPos, *, literal: bool) -> ta.Tuple[TomlPos, str]:
661
- pos += 3
662
- if src.startswith('\n', pos):
663
- pos += 1
664
-
665
- if literal:
666
- delim = "'"
667
- end_pos = toml_skip_until(
668
- src,
669
- pos,
670
- "'''",
671
- error_on=TOML_ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
672
- error_on_eof=True,
673
- )
674
- result = src[pos:end_pos]
675
- pos = end_pos + 3
676
- else:
677
- delim = '"'
678
- pos, result = toml_parse_basic_str(src, pos, multiline=True)
679
-
680
- # Add at maximum two extra apostrophes/quotes if the end sequence is 4 or 5 chars long instead of just 3.
681
- if not src.startswith(delim, pos):
682
- return pos, result
683
- pos += 1
684
- if not src.startswith(delim, pos):
685
- return pos, result + delim
686
- pos += 1
687
- return pos, result + (delim * 2)
688
-
689
-
690
- def toml_parse_basic_str(src: str, pos: TomlPos, *, multiline: bool) -> ta.Tuple[TomlPos, str]:
691
- if multiline:
692
- error_on = TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS
693
- parse_escapes = toml_parse_basic_str_escape_multiline
694
- else:
695
- error_on = TOML_ILLEGAL_BASIC_STR_CHARS
696
- parse_escapes = toml_parse_basic_str_escape
697
- result = ''
698
- start_pos = pos
699
- while True:
434
+ def parse_key_value_pair(self) -> ta.Tuple[TomlKey, ta.Any]:
435
+ key = self.parse_key()
700
436
  try:
701
- char = src[pos]
437
+ char: ta.Optional[str] = self.src[self.pos]
702
438
  except IndexError:
703
- raise toml_suffixed_err(src, pos, 'Unterminated string') from None
439
+ char = None
440
+ if char != '=':
441
+ raise self.suffixed_err("Expected '=' after a key in a key/value pair")
442
+ self.pos += 1
443
+ self.skip_chars(self.WS)
444
+ value = self.parse_value()
445
+ return key, value
446
+
447
+ def parse_key(self) -> TomlKey:
448
+ key_part = self.parse_key_part()
449
+ key: TomlKey = (key_part,)
450
+ self.skip_chars(self.WS)
451
+ while True:
452
+ try:
453
+ char: ta.Optional[str] = self.src[self.pos]
454
+ except IndexError:
455
+ char = None
456
+ if char != '.':
457
+ return key
458
+ self.pos += 1
459
+ self.skip_chars(self.WS)
460
+ key_part = self.parse_key_part()
461
+ key += (key_part,)
462
+ self.skip_chars(self.WS)
463
+
464
+ def parse_key_part(self) -> str:
465
+ try:
466
+ char: ta.Optional[str] = self.src[self.pos]
467
+ except IndexError:
468
+ char = None
469
+ if char in self.BARE_KEY_CHARS:
470
+ start_pos = self.pos
471
+ self.skip_chars(self.BARE_KEY_CHARS)
472
+ return self.src[start_pos:self.pos]
473
+ if char == "'":
474
+ return self.parse_literal_str()
704
475
  if char == '"':
705
- if not multiline:
706
- return pos + 1, result + src[start_pos:pos]
707
- if src.startswith('"""', pos):
708
- return pos + 3, result + src[start_pos:pos]
709
- pos += 1
710
- continue
711
- if char == '\\':
712
- result += src[start_pos:pos]
713
- pos, parsed_escape = parse_escapes(src, pos)
714
- result += parsed_escape
715
- start_pos = pos
716
- continue
717
- if char in error_on:
718
- raise toml_suffixed_err(src, pos, f'Illegal character {char!r}')
719
- pos += 1
720
-
721
-
722
- def toml_parse_value( # noqa: C901
723
- src: str,
724
- pos: TomlPos,
725
- parse_float: TomlParseFloat,
726
- ) -> ta.Tuple[TomlPos, ta.Any]:
727
- try:
728
- char: ta.Optional[str] = src[pos]
729
- except IndexError:
730
- char = None
731
-
732
- # IMPORTANT: order conditions based on speed of checking and likelihood
733
-
734
- # Basic strings
735
- if char == '"':
736
- if src.startswith('"""', pos):
737
- return toml_parse_multiline_str(src, pos, literal=False)
738
- return toml_parse_one_line_basic_str(src, pos)
739
-
740
- # Literal strings
741
- if char == "'":
742
- if src.startswith("'''", pos):
743
- return toml_parse_multiline_str(src, pos, literal=True)
744
- return toml_parse_literal_str(src, pos)
745
-
746
- # Booleans
747
- if char == 't':
748
- if src.startswith('true', pos):
749
- return pos + 4, True
750
- if char == 'f':
751
- if src.startswith('false', pos):
752
- return pos + 5, False
753
-
754
- # Arrays
755
- if char == '[':
756
- return toml_parse_array(src, pos, parse_float)
757
-
758
- # Inline tables
759
- if char == '{':
760
- return toml_parse_inline_table(src, pos, parse_float)
761
-
762
- # Dates and times
763
- datetime_match = TOML_RE_DATETIME.match(src, pos)
764
- if datetime_match:
476
+ return self.parse_one_line_basic_str()
477
+ raise self.suffixed_err('Invalid initial character for a key part')
478
+
479
+ def parse_one_line_basic_str(self) -> str:
480
+ self.pos += 1
481
+ return self.parse_basic_str(multiline=False)
482
+
483
+ def parse_array(self) -> list:
484
+ self.pos += 1
485
+ array: list = []
486
+
487
+ self.skip_comments_and_array_ws()
488
+ if self.src.startswith(']', self.pos):
489
+ self.pos += 1
490
+ return array
491
+ while True:
492
+ val = self.parse_value()
493
+ array.append(val)
494
+ self.skip_comments_and_array_ws()
495
+
496
+ c = self.src[self.pos:self.pos + 1]
497
+ if c == ']':
498
+ self.pos += 1
499
+ return array
500
+ if c != ',':
501
+ raise self.suffixed_err('Unclosed array')
502
+ self.pos += 1
503
+
504
+ self.skip_comments_and_array_ws()
505
+ if self.src.startswith(']', self.pos):
506
+ self.pos += 1
507
+ return array
508
+
509
+ def parse_inline_table(self) -> dict:
510
+ self.pos += 1
511
+ nested_dict = TomlNestedDict()
512
+ flags = TomlFlags()
513
+
514
+ self.skip_chars(self.WS)
515
+ if self.src.startswith('}', self.pos):
516
+ self.pos += 1
517
+ return nested_dict.dict
518
+ while True:
519
+ key, value = self.parse_key_value_pair()
520
+ key_parent, key_stem = key[:-1], key[-1]
521
+ if flags.is_(key, TomlFlags.FROZEN):
522
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {key}')
523
+ try:
524
+ nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
525
+ except KeyError:
526
+ raise self.suffixed_err('Cannot overwrite a value') from None
527
+ if key_stem in nest:
528
+ raise self.suffixed_err(f'Duplicate inline table key {key_stem!r}')
529
+ nest[key_stem] = value
530
+ self.skip_chars(self.WS)
531
+ c = self.src[self.pos:self.pos + 1]
532
+ if c == '}':
533
+ self.pos += 1
534
+ return nested_dict.dict
535
+ if c != ',':
536
+ raise self.suffixed_err('Unclosed inline table')
537
+ if isinstance(value, (dict, list)):
538
+ flags.set(key, TomlFlags.FROZEN, recursive=True)
539
+ self.pos += 1
540
+ self.skip_chars(self.WS)
541
+
542
+ def parse_basic_str_escape(self, multiline: bool = False) -> str:
543
+ escape_id = self.src[self.pos:self.pos + 2]
544
+ self.pos += 2
545
+ if multiline and escape_id in {'\\ ', '\\\t', '\\\n'}:
546
+ # Skip whitespace until next non-whitespace character or end of the doc. Error if non-whitespace is found
547
+ # before newline.
548
+ if escape_id != '\\\n':
549
+ self.skip_chars(self.WS)
550
+ try:
551
+ char = self.src[self.pos]
552
+ except IndexError:
553
+ return ''
554
+ if char != '\n':
555
+ raise self.suffixed_err("Unescaped '\\' in a string")
556
+ self.pos += 1
557
+ self.skip_chars(self.WS_AND_NEWLINE)
558
+ return ''
559
+ if escape_id == '\\u':
560
+ return self.parse_hex_char(4)
561
+ if escape_id == '\\U':
562
+ return self.parse_hex_char(8)
765
563
  try:
766
- datetime_obj = toml_match_to_datetime(datetime_match)
767
- except ValueError as e:
768
- raise toml_suffixed_err(src, pos, 'Invalid date or datetime') from e
769
- return datetime_match.end(), datetime_obj
770
- localtime_match = TOML_RE_LOCALTIME.match(src, pos)
771
- if localtime_match:
772
- return localtime_match.end(), toml_match_to_localtime(localtime_match)
773
-
774
- # Integers and "normal" floats. The regex will greedily match any type starting with a decimal char, so needs to be
775
- # located after handling of dates and times.
776
- number_match = TOML_RE_NUMBER.match(src, pos)
777
- if number_match:
778
- return number_match.end(), toml_match_to_number(number_match, parse_float)
779
-
780
- # Special floats
781
- first_three = src[pos:pos + 3]
782
- if first_three in {'inf', 'nan'}:
783
- return pos + 3, parse_float(first_three)
784
- first_four = src[pos:pos + 4]
785
- if first_four in {'-inf', '+inf', '-nan', '+nan'}:
786
- return pos + 4, parse_float(first_four)
787
-
788
- raise toml_suffixed_err(src, pos, 'Invalid value')
789
-
790
-
791
- def toml_suffixed_err(src: str, pos: TomlPos, msg: str) -> TomlDecodeError:
792
- """Return a `TomlDecodeError` where error message is suffixed with coordinates in source."""
793
-
794
- def coord_repr(src: str, pos: TomlPos) -> str:
795
- if pos >= len(src):
564
+ return self.BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
565
+ except KeyError:
566
+ raise self.suffixed_err("Unescaped '\\' in a string") from None
567
+
568
+ def parse_basic_str_escape_multiline(self) -> str:
569
+ return self.parse_basic_str_escape(multiline=True)
570
+
571
+ @classmethod
572
+ def is_unicode_scalar_value(cls, codepoint: int) -> bool:
573
+ return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
574
+
575
+ def parse_hex_char(self, hex_len: int) -> str:
576
+ hex_str = self.src[self.pos:self.pos + hex_len]
577
+ if len(hex_str) != hex_len or not self.HEXDIGIT_CHARS.issuperset(hex_str):
578
+ raise self.suffixed_err('Invalid hex value')
579
+ self.pos += hex_len
580
+ hex_int = int(hex_str, 16)
581
+ if not self.is_unicode_scalar_value(hex_int):
582
+ raise self.suffixed_err('Escaped character is not a Unicode scalar value')
583
+ return chr(hex_int)
584
+
585
+ def parse_literal_str(self) -> str:
586
+ self.pos += 1 # Skip starting apostrophe
587
+ start_pos = self.pos
588
+ self.skip_until("'", error_on=self.ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True)
589
+ end_pos = self.pos
590
+ self.pos += 1
591
+ return self.src[start_pos:end_pos] # Skip ending apostrophe
592
+
593
+ def parse_multiline_str(self, *, literal: bool) -> str:
594
+ self.pos += 3
595
+ if self.src.startswith('\n', self.pos):
596
+ self.pos += 1
597
+
598
+ if literal:
599
+ delim = "'"
600
+ start_pos = self.pos
601
+ self.skip_until(
602
+ "'''",
603
+ error_on=self.ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
604
+ error_on_eof=True,
605
+ )
606
+ result = self.src[start_pos:self.pos]
607
+ self.pos += 3
608
+ else:
609
+ delim = '"'
610
+ result = self.parse_basic_str(multiline=True)
611
+
612
+ # Add at maximum two extra apostrophes/quotes if the end sequence is 4 or 5 chars long instead of just 3.
613
+ if not self.src.startswith(delim, self.pos):
614
+ return result
615
+ self.pos += 1
616
+ if not self.src.startswith(delim, self.pos):
617
+ return result + delim
618
+ self.pos += 1
619
+ return result + (delim * 2)
620
+
621
+ def parse_basic_str(self, *, multiline: bool) -> str:
622
+ if multiline:
623
+ error_on = self.ILLEGAL_MULTILINE_BASIC_STR_CHARS
624
+ parse_escapes = self.parse_basic_str_escape_multiline
625
+ else:
626
+ error_on = self.ILLEGAL_BASIC_STR_CHARS
627
+ parse_escapes = self.parse_basic_str_escape
628
+ result = ''
629
+ start_pos = self.pos
630
+ while True:
631
+ try:
632
+ char = self.src[self.pos]
633
+ except IndexError:
634
+ raise self.suffixed_err('Unterminated string') from None
635
+ if char == '"':
636
+ if not multiline:
637
+ end_pos = self.pos
638
+ self.pos += 1
639
+ return result + self.src[start_pos:end_pos]
640
+ if self.src.startswith('"""', self.pos):
641
+ end_pos = self.pos
642
+ self.pos += 3
643
+ return result + self.src[start_pos:end_pos]
644
+ self.pos += 1
645
+ continue
646
+ if char == '\\':
647
+ result += self.src[start_pos:self.pos]
648
+ parsed_escape = parse_escapes()
649
+ result += parsed_escape
650
+ start_pos = self.pos
651
+ continue
652
+ if char in error_on:
653
+ raise self.suffixed_err(f'Illegal character {char!r}')
654
+ self.pos += 1
655
+
656
+ def parse_value(self) -> ta.Any: # noqa: C901
657
+ try:
658
+ char: ta.Optional[str] = self.src[self.pos]
659
+ except IndexError:
660
+ char = None
661
+
662
+ # IMPORTANT: order conditions based on speed of checking and likelihood
663
+
664
+ # Basic strings
665
+ if char == '"':
666
+ if self.src.startswith('"""', self.pos):
667
+ return self.parse_multiline_str(literal=False)
668
+ return self.parse_one_line_basic_str()
669
+
670
+ # Literal strings
671
+ if char == "'":
672
+ if self.src.startswith("'''", self.pos):
673
+ return self.parse_multiline_str(literal=True)
674
+ return self.parse_literal_str()
675
+
676
+ # Booleans
677
+ if char == 't':
678
+ if self.src.startswith('true', self.pos):
679
+ self.pos += 4
680
+ return True
681
+ if char == 'f':
682
+ if self.src.startswith('false', self.pos):
683
+ self.pos += 5
684
+ return False
685
+
686
+ # Arrays
687
+ if char == '[':
688
+ return self.parse_array()
689
+
690
+ # Inline tables
691
+ if char == '{':
692
+ return self.parse_inline_table()
693
+
694
+ # Dates and times
695
+ datetime_match = self.RE_DATETIME.match(self.src, self.pos)
696
+ if datetime_match:
697
+ try:
698
+ datetime_obj = self.match_to_datetime(datetime_match)
699
+ except ValueError as e:
700
+ raise self.suffixed_err('Invalid date or datetime') from e
701
+ self.pos = datetime_match.end()
702
+ return datetime_obj
703
+ localtime_match = self.RE_LOCALTIME.match(self.src, self.pos)
704
+ if localtime_match:
705
+ self.pos = localtime_match.end()
706
+ return self.match_to_localtime(localtime_match)
707
+
708
+ # Integers and "normal" floats. The regex will greedily match any type starting with a decimal char, so needs to
709
+ # be located after handling of dates and times.
710
+ number_match = self.RE_NUMBER.match(self.src, self.pos)
711
+ if number_match:
712
+ self.pos = number_match.end()
713
+ return self.match_to_number(number_match, self.parse_float)
714
+
715
+ # Special floats
716
+ first_three = self.src[self.pos:self.pos + 3]
717
+ if first_three in {'inf', 'nan'}:
718
+ self.pos += 3
719
+ return self.parse_float(first_three)
720
+ first_four = self.src[self.pos:self.pos + 4]
721
+ if first_four in {'-inf', '+inf', '-nan', '+nan'}:
722
+ self.pos += 4
723
+ return self.parse_float(first_four)
724
+
725
+ raise self.suffixed_err('Invalid value')
726
+
727
+ def coord_repr(self, pos: TomlPos) -> str:
728
+ if pos >= len(self.src):
796
729
  return 'end of document'
797
- line = src.count('\n', 0, pos) + 1
730
+ line = self.src.count('\n', 0, pos) + 1
798
731
  if line == 1:
799
732
  column = pos + 1
800
733
  else:
801
- column = pos - src.rindex('\n', 0, pos)
734
+ column = pos - self.src.rindex('\n', 0, pos)
802
735
  return f'line {line}, column {column}'
803
736
 
804
- return TomlDecodeError(f'{msg} (at {coord_repr(src, pos)})')
805
-
737
+ def suffixed_err(self, msg: str, *, pos: ta.Optional[TomlPos] = None) -> TomlDecodeError:
738
+ """Return a `TomlDecodeError` where error message is suffixed with coordinates in source."""
806
739
 
807
- def toml_is_unicode_scalar_value(codepoint: int) -> bool:
808
- return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
740
+ if pos is None:
741
+ pos = self.pos
742
+ return TomlDecodeError(f'{msg} (at {self.coord_repr(pos)})')
809
743
 
744
+ _TIME_RE_STR = r'([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?'
810
745
 
811
- def toml_make_safe_parse_float(parse_float: TomlParseFloat) -> TomlParseFloat:
812
- """A decorator to make `parse_float` safe.
813
-
814
- `parse_float` must not return dicts or lists, because these types would be mixed with parsed TOML tables and arrays,
815
- thus confusing the parser. The returned decorated callable raises `ValueError` instead of returning illegal types.
816
- """
817
- # The default `float` callable never returns illegal types. Optimize it.
818
- if parse_float is float:
819
- return float
746
+ RE_NUMBER = re.compile(
747
+ r"""
748
+ 0
749
+ (?:
750
+ x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
751
+ |
752
+ b[01](?:_?[01])* # bin
753
+ |
754
+ o[0-7](?:_?[0-7])* # oct
755
+ )
756
+ |
757
+ [+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
758
+ (?P<floatpart>
759
+ (?:\.[0-9](?:_?[0-9])*)? # optional fractional part
760
+ (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
761
+ )
762
+ """,
763
+ flags=re.VERBOSE,
764
+ )
820
765
 
821
- def safe_parse_float(float_str: str) -> ta.Any:
822
- float_value = parse_float(float_str)
823
- if isinstance(float_value, (dict, list)):
824
- raise ValueError('parse_float must not return dicts or lists') # noqa
825
- return float_value
766
+ RE_LOCALTIME = re.compile(_TIME_RE_STR)
767
+
768
+ RE_DATETIME = re.compile(
769
+ rf"""
770
+ ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
771
+ (?:
772
+ [Tt ]
773
+ {_TIME_RE_STR}
774
+ (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
775
+ )?
776
+ """,
777
+ flags=re.VERBOSE,
778
+ )
826
779
 
827
- return safe_parse_float
780
+ @classmethod
781
+ def match_to_datetime(cls, match: re.Match) -> ta.Union[datetime.datetime, datetime.date]:
782
+ """
783
+ Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
784
+
785
+ Raises ValueError if the match does not correspond to a valid date or datetime.
786
+ """
787
+
788
+ (
789
+ year_str,
790
+ month_str,
791
+ day_str,
792
+ hour_str,
793
+ minute_str,
794
+ sec_str,
795
+ micros_str,
796
+ zulu_time,
797
+ offset_sign_str,
798
+ offset_hour_str,
799
+ offset_minute_str,
800
+ ) = match.groups()
801
+ year, month, day = int(year_str), int(month_str), int(day_str)
802
+ if hour_str is None:
803
+ return datetime.date(year, month, day)
804
+ hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
805
+ micros = int(micros_str.ljust(6, '0')) if micros_str else 0
806
+ if offset_sign_str:
807
+ tz: ta.Optional[datetime.tzinfo] = toml_cached_tz(
808
+ offset_hour_str, offset_minute_str, offset_sign_str,
809
+ )
810
+ elif zulu_time:
811
+ tz = datetime.UTC
812
+ else: # local date-time
813
+ tz = None
814
+ return datetime.datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
815
+
816
+ @classmethod
817
+ def match_to_localtime(cls, match: re.Match) -> datetime.time:
818
+ hour_str, minute_str, sec_str, micros_str = match.groups()
819
+ micros = int(micros_str.ljust(6, '0')) if micros_str else 0
820
+ return datetime.time(int(hour_str), int(minute_str), int(sec_str), micros)
821
+
822
+ @classmethod
823
+ def match_to_number(cls, match: re.Match, parse_float: TomlParseFloat) -> ta.Any:
824
+ if match.group('floatpart'):
825
+ return parse_float(match.group())
826
+ return int(match.group(), 0)