ominfra 0.0.0.dev268__py3-none-any.whl → 0.0.0.dev269__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -186,75 +186,6 @@ def render_ini_sections(
186
186
  ##
187
187
 
188
188
 
189
- _TOML_TIME_RE_STR = r'([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?'
190
-
191
- TOML_RE_NUMBER = re.compile(
192
- r"""
193
- 0
194
- (?:
195
- x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
196
- |
197
- b[01](?:_?[01])* # bin
198
- |
199
- o[0-7](?:_?[0-7])* # oct
200
- )
201
- |
202
- [+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
203
- (?P<floatpart>
204
- (?:\.[0-9](?:_?[0-9])*)? # optional fractional part
205
- (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
206
- )
207
- """,
208
- flags=re.VERBOSE,
209
- )
210
- TOML_RE_LOCALTIME = re.compile(_TOML_TIME_RE_STR)
211
- TOML_RE_DATETIME = re.compile(
212
- rf"""
213
- ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
214
- (?:
215
- [Tt ]
216
- {_TOML_TIME_RE_STR}
217
- (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
218
- )?
219
- """,
220
- flags=re.VERBOSE,
221
- )
222
-
223
-
224
- def toml_match_to_datetime(match: re.Match) -> ta.Union[datetime.datetime, datetime.date]:
225
- """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
226
-
227
- Raises ValueError if the match does not correspond to a valid date or datetime.
228
- """
229
- (
230
- year_str,
231
- month_str,
232
- day_str,
233
- hour_str,
234
- minute_str,
235
- sec_str,
236
- micros_str,
237
- zulu_time,
238
- offset_sign_str,
239
- offset_hour_str,
240
- offset_minute_str,
241
- ) = match.groups()
242
- year, month, day = int(year_str), int(month_str), int(day_str)
243
- if hour_str is None:
244
- return datetime.date(year, month, day)
245
- hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
246
- micros = int(micros_str.ljust(6, '0')) if micros_str else 0
247
- if offset_sign_str:
248
- tz: ta.Optional[datetime.tzinfo] = toml_cached_tz(
249
- offset_hour_str, offset_minute_str, offset_sign_str,
250
- )
251
- elif zulu_time:
252
- tz = datetime.UTC
253
- else: # local date-time
254
- tz = None
255
- return datetime.datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
256
-
257
-
258
189
  @functools.lru_cache() # noqa
259
190
  def toml_cached_tz(hour_str: str, minute_str: str, sign_str: str) -> datetime.timezone:
260
191
  sign = 1 if sign_str == '+' else -1
@@ -266,47 +197,25 @@ def toml_cached_tz(hour_str: str, minute_str: str, sign_str: str) -> datetime.ti
266
197
  )
267
198
 
268
199
 
269
- def toml_match_to_localtime(match: re.Match) -> datetime.time:
270
- hour_str, minute_str, sec_str, micros_str = match.groups()
271
- micros = int(micros_str.ljust(6, '0')) if micros_str else 0
272
- return datetime.time(int(hour_str), int(minute_str), int(sec_str), micros)
273
-
274
-
275
- def toml_match_to_number(match: re.Match, parse_float: TomlParseFloat) -> ta.Any:
276
- if match.group('floatpart'):
277
- return parse_float(match.group())
278
- return int(match.group(), 0)
279
-
280
-
281
- TOML_ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
282
-
283
- # Neither of these sets include quotation mark or backslash. They are currently handled as separate cases in the parser
284
- # functions.
285
- TOML_ILLEGAL_BASIC_STR_CHARS = TOML_ASCII_CTRL - frozenset('\t')
286
- TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS = TOML_ASCII_CTRL - frozenset('\t\n')
200
+ def toml_make_safe_parse_float(parse_float: TomlParseFloat) -> TomlParseFloat:
201
+ """
202
+ A decorator to make `parse_float` safe.
287
203
 
288
- TOML_ILLEGAL_LITERAL_STR_CHARS = TOML_ILLEGAL_BASIC_STR_CHARS
289
- TOML_ILLEGAL_MULTILINE_LITERAL_STR_CHARS = TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS
204
+ `parse_float` must not return dicts or lists, because these types would be mixed with parsed TOML tables and arrays,
205
+ thus confusing the parser. The returned decorated callable raises `ValueError` instead of returning illegal types.
206
+ """
290
207
 
291
- TOML_ILLEGAL_COMMENT_CHARS = TOML_ILLEGAL_BASIC_STR_CHARS
208
+ # The default `float` callable never returns illegal types. Optimize it.
209
+ if parse_float is float:
210
+ return float
292
211
 
293
- TOML_WS = frozenset(' \t')
294
- TOML_WS_AND_NEWLINE = TOML_WS | frozenset('\n')
295
- TOML_BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + '-_')
296
- TOML_KEY_INITIAL_CHARS = TOML_BARE_KEY_CHARS | frozenset("\"'")
297
- TOML_HEXDIGIT_CHARS = frozenset(string.hexdigits)
212
+ def safe_parse_float(float_str: str) -> ta.Any:
213
+ float_value = parse_float(float_str)
214
+ if isinstance(float_value, (dict, list)):
215
+ raise ValueError('parse_float must not return dicts or lists') # noqa
216
+ return float_value
298
217
 
299
- TOML_BASIC_STR_ESCAPE_REPLACEMENTS = types.MappingProxyType(
300
- {
301
- '\\b': '\u0008', # backspace
302
- '\\t': '\u0009', # tab
303
- '\\n': '\u000A', # linefeed
304
- '\\f': '\u000C', # form feed
305
- '\\r': '\u000D', # carriage return
306
- '\\"': '\u0022', # quote
307
- '\\\\': '\u005C', # backslash
308
- },
309
- )
218
+ return safe_parse_float
310
219
 
311
220
 
312
221
  class TomlDecodeError(ValueError):
@@ -331,63 +240,15 @@ def toml_loads(s: str, /, *, parse_float: TomlParseFloat = float) -> ta.Dict[str
331
240
  src = s.replace('\r\n', '\n')
332
241
  except (AttributeError, TypeError):
333
242
  raise TypeError(f"Expected str object, not '{type(s).__qualname__}'") from None
334
- pos = 0
335
- out = TomlOutput(TomlNestedDict(), TomlFlags())
336
- header: TomlKey = ()
337
- parse_float = toml_make_safe_parse_float(parse_float)
338
-
339
- # Parse one statement at a time (typically means one line in TOML source)
340
- while True:
341
- # 1. Skip line leading whitespace
342
- pos = toml_skip_chars(src, pos, TOML_WS)
343
-
344
- # 2. Parse rules. Expect one of the following:
345
- # - end of file
346
- # - end of line
347
- # - comment
348
- # - key/value pair
349
- # - append dict to list (and move to its namespace)
350
- # - create dict (and move to its namespace)
351
- # Skip trailing whitespace when applicable.
352
- try:
353
- char = src[pos]
354
- except IndexError:
355
- break
356
- if char == '\n':
357
- pos += 1
358
- continue
359
- if char in TOML_KEY_INITIAL_CHARS:
360
- pos = toml_key_value_rule(src, pos, out, header, parse_float)
361
- pos = toml_skip_chars(src, pos, TOML_WS)
362
- elif char == '[':
363
- try:
364
- second_char: ta.Optional[str] = src[pos + 1]
365
- except IndexError:
366
- second_char = None
367
- out.flags.finalize_pending()
368
- if second_char == '[':
369
- pos, header = toml_create_list_rule(src, pos, out)
370
- else:
371
- pos, header = toml_create_dict_rule(src, pos, out)
372
- pos = toml_skip_chars(src, pos, TOML_WS)
373
- elif char != '#':
374
- raise toml_suffixed_err(src, pos, 'Invalid statement')
375
243
 
376
- # 3. Skip comment
377
- pos = toml_skip_comment(src, pos)
244
+ parse_float = toml_make_safe_parse_float(parse_float)
378
245
 
379
- # 4. Expect end of line or end of file
380
- try:
381
- char = src[pos]
382
- except IndexError:
383
- break
384
- if char != '\n':
385
- raise toml_suffixed_err(
386
- src, pos, 'Expected newline or end of document after a statement',
387
- )
388
- pos += 1
246
+ parser = TomlParser(
247
+ src,
248
+ parse_float=parse_float,
249
+ )
389
250
 
390
- return out.data.dict
251
+ return parser.parse()
391
252
 
392
253
 
393
254
  class TomlFlags:
@@ -399,6 +260,8 @@ class TomlFlags:
399
260
  EXPLICIT_NEST = 1
400
261
 
401
262
  def __init__(self) -> None:
263
+ super().__init__()
264
+
402
265
  self._flags: ta.Dict[str, dict] = {}
403
266
  self._pending_flags: ta.Set[ta.Tuple[TomlKey, int]] = set()
404
267
 
@@ -449,6 +312,8 @@ class TomlFlags:
449
312
 
450
313
  class TomlNestedDict:
451
314
  def __init__(self) -> None:
315
+ super().__init__()
316
+
452
317
  # The parsed content of the TOML document
453
318
  self.dict: ta.Dict[str, ta.Any] = {}
454
319
 
@@ -481,479 +346,613 @@ class TomlNestedDict:
481
346
  cont[last_key] = [{}]
482
347
 
483
348
 
484
- class TomlOutput(ta.NamedTuple):
485
- data: TomlNestedDict
486
- flags: TomlFlags
349
+ class TomlParser:
350
+ def __init__(
351
+ self,
352
+ src: str,
353
+ *,
354
+ parse_float: TomlParseFloat = float,
355
+ ) -> None:
356
+ super().__init__()
487
357
 
358
+ self.src = src
488
359
 
489
- def toml_skip_chars(src: str, pos: TomlPos, chars: ta.Iterable[str]) -> TomlPos:
490
- try:
491
- while src[pos] in chars:
492
- pos += 1
493
- except IndexError:
494
- pass
495
- return pos
360
+ self.parse_float = parse_float
496
361
 
362
+ self.data = TomlNestedDict()
363
+ self.flags = TomlFlags()
364
+ self.pos = 0
497
365
 
498
- def toml_skip_until(
499
- src: str,
500
- pos: TomlPos,
501
- expect: str,
502
- *,
503
- error_on: ta.FrozenSet[str],
504
- error_on_eof: bool,
505
- ) -> TomlPos:
506
- try:
507
- new_pos = src.index(expect, pos)
508
- except ValueError:
509
- new_pos = len(src)
510
- if error_on_eof:
511
- raise toml_suffixed_err(src, new_pos, f'Expected {expect!r}') from None
366
+ ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
512
367
 
513
- if not error_on.isdisjoint(src[pos:new_pos]):
514
- while src[pos] not in error_on:
515
- pos += 1
516
- raise toml_suffixed_err(src, pos, f'Found invalid character {src[pos]!r}')
517
- return new_pos
368
+ # Neither of these sets include quotation mark or backslash. They are currently handled as separate cases in the
369
+ # parser functions.
370
+ ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset('\t')
371
+ ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset('\t\n')
518
372
 
373
+ ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
374
+ ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
519
375
 
520
- def toml_skip_comment(src: str, pos: TomlPos) -> TomlPos:
521
- try:
522
- char: ta.Optional[str] = src[pos]
523
- except IndexError:
524
- char = None
525
- if char == '#':
526
- return toml_skip_until(
527
- src, pos + 1, '\n', error_on=TOML_ILLEGAL_COMMENT_CHARS, error_on_eof=False,
528
- )
529
- return pos
376
+ ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
530
377
 
378
+ WS = frozenset(' \t')
379
+ WS_AND_NEWLINE = WS | frozenset('\n')
380
+ BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + '-_')
381
+ KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
382
+ HEXDIGIT_CHARS = frozenset(string.hexdigits)
531
383
 
532
- def toml_skip_comments_and_array_ws(src: str, pos: TomlPos) -> TomlPos:
533
- while True:
534
- pos_before_skip = pos
535
- pos = toml_skip_chars(src, pos, TOML_WS_AND_NEWLINE)
536
- pos = toml_skip_comment(src, pos)
537
- if pos == pos_before_skip:
538
- return pos
384
+ BASIC_STR_ESCAPE_REPLACEMENTS = types.MappingProxyType({
385
+ '\\b': '\u0008', # backspace
386
+ '\\t': '\u0009', # tab
387
+ '\\n': '\u000A', # linefeed
388
+ '\\f': '\u000C', # form feed
389
+ '\\r': '\u000D', # carriage return
390
+ '\\"': '\u0022', # quote
391
+ '\\\\': '\u005C', # backslash
392
+ })
539
393
 
394
+ def parse(self) -> ta.Dict[str, ta.Any]: # noqa: C901
395
+ header: TomlKey = ()
540
396
 
541
- def toml_create_dict_rule(src: str, pos: TomlPos, out: TomlOutput) -> ta.Tuple[TomlPos, TomlKey]:
542
- pos += 1 # Skip "["
543
- pos = toml_skip_chars(src, pos, TOML_WS)
544
- pos, key = toml_parse_key(src, pos)
397
+ # Parse one statement at a time (typically means one line in TOML source)
398
+ while True:
399
+ # 1. Skip line leading whitespace
400
+ self.skip_chars(self.WS)
401
+
402
+ # 2. Parse rules. Expect one of the following:
403
+ # - end of file
404
+ # - end of line
405
+ # - comment
406
+ # - key/value pair
407
+ # - append dict to list (and move to its namespace)
408
+ # - create dict (and move to its namespace)
409
+ # Skip trailing whitespace when applicable.
410
+ try:
411
+ char = self.src[self.pos]
412
+ except IndexError:
413
+ break
414
+ if char == '\n':
415
+ self.pos += 1
416
+ continue
417
+ if char in self.KEY_INITIAL_CHARS:
418
+ self.key_value_rule(header)
419
+ self.skip_chars(self.WS)
420
+ elif char == '[':
421
+ try:
422
+ second_char: ta.Optional[str] = self.src[self.pos + 1]
423
+ except IndexError:
424
+ second_char = None
425
+ self.flags.finalize_pending()
426
+ if second_char == '[':
427
+ header = self.create_list_rule()
428
+ else:
429
+ header = self.create_dict_rule()
430
+ self.skip_chars(self.WS)
431
+ elif char != '#':
432
+ raise self.suffixed_err('Invalid statement')
545
433
 
546
- if out.flags.is_(key, TomlFlags.EXPLICIT_NEST) or out.flags.is_(key, TomlFlags.FROZEN):
547
- raise toml_suffixed_err(src, pos, f'Cannot declare {key} twice')
548
- out.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
549
- try:
550
- out.data.get_or_create_nest(key)
551
- except KeyError:
552
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
553
-
554
- if not src.startswith(']', pos):
555
- raise toml_suffixed_err(src, pos, "Expected ']' at the end of a table declaration")
556
- return pos + 1, key
557
-
558
-
559
- def toml_create_list_rule(src: str, pos: TomlPos, out: TomlOutput) -> ta.Tuple[TomlPos, TomlKey]:
560
- pos += 2 # Skip "[["
561
- pos = toml_skip_chars(src, pos, TOML_WS)
562
- pos, key = toml_parse_key(src, pos)
563
-
564
- if out.flags.is_(key, TomlFlags.FROZEN):
565
- raise toml_suffixed_err(src, pos, f'Cannot mutate immutable namespace {key}')
566
- # Free the namespace now that it points to another empty list item...
567
- out.flags.unset_all(key)
568
- # ...but this key precisely is still prohibited from table declaration
569
- out.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
570
- try:
571
- out.data.append_nest_to_list(key)
572
- except KeyError:
573
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
574
-
575
- if not src.startswith(']]', pos):
576
- raise toml_suffixed_err(src, pos, "Expected ']]' at the end of an array declaration")
577
- return pos + 2, key
578
-
579
-
580
- def toml_key_value_rule(
581
- src: str,
582
- pos: TomlPos,
583
- out: TomlOutput,
584
- header: TomlKey,
585
- parse_float: TomlParseFloat,
586
- ) -> TomlPos:
587
- pos, key, value = toml_parse_key_value_pair(src, pos, parse_float)
588
- key_parent, key_stem = key[:-1], key[-1]
589
- abs_key_parent = header + key_parent
590
-
591
- relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
592
- for cont_key in relative_path_cont_keys:
593
- # Check that dotted key syntax does not redefine an existing table
594
- if out.flags.is_(cont_key, TomlFlags.EXPLICIT_NEST):
595
- raise toml_suffixed_err(src, pos, f'Cannot redefine namespace {cont_key}')
596
- # Containers in the relative path can't be opened with the table syntax or dotted key/value syntax in following
597
- # table sections.
598
- out.flags.add_pending(cont_key, TomlFlags.EXPLICIT_NEST)
599
-
600
- if out.flags.is_(abs_key_parent, TomlFlags.FROZEN):
601
- raise toml_suffixed_err(
602
- src,
603
- pos,
604
- f'Cannot mutate immutable namespace {abs_key_parent}',
605
- )
434
+ # 3. Skip comment
435
+ self.skip_comment()
606
436
 
607
- try:
608
- nest = out.data.get_or_create_nest(abs_key_parent)
609
- except KeyError:
610
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
611
- if key_stem in nest:
612
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value')
613
- # Mark inline table and array namespaces recursively immutable
614
- if isinstance(value, (dict, list)):
615
- out.flags.set(header + key, TomlFlags.FROZEN, recursive=True)
616
- nest[key_stem] = value
617
- return pos
618
-
619
-
620
- def toml_parse_key_value_pair(
621
- src: str,
622
- pos: TomlPos,
623
- parse_float: TomlParseFloat,
624
- ) -> ta.Tuple[TomlPos, TomlKey, ta.Any]:
625
- pos, key = toml_parse_key(src, pos)
626
- try:
627
- char: ta.Optional[str] = src[pos]
628
- except IndexError:
629
- char = None
630
- if char != '=':
631
- raise toml_suffixed_err(src, pos, "Expected '=' after a key in a key/value pair")
632
- pos += 1
633
- pos = toml_skip_chars(src, pos, TOML_WS)
634
- pos, value = toml_parse_value(src, pos, parse_float)
635
- return pos, key, value
636
-
637
-
638
- def toml_parse_key(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, TomlKey]:
639
- pos, key_part = toml_parse_key_part(src, pos)
640
- key: TomlKey = (key_part,)
641
- pos = toml_skip_chars(src, pos, TOML_WS)
642
- while True:
437
+ # 4. Expect end of line or end of file
438
+ try:
439
+ char = self.src[self.pos]
440
+ except IndexError:
441
+ break
442
+ if char != '\n':
443
+ raise self.suffixed_err('Expected newline or end of document after a statement')
444
+ self.pos += 1
445
+
446
+ return self.data.dict
447
+
448
+ def skip_chars(self, chars: ta.Iterable[str]) -> None:
449
+ try:
450
+ while self.src[self.pos] in chars:
451
+ self.pos += 1
452
+ except IndexError:
453
+ pass
454
+
455
+ def skip_until(
456
+ self,
457
+ expect: str,
458
+ *,
459
+ error_on: ta.FrozenSet[str],
460
+ error_on_eof: bool,
461
+ ) -> None:
643
462
  try:
644
- char: ta.Optional[str] = src[pos]
463
+ new_pos = self.src.index(expect, self.pos)
464
+ except ValueError:
465
+ new_pos = len(self.src)
466
+ if error_on_eof:
467
+ raise self.suffixed_err(f'Expected {expect!r}', pos=new_pos) from None
468
+
469
+ if not error_on.isdisjoint(self.src[self.pos:new_pos]):
470
+ while self.src[self.pos] not in error_on:
471
+ self.pos += 1
472
+ raise self.suffixed_err(f'Found invalid character {self.src[self.pos]!r}')
473
+ self.pos = new_pos
474
+
475
+ def skip_comment(self) -> None:
476
+ try:
477
+ char: ta.Optional[str] = self.src[self.pos]
645
478
  except IndexError:
646
479
  char = None
647
- if char != '.':
648
- return pos, key
649
- pos += 1
650
- pos = toml_skip_chars(src, pos, TOML_WS)
651
- pos, key_part = toml_parse_key_part(src, pos)
652
- key += (key_part,)
653
- pos = toml_skip_chars(src, pos, TOML_WS)
480
+ if char == '#':
481
+ self.pos += 1
482
+ self.skip_until(
483
+ '\n',
484
+ error_on=self.ILLEGAL_COMMENT_CHARS,
485
+ error_on_eof=False,
486
+ )
487
+
488
+ def skip_comments_and_array_ws(self) -> None:
489
+ while True:
490
+ pos_before_skip = self.pos
491
+ self.skip_chars(self.WS_AND_NEWLINE)
492
+ self.skip_comment()
493
+ if self.pos == pos_before_skip:
494
+ return
654
495
 
496
+ def create_dict_rule(self) -> TomlKey:
497
+ self.pos += 1 # Skip "["
498
+ self.skip_chars(self.WS)
499
+ key = self.parse_key()
655
500
 
656
- def toml_parse_key_part(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
657
- try:
658
- char: ta.Optional[str] = src[pos]
659
- except IndexError:
660
- char = None
661
- if char in TOML_BARE_KEY_CHARS:
662
- start_pos = pos
663
- pos = toml_skip_chars(src, pos, TOML_BARE_KEY_CHARS)
664
- return pos, src[start_pos:pos]
665
- if char == "'":
666
- return toml_parse_literal_str(src, pos)
667
- if char == '"':
668
- return toml_parse_one_line_basic_str(src, pos)
669
- raise toml_suffixed_err(src, pos, 'Invalid initial character for a key part')
670
-
671
-
672
- def toml_parse_one_line_basic_str(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
673
- pos += 1
674
- return toml_parse_basic_str(src, pos, multiline=False)
675
-
676
-
677
- def toml_parse_array(src: str, pos: TomlPos, parse_float: TomlParseFloat) -> ta.Tuple[TomlPos, list]:
678
- pos += 1
679
- array: list = []
680
-
681
- pos = toml_skip_comments_and_array_ws(src, pos)
682
- if src.startswith(']', pos):
683
- return pos + 1, array
684
- while True:
685
- pos, val = toml_parse_value(src, pos, parse_float)
686
- array.append(val)
687
- pos = toml_skip_comments_and_array_ws(src, pos)
688
-
689
- c = src[pos:pos + 1]
690
- if c == ']':
691
- return pos + 1, array
692
- if c != ',':
693
- raise toml_suffixed_err(src, pos, 'Unclosed array')
694
- pos += 1
695
-
696
- pos = toml_skip_comments_and_array_ws(src, pos)
697
- if src.startswith(']', pos):
698
- return pos + 1, array
699
-
700
-
701
- def toml_parse_inline_table(src: str, pos: TomlPos, parse_float: TomlParseFloat) -> ta.Tuple[TomlPos, dict]:
702
- pos += 1
703
- nested_dict = TomlNestedDict()
704
- flags = TomlFlags()
705
-
706
- pos = toml_skip_chars(src, pos, TOML_WS)
707
- if src.startswith('}', pos):
708
- return pos + 1, nested_dict.dict
709
- while True:
710
- pos, key, value = toml_parse_key_value_pair(src, pos, parse_float)
501
+ if self.flags.is_(key, TomlFlags.EXPLICIT_NEST) or self.flags.is_(key, TomlFlags.FROZEN):
502
+ raise self.suffixed_err(f'Cannot declare {key} twice')
503
+ self.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
504
+ try:
505
+ self.data.get_or_create_nest(key)
506
+ except KeyError:
507
+ raise self.suffixed_err('Cannot overwrite a value') from None
508
+
509
+ if not self.src.startswith(']', self.pos):
510
+ raise self.suffixed_err("Expected ']' at the end of a table declaration")
511
+ self.pos += 1
512
+ return key
513
+
514
+ def create_list_rule(self) -> TomlKey:
515
+ self.pos += 2 # Skip "[["
516
+ self.skip_chars(self.WS)
517
+ key = self.parse_key()
518
+
519
+ if self.flags.is_(key, TomlFlags.FROZEN):
520
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {key}')
521
+ # Free the namespace now that it points to another empty list item...
522
+ self.flags.unset_all(key)
523
+ # ...but this key precisely is still prohibited from table declaration
524
+ self.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
525
+ try:
526
+ self.data.append_nest_to_list(key)
527
+ except KeyError:
528
+ raise self.suffixed_err('Cannot overwrite a value') from None
529
+
530
+ if not self.src.startswith(']]', self.pos):
531
+ raise self.suffixed_err("Expected ']]' at the end of an array declaration")
532
+ self.pos += 2
533
+ return key
534
+
535
+ def key_value_rule(self, header: TomlKey) -> None:
536
+ key, value = self.parse_key_value_pair()
711
537
  key_parent, key_stem = key[:-1], key[-1]
712
- if flags.is_(key, TomlFlags.FROZEN):
713
- raise toml_suffixed_err(src, pos, f'Cannot mutate immutable namespace {key}')
538
+ abs_key_parent = header + key_parent
539
+
540
+ relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
541
+ for cont_key in relative_path_cont_keys:
542
+ # Check that dotted key syntax does not redefine an existing table
543
+ if self.flags.is_(cont_key, TomlFlags.EXPLICIT_NEST):
544
+ raise self.suffixed_err(f'Cannot redefine namespace {cont_key}')
545
+ # Containers in the relative path can't be opened with the table syntax or dotted key/value syntax in
546
+ # following table sections.
547
+ self.flags.add_pending(cont_key, TomlFlags.EXPLICIT_NEST)
548
+
549
+ if self.flags.is_(abs_key_parent, TomlFlags.FROZEN):
550
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {abs_key_parent}')
551
+
714
552
  try:
715
- nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
553
+ nest = self.data.get_or_create_nest(abs_key_parent)
716
554
  except KeyError:
717
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
555
+ raise self.suffixed_err('Cannot overwrite a value') from None
718
556
  if key_stem in nest:
719
- raise toml_suffixed_err(src, pos, f'Duplicate inline table key {key_stem!r}')
720
- nest[key_stem] = value
721
- pos = toml_skip_chars(src, pos, TOML_WS)
722
- c = src[pos:pos + 1]
723
- if c == '}':
724
- return pos + 1, nested_dict.dict
725
- if c != ',':
726
- raise toml_suffixed_err(src, pos, 'Unclosed inline table')
557
+ raise self.suffixed_err('Cannot overwrite a value')
558
+ # Mark inline table and array namespaces recursively immutable
727
559
  if isinstance(value, (dict, list)):
728
- flags.set(key, TomlFlags.FROZEN, recursive=True)
729
- pos += 1
730
- pos = toml_skip_chars(src, pos, TOML_WS)
731
-
560
+ self.flags.set(header + key, TomlFlags.FROZEN, recursive=True)
561
+ nest[key_stem] = value
732
562
 
733
- def toml_parse_basic_str_escape(
734
- src: str,
735
- pos: TomlPos,
736
- *,
737
- multiline: bool = False,
738
- ) -> ta.Tuple[TomlPos, str]:
739
- escape_id = src[pos:pos + 2]
740
- pos += 2
741
- if multiline and escape_id in {'\\ ', '\\\t', '\\\n'}:
742
- # Skip whitespace until next non-whitespace character or end of the doc. Error if non-whitespace is found before
743
- # newline.
744
- if escape_id != '\\\n':
745
- pos = toml_skip_chars(src, pos, TOML_WS)
563
+ def parse_key_value_pair(self) -> ta.Tuple[TomlKey, ta.Any]:
564
+ key = self.parse_key()
565
+ try:
566
+ char: ta.Optional[str] = self.src[self.pos]
567
+ except IndexError:
568
+ char = None
569
+ if char != '=':
570
+ raise self.suffixed_err("Expected '=' after a key in a key/value pair")
571
+ self.pos += 1
572
+ self.skip_chars(self.WS)
573
+ value = self.parse_value()
574
+ return key, value
575
+
576
+ def parse_key(self) -> TomlKey:
577
+ key_part = self.parse_key_part()
578
+ key: TomlKey = (key_part,)
579
+ self.skip_chars(self.WS)
580
+ while True:
746
581
  try:
747
- char = src[pos]
582
+ char: ta.Optional[str] = self.src[self.pos]
748
583
  except IndexError:
749
- return pos, ''
750
- if char != '\n':
751
- raise toml_suffixed_err(src, pos, "Unescaped '\\' in a string")
752
- pos += 1
753
- pos = toml_skip_chars(src, pos, TOML_WS_AND_NEWLINE)
754
- return pos, ''
755
- if escape_id == '\\u':
756
- return toml_parse_hex_char(src, pos, 4)
757
- if escape_id == '\\U':
758
- return toml_parse_hex_char(src, pos, 8)
759
- try:
760
- return pos, TOML_BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
761
- except KeyError:
762
- raise toml_suffixed_err(src, pos, "Unescaped '\\' in a string") from None
584
+ char = None
585
+ if char != '.':
586
+ return key
587
+ self.pos += 1
588
+ self.skip_chars(self.WS)
589
+ key_part = self.parse_key_part()
590
+ key += (key_part,)
591
+ self.skip_chars(self.WS)
592
+
593
+ def parse_key_part(self) -> str:
594
+ try:
595
+ char: ta.Optional[str] = self.src[self.pos]
596
+ except IndexError:
597
+ char = None
598
+ if char in self.BARE_KEY_CHARS:
599
+ start_pos = self.pos
600
+ self.skip_chars(self.BARE_KEY_CHARS)
601
+ return self.src[start_pos:self.pos]
602
+ if char == "'":
603
+ return self.parse_literal_str()
604
+ if char == '"':
605
+ return self.parse_one_line_basic_str()
606
+ raise self.suffixed_err('Invalid initial character for a key part')
763
607
 
608
+ def parse_one_line_basic_str(self) -> str:
609
+ self.pos += 1
610
+ return self.parse_basic_str(multiline=False)
764
611
 
765
- def toml_parse_basic_str_escape_multiline(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
766
- return toml_parse_basic_str_escape(src, pos, multiline=True)
612
+ def parse_array(self) -> list:
613
+ self.pos += 1
614
+ array: list = []
767
615
 
616
+ self.skip_comments_and_array_ws()
617
+ if self.src.startswith(']', self.pos):
618
+ self.pos += 1
619
+ return array
620
+ while True:
621
+ val = self.parse_value()
622
+ array.append(val)
623
+ self.skip_comments_and_array_ws()
624
+
625
+ c = self.src[self.pos:self.pos + 1]
626
+ if c == ']':
627
+ self.pos += 1
628
+ return array
629
+ if c != ',':
630
+ raise self.suffixed_err('Unclosed array')
631
+ self.pos += 1
632
+
633
+ self.skip_comments_and_array_ws()
634
+ if self.src.startswith(']', self.pos):
635
+ self.pos += 1
636
+ return array
637
+
638
+ def parse_inline_table(self) -> dict:
639
+ self.pos += 1
640
+ nested_dict = TomlNestedDict()
641
+ flags = TomlFlags()
642
+
643
+ self.skip_chars(self.WS)
644
+ if self.src.startswith('}', self.pos):
645
+ self.pos += 1
646
+ return nested_dict.dict
647
+ while True:
648
+ key, value = self.parse_key_value_pair()
649
+ key_parent, key_stem = key[:-1], key[-1]
650
+ if flags.is_(key, TomlFlags.FROZEN):
651
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {key}')
652
+ try:
653
+ nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
654
+ except KeyError:
655
+ raise self.suffixed_err('Cannot overwrite a value') from None
656
+ if key_stem in nest:
657
+ raise self.suffixed_err(f'Duplicate inline table key {key_stem!r}')
658
+ nest[key_stem] = value
659
+ self.skip_chars(self.WS)
660
+ c = self.src[self.pos:self.pos + 1]
661
+ if c == '}':
662
+ self.pos += 1
663
+ return nested_dict.dict
664
+ if c != ',':
665
+ raise self.suffixed_err('Unclosed inline table')
666
+ if isinstance(value, (dict, list)):
667
+ flags.set(key, TomlFlags.FROZEN, recursive=True)
668
+ self.pos += 1
669
+ self.skip_chars(self.WS)
670
+
671
+ def parse_basic_str_escape(self, multiline: bool = False) -> str:
672
+ escape_id = self.src[self.pos:self.pos + 2]
673
+ self.pos += 2
674
+ if multiline and escape_id in {'\\ ', '\\\t', '\\\n'}:
675
+ # Skip whitespace until next non-whitespace character or end of the doc. Error if non-whitespace is found
676
+ # before newline.
677
+ if escape_id != '\\\n':
678
+ self.skip_chars(self.WS)
679
+ try:
680
+ char = self.src[self.pos]
681
+ except IndexError:
682
+ return ''
683
+ if char != '\n':
684
+ raise self.suffixed_err("Unescaped '\\' in a string")
685
+ self.pos += 1
686
+ self.skip_chars(self.WS_AND_NEWLINE)
687
+ return ''
688
+ if escape_id == '\\u':
689
+ return self.parse_hex_char(4)
690
+ if escape_id == '\\U':
691
+ return self.parse_hex_char(8)
692
+ try:
693
+ return self.BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
694
+ except KeyError:
695
+ raise self.suffixed_err("Unescaped '\\' in a string") from None
768
696
 
769
- def toml_parse_hex_char(src: str, pos: TomlPos, hex_len: int) -> ta.Tuple[TomlPos, str]:
770
- hex_str = src[pos:pos + hex_len]
771
- if len(hex_str) != hex_len or not TOML_HEXDIGIT_CHARS.issuperset(hex_str):
772
- raise toml_suffixed_err(src, pos, 'Invalid hex value')
773
- pos += hex_len
774
- hex_int = int(hex_str, 16)
775
- if not toml_is_unicode_scalar_value(hex_int):
776
- raise toml_suffixed_err(src, pos, 'Escaped character is not a Unicode scalar value')
777
- return pos, chr(hex_int)
697
+ def parse_basic_str_escape_multiline(self) -> str:
698
+ return self.parse_basic_str_escape(multiline=True)
778
699
 
700
+ @classmethod
701
+ def is_unicode_scalar_value(cls, codepoint: int) -> bool:
702
+ return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
703
+
704
+ def parse_hex_char(self, hex_len: int) -> str:
705
+ hex_str = self.src[self.pos:self.pos + hex_len]
706
+ if len(hex_str) != hex_len or not self.HEXDIGIT_CHARS.issuperset(hex_str):
707
+ raise self.suffixed_err('Invalid hex value')
708
+ self.pos += hex_len
709
+ hex_int = int(hex_str, 16)
710
+ if not self.is_unicode_scalar_value(hex_int):
711
+ raise self.suffixed_err('Escaped character is not a Unicode scalar value')
712
+ return chr(hex_int)
713
+
714
+ def parse_literal_str(self) -> str:
715
+ self.pos += 1 # Skip starting apostrophe
716
+ start_pos = self.pos
717
+ self.skip_until("'", error_on=self.ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True)
718
+ end_pos = self.pos
719
+ self.pos += 1
720
+ return self.src[start_pos:end_pos] # Skip ending apostrophe
721
+
722
+ def parse_multiline_str(self, *, literal: bool) -> str:
723
+ self.pos += 3
724
+ if self.src.startswith('\n', self.pos):
725
+ self.pos += 1
726
+
727
+ if literal:
728
+ delim = "'"
729
+ start_pos = self.pos
730
+ self.skip_until(
731
+ "'''",
732
+ error_on=self.ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
733
+ error_on_eof=True,
734
+ )
735
+ result = self.src[start_pos:self.pos]
736
+ self.pos += 3
737
+ else:
738
+ delim = '"'
739
+ result = self.parse_basic_str(multiline=True)
740
+
741
+ # Add at maximum two extra apostrophes/quotes if the end sequence is 4 or 5 chars long instead of just 3.
742
+ if not self.src.startswith(delim, self.pos):
743
+ return result
744
+ self.pos += 1
745
+ if not self.src.startswith(delim, self.pos):
746
+ return result + delim
747
+ self.pos += 1
748
+ return result + (delim * 2)
749
+
750
+ def parse_basic_str(self, *, multiline: bool) -> str:
751
+ if multiline:
752
+ error_on = self.ILLEGAL_MULTILINE_BASIC_STR_CHARS
753
+ parse_escapes = self.parse_basic_str_escape_multiline
754
+ else:
755
+ error_on = self.ILLEGAL_BASIC_STR_CHARS
756
+ parse_escapes = self.parse_basic_str_escape
757
+ result = ''
758
+ start_pos = self.pos
759
+ while True:
760
+ try:
761
+ char = self.src[self.pos]
762
+ except IndexError:
763
+ raise self.suffixed_err('Unterminated string') from None
764
+ if char == '"':
765
+ if not multiline:
766
+ end_pos = self.pos
767
+ self.pos += 1
768
+ return result + self.src[start_pos:end_pos]
769
+ if self.src.startswith('"""', self.pos):
770
+ end_pos = self.pos
771
+ self.pos += 3
772
+ return result + self.src[start_pos:end_pos]
773
+ self.pos += 1
774
+ continue
775
+ if char == '\\':
776
+ result += self.src[start_pos:self.pos]
777
+ parsed_escape = parse_escapes()
778
+ result += parsed_escape
779
+ start_pos = self.pos
780
+ continue
781
+ if char in error_on:
782
+ raise self.suffixed_err(f'Illegal character {char!r}')
783
+ self.pos += 1
779
784
 
780
- def toml_parse_literal_str(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
781
- pos += 1 # Skip starting apostrophe
782
- start_pos = pos
783
- pos = toml_skip_until(
784
- src, pos, "'", error_on=TOML_ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True,
785
- )
786
- return pos + 1, src[start_pos:pos] # Skip ending apostrophe
787
-
788
-
789
- def toml_parse_multiline_str(src: str, pos: TomlPos, *, literal: bool) -> ta.Tuple[TomlPos, str]:
790
- pos += 3
791
- if src.startswith('\n', pos):
792
- pos += 1
793
-
794
- if literal:
795
- delim = "'"
796
- end_pos = toml_skip_until(
797
- src,
798
- pos,
799
- "'''",
800
- error_on=TOML_ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
801
- error_on_eof=True,
802
- )
803
- result = src[pos:end_pos]
804
- pos = end_pos + 3
805
- else:
806
- delim = '"'
807
- pos, result = toml_parse_basic_str(src, pos, multiline=True)
808
-
809
- # Add at maximum two extra apostrophes/quotes if the end sequence is 4 or 5 chars long instead of just 3.
810
- if not src.startswith(delim, pos):
811
- return pos, result
812
- pos += 1
813
- if not src.startswith(delim, pos):
814
- return pos, result + delim
815
- pos += 1
816
- return pos, result + (delim * 2)
817
-
818
-
819
- def toml_parse_basic_str(src: str, pos: TomlPos, *, multiline: bool) -> ta.Tuple[TomlPos, str]:
820
- if multiline:
821
- error_on = TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS
822
- parse_escapes = toml_parse_basic_str_escape_multiline
823
- else:
824
- error_on = TOML_ILLEGAL_BASIC_STR_CHARS
825
- parse_escapes = toml_parse_basic_str_escape
826
- result = ''
827
- start_pos = pos
828
- while True:
785
+ def parse_value(self) -> ta.Any: # noqa: C901
829
786
  try:
830
- char = src[pos]
787
+ char: ta.Optional[str] = self.src[self.pos]
831
788
  except IndexError:
832
- raise toml_suffixed_err(src, pos, 'Unterminated string') from None
789
+ char = None
790
+
791
+ # IMPORTANT: order conditions based on speed of checking and likelihood
792
+
793
+ # Basic strings
833
794
  if char == '"':
834
- if not multiline:
835
- return pos + 1, result + src[start_pos:pos]
836
- if src.startswith('"""', pos):
837
- return pos + 3, result + src[start_pos:pos]
838
- pos += 1
839
- continue
840
- if char == '\\':
841
- result += src[start_pos:pos]
842
- pos, parsed_escape = parse_escapes(src, pos)
843
- result += parsed_escape
844
- start_pos = pos
845
- continue
846
- if char in error_on:
847
- raise toml_suffixed_err(src, pos, f'Illegal character {char!r}')
848
- pos += 1
795
+ if self.src.startswith('"""', self.pos):
796
+ return self.parse_multiline_str(literal=False)
797
+ return self.parse_one_line_basic_str()
798
+
799
+ # Literal strings
800
+ if char == "'":
801
+ if self.src.startswith("'''", self.pos):
802
+ return self.parse_multiline_str(literal=True)
803
+ return self.parse_literal_str()
804
+
805
+ # Booleans
806
+ if char == 't':
807
+ if self.src.startswith('true', self.pos):
808
+ self.pos += 4
809
+ return True
810
+ if char == 'f':
811
+ if self.src.startswith('false', self.pos):
812
+ self.pos += 5
813
+ return False
849
814
 
815
+ # Arrays
816
+ if char == '[':
817
+ return self.parse_array()
850
818
 
851
- def toml_parse_value( # noqa: C901
852
- src: str,
853
- pos: TomlPos,
854
- parse_float: TomlParseFloat,
855
- ) -> ta.Tuple[TomlPos, ta.Any]:
856
- try:
857
- char: ta.Optional[str] = src[pos]
858
- except IndexError:
859
- char = None
860
-
861
- # IMPORTANT: order conditions based on speed of checking and likelihood
862
-
863
- # Basic strings
864
- if char == '"':
865
- if src.startswith('"""', pos):
866
- return toml_parse_multiline_str(src, pos, literal=False)
867
- return toml_parse_one_line_basic_str(src, pos)
868
-
869
- # Literal strings
870
- if char == "'":
871
- if src.startswith("'''", pos):
872
- return toml_parse_multiline_str(src, pos, literal=True)
873
- return toml_parse_literal_str(src, pos)
874
-
875
- # Booleans
876
- if char == 't':
877
- if src.startswith('true', pos):
878
- return pos + 4, True
879
- if char == 'f':
880
- if src.startswith('false', pos):
881
- return pos + 5, False
882
-
883
- # Arrays
884
- if char == '[':
885
- return toml_parse_array(src, pos, parse_float)
886
-
887
- # Inline tables
888
- if char == '{':
889
- return toml_parse_inline_table(src, pos, parse_float)
890
-
891
- # Dates and times
892
- datetime_match = TOML_RE_DATETIME.match(src, pos)
893
- if datetime_match:
894
- try:
895
- datetime_obj = toml_match_to_datetime(datetime_match)
896
- except ValueError as e:
897
- raise toml_suffixed_err(src, pos, 'Invalid date or datetime') from e
898
- return datetime_match.end(), datetime_obj
899
- localtime_match = TOML_RE_LOCALTIME.match(src, pos)
900
- if localtime_match:
901
- return localtime_match.end(), toml_match_to_localtime(localtime_match)
902
-
903
- # Integers and "normal" floats. The regex will greedily match any type starting with a decimal char, so needs to be
904
- # located after handling of dates and times.
905
- number_match = TOML_RE_NUMBER.match(src, pos)
906
- if number_match:
907
- return number_match.end(), toml_match_to_number(number_match, parse_float)
908
-
909
- # Special floats
910
- first_three = src[pos:pos + 3]
911
- if first_three in {'inf', 'nan'}:
912
- return pos + 3, parse_float(first_three)
913
- first_four = src[pos:pos + 4]
914
- if first_four in {'-inf', '+inf', '-nan', '+nan'}:
915
- return pos + 4, parse_float(first_four)
916
-
917
- raise toml_suffixed_err(src, pos, 'Invalid value')
918
-
919
-
920
- def toml_suffixed_err(src: str, pos: TomlPos, msg: str) -> TomlDecodeError:
921
- """Return a `TomlDecodeError` where error message is suffixed with coordinates in source."""
922
-
923
- def coord_repr(src: str, pos: TomlPos) -> str:
924
- if pos >= len(src):
819
+ # Inline tables
820
+ if char == '{':
821
+ return self.parse_inline_table()
822
+
823
+ # Dates and times
824
+ datetime_match = self.RE_DATETIME.match(self.src, self.pos)
825
+ if datetime_match:
826
+ try:
827
+ datetime_obj = self.match_to_datetime(datetime_match)
828
+ except ValueError as e:
829
+ raise self.suffixed_err('Invalid date or datetime') from e
830
+ self.pos = datetime_match.end()
831
+ return datetime_obj
832
+ localtime_match = self.RE_LOCALTIME.match(self.src, self.pos)
833
+ if localtime_match:
834
+ self.pos = localtime_match.end()
835
+ return self.match_to_localtime(localtime_match)
836
+
837
+ # Integers and "normal" floats. The regex will greedily match any type starting with a decimal char, so needs to
838
+ # be located after handling of dates and times.
839
+ number_match = self.RE_NUMBER.match(self.src, self.pos)
840
+ if number_match:
841
+ self.pos = number_match.end()
842
+ return self.match_to_number(number_match, self.parse_float)
843
+
844
+ # Special floats
845
+ first_three = self.src[self.pos:self.pos + 3]
846
+ if first_three in {'inf', 'nan'}:
847
+ self.pos += 3
848
+ return self.parse_float(first_three)
849
+ first_four = self.src[self.pos:self.pos + 4]
850
+ if first_four in {'-inf', '+inf', '-nan', '+nan'}:
851
+ self.pos += 4
852
+ return self.parse_float(first_four)
853
+
854
+ raise self.suffixed_err('Invalid value')
855
+
856
+ def coord_repr(self, pos: TomlPos) -> str:
857
+ if pos >= len(self.src):
925
858
  return 'end of document'
926
- line = src.count('\n', 0, pos) + 1
859
+ line = self.src.count('\n', 0, pos) + 1
927
860
  if line == 1:
928
861
  column = pos + 1
929
862
  else:
930
- column = pos - src.rindex('\n', 0, pos)
863
+ column = pos - self.src.rindex('\n', 0, pos)
931
864
  return f'line {line}, column {column}'
932
865
 
933
- return TomlDecodeError(f'{msg} (at {coord_repr(src, pos)})')
866
+ def suffixed_err(self, msg: str, *, pos: ta.Optional[TomlPos] = None) -> TomlDecodeError:
867
+ """Return a `TomlDecodeError` where error message is suffixed with coordinates in source."""
934
868
 
869
+ if pos is None:
870
+ pos = self.pos
871
+ return TomlDecodeError(f'{msg} (at {self.coord_repr(pos)})')
935
872
 
936
- def toml_is_unicode_scalar_value(codepoint: int) -> bool:
937
- return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
873
+ _TIME_RE_STR = r'([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?'
938
874
 
875
+ RE_NUMBER = re.compile(
876
+ r"""
877
+ 0
878
+ (?:
879
+ x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
880
+ |
881
+ b[01](?:_?[01])* # bin
882
+ |
883
+ o[0-7](?:_?[0-7])* # oct
884
+ )
885
+ |
886
+ [+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
887
+ (?P<floatpart>
888
+ (?:\.[0-9](?:_?[0-9])*)? # optional fractional part
889
+ (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
890
+ )
891
+ """,
892
+ flags=re.VERBOSE,
893
+ )
939
894
 
940
- def toml_make_safe_parse_float(parse_float: TomlParseFloat) -> TomlParseFloat:
941
- """A decorator to make `parse_float` safe.
895
+ RE_LOCALTIME = re.compile(_TIME_RE_STR)
896
+
897
+ RE_DATETIME = re.compile(
898
+ rf"""
899
+ ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
900
+ (?:
901
+ [Tt ]
902
+ {_TIME_RE_STR}
903
+ (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
904
+ )?
905
+ """,
906
+ flags=re.VERBOSE,
907
+ )
942
908
 
943
- `parse_float` must not return dicts or lists, because these types would be mixed with parsed TOML tables and arrays,
944
- thus confusing the parser. The returned decorated callable raises `ValueError` instead of returning illegal types.
945
- """
946
- # The default `float` callable never returns illegal types. Optimize it.
947
- if parse_float is float:
948
- return float
909
+ @classmethod
910
+ def match_to_datetime(cls, match: re.Match) -> ta.Union[datetime.datetime, datetime.date]:
911
+ """
912
+ Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
949
913
 
950
- def safe_parse_float(float_str: str) -> ta.Any:
951
- float_value = parse_float(float_str)
952
- if isinstance(float_value, (dict, list)):
953
- raise ValueError('parse_float must not return dicts or lists') # noqa
954
- return float_value
914
+ Raises ValueError if the match does not correspond to a valid date or datetime.
915
+ """
955
916
 
956
- return safe_parse_float
917
+ (
918
+ year_str,
919
+ month_str,
920
+ day_str,
921
+ hour_str,
922
+ minute_str,
923
+ sec_str,
924
+ micros_str,
925
+ zulu_time,
926
+ offset_sign_str,
927
+ offset_hour_str,
928
+ offset_minute_str,
929
+ ) = match.groups()
930
+ year, month, day = int(year_str), int(month_str), int(day_str)
931
+ if hour_str is None:
932
+ return datetime.date(year, month, day)
933
+ hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
934
+ micros = int(micros_str.ljust(6, '0')) if micros_str else 0
935
+ if offset_sign_str:
936
+ tz: ta.Optional[datetime.tzinfo] = toml_cached_tz(
937
+ offset_hour_str, offset_minute_str, offset_sign_str,
938
+ )
939
+ elif zulu_time:
940
+ tz = datetime.UTC
941
+ else: # local date-time
942
+ tz = None
943
+ return datetime.datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
944
+
945
+ @classmethod
946
+ def match_to_localtime(cls, match: re.Match) -> datetime.time:
947
+ hour_str, minute_str, sec_str, micros_str = match.groups()
948
+ micros = int(micros_str.ljust(6, '0')) if micros_str else 0
949
+ return datetime.time(int(hour_str), int(minute_str), int(sec_str), micros)
950
+
951
+ @classmethod
952
+ def match_to_number(cls, match: re.Match, parse_float: TomlParseFloat) -> ta.Any:
953
+ if match.group('floatpart'):
954
+ return parse_float(match.group())
955
+ return int(match.group(), 0)
957
956
 
958
957
 
959
958
  ########################################