ominfra 0.0.0.dev268__py3-none-any.whl → 0.0.0.dev269__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -726,75 +726,6 @@ def render_ini_sections(
726
726
  ##
727
727
 
728
728
 
729
- _TOML_TIME_RE_STR = r'([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?'
730
-
731
- TOML_RE_NUMBER = re.compile(
732
- r"""
733
- 0
734
- (?:
735
- x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
736
- |
737
- b[01](?:_?[01])* # bin
738
- |
739
- o[0-7](?:_?[0-7])* # oct
740
- )
741
- |
742
- [+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
743
- (?P<floatpart>
744
- (?:\.[0-9](?:_?[0-9])*)? # optional fractional part
745
- (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
746
- )
747
- """,
748
- flags=re.VERBOSE,
749
- )
750
- TOML_RE_LOCALTIME = re.compile(_TOML_TIME_RE_STR)
751
- TOML_RE_DATETIME = re.compile(
752
- rf"""
753
- ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
754
- (?:
755
- [Tt ]
756
- {_TOML_TIME_RE_STR}
757
- (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
758
- )?
759
- """,
760
- flags=re.VERBOSE,
761
- )
762
-
763
-
764
- def toml_match_to_datetime(match: re.Match) -> ta.Union[datetime.datetime, datetime.date]:
765
- """Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
766
-
767
- Raises ValueError if the match does not correspond to a valid date or datetime.
768
- """
769
- (
770
- year_str,
771
- month_str,
772
- day_str,
773
- hour_str,
774
- minute_str,
775
- sec_str,
776
- micros_str,
777
- zulu_time,
778
- offset_sign_str,
779
- offset_hour_str,
780
- offset_minute_str,
781
- ) = match.groups()
782
- year, month, day = int(year_str), int(month_str), int(day_str)
783
- if hour_str is None:
784
- return datetime.date(year, month, day)
785
- hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
786
- micros = int(micros_str.ljust(6, '0')) if micros_str else 0
787
- if offset_sign_str:
788
- tz: ta.Optional[datetime.tzinfo] = toml_cached_tz(
789
- offset_hour_str, offset_minute_str, offset_sign_str,
790
- )
791
- elif zulu_time:
792
- tz = datetime.UTC
793
- else: # local date-time
794
- tz = None
795
- return datetime.datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
796
-
797
-
798
729
  @functools.lru_cache() # noqa
799
730
  def toml_cached_tz(hour_str: str, minute_str: str, sign_str: str) -> datetime.timezone:
800
731
  sign = 1 if sign_str == '+' else -1
@@ -806,47 +737,25 @@ def toml_cached_tz(hour_str: str, minute_str: str, sign_str: str) -> datetime.ti
806
737
  )
807
738
 
808
739
 
809
- def toml_match_to_localtime(match: re.Match) -> datetime.time:
810
- hour_str, minute_str, sec_str, micros_str = match.groups()
811
- micros = int(micros_str.ljust(6, '0')) if micros_str else 0
812
- return datetime.time(int(hour_str), int(minute_str), int(sec_str), micros)
813
-
814
-
815
- def toml_match_to_number(match: re.Match, parse_float: TomlParseFloat) -> ta.Any:
816
- if match.group('floatpart'):
817
- return parse_float(match.group())
818
- return int(match.group(), 0)
819
-
820
-
821
- TOML_ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
822
-
823
- # Neither of these sets include quotation mark or backslash. They are currently handled as separate cases in the parser
824
- # functions.
825
- TOML_ILLEGAL_BASIC_STR_CHARS = TOML_ASCII_CTRL - frozenset('\t')
826
- TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS = TOML_ASCII_CTRL - frozenset('\t\n')
740
+ def toml_make_safe_parse_float(parse_float: TomlParseFloat) -> TomlParseFloat:
741
+ """
742
+ A decorator to make `parse_float` safe.
827
743
 
828
- TOML_ILLEGAL_LITERAL_STR_CHARS = TOML_ILLEGAL_BASIC_STR_CHARS
829
- TOML_ILLEGAL_MULTILINE_LITERAL_STR_CHARS = TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS
744
+ `parse_float` must not return dicts or lists, because these types would be mixed with parsed TOML tables and arrays,
745
+ thus confusing the parser. The returned decorated callable raises `ValueError` instead of returning illegal types.
746
+ """
830
747
 
831
- TOML_ILLEGAL_COMMENT_CHARS = TOML_ILLEGAL_BASIC_STR_CHARS
748
+ # The default `float` callable never returns illegal types. Optimize it.
749
+ if parse_float is float:
750
+ return float
832
751
 
833
- TOML_WS = frozenset(' \t')
834
- TOML_WS_AND_NEWLINE = TOML_WS | frozenset('\n')
835
- TOML_BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + '-_')
836
- TOML_KEY_INITIAL_CHARS = TOML_BARE_KEY_CHARS | frozenset("\"'")
837
- TOML_HEXDIGIT_CHARS = frozenset(string.hexdigits)
752
+ def safe_parse_float(float_str: str) -> ta.Any:
753
+ float_value = parse_float(float_str)
754
+ if isinstance(float_value, (dict, list)):
755
+ raise ValueError('parse_float must not return dicts or lists') # noqa
756
+ return float_value
838
757
 
839
- TOML_BASIC_STR_ESCAPE_REPLACEMENTS = types.MappingProxyType(
840
- {
841
- '\\b': '\u0008', # backspace
842
- '\\t': '\u0009', # tab
843
- '\\n': '\u000A', # linefeed
844
- '\\f': '\u000C', # form feed
845
- '\\r': '\u000D', # carriage return
846
- '\\"': '\u0022', # quote
847
- '\\\\': '\u005C', # backslash
848
- },
849
- )
758
+ return safe_parse_float
850
759
 
851
760
 
852
761
  class TomlDecodeError(ValueError):
@@ -871,63 +780,15 @@ def toml_loads(s: str, /, *, parse_float: TomlParseFloat = float) -> ta.Dict[str
871
780
  src = s.replace('\r\n', '\n')
872
781
  except (AttributeError, TypeError):
873
782
  raise TypeError(f"Expected str object, not '{type(s).__qualname__}'") from None
874
- pos = 0
875
- out = TomlOutput(TomlNestedDict(), TomlFlags())
876
- header: TomlKey = ()
877
- parse_float = toml_make_safe_parse_float(parse_float)
878
-
879
- # Parse one statement at a time (typically means one line in TOML source)
880
- while True:
881
- # 1. Skip line leading whitespace
882
- pos = toml_skip_chars(src, pos, TOML_WS)
883
-
884
- # 2. Parse rules. Expect one of the following:
885
- # - end of file
886
- # - end of line
887
- # - comment
888
- # - key/value pair
889
- # - append dict to list (and move to its namespace)
890
- # - create dict (and move to its namespace)
891
- # Skip trailing whitespace when applicable.
892
- try:
893
- char = src[pos]
894
- except IndexError:
895
- break
896
- if char == '\n':
897
- pos += 1
898
- continue
899
- if char in TOML_KEY_INITIAL_CHARS:
900
- pos = toml_key_value_rule(src, pos, out, header, parse_float)
901
- pos = toml_skip_chars(src, pos, TOML_WS)
902
- elif char == '[':
903
- try:
904
- second_char: ta.Optional[str] = src[pos + 1]
905
- except IndexError:
906
- second_char = None
907
- out.flags.finalize_pending()
908
- if second_char == '[':
909
- pos, header = toml_create_list_rule(src, pos, out)
910
- else:
911
- pos, header = toml_create_dict_rule(src, pos, out)
912
- pos = toml_skip_chars(src, pos, TOML_WS)
913
- elif char != '#':
914
- raise toml_suffixed_err(src, pos, 'Invalid statement')
915
783
 
916
- # 3. Skip comment
917
- pos = toml_skip_comment(src, pos)
784
+ parse_float = toml_make_safe_parse_float(parse_float)
918
785
 
919
- # 4. Expect end of line or end of file
920
- try:
921
- char = src[pos]
922
- except IndexError:
923
- break
924
- if char != '\n':
925
- raise toml_suffixed_err(
926
- src, pos, 'Expected newline or end of document after a statement',
927
- )
928
- pos += 1
786
+ parser = TomlParser(
787
+ src,
788
+ parse_float=parse_float,
789
+ )
929
790
 
930
- return out.data.dict
791
+ return parser.parse()
931
792
 
932
793
 
933
794
  class TomlFlags:
@@ -939,6 +800,8 @@ class TomlFlags:
939
800
  EXPLICIT_NEST = 1
940
801
 
941
802
  def __init__(self) -> None:
803
+ super().__init__()
804
+
942
805
  self._flags: ta.Dict[str, dict] = {}
943
806
  self._pending_flags: ta.Set[ta.Tuple[TomlKey, int]] = set()
944
807
 
@@ -989,6 +852,8 @@ class TomlFlags:
989
852
 
990
853
  class TomlNestedDict:
991
854
  def __init__(self) -> None:
855
+ super().__init__()
856
+
992
857
  # The parsed content of the TOML document
993
858
  self.dict: ta.Dict[str, ta.Any] = {}
994
859
 
@@ -1021,479 +886,613 @@ class TomlNestedDict:
1021
886
  cont[last_key] = [{}]
1022
887
 
1023
888
 
1024
- class TomlOutput(ta.NamedTuple):
1025
- data: TomlNestedDict
1026
- flags: TomlFlags
889
+ class TomlParser:
890
+ def __init__(
891
+ self,
892
+ src: str,
893
+ *,
894
+ parse_float: TomlParseFloat = float,
895
+ ) -> None:
896
+ super().__init__()
1027
897
 
898
+ self.src = src
1028
899
 
1029
- def toml_skip_chars(src: str, pos: TomlPos, chars: ta.Iterable[str]) -> TomlPos:
1030
- try:
1031
- while src[pos] in chars:
1032
- pos += 1
1033
- except IndexError:
1034
- pass
1035
- return pos
900
+ self.parse_float = parse_float
1036
901
 
902
+ self.data = TomlNestedDict()
903
+ self.flags = TomlFlags()
904
+ self.pos = 0
1037
905
 
1038
- def toml_skip_until(
1039
- src: str,
1040
- pos: TomlPos,
1041
- expect: str,
1042
- *,
1043
- error_on: ta.FrozenSet[str],
1044
- error_on_eof: bool,
1045
- ) -> TomlPos:
1046
- try:
1047
- new_pos = src.index(expect, pos)
1048
- except ValueError:
1049
- new_pos = len(src)
1050
- if error_on_eof:
1051
- raise toml_suffixed_err(src, new_pos, f'Expected {expect!r}') from None
906
+ ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
1052
907
 
1053
- if not error_on.isdisjoint(src[pos:new_pos]):
1054
- while src[pos] not in error_on:
1055
- pos += 1
1056
- raise toml_suffixed_err(src, pos, f'Found invalid character {src[pos]!r}')
1057
- return new_pos
908
+ # Neither of these sets include quotation mark or backslash. They are currently handled as separate cases in the
909
+ # parser functions.
910
+ ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset('\t')
911
+ ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset('\t\n')
1058
912
 
913
+ ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
914
+ ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
1059
915
 
1060
- def toml_skip_comment(src: str, pos: TomlPos) -> TomlPos:
1061
- try:
1062
- char: ta.Optional[str] = src[pos]
1063
- except IndexError:
1064
- char = None
1065
- if char == '#':
1066
- return toml_skip_until(
1067
- src, pos + 1, '\n', error_on=TOML_ILLEGAL_COMMENT_CHARS, error_on_eof=False,
1068
- )
1069
- return pos
916
+ ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
1070
917
 
918
+ WS = frozenset(' \t')
919
+ WS_AND_NEWLINE = WS | frozenset('\n')
920
+ BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + '-_')
921
+ KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
922
+ HEXDIGIT_CHARS = frozenset(string.hexdigits)
1071
923
 
1072
- def toml_skip_comments_and_array_ws(src: str, pos: TomlPos) -> TomlPos:
1073
- while True:
1074
- pos_before_skip = pos
1075
- pos = toml_skip_chars(src, pos, TOML_WS_AND_NEWLINE)
1076
- pos = toml_skip_comment(src, pos)
1077
- if pos == pos_before_skip:
1078
- return pos
924
+ BASIC_STR_ESCAPE_REPLACEMENTS = types.MappingProxyType({
925
+ '\\b': '\u0008', # backspace
926
+ '\\t': '\u0009', # tab
927
+ '\\n': '\u000A', # linefeed
928
+ '\\f': '\u000C', # form feed
929
+ '\\r': '\u000D', # carriage return
930
+ '\\"': '\u0022', # quote
931
+ '\\\\': '\u005C', # backslash
932
+ })
1079
933
 
934
+ def parse(self) -> ta.Dict[str, ta.Any]: # noqa: C901
935
+ header: TomlKey = ()
1080
936
 
1081
- def toml_create_dict_rule(src: str, pos: TomlPos, out: TomlOutput) -> ta.Tuple[TomlPos, TomlKey]:
1082
- pos += 1 # Skip "["
1083
- pos = toml_skip_chars(src, pos, TOML_WS)
1084
- pos, key = toml_parse_key(src, pos)
937
+ # Parse one statement at a time (typically means one line in TOML source)
938
+ while True:
939
+ # 1. Skip line leading whitespace
940
+ self.skip_chars(self.WS)
941
+
942
+ # 2. Parse rules. Expect one of the following:
943
+ # - end of file
944
+ # - end of line
945
+ # - comment
946
+ # - key/value pair
947
+ # - append dict to list (and move to its namespace)
948
+ # - create dict (and move to its namespace)
949
+ # Skip trailing whitespace when applicable.
950
+ try:
951
+ char = self.src[self.pos]
952
+ except IndexError:
953
+ break
954
+ if char == '\n':
955
+ self.pos += 1
956
+ continue
957
+ if char in self.KEY_INITIAL_CHARS:
958
+ self.key_value_rule(header)
959
+ self.skip_chars(self.WS)
960
+ elif char == '[':
961
+ try:
962
+ second_char: ta.Optional[str] = self.src[self.pos + 1]
963
+ except IndexError:
964
+ second_char = None
965
+ self.flags.finalize_pending()
966
+ if second_char == '[':
967
+ header = self.create_list_rule()
968
+ else:
969
+ header = self.create_dict_rule()
970
+ self.skip_chars(self.WS)
971
+ elif char != '#':
972
+ raise self.suffixed_err('Invalid statement')
1085
973
 
1086
- if out.flags.is_(key, TomlFlags.EXPLICIT_NEST) or out.flags.is_(key, TomlFlags.FROZEN):
1087
- raise toml_suffixed_err(src, pos, f'Cannot declare {key} twice')
1088
- out.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
1089
- try:
1090
- out.data.get_or_create_nest(key)
1091
- except KeyError:
1092
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
974
+ # 3. Skip comment
975
+ self.skip_comment()
1093
976
 
1094
- if not src.startswith(']', pos):
1095
- raise toml_suffixed_err(src, pos, "Expected ']' at the end of a table declaration")
1096
- return pos + 1, key
977
+ # 4. Expect end of line or end of file
978
+ try:
979
+ char = self.src[self.pos]
980
+ except IndexError:
981
+ break
982
+ if char != '\n':
983
+ raise self.suffixed_err('Expected newline or end of document after a statement')
984
+ self.pos += 1
1097
985
 
986
+ return self.data.dict
1098
987
 
1099
- def toml_create_list_rule(src: str, pos: TomlPos, out: TomlOutput) -> ta.Tuple[TomlPos, TomlKey]:
1100
- pos += 2 # Skip "[["
1101
- pos = toml_skip_chars(src, pos, TOML_WS)
1102
- pos, key = toml_parse_key(src, pos)
988
+ def skip_chars(self, chars: ta.Iterable[str]) -> None:
989
+ try:
990
+ while self.src[self.pos] in chars:
991
+ self.pos += 1
992
+ except IndexError:
993
+ pass
1103
994
 
1104
- if out.flags.is_(key, TomlFlags.FROZEN):
1105
- raise toml_suffixed_err(src, pos, f'Cannot mutate immutable namespace {key}')
1106
- # Free the namespace now that it points to another empty list item...
1107
- out.flags.unset_all(key)
1108
- # ...but this key precisely is still prohibited from table declaration
1109
- out.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
1110
- try:
1111
- out.data.append_nest_to_list(key)
1112
- except KeyError:
1113
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
1114
-
1115
- if not src.startswith(']]', pos):
1116
- raise toml_suffixed_err(src, pos, "Expected ']]' at the end of an array declaration")
1117
- return pos + 2, key
1118
-
1119
-
1120
- def toml_key_value_rule(
1121
- src: str,
1122
- pos: TomlPos,
1123
- out: TomlOutput,
1124
- header: TomlKey,
1125
- parse_float: TomlParseFloat,
1126
- ) -> TomlPos:
1127
- pos, key, value = toml_parse_key_value_pair(src, pos, parse_float)
1128
- key_parent, key_stem = key[:-1], key[-1]
1129
- abs_key_parent = header + key_parent
1130
-
1131
- relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
1132
- for cont_key in relative_path_cont_keys:
1133
- # Check that dotted key syntax does not redefine an existing table
1134
- if out.flags.is_(cont_key, TomlFlags.EXPLICIT_NEST):
1135
- raise toml_suffixed_err(src, pos, f'Cannot redefine namespace {cont_key}')
1136
- # Containers in the relative path can't be opened with the table syntax or dotted key/value syntax in following
1137
- # table sections.
1138
- out.flags.add_pending(cont_key, TomlFlags.EXPLICIT_NEST)
1139
-
1140
- if out.flags.is_(abs_key_parent, TomlFlags.FROZEN):
1141
- raise toml_suffixed_err(
1142
- src,
1143
- pos,
1144
- f'Cannot mutate immutable namespace {abs_key_parent}',
1145
- )
995
+ def skip_until(
996
+ self,
997
+ expect: str,
998
+ *,
999
+ error_on: ta.FrozenSet[str],
1000
+ error_on_eof: bool,
1001
+ ) -> None:
1002
+ try:
1003
+ new_pos = self.src.index(expect, self.pos)
1004
+ except ValueError:
1005
+ new_pos = len(self.src)
1006
+ if error_on_eof:
1007
+ raise self.suffixed_err(f'Expected {expect!r}', pos=new_pos) from None
1146
1008
 
1147
- try:
1148
- nest = out.data.get_or_create_nest(abs_key_parent)
1149
- except KeyError:
1150
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
1151
- if key_stem in nest:
1152
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value')
1153
- # Mark inline table and array namespaces recursively immutable
1154
- if isinstance(value, (dict, list)):
1155
- out.flags.set(header + key, TomlFlags.FROZEN, recursive=True)
1156
- nest[key_stem] = value
1157
- return pos
1158
-
1159
-
1160
- def toml_parse_key_value_pair(
1161
- src: str,
1162
- pos: TomlPos,
1163
- parse_float: TomlParseFloat,
1164
- ) -> ta.Tuple[TomlPos, TomlKey, ta.Any]:
1165
- pos, key = toml_parse_key(src, pos)
1166
- try:
1167
- char: ta.Optional[str] = src[pos]
1168
- except IndexError:
1169
- char = None
1170
- if char != '=':
1171
- raise toml_suffixed_err(src, pos, "Expected '=' after a key in a key/value pair")
1172
- pos += 1
1173
- pos = toml_skip_chars(src, pos, TOML_WS)
1174
- pos, value = toml_parse_value(src, pos, parse_float)
1175
- return pos, key, value
1176
-
1177
-
1178
- def toml_parse_key(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, TomlKey]:
1179
- pos, key_part = toml_parse_key_part(src, pos)
1180
- key: TomlKey = (key_part,)
1181
- pos = toml_skip_chars(src, pos, TOML_WS)
1182
- while True:
1009
+ if not error_on.isdisjoint(self.src[self.pos:new_pos]):
1010
+ while self.src[self.pos] not in error_on:
1011
+ self.pos += 1
1012
+ raise self.suffixed_err(f'Found invalid character {self.src[self.pos]!r}')
1013
+ self.pos = new_pos
1014
+
1015
+ def skip_comment(self) -> None:
1183
1016
  try:
1184
- char: ta.Optional[str] = src[pos]
1017
+ char: ta.Optional[str] = self.src[self.pos]
1185
1018
  except IndexError:
1186
1019
  char = None
1187
- if char != '.':
1188
- return pos, key
1189
- pos += 1
1190
- pos = toml_skip_chars(src, pos, TOML_WS)
1191
- pos, key_part = toml_parse_key_part(src, pos)
1192
- key += (key_part,)
1193
- pos = toml_skip_chars(src, pos, TOML_WS)
1020
+ if char == '#':
1021
+ self.pos += 1
1022
+ self.skip_until(
1023
+ '\n',
1024
+ error_on=self.ILLEGAL_COMMENT_CHARS,
1025
+ error_on_eof=False,
1026
+ )
1194
1027
 
1028
+ def skip_comments_and_array_ws(self) -> None:
1029
+ while True:
1030
+ pos_before_skip = self.pos
1031
+ self.skip_chars(self.WS_AND_NEWLINE)
1032
+ self.skip_comment()
1033
+ if self.pos == pos_before_skip:
1034
+ return
1195
1035
 
1196
- def toml_parse_key_part(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
1197
- try:
1198
- char: ta.Optional[str] = src[pos]
1199
- except IndexError:
1200
- char = None
1201
- if char in TOML_BARE_KEY_CHARS:
1202
- start_pos = pos
1203
- pos = toml_skip_chars(src, pos, TOML_BARE_KEY_CHARS)
1204
- return pos, src[start_pos:pos]
1205
- if char == "'":
1206
- return toml_parse_literal_str(src, pos)
1207
- if char == '"':
1208
- return toml_parse_one_line_basic_str(src, pos)
1209
- raise toml_suffixed_err(src, pos, 'Invalid initial character for a key part')
1210
-
1211
-
1212
- def toml_parse_one_line_basic_str(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
1213
- pos += 1
1214
- return toml_parse_basic_str(src, pos, multiline=False)
1215
-
1216
-
1217
- def toml_parse_array(src: str, pos: TomlPos, parse_float: TomlParseFloat) -> ta.Tuple[TomlPos, list]:
1218
- pos += 1
1219
- array: list = []
1220
-
1221
- pos = toml_skip_comments_and_array_ws(src, pos)
1222
- if src.startswith(']', pos):
1223
- return pos + 1, array
1224
- while True:
1225
- pos, val = toml_parse_value(src, pos, parse_float)
1226
- array.append(val)
1227
- pos = toml_skip_comments_and_array_ws(src, pos)
1228
-
1229
- c = src[pos:pos + 1]
1230
- if c == ']':
1231
- return pos + 1, array
1232
- if c != ',':
1233
- raise toml_suffixed_err(src, pos, 'Unclosed array')
1234
- pos += 1
1235
-
1236
- pos = toml_skip_comments_and_array_ws(src, pos)
1237
- if src.startswith(']', pos):
1238
- return pos + 1, array
1239
-
1240
-
1241
- def toml_parse_inline_table(src: str, pos: TomlPos, parse_float: TomlParseFloat) -> ta.Tuple[TomlPos, dict]:
1242
- pos += 1
1243
- nested_dict = TomlNestedDict()
1244
- flags = TomlFlags()
1245
-
1246
- pos = toml_skip_chars(src, pos, TOML_WS)
1247
- if src.startswith('}', pos):
1248
- return pos + 1, nested_dict.dict
1249
- while True:
1250
- pos, key, value = toml_parse_key_value_pair(src, pos, parse_float)
1036
+ def create_dict_rule(self) -> TomlKey:
1037
+ self.pos += 1 # Skip "["
1038
+ self.skip_chars(self.WS)
1039
+ key = self.parse_key()
1040
+
1041
+ if self.flags.is_(key, TomlFlags.EXPLICIT_NEST) or self.flags.is_(key, TomlFlags.FROZEN):
1042
+ raise self.suffixed_err(f'Cannot declare {key} twice')
1043
+ self.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
1044
+ try:
1045
+ self.data.get_or_create_nest(key)
1046
+ except KeyError:
1047
+ raise self.suffixed_err('Cannot overwrite a value') from None
1048
+
1049
+ if not self.src.startswith(']', self.pos):
1050
+ raise self.suffixed_err("Expected ']' at the end of a table declaration")
1051
+ self.pos += 1
1052
+ return key
1053
+
1054
+ def create_list_rule(self) -> TomlKey:
1055
+ self.pos += 2 # Skip "[["
1056
+ self.skip_chars(self.WS)
1057
+ key = self.parse_key()
1058
+
1059
+ if self.flags.is_(key, TomlFlags.FROZEN):
1060
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {key}')
1061
+ # Free the namespace now that it points to another empty list item...
1062
+ self.flags.unset_all(key)
1063
+ # ...but this key precisely is still prohibited from table declaration
1064
+ self.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
1065
+ try:
1066
+ self.data.append_nest_to_list(key)
1067
+ except KeyError:
1068
+ raise self.suffixed_err('Cannot overwrite a value') from None
1069
+
1070
+ if not self.src.startswith(']]', self.pos):
1071
+ raise self.suffixed_err("Expected ']]' at the end of an array declaration")
1072
+ self.pos += 2
1073
+ return key
1074
+
1075
+ def key_value_rule(self, header: TomlKey) -> None:
1076
+ key, value = self.parse_key_value_pair()
1251
1077
  key_parent, key_stem = key[:-1], key[-1]
1252
- if flags.is_(key, TomlFlags.FROZEN):
1253
- raise toml_suffixed_err(src, pos, f'Cannot mutate immutable namespace {key}')
1078
+ abs_key_parent = header + key_parent
1079
+
1080
+ relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
1081
+ for cont_key in relative_path_cont_keys:
1082
+ # Check that dotted key syntax does not redefine an existing table
1083
+ if self.flags.is_(cont_key, TomlFlags.EXPLICIT_NEST):
1084
+ raise self.suffixed_err(f'Cannot redefine namespace {cont_key}')
1085
+ # Containers in the relative path can't be opened with the table syntax or dotted key/value syntax in
1086
+ # following table sections.
1087
+ self.flags.add_pending(cont_key, TomlFlags.EXPLICIT_NEST)
1088
+
1089
+ if self.flags.is_(abs_key_parent, TomlFlags.FROZEN):
1090
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {abs_key_parent}')
1091
+
1254
1092
  try:
1255
- nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
1093
+ nest = self.data.get_or_create_nest(abs_key_parent)
1256
1094
  except KeyError:
1257
- raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
1095
+ raise self.suffixed_err('Cannot overwrite a value') from None
1258
1096
  if key_stem in nest:
1259
- raise toml_suffixed_err(src, pos, f'Duplicate inline table key {key_stem!r}')
1260
- nest[key_stem] = value
1261
- pos = toml_skip_chars(src, pos, TOML_WS)
1262
- c = src[pos:pos + 1]
1263
- if c == '}':
1264
- return pos + 1, nested_dict.dict
1265
- if c != ',':
1266
- raise toml_suffixed_err(src, pos, 'Unclosed inline table')
1097
+ raise self.suffixed_err('Cannot overwrite a value')
1098
+ # Mark inline table and array namespaces recursively immutable
1267
1099
  if isinstance(value, (dict, list)):
1268
- flags.set(key, TomlFlags.FROZEN, recursive=True)
1269
- pos += 1
1270
- pos = toml_skip_chars(src, pos, TOML_WS)
1271
-
1100
+ self.flags.set(header + key, TomlFlags.FROZEN, recursive=True)
1101
+ nest[key_stem] = value
1272
1102
 
1273
- def toml_parse_basic_str_escape(
1274
- src: str,
1275
- pos: TomlPos,
1276
- *,
1277
- multiline: bool = False,
1278
- ) -> ta.Tuple[TomlPos, str]:
1279
- escape_id = src[pos:pos + 2]
1280
- pos += 2
1281
- if multiline and escape_id in {'\\ ', '\\\t', '\\\n'}:
1282
- # Skip whitespace until next non-whitespace character or end of the doc. Error if non-whitespace is found before
1283
- # newline.
1284
- if escape_id != '\\\n':
1285
- pos = toml_skip_chars(src, pos, TOML_WS)
1103
+ def parse_key_value_pair(self) -> ta.Tuple[TomlKey, ta.Any]:
1104
+ key = self.parse_key()
1105
+ try:
1106
+ char: ta.Optional[str] = self.src[self.pos]
1107
+ except IndexError:
1108
+ char = None
1109
+ if char != '=':
1110
+ raise self.suffixed_err("Expected '=' after a key in a key/value pair")
1111
+ self.pos += 1
1112
+ self.skip_chars(self.WS)
1113
+ value = self.parse_value()
1114
+ return key, value
1115
+
1116
+ def parse_key(self) -> TomlKey:
1117
+ key_part = self.parse_key_part()
1118
+ key: TomlKey = (key_part,)
1119
+ self.skip_chars(self.WS)
1120
+ while True:
1286
1121
  try:
1287
- char = src[pos]
1122
+ char: ta.Optional[str] = self.src[self.pos]
1288
1123
  except IndexError:
1289
- return pos, ''
1290
- if char != '\n':
1291
- raise toml_suffixed_err(src, pos, "Unescaped '\\' in a string")
1292
- pos += 1
1293
- pos = toml_skip_chars(src, pos, TOML_WS_AND_NEWLINE)
1294
- return pos, ''
1295
- if escape_id == '\\u':
1296
- return toml_parse_hex_char(src, pos, 4)
1297
- if escape_id == '\\U':
1298
- return toml_parse_hex_char(src, pos, 8)
1299
- try:
1300
- return pos, TOML_BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
1301
- except KeyError:
1302
- raise toml_suffixed_err(src, pos, "Unescaped '\\' in a string") from None
1124
+ char = None
1125
+ if char != '.':
1126
+ return key
1127
+ self.pos += 1
1128
+ self.skip_chars(self.WS)
1129
+ key_part = self.parse_key_part()
1130
+ key += (key_part,)
1131
+ self.skip_chars(self.WS)
1132
+
1133
+ def parse_key_part(self) -> str:
1134
+ try:
1135
+ char: ta.Optional[str] = self.src[self.pos]
1136
+ except IndexError:
1137
+ char = None
1138
+ if char in self.BARE_KEY_CHARS:
1139
+ start_pos = self.pos
1140
+ self.skip_chars(self.BARE_KEY_CHARS)
1141
+ return self.src[start_pos:self.pos]
1142
+ if char == "'":
1143
+ return self.parse_literal_str()
1144
+ if char == '"':
1145
+ return self.parse_one_line_basic_str()
1146
+ raise self.suffixed_err('Invalid initial character for a key part')
1303
1147
 
1148
+ def parse_one_line_basic_str(self) -> str:
1149
+ self.pos += 1
1150
+ return self.parse_basic_str(multiline=False)
1304
1151
 
1305
- def toml_parse_basic_str_escape_multiline(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
1306
- return toml_parse_basic_str_escape(src, pos, multiline=True)
1152
+ def parse_array(self) -> list:
1153
+ self.pos += 1
1154
+ array: list = []
1307
1155
 
1156
+ self.skip_comments_and_array_ws()
1157
+ if self.src.startswith(']', self.pos):
1158
+ self.pos += 1
1159
+ return array
1160
+ while True:
1161
+ val = self.parse_value()
1162
+ array.append(val)
1163
+ self.skip_comments_and_array_ws()
1164
+
1165
+ c = self.src[self.pos:self.pos + 1]
1166
+ if c == ']':
1167
+ self.pos += 1
1168
+ return array
1169
+ if c != ',':
1170
+ raise self.suffixed_err('Unclosed array')
1171
+ self.pos += 1
1172
+
1173
+ self.skip_comments_and_array_ws()
1174
+ if self.src.startswith(']', self.pos):
1175
+ self.pos += 1
1176
+ return array
1177
+
1178
+ def parse_inline_table(self) -> dict:
1179
+ self.pos += 1
1180
+ nested_dict = TomlNestedDict()
1181
+ flags = TomlFlags()
1182
+
1183
+ self.skip_chars(self.WS)
1184
+ if self.src.startswith('}', self.pos):
1185
+ self.pos += 1
1186
+ return nested_dict.dict
1187
+ while True:
1188
+ key, value = self.parse_key_value_pair()
1189
+ key_parent, key_stem = key[:-1], key[-1]
1190
+ if flags.is_(key, TomlFlags.FROZEN):
1191
+ raise self.suffixed_err(f'Cannot mutate immutable namespace {key}')
1192
+ try:
1193
+ nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
1194
+ except KeyError:
1195
+ raise self.suffixed_err('Cannot overwrite a value') from None
1196
+ if key_stem in nest:
1197
+ raise self.suffixed_err(f'Duplicate inline table key {key_stem!r}')
1198
+ nest[key_stem] = value
1199
+ self.skip_chars(self.WS)
1200
+ c = self.src[self.pos:self.pos + 1]
1201
+ if c == '}':
1202
+ self.pos += 1
1203
+ return nested_dict.dict
1204
+ if c != ',':
1205
+ raise self.suffixed_err('Unclosed inline table')
1206
+ if isinstance(value, (dict, list)):
1207
+ flags.set(key, TomlFlags.FROZEN, recursive=True)
1208
+ self.pos += 1
1209
+ self.skip_chars(self.WS)
1210
+
1211
+ def parse_basic_str_escape(self, multiline: bool = False) -> str:
1212
+ escape_id = self.src[self.pos:self.pos + 2]
1213
+ self.pos += 2
1214
+ if multiline and escape_id in {'\\ ', '\\\t', '\\\n'}:
1215
+ # Skip whitespace until next non-whitespace character or end of the doc. Error if non-whitespace is found
1216
+ # before newline.
1217
+ if escape_id != '\\\n':
1218
+ self.skip_chars(self.WS)
1219
+ try:
1220
+ char = self.src[self.pos]
1221
+ except IndexError:
1222
+ return ''
1223
+ if char != '\n':
1224
+ raise self.suffixed_err("Unescaped '\\' in a string")
1225
+ self.pos += 1
1226
+ self.skip_chars(self.WS_AND_NEWLINE)
1227
+ return ''
1228
+ if escape_id == '\\u':
1229
+ return self.parse_hex_char(4)
1230
+ if escape_id == '\\U':
1231
+ return self.parse_hex_char(8)
1232
+ try:
1233
+ return self.BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
1234
+ except KeyError:
1235
+ raise self.suffixed_err("Unescaped '\\' in a string") from None
1308
1236
 
1309
- def toml_parse_hex_char(src: str, pos: TomlPos, hex_len: int) -> ta.Tuple[TomlPos, str]:
1310
- hex_str = src[pos:pos + hex_len]
1311
- if len(hex_str) != hex_len or not TOML_HEXDIGIT_CHARS.issuperset(hex_str):
1312
- raise toml_suffixed_err(src, pos, 'Invalid hex value')
1313
- pos += hex_len
1314
- hex_int = int(hex_str, 16)
1315
- if not toml_is_unicode_scalar_value(hex_int):
1316
- raise toml_suffixed_err(src, pos, 'Escaped character is not a Unicode scalar value')
1317
- return pos, chr(hex_int)
1237
+ def parse_basic_str_escape_multiline(self) -> str:
1238
+ return self.parse_basic_str_escape(multiline=True)
1318
1239
 
1240
+ @classmethod
1241
+ def is_unicode_scalar_value(cls, codepoint: int) -> bool:
1242
+ return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
1243
+
1244
+ def parse_hex_char(self, hex_len: int) -> str:
1245
+ hex_str = self.src[self.pos:self.pos + hex_len]
1246
+ if len(hex_str) != hex_len or not self.HEXDIGIT_CHARS.issuperset(hex_str):
1247
+ raise self.suffixed_err('Invalid hex value')
1248
+ self.pos += hex_len
1249
+ hex_int = int(hex_str, 16)
1250
+ if not self.is_unicode_scalar_value(hex_int):
1251
+ raise self.suffixed_err('Escaped character is not a Unicode scalar value')
1252
+ return chr(hex_int)
1253
+
1254
+ def parse_literal_str(self) -> str:
1255
+ self.pos += 1 # Skip starting apostrophe
1256
+ start_pos = self.pos
1257
+ self.skip_until("'", error_on=self.ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True)
1258
+ end_pos = self.pos
1259
+ self.pos += 1
1260
+ return self.src[start_pos:end_pos] # Skip ending apostrophe
1261
+
1262
+ def parse_multiline_str(self, *, literal: bool) -> str:
1263
+ self.pos += 3
1264
+ if self.src.startswith('\n', self.pos):
1265
+ self.pos += 1
1266
+
1267
+ if literal:
1268
+ delim = "'"
1269
+ start_pos = self.pos
1270
+ self.skip_until(
1271
+ "'''",
1272
+ error_on=self.ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
1273
+ error_on_eof=True,
1274
+ )
1275
+ result = self.src[start_pos:self.pos]
1276
+ self.pos += 3
1277
+ else:
1278
+ delim = '"'
1279
+ result = self.parse_basic_str(multiline=True)
1280
+
1281
+ # Add at maximum two extra apostrophes/quotes if the end sequence is 4 or 5 chars long instead of just 3.
1282
+ if not self.src.startswith(delim, self.pos):
1283
+ return result
1284
+ self.pos += 1
1285
+ if not self.src.startswith(delim, self.pos):
1286
+ return result + delim
1287
+ self.pos += 1
1288
+ return result + (delim * 2)
1289
+
1290
+ def parse_basic_str(self, *, multiline: bool) -> str:
1291
+ if multiline:
1292
+ error_on = self.ILLEGAL_MULTILINE_BASIC_STR_CHARS
1293
+ parse_escapes = self.parse_basic_str_escape_multiline
1294
+ else:
1295
+ error_on = self.ILLEGAL_BASIC_STR_CHARS
1296
+ parse_escapes = self.parse_basic_str_escape
1297
+ result = ''
1298
+ start_pos = self.pos
1299
+ while True:
1300
+ try:
1301
+ char = self.src[self.pos]
1302
+ except IndexError:
1303
+ raise self.suffixed_err('Unterminated string') from None
1304
+ if char == '"':
1305
+ if not multiline:
1306
+ end_pos = self.pos
1307
+ self.pos += 1
1308
+ return result + self.src[start_pos:end_pos]
1309
+ if self.src.startswith('"""', self.pos):
1310
+ end_pos = self.pos
1311
+ self.pos += 3
1312
+ return result + self.src[start_pos:end_pos]
1313
+ self.pos += 1
1314
+ continue
1315
+ if char == '\\':
1316
+ result += self.src[start_pos:self.pos]
1317
+ parsed_escape = parse_escapes()
1318
+ result += parsed_escape
1319
+ start_pos = self.pos
1320
+ continue
1321
+ if char in error_on:
1322
+ raise self.suffixed_err(f'Illegal character {char!r}')
1323
+ self.pos += 1
1319
1324
 
1320
- def toml_parse_literal_str(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
1321
- pos += 1 # Skip starting apostrophe
1322
- start_pos = pos
1323
- pos = toml_skip_until(
1324
- src, pos, "'", error_on=TOML_ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True,
1325
- )
1326
- return pos + 1, src[start_pos:pos] # Skip ending apostrophe
1327
-
1328
-
1329
- def toml_parse_multiline_str(src: str, pos: TomlPos, *, literal: bool) -> ta.Tuple[TomlPos, str]:
1330
- pos += 3
1331
- if src.startswith('\n', pos):
1332
- pos += 1
1333
-
1334
- if literal:
1335
- delim = "'"
1336
- end_pos = toml_skip_until(
1337
- src,
1338
- pos,
1339
- "'''",
1340
- error_on=TOML_ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
1341
- error_on_eof=True,
1342
- )
1343
- result = src[pos:end_pos]
1344
- pos = end_pos + 3
1345
- else:
1346
- delim = '"'
1347
- pos, result = toml_parse_basic_str(src, pos, multiline=True)
1348
-
1349
- # Add at maximum two extra apostrophes/quotes if the end sequence is 4 or 5 chars long instead of just 3.
1350
- if not src.startswith(delim, pos):
1351
- return pos, result
1352
- pos += 1
1353
- if not src.startswith(delim, pos):
1354
- return pos, result + delim
1355
- pos += 1
1356
- return pos, result + (delim * 2)
1357
-
1358
-
1359
- def toml_parse_basic_str(src: str, pos: TomlPos, *, multiline: bool) -> ta.Tuple[TomlPos, str]:
1360
- if multiline:
1361
- error_on = TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS
1362
- parse_escapes = toml_parse_basic_str_escape_multiline
1363
- else:
1364
- error_on = TOML_ILLEGAL_BASIC_STR_CHARS
1365
- parse_escapes = toml_parse_basic_str_escape
1366
- result = ''
1367
- start_pos = pos
1368
- while True:
1325
+ def parse_value(self) -> ta.Any: # noqa: C901
1369
1326
  try:
1370
- char = src[pos]
1327
+ char: ta.Optional[str] = self.src[self.pos]
1371
1328
  except IndexError:
1372
- raise toml_suffixed_err(src, pos, 'Unterminated string') from None
1329
+ char = None
1330
+
1331
+ # IMPORTANT: order conditions based on speed of checking and likelihood
1332
+
1333
+ # Basic strings
1373
1334
  if char == '"':
1374
- if not multiline:
1375
- return pos + 1, result + src[start_pos:pos]
1376
- if src.startswith('"""', pos):
1377
- return pos + 3, result + src[start_pos:pos]
1378
- pos += 1
1379
- continue
1380
- if char == '\\':
1381
- result += src[start_pos:pos]
1382
- pos, parsed_escape = parse_escapes(src, pos)
1383
- result += parsed_escape
1384
- start_pos = pos
1385
- continue
1386
- if char in error_on:
1387
- raise toml_suffixed_err(src, pos, f'Illegal character {char!r}')
1388
- pos += 1
1335
+ if self.src.startswith('"""', self.pos):
1336
+ return self.parse_multiline_str(literal=False)
1337
+ return self.parse_one_line_basic_str()
1338
+
1339
+ # Literal strings
1340
+ if char == "'":
1341
+ if self.src.startswith("'''", self.pos):
1342
+ return self.parse_multiline_str(literal=True)
1343
+ return self.parse_literal_str()
1344
+
1345
+ # Booleans
1346
+ if char == 't':
1347
+ if self.src.startswith('true', self.pos):
1348
+ self.pos += 4
1349
+ return True
1350
+ if char == 'f':
1351
+ if self.src.startswith('false', self.pos):
1352
+ self.pos += 5
1353
+ return False
1389
1354
 
1355
+ # Arrays
1356
+ if char == '[':
1357
+ return self.parse_array()
1390
1358
 
1391
- def toml_parse_value( # noqa: C901
1392
- src: str,
1393
- pos: TomlPos,
1394
- parse_float: TomlParseFloat,
1395
- ) -> ta.Tuple[TomlPos, ta.Any]:
1396
- try:
1397
- char: ta.Optional[str] = src[pos]
1398
- except IndexError:
1399
- char = None
1400
-
1401
- # IMPORTANT: order conditions based on speed of checking and likelihood
1402
-
1403
- # Basic strings
1404
- if char == '"':
1405
- if src.startswith('"""', pos):
1406
- return toml_parse_multiline_str(src, pos, literal=False)
1407
- return toml_parse_one_line_basic_str(src, pos)
1408
-
1409
- # Literal strings
1410
- if char == "'":
1411
- if src.startswith("'''", pos):
1412
- return toml_parse_multiline_str(src, pos, literal=True)
1413
- return toml_parse_literal_str(src, pos)
1414
-
1415
- # Booleans
1416
- if char == 't':
1417
- if src.startswith('true', pos):
1418
- return pos + 4, True
1419
- if char == 'f':
1420
- if src.startswith('false', pos):
1421
- return pos + 5, False
1422
-
1423
- # Arrays
1424
- if char == '[':
1425
- return toml_parse_array(src, pos, parse_float)
1426
-
1427
- # Inline tables
1428
- if char == '{':
1429
- return toml_parse_inline_table(src, pos, parse_float)
1430
-
1431
- # Dates and times
1432
- datetime_match = TOML_RE_DATETIME.match(src, pos)
1433
- if datetime_match:
1434
- try:
1435
- datetime_obj = toml_match_to_datetime(datetime_match)
1436
- except ValueError as e:
1437
- raise toml_suffixed_err(src, pos, 'Invalid date or datetime') from e
1438
- return datetime_match.end(), datetime_obj
1439
- localtime_match = TOML_RE_LOCALTIME.match(src, pos)
1440
- if localtime_match:
1441
- return localtime_match.end(), toml_match_to_localtime(localtime_match)
1442
-
1443
- # Integers and "normal" floats. The regex will greedily match any type starting with a decimal char, so needs to be
1444
- # located after handling of dates and times.
1445
- number_match = TOML_RE_NUMBER.match(src, pos)
1446
- if number_match:
1447
- return number_match.end(), toml_match_to_number(number_match, parse_float)
1448
-
1449
- # Special floats
1450
- first_three = src[pos:pos + 3]
1451
- if first_three in {'inf', 'nan'}:
1452
- return pos + 3, parse_float(first_three)
1453
- first_four = src[pos:pos + 4]
1454
- if first_four in {'-inf', '+inf', '-nan', '+nan'}:
1455
- return pos + 4, parse_float(first_four)
1456
-
1457
- raise toml_suffixed_err(src, pos, 'Invalid value')
1458
-
1459
-
1460
- def toml_suffixed_err(src: str, pos: TomlPos, msg: str) -> TomlDecodeError:
1461
- """Return a `TomlDecodeError` where error message is suffixed with coordinates in source."""
1462
-
1463
- def coord_repr(src: str, pos: TomlPos) -> str:
1464
- if pos >= len(src):
1359
+ # Inline tables
1360
+ if char == '{':
1361
+ return self.parse_inline_table()
1362
+
1363
+ # Dates and times
1364
+ datetime_match = self.RE_DATETIME.match(self.src, self.pos)
1365
+ if datetime_match:
1366
+ try:
1367
+ datetime_obj = self.match_to_datetime(datetime_match)
1368
+ except ValueError as e:
1369
+ raise self.suffixed_err('Invalid date or datetime') from e
1370
+ self.pos = datetime_match.end()
1371
+ return datetime_obj
1372
+ localtime_match = self.RE_LOCALTIME.match(self.src, self.pos)
1373
+ if localtime_match:
1374
+ self.pos = localtime_match.end()
1375
+ return self.match_to_localtime(localtime_match)
1376
+
1377
+ # Integers and "normal" floats. The regex will greedily match any type starting with a decimal char, so needs to
1378
+ # be located after handling of dates and times.
1379
+ number_match = self.RE_NUMBER.match(self.src, self.pos)
1380
+ if number_match:
1381
+ self.pos = number_match.end()
1382
+ return self.match_to_number(number_match, self.parse_float)
1383
+
1384
+ # Special floats
1385
+ first_three = self.src[self.pos:self.pos + 3]
1386
+ if first_three in {'inf', 'nan'}:
1387
+ self.pos += 3
1388
+ return self.parse_float(first_three)
1389
+ first_four = self.src[self.pos:self.pos + 4]
1390
+ if first_four in {'-inf', '+inf', '-nan', '+nan'}:
1391
+ self.pos += 4
1392
+ return self.parse_float(first_four)
1393
+
1394
+ raise self.suffixed_err('Invalid value')
1395
+
1396
+ def coord_repr(self, pos: TomlPos) -> str:
1397
+ if pos >= len(self.src):
1465
1398
  return 'end of document'
1466
- line = src.count('\n', 0, pos) + 1
1399
+ line = self.src.count('\n', 0, pos) + 1
1467
1400
  if line == 1:
1468
1401
  column = pos + 1
1469
1402
  else:
1470
- column = pos - src.rindex('\n', 0, pos)
1403
+ column = pos - self.src.rindex('\n', 0, pos)
1471
1404
  return f'line {line}, column {column}'
1472
1405
 
1473
- return TomlDecodeError(f'{msg} (at {coord_repr(src, pos)})')
1406
+ def suffixed_err(self, msg: str, *, pos: ta.Optional[TomlPos] = None) -> TomlDecodeError:
1407
+ """Return a `TomlDecodeError` where error message is suffixed with coordinates in source."""
1474
1408
 
1409
+ if pos is None:
1410
+ pos = self.pos
1411
+ return TomlDecodeError(f'{msg} (at {self.coord_repr(pos)})')
1475
1412
 
1476
- def toml_is_unicode_scalar_value(codepoint: int) -> bool:
1477
- return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
1413
+ _TIME_RE_STR = r'([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?'
1478
1414
 
1415
+ RE_NUMBER = re.compile(
1416
+ r"""
1417
+ 0
1418
+ (?:
1419
+ x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
1420
+ |
1421
+ b[01](?:_?[01])* # bin
1422
+ |
1423
+ o[0-7](?:_?[0-7])* # oct
1424
+ )
1425
+ |
1426
+ [+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
1427
+ (?P<floatpart>
1428
+ (?:\.[0-9](?:_?[0-9])*)? # optional fractional part
1429
+ (?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
1430
+ )
1431
+ """,
1432
+ flags=re.VERBOSE,
1433
+ )
1479
1434
 
1480
- def toml_make_safe_parse_float(parse_float: TomlParseFloat) -> TomlParseFloat:
1481
- """A decorator to make `parse_float` safe.
1435
+ RE_LOCALTIME = re.compile(_TIME_RE_STR)
1436
+
1437
+ RE_DATETIME = re.compile(
1438
+ rf"""
1439
+ ([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
1440
+ (?:
1441
+ [Tt ]
1442
+ {_TIME_RE_STR}
1443
+ (?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
1444
+ )?
1445
+ """,
1446
+ flags=re.VERBOSE,
1447
+ )
1482
1448
 
1483
- `parse_float` must not return dicts or lists, because these types would be mixed with parsed TOML tables and arrays,
1484
- thus confusing the parser. The returned decorated callable raises `ValueError` instead of returning illegal types.
1485
- """
1486
- # The default `float` callable never returns illegal types. Optimize it.
1487
- if parse_float is float:
1488
- return float
1449
+ @classmethod
1450
+ def match_to_datetime(cls, match: re.Match) -> ta.Union[datetime.datetime, datetime.date]:
1451
+ """
1452
+ Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
1489
1453
 
1490
- def safe_parse_float(float_str: str) -> ta.Any:
1491
- float_value = parse_float(float_str)
1492
- if isinstance(float_value, (dict, list)):
1493
- raise ValueError('parse_float must not return dicts or lists') # noqa
1494
- return float_value
1454
+ Raises ValueError if the match does not correspond to a valid date or datetime.
1455
+ """
1495
1456
 
1496
- return safe_parse_float
1457
+ (
1458
+ year_str,
1459
+ month_str,
1460
+ day_str,
1461
+ hour_str,
1462
+ minute_str,
1463
+ sec_str,
1464
+ micros_str,
1465
+ zulu_time,
1466
+ offset_sign_str,
1467
+ offset_hour_str,
1468
+ offset_minute_str,
1469
+ ) = match.groups()
1470
+ year, month, day = int(year_str), int(month_str), int(day_str)
1471
+ if hour_str is None:
1472
+ return datetime.date(year, month, day)
1473
+ hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
1474
+ micros = int(micros_str.ljust(6, '0')) if micros_str else 0
1475
+ if offset_sign_str:
1476
+ tz: ta.Optional[datetime.tzinfo] = toml_cached_tz(
1477
+ offset_hour_str, offset_minute_str, offset_sign_str,
1478
+ )
1479
+ elif zulu_time:
1480
+ tz = datetime.UTC
1481
+ else: # local date-time
1482
+ tz = None
1483
+ return datetime.datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
1484
+
1485
+ @classmethod
1486
+ def match_to_localtime(cls, match: re.Match) -> datetime.time:
1487
+ hour_str, minute_str, sec_str, micros_str = match.groups()
1488
+ micros = int(micros_str.ljust(6, '0')) if micros_str else 0
1489
+ return datetime.time(int(hour_str), int(minute_str), int(sec_str), micros)
1490
+
1491
+ @classmethod
1492
+ def match_to_number(cls, match: re.Match, parse_float: TomlParseFloat) -> ta.Any:
1493
+ if match.group('floatpart'):
1494
+ return parse_float(match.group())
1495
+ return int(match.group(), 0)
1497
1496
 
1498
1497
 
1499
1498
  ########################################