omlish 0.0.0.dev268__py3-none-any.whl → 0.0.0.dev270__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omlish/__about__.py +2 -2
- omlish/bootstrap/marshal.py +4 -3
- omlish/dataclasses/__init__.py +7 -0
- omlish/dataclasses/impl/metaclass.py +1 -0
- omlish/dataclasses/impl/simple.py +1 -1
- omlish/formats/toml/parser.py +593 -594
- omlish/lang/classes/abstract.py +3 -0
- omlish/lang/comparison.py +3 -0
- omlish/lang/datetimes.py +3 -0
- omlish/lang/generators.py +2 -2
- omlish/lang/maybes.py +3 -0
- omlish/lang/resolving.py +3 -0
- omlish/lang/sys.py +3 -0
- omlish/marshal/__init__.py +16 -0
- omlish/marshal/base.py +230 -14
- omlish/marshal/composite/iterables.py +3 -0
- omlish/marshal/composite/literals.py +7 -4
- omlish/marshal/composite/mappings.py +3 -0
- omlish/marshal/composite/maybes.py +3 -0
- omlish/marshal/composite/newtypes.py +7 -4
- omlish/marshal/composite/optionals.py +7 -4
- omlish/marshal/composite/wrapped.py +3 -0
- omlish/marshal/global_.py +16 -0
- omlish/marshal/objects/dataclasses.py +5 -5
- omlish/marshal/objects/marshal.py +2 -2
- omlish/marshal/objects/namedtuples.py +4 -4
- omlish/marshal/objects/unmarshal.py +2 -2
- omlish/marshal/polymorphism/marshal.py +2 -2
- omlish/marshal/polymorphism/metadata.py +9 -3
- omlish/marshal/polymorphism/unions.py +6 -6
- omlish/marshal/polymorphism/unmarshal.py +2 -2
- omlish/marshal/singular/base64.py +3 -0
- omlish/marshal/singular/enums.py +7 -4
- omlish/marshal/singular/numbers.py +3 -0
- omlish/marshal/singular/uuids.py +3 -0
- omlish/marshal/standard.py +25 -3
- omlish/marshal/trivial/any.py +3 -0
- omlish/marshal/trivial/forbidden.py +13 -2
- omlish/marshal/trivial/nop.py +3 -0
- omlish/secrets/marshal.py +4 -6
- omlish/specs/jsonrpc/marshal.py +4 -5
- omlish/specs/openapi/marshal.py +4 -2
- omlish/sql/alchemy/__init__.py +31 -0
- omlish/sql/alchemy/apiadapter.py +121 -0
- omlish/sql/api/__init__.py +39 -0
- omlish/sql/api/base.py +1 -0
- omlish/sql/parsing/parsing.py +1 -1
- omlish/sql/queries/__init__.py +4 -0
- omlish/sql/queries/base.py +113 -2
- omlish/sql/queries/exprs.py +15 -2
- omlish/sql/queries/inserts.py +2 -1
- omlish/sql/queries/marshal.py +23 -9
- omlish/sql/queries/params.py +3 -2
- omlish/sql/queries/rendering.py +16 -4
- omlish/sql/queries/selects.py +17 -2
- omlish/sql/tabledefs/marshal.py +4 -2
- {omlish-0.0.0.dev268.dist-info → omlish-0.0.0.dev270.dist-info}/METADATA +1 -1
- {omlish-0.0.0.dev268.dist-info → omlish-0.0.0.dev270.dist-info}/RECORD +62 -61
- {omlish-0.0.0.dev268.dist-info → omlish-0.0.0.dev270.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev268.dist-info → omlish-0.0.0.dev270.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev268.dist-info → omlish-0.0.0.dev270.dist-info}/licenses/LICENSE +0 -0
- {omlish-0.0.0.dev268.dist-info → omlish-0.0.0.dev270.dist-info}/top_level.txt +0 -0
omlish/formats/toml/parser.py
CHANGED
@@ -57,75 +57,6 @@ TomlPos = int # ta.TypeAlias
|
|
57
57
|
##
|
58
58
|
|
59
59
|
|
60
|
-
_TOML_TIME_RE_STR = r'([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?'
|
61
|
-
|
62
|
-
TOML_RE_NUMBER = re.compile(
|
63
|
-
r"""
|
64
|
-
0
|
65
|
-
(?:
|
66
|
-
x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
|
67
|
-
|
|
68
|
-
b[01](?:_?[01])* # bin
|
69
|
-
|
|
70
|
-
o[0-7](?:_?[0-7])* # oct
|
71
|
-
)
|
72
|
-
|
|
73
|
-
[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
|
74
|
-
(?P<floatpart>
|
75
|
-
(?:\.[0-9](?:_?[0-9])*)? # optional fractional part
|
76
|
-
(?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
|
77
|
-
)
|
78
|
-
""",
|
79
|
-
flags=re.VERBOSE,
|
80
|
-
)
|
81
|
-
TOML_RE_LOCALTIME = re.compile(_TOML_TIME_RE_STR)
|
82
|
-
TOML_RE_DATETIME = re.compile(
|
83
|
-
rf"""
|
84
|
-
([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
|
85
|
-
(?:
|
86
|
-
[Tt ]
|
87
|
-
{_TOML_TIME_RE_STR}
|
88
|
-
(?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
|
89
|
-
)?
|
90
|
-
""",
|
91
|
-
flags=re.VERBOSE,
|
92
|
-
)
|
93
|
-
|
94
|
-
|
95
|
-
def toml_match_to_datetime(match: re.Match) -> ta.Union[datetime.datetime, datetime.date]:
|
96
|
-
"""Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
|
97
|
-
|
98
|
-
Raises ValueError if the match does not correspond to a valid date or datetime.
|
99
|
-
"""
|
100
|
-
(
|
101
|
-
year_str,
|
102
|
-
month_str,
|
103
|
-
day_str,
|
104
|
-
hour_str,
|
105
|
-
minute_str,
|
106
|
-
sec_str,
|
107
|
-
micros_str,
|
108
|
-
zulu_time,
|
109
|
-
offset_sign_str,
|
110
|
-
offset_hour_str,
|
111
|
-
offset_minute_str,
|
112
|
-
) = match.groups()
|
113
|
-
year, month, day = int(year_str), int(month_str), int(day_str)
|
114
|
-
if hour_str is None:
|
115
|
-
return datetime.date(year, month, day)
|
116
|
-
hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
|
117
|
-
micros = int(micros_str.ljust(6, '0')) if micros_str else 0
|
118
|
-
if offset_sign_str:
|
119
|
-
tz: ta.Optional[datetime.tzinfo] = toml_cached_tz(
|
120
|
-
offset_hour_str, offset_minute_str, offset_sign_str,
|
121
|
-
)
|
122
|
-
elif zulu_time:
|
123
|
-
tz = datetime.UTC
|
124
|
-
else: # local date-time
|
125
|
-
tz = None
|
126
|
-
return datetime.datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
|
127
|
-
|
128
|
-
|
129
60
|
@functools.lru_cache() # noqa
|
130
61
|
def toml_cached_tz(hour_str: str, minute_str: str, sign_str: str) -> datetime.timezone:
|
131
62
|
sign = 1 if sign_str == '+' else -1
|
@@ -137,47 +68,25 @@ def toml_cached_tz(hour_str: str, minute_str: str, sign_str: str) -> datetime.ti
|
|
137
68
|
)
|
138
69
|
|
139
70
|
|
140
|
-
def
|
141
|
-
|
142
|
-
|
143
|
-
return datetime.time(int(hour_str), int(minute_str), int(sec_str), micros)
|
144
|
-
|
145
|
-
|
146
|
-
def toml_match_to_number(match: re.Match, parse_float: TomlParseFloat) -> ta.Any:
|
147
|
-
if match.group('floatpart'):
|
148
|
-
return parse_float(match.group())
|
149
|
-
return int(match.group(), 0)
|
150
|
-
|
151
|
-
|
152
|
-
TOML_ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
|
153
|
-
|
154
|
-
# Neither of these sets include quotation mark or backslash. They are currently handled as separate cases in the parser
|
155
|
-
# functions.
|
156
|
-
TOML_ILLEGAL_BASIC_STR_CHARS = TOML_ASCII_CTRL - frozenset('\t')
|
157
|
-
TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS = TOML_ASCII_CTRL - frozenset('\t\n')
|
71
|
+
def toml_make_safe_parse_float(parse_float: TomlParseFloat) -> TomlParseFloat:
|
72
|
+
"""
|
73
|
+
A decorator to make `parse_float` safe.
|
158
74
|
|
159
|
-
|
160
|
-
|
75
|
+
`parse_float` must not return dicts or lists, because these types would be mixed with parsed TOML tables and arrays,
|
76
|
+
thus confusing the parser. The returned decorated callable raises `ValueError` instead of returning illegal types.
|
77
|
+
"""
|
161
78
|
|
162
|
-
|
79
|
+
# The default `float` callable never returns illegal types. Optimize it.
|
80
|
+
if parse_float is float:
|
81
|
+
return float
|
163
82
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
83
|
+
def safe_parse_float(float_str: str) -> ta.Any:
|
84
|
+
float_value = parse_float(float_str)
|
85
|
+
if isinstance(float_value, (dict, list)):
|
86
|
+
raise ValueError('parse_float must not return dicts or lists') # noqa
|
87
|
+
return float_value
|
169
88
|
|
170
|
-
|
171
|
-
{
|
172
|
-
'\\b': '\u0008', # backspace
|
173
|
-
'\\t': '\u0009', # tab
|
174
|
-
'\\n': '\u000A', # linefeed
|
175
|
-
'\\f': '\u000C', # form feed
|
176
|
-
'\\r': '\u000D', # carriage return
|
177
|
-
'\\"': '\u0022', # quote
|
178
|
-
'\\\\': '\u005C', # backslash
|
179
|
-
},
|
180
|
-
)
|
89
|
+
return safe_parse_float
|
181
90
|
|
182
91
|
|
183
92
|
class TomlDecodeError(ValueError):
|
@@ -202,63 +111,15 @@ def toml_loads(s: str, /, *, parse_float: TomlParseFloat = float) -> ta.Dict[str
|
|
202
111
|
src = s.replace('\r\n', '\n')
|
203
112
|
except (AttributeError, TypeError):
|
204
113
|
raise TypeError(f"Expected str object, not '{type(s).__qualname__}'") from None
|
205
|
-
|
206
|
-
out = TomlOutput(TomlNestedDict(), TomlFlags())
|
207
|
-
header: TomlKey = ()
|
114
|
+
|
208
115
|
parse_float = toml_make_safe_parse_float(parse_float)
|
209
116
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
# 2. Parse rules. Expect one of the following:
|
216
|
-
# - end of file
|
217
|
-
# - end of line
|
218
|
-
# - comment
|
219
|
-
# - key/value pair
|
220
|
-
# - append dict to list (and move to its namespace)
|
221
|
-
# - create dict (and move to its namespace)
|
222
|
-
# Skip trailing whitespace when applicable.
|
223
|
-
try:
|
224
|
-
char = src[pos]
|
225
|
-
except IndexError:
|
226
|
-
break
|
227
|
-
if char == '\n':
|
228
|
-
pos += 1
|
229
|
-
continue
|
230
|
-
if char in TOML_KEY_INITIAL_CHARS:
|
231
|
-
pos = toml_key_value_rule(src, pos, out, header, parse_float)
|
232
|
-
pos = toml_skip_chars(src, pos, TOML_WS)
|
233
|
-
elif char == '[':
|
234
|
-
try:
|
235
|
-
second_char: ta.Optional[str] = src[pos + 1]
|
236
|
-
except IndexError:
|
237
|
-
second_char = None
|
238
|
-
out.flags.finalize_pending()
|
239
|
-
if second_char == '[':
|
240
|
-
pos, header = toml_create_list_rule(src, pos, out)
|
241
|
-
else:
|
242
|
-
pos, header = toml_create_dict_rule(src, pos, out)
|
243
|
-
pos = toml_skip_chars(src, pos, TOML_WS)
|
244
|
-
elif char != '#':
|
245
|
-
raise toml_suffixed_err(src, pos, 'Invalid statement')
|
246
|
-
|
247
|
-
# 3. Skip comment
|
248
|
-
pos = toml_skip_comment(src, pos)
|
249
|
-
|
250
|
-
# 4. Expect end of line or end of file
|
251
|
-
try:
|
252
|
-
char = src[pos]
|
253
|
-
except IndexError:
|
254
|
-
break
|
255
|
-
if char != '\n':
|
256
|
-
raise toml_suffixed_err(
|
257
|
-
src, pos, 'Expected newline or end of document after a statement',
|
258
|
-
)
|
259
|
-
pos += 1
|
117
|
+
parser = TomlParser(
|
118
|
+
src,
|
119
|
+
parse_float=parse_float,
|
120
|
+
)
|
260
121
|
|
261
|
-
return
|
122
|
+
return parser.parse()
|
262
123
|
|
263
124
|
|
264
125
|
class TomlFlags:
|
@@ -270,6 +131,8 @@ class TomlFlags:
|
|
270
131
|
EXPLICIT_NEST = 1
|
271
132
|
|
272
133
|
def __init__(self) -> None:
|
134
|
+
super().__init__()
|
135
|
+
|
273
136
|
self._flags: ta.Dict[str, dict] = {}
|
274
137
|
self._pending_flags: ta.Set[ta.Tuple[TomlKey, int]] = set()
|
275
138
|
|
@@ -320,6 +183,8 @@ class TomlFlags:
|
|
320
183
|
|
321
184
|
class TomlNestedDict:
|
322
185
|
def __init__(self) -> None:
|
186
|
+
super().__init__()
|
187
|
+
|
323
188
|
# The parsed content of the TOML document
|
324
189
|
self.dict: ta.Dict[str, ta.Any] = {}
|
325
190
|
|
@@ -352,476 +217,610 @@ class TomlNestedDict:
|
|
352
217
|
cont[last_key] = [{}]
|
353
218
|
|
354
219
|
|
355
|
-
class
|
356
|
-
|
357
|
-
|
220
|
+
class TomlParser:
|
221
|
+
def __init__(
|
222
|
+
self,
|
223
|
+
src: str,
|
224
|
+
*,
|
225
|
+
parse_float: TomlParseFloat = float,
|
226
|
+
) -> None:
|
227
|
+
super().__init__()
|
358
228
|
|
229
|
+
self.src = src
|
359
230
|
|
360
|
-
|
361
|
-
try:
|
362
|
-
while src[pos] in chars:
|
363
|
-
pos += 1
|
364
|
-
except IndexError:
|
365
|
-
pass
|
366
|
-
return pos
|
367
|
-
|
368
|
-
|
369
|
-
def toml_skip_until(
|
370
|
-
src: str,
|
371
|
-
pos: TomlPos,
|
372
|
-
expect: str,
|
373
|
-
*,
|
374
|
-
error_on: ta.FrozenSet[str],
|
375
|
-
error_on_eof: bool,
|
376
|
-
) -> TomlPos:
|
377
|
-
try:
|
378
|
-
new_pos = src.index(expect, pos)
|
379
|
-
except ValueError:
|
380
|
-
new_pos = len(src)
|
381
|
-
if error_on_eof:
|
382
|
-
raise toml_suffixed_err(src, new_pos, f'Expected {expect!r}') from None
|
231
|
+
self.parse_float = parse_float
|
383
232
|
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
raise toml_suffixed_err(src, pos, f'Found invalid character {src[pos]!r}')
|
388
|
-
return new_pos
|
233
|
+
self.data = TomlNestedDict()
|
234
|
+
self.flags = TomlFlags()
|
235
|
+
self.pos = 0
|
389
236
|
|
237
|
+
ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
|
390
238
|
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
src, pos + 1, '\n', error_on=TOML_ILLEGAL_COMMENT_CHARS, error_on_eof=False,
|
399
|
-
)
|
400
|
-
return pos
|
239
|
+
# Neither of these sets include quotation mark or backslash. They are currently handled as separate cases in the
|
240
|
+
# parser functions.
|
241
|
+
ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset('\t')
|
242
|
+
ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset('\t\n')
|
243
|
+
|
244
|
+
ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
|
245
|
+
ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
|
401
246
|
|
247
|
+
ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
|
402
248
|
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
if pos == pos_before_skip:
|
409
|
-
return pos
|
249
|
+
WS = frozenset(' \t')
|
250
|
+
WS_AND_NEWLINE = WS | frozenset('\n')
|
251
|
+
BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + '-_')
|
252
|
+
KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
|
253
|
+
HEXDIGIT_CHARS = frozenset(string.hexdigits)
|
410
254
|
|
255
|
+
BASIC_STR_ESCAPE_REPLACEMENTS = types.MappingProxyType({
|
256
|
+
'\\b': '\u0008', # backspace
|
257
|
+
'\\t': '\u0009', # tab
|
258
|
+
'\\n': '\u000A', # linefeed
|
259
|
+
'\\f': '\u000C', # form feed
|
260
|
+
'\\r': '\u000D', # carriage return
|
261
|
+
'\\"': '\u0022', # quote
|
262
|
+
'\\\\': '\u005C', # backslash
|
263
|
+
})
|
264
|
+
|
265
|
+
def parse(self) -> ta.Dict[str, ta.Any]: # noqa: C901
|
266
|
+
header: TomlKey = ()
|
267
|
+
|
268
|
+
# Parse one statement at a time (typically means one line in TOML source)
|
269
|
+
while True:
|
270
|
+
# 1. Skip line leading whitespace
|
271
|
+
self.skip_chars(self.WS)
|
272
|
+
|
273
|
+
# 2. Parse rules. Expect one of the following:
|
274
|
+
# - end of file
|
275
|
+
# - end of line
|
276
|
+
# - comment
|
277
|
+
# - key/value pair
|
278
|
+
# - append dict to list (and move to its namespace)
|
279
|
+
# - create dict (and move to its namespace)
|
280
|
+
# Skip trailing whitespace when applicable.
|
281
|
+
try:
|
282
|
+
char = self.src[self.pos]
|
283
|
+
except IndexError:
|
284
|
+
break
|
285
|
+
if char == '\n':
|
286
|
+
self.pos += 1
|
287
|
+
continue
|
288
|
+
if char in self.KEY_INITIAL_CHARS:
|
289
|
+
self.key_value_rule(header)
|
290
|
+
self.skip_chars(self.WS)
|
291
|
+
elif char == '[':
|
292
|
+
try:
|
293
|
+
second_char: ta.Optional[str] = self.src[self.pos + 1]
|
294
|
+
except IndexError:
|
295
|
+
second_char = None
|
296
|
+
self.flags.finalize_pending()
|
297
|
+
if second_char == '[':
|
298
|
+
header = self.create_list_rule()
|
299
|
+
else:
|
300
|
+
header = self.create_dict_rule()
|
301
|
+
self.skip_chars(self.WS)
|
302
|
+
elif char != '#':
|
303
|
+
raise self.suffixed_err('Invalid statement')
|
304
|
+
|
305
|
+
# 3. Skip comment
|
306
|
+
self.skip_comment()
|
307
|
+
|
308
|
+
# 4. Expect end of line or end of file
|
309
|
+
try:
|
310
|
+
char = self.src[self.pos]
|
311
|
+
except IndexError:
|
312
|
+
break
|
313
|
+
if char != '\n':
|
314
|
+
raise self.suffixed_err('Expected newline or end of document after a statement')
|
315
|
+
self.pos += 1
|
411
316
|
|
412
|
-
|
413
|
-
pos += 1 # Skip "["
|
414
|
-
pos = toml_skip_chars(src, pos, TOML_WS)
|
415
|
-
pos, key = toml_parse_key(src, pos)
|
317
|
+
return self.data.dict
|
416
318
|
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
|
424
|
-
|
425
|
-
if not src.startswith(']', pos):
|
426
|
-
raise toml_suffixed_err(src, pos, "Expected ']' at the end of a table declaration")
|
427
|
-
return pos + 1, key
|
428
|
-
|
429
|
-
|
430
|
-
def toml_create_list_rule(src: str, pos: TomlPos, out: TomlOutput) -> ta.Tuple[TomlPos, TomlKey]:
|
431
|
-
pos += 2 # Skip "[["
|
432
|
-
pos = toml_skip_chars(src, pos, TOML_WS)
|
433
|
-
pos, key = toml_parse_key(src, pos)
|
434
|
-
|
435
|
-
if out.flags.is_(key, TomlFlags.FROZEN):
|
436
|
-
raise toml_suffixed_err(src, pos, f'Cannot mutate immutable namespace {key}')
|
437
|
-
# Free the namespace now that it points to another empty list item...
|
438
|
-
out.flags.unset_all(key)
|
439
|
-
# ...but this key precisely is still prohibited from table declaration
|
440
|
-
out.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
|
441
|
-
try:
|
442
|
-
out.data.append_nest_to_list(key)
|
443
|
-
except KeyError:
|
444
|
-
raise toml_suffixed_err(src, pos, 'Cannot overwrite a value') from None
|
445
|
-
|
446
|
-
if not src.startswith(']]', pos):
|
447
|
-
raise toml_suffixed_err(src, pos, "Expected ']]' at the end of an array declaration")
|
448
|
-
return pos + 2, key
|
449
|
-
|
450
|
-
|
451
|
-
def toml_key_value_rule(
|
452
|
-
src: str,
|
453
|
-
pos: TomlPos,
|
454
|
-
out: TomlOutput,
|
455
|
-
header: TomlKey,
|
456
|
-
parse_float: TomlParseFloat,
|
457
|
-
) -> TomlPos:
|
458
|
-
pos, key, value = toml_parse_key_value_pair(src, pos, parse_float)
|
459
|
-
key_parent, key_stem = key[:-1], key[-1]
|
460
|
-
abs_key_parent = header + key_parent
|
461
|
-
|
462
|
-
relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
|
463
|
-
for cont_key in relative_path_cont_keys:
|
464
|
-
# Check that dotted key syntax does not redefine an existing table
|
465
|
-
if out.flags.is_(cont_key, TomlFlags.EXPLICIT_NEST):
|
466
|
-
raise toml_suffixed_err(src, pos, f'Cannot redefine namespace {cont_key}')
|
467
|
-
# Containers in the relative path can't be opened with the table syntax or dotted key/value syntax in following
|
468
|
-
# table sections.
|
469
|
-
out.flags.add_pending(cont_key, TomlFlags.EXPLICIT_NEST)
|
470
|
-
|
471
|
-
if out.flags.is_(abs_key_parent, TomlFlags.FROZEN):
|
472
|
-
raise toml_suffixed_err(
|
473
|
-
src,
|
474
|
-
pos,
|
475
|
-
f'Cannot mutate immutable namespace {abs_key_parent}',
|
476
|
-
)
|
319
|
+
def skip_chars(self, chars: ta.Iterable[str]) -> None:
|
320
|
+
try:
|
321
|
+
while self.src[self.pos] in chars:
|
322
|
+
self.pos += 1
|
323
|
+
except IndexError:
|
324
|
+
pass
|
477
325
|
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
if isinstance(value, (dict, list)):
|
486
|
-
out.flags.set(header + key, TomlFlags.FROZEN, recursive=True)
|
487
|
-
nest[key_stem] = value
|
488
|
-
return pos
|
489
|
-
|
490
|
-
|
491
|
-
def toml_parse_key_value_pair(
|
492
|
-
src: str,
|
493
|
-
pos: TomlPos,
|
494
|
-
parse_float: TomlParseFloat,
|
495
|
-
) -> ta.Tuple[TomlPos, TomlKey, ta.Any]:
|
496
|
-
pos, key = toml_parse_key(src, pos)
|
497
|
-
try:
|
498
|
-
char: ta.Optional[str] = src[pos]
|
499
|
-
except IndexError:
|
500
|
-
char = None
|
501
|
-
if char != '=':
|
502
|
-
raise toml_suffixed_err(src, pos, "Expected '=' after a key in a key/value pair")
|
503
|
-
pos += 1
|
504
|
-
pos = toml_skip_chars(src, pos, TOML_WS)
|
505
|
-
pos, value = toml_parse_value(src, pos, parse_float)
|
506
|
-
return pos, key, value
|
507
|
-
|
508
|
-
|
509
|
-
def toml_parse_key(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, TomlKey]:
|
510
|
-
pos, key_part = toml_parse_key_part(src, pos)
|
511
|
-
key: TomlKey = (key_part,)
|
512
|
-
pos = toml_skip_chars(src, pos, TOML_WS)
|
513
|
-
while True:
|
326
|
+
def skip_until(
|
327
|
+
self,
|
328
|
+
expect: str,
|
329
|
+
*,
|
330
|
+
error_on: ta.FrozenSet[str],
|
331
|
+
error_on_eof: bool,
|
332
|
+
) -> None:
|
514
333
|
try:
|
515
|
-
|
334
|
+
new_pos = self.src.index(expect, self.pos)
|
335
|
+
except ValueError:
|
336
|
+
new_pos = len(self.src)
|
337
|
+
if error_on_eof:
|
338
|
+
raise self.suffixed_err(f'Expected {expect!r}', pos=new_pos) from None
|
339
|
+
|
340
|
+
if not error_on.isdisjoint(self.src[self.pos:new_pos]):
|
341
|
+
while self.src[self.pos] not in error_on:
|
342
|
+
self.pos += 1
|
343
|
+
raise self.suffixed_err(f'Found invalid character {self.src[self.pos]!r}')
|
344
|
+
self.pos = new_pos
|
345
|
+
|
346
|
+
def skip_comment(self) -> None:
|
347
|
+
try:
|
348
|
+
char: ta.Optional[str] = self.src[self.pos]
|
516
349
|
except IndexError:
|
517
350
|
char = None
|
518
|
-
if char
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
351
|
+
if char == '#':
|
352
|
+
self.pos += 1
|
353
|
+
self.skip_until(
|
354
|
+
'\n',
|
355
|
+
error_on=self.ILLEGAL_COMMENT_CHARS,
|
356
|
+
error_on_eof=False,
|
357
|
+
)
|
525
358
|
|
359
|
+
def skip_comments_and_array_ws(self) -> None:
|
360
|
+
while True:
|
361
|
+
pos_before_skip = self.pos
|
362
|
+
self.skip_chars(self.WS_AND_NEWLINE)
|
363
|
+
self.skip_comment()
|
364
|
+
if self.pos == pos_before_skip:
|
365
|
+
return
|
526
366
|
|
527
|
-
def
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
return pos, src[start_pos:pos]
|
536
|
-
if char == "'":
|
537
|
-
return toml_parse_literal_str(src, pos)
|
538
|
-
if char == '"':
|
539
|
-
return toml_parse_one_line_basic_str(src, pos)
|
540
|
-
raise toml_suffixed_err(src, pos, 'Invalid initial character for a key part')
|
541
|
-
|
542
|
-
|
543
|
-
def toml_parse_one_line_basic_str(src: str, pos: TomlPos) -> ta.Tuple[TomlPos, str]:
|
544
|
-
pos += 1
|
545
|
-
return toml_parse_basic_str(src, pos, multiline=False)
|
546
|
-
|
547
|
-
|
548
|
-
def toml_parse_array(src: str, pos: TomlPos, parse_float: TomlParseFloat) -> ta.Tuple[TomlPos, list]:
|
549
|
-
pos += 1
|
550
|
-
array: list = []
|
551
|
-
|
552
|
-
pos = toml_skip_comments_and_array_ws(src, pos)
|
553
|
-
if src.startswith(']', pos):
|
554
|
-
return pos + 1, array
|
555
|
-
while True:
|
556
|
-
pos, val = toml_parse_value(src, pos, parse_float)
|
557
|
-
array.append(val)
|
558
|
-
pos = toml_skip_comments_and_array_ws(src, pos)
|
559
|
-
|
560
|
-
c = src[pos:pos + 1]
|
561
|
-
if c == ']':
|
562
|
-
return pos + 1, array
|
563
|
-
if c != ',':
|
564
|
-
raise toml_suffixed_err(src, pos, 'Unclosed array')
|
565
|
-
pos += 1
|
566
|
-
|
567
|
-
pos = toml_skip_comments_and_array_ws(src, pos)
|
568
|
-
if src.startswith(']', pos):
|
569
|
-
return pos + 1, array
|
570
|
-
|
571
|
-
|
572
|
-
def toml_parse_inline_table(src: str, pos: TomlPos, parse_float: TomlParseFloat) -> ta.Tuple[TomlPos, dict]:
|
573
|
-
pos += 1
|
574
|
-
nested_dict = TomlNestedDict()
|
575
|
-
flags = TomlFlags()
|
576
|
-
|
577
|
-
pos = toml_skip_chars(src, pos, TOML_WS)
|
578
|
-
if src.startswith('}', pos):
|
579
|
-
return pos + 1, nested_dict.dict
|
580
|
-
while True:
|
581
|
-
pos, key, value = toml_parse_key_value_pair(src, pos, parse_float)
|
582
|
-
key_parent, key_stem = key[:-1], key[-1]
|
583
|
-
if flags.is_(key, TomlFlags.FROZEN):
|
584
|
-
raise toml_suffixed_err(src, pos, f'Cannot mutate immutable namespace {key}')
|
367
|
+
def create_dict_rule(self) -> TomlKey:
|
368
|
+
self.pos += 1 # Skip "["
|
369
|
+
self.skip_chars(self.WS)
|
370
|
+
key = self.parse_key()
|
371
|
+
|
372
|
+
if self.flags.is_(key, TomlFlags.EXPLICIT_NEST) or self.flags.is_(key, TomlFlags.FROZEN):
|
373
|
+
raise self.suffixed_err(f'Cannot declare {key} twice')
|
374
|
+
self.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
|
585
375
|
try:
|
586
|
-
|
376
|
+
self.data.get_or_create_nest(key)
|
587
377
|
except KeyError:
|
588
|
-
raise
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
pos
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
)
|
610
|
-
escape_id = src[pos:pos + 2]
|
611
|
-
pos += 2
|
612
|
-
if multiline and escape_id in {'\\ ', '\\\t', '\\\n'}:
|
613
|
-
# Skip whitespace until next non-whitespace character or end of the doc. Error if non-whitespace is found before
|
614
|
-
# newline.
|
615
|
-
if escape_id != '\\\n':
|
616
|
-
pos = toml_skip_chars(src, pos, TOML_WS)
|
617
|
-
try:
|
618
|
-
char = src[pos]
|
619
|
-
except IndexError:
|
620
|
-
return pos, ''
|
621
|
-
if char != '\n':
|
622
|
-
raise toml_suffixed_err(src, pos, "Unescaped '\\' in a string")
|
623
|
-
pos += 1
|
624
|
-
pos = toml_skip_chars(src, pos, TOML_WS_AND_NEWLINE)
|
625
|
-
return pos, ''
|
626
|
-
if escape_id == '\\u':
|
627
|
-
return toml_parse_hex_char(src, pos, 4)
|
628
|
-
if escape_id == '\\U':
|
629
|
-
return toml_parse_hex_char(src, pos, 8)
|
630
|
-
try:
|
631
|
-
return pos, TOML_BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
|
632
|
-
except KeyError:
|
633
|
-
raise toml_suffixed_err(src, pos, "Unescaped '\\' in a string") from None
|
378
|
+
raise self.suffixed_err('Cannot overwrite a value') from None
|
379
|
+
|
380
|
+
if not self.src.startswith(']', self.pos):
|
381
|
+
raise self.suffixed_err("Expected ']' at the end of a table declaration")
|
382
|
+
self.pos += 1
|
383
|
+
return key
|
384
|
+
|
385
|
+
def create_list_rule(self) -> TomlKey:
|
386
|
+
self.pos += 2 # Skip "[["
|
387
|
+
self.skip_chars(self.WS)
|
388
|
+
key = self.parse_key()
|
389
|
+
|
390
|
+
if self.flags.is_(key, TomlFlags.FROZEN):
|
391
|
+
raise self.suffixed_err(f'Cannot mutate immutable namespace {key}')
|
392
|
+
# Free the namespace now that it points to another empty list item...
|
393
|
+
self.flags.unset_all(key)
|
394
|
+
# ...but this key precisely is still prohibited from table declaration
|
395
|
+
self.flags.set(key, TomlFlags.EXPLICIT_NEST, recursive=False)
|
396
|
+
try:
|
397
|
+
self.data.append_nest_to_list(key)
|
398
|
+
except KeyError:
|
399
|
+
raise self.suffixed_err('Cannot overwrite a value') from None
|
634
400
|
|
401
|
+
if not self.src.startswith(']]', self.pos):
|
402
|
+
raise self.suffixed_err("Expected ']]' at the end of an array declaration")
|
403
|
+
self.pos += 2
|
404
|
+
return key
|
635
405
|
|
636
|
-
def
|
637
|
-
|
406
|
+
def key_value_rule(self, header: TomlKey) -> None:
|
407
|
+
key, value = self.parse_key_value_pair()
|
408
|
+
key_parent, key_stem = key[:-1], key[-1]
|
409
|
+
abs_key_parent = header + key_parent
|
638
410
|
|
411
|
+
relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
|
412
|
+
for cont_key in relative_path_cont_keys:
|
413
|
+
# Check that dotted key syntax does not redefine an existing table
|
414
|
+
if self.flags.is_(cont_key, TomlFlags.EXPLICIT_NEST):
|
415
|
+
raise self.suffixed_err(f'Cannot redefine namespace {cont_key}')
|
416
|
+
# Containers in the relative path can't be opened with the table syntax or dotted key/value syntax in
|
417
|
+
# following table sections.
|
418
|
+
self.flags.add_pending(cont_key, TomlFlags.EXPLICIT_NEST)
|
639
419
|
|
640
|
-
|
641
|
-
|
642
|
-
if len(hex_str) != hex_len or not TOML_HEXDIGIT_CHARS.issuperset(hex_str):
|
643
|
-
raise toml_suffixed_err(src, pos, 'Invalid hex value')
|
644
|
-
pos += hex_len
|
645
|
-
hex_int = int(hex_str, 16)
|
646
|
-
if not toml_is_unicode_scalar_value(hex_int):
|
647
|
-
raise toml_suffixed_err(src, pos, 'Escaped character is not a Unicode scalar value')
|
648
|
-
return pos, chr(hex_int)
|
420
|
+
if self.flags.is_(abs_key_parent, TomlFlags.FROZEN):
|
421
|
+
raise self.suffixed_err(f'Cannot mutate immutable namespace {abs_key_parent}')
|
649
422
|
|
423
|
+
try:
|
424
|
+
nest = self.data.get_or_create_nest(abs_key_parent)
|
425
|
+
except KeyError:
|
426
|
+
raise self.suffixed_err('Cannot overwrite a value') from None
|
427
|
+
if key_stem in nest:
|
428
|
+
raise self.suffixed_err('Cannot overwrite a value')
|
429
|
+
# Mark inline table and array namespaces recursively immutable
|
430
|
+
if isinstance(value, (dict, list)):
|
431
|
+
self.flags.set(header + key, TomlFlags.FROZEN, recursive=True)
|
432
|
+
nest[key_stem] = value
|
650
433
|
|
651
|
-
def
|
652
|
-
|
653
|
-
start_pos = pos
|
654
|
-
pos = toml_skip_until(
|
655
|
-
src, pos, "'", error_on=TOML_ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True,
|
656
|
-
)
|
657
|
-
return pos + 1, src[start_pos:pos] # Skip ending apostrophe
|
658
|
-
|
659
|
-
|
660
|
-
def toml_parse_multiline_str(src: str, pos: TomlPos, *, literal: bool) -> ta.Tuple[TomlPos, str]:
|
661
|
-
pos += 3
|
662
|
-
if src.startswith('\n', pos):
|
663
|
-
pos += 1
|
664
|
-
|
665
|
-
if literal:
|
666
|
-
delim = "'"
|
667
|
-
end_pos = toml_skip_until(
|
668
|
-
src,
|
669
|
-
pos,
|
670
|
-
"'''",
|
671
|
-
error_on=TOML_ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
|
672
|
-
error_on_eof=True,
|
673
|
-
)
|
674
|
-
result = src[pos:end_pos]
|
675
|
-
pos = end_pos + 3
|
676
|
-
else:
|
677
|
-
delim = '"'
|
678
|
-
pos, result = toml_parse_basic_str(src, pos, multiline=True)
|
679
|
-
|
680
|
-
# Add at maximum two extra apostrophes/quotes if the end sequence is 4 or 5 chars long instead of just 3.
|
681
|
-
if not src.startswith(delim, pos):
|
682
|
-
return pos, result
|
683
|
-
pos += 1
|
684
|
-
if not src.startswith(delim, pos):
|
685
|
-
return pos, result + delim
|
686
|
-
pos += 1
|
687
|
-
return pos, result + (delim * 2)
|
688
|
-
|
689
|
-
|
690
|
-
def toml_parse_basic_str(src: str, pos: TomlPos, *, multiline: bool) -> ta.Tuple[TomlPos, str]:
|
691
|
-
if multiline:
|
692
|
-
error_on = TOML_ILLEGAL_MULTILINE_BASIC_STR_CHARS
|
693
|
-
parse_escapes = toml_parse_basic_str_escape_multiline
|
694
|
-
else:
|
695
|
-
error_on = TOML_ILLEGAL_BASIC_STR_CHARS
|
696
|
-
parse_escapes = toml_parse_basic_str_escape
|
697
|
-
result = ''
|
698
|
-
start_pos = pos
|
699
|
-
while True:
|
434
|
+
def parse_key_value_pair(self) -> ta.Tuple[TomlKey, ta.Any]:
|
435
|
+
key = self.parse_key()
|
700
436
|
try:
|
701
|
-
char = src[pos]
|
437
|
+
char: ta.Optional[str] = self.src[self.pos]
|
702
438
|
except IndexError:
|
703
|
-
|
439
|
+
char = None
|
440
|
+
if char != '=':
|
441
|
+
raise self.suffixed_err("Expected '=' after a key in a key/value pair")
|
442
|
+
self.pos += 1
|
443
|
+
self.skip_chars(self.WS)
|
444
|
+
value = self.parse_value()
|
445
|
+
return key, value
|
446
|
+
|
447
|
+
def parse_key(self) -> TomlKey:
|
448
|
+
key_part = self.parse_key_part()
|
449
|
+
key: TomlKey = (key_part,)
|
450
|
+
self.skip_chars(self.WS)
|
451
|
+
while True:
|
452
|
+
try:
|
453
|
+
char: ta.Optional[str] = self.src[self.pos]
|
454
|
+
except IndexError:
|
455
|
+
char = None
|
456
|
+
if char != '.':
|
457
|
+
return key
|
458
|
+
self.pos += 1
|
459
|
+
self.skip_chars(self.WS)
|
460
|
+
key_part = self.parse_key_part()
|
461
|
+
key += (key_part,)
|
462
|
+
self.skip_chars(self.WS)
|
463
|
+
|
464
|
+
def parse_key_part(self) -> str:
|
465
|
+
try:
|
466
|
+
char: ta.Optional[str] = self.src[self.pos]
|
467
|
+
except IndexError:
|
468
|
+
char = None
|
469
|
+
if char in self.BARE_KEY_CHARS:
|
470
|
+
start_pos = self.pos
|
471
|
+
self.skip_chars(self.BARE_KEY_CHARS)
|
472
|
+
return self.src[start_pos:self.pos]
|
473
|
+
if char == "'":
|
474
|
+
return self.parse_literal_str()
|
704
475
|
if char == '"':
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
if
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
476
|
+
return self.parse_one_line_basic_str()
|
477
|
+
raise self.suffixed_err('Invalid initial character for a key part')
|
478
|
+
|
479
|
+
def parse_one_line_basic_str(self) -> str:
|
480
|
+
self.pos += 1
|
481
|
+
return self.parse_basic_str(multiline=False)
|
482
|
+
|
483
|
+
def parse_array(self) -> list:
|
484
|
+
self.pos += 1
|
485
|
+
array: list = []
|
486
|
+
|
487
|
+
self.skip_comments_and_array_ws()
|
488
|
+
if self.src.startswith(']', self.pos):
|
489
|
+
self.pos += 1
|
490
|
+
return array
|
491
|
+
while True:
|
492
|
+
val = self.parse_value()
|
493
|
+
array.append(val)
|
494
|
+
self.skip_comments_and_array_ws()
|
495
|
+
|
496
|
+
c = self.src[self.pos:self.pos + 1]
|
497
|
+
if c == ']':
|
498
|
+
self.pos += 1
|
499
|
+
return array
|
500
|
+
if c != ',':
|
501
|
+
raise self.suffixed_err('Unclosed array')
|
502
|
+
self.pos += 1
|
503
|
+
|
504
|
+
self.skip_comments_and_array_ws()
|
505
|
+
if self.src.startswith(']', self.pos):
|
506
|
+
self.pos += 1
|
507
|
+
return array
|
508
|
+
|
509
|
+
def parse_inline_table(self) -> dict:
|
510
|
+
self.pos += 1
|
511
|
+
nested_dict = TomlNestedDict()
|
512
|
+
flags = TomlFlags()
|
513
|
+
|
514
|
+
self.skip_chars(self.WS)
|
515
|
+
if self.src.startswith('}', self.pos):
|
516
|
+
self.pos += 1
|
517
|
+
return nested_dict.dict
|
518
|
+
while True:
|
519
|
+
key, value = self.parse_key_value_pair()
|
520
|
+
key_parent, key_stem = key[:-1], key[-1]
|
521
|
+
if flags.is_(key, TomlFlags.FROZEN):
|
522
|
+
raise self.suffixed_err(f'Cannot mutate immutable namespace {key}')
|
523
|
+
try:
|
524
|
+
nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
|
525
|
+
except KeyError:
|
526
|
+
raise self.suffixed_err('Cannot overwrite a value') from None
|
527
|
+
if key_stem in nest:
|
528
|
+
raise self.suffixed_err(f'Duplicate inline table key {key_stem!r}')
|
529
|
+
nest[key_stem] = value
|
530
|
+
self.skip_chars(self.WS)
|
531
|
+
c = self.src[self.pos:self.pos + 1]
|
532
|
+
if c == '}':
|
533
|
+
self.pos += 1
|
534
|
+
return nested_dict.dict
|
535
|
+
if c != ',':
|
536
|
+
raise self.suffixed_err('Unclosed inline table')
|
537
|
+
if isinstance(value, (dict, list)):
|
538
|
+
flags.set(key, TomlFlags.FROZEN, recursive=True)
|
539
|
+
self.pos += 1
|
540
|
+
self.skip_chars(self.WS)
|
541
|
+
|
542
|
+
def parse_basic_str_escape(self, multiline: bool = False) -> str:
|
543
|
+
escape_id = self.src[self.pos:self.pos + 2]
|
544
|
+
self.pos += 2
|
545
|
+
if multiline and escape_id in {'\\ ', '\\\t', '\\\n'}:
|
546
|
+
# Skip whitespace until next non-whitespace character or end of the doc. Error if non-whitespace is found
|
547
|
+
# before newline.
|
548
|
+
if escape_id != '\\\n':
|
549
|
+
self.skip_chars(self.WS)
|
550
|
+
try:
|
551
|
+
char = self.src[self.pos]
|
552
|
+
except IndexError:
|
553
|
+
return ''
|
554
|
+
if char != '\n':
|
555
|
+
raise self.suffixed_err("Unescaped '\\' in a string")
|
556
|
+
self.pos += 1
|
557
|
+
self.skip_chars(self.WS_AND_NEWLINE)
|
558
|
+
return ''
|
559
|
+
if escape_id == '\\u':
|
560
|
+
return self.parse_hex_char(4)
|
561
|
+
if escape_id == '\\U':
|
562
|
+
return self.parse_hex_char(8)
|
765
563
|
try:
|
766
|
-
|
767
|
-
except
|
768
|
-
raise
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
564
|
+
return self.BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
|
565
|
+
except KeyError:
|
566
|
+
raise self.suffixed_err("Unescaped '\\' in a string") from None
|
567
|
+
|
568
|
+
def parse_basic_str_escape_multiline(self) -> str:
|
569
|
+
return self.parse_basic_str_escape(multiline=True)
|
570
|
+
|
571
|
+
@classmethod
|
572
|
+
def is_unicode_scalar_value(cls, codepoint: int) -> bool:
|
573
|
+
return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
|
574
|
+
|
575
|
+
def parse_hex_char(self, hex_len: int) -> str:
|
576
|
+
hex_str = self.src[self.pos:self.pos + hex_len]
|
577
|
+
if len(hex_str) != hex_len or not self.HEXDIGIT_CHARS.issuperset(hex_str):
|
578
|
+
raise self.suffixed_err('Invalid hex value')
|
579
|
+
self.pos += hex_len
|
580
|
+
hex_int = int(hex_str, 16)
|
581
|
+
if not self.is_unicode_scalar_value(hex_int):
|
582
|
+
raise self.suffixed_err('Escaped character is not a Unicode scalar value')
|
583
|
+
return chr(hex_int)
|
584
|
+
|
585
|
+
def parse_literal_str(self) -> str:
|
586
|
+
self.pos += 1 # Skip starting apostrophe
|
587
|
+
start_pos = self.pos
|
588
|
+
self.skip_until("'", error_on=self.ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True)
|
589
|
+
end_pos = self.pos
|
590
|
+
self.pos += 1
|
591
|
+
return self.src[start_pos:end_pos] # Skip ending apostrophe
|
592
|
+
|
593
|
+
def parse_multiline_str(self, *, literal: bool) -> str:
|
594
|
+
self.pos += 3
|
595
|
+
if self.src.startswith('\n', self.pos):
|
596
|
+
self.pos += 1
|
597
|
+
|
598
|
+
if literal:
|
599
|
+
delim = "'"
|
600
|
+
start_pos = self.pos
|
601
|
+
self.skip_until(
|
602
|
+
"'''",
|
603
|
+
error_on=self.ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
|
604
|
+
error_on_eof=True,
|
605
|
+
)
|
606
|
+
result = self.src[start_pos:self.pos]
|
607
|
+
self.pos += 3
|
608
|
+
else:
|
609
|
+
delim = '"'
|
610
|
+
result = self.parse_basic_str(multiline=True)
|
611
|
+
|
612
|
+
# Add at maximum two extra apostrophes/quotes if the end sequence is 4 or 5 chars long instead of just 3.
|
613
|
+
if not self.src.startswith(delim, self.pos):
|
614
|
+
return result
|
615
|
+
self.pos += 1
|
616
|
+
if not self.src.startswith(delim, self.pos):
|
617
|
+
return result + delim
|
618
|
+
self.pos += 1
|
619
|
+
return result + (delim * 2)
|
620
|
+
|
621
|
+
def parse_basic_str(self, *, multiline: bool) -> str:
|
622
|
+
if multiline:
|
623
|
+
error_on = self.ILLEGAL_MULTILINE_BASIC_STR_CHARS
|
624
|
+
parse_escapes = self.parse_basic_str_escape_multiline
|
625
|
+
else:
|
626
|
+
error_on = self.ILLEGAL_BASIC_STR_CHARS
|
627
|
+
parse_escapes = self.parse_basic_str_escape
|
628
|
+
result = ''
|
629
|
+
start_pos = self.pos
|
630
|
+
while True:
|
631
|
+
try:
|
632
|
+
char = self.src[self.pos]
|
633
|
+
except IndexError:
|
634
|
+
raise self.suffixed_err('Unterminated string') from None
|
635
|
+
if char == '"':
|
636
|
+
if not multiline:
|
637
|
+
end_pos = self.pos
|
638
|
+
self.pos += 1
|
639
|
+
return result + self.src[start_pos:end_pos]
|
640
|
+
if self.src.startswith('"""', self.pos):
|
641
|
+
end_pos = self.pos
|
642
|
+
self.pos += 3
|
643
|
+
return result + self.src[start_pos:end_pos]
|
644
|
+
self.pos += 1
|
645
|
+
continue
|
646
|
+
if char == '\\':
|
647
|
+
result += self.src[start_pos:self.pos]
|
648
|
+
parsed_escape = parse_escapes()
|
649
|
+
result += parsed_escape
|
650
|
+
start_pos = self.pos
|
651
|
+
continue
|
652
|
+
if char in error_on:
|
653
|
+
raise self.suffixed_err(f'Illegal character {char!r}')
|
654
|
+
self.pos += 1
|
655
|
+
|
656
|
+
def parse_value(self) -> ta.Any: # noqa: C901
|
657
|
+
try:
|
658
|
+
char: ta.Optional[str] = self.src[self.pos]
|
659
|
+
except IndexError:
|
660
|
+
char = None
|
661
|
+
|
662
|
+
# IMPORTANT: order conditions based on speed of checking and likelihood
|
663
|
+
|
664
|
+
# Basic strings
|
665
|
+
if char == '"':
|
666
|
+
if self.src.startswith('"""', self.pos):
|
667
|
+
return self.parse_multiline_str(literal=False)
|
668
|
+
return self.parse_one_line_basic_str()
|
669
|
+
|
670
|
+
# Literal strings
|
671
|
+
if char == "'":
|
672
|
+
if self.src.startswith("'''", self.pos):
|
673
|
+
return self.parse_multiline_str(literal=True)
|
674
|
+
return self.parse_literal_str()
|
675
|
+
|
676
|
+
# Booleans
|
677
|
+
if char == 't':
|
678
|
+
if self.src.startswith('true', self.pos):
|
679
|
+
self.pos += 4
|
680
|
+
return True
|
681
|
+
if char == 'f':
|
682
|
+
if self.src.startswith('false', self.pos):
|
683
|
+
self.pos += 5
|
684
|
+
return False
|
685
|
+
|
686
|
+
# Arrays
|
687
|
+
if char == '[':
|
688
|
+
return self.parse_array()
|
689
|
+
|
690
|
+
# Inline tables
|
691
|
+
if char == '{':
|
692
|
+
return self.parse_inline_table()
|
693
|
+
|
694
|
+
# Dates and times
|
695
|
+
datetime_match = self.RE_DATETIME.match(self.src, self.pos)
|
696
|
+
if datetime_match:
|
697
|
+
try:
|
698
|
+
datetime_obj = self.match_to_datetime(datetime_match)
|
699
|
+
except ValueError as e:
|
700
|
+
raise self.suffixed_err('Invalid date or datetime') from e
|
701
|
+
self.pos = datetime_match.end()
|
702
|
+
return datetime_obj
|
703
|
+
localtime_match = self.RE_LOCALTIME.match(self.src, self.pos)
|
704
|
+
if localtime_match:
|
705
|
+
self.pos = localtime_match.end()
|
706
|
+
return self.match_to_localtime(localtime_match)
|
707
|
+
|
708
|
+
# Integers and "normal" floats. The regex will greedily match any type starting with a decimal char, so needs to
|
709
|
+
# be located after handling of dates and times.
|
710
|
+
number_match = self.RE_NUMBER.match(self.src, self.pos)
|
711
|
+
if number_match:
|
712
|
+
self.pos = number_match.end()
|
713
|
+
return self.match_to_number(number_match, self.parse_float)
|
714
|
+
|
715
|
+
# Special floats
|
716
|
+
first_three = self.src[self.pos:self.pos + 3]
|
717
|
+
if first_three in {'inf', 'nan'}:
|
718
|
+
self.pos += 3
|
719
|
+
return self.parse_float(first_three)
|
720
|
+
first_four = self.src[self.pos:self.pos + 4]
|
721
|
+
if first_four in {'-inf', '+inf', '-nan', '+nan'}:
|
722
|
+
self.pos += 4
|
723
|
+
return self.parse_float(first_four)
|
724
|
+
|
725
|
+
raise self.suffixed_err('Invalid value')
|
726
|
+
|
727
|
+
def coord_repr(self, pos: TomlPos) -> str:
|
728
|
+
if pos >= len(self.src):
|
796
729
|
return 'end of document'
|
797
|
-
line = src.count('\n', 0, pos) + 1
|
730
|
+
line = self.src.count('\n', 0, pos) + 1
|
798
731
|
if line == 1:
|
799
732
|
column = pos + 1
|
800
733
|
else:
|
801
|
-
column = pos - src.rindex('\n', 0, pos)
|
734
|
+
column = pos - self.src.rindex('\n', 0, pos)
|
802
735
|
return f'line {line}, column {column}'
|
803
736
|
|
804
|
-
|
805
|
-
|
737
|
+
def suffixed_err(self, msg: str, *, pos: ta.Optional[TomlPos] = None) -> TomlDecodeError:
|
738
|
+
"""Return a `TomlDecodeError` where error message is suffixed with coordinates in source."""
|
806
739
|
|
807
|
-
|
808
|
-
|
740
|
+
if pos is None:
|
741
|
+
pos = self.pos
|
742
|
+
return TomlDecodeError(f'{msg} (at {self.coord_repr(pos)})')
|
809
743
|
|
744
|
+
_TIME_RE_STR = r'([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?'
|
810
745
|
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
746
|
+
RE_NUMBER = re.compile(
|
747
|
+
r"""
|
748
|
+
0
|
749
|
+
(?:
|
750
|
+
x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
|
751
|
+
|
|
752
|
+
b[01](?:_?[01])* # bin
|
753
|
+
|
|
754
|
+
o[0-7](?:_?[0-7])* # oct
|
755
|
+
)
|
756
|
+
|
|
757
|
+
[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
|
758
|
+
(?P<floatpart>
|
759
|
+
(?:\.[0-9](?:_?[0-9])*)? # optional fractional part
|
760
|
+
(?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
|
761
|
+
)
|
762
|
+
""",
|
763
|
+
flags=re.VERBOSE,
|
764
|
+
)
|
820
765
|
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
766
|
+
RE_LOCALTIME = re.compile(_TIME_RE_STR)
|
767
|
+
|
768
|
+
RE_DATETIME = re.compile(
|
769
|
+
rf"""
|
770
|
+
([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
|
771
|
+
(?:
|
772
|
+
[Tt ]
|
773
|
+
{_TIME_RE_STR}
|
774
|
+
(?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
|
775
|
+
)?
|
776
|
+
""",
|
777
|
+
flags=re.VERBOSE,
|
778
|
+
)
|
826
779
|
|
827
|
-
|
780
|
+
@classmethod
|
781
|
+
def match_to_datetime(cls, match: re.Match) -> ta.Union[datetime.datetime, datetime.date]:
|
782
|
+
"""
|
783
|
+
Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
|
784
|
+
|
785
|
+
Raises ValueError if the match does not correspond to a valid date or datetime.
|
786
|
+
"""
|
787
|
+
|
788
|
+
(
|
789
|
+
year_str,
|
790
|
+
month_str,
|
791
|
+
day_str,
|
792
|
+
hour_str,
|
793
|
+
minute_str,
|
794
|
+
sec_str,
|
795
|
+
micros_str,
|
796
|
+
zulu_time,
|
797
|
+
offset_sign_str,
|
798
|
+
offset_hour_str,
|
799
|
+
offset_minute_str,
|
800
|
+
) = match.groups()
|
801
|
+
year, month, day = int(year_str), int(month_str), int(day_str)
|
802
|
+
if hour_str is None:
|
803
|
+
return datetime.date(year, month, day)
|
804
|
+
hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
|
805
|
+
micros = int(micros_str.ljust(6, '0')) if micros_str else 0
|
806
|
+
if offset_sign_str:
|
807
|
+
tz: ta.Optional[datetime.tzinfo] = toml_cached_tz(
|
808
|
+
offset_hour_str, offset_minute_str, offset_sign_str,
|
809
|
+
)
|
810
|
+
elif zulu_time:
|
811
|
+
tz = datetime.UTC
|
812
|
+
else: # local date-time
|
813
|
+
tz = None
|
814
|
+
return datetime.datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
|
815
|
+
|
816
|
+
@classmethod
|
817
|
+
def match_to_localtime(cls, match: re.Match) -> datetime.time:
|
818
|
+
hour_str, minute_str, sec_str, micros_str = match.groups()
|
819
|
+
micros = int(micros_str.ljust(6, '0')) if micros_str else 0
|
820
|
+
return datetime.time(int(hour_str), int(minute_str), int(sec_str), micros)
|
821
|
+
|
822
|
+
@classmethod
|
823
|
+
def match_to_number(cls, match: re.Match, parse_float: TomlParseFloat) -> ta.Any:
|
824
|
+
if match.group('floatpart'):
|
825
|
+
return parse_float(match.group())
|
826
|
+
return int(match.group(), 0)
|