omextra 0.0.0.dev472__py3-none-any.whl → 0.0.0.dev485__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omextra/__about__.py +2 -0
- omextra/collections/__init__.py +0 -0
- omextra/collections/hamt/LICENSE +35 -0
- omextra/collections/hamt/__init__.py +0 -0
- omextra/collections/hamt/_hamt.c +3621 -0
- omextra/formats/goyaml/LICENSE +16 -0
- omextra/formats/goyaml/__init__.py +29 -0
- omextra/formats/goyaml/ast.py +2217 -0
- omextra/formats/goyaml/errors.py +49 -0
- omextra/formats/goyaml/parsing.py +2332 -0
- omextra/formats/goyaml/scanning.py +1888 -0
- omextra/formats/goyaml/tokens.py +998 -0
- omextra/text/abnf/LICENSE +16 -0
- omextra/text/abnf/__init__.py +79 -0
- omextra/text/abnf/base.py +313 -0
- omextra/text/abnf/core.py +141 -0
- omextra/text/abnf/errors.py +10 -0
- omextra/text/abnf/meta.py +583 -0
- omextra/text/abnf/parsers.py +343 -0
- omextra/text/abnf/utils.py +76 -0
- omextra/text/abnf/visitors.py +55 -0
- {omextra-0.0.0.dev472.dist-info → omextra-0.0.0.dev485.dist-info}/METADATA +2 -2
- {omextra-0.0.0.dev472.dist-info → omextra-0.0.0.dev485.dist-info}/RECORD +27 -7
- {omextra-0.0.0.dev472.dist-info → omextra-0.0.0.dev485.dist-info}/WHEEL +0 -0
- {omextra-0.0.0.dev472.dist-info → omextra-0.0.0.dev485.dist-info}/entry_points.txt +0 -0
- {omextra-0.0.0.dev472.dist-info → omextra-0.0.0.dev485.dist-info}/licenses/LICENSE +0 -0
- {omextra-0.0.0.dev472.dist-info → omextra-0.0.0.dev485.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1888 @@
|
|
|
1
|
+
# ruff: noqa: UP006 UP007 UP043 UP045
|
|
2
|
+
# @omlish-lite
|
|
3
|
+
import dataclasses as dc
|
|
4
|
+
import enum
|
|
5
|
+
import typing as ta
|
|
6
|
+
|
|
7
|
+
from omlish.lite.check import check
|
|
8
|
+
|
|
9
|
+
from .errors import EofYamlError
|
|
10
|
+
from .errors import YamlError
|
|
11
|
+
from .errors import YamlErrorOr
|
|
12
|
+
from .errors import yaml_error
|
|
13
|
+
from .tokens import YAML_RESERVED_TAG_KEYWORD_MAP
|
|
14
|
+
from .tokens import YamlIndicator
|
|
15
|
+
from .tokens import YamlPosition
|
|
16
|
+
from .tokens import YamlToken
|
|
17
|
+
from .tokens import YamlTokenMakers
|
|
18
|
+
from .tokens import YamlTokens
|
|
19
|
+
from .tokens import YamlTokenType
|
|
20
|
+
from .tokens import new_yaml_token
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
##
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dc.dataclass()
class InvalidTokenYamlError(YamlError):
    """Error raised/returned for a token the scanner marked as invalid; the message comes from the token itself."""

    # The offending token. Its `error` attribute is expected to be set (checked in `message`).
    token: YamlToken

    @property
    def message(self) -> str:
        """Return the message of the error attached to the wrapped token."""
        underlying = check.not_none(self.token.error)
        return underlying.message
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def err_invalid_token(tk: YamlToken) -> InvalidTokenYamlError:
    """Build an `InvalidTokenYamlError` wrapping the given token."""
    err = InvalidTokenYamlError(token=tk)
    return err
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
##
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Context at scanning
@dc.dataclass()
class YamlScanningContext:
    """
    Mutable state used while scanning one source string.

    Holds the read cursor (`idx` / `size` / `src`), the value buffer (`buf`), the raw "origin" buffer (`obuf`), the
    tokens produced so far and the optional multi-line state (`mstate`) for literal / folded / raw folded scalars.
    """

    idx: int = 0
    size: int = 0
    # Length of `buf` recorded the last time a non-blank character was appended to it.
    not_space_char_pos: int = 0
    # Length of `obuf` recorded the last time a non-blank character was appended to it.
    not_space_org_char_pos: int = 0
    src: str = ''
    buf: str = ''
    obuf: str = ''
    tokens: YamlTokens = dc.field(default_factory=YamlTokens)
    mstate: ta.Optional['YamlMultiLineState'] = None

    def clear(self) -> None:
        """Drop both buffers and leave multi-line mode."""
        self.reset_buffer()
        self.mstate = None

    def reset(self, src: str) -> None:
        """Re-initialize the context for scanning `src` from the beginning."""
        self.idx = 0
        self.size = len(src)
        self.src = src
        self.tokens = YamlTokens()
        self.reset_buffer()
        self.mstate = None

    def reset_buffer(self) -> None:
        """Empty `buf` / `obuf` and their non-blank position markers."""
        self.buf = ''
        self.obuf = ''
        self.not_space_char_pos = 0
        self.not_space_org_char_pos = 0

    def break_multi_line(self) -> None:
        """Leave multi-line mode."""
        self.mstate = None

    def get_multi_line_state(self) -> ta.Optional['YamlMultiLineState']:
        """Return the current multi-line state, or None when not in multi-line mode."""
        return self.mstate

    def _enter_block_scalar(self, mstate: 'YamlMultiLineState', last_delim_column: int, opt: str) -> None:
        # Shared tail of set_literal / set_folded: an explicit indentation indicator in the header fixes the column
        # of the first content line relative to the last delimiter column.
        indent = first_line_indent_column_by_opt(opt)
        if indent > 0:
            mstate.first_line_indent_column = last_delim_column + indent
        self.mstate = mstate

    def set_literal(self, last_delim_column: int, opt: str) -> None:
        """Enter literal multi-line mode with header option string `opt`."""
        self._enter_block_scalar(YamlMultiLineState(is_literal=True, opt=opt), last_delim_column, opt)

    def set_folded(self, last_delim_column: int, opt: str) -> None:
        """Enter folded multi-line mode with header option string `opt`."""
        self._enter_block_scalar(YamlMultiLineState(is_folded=True, opt=opt), last_delim_column, opt)

    def set_raw_folded(self, column: int) -> None:
        """Enter raw folded multi-line mode, using `column` as the indent column."""
        mstate = YamlMultiLineState(
            is_raw_folded=True,
        )
        mstate.update_indent_column(column)
        self.mstate = mstate

    def add_token(self, tk: ta.Optional[YamlToken]) -> None:
        """Append `tk` to the token list; None is ignored."""
        if tk is None:
            return
        self.tokens.append(tk)  # FIXME: .add??

    def add_buf(self, r: str) -> None:
        """Append `r` to the value buffer, skipping leading spaces and tabs."""
        is_blank = r == ' ' or r == '\t'
        if is_blank and len(self.buf) == 0:
            return
        self.buf += r
        if not is_blank:
            self.not_space_char_pos = len(self.buf)

    def add_buf_with_tab(self, r: str) -> None:
        """Append `r` to the value buffer, skipping leading spaces only (tabs are kept and count as content)."""
        is_space = r == ' '
        if is_space and len(self.buf) == 0:
            return
        self.buf += r
        if not is_space:
            self.not_space_char_pos = len(self.buf)

    def add_origin_buf(self, r: str) -> None:
        """Append `r` to the origin buffer unconditionally."""
        self.obuf += r
        if r != ' ' and r != '\t':
            self.not_space_org_char_pos = len(self.obuf)

    def remove_right_space_from_buf(self) -> None:
        """Cut trailing blanks off `obuf`; when anything was cut, `buf` is replaced by `buffered_src()`."""
        keep = len(self.obuf[:self.not_space_org_char_pos])
        if len(self.obuf) - keep > 0:
            self.obuf = self.obuf[:keep]
            self.buf = self.buffered_src()

    def is_eos(self) -> bool:
        """Return True when the cursor is at or beyond the last character of `src`."""
        return len(self.src) - 1 <= self.idx

    def is_next_eos(self) -> bool:
        """Return True when there is no character after the current one."""
        return len(self.src) <= self.idx + 1

    def next(self) -> bool:
        """Return True while the cursor is still inside the source."""
        return self.idx < self.size

    def source(self, s: int, e: int) -> str:
        """Return `src[s:e]`."""
        return self.src[s:e]

    def previous_char(self) -> str:
        """Return the character before the cursor, or '' at the start."""
        if self.idx > 0:
            return self.src[self.idx - 1]
        return ''

    def current_char(self) -> str:
        """Return the character at the cursor, or '' past the end."""
        if self.size > self.idx:
            return self.src[self.idx]
        return ''

    def next_char(self) -> str:
        """Return the character after the cursor, or '' when there is none."""
        if self.size > self.idx + 1:
            return self.src[self.idx + 1]
        return ''

    def repeat_num(self, r: str) -> int:
        """Count how many consecutive characters equal to `r` start at the cursor."""
        cnt = 0
        for i in range(self.idx, self.size):
            if self.src[i] != r:
                break
            cnt += 1
        return cnt

    def progress(self, num: int) -> None:
        """Advance the cursor by `num` characters."""
        self.idx += num

    def exists_buffer(self) -> bool:
        """Return True when `buffered_src()` is non-empty."""
        return len(self.buffered_src()) != 0

    def is_multi_line(self) -> bool:
        """Return True while a multi-line state is set."""
        return self.mstate is not None

    def buffered_src(self) -> str:
        """
        Return the buffered value text, i.e. `buf` cut at `not_space_char_pos`.

        In multi-line mode the trailing newlines are additionally chomped according to the header options held by the
        multi-line state.
        """
        src = self.buf[:self.not_space_char_pos]

        if self.is_multi_line():
            mstate = check.not_none(self.get_multi_line_state())

            # remove end '\n' character and trailing empty lines.
            # https://yaml.org/spec/1.2.2/#8112-block-chomping-indicator
            if mstate.has_trim_all_end_newline_opt():
                # If the '-' flag is specified, all trailing newline characters will be removed.
                src = src.rstrip('\n')

            elif not mstate.has_keep_all_end_newline_opt():
                # Normally, all but one of the trailing newline characters are removed. (A plain `rstrip('\n')` here
                # would drop every trailing newline, including the one that has to be kept.)
                new_line_char_count = len(src) - len(src.rstrip('\n'))
                if new_line_char_count > 1:
                    src = src[:len(src) - (new_line_char_count - 1)]

            # If the text ends with a space character, remove all of them.
            if mstate.has_trim_all_end_newline_opt():
                src = src.rstrip(' ')

            if src == '\n':
                # If the content consists only of a newline, it can be considered as the document ending without any
                # specified value, so it is treated as an empty string.
                src = ''

            if mstate.has_keep_all_end_newline_opt() and len(src) == 0:
                src = '\n'

        return src

    def buffered_token(self, pos: YamlPosition) -> ta.Optional[YamlToken]:
        """
        Turn the buffered text into a token positioned at `pos` and reset the buffers.

        Returns None when the cursor is still at index 0 or when there is no buffered text (in the latter case only
        the value buffer is cleared).
        """
        if self.idx == 0:
            return None

        source = self.buffered_src()
        if len(source) == 0:
            self.buf = self.buf[:0]  # clear value's buffer only.
            return None

        tk: ta.Optional[YamlToken]
        if self.is_multi_line():
            tk = YamlTokenMakers.new_string(source, self.obuf, pos)
        else:
            tk = new_yaml_token(source, self.obuf, pos)

        self.set_token_type_by_prev_tag(tk)
        self.reset_buffer()
        return tk

    def set_token_type_by_prev_tag(self, tk: ta.Optional[YamlToken]) -> None:
        """Force `tk` to STRING type when the previous token is a tag that is not a reserved tag keyword."""
        last_tk = self.last_token()
        if last_tk is None:
            return

        if last_tk.type != YamlTokenType.TAG:
            return

        tag = last_tk.value
        if tag not in YAML_RESERVED_TAG_KEYWORD_MAP:
            check.not_none(tk).type = YamlTokenType.STRING

    def last_token(self) -> ta.Optional[YamlToken]:
        """Return the most recently added token, or None when no token exists yet."""
        if len(self.tokens) != 0:
            return self.tokens[len(self.tokens) - 1]

        return None

    @staticmethod
    def new(src: str) -> 'YamlScanningContext':
        """Create a context that is ready to scan `src`."""
        ctx = YamlScanningContext()
        ctx.reset(src)
        return ctx
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
##
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
@dc.dataclass()
class YamlMultiLineState:
    """Per-scalar bookkeeping for literal, folded and raw folded multi-line text: header options and indent columns."""

    opt: str = ''
    first_line_indent_column: int = 0
    prev_line_indent_column: int = 0
    line_indent_column: int = 0
    last_not_space_only_line_indent_column: int = 0
    space_only_indent_column: int = 0
    folded_new_line: bool = False
    is_raw_folded: bool = False
    is_literal: bool = False
    is_folded: bool = False

    def last_delim_column(self) -> int:
        """Return the column just before the first-line indent column, or 0 while that column is unknown."""
        first = self.first_line_indent_column
        return 0 if first == 0 else first - 1

    def update_indent_column(self, column: int) -> None:
        """Record `column` as first-line and/or current-line indent column, for whichever is still unset (0)."""
        if self.first_line_indent_column == 0:
            self.first_line_indent_column = column
        if self.line_indent_column == 0:
            self.line_indent_column = column

    def update_space_only_indent_column(self, column: int) -> None:
        """Remember the indent of a blank-only line; ignored once the first-line indent column is known."""
        if self.first_line_indent_column == 0:
            self.space_only_indent_column = column

    def validate_indent_after_space_only(self, column: int) -> ta.Optional['YamlError']:
        """Return an error when `column` is smaller than a previously recorded space-only indent, else None."""
        undetermined = self.first_line_indent_column == 0
        if undetermined and self.space_only_indent_column > column:
            return yaml_error('invalid number of indent is specified after space only')
        return None

    def validate_indent_column(self) -> ta.Optional['YamlError']:
        """Return an error when the header gives an indent and the current line is indented less than required."""
        has_header_indent = first_line_indent_column_by_opt(self.opt) != 0
        if has_header_indent and self.first_line_indent_column > self.line_indent_column:
            return yaml_error('invalid number of indent is specified in the multi-line header')
        return None

    def update_new_line_state(self) -> None:
        """Roll the per-line indent state over to a new line and flag a pending folded newline."""
        current = self.line_indent_column
        self.prev_line_indent_column = current
        if current != 0:
            self.last_not_space_only_line_indent_column = current
        self.folded_new_line = True
        self.line_indent_column = 0

    def is_indent_column(self, column: int) -> bool:
        """Return True when `column` is before the first-line indent column (or is column 1 while it is unknown)."""
        first = self.first_line_indent_column
        if first != 0:
            return first > column
        return column == 1

    def add_indent(self, ctx: 'YamlScanningContext', column: int) -> None:
        """Append one space to `ctx.buf` for an indent character at or beyond the first-line indent column."""
        first = self.first_line_indent_column

        # Nothing is added while the first-line indent column is unknown. Once it is known it acts as the threshold,
        # since the `first_line_indent_column` is then a positive number.
        if first == 0 or column < first:
            return

        # `folded_new_line` is a variable that is set to True for every newline.
        if self.folded_new_line and not self.is_literal:
            self.folded_new_line = False

        # Since add_buf ignores space characters, add to the buffer directly.
        ctx.buf = ctx.buf + ' '
        ctx.not_space_char_pos = len(ctx.buf)

    # update_new_line_in_folded: in a Folded or RawFolded context, if the content on the current line starts at the
    # same column as the previous line, treat the new-line-char as a space.
    def update_new_line_in_folded(self, ctx: 'YamlScanningContext', column: int) -> None:
        """Rewrite the pending trailing newline in `ctx.buf` for folded / raw folded text, then clear the flag."""
        # Only Folded or RawFolded text with a pending newline is affected.
        if self.is_literal or not self.folded_new_line:
            return

        buf = ctx.buf
        last_char = buf[-1:]  # '' for an empty buffer
        prev_last_char = buf[-2:-1]  # '' when fewer than two characters are buffered

        if self.line_indent_column == self.prev_line_indent_column:
            # ---
            # >
            #  a
            #  b
            if last_char == '\n':
                ctx.buf = buf[:-1] + ' '

        elif self.prev_line_indent_column == 0 and self.last_not_space_only_line_indent_column == column:
            # if previous line is indent-space and new-line-char only, prev_line_indent_column is zero. In this case,
            # last new-line-char is removed.
            # ---
            # >
            #  a
            #
            #  b
            if last_char == '\n' and prev_last_char == '\n':
                ctx.buf = buf[:len(buf) - 1]
                ctx.not_space_char_pos = len(ctx.buf)

        self.folded_new_line = False

    def has_trim_all_end_newline_opt(self) -> bool:
        """Return True when the options start or end with '-' or the state is raw folded."""
        opt = self.opt
        return opt.startswith('-') or opt.endswith('-') or self.is_raw_folded

    def has_keep_all_end_newline_opt(self) -> bool:
        """Return True when the options start or end with '+'."""
        opt = self.opt
        return opt.startswith('+') or opt.endswith('+')
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
##
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def first_line_indent_column_by_opt(opt: str) -> int:
    """
    Extract the indentation number from a multi-line header option string.

    Leading and trailing '-' / '+' characters are stripped first; whatever remains is parsed as a base-10 integer.
    Returns 0 when the remainder is not a valid integer (including when it is empty).
    """
    digits = opt.lstrip('-').lstrip('+').rstrip('-').rstrip('+')
    try:
        indent = int(digits, 10)
    except ValueError:
        indent = 0
    return indent
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
##
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
class YamlIndentState(enum.Enum):
    """How the indentation of the current position relates to the previous one."""

    EQUAL = enum.auto()  # equals previous indent
    UP = enum.auto()  # more indent than previous
    DOWN = enum.auto()  # less indent than previous
    KEEP = enum.auto()  # uses not indent token
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
# Scanner holds the scanner's internal state while processing a given text. It can be allocated as part of another data
|
|
420
|
+
# structure but must be initialized via init before use.
|
|
421
|
+
@dc.dataclass()
|
|
422
|
+
class YamlScanner:
|
|
423
|
+
source: str = ''
|
|
424
|
+
source_pos: int = 0
|
|
425
|
+
source_size: int = 0
|
|
426
|
+
# line number. This number starts from 1.
|
|
427
|
+
line: int = 0
|
|
428
|
+
# column number. This number starts from 1.
|
|
429
|
+
column: int = 0
|
|
430
|
+
# offset represents the offset from the beginning of the source.
|
|
431
|
+
offset: int = 0
|
|
432
|
+
# last_delim_column is the last column needed to compare indent is retained.
|
|
433
|
+
last_delim_column: int = 0
|
|
434
|
+
# indent_num indicates the number of spaces used for indentation.
|
|
435
|
+
indent_num: int = 0
|
|
436
|
+
# prev_line_indent_num indicates the number of spaces used for indentation at previous line.
|
|
437
|
+
prev_line_indent_num: int = 0
|
|
438
|
+
# indent_level indicates the level of indent depth. This value does not match the column value.
|
|
439
|
+
indent_level: int = 0
|
|
440
|
+
is_first_char_at_line: bool = False
|
|
441
|
+
is_anchor: bool = False
|
|
442
|
+
is_alias: bool = False
|
|
443
|
+
is_directive: bool = False
|
|
444
|
+
started_flow_sequence_num: int = 0
|
|
445
|
+
started_flow_map_num: int = 0
|
|
446
|
+
indent_state: YamlIndentState = YamlIndentState.EQUAL
|
|
447
|
+
saved_pos: ta.Optional[YamlPosition] = None
|
|
448
|
+
|
|
449
|
+
def pos(self) -> 'YamlPosition':
    """Snapshot the scanner's current line, column, offset and indent values as a `YamlPosition`."""
    snapshot = YamlPosition(
        line=self.line,
        column=self.column,
        offset=self.offset,
        indent_num=self.indent_num,
        indent_level=self.indent_level,
    )
    return snapshot
|
|
457
|
+
|
|
458
|
+
def buffered_token(self, ctx: 'YamlScanningContext') -> 'ta.Optional[YamlToken]':
    """
    Build a token from the text buffered in `ctx`.

    A previously saved position is used (and consumed) when present; otherwise the position is derived from the
    scanner's current position minus the buffered text.
    """
    saved = self.saved_pos
    if saved is not None:
        tk = ctx.buffered_token(saved)
        self.saved_pos = None
        return tk

    buffered_len = len(ctx.buf)
    line = self.line
    column = self.column - buffered_len
    level = self.indent_level

    if ctx.is_multi_line():
        line -= self.new_line_count(ctx.buf)
        column = ctx.obuf.find(ctx.buf) + 1
        # Since we are in a literal, folded or raw folded we can use the indent level from the last token.
        last = ctx.last_token()
        if last is not None:  # The last token should never be None here.
            level = last.position.indent_level + 1

    position = YamlPosition(
        line=line,
        column=column,
        offset=self.offset - len(ctx.buf),
        indent_num=self.indent_num,
        indent_level=level,
    )
    return ctx.buffered_token(position)
|
|
482
|
+
|
|
483
|
+
def progress_column(self, ctx: 'YamlScanningContext', num: int) -> None:
    """Advance by `num` characters on the current line: column, offset and the underlying cursor all move."""
    self.offset += num
    self.column += num
    self.progress(ctx, num)
|
|
487
|
+
|
|
488
|
+
def progress_only(self, ctx: 'YamlScanningContext', num: int) -> None:
    """Advance offset and cursor by `num` characters while leaving the column untouched."""
    self.offset = self.offset + num
    self.progress(ctx, num)
|
|
491
|
+
|
|
492
|
+
def progress_line(self, ctx: 'YamlScanningContext') -> None:
    """Consume one line-break character and reset the per-line scanner state for the next line."""
    # Remember the indent of the line being left before zeroing it.
    self.prev_line_indent_num, self.indent_num = self.indent_num, 0

    self.line += 1
    self.column = 1
    self.offset += 1

    self.is_first_char_at_line = True
    self.is_anchor = self.is_alias = self.is_directive = False

    self.progress(ctx, 1)
|
|
503
|
+
|
|
504
|
+
def progress(self, ctx: 'YamlScanningContext', num: int) -> None:
    """Move both the context cursor and the scanner's `source_pos` forward by `num`."""
    ctx.progress(num)
    self.source_pos = self.source_pos + num
|
|
507
|
+
|
|
508
|
+
def is_new_line_char(self, c: str) -> bool:
    """Return True when `c` is a line feed or a carriage return."""
    return c == '\n' or c == '\r'
|
|
514
|
+
|
|
515
|
+
def new_line_count(self, src: str) -> int:
    """Count the line breaks in `src`, where a CRLF pair counts once and a lone CR or LF counts once each."""
    # Every CR and every LF is one break, except that the LF of a CRLF pair must not be counted a second time.
    carriage_returns = src.count('\r')
    line_feeds = src.count('\n')
    crlf_pairs = src.count('\r\n')
    return carriage_returns + line_feeds - crlf_pairs
|
|
531
|
+
|
|
532
|
+
def update_indent_level(self) -> None:
    """Raise or lower `indent_level` by one depending on how this line's indent compares to the previous line's."""
    previous, current = self.prev_line_indent_num, self.indent_num
    if current > previous:
        self.indent_level += 1
        return
    # The level never drops below zero.
    if current < previous and self.indent_level > 0:
        self.indent_level -= 1
|
|
538
|
+
|
|
539
|
+
def update_indent_state(self, ctx: 'YamlScanningContext') -> None:
    """Set `indent_state` to UP or DOWN by comparing the current column with `last_delim_column`."""
    delim_column = self.last_delim_column
    if delim_column == 0:
        # No delimiter column recorded: leave the state unchanged.
        return

    # If last_delim_column and self.column are the same, treat as Down state since it is the same column as
    # delimiter.
    self.indent_state = YamlIndentState.UP if delim_column < self.column else YamlIndentState.DOWN
|
|
549
|
+
|
|
550
|
+
def update_indent(self, ctx: 'YamlScanningContext', c: str) -> None:
    """Update indent counters and state for character `c`, based on whether it is still in the line's leading part."""
    if not self.is_first_char_at_line:
        self.indent_state = YamlIndentState.KEEP
        return

    # Still in the leading part of the line.
    if self.is_new_line_char(c):
        return

    if c == ' ':
        self.indent_num += 1
        return

    if c == '\t':
        # Found tab indent. In this case, scan_tab returns error.
        return

    # First real character of the line.
    self.update_indent_level()
    self.update_indent_state(ctx)
    self.is_first_char_at_line = False
|
|
565
|
+
|
|
566
|
+
def is_changed_to_indent_state_down(self) -> bool:
    """Return True when the current indent state is DOWN."""
    state = self.indent_state
    return state == YamlIndentState.DOWN
|
|
568
|
+
|
|
569
|
+
def is_changed_to_indent_state_up(self) -> bool:
    """Return True when the current indent state is UP."""
    state = self.indent_state
    return state == YamlIndentState.UP
|
|
571
|
+
|
|
572
|
+
def add_buffered_token_if_exists(self, ctx: 'YamlScanningContext') -> None:
    """Build a token from the buffered text and hand it to `ctx.add_token`."""
    tk = self.buffered_token(ctx)
    ctx.add_token(tk)
|
|
574
|
+
|
|
575
|
+
def break_multi_line(self, ctx: 'YamlScanningContext') -> None:
    """Delegate to `ctx.break_multi_line()`."""
    leave_multi_line = ctx.break_multi_line
    leave_multi_line()
|
|
577
|
+
|
|
578
|
+
def scan_single_quote(self, ctx: 'YamlScanningContext') -> 'YamlErrorOr[YamlToken]':
    """
    Scan a single-quoted scalar whose opening quote is at `ctx.idx`.

    Returns the single-quote token on success, or an invalid-token error when a document separator appears inside
    the text, a tab is used for indentation on a continuation line, or the closing quote is missing.
    """
    ctx.add_origin_buf("'")
    start_pos = self.pos()
    text = ctx.src
    end = len(text)
    value = ''
    at_line_start = False  # True from a line break until the first content character of the next line
    after_break = False  # True for the one iteration right after a line break (progress_line already advanced)

    i = ctx.idx  # index of the opening quote; the loop pre-increments
    while True:
        i += 1
        if i >= end:
            break

        if after_break:
            after_break = False
        else:
            self.progress_column(ctx, 1)

        ch = text[i]
        ctx.add_origin_buf(ch)

        if self.is_new_line_char(ch):
            # Trailing spaces before the break are dropped. The first break folds into a space, further breaks on
            # otherwise empty lines become newlines.
            value = value.rstrip(' ')
            value += '\n' if at_line_start else ' '

            at_line_start = True
            after_break = True
            self.progress_line(ctx)
            if i + 1 < end:
                err = self.validate_document_separator_marker(ctx, text[i + 1:])
                if err is not None:
                    return err

            continue

        if at_line_start:
            if ch == ' ':
                continue

            if ch == '\t':
                if self.last_delim_column >= self.column:
                    return err_invalid_token(
                        YamlTokenMakers.new_invalid(
                            yaml_error('tab character cannot be used for indentation in single-quoted text'),
                            ctx.obuf,
                            self.pos(),
                        ),
                    )

                continue

        if ch != "'":
            value += ch
            at_line_start = False
            continue

        if i + 1 < len(ctx.src) and ctx.src[i + 1] == '\'':
            # '' handle as ' character
            value += ch
            ctx.add_origin_buf(ch)
            i += 1
            self.progress_column(ctx, 1)
            continue

        # Closing quote.
        self.progress_column(ctx, 1)
        return YamlTokenMakers.new_single_quote(value, ctx.obuf, start_pos)

    self.progress_column(ctx, 1)
    return err_invalid_token(
        YamlTokenMakers.new_invalid(
            yaml_error('could not find end character of single-quoted text'),
            ctx.obuf,
            start_pos,
        ),
    )
|
|
664
|
+
|
|
665
|
+
def scan_double_quote(self, ctx: 'YamlScanningContext') -> 'YamlErrorOr[YamlToken]':
    """
    Scan a double-quoted scalar whose opening quote is at `ctx.idx`, decoding backslash escapes.

    Returns the double-quote token on success. Returns an invalid-token error when a document separator appears
    inside the text, a tab is used for indentation on a continuation line, an escape sequence is unknown, truncated
    or (for `\\U`) outside the Unicode code point range, a surrogate pair is malformed, or the closing quote is
    missing.
    """
    # Escapes that consume exactly one character after the backslash and append a fixed replacement.
    simple_escapes = {
        '0': '\x00',
        'a': '\x07',
        'b': '\x08',
        't': '\x09',
        'n': '\x0A',
        'v': '\x0B',
        'f': '\x0C',
        'r': '\x0D',
        'e': '\x1B',
        ' ': '\x20',
        '"': '\x22',
        '/': '\x2F',
        '\\': '\x5C',
        'N': '\x85',
        '_': '\xA0',
        'L': '\u2028',
        'P': '\u2029',
        '\t': '\t',
    }

    def invalid_here(msg: str) -> 'InvalidTokenYamlError':
        # Invalid-token error positioned at the scanner's current position.
        return err_invalid_token(
            YamlTokenMakers.new_invalid(
                yaml_error(msg),
                ctx.obuf,
                self.pos(),
            ),
        )

    ctx.add_origin_buf('"')
    srcpos = self.pos()
    src = ctx.src
    size = len(src)
    value = ''
    is_first_line_char = False  # True from a line break until the first content character of the next line
    is_new_line = False  # True for the one iteration right after a line break (progress_line already advanced)

    idx = ctx.idx  # index of the opening quote; the loop pre-increments
    while True:
        idx += 1
        if not (idx < size):
            break

        if not is_new_line:
            self.progress_column(ctx, 1)
        else:
            is_new_line = False

        c = src[idx]
        ctx.add_origin_buf(c)
        if self.is_new_line_char(c):
            # Trailing spaces before the break are dropped. The first break folds into a space, further breaks on
            # otherwise empty lines become newlines.
            value = value.rstrip(' ')
            value += '\n' if is_first_line_char else ' '

            is_first_line_char = True
            is_new_line = True
            self.progress_line(ctx)
            if idx + 1 < size:
                err = self.validate_document_separator_marker(ctx, src[idx + 1:])
                if err is not None:
                    return err

            continue

        if is_first_line_char and c == ' ':
            continue

        if is_first_line_char and c == '\t':
            if self.last_delim_column >= self.column:
                return invalid_here('tab character cannot be used for indentation in double-quoted text')

            continue

        if c == '\\':
            is_first_line_char = False
            if idx + 1 >= size:
                # Backslash is the very last character of the source: keep it literally.
                value += c
                continue

            next_char = src[idx + 1]
            progress = 0

            if next_char in simple_escapes:
                progress = 1
                ctx.add_origin_buf(next_char)
                value += simple_escapes[next_char]

            elif next_char == 'x':
                if idx + 3 >= size:
                    # Not enough characters left for two hex digits: keep the 'x' literally.
                    progress = 1
                    ctx.add_origin_buf(next_char)
                    value += next_char
                else:
                    progress = 3
                    code_num = hex_runes_to_int(src[idx + 2: idx + 4])
                    value += chr(code_num)

            elif next_char == 'u':
                # \u0000 style must have 5 characters at least.
                if idx + 5 >= size:
                    return invalid_here('not enough length for escaped UTF-16 character')

                progress = 5
                code_num = hex_runes_to_int(src[idx + 2: idx + 6])

                # handle surrogate pairs.
                if 0xD800 <= code_num <= 0xDBFF:
                    high = code_num

                    # \u0000\u0000 style must have 11 characters at least.
                    if idx + 11 >= size:
                        return invalid_here('not enough length for escaped UTF-16 surrogate pair')

                    if src[idx + 6] != '\\' or src[idx + 7] != 'u':
                        return invalid_here(
                            'found unexpected character after high surrogate for UTF-16 surrogate pair',
                        )

                    low = hex_runes_to_int(src[idx + 8: idx + 12])
                    if low < 0xDC00 or low > 0xDFFF:
                        return invalid_here('found unexpected low surrogate after high surrogate')

                    code_num = ((high - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000
                    progress += 6

                value += chr(code_num)

            elif next_char == 'U':
                # \U00000000 style must have 9 characters at least.
                if idx + 9 >= size:
                    return invalid_here('not enough length for escaped UTF-32 character')

                progress = 9
                code_num = hex_runes_to_int(src[idx + 2: idx + 10])
                # Eight hex digits can encode values beyond the last Unicode code point; chr() would raise
                # ValueError for those, so report them as an invalid token instead of crashing the scanner.
                if not (0 <= code_num <= 0x10FFFF):
                    return invalid_here('escaped UTF-32 character is out of the Unicode code point range')

                value += chr(code_num)

            elif next_char == '\n':
                # Escaped line break: the break is consumed without adding anything to the value.
                is_first_line_char = True
                is_new_line = True
                ctx.add_origin_buf(next_char)
                self.progress_column(ctx, 1)
                self.progress_line(ctx)
                idx += 1
                continue

            elif next_char == '\r':
                is_first_line_char = True
                is_new_line = True
                ctx.add_origin_buf(next_char)
                self.progress_line(ctx)
                progress = 1
                # Skip \n after \r in CRLF sequences
                if idx + 2 < size and src[idx + 2] == '\n':
                    ctx.add_origin_buf('\n')
                    progress = 2

            else:
                self.progress_column(ctx, 1)
                return invalid_here(f'found unknown escape character {next_char!r}')

            idx += progress
            self.progress_column(ctx, progress)
            continue

        if c == '\t':
            found_not_space_char = False
            progress = 0

            # Look ahead over the rest of the line: `progress` counts the blanks seen, `found_not_space_char` records
            # whether any other character occurs before the next line break.
            for i in range(idx + 1, size):
                if src[i] == ' ' or src[i] == '\t':
                    progress += 1
                    continue

                if self.is_new_line_char(src[i]):
                    break

                found_not_space_char = True

            if found_not_space_char:
                value += c
                if src[idx + 1] != '"':
                    self.progress_column(ctx, 1)

            else:
                # Only blanks follow up to the line break / end of source: skip them together with this tab.
                idx += progress
                self.progress_column(ctx, progress)

            continue

        if c != '"':
            value += c
            is_first_line_char = False
            continue

        # Closing quote.
        self.progress_column(ctx, 1)
        return YamlTokenMakers.new_double_quote(value, ctx.obuf, srcpos)

    self.progress_column(ctx, 1)
    return err_invalid_token(
        YamlTokenMakers.new_invalid(
            yaml_error('could not find end character of double-quoted text'),
            ctx.obuf,
            srcpos,
        ),
    )
|
|
963
|
+
|
|
964
|
+
def validate_document_separator_marker(self, ctx: YamlScanningContext, src: str) -> ta.Optional[YamlError]:
    """
    Reject a document separator marker found at the start of ``src``.

    Returns an invalid-token error (carrying ``ctx.obuf`` and the scanner's current position) when
    ``found_document_separator_marker(src)`` is true, otherwise ``None``.
    """

    if not self.found_document_separator_marker(src):
        return None

    invalid_tk = YamlTokenMakers.new_invalid(
        yaml_error('found unexpected document separator'),
        ctx.obuf,
        self.pos(),
    )
    return err_invalid_token(invalid_tk)
|
|
971
|
+
|
|
972
|
+
def found_document_separator_marker(self, src: str) -> bool:
    """
    Report whether ``src`` starts with a document separator marker (``---`` or ``...``).

    Input shorter than three characters never matches. Exactly three characters are compared as-is. For longer
    input only the first four characters are looked at, after passing them through ``trim_right_func`` with a
    space / tab / LF / CR predicate (presumably stripping those from the right - verify against its definition).
    """

    size = len(src)
    if size < 3:
        return False

    if size == 3:
        candidate = src
    else:
        candidate = trim_right_func(src[:4], lambda ch: ch in (' ', '\t', '\n', '\r'))

    return candidate in ('---', '...')
|
|
983
|
+
|
|
984
|
+
def scan_quote(self, ctx: YamlScanningContext, ch: str) -> YamlErrorOr[bool]:
    """
    Scan a quoted scalar whose opening quote is ``ch``.

    Returns ``False`` (nothing consumed) when text is already buffered. Otherwise a single quote is handed to
    ``scan_single_quote`` and anything else to ``scan_double_quote``; an error from either is returned as-is,
    while a token is appended to ``ctx``, the context is cleared, and ``True`` is returned.
    """

    if ctx.exists_buffer():
        return False

    tk = self.scan_single_quote(ctx) if ch == "'" else self.scan_double_quote(ctx)
    if isinstance(tk, YamlError):
        return tk

    ctx.add_token(tk)
    ctx.clear()
    return True
|
|
1004
|
+
|
|
1005
|
+
def scan_white_space(self, ctx: YamlScanningContext) -> bool:
    """
    Handle a space character that is significant to the scanner itself.

    Returns ``False`` (leaving the space to the caller) inside multi-line scalars, and when the scanner is
    neither at the start of a line nor inside an anchor, alias, or directive. Otherwise returns ``True`` after:

    - at the start of a line: advancing one column and recording the space in the origin buffer;
    - inside a directive: flushing the buffered token, then doing the same;
    - inside an anchor/alias: flushing the buffered token and leaving anchor/alias mode.
    """

    if ctx.is_multi_line():
        return False

    if not (self.is_anchor or self.is_directive or self.is_alias or self.is_first_char_at_line):
        return False

    if not self.is_first_char_at_line:
        in_directive = self.is_directive
        self.add_buffered_token_if_exists(ctx)
        if not in_directive:
            # The space terminates the anchor/alias name.
            self.is_anchor = False
            self.is_alias = False
            return True

    self.progress_column(ctx, 1)
    ctx.add_origin_buf(' ')
    return True
|
|
1027
|
+
|
|
1028
|
+
def is_merge_key(self, ctx: YamlScanningContext) -> bool:
    """
    Report whether the ``<<`` at the current position is a merge key.

    Requires ``ctx.repeat_num('<')`` to be exactly 2, then looks past any spaces for a ``:`` that is followed
    by a space or a line break. Any other non-space character ends the search with ``False``.
    """

    if ctx.repeat_num('<') != 2:
        return False

    text = ctx.src
    end = len(text)
    pos = ctx.idx + 2
    while pos < end:
        ch = text[pos]
        pos += 1  # `pos` now points at the character following `ch`
        if ch == ' ':
            continue

        if ch != ':':
            return False

        # A ':' not followed by a space/line break does not end the search; scanning simply continues.
        if pos < end and (text[pos] == ' ' or self.is_new_line_char(text[pos])):
            return True

    return False
|
|
1048
|
+
|
|
1049
|
+
def scan_tag(self, ctx: YamlScanningContext) -> YamlErrorOr[bool]:
    """
    Scan a tag starting at the current ``!``.

    Returns ``False`` when text is already buffered or a directive is being scanned. Otherwise consumes the
    ``!`` and reads on until a terminator:

    - a space ends the tag and is consumed with it;
    - a ``,`` (only inside a flow collection) or a line break ends the tag but is left for the next scan step;
    - ``{`` or ``}`` yields an invalid-token error.

    If the input ends without a terminator the consumed characters are skipped and no tag token is added.
    """

    if ctx.exists_buffer() or self.is_directive:
        return False

    ctx.add_origin_buf('!')
    self.progress(ctx, 1)  # step past the '!' character

    def emit_tag(offset: int, left_unconsumed: int) -> bool:
        # The value starts at the '!' (one before ctx.idx) and stops before the terminator at `offset`.
        value = ctx.source(ctx.idx - 1, ctx.idx + offset)
        ctx.add_token(YamlTokenMakers.new_tag(value, ctx.obuf, self.pos()))
        self.progress_column(ctx, len(value) - left_unconsumed)
        ctx.clear()
        return True

    consumed = 0
    for offset, ch in enumerate(ctx.src[ctx.idx:]):
        consumed = offset + 1

        if ch == ',' and (self.started_flow_sequence_num > 0 or self.started_flow_map_num > 0):
            # Stop one column short so that scan_flow_entry sees the collect-entry character.
            return emit_tag(offset, 1)

        ctx.add_origin_buf(ch)

        if ch == ' ':
            return emit_tag(offset, 0)

        if ch in ('\n', '\r'):
            # Stop one column short so that scan_new_line sees the line break.
            return emit_tag(offset, 1)

        if ch in ('{', '}'):
            self.progress_column(ctx, consumed)
            invalid_tk = YamlTokenMakers.new_invalid(
                yaml_error(f'found invalid tag character {ch!r}'),
                ctx.obuf,
                self.pos(),
            )
            return err_invalid_token(invalid_tk)

    self.progress_column(ctx, consumed)
    ctx.clear()
    return True
|
|
1104
|
+
|
|
1105
|
+
def scan_comment(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``#`` comment up to the end of its line (or the end of the input).

    When text is buffered, the ``#`` only starts a comment if the previous character is a space, a tab, or a
    line break; otherwise ``False`` is returned and nothing is consumed. On success the buffered token (if any)
    is flushed, a comment token holding the text after ``#`` is added, the position advances past the comment
    and onto the next line, and ``True`` is returned.
    """

    if ctx.exists_buffer():
        prev = ctx.previous_char()
        if prev not in (' ', '\t') and not self.is_new_line_char(prev):
            return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf('#')
    self.progress(ctx, 1)  # step past the '#' character

    def finish(text: str) -> bool:
        ctx.add_token(YamlTokenMakers.new_comment(text, ctx.obuf, self.pos()))
        self.progress_column(ctx, len(text))
        self.progress_line(ctx)
        ctx.clear()
        return True

    for offset, ch in enumerate(ctx.src[ctx.idx:]):
        ctx.add_origin_buf(ch)
        # NOTE(review): ctx.idx is not advanced inside this loop, so previous_char() presumably inspects the same
        # position on every iteration rather than the character before `ch` - confirm this is intended.
        if self.is_new_line_char(ch) and ctx.previous_char() != '\\':
            return finish(ctx.source(ctx.idx, ctx.idx + offset))

    # The document ends with the comment.
    return finish(ctx.src[ctx.idx:])
|
|
1138
|
+
|
|
1139
|
+
def scan_multi_line(self, ctx: YamlScanningContext, c: str) -> ta.Optional[YamlError]:
    """
    Consume one character ``c`` of a literal/folded block scalar.

    The character is always recorded in the origin buffer. Depending on where it sits it is then treated as
    the last character of the input, a line break, leading indentation, an illegal indentation tab, an
    in-content tab, or ordinary content. Returns an invalid-token error when the multi-line state rejects the
    indentation or a tab is used where an indentation space is expected; otherwise ``None``.
    """

    state = check.not_none(ctx.get_multi_line_state())
    ctx.add_origin_buf(c)

    def reject(cause):
        # The invalid token is positioned at the offending character, so build it before stepping past it.
        invalid_tk = YamlTokenMakers.new_invalid(yaml_error(str(cause)), ctx.obuf, self.pos())
        self.progress_column(ctx, 1)
        return err_invalid_token(invalid_tk)

    # Last character of the input: finish the scalar right here.
    if ctx.is_eos():
        if self.is_first_char_at_line and c == ' ':
            state.add_indent(ctx, self.column)
        else:
            ctx.add_buf(c)

        state.update_indent_column(self.column)
        indent_err = state.validate_indent_column()
        if indent_err is not None:
            return reject(indent_err)

        ctx.add_token(YamlTokenMakers.new_string(ctx.buffered_src(), ctx.obuf, self.pos()))
        ctx.clear()
        self.progress_column(ctx, 1)
        return None

    # Line break: keep it in the content and check whether a document separator follows.
    if self.is_new_line_char(c):
        ctx.add_buf(c)
        state.update_space_only_indent_column(self.column - 1)
        state.update_new_line_state()
        self.progress_line(ctx)
        if ctx.next() and self.found_document_separator_marker(ctx.src[ctx.idx:]):
            ctx.add_token(YamlTokenMakers.new_string(ctx.buffered_src(), ctx.obuf, self.pos()))
            ctx.clear()
            self.break_multi_line(ctx)
        return None

    # Leading indentation space.
    if self.is_first_char_at_line and c == ' ':
        state.add_indent(ctx, self.column)
        self.progress_column(ctx, 1)
        return None

    # A tab sitting in the indentation column is an error.
    if self.is_first_char_at_line and c == '\t' and state.is_indent_column(self.column):
        err = err_invalid_token(
            YamlTokenMakers.new_invalid(
                yaml_error('found a tab character where an indentation space is expected'),
                ctx.obuf,
                self.pos(),
            ),
        )
        self.progress_column(ctx, 1)
        return err

    # Any other tab outside the indentation column is content.
    if c == '\t' and not state.is_indent_column(self.column):
        ctx.add_buf_with_tab(c)
        self.progress_column(ctx, 1)
        return None

    # Ordinary content character.
    indent_err = state.validate_indent_after_space_only(self.column)
    if indent_err is not None:
        return reject(indent_err)

    state.update_indent_column(self.column)
    indent_err = state.validate_indent_column()
    if indent_err is not None:
        return reject(indent_err)

    delim_col = state.last_delim_column()
    if delim_col > 0:
        self.last_delim_column = delim_col

    state.update_new_line_in_folded(ctx, self.column)
    ctx.add_buf_with_tab(c)
    self.progress_column(ctx, 1)
    return None
|
|
1211
|
+
|
|
1212
|
+
def scan_new_line(self, ctx: YamlScanningContext, c: str) -> None:
    """
    Handle a line break ``c`` (``'\\n'`` or ``'\\r'``) outside of block scalars.

    Remembers where the buffered text started (``saved_pos``) the first time a break is hit with text pending,
    drops trailing spaces from the buffer, folds a CRLF pair into a single LF, flushes the buffered token where
    required, records the break in the buffers, and moves the scanner to the next line.
    """

    if ctx.buf and self.saved_pos is None:
        pending = len(ctx.buffered_src())
        self.saved_pos = self.pos()
        self.saved_pos.column -= pending
        self.saved_pos.offset -= pending

    # In a case like the following the buffer carries two unnecessary trailing spaces; they are removed here:
    #   ---
    #   a:[space][space]
    #   b: c
    ctx.remove_right_space_from_buf()

    # Ignoring a CR that is followed by LF and normalizing the pair to LF is fine per the YAML 1.2 spec:
    # > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be
    #   parsed into a single line feed character.
    # > Outside scalar content, YAML allows any line break to be used to terminate lines.
    # > -- https://yaml.org/spec/1.2/spec.html
    if c == '\r' and ctx.next_char() == '\n':
        ctx.add_origin_buf('\r')
        self.progress(ctx, 1)
        self.offset += 1
        c = '\n'

    if ctx.is_eos() or self.is_anchor or self.is_alias or self.is_directive:
        self.add_buffered_token_if_exists(ctx)

    if ctx.exists_buffer() and self.is_first_char_at_line:
        # A break on an otherwise empty line is kept as a real line feed, replacing a trailing fold space.
        if ctx.buf[-1] == ' ':
            ctx.buf = ctx.buf[:-1] + '\n'
        else:
            ctx.buf += '\n'
    else:
        ctx.add_buf(' ')

    ctx.add_origin_buf(c)
    self.progress_line(ctx)
|
|
1253
|
+
|
|
1254
|
+
def is_flow_mode(self) -> bool:
    """Return True while at least one flow sequence or flow mapping has been opened and not yet closed."""

    return self.started_flow_sequence_num > 0 or self.started_flow_map_num > 0
|
|
1262
|
+
|
|
1263
|
+
def scan_flow_map_start(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``{`` as the start of a flow mapping.

    Returns ``False`` when text is buffered and the scanner is not already in flow mode (the brace is then
    left to the caller). Otherwise flushes the buffered token, adds a mapping-start token, bumps the open
    flow-map counter, advances one column, and returns ``True``.
    """

    if ctx.exists_buffer() and not self.is_flow_mode():
        return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf('{')
    start_tk = YamlTokenMakers.new_mapping_start(ctx.obuf, self.pos())
    ctx.add_token(start_tk)
    self.started_flow_map_num += 1
    self.progress_column(ctx, 1)
    ctx.clear()
    return True
|
|
1274
|
+
|
|
1275
|
+
def scan_flow_map_end(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``}`` as the end of a flow mapping.

    Returns ``False`` when no flow mapping is open. Otherwise flushes the buffered token, adds a mapping-end
    token, decrements the open flow-map counter, advances one column, and returns ``True``.
    """

    if self.started_flow_map_num <= 0:
        return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf('}')
    end_tk = YamlTokenMakers.new_mapping_end(ctx.obuf, self.pos())
    ctx.add_token(end_tk)
    self.started_flow_map_num -= 1
    self.progress_column(ctx, 1)
    ctx.clear()
    return True
|
|
1286
|
+
|
|
1287
|
+
def scan_flow_array_start(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``[`` as the start of a flow sequence.

    Returns ``False`` when text is buffered and the scanner is not already in flow mode. Otherwise flushes the
    buffered token, adds a sequence-start token, bumps the open flow-sequence counter, advances one column,
    and returns ``True``.
    """

    if ctx.exists_buffer() and not self.is_flow_mode():
        return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf('[')
    start_tk = YamlTokenMakers.new_sequence_start(ctx.obuf, self.pos())
    ctx.add_token(start_tk)
    self.started_flow_sequence_num += 1
    self.progress_column(ctx, 1)
    ctx.clear()
    return True
|
|
1298
|
+
|
|
1299
|
+
def scan_flow_array_end(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``]`` as the end of a flow sequence.

    Returns ``False`` only when text is buffered and no flow sequence is open. Otherwise flushes the buffered
    token, adds a sequence-end token, decrements the open flow-sequence counter, advances one column, and
    returns ``True``.
    """

    # NOTE(review): unlike scan_flow_map_end, a ']' with an empty buffer is accepted even when no flow sequence
    # is open, which drives the counter negative - confirm this asymmetry is intended.
    if ctx.exists_buffer() and self.started_flow_sequence_num <= 0:
        return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf(']')
    end_tk = YamlTokenMakers.new_sequence_end(ctx.obuf, self.pos())
    ctx.add_token(end_tk)
    self.started_flow_sequence_num -= 1
    self.progress_column(ctx, 1)
    ctx.clear()
    return True
|
|
1310
|
+
|
|
1311
|
+
def scan_flow_entry(self, ctx: YamlScanningContext, c: str) -> bool:
    """
    Scan the collect-entry character ``c`` (``,``) inside a flow collection.

    Returns ``False`` when neither a flow sequence nor a flow mapping is open. Otherwise flushes the buffered
    token, adds a collect-entry token, advances one column, and returns ``True``.
    """

    in_flow = self.started_flow_sequence_num > 0 or self.started_flow_map_num > 0
    if not in_flow:
        return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf(c)
    entry_tk = YamlTokenMakers.new_collect_entry(ctx.obuf, self.pos())
    ctx.add_token(entry_tk)
    self.progress_column(ctx, 1)
    ctx.clear()
    return True
|
|
1321
|
+
|
|
1322
|
+
def scan_map_delim(self, ctx: YamlScanningContext) -> YamlErrorOr[bool]:
    """
    Scan a ``:`` as a mapping-value indicator.

    Returns ``False`` (the colon is ordinary text) while scanning a directive, anchor, or alias; outside flow
    mappings when the colon is not followed by a space, tab, line break, or end of input; and inside flow
    mappings when it is followed by ``/`` or directly follows another mapping-value token. Returns an
    invalid-token error when the key starts with a tab. Otherwise flushes the buffered key, records its column
    as the last delimiter column, adds the mapping-value token, and returns ``True``.
    """

    nc = ctx.next_char()
    if self.is_directive or self.is_anchor or self.is_alias:
        return False

    if self.started_flow_map_num > 0:
        if nc == '/':
            # like http://
            return False

        prev_tk = ctx.last_token()
        if prev_tk is not None and prev_tk.type == YamlTokenType.MAPPING_VALUE:
            return False

    elif nc not in (' ', '\t') and not self.is_new_line_char(nc) and not ctx.is_next_eos():
        return False

    if ctx.obuf.lstrip(' ').startswith('\t') and not ctx.buf.startswith('\t'):
        invalid_tk = YamlTokenMakers.new_invalid(
            yaml_error('tab character cannot use as a map key directly'),
            ctx.obuf,
            self.pos(),
        )
        self.progress_column(ctx, 1)
        return err_invalid_token(invalid_tk)

    # mapping value
    key_tk = self.buffered_token(ctx)
    if key_tk is not None:
        self.last_delim_column = key_tk.position.column
        ctx.add_token(key_tk)
    else:
        # A quoted map key has already been cut into a token, so no buffer exists for it; the last token has
        # to be checked instead.
        quoted_tk = ctx.last_token()
        if quoted_tk is not None and quoted_tk.indicator == YamlIndicator.QUOTED_SCALAR:
            self.last_delim_column = quoted_tk.position.column

    ctx.add_token(YamlTokenMakers.new_mapping_value(self.pos()))
    self.progress_column(ctx, 1)
    ctx.clear()
    return True
|
|
1370
|
+
|
|
1371
|
+
def scan_document_start(self, ctx: YamlScanningContext) -> bool:
    """
    Scan ``---`` as a document header.

    Only matches at column 1 with zero indentation, when exactly three ``-`` repeat at the current position
    and the character after them (if any) is a space, tab, LF, or CR. On a match the buffered token is flushed,
    a document-header token is added, three columns are consumed, the per-document scanner state is reset, and
    ``True`` is returned; otherwise ``False``.
    """

    if self.indent_num != 0 or self.column != 1:
        return False

    if ctx.repeat_num('-') != 3:
        return False

    after = ctx.idx + 3
    if ctx.size > after and ctx.src[after] not in (' ', '\t', '\n', '\r'):
        return False

    self.add_buffered_token_if_exists(ctx)
    header_tk = YamlTokenMakers.new_document_header(ctx.obuf + '---', self.pos())
    ctx.add_token(header_tk)
    self.progress_column(ctx, 3)
    ctx.clear()
    self.clear_state()
    return True
|
|
1392
|
+
|
|
1393
|
+
def scan_document_end(self, ctx: YamlScanningContext) -> bool:
    """
    Scan ``...`` as a document end marker.

    Only matches at column 1 with zero indentation, when exactly three ``.`` repeat at the current position.
    On a match the buffered token is flushed, a document-end token is added, three columns are consumed, and
    ``True`` is returned; otherwise ``False``.
    """

    if self.indent_num != 0 or self.column != 1 or ctx.repeat_num('.') != 3:
        return False

    self.add_buffered_token_if_exists(ctx)
    end_tk = YamlTokenMakers.new_document_end(ctx.obuf + '...', self.pos())
    ctx.add_token(end_tk)
    self.progress_column(ctx, 3)
    ctx.clear()
    return True
|
|
1408
|
+
|
|
1409
|
+
def scan_merge_key(self, ctx: YamlScanningContext) -> bool:
    """
    Scan ``<<`` as a merge key.

    Returns ``False`` unless ``is_merge_key`` accepts the current position. Otherwise records the current
    column as the last delimiter column, adds a merge-key token, consumes two columns, and returns ``True``.
    """

    if not self.is_merge_key(ctx):
        return False

    self.last_delim_column = self.column
    merge_tk = YamlTokenMakers.new_merge_key(ctx.obuf + '<<', self.pos())
    ctx.add_token(merge_tk)
    self.progress_column(ctx, 2)
    ctx.clear()
    return True
|
|
1418
|
+
|
|
1419
|
+
def scan_raw_folded_char(self, ctx: YamlScanningContext) -> bool:
    """
    Treat a ``-`` as plain content that continues buffered text on a more deeply indented line.

    Applies only when text is already buffered and the indent state has just changed to "up". In that case
    the context is switched to raw-folded mode at the current column, the ``-`` is appended to both buffers,
    one column is consumed, and ``True`` is returned; otherwise ``False``.
    """

    if not (ctx.exists_buffer() and self.is_changed_to_indent_state_up()):
        return False

    ctx.set_raw_folded(self.column)
    ctx.add_buf('-')
    ctx.add_origin_buf('-')
    self.progress_column(ctx, 1)
    return True
|
|
1431
|
+
|
|
1432
|
+
def scan_sequence(self, ctx: YamlScanningContext) -> YamlErrorOr[bool]:
    """
    Scan a ``-`` as a block sequence entry indicator.

    Returns ``False`` (the dash is ordinary text) when text is already buffered, or when the dash is followed
    by anything other than a space, a tab, a line break, or the end of the input. Returns an invalid-token
    error when the line's indentation contains a tab before the dash. Otherwise flushes the buffered token,
    adds a sequence-entry token, records its column as the last delimiter column, consumes one column, and
    returns ``True``.
    """

    if ctx.exists_buffer():
        return False

    nc = ctx.next_char()
    # End of input is detected with is_next_eos(), exactly as scan_map_delim does. The previous `nc != 0` test
    # compared a str against the int 0, which is always true in Python, so a trailing '-' at the very end of
    # the input was never recognised as a sequence entry.
    if not ctx.is_next_eos() and nc != ' ' and nc != '\t' and not self.is_new_line_char(nc):
        return False

    if ctx.obuf.lstrip(' ').startswith('\t'):
        invalid_tk = YamlTokenMakers.new_invalid(
            yaml_error('tab character cannot use as a sequence delimiter'),
            ctx.obuf,
            self.pos(),
        )
        self.progress_column(ctx, 1)
        return err_invalid_token(invalid_tk)

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf('-')
    tk = YamlTokenMakers.new_sequence_entry(ctx.obuf, self.pos())
    self.last_delim_column = tk.position.column
    ctx.add_token(tk)
    self.progress_column(ctx, 1)
    ctx.clear()
    return True
|
|
1457
|
+
|
|
1458
|
+
def scan_multi_line_header(self, ctx: YamlScanningContext) -> YamlErrorOr[bool]:
    """
    Scan a ``|`` or ``>`` block scalar header line.

    Returns ``False`` when text is already buffered. Otherwise delegates to ``scan_multi_line_header_option``;
    its error, if any, is returned as-is, else the scanner moves to the next line and ``True`` is returned.
    """

    if ctx.exists_buffer():
        return False

    err = self.scan_multi_line_header_option(ctx)
    if err is not None:
        return err

    self.progress_line(ctx)
    return True
|
|
1467
|
+
|
|
1468
|
+
def validate_multi_line_header_option(self, opt: str) -> ta.Optional[YamlError]:
    """
    Validate the option part of a block scalar header (the text following ``|`` or ``>``).

    Leading and trailing ``-`` / ``+`` chomping indicators are stripped; whatever remains must be an
    indentation indicator: ASCII decimal digits with a value from 1 to 9.

    Returns ``None`` for a valid (or empty) option, otherwise an error whose message quotes the original
    option text.
    """

    if len(opt) == 0:
        return None

    indent = opt.lstrip('-').lstrip('+').rstrip('-').rstrip('+')
    if len(indent) == 0:
        # Only chomping indicators were given.
        return None

    # Check the characters explicitly instead of relying on int() alone: int() also accepts surrounding
    # whitespace, a sign, underscores between digits, and non-ASCII digits, none of which may appear in an
    # indentation indicator.
    if not (indent.isascii() and indent.isdigit()):
        return yaml_error(f'invalid header option: {opt!r}')

    if not 1 <= int(indent, 10) <= 9:
        return yaml_error(f'invalid header option: {opt!r}')

    return None
|
|
1492
|
+
|
|
1493
|
+
def scan_multi_line_header_option(self, ctx: YamlScanningContext) -> ta.Optional[YamlError]:
    """
    Scan the remainder of a block scalar header line, starting at its ``|`` or ``>``.

    Reads up to the line break, separates the header option from an optional trailing ``#`` comment, validates
    the option, adds the literal/folded token (plus a comment token when a comment is present), and switches
    the context into the matching multi-line mode. Returns an invalid-token error when the option is rejected
    by ``validate_multi_line_header_option``, otherwise ``None``.
    """

    header = ctx.current_char()
    ctx.add_origin_buf(header)
    self.progress(ctx, 1)  # step past the '|' or '>' character

    # Copy the rest of the line into the origin buffer. `last` ends up as the offset of the line break, or of
    # the final character when the input ends without one.
    last = 0
    for last, ch in enumerate(ctx.src[ctx.idx:]):
        ctx.add_origin_buf(ch)
        if self.is_new_line_char(ch):
            break

    value = ctx.source(ctx.idx, ctx.idx + last).rstrip(' ')
    value_hash_at = value.find('#')
    opt = value[:value_hash_at] if value_hash_at > 0 else value
    opt = trim_right_func(opt, lambda r: r in (' ', '\t'))

    if len(opt) != 0:
        opt_err = self.validate_multi_line_header_option(opt)
        if opt_err is not None:
            invalid_tk = YamlTokenMakers.new_invalid(yaml_error(str(opt_err)), ctx.obuf, self.pos())
            self.progress_column(ctx, last)
            return err_invalid_token(invalid_tk)

    if self.column == 1:
        self.last_delim_column = 1

    # Split the origin buffer at the comment so that the header token does not carry the comment text.
    obuf_hash_at = ctx.obuf.find('#')
    header_buf = ctx.obuf[:obuf_hash_at] if obuf_hash_at > 0 else ctx.obuf

    if header == '|':
        ctx.add_token(YamlTokenMakers.new_literal('|' + opt, header_buf, self.pos()))
        ctx.set_literal(self.last_delim_column, opt)
    elif header == '>':
        ctx.add_token(YamlTokenMakers.new_folded('>' + opt, header_buf, self.pos()))
        ctx.set_folded(self.last_delim_column, opt)

    if obuf_hash_at > 0:
        comment = value[value_hash_at + 1:]
        # Move the position past the header part so that the comment token is placed where the '#' is.
        self.offset += len(header_buf)
        self.column += len(header_buf)
        ctx.add_token(YamlTokenMakers.new_comment(comment, ctx.obuf[len(header_buf):], self.pos()))

    self.indent_state = YamlIndentState.KEEP
    ctx.reset_buffer()
    self.progress_column(ctx, last)
    return None
|
|
1544
|
+
|
|
1545
|
+
def scan_map_key(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``?`` as an explicit mapping key indicator.

    Returns ``False`` when text is already buffered or the next character is neither a space nor a tab.
    Otherwise adds a mapping-key token, records its column as the last delimiter column, consumes one column,
    and returns ``True``.
    """

    if ctx.exists_buffer():
        return False

    if ctx.next_char() not in (' ', '\t'):
        return False

    key_tk = YamlTokenMakers.new_mapping_key(self.pos())
    self.last_delim_column = key_tk.position.column
    ctx.add_token(key_tk)
    self.progress_column(ctx, 1)
    ctx.clear()
    return True
|
|
1559
|
+
|
|
1560
|
+
def scan_directive(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``%`` as the start of a directive.

    Returns ``False`` when text is already buffered or the line is indented. Otherwise adds a directive
    token, consumes one column, puts the scanner into directive mode, and returns ``True``.
    """

    if ctx.exists_buffer() or self.indent_num != 0:
        return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf('%')
    directive_tk = YamlTokenMakers.new_directive(ctx.obuf, self.pos())
    ctx.add_token(directive_tk)
    self.progress_column(ctx, 1)
    ctx.clear()
    self.is_directive = True
    return True
|
|
1573
|
+
|
|
1574
|
+
def scan_anchor(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``&`` as an anchor indicator.

    Returns ``False`` when text is already buffered. Otherwise adds an anchor token, consumes one column,
    puts the scanner into anchor mode, and returns ``True``.
    """

    if ctx.exists_buffer():
        return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf('&')
    anchor_tk = YamlTokenMakers.new_anchor(ctx.obuf, self.pos())
    ctx.add_token(anchor_tk)
    self.progress_column(ctx, 1)
    self.is_anchor = True
    ctx.clear()
    return True
|
|
1585
|
+
|
|
1586
|
+
def scan_alias(self, ctx: YamlScanningContext) -> bool:
    """
    Scan a ``*`` as an alias indicator.

    Returns ``False`` when text is already buffered. Otherwise adds an alias token, consumes one column, puts
    the scanner into alias mode, and returns ``True``.
    """

    if ctx.exists_buffer():
        return False

    self.add_buffered_token_if_exists(ctx)
    ctx.add_origin_buf('*')
    alias_tk = YamlTokenMakers.new_alias(ctx.obuf, self.pos())
    ctx.add_token(alias_tk)
    self.progress_column(ctx, 1)
    self.is_alias = True
    ctx.clear()
    return True
|
|
1597
|
+
|
|
1598
|
+
def scan_reserved_char(self, ctx: YamlScanningContext, c: str) -> ta.Optional[YamlError]:
    """
    Reject a reserved indicator character ``c`` that would start a token.

    Returns ``None`` when text is already buffered (the character is then ordinary content). Otherwise
    records the character in both buffers, consumes one column, clears the context, and returns an
    invalid-token error positioned at the character.
    """

    if ctx.exists_buffer():
        return None

    ctx.add_buf(c)
    ctx.add_origin_buf(c)
    invalid_tk = YamlTokenMakers.new_invalid(
        yaml_error(f'{c!r} is a reserved character'),
        ctx.obuf,
        self.pos(),
    )
    err = err_invalid_token(invalid_tk)
    self.progress_column(ctx, 1)
    ctx.clear()
    return err
|
|
1614
|
+
|
|
1615
|
+
def scan_tab(self, ctx: YamlScanningContext, c: str) -> ta.Optional[YamlError]:
    """
    Reject a tab character ``c`` that would start a token at the beginning of a line.

    Returns ``None`` inside flow collections (where tabs are allowed) and whenever the tab is not the first
    character of its line. Otherwise records the tab in both buffers, consumes one column, clears the context,
    and returns an invalid-token error positioned at the tab.
    """

    in_flow = self.started_flow_sequence_num > 0 or self.started_flow_map_num > 0
    if in_flow or not self.is_first_char_at_line:
        return None

    ctx.add_buf(c)
    ctx.add_origin_buf(c)
    invalid_tk = YamlTokenMakers.new_invalid(
        yaml_error("found character '\t' that cannot start any token"),
        ctx.obuf,
        self.pos(),
    )
    err = err_invalid_token(invalid_tk)
    self.progress_column(ctx, 1)
    ctx.clear()
    return err
|
|
1635
|
+
|
|
1636
|
+
def _scan(self, ctx: YamlScanningContext) -> ta.Optional[YamlError]:
    """
    Main scanning loop: consume ``ctx`` character by character, appending tokens to it.

    Each character first updates the indent state. Inside a block scalar it is handed to ``scan_multi_line``
    until a dedent ends the scalar. Otherwise it is dispatched to the scanner for its indicator; a scanner that
    declines (returns a falsy value) leaves the character to be buffered as plain text. Returns the first
    error produced by a scanner, or ``None`` once the input is exhausted and the last buffered token flushed.
    """

    while ctx.next():
        c = ctx.current_char()

        # First, change the indent state. For the first character of a line it becomes Up/Down/Equal; for the
        # second and subsequent characters it is Keep.
        self.update_indent(ctx, c)

        # On a dedent the tokens are split, so whatever has been buffered up to this point is cut into a token.
        if self.is_changed_to_indent_state_down():
            self.add_buffered_token_if_exists(ctx)

        if ctx.is_multi_line():
            if not self.is_changed_to_indent_state_down():
                multi_err = self.scan_multi_line(ctx, c)
                if multi_err is not None:
                    return multi_err
                continue

            # A dedent ends the block scalar; the current character is then scanned normally below.
            last_tk = ctx.last_token()
            if last_tk is not None:
                # If the literal/folded content is empty no string token was added, so an empty one is added
                # here. A literal/folded token at column 1 is invalid in the dedent state, though.
                if last_tk.position.column == 1:
                    # NOTE(review): other call sites return err_invalid_token(...) directly; here the result is
                    # additionally wrapped in yaml_error(...) - confirm this is intended.
                    return yaml_error(err_invalid_token(
                        YamlTokenMakers.new_invalid(
                            yaml_error('could not find multi-line content'),
                            ctx.obuf,
                            self.pos(),
                        ),
                    ))

                if last_tk.type != YamlTokenType.STRING:
                    ctx.add_token(YamlTokenMakers.new_string('', '', self.pos()))

            self.break_multi_line(ctx)

        # Dispatch on the indicator character. `handled` is truthy when a scanner consumed the character, or a
        # YamlError when it failed.
        handled = False
        if c == '{':
            handled = self.scan_flow_map_start(ctx)

        elif c == '}':
            handled = self.scan_flow_map_end(ctx)

        elif c == '.':
            handled = self.scan_document_end(ctx)

        elif c == '<':
            handled = self.scan_merge_key(ctx)

        elif c == '-':
            # Tried in order; the first scanner that accepts the dash wins.
            handled = (
                self.scan_document_start(ctx) or
                self.scan_raw_folded_char(ctx) or
                self.scan_sequence(ctx)
            )

        elif c == '[':
            handled = self.scan_flow_array_start(ctx)

        elif c == ']':
            handled = self.scan_flow_array_end(ctx)

        elif c == ',':
            handled = self.scan_flow_entry(ctx, c)

        elif c == ':':
            handled = self.scan_map_delim(ctx)

        elif c in ('|', '>'):
            handled = self.scan_multi_line_header(ctx)

        elif c == '!':
            handled = self.scan_tag(ctx)

        elif c == '%':
            handled = self.scan_directive(ctx)

        elif c == '?':
            handled = self.scan_map_key(ctx)

        elif c == '&':
            handled = self.scan_anchor(ctx)

        elif c == '*':
            handled = self.scan_alias(ctx)

        elif c == '#':
            handled = self.scan_comment(ctx)

        elif c in ("'", '"'):
            handled = self.scan_quote(ctx, c)

        elif c in ('\r', '\n'):
            self.scan_new_line(ctx, c)
            continue

        elif c == ' ':
            handled = self.scan_white_space(ctx)

        elif c in ('@', '`'):
            reserved_err = self.scan_reserved_char(ctx, c)
            if reserved_err is not None:
                return reserved_err

        elif c == '\t':
            # A tab counts as indentation either when it indents plain text (yaml-test-suite's
            # spec-example-7-12-plain-lines) or when it sits to the right of the last delimiter column.
            if (ctx.exists_buffer() and self.last_delim_column == 0) or self.last_delim_column < self.column:
                self.indent_num += 1
                ctx.add_origin_buf(c)
                self.progress_only(ctx, 1)
                continue

            tab_err = self.scan_tab(ctx, c)
            if tab_err is not None:
                return tab_err

        if isinstance(handled, YamlError):
            return handled

        if handled:
            continue

        # No scanner claimed the character: it is plain text.
        ctx.add_buf(c)
        ctx.add_origin_buf(c)
        self.progress_column(ctx, 1)

    self.add_buffered_token_if_exists(ctx)
    return None
|
|
1804
|
+
|
|
1805
|
+
def init(self, text: str) -> None:
    """
    Prepare the scanner to tokenize ``text`` from its beginning.

    Stores the source and its length, rewinds the read position to 0, resets line, column, and offset to 1,
    marks the scanner as being at the first character of a line, and clears the per-document state.
    """

    self.source = text
    self.source_pos = 0
    self.source_size = len(text)
    self.line = self.column = self.offset = 1
    self.is_first_char_at_line = True
    self.clear_state()
|
|
1816
|
+
|
|
1817
|
+
def clear_state(self) -> None:
    """Zero the indentation and delimiter-column counters kept on the scanner."""
    # Indentation counters.
    self.prev_line_indent_num = self.indent_level = self.indent_num = 0
    # Delimiter column counter.
    self.last_delim_column = 0
|
|
1822
|
+
|
|
1823
|
+
# scan scans the next batch of tokens and returns it as a token collection. The end of the source is indicated by an EofYamlError.
|
|
1824
|
+
def scan(self) -> ta.Tuple[ta.Optional[YamlTokens], ta.Optional[YamlError]]:
    """Scan from the current source position and return the tokens produced.

    Returns:
        A ``(tokens, error)`` pair:

        * ``(None, EofYamlError())`` when the read position is already at or past the end of the source;
        * ``(tokens, err)`` when the underlying scan reported an error - any tokens collected before the failure are
          still included;
        * ``(tokens, None)`` on success.
    """
    if self.source_pos >= self.source_size:
        return None, EofYamlError()

    # Each call works on a fresh context over the not-yet-consumed remainder of the source.
    ctx = YamlScanningContext.new(self.source[self.source_pos:])
    tokens = YamlTokens()

    scan_err = self._scan(ctx)
    tokens.extend(ctx.tokens)

    # NOTE: the commented-out reference code this was ported from also appended the token carried by an
    # InvalidTokenError to the result; that step is not performed here.
    return tokens, scan_err
|
|
1841
|
+
|
|
1842
|
+
|
|
1843
|
+
# yaml_tokenize splits the source string into token instances.
|
|
1844
|
+
def yaml_tokenize(src: str) -> YamlTokens:
    """Scan ``src`` until the scanner reports end of input and return every token collected.

    Args:
        src: The source text to tokenize.

    Returns:
        All tokens produced by successive ``YamlScanner.scan()`` calls, in the order they were produced.
    """
    scanner = YamlScanner()
    scanner.init(src)

    collected = YamlTokens()
    while True:
        batch, err = scanner.scan()
        if isinstance(err, EofYamlError):
            return collected

        # Errors other than EOF are not inspected here: the tokens returned alongside them are kept and scanning
        # simply continues with the next call.
        collected.add(*check.not_none(batch))
|
|
1857
|
+
|
|
1858
|
+
|
|
1859
|
+
##
|
|
1860
|
+
|
|
1861
|
+
|
|
1862
|
+
def hex_to_int(s: str) -> int:
    """Return the numeric value of a single hexadecimal digit character.

    Args:
        s: A string of exactly one character.

    Returns:
        10-15 for ``'A'``-``'F'`` and ``'a'``-``'f'``; otherwise the character's code point minus that of ``'0'``.
        Characters that are not hex digits are not rejected - they simply fall through to that last formula.

    Raises:
        ValueError: If ``s`` is not exactly one character long.
    """
    if len(s) != 1:
        raise ValueError(s)

    # Letter digits: offset from the start of the matching range, plus 10.
    for first, last in (('A', 'F'), ('a', 'f')):
        if first <= s <= last:
            return ord(s) - ord(first) + 10

    return ord(s) - ord('0')
|
|
1871
|
+
|
|
1872
|
+
|
|
1873
|
+
def hex_runes_to_int(b: str) -> int:
    """Combine a string of hex digit characters into one integer, most significant digit first.

    Args:
        b: The digit characters; each one is converted with ``hex_to_int``.

    Returns:
        The accumulated value, or ``0`` for an empty string.
    """
    total = 0
    for digit in b:
        # Shift the running value up one hex digit (4 bits) and add the next digit.
        total = (total << 4) + hex_to_int(digit)
    return total
|
|
1878
|
+
|
|
1879
|
+
|
|
1880
|
+
def trim_right_func(s: str, predicate: ta.Callable[[str], bool]) -> str:
    """Strip the longest run of trailing characters for which ``predicate`` is true.

    Args:
        s: The string to trim.
        predicate: Called with one character at a time, starting from the end of ``s``.

    Returns:
        ``s`` up to and including the last character that ``predicate`` rejects; an empty string if every character
        is accepted. A falsy ``s`` is returned unchanged.
    """
    if not s:
        return s

    # Walk backwards and stop at the first character that must be kept.
    for idx in range(len(s) - 1, -1, -1):
        if not predicate(s[idx]):
            return s[:idx + 1]

    return s[:0]
|