outerbounds 0.3.55rc3__py3-none-any.whl → 0.3.133__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. outerbounds/_vendor/PyYAML.LICENSE +20 -0
  2. outerbounds/_vendor/__init__.py +0 -0
  3. outerbounds/_vendor/_yaml/__init__.py +34 -0
  4. outerbounds/_vendor/click/__init__.py +73 -0
  5. outerbounds/_vendor/click/_compat.py +626 -0
  6. outerbounds/_vendor/click/_termui_impl.py +717 -0
  7. outerbounds/_vendor/click/_textwrap.py +49 -0
  8. outerbounds/_vendor/click/_winconsole.py +279 -0
  9. outerbounds/_vendor/click/core.py +2998 -0
  10. outerbounds/_vendor/click/decorators.py +497 -0
  11. outerbounds/_vendor/click/exceptions.py +287 -0
  12. outerbounds/_vendor/click/formatting.py +301 -0
  13. outerbounds/_vendor/click/globals.py +68 -0
  14. outerbounds/_vendor/click/parser.py +529 -0
  15. outerbounds/_vendor/click/py.typed +0 -0
  16. outerbounds/_vendor/click/shell_completion.py +580 -0
  17. outerbounds/_vendor/click/termui.py +787 -0
  18. outerbounds/_vendor/click/testing.py +479 -0
  19. outerbounds/_vendor/click/types.py +1073 -0
  20. outerbounds/_vendor/click/utils.py +580 -0
  21. outerbounds/_vendor/click.LICENSE +28 -0
  22. outerbounds/_vendor/vendor_any.txt +2 -0
  23. outerbounds/_vendor/yaml/__init__.py +471 -0
  24. outerbounds/_vendor/yaml/_yaml.cpython-311-darwin.so +0 -0
  25. outerbounds/_vendor/yaml/composer.py +146 -0
  26. outerbounds/_vendor/yaml/constructor.py +862 -0
  27. outerbounds/_vendor/yaml/cyaml.py +177 -0
  28. outerbounds/_vendor/yaml/dumper.py +138 -0
  29. outerbounds/_vendor/yaml/emitter.py +1239 -0
  30. outerbounds/_vendor/yaml/error.py +94 -0
  31. outerbounds/_vendor/yaml/events.py +104 -0
  32. outerbounds/_vendor/yaml/loader.py +62 -0
  33. outerbounds/_vendor/yaml/nodes.py +51 -0
  34. outerbounds/_vendor/yaml/parser.py +629 -0
  35. outerbounds/_vendor/yaml/reader.py +208 -0
  36. outerbounds/_vendor/yaml/representer.py +378 -0
  37. outerbounds/_vendor/yaml/resolver.py +245 -0
  38. outerbounds/_vendor/yaml/scanner.py +1555 -0
  39. outerbounds/_vendor/yaml/serializer.py +127 -0
  40. outerbounds/_vendor/yaml/tokens.py +129 -0
  41. outerbounds/command_groups/apps_cli.py +450 -0
  42. outerbounds/command_groups/cli.py +9 -5
  43. outerbounds/command_groups/local_setup_cli.py +249 -33
  44. outerbounds/command_groups/perimeters_cli.py +231 -33
  45. outerbounds/command_groups/tutorials_cli.py +111 -0
  46. outerbounds/command_groups/workstations_cli.py +88 -15
  47. outerbounds/utils/kubeconfig.py +2 -2
  48. outerbounds/utils/metaflowconfig.py +111 -21
  49. outerbounds/utils/schema.py +8 -2
  50. outerbounds/utils/utils.py +19 -0
  51. outerbounds/vendor.py +159 -0
  52. {outerbounds-0.3.55rc3.dist-info → outerbounds-0.3.133.dist-info}/METADATA +17 -6
  53. outerbounds-0.3.133.dist-info/RECORD +59 -0
  54. {outerbounds-0.3.55rc3.dist-info → outerbounds-0.3.133.dist-info}/WHEEL +1 -1
  55. outerbounds-0.3.55rc3.dist-info/RECORD +0 -15
  56. {outerbounds-0.3.55rc3.dist-info → outerbounds-0.3.133.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1555 @@
1
+ # Scanner produces tokens of the following types:
2
+ # STREAM-START
3
+ # STREAM-END
4
+ # DIRECTIVE(name, value)
5
+ # DOCUMENT-START
6
+ # DOCUMENT-END
7
+ # BLOCK-SEQUENCE-START
8
+ # BLOCK-MAPPING-START
9
+ # BLOCK-END
10
+ # FLOW-SEQUENCE-START
11
+ # FLOW-MAPPING-START
12
+ # FLOW-SEQUENCE-END
13
+ # FLOW-MAPPING-END
14
+ # BLOCK-ENTRY
15
+ # FLOW-ENTRY
16
+ # KEY
17
+ # VALUE
18
+ # ALIAS(value)
19
+ # ANCHOR(value)
20
+ # TAG(value)
21
+ # SCALAR(value, plain, style)
22
+ #
23
+ # Read comments in the Scanner code for more details.
24
+ #
25
+
26
+ __all__ = ["Scanner", "ScannerError"]
27
+
28
+ from .error import MarkedYAMLError
29
+ from .tokens import *
30
+
31
+
32
class ScannerError(MarkedYAMLError):
    """Raised when the scanner meets input it cannot tokenize.

    Inherits context/problem marks and formatting from MarkedYAMLError.
    """

    pass
34
+
35
+
36
class SimpleKey:
    # See below simple keys treatment.

    def __init__(self, token_number, required, index, line, column, mark):
        """Record the position of a potential simple key.

        token_number -- absolute number of the token that would become the KEY
        required     -- True if a ':' MUST follow (block context, at indent)
        index, line, column -- stream position where the key candidate starts
        mark         -- Mark object for error reporting
        """
        self.token_number = token_number
        self.required = required
        self.index = index
        self.line = line
        self.column = column
        self.mark = mark
46
+
47
+
48
class Scanner:
    def __init__(self):
        """Initialize the scanner."""
        # It is assumed that Scanner and Reader will have a common descendant.
        # Reader does the dirty work of checking for BOM and converting the
        # input data to Unicode. It also adds NUL to the end.
        #
        # Reader supports the following methods
        #   self.peek(i=0)      # peek the next i-th character
        #   self.prefix(l=1)    # peek the next l characters
        #   self.forward(l=1)   # read the next l characters and move the pointer.

        # Had we reached the end of the stream?
        self.done = False

        # The number of unclosed '{' and '['. `flow_level == 0` means block
        # context.
        self.flow_level = 0

        # List of processed tokens that are not yet emitted.
        self.tokens = []

        # Add the STREAM-START token.
        self.fetch_stream_start()

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []

        # Variables related to simple keys treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Example of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a simple key start at the current position? A simple key may
        # start:
        # - at the beginning of the line, not counting indentation spaces
        #       (in block context),
        # - after '{', '[', ',' (in the flow context),
        # - after '?', ':', '-' (in the block context).
        # In the block context, this flag also signifies if a block collection
        # may start at the current position.
        self.allow_simple_key = True

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a SimpleKey record:
        #   (token_number, required, index, line, column, mark)
        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
        # '[', or '{' tokens.
        self.possible_simple_keys = {}
111
+
112
+ # Public methods.
113
+
114
+ def check_token(self, *choices):
115
+ # Check if the next token is one of the given types.
116
+ while self.need_more_tokens():
117
+ self.fetch_more_tokens()
118
+ if self.tokens:
119
+ if not choices:
120
+ return True
121
+ for choice in choices:
122
+ if isinstance(self.tokens[0], choice):
123
+ return True
124
+ return False
125
+
126
+ def peek_token(self):
127
+ # Return the next token, but do not delete if from the queue.
128
+ # Return None if no more tokens.
129
+ while self.need_more_tokens():
130
+ self.fetch_more_tokens()
131
+ if self.tokens:
132
+ return self.tokens[0]
133
+ else:
134
+ return None
135
+
136
+ def get_token(self):
137
+ # Return the next token.
138
+ while self.need_more_tokens():
139
+ self.fetch_more_tokens()
140
+ if self.tokens:
141
+ self.tokens_taken += 1
142
+ return self.tokens.pop(0)
143
+
144
+ # Private methods.
145
+
146
+ def need_more_tokens(self):
147
+ if self.done:
148
+ return False
149
+ if not self.tokens:
150
+ return True
151
+ # The current token may be a potential simple key, so we
152
+ # need to look further.
153
+ self.stale_possible_simple_keys()
154
+ if self.next_possible_simple_key() == self.tokens_taken:
155
+ return True
156
+
157
    def fetch_more_tokens(self):
        """Scan one more token from the stream and append it to self.tokens.

        Dispatches on the next character; raises ScannerError if no token
        can start there.
        """

        # Eat whitespaces and comments until we reach the next token.
        self.scan_to_next_token()

        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation level.
        self.unwind_indent(self.column)

        # Peek the next character.
        ch = self.peek()

        # Is it the end of stream? (Reader appends NUL to the input.)
        if ch == "\0":
            return self.fetch_stream_end()

        # Is it a directive?
        if ch == "%" and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == "-" and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == "." and self.check_document_end():
            return self.fetch_document_end()

        # TODO: support for BOM within a stream.
        # if ch == '\uFEFF':
        #     return self.fetch_bom()     <-- issue BOMToken

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == "[":
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == "{":
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == "]":
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == "}":
            return self.fetch_flow_mapping_end()

        # Is it the flow entry indicator?
        if ch == ",":
            return self.fetch_flow_entry()

        # Is it the block entry indicator?
        if ch == "-" and self.check_block_entry():
            return self.fetch_block_entry()

        # Is it the key indicator?
        if ch == "?" and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == ":" and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == "*":
            return self.fetch_alias()

        # Is it an anchor?
        if ch == "&":
            return self.fetch_anchor()

        # Is it a tag?
        if ch == "!":
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == "|" and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == ">" and not self.flow_level:
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == "'":
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == '"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        raise ScannerError(
            "while scanning for the next token",
            None,
            "found character %r that cannot start any token" % ch,
            self.get_mark(),
        )
265
+
266
+ # Simple keys treatment.
267
+
268
+ def next_possible_simple_key(self):
269
+ # Return the number of the nearest possible simple key. Actually we
270
+ # don't need to loop through the whole dictionary. We may replace it
271
+ # with the following code:
272
+ # if not self.possible_simple_keys:
273
+ # return None
274
+ # return self.possible_simple_keys[
275
+ # min(self.possible_simple_keys.keys())].token_number
276
+ min_token_number = None
277
+ for level in self.possible_simple_keys:
278
+ key = self.possible_simple_keys[level]
279
+ if min_token_number is None or key.token_number < min_token_number:
280
+ min_token_number = key.token_number
281
+ return min_token_number
282
+
283
    def stale_possible_simple_keys(self):
        """Drop simple-key candidates that can no longer be simple keys.

        Raises ScannerError if a *required* key candidate goes stale.
        """
        # According to the YAML specification, simple keys
        # - should be limited to a single line,
        # - should be no longer than 1024 characters.
        # Disabling this procedure will allow simple keys of any length and
        # height (may cause problems if indentation is broken though).
        for level in list(self.possible_simple_keys):
            key = self.possible_simple_keys[level]
            if key.line != self.line or self.index - key.index > 1024:
                if key.required:
                    raise ScannerError(
                        "while scanning a simple key",
                        key.mark,
                        "could not find expected ':'",
                        self.get_mark(),
                    )
                del self.possible_simple_keys[level]
301
+
302
    def save_possible_simple_key(self):
        """Register the current position as a possible simple-key start.

        Called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
        """
        # Check if a simple key is required at the current position:
        # in block context, a key sitting exactly at the indent column
        # must be followed by ':'.
        required = not self.flow_level and self.indent == self.column

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken + len(self.tokens)
            key = SimpleKey(
                token_number,
                required,
                self.index,
                self.line,
                self.column,
                self.get_mark(),
            )
            self.possible_simple_keys[self.flow_level] = key
324
+
325
+ def remove_possible_simple_key(self):
326
+ # Remove the saved possible key position at the current flow level.
327
+ if self.flow_level in self.possible_simple_keys:
328
+ key = self.possible_simple_keys[self.flow_level]
329
+
330
+ if key.required:
331
+ raise ScannerError(
332
+ "while scanning a simple key",
333
+ key.mark,
334
+ "could not find expected ':'",
335
+ self.get_mark(),
336
+ )
337
+
338
+ del self.possible_simple_keys[self.flow_level]
339
+
340
+ # Indentation functions.
341
+
342
    def unwind_indent(self, column):
        """Pop indentation levels deeper than *column*, emitting BLOCK-END
        tokens for each popped level. No-op in flow context.
        """

        ## In flow context, tokens should respect indentation.
        ## Actually the condition should be `self.indent >= column` according to
        ## the spec. But this condition will prohibit intuitively correct
        ## constructions such as
        ## key : {
        ## }
        # if self.flow_level and self.indent > column:
        #     raise ScannerError(None, None,
        #             "invalid indentation or unclosed '[' or '{'",
        #             self.get_mark())

        # In the flow context, indentation is ignored. We make the scanner less
        # restrictive than the specification requires.
        if self.flow_level:
            return

        # In block context, we may need to issue the BLOCK-END tokens.
        while self.indent > column:
            mark = self.get_mark()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(mark, mark))
365
+
366
+ def add_indent(self, column):
367
+ # Check if we need to increase indentation.
368
+ if self.indent < column:
369
+ self.indents.append(self.indent)
370
+ self.indent = column
371
+ return True
372
+ return False
373
+
374
+ # Fetchers.
375
+
376
    def fetch_stream_start(self):
        """Emit the STREAM-START token (always the very first token)."""
        # We always add STREAM-START as the first token and STREAM-END as the
        # last token.

        # Read the token.
        mark = self.get_mark()

        # Add STREAM-START. `self.encoding` is provided by the Reader mixin.
        self.tokens.append(StreamStartToken(mark, mark, encoding=self.encoding))
385
+
386
    def fetch_stream_end(self):
        """Emit the STREAM-END token and mark the scanner as done."""

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False
        self.possible_simple_keys = {}

        # Read the token.
        mark = self.get_mark()

        # Add STREAM-END.
        self.tokens.append(StreamEndToken(mark, mark))

        # The stream is finished.
        self.done = True
404
+
405
    def fetch_directive(self):
        """Scan a '%...' directive line and emit a DIRECTIVE token."""

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Scan and add DIRECTIVE.
        self.tokens.append(self.scan_directive())
416
+
417
    def fetch_document_start(self):
        """Emit DOCUMENT-START for a '---' marker."""
        self.fetch_document_indicator(DocumentStartToken)

    def fetch_document_end(self):
        """Emit DOCUMENT-END for a '...' marker."""
        self.fetch_document_indicator(DocumentEndToken)

    def fetch_document_indicator(self, TokenClass):
        """Shared body for '---'/'...' markers: unwind indents, forbid
        simple keys, consume the 3-character indicator, emit *TokenClass*.
        """

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys. Note that there could not be a block collection
        # after '---'.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Add DOCUMENT-START or DOCUMENT-END.
        start_mark = self.get_mark()
        self.forward(3)
        end_mark = self.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))
438
+
439
    def fetch_flow_sequence_start(self):
        """Emit FLOW-SEQUENCE-START for '['."""
        self.fetch_flow_collection_start(FlowSequenceStartToken)

    def fetch_flow_mapping_start(self):
        """Emit FLOW-MAPPING-START for '{'."""
        self.fetch_flow_collection_start(FlowMappingStartToken)

    def fetch_flow_collection_start(self, TokenClass):
        """Shared body for '['/'{': enter a new flow level and emit
        *TokenClass*.
        """

        # '[' and '{' may start a simple key.
        self.save_possible_simple_key()

        # Increase the flow level.
        self.flow_level += 1

        # Simple keys are allowed after '[' and '{'.
        self.allow_simple_key = True

        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))
461
+
462
    def fetch_flow_sequence_end(self):
        """Emit FLOW-SEQUENCE-END for ']'."""
        self.fetch_flow_collection_end(FlowSequenceEndToken)

    def fetch_flow_mapping_end(self):
        """Emit FLOW-MAPPING-END for '}'."""
        self.fetch_flow_collection_end(FlowMappingEndToken)

    def fetch_flow_collection_end(self, TokenClass):
        """Shared body for ']'/'}': leave the current flow level and emit
        *TokenClass*.
        """

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Decrease the flow level.
        self.flow_level -= 1

        # No simple keys after ']' or '}'.
        self.allow_simple_key = False

        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(TokenClass(start_mark, end_mark))
484
+
485
    def fetch_flow_entry(self):
        """Emit FLOW-ENTRY for ',' inside a flow collection."""

        # Simple keys are allowed after ','.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add FLOW-ENTRY.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(FlowEntryToken(start_mark, end_mark))
498
+
499
    def fetch_block_entry(self):
        """Emit BLOCK-ENTRY for '-', opening a block sequence if needed.

        Raises ScannerError when a sequence entry is not permitted at the
        current position in block context.
        """

        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a new entry?
            if not self.allow_simple_key:
                raise ScannerError(
                    None, None, "sequence entries are not allowed here", self.get_mark()
                )

            # We may need to add BLOCK-SEQUENCE-START.
            if self.add_indent(self.column):
                mark = self.get_mark()
                self.tokens.append(BlockSequenceStartToken(mark, mark))

        # It's an error for the block entry to occur in the flow context,
        # but we let the parser detect this.
        else:
            pass

        # Simple keys are allowed after '-'.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add BLOCK-ENTRY.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(BlockEntryToken(start_mark, end_mark))
531
+
532
    def fetch_key(self):
        """Emit KEY for an explicit '?' key indicator.

        In block context this may also open a block mapping; raises
        ScannerError if a key is not allowed here.
        """

        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a key (not necessarily a simple one)?
            if not self.allow_simple_key:
                raise ScannerError(
                    None, None, "mapping keys are not allowed here", self.get_mark()
                )

            # We may need to add BLOCK-MAPPING-START.
            if self.add_indent(self.column):
                mark = self.get_mark()
                self.tokens.append(BlockMappingStartToken(mark, mark))

        # Simple keys are allowed after '?' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add KEY.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(KeyToken(start_mark, end_mark))
559
+
560
    def fetch_value(self):
        """Emit VALUE for ':'.

        If a simple-key candidate is pending at this flow level, a KEY token
        (and possibly BLOCK-MAPPING-START) is retro-inserted into the token
        queue at the position recorded when the candidate was saved.
        """

        # Do we determine a simple key?
        if self.flow_level in self.possible_simple_keys:

            # Add KEY at the remembered queue position. The queue index is
            # the absolute token number minus the tokens already emitted.
            key = self.possible_simple_keys[self.flow_level]
            del self.possible_simple_keys[self.flow_level]
            self.tokens.insert(
                key.token_number - self.tokens_taken, KeyToken(key.mark, key.mark)
            )

            # If this key starts a new block mapping, we need to add
            # BLOCK-MAPPING-START.
            if not self.flow_level:
                if self.add_indent(key.column):
                    self.tokens.insert(
                        key.token_number - self.tokens_taken,
                        BlockMappingStartToken(key.mark, key.mark),
                    )

            # There cannot be two simple keys one after another.
            self.allow_simple_key = False

        # It must be a part of a complex key.
        else:

            # Block context needs additional checks.
            # (Do we really need them? They will be caught by the parser
            # anyway.)
            if not self.flow_level:

                # We are allowed to start a complex value if and only if
                # we can start a simple key.
                if not self.allow_simple_key:
                    raise ScannerError(
                        None,
                        None,
                        "mapping values are not allowed here",
                        self.get_mark(),
                    )

            # If this value starts a new block mapping, we need to add
            # BLOCK-MAPPING-START. It will be detected as an error later by
            # the parser.
            if not self.flow_level:
                if self.add_indent(self.column):
                    mark = self.get_mark()
                    self.tokens.append(BlockMappingStartToken(mark, mark))

            # Simple keys are allowed after ':' in the block context.
            self.allow_simple_key = not self.flow_level

            # Reset possible simple key on the current level.
            self.remove_possible_simple_key()

        # Add VALUE.
        start_mark = self.get_mark()
        self.forward()
        end_mark = self.get_mark()
        self.tokens.append(ValueToken(start_mark, end_mark))
621
+
622
    def fetch_alias(self):
        """Scan '*name' and emit an ALIAS token."""

        # ALIAS could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after ALIAS.
        self.allow_simple_key = False

        # Scan and add ALIAS.
        self.tokens.append(self.scan_anchor(AliasToken))

    def fetch_anchor(self):
        """Scan '&name' and emit an ANCHOR token."""

        # ANCHOR could start a simple key.
        self.save_possible_simple_key()

        # No simple keys after ANCHOR.
        self.allow_simple_key = False

        # Scan and add ANCHOR.
        self.tokens.append(self.scan_anchor(AnchorToken))

    def fetch_tag(self):
        """Scan a '!...' tag and emit a TAG token."""

        # TAG could start a simple key.
        self.save_possible_simple_key()

        # No simple keys after TAG.
        self.allow_simple_key = False

        # Scan and add TAG.
        self.tokens.append(self.scan_tag())
654
+
655
    def fetch_literal(self):
        """Emit a SCALAR token for a '|' literal block scalar."""
        self.fetch_block_scalar(style="|")

    def fetch_folded(self):
        """Emit a SCALAR token for a '>' folded block scalar."""
        self.fetch_block_scalar(style=">")

    def fetch_block_scalar(self, style):
        """Shared body for '|' and '>' block scalars."""

        # A simple key may follow a block scalar.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Scan and add SCALAR.
        self.tokens.append(self.scan_block_scalar(style))
671
+
672
    def fetch_single(self):
        """Emit a SCALAR token for a single-quoted scalar."""
        self.fetch_flow_scalar(style="'")

    def fetch_double(self):
        """Emit a SCALAR token for a double-quoted scalar."""
        self.fetch_flow_scalar(style='"')

    def fetch_flow_scalar(self, style):
        """Shared body for quoted (flow) scalars."""

        # A flow scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after flow scalars.
        self.allow_simple_key = False

        # Scan and add SCALAR.
        self.tokens.append(self.scan_flow_scalar(style))
688
+
689
    def fetch_plain(self):
        """Scan a plain (unquoted) scalar and emit a SCALAR token."""

        # A plain scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after plain scalars. But note that `scan_plain` will
        # change this flag if the scan is finished at the beginning of the
        # line.
        self.allow_simple_key = False

        # Scan and add SCALAR. May change `allow_simple_key`.
        self.tokens.append(self.scan_plain())
701
+
702
+ # Checkers.
703
+
704
+ def check_directive(self):
705
+
706
+ # DIRECTIVE: ^ '%' ...
707
+ # The '%' indicator is already checked.
708
+ if self.column == 0:
709
+ return True
710
+
711
+ def check_document_start(self):
712
+
713
+ # DOCUMENT-START: ^ '---' (' '|'\n')
714
+ if self.column == 0:
715
+ if self.prefix(3) == "---" and self.peek(3) in "\0 \t\r\n\x85\u2028\u2029":
716
+ return True
717
+
718
+ def check_document_end(self):
719
+
720
+ # DOCUMENT-END: ^ '...' (' '|'\n')
721
+ if self.column == 0:
722
+ if self.prefix(3) == "..." and self.peek(3) in "\0 \t\r\n\x85\u2028\u2029":
723
+ return True
724
+
725
+ def check_block_entry(self):
726
+
727
+ # BLOCK-ENTRY: '-' (' '|'\n')
728
+ return self.peek(1) in "\0 \t\r\n\x85\u2028\u2029"
729
+
730
+ def check_key(self):
731
+
732
+ # KEY(flow context): '?'
733
+ if self.flow_level:
734
+ return True
735
+
736
+ # KEY(block context): '?' (' '|'\n')
737
+ else:
738
+ return self.peek(1) in "\0 \t\r\n\x85\u2028\u2029"
739
+
740
+ def check_value(self):
741
+
742
+ # VALUE(flow context): ':'
743
+ if self.flow_level:
744
+ return True
745
+
746
+ # VALUE(block context): ':' (' '|'\n')
747
+ else:
748
+ return self.peek(1) in "\0 \t\r\n\x85\u2028\u2029"
749
+
750
    def check_plain(self):
        """Return True if a plain (unquoted) scalar may start here."""

        # A plain scalar may start with any non-space character except:
        #   '-', '?', ':', ',', '[', ']', '{', '}',
        #   '#', '&', '*', '!', '|', '>', '\'', '\"',
        #   '%', '@', '`'.
        #
        # It may also start with
        #   '-', '?', ':'
        # if it is followed by a non-space character.
        #
        # Note that we limit the last rule to the block context (except the
        # '-' character) because we want the flow context to be space
        # independent.
        ch = self.peek()
        return ch not in "\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>'\"%@`" or (
            self.peek(1) not in "\0 \t\r\n\x85\u2028\u2029"
            and (ch == "-" or (not self.flow_level and ch in "?:"))
        )
769
+
770
+ # Scanners.
771
+
772
    def scan_to_next_token(self):
        """Skip spaces, line breaks, and comments up to the next token.

        In block context, crossing a line break re-enables simple keys.
        A BOM is stripped only at the very start of the stream.
        """
        # We ignore spaces, line breaks and comments.
        # If we find a line break in the block context, we set the flag
        # `allow_simple_key` on.
        # The byte order mark is stripped if it's the first character in the
        # stream. We do not yet support BOM inside the stream as the
        # specification requires. Any such mark will be considered as a part
        # of the document.
        #
        # TODO: We need to make tab handling rules more sane. A good rule is
        #   Tabs cannot precede tokens
        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
        #   KEY(block), VALUE(block), BLOCK-ENTRY
        # So the checking code is
        #   if <TAB>:
        #       self.allow_simple_keys = False
        # We also need to add the check for `allow_simple_keys == True` to
        # `unwind_indent` before issuing BLOCK-END.
        # Scanners for block, flow, and plain scalars need to be modified.

        if self.index == 0 and self.peek() == "\uFEFF":
            self.forward()
        found = False
        while not found:
            while self.peek() == " ":
                self.forward()
            if self.peek() == "#":
                # Comments run to the end of the line.
                while self.peek() not in "\0\r\n\x85\u2028\u2029":
                    self.forward()
            if self.scan_line_break():
                if not self.flow_level:
                    self.allow_simple_key = True
            else:
                found = True
806
+
807
    def scan_directive(self):
        """Scan a full '%NAME ...' directive line; return a DirectiveToken.

        YAML and TAG directives get structured values; unknown directives
        are skipped to end of line with value None.
        """
        # See the specification for details.
        start_mark = self.get_mark()
        self.forward()
        name = self.scan_directive_name(start_mark)
        value = None
        if name == "YAML":
            value = self.scan_yaml_directive_value(start_mark)
            end_mark = self.get_mark()
        elif name == "TAG":
            value = self.scan_tag_directive_value(start_mark)
            end_mark = self.get_mark()
        else:
            end_mark = self.get_mark()
            # Unknown directive: consume the rest of the line.
            while self.peek() not in "\0\r\n\x85\u2028\u2029":
                self.forward()
        self.scan_directive_ignored_line(start_mark)
        return DirectiveToken(name, value, start_mark, end_mark)
825
+
826
    def scan_directive_name(self, start_mark):
        """Scan the directive name (ASCII alphanumerics, '-', '_').

        Raises ScannerError on an empty name or an invalid terminator.
        """
        # See the specification for details.
        length = 0
        ch = self.peek(length)
        while "0" <= ch <= "9" or "A" <= ch <= "Z" or "a" <= ch <= "z" or ch in "-_":
            length += 1
            ch = self.peek(length)
        if not length:
            raise ScannerError(
                "while scanning a directive",
                start_mark,
                "expected alphabetic or numeric character, but found %r" % ch,
                self.get_mark(),
            )
        value = self.prefix(length)
        self.forward(length)
        ch = self.peek()
        # The name must be terminated by whitespace, a break, or EOF.
        if ch not in "\0 \r\n\x85\u2028\u2029":
            raise ScannerError(
                "while scanning a directive",
                start_mark,
                "expected alphabetic or numeric character, but found %r" % ch,
                self.get_mark(),
            )
        return value
851
+
852
    def scan_yaml_directive_value(self, start_mark):
        """Scan the '<major>.<minor>' version of a %YAML directive.

        Returns a (major, minor) tuple of ints; raises ScannerError on a
        malformed version.
        """
        # See the specification for details.
        while self.peek() == " ":
            self.forward()
        major = self.scan_yaml_directive_number(start_mark)
        if self.peek() != ".":
            raise ScannerError(
                "while scanning a directive",
                start_mark,
                "expected a digit or '.', but found %r" % self.peek(),
                self.get_mark(),
            )
        self.forward()
        minor = self.scan_yaml_directive_number(start_mark)
        if self.peek() not in "\0 \r\n\x85\u2028\u2029":
            raise ScannerError(
                "while scanning a directive",
                start_mark,
                "expected a digit or ' ', but found %r" % self.peek(),
                self.get_mark(),
            )
        return (major, minor)
874
+
875
    def scan_yaml_directive_number(self, start_mark):
        """Scan a run of ASCII digits and return it as an int.

        Raises ScannerError if the next character is not a digit.
        """
        # See the specification for details.
        ch = self.peek()
        if not ("0" <= ch <= "9"):
            raise ScannerError(
                "while scanning a directive",
                start_mark,
                "expected a digit, but found %r" % ch,
                self.get_mark(),
            )
        length = 0
        while "0" <= self.peek(length) <= "9":
            length += 1
        value = int(self.prefix(length))
        self.forward(length)
        return value
891
+
892
    def scan_tag_directive_value(self, start_mark):
        """Scan the '<handle> <prefix>' payload of a %TAG directive.

        Returns a (handle, prefix) tuple.
        """
        # See the specification for details.
        while self.peek() == " ":
            self.forward()
        handle = self.scan_tag_directive_handle(start_mark)
        while self.peek() == " ":
            self.forward()
        prefix = self.scan_tag_directive_prefix(start_mark)
        return (handle, prefix)

    def scan_tag_directive_handle(self, start_mark):
        """Scan the tag handle of a %TAG directive; must be space-terminated."""
        # See the specification for details.
        value = self.scan_tag_handle("directive", start_mark)
        ch = self.peek()
        if ch != " ":
            raise ScannerError(
                "while scanning a directive",
                start_mark,
                "expected ' ', but found %r" % ch,
                self.get_mark(),
            )
        return value

    def scan_tag_directive_prefix(self, start_mark):
        """Scan the tag prefix URI of a %TAG directive; must end the entry."""
        # See the specification for details.
        value = self.scan_tag_uri("directive", start_mark)
        ch = self.peek()
        if ch not in "\0 \r\n\x85\u2028\u2029":
            raise ScannerError(
                "while scanning a directive",
                start_mark,
                "expected ' ', but found %r" % ch,
                self.get_mark(),
            )
        return value
927
+
928
    def scan_directive_ignored_line(self, start_mark):
        """Consume trailing spaces and an optional comment after a directive,
        then the line break; raise ScannerError on any other content.
        """
        # See the specification for details.
        while self.peek() == " ":
            self.forward()
        if self.peek() == "#":
            while self.peek() not in "\0\r\n\x85\u2028\u2029":
                self.forward()
        ch = self.peek()
        if ch not in "\0\r\n\x85\u2028\u2029":
            raise ScannerError(
                "while scanning a directive",
                start_mark,
                "expected a comment or a line break, but found %r" % ch,
                self.get_mark(),
            )
        self.scan_line_break()
944
+
945
+ def scan_anchor(self, TokenClass):
946
+ # The specification does not restrict characters for anchors and
947
+ # aliases. This may lead to problems, for instance, the document:
948
+ # [ *alias, value ]
949
+ # can be interpreted in two ways, as
950
+ # [ "value" ]
951
+ # and
952
+ # [ *alias , "value" ]
953
+ # Therefore we restrict aliases to numbers and ASCII letters.
954
+ start_mark = self.get_mark()
955
+ indicator = self.peek()
956
+ if indicator == "*":
957
+ name = "alias"
958
+ else:
959
+ name = "anchor"
960
+ self.forward()
961
+ length = 0
962
+ ch = self.peek(length)
963
+ while "0" <= ch <= "9" or "A" <= ch <= "Z" or "a" <= ch <= "z" or ch in "-_":
964
+ length += 1
965
+ ch = self.peek(length)
966
+ if not length:
967
+ raise ScannerError(
968
+ "while scanning an %s" % name,
969
+ start_mark,
970
+ "expected alphabetic or numeric character, but found %r" % ch,
971
+ self.get_mark(),
972
+ )
973
+ value = self.prefix(length)
974
+ self.forward(length)
975
+ ch = self.peek()
976
+ if ch not in "\0 \t\r\n\x85\u2028\u2029?:,]}%@`":
977
+ raise ScannerError(
978
+ "while scanning an %s" % name,
979
+ start_mark,
980
+ "expected alphabetic or numeric character, but found %r" % ch,
981
+ self.get_mark(),
982
+ )
983
+ end_mark = self.get_mark()
984
+ return TokenClass(value, start_mark, end_mark)
985
+
986
    def scan_tag(self):
        # See the specification for details.
        # Returns a TagToken whose value is a (handle, suffix) pair; handle
        # may be None (verbatim or non-specific tag) or a '!...!' handle.
        start_mark = self.get_mark()
        ch = self.peek(1)
        if ch == "<":
            # Verbatim tag: !<uri> -- the whole URI is the suffix.
            handle = None
            self.forward(2)
            suffix = self.scan_tag_uri("tag", start_mark)
            if self.peek() != ">":
                raise ScannerError(
                    "while parsing a tag",
                    start_mark,
                    "expected '>', but found %r" % self.peek(),
                    self.get_mark(),
                )
            self.forward()
        elif ch in "\0 \t\r\n\x85\u2028\u2029":
            # A lone '!' is the non-specific tag.
            handle = None
            suffix = "!"
            self.forward()
        else:
            # Shorthand form: look ahead for a second '!' to decide between
            # '!handle!suffix' (named handle) and '!suffix' (primary handle).
            length = 1
            use_handle = False
            while ch not in "\0 \r\n\x85\u2028\u2029":
                if ch == "!":
                    use_handle = True
                    break
                length += 1
                ch = self.peek(length)
            handle = "!"
            if use_handle:
                handle = self.scan_tag_handle("tag", start_mark)
            else:
                handle = "!"
                self.forward()
            suffix = self.scan_tag_uri("tag", start_mark)
        # A tag must be followed by a blank, break, or end of stream.
        ch = self.peek()
        if ch not in "\0 \r\n\x85\u2028\u2029":
            raise ScannerError(
                "while scanning a tag",
                start_mark,
                "expected ' ', but found %r" % ch,
                self.get_mark(),
            )
        value = (handle, suffix)
        end_mark = self.get_mark()
        return TagToken(value, start_mark, end_mark)
1033
+
1034
    def scan_block_scalar(self, style):
        # See the specification for details.
        # Scans a literal ('|') or folded ('>') block scalar and returns a
        # non-plain ScalarToken.

        if style == ">":
            folded = True
        else:
            folded = False

        chunks = []
        start_mark = self.get_mark()

        # Scan the header.
        self.forward()
        chomping, increment = self.scan_block_scalar_indicators(start_mark)
        self.scan_block_scalar_ignored_line(start_mark)

        # Determine the indentation level and go to the first non-empty line.
        min_indent = self.indent + 1
        if min_indent < 1:
            min_indent = 1
        if increment is None:
            # No explicit indentation indicator: deduce the indent from the
            # first non-empty line.
            breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
            indent = max(min_indent, max_indent)
        else:
            indent = min_indent + increment - 1
            breaks, end_mark = self.scan_block_scalar_breaks(indent)
        line_break = ""

        # Scan the inner part of the block scalar.
        while self.column == indent and self.peek() != "\0":
            chunks.extend(breaks)
            leading_non_space = self.peek() not in " \t"
            length = 0
            while self.peek(length) not in "\0\r\n\x85\u2028\u2029":
                length += 1
            chunks.append(self.prefix(length))
            self.forward(length)
            line_break = self.scan_line_break()
            breaks, end_mark = self.scan_block_scalar_breaks(indent)
            if self.column == indent and self.peek() != "\0":

                # Unfortunately, folding rules are ambiguous.
                #
                # This is the folding according to the specification:

                if (
                    folded
                    and line_break == "\n"
                    and leading_non_space
                    and self.peek() not in " \t"
                ):
                    if not breaks:
                        chunks.append(" ")
                else:
                    chunks.append(line_break)

                # This is Clark Evans's interpretation (also in the spec
                # examples):
                #
                # if folded and line_break == '\n':
                #     if not breaks:
                #         if self.peek() not in ' \t':
                #             chunks.append(' ')
                #         else:
                #             chunks.append(line_break)
                #     else:
                #         chunks.append(line_break)
            else:
                break

        # Chomp the tail.
        # chomping: None = clip (keep final break), True = keep (also keep
        # trailing empty lines), False = strip (drop all trailing breaks).
        if chomping is not False:
            chunks.append(line_break)
        if chomping is True:
            chunks.extend(breaks)

        # We are done.
        return ScalarToken("".join(chunks), False, start_mark, end_mark, style)
1112
+
1113
    def scan_block_scalar_indicators(self, start_mark):
        # See the specification for details.
        # The optional chomping ('+' keep / '-' strip) and indentation (1-9)
        # indicators may appear in either order after '|' or '>', hence the
        # two mirrored branches below.  Returns (chomping, increment) where
        # chomping is True/False/None and increment is an int or None.
        chomping = None
        increment = None
        ch = self.peek()
        if ch in "+-":
            # Chomping indicator first, optionally followed by indentation.
            if ch == "+":
                chomping = True
            else:
                chomping = False
            self.forward()
            ch = self.peek()
            if ch in "0123456789":
                increment = int(ch)
                if increment == 0:
                    raise ScannerError(
                        "while scanning a block scalar",
                        start_mark,
                        "expected indentation indicator in the range 1-9, but found 0",
                        self.get_mark(),
                    )
                self.forward()
        elif ch in "0123456789":
            # Indentation indicator first, optionally followed by chomping.
            increment = int(ch)
            if increment == 0:
                raise ScannerError(
                    "while scanning a block scalar",
                    start_mark,
                    "expected indentation indicator in the range 1-9, but found 0",
                    self.get_mark(),
                )
            self.forward()
            ch = self.peek()
            if ch in "+-":
                if ch == "+":
                    chomping = True
                else:
                    chomping = False
                self.forward()
        # The header must end at a blank, a break, or end of stream.
        ch = self.peek()
        if ch not in "\0 \r\n\x85\u2028\u2029":
            raise ScannerError(
                "while scanning a block scalar",
                start_mark,
                "expected chomping or indentation indicators, but found %r" % ch,
                self.get_mark(),
            )
        return chomping, increment
1161
+
1162
+ def scan_block_scalar_ignored_line(self, start_mark):
1163
+ # See the specification for details.
1164
+ while self.peek() == " ":
1165
+ self.forward()
1166
+ if self.peek() == "#":
1167
+ while self.peek() not in "\0\r\n\x85\u2028\u2029":
1168
+ self.forward()
1169
+ ch = self.peek()
1170
+ if ch not in "\0\r\n\x85\u2028\u2029":
1171
+ raise ScannerError(
1172
+ "while scanning a block scalar",
1173
+ start_mark,
1174
+ "expected a comment or a line break, but found %r" % ch,
1175
+ self.get_mark(),
1176
+ )
1177
+ self.scan_line_break()
1178
+
1179
+ def scan_block_scalar_indentation(self):
1180
+ # See the specification for details.
1181
+ chunks = []
1182
+ max_indent = 0
1183
+ end_mark = self.get_mark()
1184
+ while self.peek() in " \r\n\x85\u2028\u2029":
1185
+ if self.peek() != " ":
1186
+ chunks.append(self.scan_line_break())
1187
+ end_mark = self.get_mark()
1188
+ else:
1189
+ self.forward()
1190
+ if self.column > max_indent:
1191
+ max_indent = self.column
1192
+ return chunks, max_indent, end_mark
1193
+
1194
+ def scan_block_scalar_breaks(self, indent):
1195
+ # See the specification for details.
1196
+ chunks = []
1197
+ end_mark = self.get_mark()
1198
+ while self.column < indent and self.peek() == " ":
1199
+ self.forward()
1200
+ while self.peek() in "\r\n\x85\u2028\u2029":
1201
+ chunks.append(self.scan_line_break())
1202
+ end_mark = self.get_mark()
1203
+ while self.column < indent and self.peek() == " ":
1204
+ self.forward()
1205
+ return chunks, end_mark
1206
+
1207
+ def scan_flow_scalar(self, style):
1208
+ # See the specification for details.
1209
+ # Note that we loose indentation rules for quoted scalars. Quoted
1210
+ # scalars don't need to adhere indentation because " and ' clearly
1211
+ # mark the beginning and the end of them. Therefore we are less
1212
+ # restrictive then the specification requires. We only need to check
1213
+ # that document separators are not included in scalars.
1214
+ if style == '"':
1215
+ double = True
1216
+ else:
1217
+ double = False
1218
+ chunks = []
1219
+ start_mark = self.get_mark()
1220
+ quote = self.peek()
1221
+ self.forward()
1222
+ chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
1223
+ while self.peek() != quote:
1224
+ chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
1225
+ chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
1226
+ self.forward()
1227
+ end_mark = self.get_mark()
1228
+ return ScalarToken("".join(chunks), False, start_mark, end_mark, style)
1229
+
1230
    # Single-character escape codes (the character after '\' in a
    # double-quoted scalar) mapped to their replacement characters.
    ESCAPE_REPLACEMENTS = {
        "0": "\0",
        "a": "\x07",
        "b": "\x08",
        "t": "\x09",
        "\t": "\x09",
        "n": "\x0A",
        "v": "\x0B",
        "f": "\x0C",
        "r": "\x0D",
        "e": "\x1B",
        " ": "\x20",
        '"': '"',
        "\\": "\\",
        "/": "/",
        "N": "\x85",
        "_": "\xA0",
        "L": "\u2028",
        "P": "\u2029",
    }

    # Escapes that introduce a fixed-width hexadecimal character code:
    # \xXX, \uXXXX, \UXXXXXXXX -> number of hex digits that follow.
    ESCAPE_CODES = {
        "x": 2,
        "u": 4,
        "U": 8,
    }
1256
+
1257
    def scan_flow_scalar_non_spaces(self, double, start_mark):
        # See the specification for details.
        # Scans the non-blank portions of a quoted scalar, handling quote
        # doubling ('') in single-quoted scalars and backslash escapes in
        # double-quoted ones.  Returns the list of decoded chunks.
        chunks = []
        while True:
            # Copy a literal run of ordinary characters in one slice.
            length = 0
            while self.peek(length) not in "'\"\\\0 \t\r\n\x85\u2028\u2029":
                length += 1
            if length:
                chunks.append(self.prefix(length))
                self.forward(length)
            ch = self.peek()
            if not double and ch == "'" and self.peek(1) == "'":
                # '' inside a single-quoted scalar is an escaped quote.
                chunks.append("'")
                self.forward(2)
            elif (double and ch == "'") or (not double and ch in '"\\'):
                # The other style's special characters are plain text here.
                chunks.append(ch)
                self.forward()
            elif double and ch == "\\":
                self.forward()
                ch = self.peek()
                if ch in self.ESCAPE_REPLACEMENTS:
                    # Single-character escape, e.g. \n or \t.
                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
                    self.forward()
                elif ch in self.ESCAPE_CODES:
                    # Hexadecimal escape: \xXX, \uXXXX or \UXXXXXXXX.
                    length = self.ESCAPE_CODES[ch]
                    self.forward()
                    for k in range(length):
                        if self.peek(k) not in "0123456789ABCDEFabcdef":
                            raise ScannerError(
                                "while scanning a double-quoted scalar",
                                start_mark,
                                "expected escape sequence of %d hexadecimal numbers, but found %r"
                                % (length, self.peek(k)),
                                self.get_mark(),
                            )
                    code = int(self.prefix(length), 16)
                    chunks.append(chr(code))
                    self.forward(length)
                elif ch in "\r\n\x85\u2028\u2029":
                    # Escaped line break: the break and any following blank
                    # lines are removed from the value.
                    self.scan_line_break()
                    chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
                else:
                    raise ScannerError(
                        "while scanning a double-quoted scalar",
                        start_mark,
                        "found unknown escape character %r" % ch,
                        self.get_mark(),
                    )
            else:
                # A blank, break, quote or NUL ends the non-space run.
                return chunks
1307
+
1308
+ def scan_flow_scalar_spaces(self, double, start_mark):
1309
+ # See the specification for details.
1310
+ chunks = []
1311
+ length = 0
1312
+ while self.peek(length) in " \t":
1313
+ length += 1
1314
+ whitespaces = self.prefix(length)
1315
+ self.forward(length)
1316
+ ch = self.peek()
1317
+ if ch == "\0":
1318
+ raise ScannerError(
1319
+ "while scanning a quoted scalar",
1320
+ start_mark,
1321
+ "found unexpected end of stream",
1322
+ self.get_mark(),
1323
+ )
1324
+ elif ch in "\r\n\x85\u2028\u2029":
1325
+ line_break = self.scan_line_break()
1326
+ breaks = self.scan_flow_scalar_breaks(double, start_mark)
1327
+ if line_break != "\n":
1328
+ chunks.append(line_break)
1329
+ elif not breaks:
1330
+ chunks.append(" ")
1331
+ chunks.extend(breaks)
1332
+ else:
1333
+ chunks.append(whitespaces)
1334
+ return chunks
1335
+
1336
+ def scan_flow_scalar_breaks(self, double, start_mark):
1337
+ # See the specification for details.
1338
+ chunks = []
1339
+ while True:
1340
+ # Instead of checking indentation, we check for document
1341
+ # separators.
1342
+ prefix = self.prefix(3)
1343
+ if (prefix == "---" or prefix == "...") and self.peek(
1344
+ 3
1345
+ ) in "\0 \t\r\n\x85\u2028\u2029":
1346
+ raise ScannerError(
1347
+ "while scanning a quoted scalar",
1348
+ start_mark,
1349
+ "found unexpected document separator",
1350
+ self.get_mark(),
1351
+ )
1352
+ while self.peek() in " \t":
1353
+ self.forward()
1354
+ if self.peek() in "\r\n\x85\u2028\u2029":
1355
+ chunks.append(self.scan_line_break())
1356
+ else:
1357
+ return chunks
1358
+
1359
    def scan_plain(self):
        # See the specification for details.
        # We add an additional restriction for the flow context:
        #   plain scalars in the flow context cannot contain ',' or '?'.
        # We also keep track of the `allow_simple_key` flag here.
        # Indentation rules are loosed for the flow context.
        # Returns a plain (non-quoted) ScalarToken.
        chunks = []
        start_mark = self.get_mark()
        end_mark = start_mark
        indent = self.indent + 1
        # We allow zero indentation for scalars, but then we need to check for
        # document separators at the beginning of the line.
        # if indent == 0:
        #     indent = 1
        spaces = []
        while True:
            length = 0
            if self.peek() == "#":
                # A comment terminates the scalar.
                break
            # Measure the next run of scalar characters; ':' only ends the
            # scalar when followed by a blank (or a flow indicator in flow
            # context).
            while True:
                ch = self.peek(length)
                if (
                    ch in "\0 \t\r\n\x85\u2028\u2029"
                    or (
                        ch == ":"
                        and self.peek(length + 1)
                        in "\0 \t\r\n\x85\u2028\u2029"
                        + (",[]{}" if self.flow_level else "")
                    )
                    or (self.flow_level and ch in ",?[]{}")
                ):
                    break
                length += 1
            if length == 0:
                break
            self.allow_simple_key = False
            # Inter-line/inter-word spaces are only kept when more content
            # follows, hence the deferred extend.
            chunks.extend(spaces)
            chunks.append(self.prefix(length))
            self.forward(length)
            end_mark = self.get_mark()
            spaces = self.scan_plain_spaces(indent, start_mark)
            if (
                not spaces
                or self.peek() == "#"
                or (not self.flow_level and self.column < indent)
            ):
                break
        return ScalarToken("".join(chunks), True, start_mark, end_mark)
1407
+
1408
    def scan_plain_spaces(self, indent, start_mark):
        # See the specification for details.
        # The specification is really confusing about tabs in plain scalars.
        # We just forbid them completely. Do not use tabs in YAML!
        # NOTE: returns None (rather than a chunk list) when a document
        # separator is found, which tells scan_plain to stop; `indent` is
        # currently unused here.
        chunks = []
        length = 0
        while self.peek(length) in " ":
            length += 1
        whitespaces = self.prefix(length)
        self.forward(length)
        ch = self.peek()
        if ch in "\r\n\x85\u2028\u2029":
            line_break = self.scan_line_break()
            # A line break within a plain scalar re-enables simple keys.
            self.allow_simple_key = True
            prefix = self.prefix(3)
            if (prefix == "---" or prefix == "...") and self.peek(
                3
            ) in "\0 \t\r\n\x85\u2028\u2029":
                return
            breaks = []
            while self.peek() in " \r\n\x85\u2028\u2029":
                if self.peek() == " ":
                    self.forward()
                else:
                    breaks.append(self.scan_line_break())
                    prefix = self.prefix(3)
                    if (prefix == "---" or prefix == "...") and self.peek(
                        3
                    ) in "\0 \t\r\n\x85\u2028\u2029":
                        return
            # Fold: a single '\n' becomes a space; other breaks and blank
            # lines are kept.
            if line_break != "\n":
                chunks.append(line_break)
            elif not breaks:
                chunks.append(" ")
            chunks.extend(breaks)
        elif whitespaces:
            chunks.append(whitespaces)
        return chunks
1446
+
1447
+ def scan_tag_handle(self, name, start_mark):
1448
+ # See the specification for details.
1449
+ # For some strange reasons, the specification does not allow '_' in
1450
+ # tag handles. I have allowed it anyway.
1451
+ ch = self.peek()
1452
+ if ch != "!":
1453
+ raise ScannerError(
1454
+ "while scanning a %s" % name,
1455
+ start_mark,
1456
+ "expected '!', but found %r" % ch,
1457
+ self.get_mark(),
1458
+ )
1459
+ length = 1
1460
+ ch = self.peek(length)
1461
+ if ch != " ":
1462
+ while (
1463
+ "0" <= ch <= "9" or "A" <= ch <= "Z" or "a" <= ch <= "z" or ch in "-_"
1464
+ ):
1465
+ length += 1
1466
+ ch = self.peek(length)
1467
+ if ch != "!":
1468
+ self.forward(length)
1469
+ raise ScannerError(
1470
+ "while scanning a %s" % name,
1471
+ start_mark,
1472
+ "expected '!', but found %r" % ch,
1473
+ self.get_mark(),
1474
+ )
1475
+ length += 1
1476
+ value = self.prefix(length)
1477
+ self.forward(length)
1478
+ return value
1479
+
1480
+ def scan_tag_uri(self, name, start_mark):
1481
+ # See the specification for details.
1482
+ # Note: we do not check if URI is well-formed.
1483
+ chunks = []
1484
+ length = 0
1485
+ ch = self.peek(length)
1486
+ while (
1487
+ "0" <= ch <= "9"
1488
+ or "A" <= ch <= "Z"
1489
+ or "a" <= ch <= "z"
1490
+ or ch in "-;/?:@&=+$,_.!~*'()[]%"
1491
+ ):
1492
+ if ch == "%":
1493
+ chunks.append(self.prefix(length))
1494
+ self.forward(length)
1495
+ length = 0
1496
+ chunks.append(self.scan_uri_escapes(name, start_mark))
1497
+ else:
1498
+ length += 1
1499
+ ch = self.peek(length)
1500
+ if length:
1501
+ chunks.append(self.prefix(length))
1502
+ self.forward(length)
1503
+ length = 0
1504
+ if not chunks:
1505
+ raise ScannerError(
1506
+ "while parsing a %s" % name,
1507
+ start_mark,
1508
+ "expected URI, but found %r" % ch,
1509
+ self.get_mark(),
1510
+ )
1511
+ return "".join(chunks)
1512
+
1513
+ def scan_uri_escapes(self, name, start_mark):
1514
+ # See the specification for details.
1515
+ codes = []
1516
+ mark = self.get_mark()
1517
+ while self.peek() == "%":
1518
+ self.forward()
1519
+ for k in range(2):
1520
+ if self.peek(k) not in "0123456789ABCDEFabcdef":
1521
+ raise ScannerError(
1522
+ "while scanning a %s" % name,
1523
+ start_mark,
1524
+ "expected URI escape sequence of 2 hexadecimal numbers, but found %r"
1525
+ % self.peek(k),
1526
+ self.get_mark(),
1527
+ )
1528
+ codes.append(int(self.prefix(2), 16))
1529
+ self.forward(2)
1530
+ try:
1531
+ value = bytes(codes).decode("utf-8")
1532
+ except UnicodeDecodeError as exc:
1533
+ raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
1534
+ return value
1535
+
1536
+ def scan_line_break(self):
1537
+ # Transforms:
1538
+ # '\r\n' : '\n'
1539
+ # '\r' : '\n'
1540
+ # '\n' : '\n'
1541
+ # '\x85' : '\n'
1542
+ # '\u2028' : '\u2028'
1543
+ # '\u2029 : '\u2029'
1544
+ # default : ''
1545
+ ch = self.peek()
1546
+ if ch in "\r\n\x85":
1547
+ if self.prefix(2) == "\r\n":
1548
+ self.forward(2)
1549
+ else:
1550
+ self.forward()
1551
+ return "\n"
1552
+ elif ch in "\u2028\u2029":
1553
+ self.forward()
1554
+ return ch
1555
+ return ""