RbYAML 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -1
- data/lib/rbyaml/composer.rb +28 -25
- data/lib/rbyaml/composer.rb.~1.2.~ +109 -0
- data/lib/rbyaml/constructor.rb +94 -84
- data/lib/rbyaml/constructor.rb.~1.2.~ +381 -0
- data/lib/rbyaml/dumper.rb +10 -17
- data/lib/rbyaml/dumper.rb.~1.2.~ +43 -0
- data/lib/rbyaml/emitter.rb +13 -26
- data/lib/rbyaml/emitter.rb.~1.2.~ +1116 -0
- data/lib/rbyaml/error.rb +15 -21
- data/lib/rbyaml/events.rb +29 -5
- data/lib/rbyaml/events.rb.~1.2.~ +93 -0
- data/lib/rbyaml/loader.rb +11 -23
- data/lib/rbyaml/loader.rb.~1.2.~ +52 -0
- data/lib/rbyaml/nodes.rb +13 -9
- data/lib/rbyaml/nodes.rb.~1.2.~ +52 -0
- data/lib/rbyaml/parser.rb +481 -343
- data/lib/rbyaml/parser.rb.old +531 -0
- data/lib/rbyaml/parser.rb.~1.2.~ +494 -0
- data/lib/rbyaml/reader.rb.~1.1.1.1.~ +127 -0
- data/lib/rbyaml/representer.rb +26 -17
- data/lib/rbyaml/representer.rb.~1.2.~ +239 -0
- data/lib/rbyaml/resolver.rb +15 -15
- data/lib/rbyaml/resolver.rb.~1.1.~ +163 -0
- data/lib/rbyaml/scanner.rb +457 -366
- data/lib/rbyaml/scanner.rb.~1.2.~ +1259 -0
- data/lib/rbyaml/serializer.rb +19 -17
- data/lib/rbyaml/serializer.rb.~1.2.~ +115 -0
- data/lib/rbyaml/tokens.rb +44 -4
- data/lib/rbyaml/tokens.rb.~1.2.~ +164 -0
- data/lib/rbyaml/util.rb +28 -0
- data/lib/rbyaml/yaml.rb +12 -12
- data/lib/rbyaml/yaml.rb.~1.2.~ +136 -0
- data/test/test_bm.rb +28 -0
- data/test/test_bm_syck.rb +28 -0
- data/test/test_invoke.rb +31 -0
- data/test/test_one.rb +5 -0
- data/test/test_profile.rb +32 -0
- data/test/test_rbyaml.rb +2 -1
- data/test/test_rbyaml.rb.~1.2.~ +31 -0
- data/test/test_time.rb +13 -8
- data/test/test_time.rb.~1.1.~ +29 -0
- data/test/yamlx.rb +3563 -0
- metadata +27 -2
@@ -0,0 +1,1259 @@
|
|
1
|
+
# Scanner produces tokens of the following types:
|
2
|
+
# STREAM-START
|
3
|
+
# STREAM-END
|
4
|
+
# DIRECTIVE(name, value)
|
5
|
+
# DOCUMENT-START
|
6
|
+
# DOCUMENT-END
|
7
|
+
# BLOCK-SEQUENCE-START
|
8
|
+
# BLOCK-MAPPING-START
|
9
|
+
# BLOCK-END
|
10
|
+
# FLOW-SEQUENCE-START
|
11
|
+
# FLOW-MAPPING-START
|
12
|
+
# FLOW-SEQUENCE-END
|
13
|
+
# FLOW-MAPPING-END
|
14
|
+
# BLOCK-ENTRY
|
15
|
+
# FLOW-ENTRY
|
16
|
+
# KEY
|
17
|
+
# VALUE
|
18
|
+
# ALIAS(value)
|
19
|
+
# ANCHOR(value)
|
20
|
+
# TAG(value)
|
21
|
+
# SCALAR(value, plain)
|
22
|
+
#
|
23
|
+
# Read comments in the Scanner code for more details.
|
24
|
+
#
|
25
|
+
|
26
|
+
require 'rbyaml/error'
|
27
|
+
require 'rbyaml/tokens'
|
28
|
+
|
29
|
+
module RbYAML
|
30
|
+
class ScannerError < MarkedYAMLError
|
31
|
+
end
|
32
|
+
|
33
|
+
class SimpleKey
|
34
|
+
attr_reader :token_number, :required, :index, :line, :column, :mark
|
35
|
+
|
36
|
+
def initialize(token_number,required,index,line,column,mark)
|
37
|
+
@token_number = token_number
|
38
|
+
@required = required
|
39
|
+
@index = index
|
40
|
+
@line = line
|
41
|
+
@column = column
|
42
|
+
@mark = mark
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
module Scanner
|
47
|
+
def initialize_scanner
|
48
|
+
# It is assumed that Scanner and Reader will mixin to the same point.
|
49
|
+
# Reader do the dirty work of checking for BOM. It also adds NUL to the end.
|
50
|
+
#
|
51
|
+
# Reader supports the following methods
|
52
|
+
# self.peek(i=0) # peek the next i-th character
|
53
|
+
# self.prefix(l=1) # peek the next l characters
|
54
|
+
# self.forward(l=1) # read the next l characters and move the pointer.
|
55
|
+
|
56
|
+
# Had we reached the end of the stream?
|
57
|
+
@done = false
|
58
|
+
|
59
|
+
# The number of unclosed '{' and '['. `flow_level == 0` means block
|
60
|
+
# context.
|
61
|
+
@flow_level = 0
|
62
|
+
|
63
|
+
# List of processed tokens that are not yet emitted.
|
64
|
+
@tokens = []
|
65
|
+
|
66
|
+
# Add the STREAM-START token.
|
67
|
+
fetch_stream_start
|
68
|
+
|
69
|
+
# Number of tokens that were emitted through the `get_token` method.
|
70
|
+
@tokens_taken = 0
|
71
|
+
|
72
|
+
# The current indentation level.
|
73
|
+
@indent = -1
|
74
|
+
|
75
|
+
# Past indentation levels.
|
76
|
+
@indents = []
|
77
|
+
|
78
|
+
# Variables related to simple keys treatment.
|
79
|
+
|
80
|
+
# A simple key is a key that is not denoted by the '?' indicator.
|
81
|
+
# Example of simple keys:
|
82
|
+
# ---
|
83
|
+
# block simple key: value
|
84
|
+
# ? not a simple key:
|
85
|
+
# : { flow simple key: value }
|
86
|
+
# We emit the KEY token before all keys, so when we find a potential
|
87
|
+
# simple key, we try to locate the corresponding ':' indicator.
|
88
|
+
# Simple keys should be limited to a single line and 1024 characters.
|
89
|
+
|
90
|
+
# Can a simple key start at the current position? A simple key may
|
91
|
+
# start:
|
92
|
+
# - at the beginning of the line, not counting indentation spaces
|
93
|
+
# (in block context),
|
94
|
+
# - after '{', '[', ',' (in the flow context),
|
95
|
+
# - after '?', ':', '-' (in the block context).
|
96
|
+
# In the block context, this flag also signifies if a block collection
|
97
|
+
# may start at the current position.
|
98
|
+
@allow_simple_key = true
|
99
|
+
|
100
|
+
# Keep track of possible simple keys. This is a dictionary. The key
|
101
|
+
# is `flow_level`; there can be no more that one possible simple key
|
102
|
+
# for each level. The value is a SimpleKey record:
|
103
|
+
# (token_number, required, index, line, column, mark)
|
104
|
+
# A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
|
105
|
+
# '[', or '{' tokens.
|
106
|
+
@possible_simple_keys = {}
|
107
|
+
end
|
108
|
+
|
109
|
+
def check_token(*choices)
|
110
|
+
# Check if the next token is one of the given types.
|
111
|
+
fetch_more_tokens while need_more_tokens
|
112
|
+
unless @tokens.empty?
|
113
|
+
return true if choices.empty?
|
114
|
+
for choice in choices
|
115
|
+
return true if choice === @tokens[0]
|
116
|
+
end
|
117
|
+
end
|
118
|
+
return false
|
119
|
+
end
|
120
|
+
|
121
|
+
def peek_token
|
122
|
+
# Return the next token, but do not delete if from the queue.
|
123
|
+
fetch_more_tokens while need_more_tokens
|
124
|
+
return @tokens[0] unless @tokens.empty?
|
125
|
+
end
|
126
|
+
|
127
|
+
def get_token
|
128
|
+
# Return the next token.
|
129
|
+
fetch_more_tokens while need_more_tokens
|
130
|
+
unless @tokens.empty?
|
131
|
+
@tokens_taken += 1
|
132
|
+
@tokens.shift
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
def each_token
|
137
|
+
fetch_more_tokens while need_more_tokens
|
138
|
+
while !@tokens.empty?
|
139
|
+
@tokens_taken += 1
|
140
|
+
yield @tokens.shift
|
141
|
+
fetch_more_tokens while need_more_tokens
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
def need_more_tokens
|
146
|
+
return false if @done
|
147
|
+
return true if @tokens.empty?
|
148
|
+
# The current token may be a potential simple key, so we
|
149
|
+
# need to look further.
|
150
|
+
stale_possible_simple_keys
|
151
|
+
return true if next_possible_simple_key == @tokens_taken
|
152
|
+
end
|
153
|
+
|
154
|
+
def fetch_more_tokens
|
155
|
+
# Eat whitespaces and comments until we reach the next token.
|
156
|
+
scan_to_next_token
|
157
|
+
|
158
|
+
# Remove obsolete possible simple keys.
|
159
|
+
stale_possible_simple_keys
|
160
|
+
|
161
|
+
# Compare the current indentation and column. It may add some tokens
|
162
|
+
# and decrease the current indentation level.
|
163
|
+
unwind_indent(@column)
|
164
|
+
|
165
|
+
# Peek the next character.
|
166
|
+
ch = peek
|
167
|
+
|
168
|
+
return case
|
169
|
+
# Is it the end of stream?
|
170
|
+
when ch == ?\0: fetch_stream_end
|
171
|
+
# Is it a directive?
|
172
|
+
when ch == ?% && check_directive: fetch_directive
|
173
|
+
# Is it the document start?
|
174
|
+
when ch == ?- && check_document_start: fetch_document_start
|
175
|
+
# Is it the document end?
|
176
|
+
when ch == ?. && check_document_end: fetch_document_end
|
177
|
+
# Is it the flow sequence start indicator?
|
178
|
+
when ch == ?[: fetch_flow_sequence_start
|
179
|
+
# Is it the flow mapping start indicator?
|
180
|
+
when ch == ?{: fetch_flow_mapping_start
|
181
|
+
# Is it the flow sequence end indicator?
|
182
|
+
when ch == ?]: fetch_flow_sequence_end
|
183
|
+
# Is it the flow mapping end indicator?
|
184
|
+
when ch == ?}: fetch_flow_mapping_end
|
185
|
+
# Is it the flow entry indicator?
|
186
|
+
when ch == ?,: fetch_flow_entry
|
187
|
+
# Is it the block entry indicator?
|
188
|
+
when ch == ?- && check_block_entry: fetch_block_entry
|
189
|
+
# Is it the key indicator?
|
190
|
+
when ch == ?? && check_key: fetch_key
|
191
|
+
# Is it the value indicator?
|
192
|
+
when ch == ?: && check_value: fetch_value
|
193
|
+
# Is it an alias?
|
194
|
+
when ch == ?*: fetch_alias
|
195
|
+
# Is it an anchor?
|
196
|
+
when ch == ?&: fetch_anchor
|
197
|
+
# Is it a tag?
|
198
|
+
when ch == ?!: fetch_tag
|
199
|
+
# Is it a literal scalar?
|
200
|
+
when ch == ?| && @flow_level==0: fetch_literal
|
201
|
+
# Is it a folded scalar?
|
202
|
+
when ch == ?> && @flow_level==0: fetch_folded
|
203
|
+
# Is it a single quoted scalar?
|
204
|
+
when ch == ?': fetch_single
|
205
|
+
# Is it a double quoted scalar?
|
206
|
+
when ch == ?": fetch_double
|
207
|
+
# It must be a plain scalar then.
|
208
|
+
when check_plain: fetch_plain
|
209
|
+
else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
# Simple keys treatment.
|
214
|
+
|
215
|
+
def next_possible_simple_key
|
216
|
+
# Return the number of the nearest possible simple key. Actually we
|
217
|
+
# don't need to loop through the whole dictionary.
|
218
|
+
min_token_number = nil
|
219
|
+
for level in @possible_simple_keys.keys
|
220
|
+
key = @possible_simple_keys[level]
|
221
|
+
if min_token_number.nil? || key.token_number < min_token_number
|
222
|
+
min_token_number = key.token_number
|
223
|
+
end
|
224
|
+
end
|
225
|
+
min_token_number
|
226
|
+
end
|
227
|
+
|
228
|
+
def stale_possible_simple_keys
|
229
|
+
# Remove entries that are no longer possible simple keys. According to
|
230
|
+
# the YAML specification, simple keys
|
231
|
+
# - should be limited to a single line,
|
232
|
+
# - should be no longer than 1024 characters.
|
233
|
+
# Disabling this procedure will allow simple keys of any length and
|
234
|
+
# height (may cause problems if indentation is broken though).
|
235
|
+
@possible_simple_keys.delete_if {|level,key|
|
236
|
+
if key.line != @line || @index-key.index > 1024
|
237
|
+
raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'",get_mark) if key.required
|
238
|
+
return true
|
239
|
+
end
|
240
|
+
return false
|
241
|
+
}
|
242
|
+
end
|
243
|
+
|
244
|
+
def save_possible_simple_key
|
245
|
+
# The next token may start a simple key. We check if it's possible
|
246
|
+
# and save its position. This function is called for
|
247
|
+
# ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
|
248
|
+
|
249
|
+
# Check if a simple key is required at the current position.
|
250
|
+
required = @flow_level==0 && @indent == @column
|
251
|
+
|
252
|
+
# The next token might be a simple key. Let's save it's number and
|
253
|
+
# position.
|
254
|
+
if @allow_simple_key
|
255
|
+
remove_possible_simple_key
|
256
|
+
token_number = @tokens_taken+@tokens.length
|
257
|
+
key = SimpleKey.new(token_number, required,@index,@line,@column,get_mark)
|
258
|
+
@possible_simple_keys[@flow_level] = key
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
def remove_possible_simple_key
|
263
|
+
# Remove the saved possible key position at the current flow level.
|
264
|
+
key = @possible_simple_keys[@flow_level] if @possible_simple_keys.member?(@flow_level)
|
265
|
+
end
|
266
|
+
|
267
|
+
# Indentation functions.
|
268
|
+
|
269
|
+
def unwind_indent(column)
|
270
|
+
## In flow context, tokens should respect indentation.
|
271
|
+
## Actually the condition should be `@indent >= column` according to
|
272
|
+
## the spec. But this condition will prohibit intuitively correct
|
273
|
+
## constructions such as
|
274
|
+
## key : {
|
275
|
+
## }
|
276
|
+
#if @flow_level and @indent > column
|
277
|
+
# raise ScannerError(nil, nil,
|
278
|
+
# "invalid intendation or unclosed '[' or '{'",
|
279
|
+
# get_mark)
|
280
|
+
|
281
|
+
# In the flow context, indentation is ignored. We make the scanner less
|
282
|
+
# restrictive then specification requires.
|
283
|
+
return nil if @flow_level != 0
|
284
|
+
# In block context, we may need to issue the BLOCK-END tokens.
|
285
|
+
while @indent > column
|
286
|
+
mark = get_mark
|
287
|
+
@indent = @indents.pop()
|
288
|
+
@tokens << BlockEndToken.new(mark, mark)
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
def add_indent(column)
|
293
|
+
# Check if we need to increase indentation.
|
294
|
+
if @indent < column
|
295
|
+
@indents << @indent
|
296
|
+
@indent = column
|
297
|
+
return true
|
298
|
+
end
|
299
|
+
return false
|
300
|
+
end
|
301
|
+
|
302
|
+
# Fetchers.
|
303
|
+
|
304
|
+
def fetch_stream_start
|
305
|
+
# We always add STREAM-START as the first token and STREAM-END as the
|
306
|
+
# last token.
|
307
|
+
# Read the token.
|
308
|
+
mark = get_mark
|
309
|
+
# Add STREAM-START.
|
310
|
+
@tokens << StreamStartToken.new(mark, mark, @encoding)
|
311
|
+
end
|
312
|
+
|
313
|
+
|
314
|
+
def fetch_stream_end
|
315
|
+
# Set the current intendation to -1.
|
316
|
+
unwind_indent(-1)
|
317
|
+
# Reset everything (not really needed).
|
318
|
+
@allow_simple_key = false
|
319
|
+
@possible_simple_keys = {}
|
320
|
+
# Read the token.
|
321
|
+
mark = get_mark
|
322
|
+
# Add STREAM-END.
|
323
|
+
@tokens << StreamEndToken.new(mark, mark)
|
324
|
+
# The stream is finished.
|
325
|
+
@done = true
|
326
|
+
end
|
327
|
+
|
328
|
+
def fetch_directive
|
329
|
+
# Set the current intendation to -1.
|
330
|
+
unwind_indent(-1)
|
331
|
+
# Reset simple keys.
|
332
|
+
remove_possible_simple_key
|
333
|
+
@allow_simple_key = false
|
334
|
+
# Scan and add DIRECTIVE.
|
335
|
+
@tokens << scan_directive
|
336
|
+
end
|
337
|
+
|
338
|
+
def fetch_document_start
|
339
|
+
fetch_document_indicator(DocumentStartToken)
|
340
|
+
end
|
341
|
+
|
342
|
+
def fetch_document_end
|
343
|
+
fetch_document_indicator(DocumentEndToken)
|
344
|
+
end
|
345
|
+
|
346
|
+
def fetch_document_indicator(token)
|
347
|
+
# Set the current intendation to -1.
|
348
|
+
unwind_indent(-1)
|
349
|
+
# Reset simple keys. Note that there could not be a block collection
|
350
|
+
# after '---'.
|
351
|
+
remove_possible_simple_key
|
352
|
+
@allow_simple_key = false
|
353
|
+
# Add DOCUMENT-START or DOCUMENT-END.
|
354
|
+
start_mark = get_mark
|
355
|
+
forward(3)
|
356
|
+
end_mark = get_mark
|
357
|
+
@tokens << token.new(start_mark, end_mark)
|
358
|
+
end
|
359
|
+
|
360
|
+
def fetch_flow_sequence_start
|
361
|
+
fetch_flow_collection_start(FlowSequenceStartToken)
|
362
|
+
end
|
363
|
+
|
364
|
+
def fetch_flow_mapping_start
|
365
|
+
fetch_flow_collection_start(FlowMappingStartToken)
|
366
|
+
end
|
367
|
+
|
368
|
+
def fetch_flow_collection_start(token)
|
369
|
+
# '[' and '{' may start a simple key.
|
370
|
+
save_possible_simple_key
|
371
|
+
# Increase the flow level.
|
372
|
+
@flow_level += 1
|
373
|
+
# Simple keys are allowed after '[' and '{'.
|
374
|
+
@allow_simple_key = true
|
375
|
+
# Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
|
376
|
+
start_mark = get_mark
|
377
|
+
forward
|
378
|
+
end_mark = get_mark
|
379
|
+
@tokens << token.new(start_mark, end_mark)
|
380
|
+
end
|
381
|
+
|
382
|
+
def fetch_flow_sequence_end
|
383
|
+
fetch_flow_collection_end(FlowSequenceEndToken)
|
384
|
+
end
|
385
|
+
|
386
|
+
def fetch_flow_mapping_end
|
387
|
+
fetch_flow_collection_end(FlowMappingEndToken)
|
388
|
+
end
|
389
|
+
|
390
|
+
def fetch_flow_collection_end(token)
|
391
|
+
# Reset possible simple key on the current level.
|
392
|
+
remove_possible_simple_key
|
393
|
+
# Decrease the flow level.
|
394
|
+
@flow_level -= 1
|
395
|
+
# No simple keys after ']' or '}'.
|
396
|
+
@allow_simple_key = false
|
397
|
+
# Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
|
398
|
+
start_mark = get_mark
|
399
|
+
forward
|
400
|
+
end_mark = get_mark
|
401
|
+
@tokens << token.new(start_mark, end_mark)
|
402
|
+
end
|
403
|
+
|
404
|
+
def fetch_flow_entry
|
405
|
+
# Simple keys are allowed after ','.
|
406
|
+
@allow_simple_key = true
|
407
|
+
# Reset possible simple key on the current level.
|
408
|
+
remove_possible_simple_key
|
409
|
+
# Add FLOW-ENTRY.
|
410
|
+
start_mark = get_mark
|
411
|
+
forward
|
412
|
+
end_mark = get_mark
|
413
|
+
@tokens << FlowEntryToken.new(start_mark, end_mark)
|
414
|
+
end
|
415
|
+
|
416
|
+
def fetch_block_entry
|
417
|
+
# Block context needs additional checks.
|
418
|
+
if @flow_level==0
|
419
|
+
raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
|
420
|
+
# We may need to add BLOCK-SEQUENCE-START.
|
421
|
+
if add_indent(@column)
|
422
|
+
mark = get_mark
|
423
|
+
@tokens << BlockSequenceStartToken.new(mark, mark)
|
424
|
+
end
|
425
|
+
# It's an error for the block entry to occur in the flow context,
|
426
|
+
# but we let the parser detect this.
|
427
|
+
end
|
428
|
+
# Simple keys are allowed after '-'.
|
429
|
+
@allow_simple_key = true
|
430
|
+
# Reset possible simple key on the current level.
|
431
|
+
remove_possible_simple_key
|
432
|
+
# Add BLOCK-ENTRY.
|
433
|
+
start_mark = get_mark
|
434
|
+
forward
|
435
|
+
end_mark = get_mark
|
436
|
+
@tokens << BlockEntryToken.new(start_mark, end_mark)
|
437
|
+
end
|
438
|
+
|
439
|
+
def fetch_key
|
440
|
+
# Block context needs additional checks.
|
441
|
+
if @flow_level==0
|
442
|
+
# Are we allowed to start a key (not nessesary a simple)?
|
443
|
+
raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
|
444
|
+
# We may need to add BLOCK-MAPPING-START.
|
445
|
+
if add_indent(@column)
|
446
|
+
mark = get_mark
|
447
|
+
@tokens << BlockMappingStartToken.new(mark, mark)
|
448
|
+
end
|
449
|
+
end
|
450
|
+
# Simple keys are allowed after '?' in the block context.
|
451
|
+
@allow_simple_key = @flow_level==0
|
452
|
+
# Reset possible simple key on the current level.
|
453
|
+
remove_possible_simple_key
|
454
|
+
# Add KEY.
|
455
|
+
start_mark = get_mark
|
456
|
+
forward
|
457
|
+
end_mark = get_mark
|
458
|
+
@tokens << KeyToken.new(start_mark, end_mark)
|
459
|
+
end
|
460
|
+
|
461
|
+
def fetch_value
|
462
|
+
# Do we determine a simple key?
|
463
|
+
if @possible_simple_keys.include?(@flow_level)
|
464
|
+
# Add KEY.
|
465
|
+
key = @possible_simple_keys[@flow_level]
|
466
|
+
@possible_simple_keys.delete(@flow_level)
|
467
|
+
@tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
|
468
|
+
# If this key starts a new block mapping, we need to add
|
469
|
+
# BLOCK-MAPPING-START.
|
470
|
+
@tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
|
471
|
+
# There cannot be two simple keys one after another.
|
472
|
+
@allow_simple_key = false
|
473
|
+
# It must be a part of a complex key.
|
474
|
+
else
|
475
|
+
# Block context needs additional checks.
|
476
|
+
# (Do we really need them? They will be catched by the parser
|
477
|
+
# anyway.)
|
478
|
+
if @flow_level==0
|
479
|
+
# We are allowed to start a complex value if and only if
|
480
|
+
# we can start a simple key.
|
481
|
+
raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
|
482
|
+
# Simple keys are allowed after ':' in the block context.
|
483
|
+
@allow_simple_key = @flow_level==0
|
484
|
+
# Reset possible simple key on the current level.
|
485
|
+
remove_possible_simple_key
|
486
|
+
end
|
487
|
+
end
|
488
|
+
# Add VALUE.
|
489
|
+
start_mark = get_mark
|
490
|
+
forward
|
491
|
+
end_mark = get_mark
|
492
|
+
@tokens << ValueToken.new(start_mark, end_mark)
|
493
|
+
end
|
494
|
+
|
495
|
+
def fetch_alias
|
496
|
+
# ALIAS could be a simple key.
|
497
|
+
save_possible_simple_key
|
498
|
+
# No simple keys after ALIAS.
|
499
|
+
@allow_simple_key = false
|
500
|
+
# Scan and add ALIAS.
|
501
|
+
@tokens << scan_anchor(AliasToken)
|
502
|
+
end
|
503
|
+
|
504
|
+
def fetch_anchor
|
505
|
+
# ANCHOR could start a simple key.
|
506
|
+
save_possible_simple_key
|
507
|
+
# No simple keys after ANCHOR.
|
508
|
+
@allow_simple_key = false
|
509
|
+
# Scan and add ANCHOR.
|
510
|
+
@tokens << scan_anchor(AnchorToken)
|
511
|
+
end
|
512
|
+
|
513
|
+
def fetch_tag
|
514
|
+
# TAG could start a simple key.
|
515
|
+
save_possible_simple_key
|
516
|
+
# No simple keys after TAG.
|
517
|
+
@allow_simple_key = false
|
518
|
+
# Scan and add TAG.
|
519
|
+
@tokens << scan_tag
|
520
|
+
end
|
521
|
+
|
522
|
+
def fetch_literal
|
523
|
+
fetch_block_scalar(?|)
|
524
|
+
end
|
525
|
+
|
526
|
+
def fetch_folded
|
527
|
+
fetch_block_scalar(?>)
|
528
|
+
end
|
529
|
+
|
530
|
+
def fetch_block_scalar(style)
|
531
|
+
# A simple key may follow a block scalar.
|
532
|
+
@allow_simple_key = true
|
533
|
+
# Reset possible simple key on the current level.
|
534
|
+
remove_possible_simple_key
|
535
|
+
# Scan and add SCALAR.
|
536
|
+
@tokens << scan_block_scalar(style)
|
537
|
+
end
|
538
|
+
|
539
|
+
def fetch_single
|
540
|
+
fetch_flow_scalar(?')
|
541
|
+
end
|
542
|
+
|
543
|
+
def fetch_double
|
544
|
+
fetch_flow_scalar(?")
|
545
|
+
end
|
546
|
+
|
547
|
+
def fetch_flow_scalar(style)
|
548
|
+
# A flow scalar could be a simple key.
|
549
|
+
save_possible_simple_key
|
550
|
+
# No simple keys after flow scalars.
|
551
|
+
@allow_simple_key = false
|
552
|
+
# Scan and add SCALAR.
|
553
|
+
@tokens << scan_flow_scalar(style)
|
554
|
+
end
|
555
|
+
|
556
|
+
def fetch_plain
|
557
|
+
# A plain scalar could be a simple key.
|
558
|
+
save_possible_simple_key
|
559
|
+
# No simple keys after plain scalars. But note that `scan_plain` will
|
560
|
+
# change this flag if the scan is finished at the beginning of the
|
561
|
+
# line.
|
562
|
+
@allow_simple_key = false
|
563
|
+
# Scan and add SCALAR. May change `allow_simple_key`.
|
564
|
+
@tokens << scan_plain
|
565
|
+
end
|
566
|
+
|
567
|
+
# Checkers.
|
568
|
+
|
569
|
+
def check_directive
|
570
|
+
# DIRECTIVE: ^ '%' ...
|
571
|
+
# The '%' indicator is already checked.
|
572
|
+
@column == 0
|
573
|
+
end
|
574
|
+
|
575
|
+
def check_document_start
|
576
|
+
# DOCUMENT-START: ^ '---' (' '|'\n')
|
577
|
+
@column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
|
578
|
+
end
|
579
|
+
|
580
|
+
def check_document_end
|
581
|
+
# DOCUMENT-END: ^ '...' (' '|'\n')
|
582
|
+
@column == 0 && prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
|
583
|
+
end
|
584
|
+
|
585
|
+
def check_block_entry
|
586
|
+
# BLOCK-ENTRY: '-' (' '|'\n')
|
587
|
+
"\0 \t\r\n\x85".include?(peek(1))
|
588
|
+
end
|
589
|
+
|
590
|
+
def check_key
|
591
|
+
# KEY(flow context): '?'
|
592
|
+
# KEY(block context): '?' (' '|'\n')
|
593
|
+
@flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
|
594
|
+
end
|
595
|
+
|
596
|
+
def check_value
|
597
|
+
# VALUE(flow context): ':'
|
598
|
+
# VALUE(block context): ':' (' '|'\n')
|
599
|
+
@flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
|
600
|
+
end
|
601
|
+
|
602
|
+
def check_plain
|
603
|
+
# A plain scalar may start with any non-space character except:
|
604
|
+
# '-', '?', ':', ',', '[', ']', '{', '}',
|
605
|
+
# '#', '&', '*', '!', '|', '>', '\'', '\"',
|
606
|
+
# '%', '@', '`'.
|
607
|
+
#
|
608
|
+
# It may also start with
|
609
|
+
# '-', '?', ':'
|
610
|
+
# if it is followed by a non-space character.
|
611
|
+
#
|
612
|
+
# Note that we limit the last rule to the block context (except the
|
613
|
+
# '-' character) because we want the flow context to be space
|
614
|
+
# independent.
|
615
|
+
ch = peek
|
616
|
+
!("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) || (!("\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level==0 && "?:".include?(ch)))))
|
617
|
+
end
|
618
|
+
|
619
|
+
|
620
|
+
|
621
|
+
|
622
|
+
|
623
|
+
|
624
|
+
# Scanners.
|
625
|
+
|
626
|
+
def scan_to_next_token
|
627
|
+
# We ignore spaces, line breaks and comments.
|
628
|
+
# If we find a line break in the block context, we set the flag
|
629
|
+
# `allow_simple_key` on.
|
630
|
+
#
|
631
|
+
# TODO: We need to make tab handling rules more sane. A good rule is
|
632
|
+
# Tabs cannot precede tokens
|
633
|
+
# BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
|
634
|
+
# KEY(block), VALUE(block), BLOCK-ENTRY
|
635
|
+
# So the checking code is
|
636
|
+
# if <TAB>:
|
637
|
+
# @allow_simple_keys = false
|
638
|
+
# We also need to add the check for `allow_simple_keys == true` to
|
639
|
+
# `unwind_indent` before issuing BLOCK-END.
|
640
|
+
# Scanners for block, flow, and plain scalars need to be modified.
|
641
|
+
found = false
|
642
|
+
while !found
|
643
|
+
while peek == 32
|
644
|
+
forward
|
645
|
+
end
|
646
|
+
if peek == ?#
|
647
|
+
forward while !"\0\r\n\x85".include?(peek)
|
648
|
+
end
|
649
|
+
if !scan_line_break.empty?
|
650
|
+
@allow_simple_key = true if @flow_level==0
|
651
|
+
else
|
652
|
+
found = true
|
653
|
+
end
|
654
|
+
end
|
655
|
+
end
|
656
|
+
|
657
|
+
def scan_directive
|
658
|
+
# See the specification for details.
|
659
|
+
start_mark = get_mark
|
660
|
+
forward
|
661
|
+
name = scan_directive_name(start_mark)
|
662
|
+
value = nil
|
663
|
+
if name == "YAML"
|
664
|
+
value = scan_yaml_directive_value(start_mark)
|
665
|
+
end_mark = get_mark
|
666
|
+
elsif name == "TAG"
|
667
|
+
value = scan_tag_directive_value(start_mark)
|
668
|
+
end_mark = get_mark
|
669
|
+
else
|
670
|
+
end_mark = get_mark
|
671
|
+
forward while !"\0\r\n\x85".include?(peek)
|
672
|
+
end
|
673
|
+
scan_directive_ignored_line(start_mark)
|
674
|
+
DirectiveToken.new(name, value, start_mark, end_mark)
|
675
|
+
end
|
676
|
+
|
677
|
+
def scan_directive_name(start_mark)
|
678
|
+
# See the specification for details.
|
679
|
+
length = 0
|
680
|
+
ch = peek(length)
|
681
|
+
while /[-0-9A-Za-z_]/ =~ ch.chr
|
682
|
+
length += 1
|
683
|
+
ch = peek(length)
|
684
|
+
end
|
685
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if length==0
|
686
|
+
value = prefix(length)
|
687
|
+
forward(length)
|
688
|
+
ch = peek()
|
689
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
|
690
|
+
value
|
691
|
+
end
|
692
|
+
|
693
|
+
def scan_yaml_directive_value(start_mark)
|
694
|
+
# See the specification for details.
|
695
|
+
forward while peek == 32
|
696
|
+
major = scan_yaml_directive_number(start_mark)
|
697
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek != ?.
|
698
|
+
forward
|
699
|
+
minor = scan_yaml_directive_number(start_mark)
|
700
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !"\0 \r\n\x85".include?(peek)
|
701
|
+
[major, minor]
|
702
|
+
end
|
703
|
+
|
704
|
+
def scan_yaml_directive_number(start_mark)
|
705
|
+
# See the specification for details.
|
706
|
+
ch = peek
|
707
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !((?0..?9) === ch)
|
708
|
+
length = 0
|
709
|
+
length += 1 while ((?0..?9) === peek(length))
|
710
|
+
value = prefix(length)
|
711
|
+
forward(length)
|
712
|
+
value
|
713
|
+
end
|
714
|
+
|
715
|
+
def scan_tag_directive_value(start_mark)
|
716
|
+
# See the specification for details.
|
717
|
+
forward while peek == 32
|
718
|
+
handle = scan_tag_directive_handle(start_mark)
|
719
|
+
forward while peek == 32
|
720
|
+
prefix = scan_tag_directive_prefix(start_mark)
|
721
|
+
[handle, prefix]
|
722
|
+
end
|
723
|
+
|
724
|
+
def scan_tag_directive_handle(start_mark)
|
725
|
+
# See the specification for details.
|
726
|
+
value = scan_tag_handle("directive", start_mark)
|
727
|
+
ch = peek
|
728
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
|
729
|
+
value
|
730
|
+
end
|
731
|
+
|
732
|
+
def scan_tag_directive_prefix(start_mark)
|
733
|
+
# See the specification for details.
|
734
|
+
value = scan_tag_uri("directive", start_mark)
|
735
|
+
ch = peek
|
736
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
|
737
|
+
value
|
738
|
+
end
|
739
|
+
|
740
|
+
def scan_directive_ignored_line(start_mark)
|
741
|
+
# See the specification for details.
|
742
|
+
forward while peek == 32
|
743
|
+
if peek == ?#
|
744
|
+
forward while !"\0\r\n\x85".include?(peek)
|
745
|
+
end
|
746
|
+
ch = peek
|
747
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark()) if !"\0\r\n\x85".include?(ch)
|
748
|
+
scan_line_break
|
749
|
+
end
|
750
|
+
|
751
|
+
def scan_anchor(token)
|
752
|
+
# The specification does not restrict characters for anchors and
|
753
|
+
# aliases. This may lead to problems, for instance, the document:
|
754
|
+
# [ *alias, value ]
|
755
|
+
# can be interpteted in two ways, as
|
756
|
+
# [ "value" ]
|
757
|
+
# and
|
758
|
+
# [ *alias , "value" ]
|
759
|
+
# Therefore we restrict aliases to numbers and ASCII letters.
|
760
|
+
start_mark = get_mark
|
761
|
+
indicator = peek
|
762
|
+
name = (indicator == ?*) ? "alias":"anchor"
|
763
|
+
forward
|
764
|
+
length = 0
|
765
|
+
ch = peek(length)
|
766
|
+
while /[-0-9A-Za-z_]/ =~ ch.chr
|
767
|
+
length += 1
|
768
|
+
ch = peek(length)
|
769
|
+
end
|
770
|
+
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark) if length==0
|
771
|
+
value = prefix(length)
|
772
|
+
forward(length)
|
773
|
+
ch = peek
|
774
|
+
if !"\0 \t\r\n\x85?:,]}%@`".include?(ch)
|
775
|
+
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
|
776
|
+
end
|
777
|
+
end_mark = get_mark
|
778
|
+
token.new(value, start_mark, end_mark)
|
779
|
+
end
|
780
|
+
|
781
|
+
|
782
|
+
def scan_tag
|
783
|
+
# See the specification for details.
|
784
|
+
start_mark = get_mark
|
785
|
+
ch = peek(1)
|
786
|
+
if ch == ?<
|
787
|
+
handle = nil
|
788
|
+
forward(2)
|
789
|
+
suffix = scan_tag_uri("tag", start_mark)
|
790
|
+
raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
|
791
|
+
forward
|
792
|
+
elsif "\0 \t\r\n\x85".include?(ch)
|
793
|
+
handle = nil
|
794
|
+
suffix = "!"
|
795
|
+
forward
|
796
|
+
else
|
797
|
+
length = 1
|
798
|
+
use_handle = false
|
799
|
+
while !"\0 \t\r\n\x85".include?(ch)
|
800
|
+
if ch == ?!
|
801
|
+
use_handle = true
|
802
|
+
break
|
803
|
+
end
|
804
|
+
length += 1
|
805
|
+
ch = peek(length)
|
806
|
+
end
|
807
|
+
handle = "!"
|
808
|
+
if use_handle
|
809
|
+
handle = scan_tag_handle("tag", start_mark)
|
810
|
+
else
|
811
|
+
handle = "!"
|
812
|
+
forward
|
813
|
+
end
|
814
|
+
suffix = scan_tag_uri("tag", start_mark)
|
815
|
+
end
|
816
|
+
ch = peek
|
817
|
+
raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
|
818
|
+
value = [handle, suffix]
|
819
|
+
end_mark = get_mark
|
820
|
+
TagToken.new(value, start_mark, end_mark)
|
821
|
+
end
|
822
|
+
|
823
|
+
# Scans a literal (|) or folded (>) block scalar and returns a
# non-plain ScalarToken. `style` is the indicator character (?| or ?>).
def scan_block_scalar(style)
  # See the specification for details.
  folded = style == ?>
  chunks = []
  start_mark = get_mark
  # Scan the header.
  forward
  chomping, increment = scan_block_scalar_indicators(start_mark)
  scan_block_scalar_ignored_line(start_mark)
  # Determine the indentation level and go to the first non-empty line.
  min_indent = @indent+1
  min_indent = 1 if min_indent < 1
  if increment.nil?
    breaks, max_indent, end_mark = scan_block_scalar_indentation
    indent = [min_indent, max_indent].max
  else
    indent = min_indent+increment-1
    breaks, end_mark = scan_block_scalar_breaks(indent)
  end
  line_break = ''
  # Scan the inner part of the block scalar.
  while @column == indent and peek != ?\0
    chunks += breaks
    leading_non_space = !" \t".include?(peek)
    length = 0
    length += 1 while !"\0\r\n\x85".include?(peek(length))
    chunks << prefix(length)
    forward(length)
    line_break = scan_line_break
    breaks, end_mark = scan_block_scalar_breaks(indent)
    if @column == indent && peek != ?\0
      # Folding rules per the specification. NOTE: scan_line_break
      # returns a String, so we must compare against "\n" -- the
      # original compared against the Fixnum ?\n, which never matched,
      # so folded scalars kept their literal line breaks.
      if folded && line_break == "\n" && leading_non_space && !" \t".include?(peek)
        chunks << ' ' if breaks.empty?
      else
        chunks << line_break
      end
    else
      break
    end
  end

  # Chomp the tail: nil (clip) keeps the final line break, true (keep)
  # also retains trailing empty lines, false (strip) drops both.
  chunks << line_break if chomping != false
  chunks += breaks if chomping == true

  # We are done.
  ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
end
|
887
|
+
|
888
|
+
# Scans the optional chomping (+/-) and indentation (1-9) indicators of
# a block scalar header, in either order.
# Returns [chomping, increment] where chomping is true (keep), false
# (strip) or nil (clip), and increment is the digit value or nil.
def scan_block_scalar_indicators(start_mark)
  # See the specification for details.
  chomping = nil
  increment = nil
  ch = peek
  if /[+-]/ =~ ch.chr
    chomping = ch == ?+
    forward
    ch = peek
    if (?0..?9) === ch
      # peek returns a character code; convert the digit character to
      # its numeric value (ch.to_i on a Fixnum would just return the
      # character code, e.g. 51 for '3').
      increment = ch.chr.to_i
      raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
      forward
    end
  elsif (?0..?9) === ch
    # Same conversion as above -- the original stored the raw character
    # code here, so the indentation indicator was wildly wrong.
    increment = ch.chr.to_i
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
    forward
    ch = peek
    if /[+-]/ =~ ch.chr
      chomping = ch == ?+
      forward
    end
  end
  ch = peek
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
  [chomping, increment]
end
|
916
|
+
|
917
|
+
# Skips trailing spaces and an optional comment after a block scalar
# header, then consumes the terminating line break.
def scan_block_scalar_ignored_line(start_mark)
  # See the specification for details.
  while peek == 32
    forward
  end
  if peek == ?#
    until "\0\r\n\x85".include?(peek)
      forward
    end
  end
  ch = peek

  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark)
  end
  scan_line_break
end
|
928
|
+
|
929
|
+
# Skips the leading empty lines of a block scalar, collecting their
# line breaks, the deepest column of indentation reached and the mark
# just past the consumed region.
def scan_block_scalar_indentation
  # See the specification for details.
  breaks = []
  deepest = 0
  end_mark = get_mark
  while " \r\n\x85".include?(peek)
    if peek == 32
      forward
      deepest = @column if @column > deepest
    else
      breaks << scan_line_break
      end_mark = get_mark
    end
  end
  [breaks, deepest, end_mark]
end
|
945
|
+
|
946
|
+
# Consumes indentation spaces (up to `indent`) and any following blank
# lines; returns the collected line breaks and the mark after them.
def scan_block_scalar_breaks(indent)
  # See the specification for details.
  breaks = []
  end_mark = get_mark
  while @column < indent && peek == 32
    forward
  end
  while "\r\n\x85".include?(peek)
    breaks << scan_line_break
    end_mark = get_mark
    while @column < indent && peek == 32
      forward
    end
  end
  [breaks, end_mark]
end
|
958
|
+
|
959
|
+
# Scans a single- or double-quoted scalar. Quoted scalars delimit their
# own extent, so indentation rules are relaxed here; the helpers only
# reject document separators appearing inside the scalar.
# `style` is the opening quote character (?' or ?").
def scan_flow_scalar(style)
  # See the specification for details.
  dbl = style == ?"
  pieces = []
  start_mark = get_mark
  quote = peek
  forward
  pieces.concat(scan_flow_scalar_non_spaces(dbl, start_mark))
  until peek == quote
    pieces.concat(scan_flow_scalar_spaces(dbl, start_mark))
    pieces.concat(scan_flow_scalar_non_spaces(dbl, start_mark))
  end
  forward
  end_mark = get_mark
  ScalarToken.new(pieces.join(''), false, start_mark, end_mark,style)
end
|
980
|
+
|
981
|
+
# Replacement text for single-character escapes in double-quoted
# scalars: the character following the backslash (as a one-character
# String key) maps to the String it denotes.
ESCAPE_REPLACEMENTS = {
  "0" => "\0",
  "a" => "\x07",
  "b" => "\x08",
  "t" => "\x09",
  "\t" => "\x09",
  "n" => "\x0A",
  "v" => "\x0B",
  "f" => "\x0C",
  "r" => "\x0D",
  "e" => "\x1B",
  " " => "\x20",
  '"' => '"',
  "\\" => "\\",
  "N" => "\x85",
  "_" => "\xA0"
}

# Escapes that introduce a fixed-length hexadecimal character code,
# mapped to the number of hex digits that follow ('x' => \xNN).
# NOTE(review): PyYAML also supports 'u' (4 digits) and 'U' (8 digits);
# this port appears to handle only \xNN -- confirm intent.
ESCAPE_CODES = {
  'x' => 2
}
|
1002
|
+
|
1003
|
+
# Scans the non-whitespace portions of a quoted scalar, expanding
# escape sequences in double-quoted style. Returns an array of string
# chunks.
def scan_flow_scalar_non_spaces(double, start_mark)
  # See the specification for details.
  chunks = []
  while true
    length = 0
    length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
    if length!=0
      chunks << prefix(length)
      forward(length)
    end
    ch = peek
    if !double && ch == ?' && peek(1) == ?'
      # '' inside a single-quoted scalar is an escaped quote. Append a
      # String -- the original appended the Fixnum ?', which join()
      # rendered as "39".
      chunks << "'"
      forward(2)
    elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
      # Same Fixnum-vs-String fix: append the character, not its code.
      chunks << ch.chr
      forward
    elsif double && ch == ?\\
      forward
      ch = peek
      if ESCAPE_REPLACEMENTS.member?(ch.chr)
        chunks << ESCAPE_REPLACEMENTS[ch.chr]
        forward
      elsif ESCAPE_CODES.member?(ch.chr)
        length = ESCAPE_CODES[ch.chr]
        forward
        length.times do |k|
          if /[0-9A-Fa-f]/ !~ peek(k).chr
            raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
              "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
          end
        end
        # The digits are hexadecimal: parse base 16 and append the
        # decoded character. The original did to_i.to_s(16), i.e. a
        # decimal parse re-rendered as hex, which is not a decode at all.
        code = prefix(length).to_i(16).chr
        chunks << code
        forward(length)
      elsif "\r\n\x85".include?(ch)
        # An escaped line break folds away; collect following breaks.
        scan_line_break
        chunks += scan_flow_scalar_breaks(double, start_mark)
      else
        raise ScannerError.new("while scanning a double-quoted scalar", start_mark,"found unknown escape character #{ch}",get_mark)
      end
    else
      return chunks
    end
  end
end
|
1049
|
+
|
1050
|
+
# Scans a run of whitespace inside a quoted scalar, applying the
# line-folding rules. Returns an array of string chunks.
def scan_flow_scalar_spaces(double, start_mark)
  # See the specification for details.
  chunks = []
  length = 0
  length += 1 while /[ \t]/ =~ peek(length).chr
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if ch == ?\0
    raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
  elsif "\r\n\x85".include?(ch)
    line_break = scan_line_break
    breaks = scan_flow_scalar_breaks(double, start_mark)
    # scan_line_break returns a String, so compare against "\n". The
    # original compared against the Fixnum ?\n, which never matched, so
    # a single line break was never folded into a space.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  else
    chunks << whitespaces
  end
  chunks
end
|
1074
|
+
|
1075
|
+
# Collects consecutive line breaks inside a quoted scalar. Instead of
# checking indentation we only reject document separators.
def scan_flow_scalar_breaks(double, start_mark)
  # See the specification for details.
  breaks = []
  loop do
    head = prefix(3)
    if (head == "---" || head == "...") && "\0 \t\r\n\x85".include?(peek(3))
      raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
    end
    while /[ \t]/ =~ peek.chr
      forward
    end
    return breaks unless "\r\n\x85".include?(peek)
    breaks << scan_line_break
  end
end
|
1093
|
+
|
1094
|
+
# Scans a plain (unquoted) scalar and returns a plain ScalarToken.
def scan_plain
  # See the specification for details.
  # We add an additional restriction for the flow context:
  # plain scalars in the flow context cannot contain ',', ':' and '?'.
  # We also keep track of the `allow_simple_key` flag here.
  # Indentation rules are loosed for the flow context.
  chunks = []
  start_mark = get_mark
  end_mark = start_mark
  indent = @indent+1
  # We allow zero indentation for scalars, but then we need to check for
  # document separators at the beginning of the line.
  #if indent == 0
  #  indent = 1
  spaces = []
  while true
    length = 0
    break if peek == ?#
    while true
      ch = peek(length)
      # NOTE: the lookahead set is "\0 \t\r\n\x85" (\x85 is NEL); the
      # original had \x28 ('(') in place of \x85, a transcription typo
      # from the PyYAML source, which made "key:(..." terminate a plain
      # scalar and let "key:\x85" run on.
      if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x85".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
        break
      end
      length += 1
    end
    # Same \x28 -> \x85 fix in the flow-context ':' diagnostic below.
    if @flow_level != 0 && ch == ?: && !"\0 \t\r\n\x85[]{}".include?(peek(length+1))
      forward(length)
      raise ScannerError.new("while scanning a plain scalar",start_mark,"found unexpected ':'",get_mark,"Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
    end
    break if length == 0
    @allow_simple_key = false
    chunks += spaces
    chunks << prefix(length)
    forward(length)
    end_mark = get_mark
    spaces = scan_plain_spaces(indent, start_mark)
    break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
  end
  return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
end
|
1134
|
+
|
1135
|
+
# Scans whitespace and line breaks between the words of a plain scalar,
# folding them per the specification. Returns an array of chunks, or
# nil when a document separator ends the scalar.
def scan_plain_spaces(indent, start_mark)
  # See the specification for details.
  # The specification is really confusing about tabs in plain scalars.
  # We just forbid them completely. Do not use tabs in YAML!
  chunks = []
  length = 0
  length += 1 while peek(length) == 32
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if "\r\n\x85".include?(ch)
    line_break = scan_line_break
    @allow_simple_key = true
    # Renamed from `prefix` to avoid shadowing the prefix() method.
    head = prefix(3)
    return if (head == "---" || head == "...") && "\0 \t\r\n\x85".include?(peek(3))
    breaks = []
    while " \r\n\x85".include?(peek)
      if peek == 32
        forward
      else
        breaks << scan_line_break
        head = prefix(3)
        return if (head == "---" || head == "...") && "\0 \t\r\n\x85".include?(peek(3))
      end
    end
    # scan_line_break returns "\n". The original compared against the
    # single-quoted literal '\n' (a backslash followed by 'n'), which
    # never matches, so a lone line break was never folded into a space.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  elsif !whitespaces.empty?
    chunks << whitespaces
  end
  chunks
end
|
1171
|
+
|
1172
|
+
# Scans a tag handle: a '!' optionally followed by word characters and
# a closing '!'. Returns the handle text including the bang(s).
# For some strange reasons, the specification does not allow '_' in
# tag handles; it is allowed here anyway.
def scan_tag_handle(name, start_mark)
  ch = peek
  raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) unless ch == ?!
  length = 1
  ch = peek(length)
  unless ch == 32
    while /[-_0-9A-Za-z]/ =~ ch.chr
      length += 1
      ch = peek(length)
    end
    unless ch == ?!
      forward(length)
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
    end
    length += 1
  end
  handle = prefix(length)
  forward(length)
  handle
end
|
1195
|
+
|
1196
|
+
# Collects the characters of a tag URI, expanding %-escape runs as they
# are encountered. Note: we do not check if the URI is well-formed.
def scan_tag_uri(name, start_mark)
  parts = []
  pending = 0
  ch = peek(pending)
  while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
    if ch == ?%
      # Flush the literal run scanned so far, then decode the escapes.
      parts << prefix(pending)
      forward(pending)
      pending = 0
      parts << scan_uri_escapes(name, start_mark)
    else
      pending += 1
    end
    ch = peek(pending)
  end
  if pending > 0
    parts << prefix(pending)
    forward(pending)
  end

  raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if parts.empty?
  parts.join('')
end
|
1222
|
+
|
1223
|
+
# Decodes a run of %XX escape sequences into the bytes they denote and
# returns them as a String.
def scan_uri_escapes(name, start_mark)
  # See the specification for details.
  bytes = []
  while peek == ?%
    forward
    2.times do |k|
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
        get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
    end
    # The two digits are hexadecimal: parse base 16 and append the
    # decoded character. The original did to_i.to_s(16) -- a decimal
    # parse re-rendered as a hex string -- so "%20" became "14", not " ".
    bytes << prefix(2).to_i(16).chr
    forward(2)
  end
  bytes.join('')
end
|
1238
|
+
|
1239
|
+
# Consumes one line break and normalizes it:
#   '\r\n' : '\n'
#   '\r'   : '\n'
#   '\n'   : '\n'
#   '\x85' : '\n'
#   default: '' (nothing consumed)
def scan_line_break
  return "" unless "\r\n\x85".include?(peek)
  if prefix(2) == "\r\n"
    forward(2)
  else
    forward
  end
  "\n"
end
|
1257
|
+
end
|
1258
|
+
end
|
1259
|
+
|