RbYAML 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbyaml.rb +14 -256
- data/lib/rbyaml.rb.~1.2.~ +383 -0
- data/lib/rbyaml/composer.rb +9 -11
- data/lib/rbyaml/{composer.rb.~1.2.~ → composer.rb.~1.3.~} +28 -25
- data/lib/rbyaml/constants.rb +95 -0
- data/lib/rbyaml/constructor.rb +180 -89
- data/lib/rbyaml/{constructor.rb.~1.2.~ → constructor.rb.~1.9.~} +137 -95
- data/lib/rbyaml/dumper.rb +12 -9
- data/lib/rbyaml/dumper.rb.~1.3.~ +36 -0
- data/lib/rbyaml/emitter.rb +14 -28
- data/lib/rbyaml/{emitter.rb.~1.2.~ → emitter.rb.~1.6.~} +22 -33
- data/lib/rbyaml/error.rb +4 -57
- data/lib/rbyaml/error.rb.~1.2.~ +75 -0
- data/lib/rbyaml/events.rb +8 -14
- data/lib/rbyaml/{events.rb.~1.2.~ → events.rb.~1.4.~} +29 -6
- data/lib/rbyaml/nodes.rb +5 -5
- data/lib/rbyaml/{nodes.rb.~1.2.~ → nodes.rb.~1.3.~} +13 -9
- data/lib/rbyaml/parser.rb +70 -108
- data/lib/rbyaml/parser.rb.~1.4.~ +632 -0
- data/lib/rbyaml/representer.rb +19 -157
- data/lib/rbyaml/representer.rb.old +317 -0
- data/lib/rbyaml/{representer.rb.~1.2.~ → representer.rb.~1.5.~} +60 -26
- data/lib/rbyaml/resolver.rb +6 -6
- data/lib/rbyaml/{resolver.rb.~1.1.~ → resolver.rb.~1.6.~} +20 -20
- data/lib/rbyaml/rubytypes.rb +391 -0
- data/lib/rbyaml/scanner.rb +123 -225
- data/lib/rbyaml/{scanner.rb.~1.2.~ → scanner.rb.~1.5.~} +466 -378
- data/lib/rbyaml/serializer.rb +9 -9
- data/lib/rbyaml/{serializer.rb.~1.2.~ → serializer.rb.~1.4.~} +19 -17
- data/lib/rbyaml/stream.rb +48 -0
- data/lib/rbyaml/tag.rb +72 -0
- data/lib/rbyaml/tokens.rb +22 -16
- data/lib/rbyaml/{tokens.rb.~1.2.~ → tokens.rb.~1.3.~} +44 -4
- data/lib/rbyaml/types.rb +146 -0
- data/lib/rbyaml/util.rb.~1.3.~ +38 -0
- data/lib/rbyaml/yaml.rb +22 -32
- data/lib/rbyaml/{yaml.rb.~1.2.~ → yaml.rb.~1.5.~} +17 -17
- data/test/load_one.rb +6 -0
- data/test/load_one_yaml.rb +6 -0
- data/test/output_events.rb +9 -0
- data/test/test_add_ctor.rb +51 -0
- data/test/test_add_ctor.rb.~1.1.~ +30 -0
- data/test/test_bm.rb +2 -2
- data/test/test_bm.rb.~1.1.~ +28 -0
- data/test/test_gems.rb +10 -0
- data/test/test_one.rb.~1.1.~ +5 -0
- data/test/test_one_syck.rb +5 -0
- data/test/test_rbyaml.rb +63 -32
- data/test/test_rbyaml.rb.~1.6.~ +59 -0
- data/test/{test_rbyaml.rb.~1.2.~ → test_rbyaml_old.rb} +13 -4
- data/test/test_time_events.rb +24 -0
- data/test/test_time_nodes.rb +24 -0
- data/test/test_time_tokens.rb +24 -0
- data/test/yaml/gems_new.yml +147456 -0
- data/test/yaml/test1.rb +8 -0
- data/test/yaml/test10.rb +14 -0
- data/test/yaml/test11.rb +13 -0
- data/test/yaml/test12.rb +9 -0
- data/test/yaml/test13.rb +9 -0
- data/test/yaml/test14.rb +13 -0
- data/test/yaml/test15.rb +12 -0
- data/test/yaml/test16.rb +11 -0
- data/test/yaml/test16.rb.~1.1.~ +11 -0
- data/test/yaml/test17.rb +10 -0
- data/test/yaml/test18.rb +13 -0
- data/test/yaml/test19.rb +9 -0
- data/test/yaml/test19.yml +1 -1
- data/test/yaml/test2.rb +8 -0
- data/test/yaml/test20.rb +11 -0
- data/test/yaml/test20.rb.~1.1.~ +9 -0
- data/test/yaml/test20.yml +1 -1
- data/test/yaml/test3.rb +13 -0
- data/test/yaml/test4.rb +13 -0
- data/test/yaml/test5.rb +8 -0
- data/test/yaml/test6.rb +10 -0
- data/test/yaml/test7.rb +15 -0
- data/test/yaml/test8.rb +15 -0
- data/test/yaml/test9.rb +13 -0
- metadata +61 -16
- data/lib/rbyaml/dumper.rb.~1.2.~ +0 -43
- data/lib/rbyaml/parser.rb.~1.2.~ +0 -494
@@ -23,42 +23,43 @@
|
|
23
23
|
# Read comments in the Scanner code for more details.
|
24
24
|
#
|
25
25
|
|
26
|
+
require 'rbyaml/util'
|
26
27
|
require 'rbyaml/error'
|
27
28
|
require 'rbyaml/tokens'
|
28
29
|
|
29
30
|
module RbYAML
|
30
31
|
class ScannerError < MarkedYAMLError
|
31
32
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
@
|
38
|
-
@
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
@
|
33
|
+
class ReaderError < YAMLError
|
34
|
+
def initialize(name, position, character, encoding, reason)
|
35
|
+
@name = name
|
36
|
+
@position = position
|
37
|
+
@character = character
|
38
|
+
@encoding = encoding
|
39
|
+
@reason = reason
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_s
|
43
|
+
if @character.__is_str
|
44
|
+
"'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
|
45
|
+
else
|
46
|
+
"unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
|
47
|
+
end
|
43
48
|
end
|
44
49
|
end
|
45
50
|
|
46
|
-
|
47
|
-
def initialize_scanner
|
48
|
-
# It is assumed that Scanner and Reader will mixin to the same point.
|
49
|
-
# Reader do the dirty work of checking for BOM. It also adds NUL to the end.
|
50
|
-
#
|
51
|
-
# Reader supports the following methods
|
52
|
-
# self.peek(i=0) # peek the next i-th character
|
53
|
-
# self.prefix(l=1) # peek the next l characters
|
54
|
-
# self.forward(l=1) # read the next l characters and move the pointer.
|
51
|
+
SimpleKey = Struct.new(:token_number, :required, :index, :line, :column, :mark)
|
55
52
|
|
53
|
+
class Scanner
|
54
|
+
attr_reader :column, :stream, :stream_pointer, :eof, :buffer, :pointer, :index, :line
|
55
|
+
def initialize(stream)
|
56
56
|
# Had we reached the end of the stream?
|
57
57
|
@done = false
|
58
58
|
|
59
59
|
# The number of unclosed '{' and '['. `flow_level == 0` means block
|
60
60
|
# context.
|
61
61
|
@flow_level = 0
|
62
|
+
@flow_zero = true
|
62
63
|
|
63
64
|
# List of processed tokens that are not yet emitted.
|
64
65
|
@tokens = []
|
@@ -104,6 +105,199 @@ module RbYAML
|
|
104
105
|
# A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
|
105
106
|
# '[', or '{' tokens.
|
106
107
|
@possible_simple_keys = {}
|
108
|
+
|
109
|
+
@stream = nil
|
110
|
+
@stream_pointer = 0
|
111
|
+
@eof = true
|
112
|
+
@buffer = ""
|
113
|
+
@buffer_length = 0
|
114
|
+
@pointer = 0
|
115
|
+
@pointer1 = 1
|
116
|
+
@column = 0
|
117
|
+
if stream.__is_str
|
118
|
+
@name = "<string>"
|
119
|
+
@raw_buffer = stream
|
120
|
+
else
|
121
|
+
@stream = stream
|
122
|
+
@name = stream.respond_to?(:path) ? stream.path : stream.inspect
|
123
|
+
@eof = false
|
124
|
+
@raw_buffer = ""
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def peek(index=0)
|
129
|
+
peekn(index)
|
130
|
+
end
|
131
|
+
|
132
|
+
def peek0
|
133
|
+
update(1) unless @pointer1 < @buffer_length
|
134
|
+
@buffer[@pointer]
|
135
|
+
end
|
136
|
+
|
137
|
+
def peek1
|
138
|
+
update(2) unless @pointer1+1 < @buffer_length
|
139
|
+
@buffer[@pointer1]
|
140
|
+
end
|
141
|
+
|
142
|
+
def peek2
|
143
|
+
update(3) unless @pointer1+2 < @buffer_length
|
144
|
+
@buffer[@pointer1+1]
|
145
|
+
end
|
146
|
+
|
147
|
+
def peek3
|
148
|
+
update(4) unless @pointer1+3 < @buffer_length
|
149
|
+
@buffer[@pointer1+2]
|
150
|
+
end
|
151
|
+
|
152
|
+
def peekn(index=0)
|
153
|
+
pix = @pointer1+index
|
154
|
+
unless pix < @buffer_length
|
155
|
+
update(index+1)
|
156
|
+
pix = @pointer1+index
|
157
|
+
end
|
158
|
+
@buffer[pix-1]
|
159
|
+
end
|
160
|
+
|
161
|
+
def prefix(length=1)
|
162
|
+
update(length) unless @pointer+length < @buffer_length
|
163
|
+
@buffer[@pointer...@pointer+length]
|
164
|
+
end
|
165
|
+
|
166
|
+
def prefix2()
|
167
|
+
update(2) unless @pointer1+1 < @buffer_length
|
168
|
+
@buffer[@pointer..@pointer1]
|
169
|
+
end
|
170
|
+
|
171
|
+
def forward(length=1)
|
172
|
+
case length
|
173
|
+
when 0: forward0
|
174
|
+
when 1: forward1
|
175
|
+
when 2: forward2
|
176
|
+
when 3: forward3
|
177
|
+
when 4: forward4
|
178
|
+
when 5: forward5
|
179
|
+
when 6: forward6
|
180
|
+
else forwardn(length)
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def forward0
|
185
|
+
update(1) unless @pointer1 < @buffer_length
|
186
|
+
end
|
187
|
+
|
188
|
+
LINE_BR = "\n\x85"
|
189
|
+
|
190
|
+
def forward1
|
191
|
+
update(2) unless @pointer1+1 < @buffer_length
|
192
|
+
buff = @buffer[@pointer...@pointer1+1]
|
193
|
+
index = buff.rindex(LINE_BR_REG)
|
194
|
+
@column = index ? -index : column+1
|
195
|
+
@pointer += 1
|
196
|
+
@pointer1 += 1
|
197
|
+
end
|
198
|
+
|
199
|
+
def forward2
|
200
|
+
update(3) unless @pointer1+2 < @buffer_length
|
201
|
+
buff = @buffer[@pointer...@pointer1+2]
|
202
|
+
index = buff.rindex(LINE_BR_REG)
|
203
|
+
@column = index ? 1-index : column+2
|
204
|
+
@pointer += 2
|
205
|
+
@pointer1 += 2
|
206
|
+
end
|
207
|
+
|
208
|
+
def forward3
|
209
|
+
update(4) unless @pointer1+3 < @buffer_length
|
210
|
+
buff = @buffer[@pointer...@pointer1+3]
|
211
|
+
index = buff.rindex(LINE_BR_REG)
|
212
|
+
@column = index ? 2-index : column+3
|
213
|
+
@pointer += 3
|
214
|
+
@pointer1 += 3
|
215
|
+
end
|
216
|
+
|
217
|
+
def forward4
|
218
|
+
update(5) unless @pointer1+4 < @buffer_length
|
219
|
+
buff = @buffer[@pointer...@pointer1+4]
|
220
|
+
index = buff.rindex(LINE_BR_REG)
|
221
|
+
@column = index ? 3-index : column+4
|
222
|
+
@pointer += 4
|
223
|
+
@pointer1 += 4
|
224
|
+
end
|
225
|
+
|
226
|
+
def forward5
|
227
|
+
update(6) unless @pointer1+5 < @buffer_length
|
228
|
+
buff = @buffer[@pointer...@pointer1+5]
|
229
|
+
index = buff.rindex(LINE_BR_REG)
|
230
|
+
@column = index ? 4-index : column+5
|
231
|
+
@pointer += 5
|
232
|
+
@pointer1 += 5
|
233
|
+
end
|
234
|
+
|
235
|
+
def forward6
|
236
|
+
update(7) unless @pointer1+6 < @buffer_length
|
237
|
+
buff = @buffer[@pointer...@pointer1+6]
|
238
|
+
index = buff.rindex(LINE_BR_REG)
|
239
|
+
@column = index ? 5-index : column+6
|
240
|
+
@pointer += 6
|
241
|
+
@pointer1 += 6
|
242
|
+
end
|
243
|
+
|
244
|
+
LINE_BR_REG = /[\n\x85]|(?:\r[^\n])/
|
245
|
+
def forwardn(length)
|
246
|
+
update(length + 1) unless @pointer1+length < @buffer_length
|
247
|
+
buff = @buffer[@pointer...@pointer+length]
|
248
|
+
index = buff.rindex(LINE_BR_REG)
|
249
|
+
@column = index ? (length-index)-1 : column+length
|
250
|
+
@pointer += length
|
251
|
+
@pointer1 += length
|
252
|
+
end
|
253
|
+
|
254
|
+
def get_mark
|
255
|
+
if @stream.nil?
|
256
|
+
Mark.new(@name,@column,@buffer,@pointer)
|
257
|
+
else
|
258
|
+
Mark.new(@name,@column,nil,nil)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/
|
263
|
+
def check_printable(data)
|
264
|
+
if NON_PRINTABLE =~ data
|
265
|
+
position = @buffer.length-@pointer+($~.offset(0)[0])
|
266
|
+
raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed"),"special characters are not allowed"
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
|
271
|
+
def update(length)
|
272
|
+
return if @raw_buffer.nil?
|
273
|
+
@buffer = @buffer[@pointer..-1]
|
274
|
+
@pointer = 0
|
275
|
+
while @buffer.length < length
|
276
|
+
unless @eof
|
277
|
+
data = @stream.read(1024)
|
278
|
+
if data && !data.empty?
|
279
|
+
@buffer << data
|
280
|
+
@stream_pointer += data.length
|
281
|
+
@raw_buffer = ""
|
282
|
+
else
|
283
|
+
@eof = true
|
284
|
+
@buffer << ?\0
|
285
|
+
@raw_buffer = nil
|
286
|
+
break
|
287
|
+
end
|
288
|
+
else
|
289
|
+
@buffer << @raw_buffer << ?\0
|
290
|
+
@raw_buffer = nil
|
291
|
+
break
|
292
|
+
end
|
293
|
+
end
|
294
|
+
@buffer_length = @buffer.length
|
295
|
+
if @eof
|
296
|
+
check_printable(@buffer[(-length)..-2])
|
297
|
+
else
|
298
|
+
check_printable(@buffer[(-length)..-1])
|
299
|
+
end
|
300
|
+
@pointer1 = @pointer+1
|
107
301
|
end
|
108
302
|
|
109
303
|
def check_token(*choices)
|
@@ -144,70 +338,51 @@ module RbYAML
|
|
144
338
|
|
145
339
|
def need_more_tokens
|
146
340
|
return false if @done
|
147
|
-
|
148
|
-
# The current token may be a potential simple key, so we
|
149
|
-
# need to look further.
|
150
|
-
stale_possible_simple_keys
|
151
|
-
return true if next_possible_simple_key == @tokens_taken
|
341
|
+
@tokens.empty? || next_possible_simple_key == @tokens_taken
|
152
342
|
end
|
153
343
|
|
344
|
+
ENDING = /^---[\0 \t\r\n\x85]$/
|
345
|
+
START = /^\.\.\.[\0 \t\r\n\x85]$/
|
346
|
+
NULL_OR_OTHER = "\0 \t\r\n\x85"
|
347
|
+
# BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@`]|([\-?:][^\0 \t\r\n\x85]))/ #Since current SYCK handles this one wrong, we have to allow backtick right now.
|
348
|
+
BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@]|([\-?:][^\0 \t\r\n\x85]))/
|
154
349
|
def fetch_more_tokens
|
155
350
|
# Eat whitespaces and comments until we reach the next token.
|
156
351
|
scan_to_next_token
|
157
352
|
|
158
353
|
# Remove obsolete possible simple keys.
|
159
|
-
stale_possible_simple_keys
|
354
|
+
# stale_possible_simple_keys
|
160
355
|
|
161
356
|
# Compare the current indentation and column. It may add some tokens
|
162
357
|
# and decrease the current indentation level.
|
163
358
|
unwind_indent(@column)
|
164
359
|
|
165
360
|
# Peek the next character.
|
166
|
-
ch =
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
# Is it the value indicator?
|
192
|
-
when ch == ?: && check_value: fetch_value
|
193
|
-
# Is it an alias?
|
194
|
-
when ch == ?*: fetch_alias
|
195
|
-
# Is it an anchor?
|
196
|
-
when ch == ?&: fetch_anchor
|
197
|
-
# Is it a tag?
|
198
|
-
when ch == ?!: fetch_tag
|
199
|
-
# Is it a literal scalar?
|
200
|
-
when ch == ?| && @flow_level==0: fetch_literal
|
201
|
-
# Is it a folded scalar?
|
202
|
-
when ch == ?> && @flow_level==0: fetch_folded
|
203
|
-
# Is it a single quoted scalar?
|
204
|
-
when ch == ?': fetch_single
|
205
|
-
# Is it a double quoted scalar?
|
206
|
-
when ch == ?": fetch_double
|
207
|
-
# It must be a plain scalar then.
|
208
|
-
when check_plain: fetch_plain
|
209
|
-
else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
|
210
|
-
end
|
361
|
+
ch = peek0
|
362
|
+
colz = @column == 0
|
363
|
+
|
364
|
+
case ch
|
365
|
+
when ?\0: return fetch_stream_end
|
366
|
+
when ?': return fetch_single
|
367
|
+
when ?": return fetch_double
|
368
|
+
when ??: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_key end
|
369
|
+
when ?:: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_value end
|
370
|
+
when ?%: if colz: return fetch_stream_end end
|
371
|
+
when ?-: if colz && ENDING =~ prefix(4): return fetch_document_start; elsif NULL_OR_OTHER.include?(peek1): return fetch_block_entry end
|
372
|
+
when ?.: if colz && START =~ prefix(4): return fetch_document_end end
|
373
|
+
when ?[: return fetch_flow_sequence_start
|
374
|
+
when ?{: return fetch_flow_mapping_start
|
375
|
+
when ?]: return fetch_flow_sequence_end
|
376
|
+
when ?}: return fetch_flow_mapping_end
|
377
|
+
when ?,: return fetch_flow_entry
|
378
|
+
when ?*: return fetch_alias
|
379
|
+
when ?&: return fetch_anchor
|
380
|
+
when ?!: return fetch_tag
|
381
|
+
when ?|: if @flow_zero: return fetch_literal end
|
382
|
+
when ?>: if @flow_zero: return fetch_folded end
|
383
|
+
end
|
384
|
+
return fetch_plain if BEG =~ prefix(2)
|
385
|
+
raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
|
211
386
|
end
|
212
387
|
|
213
388
|
# Simple keys treatment.
|
@@ -215,58 +390,22 @@ module RbYAML
|
|
215
390
|
def next_possible_simple_key
|
216
391
|
# Return the number of the nearest possible simple key. Actually we
|
217
392
|
# don't need to loop through the whole dictionary.
|
218
|
-
|
219
|
-
|
220
|
-
key = @possible_simple_keys[level]
|
221
|
-
if min_token_number.nil? || key.token_number < min_token_number
|
222
|
-
min_token_number = key.token_number
|
223
|
-
end
|
224
|
-
end
|
225
|
-
min_token_number
|
393
|
+
@possible_simple_keys.each_value {|key| return key.token_number if key.token_number}
|
394
|
+
nil
|
226
395
|
end
|
227
396
|
|
228
|
-
def stale_possible_simple_keys
|
229
|
-
# Remove entries that are no longer possible simple keys. According to
|
230
|
-
# the YAML specification, simple keys
|
231
|
-
# - should be limited to a single line,
|
232
|
-
# - should be no longer than 1024 characters.
|
233
|
-
# Disabling this procedure will allow simple keys of any length and
|
234
|
-
# height (may cause problems if indentation is broken though).
|
235
|
-
@possible_simple_keys.delete_if {|level,key|
|
236
|
-
if key.line != @line || @index-key.index > 1024
|
237
|
-
raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'",get_mark) if key.required
|
238
|
-
return true
|
239
|
-
end
|
240
|
-
return false
|
241
|
-
}
|
242
|
-
end
|
243
|
-
|
244
397
|
def save_possible_simple_key
|
245
398
|
# The next token may start a simple key. We check if it's possible
|
246
399
|
# and save its position. This function is called for
|
247
400
|
# ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
|
248
|
-
|
249
|
-
# Check if a simple key is required at the current position.
|
250
|
-
required = @flow_level==0 && @indent == @column
|
251
|
-
|
252
401
|
# The next token might be a simple key. Let's save it's number and
|
253
402
|
# position.
|
254
|
-
if @allow_simple_key
|
255
|
-
remove_possible_simple_key
|
256
|
-
token_number = @tokens_taken+@tokens.length
|
257
|
-
key = SimpleKey.new(token_number, required,@index,@line,@column,get_mark)
|
258
|
-
@possible_simple_keys[@flow_level] = key
|
259
|
-
end
|
403
|
+
@possible_simple_keys[@flow_level] = SimpleKey.new(@tokens_taken+@tokens.length, @flow_zero && @indent == @column,-1,-1,column,get_mark) if @allow_simple_key
|
260
404
|
end
|
261
405
|
|
262
|
-
def remove_possible_simple_key
|
263
|
-
# Remove the saved possible key position at the current flow level.
|
264
|
-
key = @possible_simple_keys[@flow_level] if @possible_simple_keys.member?(@flow_level)
|
265
|
-
end
|
266
|
-
|
267
406
|
# Indentation functions.
|
268
407
|
|
269
|
-
def unwind_indent(
|
408
|
+
def unwind_indent(col)
|
270
409
|
## In flow context, tokens should respect indentation.
|
271
410
|
## Actually the condition should be `@indent >= column` according to
|
272
411
|
## the spec. But this condition will prohibit intuitively correct
|
@@ -280,20 +419,20 @@ module RbYAML
|
|
280
419
|
|
281
420
|
# In the flow context, indentation is ignored. We make the scanner less
|
282
421
|
# restrictive then specification requires.
|
283
|
-
return nil if
|
422
|
+
return nil if !@flow_zero
|
284
423
|
# In block context, we may need to issue the BLOCK-END tokens.
|
285
|
-
while @indent >
|
424
|
+
while @indent > col
|
286
425
|
mark = get_mark
|
287
|
-
@indent = @indents.pop
|
426
|
+
@indent = @indents.pop
|
288
427
|
@tokens << BlockEndToken.new(mark, mark)
|
289
428
|
end
|
290
429
|
end
|
291
430
|
|
292
|
-
def add_indent(
|
431
|
+
def add_indent(col)
|
293
432
|
# Check if we need to increase indentation.
|
294
|
-
if @indent <
|
433
|
+
if @indent < col
|
295
434
|
@indents << @indent
|
296
|
-
@indent =
|
435
|
+
@indent = col
|
297
436
|
return true
|
298
437
|
end
|
299
438
|
return false
|
@@ -329,7 +468,6 @@ module RbYAML
|
|
329
468
|
# Set the current intendation to -1.
|
330
469
|
unwind_indent(-1)
|
331
470
|
# Reset simple keys.
|
332
|
-
remove_possible_simple_key
|
333
471
|
@allow_simple_key = false
|
334
472
|
# Scan and add DIRECTIVE.
|
335
473
|
@tokens << scan_directive
|
@@ -348,11 +486,10 @@ module RbYAML
|
|
348
486
|
unwind_indent(-1)
|
349
487
|
# Reset simple keys. Note that there could not be a block collection
|
350
488
|
# after '---'.
|
351
|
-
remove_possible_simple_key
|
352
489
|
@allow_simple_key = false
|
353
490
|
# Add DOCUMENT-START or DOCUMENT-END.
|
354
491
|
start_mark = get_mark
|
355
|
-
|
492
|
+
forward3
|
356
493
|
end_mark = get_mark
|
357
494
|
@tokens << token.new(start_mark, end_mark)
|
358
495
|
end
|
@@ -370,11 +507,12 @@ module RbYAML
|
|
370
507
|
save_possible_simple_key
|
371
508
|
# Increase the flow level.
|
372
509
|
@flow_level += 1
|
510
|
+
@flow_zero = false
|
373
511
|
# Simple keys are allowed after '[' and '{'.
|
374
512
|
@allow_simple_key = true
|
375
513
|
# Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
|
376
514
|
start_mark = get_mark
|
377
|
-
|
515
|
+
forward1
|
378
516
|
end_mark = get_mark
|
379
517
|
@tokens << token.new(start_mark, end_mark)
|
380
518
|
end
|
@@ -388,15 +526,16 @@ module RbYAML
|
|
388
526
|
end
|
389
527
|
|
390
528
|
def fetch_flow_collection_end(token)
|
391
|
-
# Reset possible simple key on the current level.
|
392
|
-
remove_possible_simple_key
|
393
529
|
# Decrease the flow level.
|
394
530
|
@flow_level -= 1
|
531
|
+
if @flow_level == 0
|
532
|
+
@flow_zero = true
|
533
|
+
end
|
395
534
|
# No simple keys after ']' or '}'.
|
396
535
|
@allow_simple_key = false
|
397
536
|
# Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
|
398
537
|
start_mark = get_mark
|
399
|
-
|
538
|
+
forward1
|
400
539
|
end_mark = get_mark
|
401
540
|
@tokens << token.new(start_mark, end_mark)
|
402
541
|
end
|
@@ -404,21 +543,19 @@ module RbYAML
|
|
404
543
|
def fetch_flow_entry
|
405
544
|
# Simple keys are allowed after ','.
|
406
545
|
@allow_simple_key = true
|
407
|
-
# Reset possible simple key on the current level.
|
408
|
-
remove_possible_simple_key
|
409
546
|
# Add FLOW-ENTRY.
|
410
547
|
start_mark = get_mark
|
411
|
-
|
548
|
+
forward1
|
412
549
|
end_mark = get_mark
|
413
550
|
@tokens << FlowEntryToken.new(start_mark, end_mark)
|
414
551
|
end
|
415
552
|
|
416
553
|
def fetch_block_entry
|
417
554
|
# Block context needs additional checks.
|
418
|
-
if @
|
555
|
+
if @flow_zero
|
419
556
|
raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
|
420
557
|
# We may need to add BLOCK-SEQUENCE-START.
|
421
|
-
if add_indent(
|
558
|
+
if add_indent(column)
|
422
559
|
mark = get_mark
|
423
560
|
@tokens << BlockSequenceStartToken.new(mark, mark)
|
424
561
|
end
|
@@ -427,67 +564,63 @@ module RbYAML
|
|
427
564
|
end
|
428
565
|
# Simple keys are allowed after '-'.
|
429
566
|
@allow_simple_key = true
|
430
|
-
# Reset possible simple key on the current level.
|
431
|
-
remove_possible_simple_key
|
432
567
|
# Add BLOCK-ENTRY.
|
433
568
|
start_mark = get_mark
|
434
|
-
|
569
|
+
forward1
|
435
570
|
end_mark = get_mark
|
436
571
|
@tokens << BlockEntryToken.new(start_mark, end_mark)
|
437
572
|
end
|
438
573
|
|
439
574
|
def fetch_key
|
440
575
|
# Block context needs additional checks.
|
441
|
-
if @
|
576
|
+
if @flow_zero
|
442
577
|
# Are we allowed to start a key (not nessesary a simple)?
|
443
578
|
raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
|
444
579
|
# We may need to add BLOCK-MAPPING-START.
|
445
|
-
if add_indent(
|
580
|
+
if add_indent(column)
|
446
581
|
mark = get_mark
|
447
582
|
@tokens << BlockMappingStartToken.new(mark, mark)
|
448
583
|
end
|
449
584
|
end
|
450
585
|
# Simple keys are allowed after '?' in the block context.
|
451
|
-
@allow_simple_key = @
|
452
|
-
# Reset possible simple key on the current level.
|
453
|
-
remove_possible_simple_key
|
586
|
+
@allow_simple_key = @flow_zero
|
454
587
|
# Add KEY.
|
455
588
|
start_mark = get_mark
|
456
|
-
|
589
|
+
forward1
|
457
590
|
end_mark = get_mark
|
458
591
|
@tokens << KeyToken.new(start_mark, end_mark)
|
459
592
|
end
|
460
593
|
|
461
594
|
def fetch_value
|
595
|
+
key = @possible_simple_keys[@flow_level]
|
462
596
|
# Do we determine a simple key?
|
463
|
-
if
|
464
|
-
# Add KEY.
|
465
|
-
key = @possible_simple_keys[@flow_level]
|
466
|
-
@possible_simple_keys.delete(@flow_level)
|
467
|
-
@tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
|
468
|
-
# If this key starts a new block mapping, we need to add
|
469
|
-
# BLOCK-MAPPING-START.
|
470
|
-
@tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
|
471
|
-
# There cannot be two simple keys one after another.
|
472
|
-
@allow_simple_key = false
|
473
|
-
# It must be a part of a complex key.
|
474
|
-
else
|
597
|
+
if key.nil?
|
475
598
|
# Block context needs additional checks.
|
476
599
|
# (Do we really need them? They will be catched by the parser
|
477
600
|
# anyway.)
|
478
|
-
if @
|
601
|
+
if @flow_zero
|
479
602
|
# We are allowed to start a complex value if and only if
|
480
603
|
# we can start a simple key.
|
481
604
|
raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
|
482
605
|
# Simple keys are allowed after ':' in the block context.
|
483
|
-
@allow_simple_key =
|
484
|
-
# Reset possible simple key on the current level.
|
485
|
-
remove_possible_simple_key
|
606
|
+
@allow_simple_key = true
|
486
607
|
end
|
608
|
+
else
|
609
|
+
# Add KEY.
|
610
|
+
@possible_simple_keys.delete(@flow_level)
|
611
|
+
|
612
|
+
# If this key starts a new block mapping, we need to add
|
613
|
+
# BLOCK-MAPPING-START.
|
614
|
+
se = (@flow_zero && add_indent(key.column)) ? [BlockMappingStartToken.new(key.mark, key.mark)] : []
|
615
|
+
se << KeyToken.new(key.mark, key.mark)
|
616
|
+
@tokens.insert(key.token_number-@tokens_taken,*se)
|
617
|
+
# There cannot be two simple keys one after another.
|
618
|
+
@allow_simple_key = false
|
619
|
+
# It must be a part of a complex key.
|
487
620
|
end
|
488
621
|
# Add VALUE.
|
489
622
|
start_mark = get_mark
|
490
|
-
|
623
|
+
forward1
|
491
624
|
end_mark = get_mark
|
492
625
|
@tokens << ValueToken.new(start_mark, end_mark)
|
493
626
|
end
|
@@ -530,8 +663,6 @@ module RbYAML
|
|
530
663
|
def fetch_block_scalar(style)
|
531
664
|
# A simple key may follow a block scalar.
|
532
665
|
@allow_simple_key = true
|
533
|
-
# Reset possible simple key on the current level.
|
534
|
-
remove_possible_simple_key
|
535
666
|
# Scan and add SCALAR.
|
536
667
|
@tokens << scan_block_scalar(style)
|
537
668
|
end
|
@@ -564,65 +695,9 @@ module RbYAML
|
|
564
695
|
@tokens << scan_plain
|
565
696
|
end
|
566
697
|
|
567
|
-
# Checkers.
|
568
|
-
|
569
|
-
def check_directive
|
570
|
-
# DIRECTIVE: ^ '%' ...
|
571
|
-
# The '%' indicator is already checked.
|
572
|
-
@column == 0
|
573
|
-
end
|
574
|
-
|
575
|
-
def check_document_start
|
576
|
-
# DOCUMENT-START: ^ '---' (' '|'\n')
|
577
|
-
@column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
|
578
|
-
end
|
579
|
-
|
580
|
-
def check_document_end
|
581
|
-
# DOCUMENT-END: ^ '...' (' '|'\n')
|
582
|
-
@column == 0 && prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
|
583
|
-
end
|
584
|
-
|
585
|
-
def check_block_entry
|
586
|
-
# BLOCK-ENTRY: '-' (' '|'\n')
|
587
|
-
"\0 \t\r\n\x85".include?(peek(1))
|
588
|
-
end
|
589
|
-
|
590
|
-
def check_key
|
591
|
-
# KEY(flow context): '?'
|
592
|
-
# KEY(block context): '?' (' '|'\n')
|
593
|
-
@flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
|
594
|
-
end
|
595
|
-
|
596
|
-
def check_value
|
597
|
-
# VALUE(flow context): ':'
|
598
|
-
# VALUE(block context): ':' (' '|'\n')
|
599
|
-
@flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
|
600
|
-
end
|
601
|
-
|
602
|
-
def check_plain
|
603
|
-
# A plain scalar may start with any non-space character except:
|
604
|
-
# '-', '?', ':', ',', '[', ']', '{', '}',
|
605
|
-
# '#', '&', '*', '!', '|', '>', '\'', '\"',
|
606
|
-
# '%', '@', '`'.
|
607
|
-
#
|
608
|
-
# It may also start with
|
609
|
-
# '-', '?', ':'
|
610
|
-
# if it is followed by a non-space character.
|
611
|
-
#
|
612
|
-
# Note that we limit the last rule to the block context (except the
|
613
|
-
# '-' character) because we want the flow context to be space
|
614
|
-
# independent.
|
615
|
-
ch = peek
|
616
|
-
!("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) || (!("\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level==0 && "?:".include?(ch)))))
|
617
|
-
end
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
698
|
|
624
699
|
# Scanners.
|
625
|
-
|
700
|
+
NULL_OR_LINEBR = "\0\r\n\x85"
|
626
701
|
def scan_to_next_token
|
627
702
|
# We ignore spaces, line breaks and comments.
|
628
703
|
# If we find a line break in the block context, we set the flag
|
@@ -638,18 +713,20 @@ module RbYAML
|
|
638
713
|
# We also need to add the check for `allow_simple_keys == true` to
|
639
714
|
# `unwind_indent` before issuing BLOCK-END.
|
640
715
|
# Scanners for block, flow, and plain scalars need to be modified.
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
forward
|
716
|
+
while true
|
717
|
+
while peek0 == 32
|
718
|
+
forward1
|
645
719
|
end
|
646
|
-
if
|
647
|
-
|
720
|
+
if peek0 == ?#
|
721
|
+
while !NULL_OR_LINEBR.include?(peek0)
|
722
|
+
forward1
|
723
|
+
end
|
648
724
|
end
|
725
|
+
|
649
726
|
if !scan_line_break.empty?
|
650
|
-
@allow_simple_key = true if @
|
727
|
+
@allow_simple_key = true if @flow_zero
|
651
728
|
else
|
652
|
-
|
729
|
+
break
|
653
730
|
end
|
654
731
|
end
|
655
732
|
end
|
@@ -657,7 +734,7 @@ module RbYAML
|
|
657
734
|
def scan_directive
|
658
735
|
# See the specification for details.
|
659
736
|
start_mark = get_mark
|
660
|
-
|
737
|
+
forward1
|
661
738
|
name = scan_directive_name(start_mark)
|
662
739
|
value = nil
|
663
740
|
if name == "YAML"
|
@@ -668,45 +745,50 @@ module RbYAML
|
|
668
745
|
end_mark = get_mark
|
669
746
|
else
|
670
747
|
end_mark = get_mark
|
671
|
-
|
748
|
+
forward1 while !NULL_OR_LINEBR.include?(peek0)
|
672
749
|
end
|
673
750
|
scan_directive_ignored_line(start_mark)
|
674
751
|
DirectiveToken.new(name, value, start_mark, end_mark)
|
675
752
|
end
|
676
753
|
|
754
|
+
ALPHA_REG = /[-0-9A-Za-z_]/
|
755
|
+
NULL_BL_LINEBR = "\0 \r\n\x85"
|
756
|
+
NULL_BL_T_LINEBR = "\0 \t\r\n\x85"
|
677
757
|
def scan_directive_name(start_mark)
|
678
758
|
# See the specification for details.
|
679
759
|
length = 0
|
680
760
|
ch = peek(length)
|
681
|
-
|
761
|
+
zlen = true
|
762
|
+
while ALPHA_REG =~ ch.chr
|
763
|
+
zlen = false
|
682
764
|
length += 1
|
683
765
|
ch = peek(length)
|
684
766
|
end
|
685
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if
|
767
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if zlen
|
686
768
|
value = prefix(length)
|
687
769
|
forward(length)
|
688
|
-
ch =
|
689
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !
|
770
|
+
ch = peek0
|
771
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !NULL_BL_LINEBR.include?(ch)
|
690
772
|
value
|
691
773
|
end
|
692
774
|
|
693
775
|
def scan_yaml_directive_value(start_mark)
|
694
776
|
# See the specification for details.
|
695
|
-
|
777
|
+
forward1 while peek0 == 32
|
696
778
|
major = scan_yaml_directive_number(start_mark)
|
697
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if
|
698
|
-
|
779
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek0 != ?.
|
780
|
+
forward1
|
699
781
|
minor = scan_yaml_directive_number(start_mark)
|
700
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !
|
782
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
|
701
783
|
[major, minor]
|
702
784
|
end
|
703
785
|
|
704
786
|
def scan_yaml_directive_number(start_mark)
|
705
787
|
# See the specification for details.
|
706
|
-
ch =
|
707
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !(
|
788
|
+
ch = peek0
|
789
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !(ch.__is_ascii_num)
|
708
790
|
length = 0
|
709
|
-
length += 1 while (
|
791
|
+
length += 1 while (peek(length).__is_ascii_num)
|
710
792
|
value = prefix(length)
|
711
793
|
forward(length)
|
712
794
|
value
|
@@ -714,9 +796,9 @@ module RbYAML
|
|
714
796
|
|
715
797
|
def scan_tag_directive_value(start_mark)
|
716
798
|
# See the specification for details.
|
717
|
-
|
799
|
+
forward1 while peek0 == 32
|
718
800
|
handle = scan_tag_directive_handle(start_mark)
|
719
|
-
|
801
|
+
forward1 while peek0 == 32
|
720
802
|
prefix = scan_tag_directive_prefix(start_mark)
|
721
803
|
[handle, prefix]
|
722
804
|
end
|
@@ -724,30 +806,30 @@ module RbYAML
|
|
724
806
|
def scan_tag_directive_handle(start_mark)
|
725
807
|
# See the specification for details.
|
726
808
|
value = scan_tag_handle("directive", start_mark)
|
727
|
-
|
728
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
|
809
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if peek0 != 32
|
729
810
|
value
|
730
811
|
end
|
731
812
|
|
732
813
|
def scan_tag_directive_prefix(start_mark)
|
733
814
|
# See the specification for details.
|
734
815
|
value = scan_tag_uri("directive", start_mark)
|
735
|
-
|
736
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
|
816
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if !NULL_BL_LINEBR.include?(peek0)
|
737
817
|
value
|
738
818
|
end
|
739
819
|
|
740
820
|
def scan_directive_ignored_line(start_mark)
|
741
821
|
# See the specification for details.
|
742
|
-
|
743
|
-
if
|
744
|
-
|
822
|
+
forward1 while peek0 == 32
|
823
|
+
if peek0 == ?#
|
824
|
+
forward1 while !NULL_OR_LINEBR.include?(peek0)
|
745
825
|
end
|
746
|
-
ch =
|
747
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{
|
826
|
+
ch = peek0
|
827
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{peek0.to_s}",get_mark()) if !NULL_OR_LINEBR.include?(peek0)
|
748
828
|
scan_line_break
|
749
829
|
end
|
750
|
-
|
830
|
+
|
831
|
+
NON_ALPHA = /[^-0-9A-Za-z_]/
|
832
|
+
NON_ALPHA_OR_NUM = "\0 \t\r\n\x85?:,]}%@`"
|
751
833
|
def scan_anchor(token)
|
752
834
|
# The specification does not restrict characters for anchors and
|
753
835
|
# aliases. This may lead to problems, for instance, the document:
|
@@ -758,45 +840,47 @@ module RbYAML
|
|
758
840
|
# [ *alias , "value" ]
|
759
841
|
# Therefore we restrict aliases to numbers and ASCII letters.
|
760
842
|
start_mark = get_mark
|
761
|
-
indicator =
|
843
|
+
indicator = peek0
|
762
844
|
name = (indicator == ?*) ? "alias":"anchor"
|
763
|
-
|
845
|
+
forward1
|
764
846
|
length = 0
|
765
|
-
|
766
|
-
while
|
767
|
-
|
768
|
-
|
847
|
+
chunk_size = 16
|
848
|
+
while true
|
849
|
+
chunk = prefix(chunk_size)
|
850
|
+
if length = (NON_ALPHA =~ chunk)
|
851
|
+
break
|
852
|
+
end
|
853
|
+
chunk_size += 16
|
769
854
|
end
|
770
|
-
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found
|
855
|
+
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found something else...",get_mark) if length==0
|
771
856
|
value = prefix(length)
|
772
857
|
forward(length)
|
773
|
-
|
774
|
-
|
775
|
-
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
|
858
|
+
if !NON_ALPHA_OR_NUM.include?(peek0)
|
859
|
+
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{peek0}",get_mark)
|
776
860
|
end
|
777
861
|
end_mark = get_mark
|
778
862
|
token.new(value, start_mark, end_mark)
|
779
863
|
end
|
780
864
|
|
781
|
-
|
865
|
+
NULL_T_BL_LINEBR = "\0 \t\r\n\x85"
|
782
866
|
def scan_tag
|
783
867
|
# See the specification for details.
|
784
868
|
start_mark = get_mark
|
785
|
-
ch =
|
869
|
+
ch = peek1
|
786
870
|
if ch == ?<
|
787
871
|
handle = nil
|
788
|
-
|
872
|
+
forward2
|
789
873
|
suffix = scan_tag_uri("tag", start_mark)
|
790
|
-
raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if
|
791
|
-
|
792
|
-
elsif
|
874
|
+
raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek0 != ?>
|
875
|
+
forward1
|
876
|
+
elsif NULL_T_BL_LINEBR.include?(ch)
|
793
877
|
handle = nil
|
794
878
|
suffix = "!"
|
795
|
-
|
879
|
+
forward1
|
796
880
|
else
|
797
881
|
length = 1
|
798
882
|
use_handle = false
|
799
|
-
while !
|
883
|
+
while !NULL_T_BL_LINEBR.include?(ch)
|
800
884
|
if ch == ?!
|
801
885
|
use_handle = true
|
802
886
|
break
|
@@ -809,24 +893,24 @@ module RbYAML
|
|
809
893
|
handle = scan_tag_handle("tag", start_mark)
|
810
894
|
else
|
811
895
|
handle = "!"
|
812
|
-
|
896
|
+
forward1
|
813
897
|
end
|
814
898
|
suffix = scan_tag_uri("tag", start_mark)
|
815
899
|
end
|
816
|
-
|
817
|
-
raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
|
900
|
+
raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
|
818
901
|
value = [handle, suffix]
|
819
902
|
end_mark = get_mark
|
820
903
|
TagToken.new(value, start_mark, end_mark)
|
821
904
|
end
|
822
905
|
|
906
|
+
BLANK_T = " \t"
|
823
907
|
def scan_block_scalar(style)
|
824
908
|
# See the specification for details.
|
825
909
|
folded = style== ?>
|
826
910
|
chunks = []
|
827
911
|
start_mark = get_mark
|
828
912
|
# Scan the header.
|
829
|
-
|
913
|
+
forward1
|
830
914
|
chomping, increment = scan_block_scalar_indicators(start_mark)
|
831
915
|
scan_block_scalar_ignored_line(start_mark)
|
832
916
|
# Determine the indentation level and go to the first non-empty line.
|
@@ -841,20 +925,20 @@ module RbYAML
|
|
841
925
|
end
|
842
926
|
line_break = ''
|
843
927
|
# Scan the inner part of the block scalar.
|
844
|
-
while
|
928
|
+
while column == indent and peek0 != ?\0
|
845
929
|
chunks += breaks
|
846
|
-
leading_non_space = !
|
930
|
+
leading_non_space = !BLANK_T.include?(peek0)
|
847
931
|
length = 0
|
848
|
-
length += 1 while !
|
932
|
+
length += 1 while !NULL_OR_LINEBR.include?(peek(length))
|
849
933
|
chunks << prefix(length)
|
850
934
|
forward(length)
|
851
935
|
line_break = scan_line_break
|
852
936
|
breaks, end_mark = scan_block_scalar_breaks(indent)
|
853
|
-
if
|
937
|
+
if column == indent && peek0 != 0
|
854
938
|
# Unfortunately, folding rules are ambiguous.
|
855
939
|
#
|
856
940
|
# This is the folding according to the specification:
|
857
|
-
if folded && line_break ==
|
941
|
+
if folded && line_break == "\n" && leading_non_space && !BLANK_T.include?(peek0)
|
858
942
|
chunks << ' ' if breaks.empty?
|
859
943
|
else
|
860
944
|
chunks << line_break
|
@@ -882,76 +966,76 @@ module RbYAML
|
|
882
966
|
end
|
883
967
|
|
884
968
|
# We are done.
|
885
|
-
ScalarToken.new(chunks.
|
969
|
+
ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
|
886
970
|
end
|
887
971
|
|
972
|
+
PLUS_MIN = /[+-]/
|
888
973
|
def scan_block_scalar_indicators(start_mark)
|
889
974
|
# See the specification for details.
|
890
975
|
chomping = nil
|
891
976
|
increment = nil
|
892
|
-
ch =
|
893
|
-
if
|
977
|
+
ch = peek0
|
978
|
+
if PLUS_MIN =~ ch.chr
|
894
979
|
chomping = ch == ?+
|
895
|
-
|
896
|
-
ch =
|
897
|
-
if
|
898
|
-
increment = ch.to_i
|
980
|
+
forward1
|
981
|
+
ch = peek0
|
982
|
+
if ch.__is_ascii_num
|
983
|
+
increment = ch.chr.to_i
|
899
984
|
raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
|
900
|
-
|
985
|
+
forward1
|
901
986
|
end
|
902
|
-
elsif
|
903
|
-
increment = ch
|
987
|
+
elsif ch.__is_ascii_num
|
988
|
+
increment = ch.chr.to_i
|
904
989
|
raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
|
905
|
-
|
906
|
-
ch =
|
907
|
-
if
|
990
|
+
forward1
|
991
|
+
ch = peek0
|
992
|
+
if PLUS_MIN =~ ch.chr
|
908
993
|
chomping = ch == ?+
|
909
|
-
|
994
|
+
forward1
|
910
995
|
end
|
911
996
|
end
|
912
|
-
|
913
|
-
raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
|
997
|
+
raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
|
914
998
|
[chomping, increment]
|
915
999
|
end
|
916
1000
|
|
917
1001
|
def scan_block_scalar_ignored_line(start_mark)
|
918
1002
|
# See the specification for details.
|
919
|
-
|
920
|
-
if
|
921
|
-
|
1003
|
+
forward1 while peek0 == 32
|
1004
|
+
if peek0 == ?#
|
1005
|
+
forward1 while !NULL_OR_LINEBR.include?(peek0)
|
922
1006
|
end
|
923
|
-
|
924
|
-
|
925
|
-
raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark) if !"\0\r\n\x85".include?(ch)
|
1007
|
+
raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{peek0}",get_mark) if !NULL_OR_LINEBR.include?(peek0)
|
926
1008
|
scan_line_break
|
927
1009
|
end
|
928
1010
|
|
1011
|
+
BLANK_OR_LINEBR = " \r\n\x85"
|
929
1012
|
def scan_block_scalar_indentation
|
930
1013
|
# See the specification for details.
|
931
1014
|
chunks = []
|
932
1015
|
max_indent = 0
|
933
1016
|
end_mark = get_mark
|
934
|
-
while
|
935
|
-
if
|
1017
|
+
while BLANK_OR_LINEBR.include?(peek0)
|
1018
|
+
if peek0 != 32
|
936
1019
|
chunks << scan_line_break
|
937
1020
|
end_mark = get_mark
|
938
1021
|
else
|
939
|
-
|
940
|
-
max_indent =
|
1022
|
+
forward1
|
1023
|
+
max_indent = column if column > max_indent
|
941
1024
|
end
|
942
1025
|
end
|
943
1026
|
[chunks, max_indent, end_mark]
|
944
1027
|
end
|
945
1028
|
|
1029
|
+
FULL_LINEBR = "\r\n\x85"
|
946
1030
|
def scan_block_scalar_breaks(indent)
|
947
1031
|
# See the specification for details.
|
948
1032
|
chunks = []
|
949
1033
|
end_mark = get_mark
|
950
|
-
|
951
|
-
while
|
1034
|
+
forward1 while @column < indent && peek0 == 32
|
1035
|
+
while FULL_LINEBR.include?(peek0)
|
952
1036
|
chunks << scan_line_break
|
953
1037
|
end_mark = get_mark
|
954
|
-
|
1038
|
+
forward1 while @column < indent && peek0 == 32
|
955
1039
|
end
|
956
1040
|
[chunks, end_mark]
|
957
1041
|
end
|
@@ -966,16 +1050,16 @@ module RbYAML
|
|
966
1050
|
double = style == ?"
|
967
1051
|
chunks = []
|
968
1052
|
start_mark = get_mark
|
969
|
-
quote =
|
970
|
-
|
1053
|
+
quote = peek0
|
1054
|
+
forward1
|
971
1055
|
chunks += scan_flow_scalar_non_spaces(double, start_mark)
|
972
|
-
while
|
1056
|
+
while peek0 != quote
|
973
1057
|
chunks += scan_flow_scalar_spaces(double, start_mark)
|
974
1058
|
chunks += scan_flow_scalar_non_spaces(double, start_mark)
|
975
1059
|
end
|
976
|
-
|
1060
|
+
forward1
|
977
1061
|
end_mark = get_mark
|
978
|
-
ScalarToken.new(chunks.
|
1062
|
+
ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
|
979
1063
|
end
|
980
1064
|
|
981
1065
|
ESCAPE_REPLACEMENTS = {
|
@@ -1000,42 +1084,43 @@ module RbYAML
|
|
1000
1084
|
'x' => 2
|
1001
1085
|
}
|
1002
1086
|
|
1087
|
+
SPACES_AND_STUFF = "'\"\\\0 \t\r\n\x85"
|
1088
|
+
DOUBLE_ESC = "\"\\"
|
1089
|
+
NOT_HEXA = /[^0-9A-Fa-f]/
|
1003
1090
|
def scan_flow_scalar_non_spaces(double, start_mark)
|
1004
1091
|
# See the specification for details.
|
1005
1092
|
chunks = []
|
1006
1093
|
while true
|
1007
1094
|
length = 0
|
1008
|
-
length += 1 while !
|
1095
|
+
length += 1 while !SPACES_AND_STUFF.include?(peek(length))
|
1009
1096
|
if length!=0
|
1010
1097
|
chunks << prefix(length)
|
1011
1098
|
forward(length)
|
1012
1099
|
end
|
1013
|
-
ch =
|
1014
|
-
if !double && ch == ?' &&
|
1100
|
+
ch = peek0
|
1101
|
+
if !double && ch == ?' && peek1 == ?'
|
1015
1102
|
chunks << ?'
|
1016
|
-
|
1017
|
-
elsif (double && ch == ?') || (!double &&
|
1103
|
+
forward2
|
1104
|
+
elsif (double && ch == ?') || (!double && DOUBLE_ESC.include?(ch))
|
1018
1105
|
chunks << ch
|
1019
|
-
|
1106
|
+
forward1
|
1020
1107
|
elsif double && ch == ?\\
|
1021
|
-
|
1022
|
-
ch =
|
1108
|
+
forward1
|
1109
|
+
ch = peek0
|
1023
1110
|
if ESCAPE_REPLACEMENTS.member?(ch.chr)
|
1024
1111
|
chunks << ESCAPE_REPLACEMENTS[ch.chr]
|
1025
|
-
|
1112
|
+
forward1
|
1026
1113
|
elsif ESCAPE_CODES.member?(ch.chr)
|
1027
1114
|
length = ESCAPE_CODES[ch.chr]
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
"expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
|
1033
|
-
end
|
1115
|
+
forward1
|
1116
|
+
if NOT_HEXA =~ prefix(length)
|
1117
|
+
raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
|
1118
|
+
"expected escape sequence of #{length} hexdecimal numbers, but found something else: #{prefix(length)}}",get_mark)
|
1034
1119
|
end
|
1035
|
-
code = prefix(length).to_i
|
1120
|
+
code = prefix(length).to_i(16).to_s
|
1036
1121
|
chunks << code
|
1037
1122
|
forward(length)
|
1038
|
-
elsif
|
1123
|
+
elsif FULL_LINEBR.include?(ch)
|
1039
1124
|
scan_line_break
|
1040
1125
|
chunks += scan_flow_scalar_breaks(double, start_mark)
|
1041
1126
|
else
|
@@ -1051,16 +1136,16 @@ module RbYAML
|
|
1051
1136
|
# See the specification for details.
|
1052
1137
|
chunks = []
|
1053
1138
|
length = 0
|
1054
|
-
length += 1 while
|
1139
|
+
length += 1 while BLANK_T.include?(peek(length))
|
1055
1140
|
whitespaces = prefix(length)
|
1056
1141
|
forward(length)
|
1057
|
-
ch =
|
1142
|
+
ch = peek0
|
1058
1143
|
if ch == ?\0
|
1059
1144
|
raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
|
1060
|
-
elsif
|
1145
|
+
elsif FULL_LINEBR.include?(ch)
|
1061
1146
|
line_break = scan_line_break
|
1062
1147
|
breaks = scan_flow_scalar_breaks(double, start_mark)
|
1063
|
-
if line_break !=
|
1148
|
+
if line_break != "\n"
|
1064
1149
|
chunks << line_break
|
1065
1150
|
elsif breaks.empty?
|
1066
1151
|
chunks << ' '
|
@@ -1079,17 +1164,22 @@ module RbYAML
|
|
1079
1164
|
# Instead of checking indentation, we check for document
|
1080
1165
|
# separators.
|
1081
1166
|
prefix = prefix(3)
|
1082
|
-
if (prefix == "---" || prefix == "...") &&
|
1167
|
+
if (prefix == "---" || prefix == "...") &&NULL_BL_T_LINEBR.include?(peek3)
|
1083
1168
|
raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
|
1084
1169
|
end
|
1085
|
-
|
1086
|
-
if
|
1170
|
+
forward1 while BLANK_T.include?(peek0)
|
1171
|
+
if FULL_LINEBR.include?(peek0)
|
1087
1172
|
chunks << scan_line_break
|
1088
1173
|
else
|
1089
1174
|
return chunks
|
1090
1175
|
end
|
1091
1176
|
end
|
1092
1177
|
end
|
1178
|
+
|
1179
|
+
|
1180
|
+
R_flowzero = /[\0 \t\r\n\x85]|(:[\0 \t\r\n\x28])/
|
1181
|
+
R_flownonzero = /[\0 \t\r\n\x85\[\]{},:?]/
|
1182
|
+
S4 = "\0 \t\r\n\x28[]{}"
|
1093
1183
|
|
1094
1184
|
def scan_plain
|
1095
1185
|
# See the specification for details.
|
@@ -1098,25 +1188,25 @@ module RbYAML
|
|
1098
1188
|
# We also keep track of the `allow_simple_key` flag here.
|
1099
1189
|
# Indentation rules are loosed for the flow context.
|
1100
1190
|
chunks = []
|
1101
|
-
start_mark = get_mark
|
1102
|
-
end_mark = start_mark
|
1191
|
+
end_mark = start_mark = get_mark
|
1103
1192
|
indent = @indent+1
|
1104
1193
|
# We allow zero indentation for scalars, but then we need to check for
|
1105
1194
|
# document separators at the beginning of the line.
|
1106
1195
|
#if indent == 0
|
1107
1196
|
# indent = 1
|
1108
1197
|
spaces = []
|
1109
|
-
|
1198
|
+
if @flow_zero
|
1199
|
+
f_nzero, r_check = false, R_flowzero
|
1200
|
+
else
|
1201
|
+
f_nzero, r_check = true, R_flownonzero
|
1202
|
+
end
|
1203
|
+
|
1204
|
+
while peek0 != ?#
|
1110
1205
|
length = 0
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
break
|
1116
|
-
end
|
1117
|
-
length += 1
|
1118
|
-
end
|
1119
|
-
if @flow_level != 0 && ch == ?: && !"\0 \t\r\n\x28[]{}".include?(peek(length+1))
|
1206
|
+
chunk_size = 32
|
1207
|
+
chunk_size += 32 until length = (r_check =~ prefix(chunk_size))
|
1208
|
+
ch = peek(length)
|
1209
|
+
if f_nzero && ch == ?: && !S4.include?(peek(length+1))
|
1120
1210
|
forward(length)
|
1121
1211
|
raise ScannerError.new("while scanning a plain scalar",start_mark,"found unexpected ':'",get_mark,"Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
|
1122
1212
|
end
|
@@ -1127,11 +1217,12 @@ module RbYAML
|
|
1127
1217
|
forward(length)
|
1128
1218
|
end_mark = get_mark
|
1129
1219
|
spaces = scan_plain_spaces(indent, start_mark)
|
1130
|
-
break if spaces
|
1220
|
+
break if !spaces || (@flow_zero && @column < indent)
|
1131
1221
|
end
|
1132
|
-
return ScalarToken.new(chunks.
|
1222
|
+
return ScalarToken.new(chunks.to_s, true, start_mark, end_mark)
|
1133
1223
|
end
|
1134
1224
|
|
1225
|
+
END_OR_START = /^(---|\.\.\.)[\0 \t\r\n\x85]$/
|
1135
1226
|
def scan_plain_spaces(indent, start_mark)
|
1136
1227
|
# See the specification for details.
|
1137
1228
|
# The specification is really confusing about tabs in plain scalars.
|
@@ -1141,44 +1232,43 @@ module RbYAML
|
|
1141
1232
|
length += 1 while peek(length) == 32
|
1142
1233
|
whitespaces = prefix(length)
|
1143
1234
|
forward(length)
|
1144
|
-
ch =
|
1145
|
-
if
|
1235
|
+
ch = peek0
|
1236
|
+
if FULL_LINEBR.include?(ch)
|
1146
1237
|
line_break = scan_line_break
|
1147
1238
|
@allow_simple_key = true
|
1148
|
-
|
1149
|
-
return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
|
1239
|
+
return if END_OR_START =~ prefix(4)
|
1150
1240
|
breaks = []
|
1151
|
-
while
|
1152
|
-
if
|
1153
|
-
|
1241
|
+
while BLANK_OR_LINEBR.include?(peek0)
|
1242
|
+
if peek0 == 32
|
1243
|
+
forward1
|
1154
1244
|
else
|
1155
1245
|
breaks << scan_line_break
|
1156
|
-
|
1157
|
-
return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
|
1246
|
+
return if END_OR_START =~ prefix(4)
|
1158
1247
|
end
|
1159
1248
|
end
|
1160
|
-
if line_break !=
|
1249
|
+
if line_break != "\n"
|
1161
1250
|
chunks << line_break
|
1162
|
-
elsif breaks.empty?
|
1163
|
-
chunks <<
|
1251
|
+
elsif breaks.nil? || breaks.empty?
|
1252
|
+
chunks << " "
|
1164
1253
|
end
|
1165
1254
|
chunks += breaks
|
1166
|
-
|
1255
|
+
else
|
1167
1256
|
chunks << whitespaces
|
1168
1257
|
end
|
1169
1258
|
chunks
|
1170
1259
|
end
|
1171
1260
|
|
1261
|
+
|
1172
1262
|
def scan_tag_handle(name, start_mark)
|
1173
1263
|
# See the specification for details.
|
1174
1264
|
# For some strange reasons, the specification does not allow '_' in
|
1175
1265
|
# tag handles. I have allowed it anyway.
|
1176
|
-
ch =
|
1266
|
+
ch = peek0
|
1177
1267
|
raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) if ch != ?!
|
1178
1268
|
length = 1
|
1179
1269
|
ch = peek(length)
|
1180
1270
|
if ch != 32
|
1181
|
-
while
|
1271
|
+
while ALPHA_REG =~ ch.chr
|
1182
1272
|
length += 1
|
1183
1273
|
ch = peek(length)
|
1184
1274
|
end
|
@@ -1193,13 +1283,14 @@ module RbYAML
|
|
1193
1283
|
value
|
1194
1284
|
end
|
1195
1285
|
|
1286
|
+
STRANGE_CHR = /[\]\[\-';\/?:@&=+$,.!~*()%\w]/
|
1196
1287
|
def scan_tag_uri(name, start_mark)
|
1197
1288
|
# See the specification for details.
|
1198
1289
|
# Note: we do not check if URI is well-formed.
|
1199
1290
|
chunks = []
|
1200
1291
|
length = 0
|
1201
1292
|
ch = peek(length)
|
1202
|
-
while
|
1293
|
+
while STRANGE_CHR =~ ch.chr
|
1203
1294
|
if ch == ?%
|
1204
1295
|
chunks << prefix(length)
|
1205
1296
|
forward(length)
|
@@ -1213,29 +1304,27 @@ module RbYAML
|
|
1213
1304
|
if length!=0
|
1214
1305
|
chunks << prefix(length)
|
1215
1306
|
forward(length)
|
1216
|
-
length = 0
|
1217
1307
|
end
|
1218
1308
|
|
1219
1309
|
raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
|
1220
|
-
chunks.
|
1310
|
+
chunks.to_s
|
1221
1311
|
end
|
1222
1312
|
|
1313
|
+
HEXA_REG = /[0-9A-Fa-f]/
|
1223
1314
|
def scan_uri_escapes(name, start_mark)
|
1224
1315
|
# See the specification for details.
|
1225
1316
|
bytes = []
|
1226
1317
|
mark = get_mark
|
1227
|
-
while
|
1228
|
-
|
1229
|
-
2.
|
1230
|
-
|
1231
|
-
|
1232
|
-
end
|
1233
|
-
bytes << prefix(2).to_i.to_s(16)
|
1234
|
-
forward(2)
|
1318
|
+
while peek0 == ?%
|
1319
|
+
forward1
|
1320
|
+
raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek1} and #{peek2}",get_mark) if HEXA_REG !~ peek1.chr || HEXA_REG !~ peek2.chr
|
1321
|
+
bytes << prefix(2).to_i(16).to_s
|
1322
|
+
forward2
|
1235
1323
|
end
|
1236
|
-
bytes.
|
1324
|
+
bytes.to_s
|
1237
1325
|
end
|
1238
1326
|
|
1327
|
+
RN = "\r\n"
|
1239
1328
|
def scan_line_break
|
1240
1329
|
# Transforms:
|
1241
1330
|
# '\r\n' : '\n'
|
@@ -1243,12 +1332,11 @@ module RbYAML
|
|
1243
1332
|
# '\n' : '\n'
|
1244
1333
|
# '\x85' : '\n'
|
1245
1334
|
# default : ''
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
forward(2)
|
1335
|
+
if FULL_LINEBR.include?(peek0)
|
1336
|
+
if prefix2 == RN
|
1337
|
+
forward2
|
1250
1338
|
else
|
1251
|
-
|
1339
|
+
forward1
|
1252
1340
|
end
|
1253
1341
|
return "\n"
|
1254
1342
|
end
|