RbYAML 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbyaml.rb +14 -256
- data/lib/rbyaml.rb.~1.2.~ +383 -0
- data/lib/rbyaml/composer.rb +9 -11
- data/lib/rbyaml/{composer.rb.~1.2.~ → composer.rb.~1.3.~} +28 -25
- data/lib/rbyaml/constants.rb +95 -0
- data/lib/rbyaml/constructor.rb +180 -89
- data/lib/rbyaml/{constructor.rb.~1.2.~ → constructor.rb.~1.9.~} +137 -95
- data/lib/rbyaml/dumper.rb +12 -9
- data/lib/rbyaml/dumper.rb.~1.3.~ +36 -0
- data/lib/rbyaml/emitter.rb +14 -28
- data/lib/rbyaml/{emitter.rb.~1.2.~ → emitter.rb.~1.6.~} +22 -33
- data/lib/rbyaml/error.rb +4 -57
- data/lib/rbyaml/error.rb.~1.2.~ +75 -0
- data/lib/rbyaml/events.rb +8 -14
- data/lib/rbyaml/{events.rb.~1.2.~ → events.rb.~1.4.~} +29 -6
- data/lib/rbyaml/nodes.rb +5 -5
- data/lib/rbyaml/{nodes.rb.~1.2.~ → nodes.rb.~1.3.~} +13 -9
- data/lib/rbyaml/parser.rb +70 -108
- data/lib/rbyaml/parser.rb.~1.4.~ +632 -0
- data/lib/rbyaml/representer.rb +19 -157
- data/lib/rbyaml/representer.rb.old +317 -0
- data/lib/rbyaml/{representer.rb.~1.2.~ → representer.rb.~1.5.~} +60 -26
- data/lib/rbyaml/resolver.rb +6 -6
- data/lib/rbyaml/{resolver.rb.~1.1.~ → resolver.rb.~1.6.~} +20 -20
- data/lib/rbyaml/rubytypes.rb +391 -0
- data/lib/rbyaml/scanner.rb +123 -225
- data/lib/rbyaml/{scanner.rb.~1.2.~ → scanner.rb.~1.5.~} +466 -378
- data/lib/rbyaml/serializer.rb +9 -9
- data/lib/rbyaml/{serializer.rb.~1.2.~ → serializer.rb.~1.4.~} +19 -17
- data/lib/rbyaml/stream.rb +48 -0
- data/lib/rbyaml/tag.rb +72 -0
- data/lib/rbyaml/tokens.rb +22 -16
- data/lib/rbyaml/{tokens.rb.~1.2.~ → tokens.rb.~1.3.~} +44 -4
- data/lib/rbyaml/types.rb +146 -0
- data/lib/rbyaml/util.rb.~1.3.~ +38 -0
- data/lib/rbyaml/yaml.rb +22 -32
- data/lib/rbyaml/{yaml.rb.~1.2.~ → yaml.rb.~1.5.~} +17 -17
- data/test/load_one.rb +6 -0
- data/test/load_one_yaml.rb +6 -0
- data/test/output_events.rb +9 -0
- data/test/test_add_ctor.rb +51 -0
- data/test/test_add_ctor.rb.~1.1.~ +30 -0
- data/test/test_bm.rb +2 -2
- data/test/test_bm.rb.~1.1.~ +28 -0
- data/test/test_gems.rb +10 -0
- data/test/test_one.rb.~1.1.~ +5 -0
- data/test/test_one_syck.rb +5 -0
- data/test/test_rbyaml.rb +63 -32
- data/test/test_rbyaml.rb.~1.6.~ +59 -0
- data/test/{test_rbyaml.rb.~1.2.~ → test_rbyaml_old.rb} +13 -4
- data/test/test_time_events.rb +24 -0
- data/test/test_time_nodes.rb +24 -0
- data/test/test_time_tokens.rb +24 -0
- data/test/yaml/gems_new.yml +147456 -0
- data/test/yaml/test1.rb +8 -0
- data/test/yaml/test10.rb +14 -0
- data/test/yaml/test11.rb +13 -0
- data/test/yaml/test12.rb +9 -0
- data/test/yaml/test13.rb +9 -0
- data/test/yaml/test14.rb +13 -0
- data/test/yaml/test15.rb +12 -0
- data/test/yaml/test16.rb +11 -0
- data/test/yaml/test16.rb.~1.1.~ +11 -0
- data/test/yaml/test17.rb +10 -0
- data/test/yaml/test18.rb +13 -0
- data/test/yaml/test19.rb +9 -0
- data/test/yaml/test19.yml +1 -1
- data/test/yaml/test2.rb +8 -0
- data/test/yaml/test20.rb +11 -0
- data/test/yaml/test20.rb.~1.1.~ +9 -0
- data/test/yaml/test20.yml +1 -1
- data/test/yaml/test3.rb +13 -0
- data/test/yaml/test4.rb +13 -0
- data/test/yaml/test5.rb +8 -0
- data/test/yaml/test6.rb +10 -0
- data/test/yaml/test7.rb +15 -0
- data/test/yaml/test8.rb +15 -0
- data/test/yaml/test9.rb +13 -0
- metadata +61 -16
- data/lib/rbyaml/dumper.rb.~1.2.~ +0 -43
- data/lib/rbyaml/parser.rb.~1.2.~ +0 -494
@@ -23,42 +23,43 @@
|
|
23
23
|
# Read comments in the Scanner code for more details.
|
24
24
|
#
|
25
25
|
|
26
|
+
require 'rbyaml/util'
|
26
27
|
require 'rbyaml/error'
|
27
28
|
require 'rbyaml/tokens'
|
28
29
|
|
29
30
|
module RbYAML
|
30
31
|
class ScannerError < MarkedYAMLError
|
31
32
|
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
@
|
38
|
-
@
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
@
|
33
|
+
class ReaderError < YAMLError
|
34
|
+
def initialize(name, position, character, encoding, reason)
|
35
|
+
@name = name
|
36
|
+
@position = position
|
37
|
+
@character = character
|
38
|
+
@encoding = encoding
|
39
|
+
@reason = reason
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_s
|
43
|
+
if @character.__is_str
|
44
|
+
"'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
|
45
|
+
else
|
46
|
+
"unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
|
47
|
+
end
|
43
48
|
end
|
44
49
|
end
|
45
50
|
|
46
|
-
|
47
|
-
def initialize_scanner
|
48
|
-
# It is assumed that Scanner and Reader will mixin to the same point.
|
49
|
-
# Reader do the dirty work of checking for BOM. It also adds NUL to the end.
|
50
|
-
#
|
51
|
-
# Reader supports the following methods
|
52
|
-
# self.peek(i=0) # peek the next i-th character
|
53
|
-
# self.prefix(l=1) # peek the next l characters
|
54
|
-
# self.forward(l=1) # read the next l characters and move the pointer.
|
51
|
+
SimpleKey = Struct.new(:token_number, :required, :index, :line, :column, :mark)
|
55
52
|
|
53
|
+
class Scanner
|
54
|
+
attr_reader :column, :stream, :stream_pointer, :eof, :buffer, :pointer, :index, :line
|
55
|
+
def initialize(stream)
|
56
56
|
# Had we reached the end of the stream?
|
57
57
|
@done = false
|
58
58
|
|
59
59
|
# The number of unclosed '{' and '['. `flow_level == 0` means block
|
60
60
|
# context.
|
61
61
|
@flow_level = 0
|
62
|
+
@flow_zero = true
|
62
63
|
|
63
64
|
# List of processed tokens that are not yet emitted.
|
64
65
|
@tokens = []
|
@@ -104,6 +105,199 @@ module RbYAML
|
|
104
105
|
# A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
|
105
106
|
# '[', or '{' tokens.
|
106
107
|
@possible_simple_keys = {}
|
108
|
+
|
109
|
+
@stream = nil
|
110
|
+
@stream_pointer = 0
|
111
|
+
@eof = true
|
112
|
+
@buffer = ""
|
113
|
+
@buffer_length = 0
|
114
|
+
@pointer = 0
|
115
|
+
@pointer1 = 1
|
116
|
+
@column = 0
|
117
|
+
if stream.__is_str
|
118
|
+
@name = "<string>"
|
119
|
+
@raw_buffer = stream
|
120
|
+
else
|
121
|
+
@stream = stream
|
122
|
+
@name = stream.respond_to?(:path) ? stream.path : stream.inspect
|
123
|
+
@eof = false
|
124
|
+
@raw_buffer = ""
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def peek(index=0)
|
129
|
+
peekn(index)
|
130
|
+
end
|
131
|
+
|
132
|
+
def peek0
|
133
|
+
update(1) unless @pointer1 < @buffer_length
|
134
|
+
@buffer[@pointer]
|
135
|
+
end
|
136
|
+
|
137
|
+
def peek1
|
138
|
+
update(2) unless @pointer1+1 < @buffer_length
|
139
|
+
@buffer[@pointer1]
|
140
|
+
end
|
141
|
+
|
142
|
+
def peek2
|
143
|
+
update(3) unless @pointer1+2 < @buffer_length
|
144
|
+
@buffer[@pointer1+1]
|
145
|
+
end
|
146
|
+
|
147
|
+
def peek3
|
148
|
+
update(4) unless @pointer1+3 < @buffer_length
|
149
|
+
@buffer[@pointer1+2]
|
150
|
+
end
|
151
|
+
|
152
|
+
def peekn(index=0)
|
153
|
+
pix = @pointer1+index
|
154
|
+
unless pix < @buffer_length
|
155
|
+
update(index+1)
|
156
|
+
pix = @pointer1+index
|
157
|
+
end
|
158
|
+
@buffer[pix-1]
|
159
|
+
end
|
160
|
+
|
161
|
+
def prefix(length=1)
|
162
|
+
update(length) unless @pointer+length < @buffer_length
|
163
|
+
@buffer[@pointer...@pointer+length]
|
164
|
+
end
|
165
|
+
|
166
|
+
def prefix2()
|
167
|
+
update(2) unless @pointer1+1 < @buffer_length
|
168
|
+
@buffer[@pointer..@pointer1]
|
169
|
+
end
|
170
|
+
|
171
|
+
def forward(length=1)
|
172
|
+
case length
|
173
|
+
when 0: forward0
|
174
|
+
when 1: forward1
|
175
|
+
when 2: forward2
|
176
|
+
when 3: forward3
|
177
|
+
when 4: forward4
|
178
|
+
when 5: forward5
|
179
|
+
when 6: forward6
|
180
|
+
else forwardn(length)
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
def forward0
|
185
|
+
update(1) unless @pointer1 < @buffer_length
|
186
|
+
end
|
187
|
+
|
188
|
+
LINE_BR = "\n\x85"
|
189
|
+
|
190
|
+
def forward1
|
191
|
+
update(2) unless @pointer1+1 < @buffer_length
|
192
|
+
buff = @buffer[@pointer...@pointer1+1]
|
193
|
+
index = buff.rindex(LINE_BR_REG)
|
194
|
+
@column = index ? -index : column+1
|
195
|
+
@pointer += 1
|
196
|
+
@pointer1 += 1
|
197
|
+
end
|
198
|
+
|
199
|
+
def forward2
|
200
|
+
update(3) unless @pointer1+2 < @buffer_length
|
201
|
+
buff = @buffer[@pointer...@pointer1+2]
|
202
|
+
index = buff.rindex(LINE_BR_REG)
|
203
|
+
@column = index ? 1-index : column+2
|
204
|
+
@pointer += 2
|
205
|
+
@pointer1 += 2
|
206
|
+
end
|
207
|
+
|
208
|
+
def forward3
|
209
|
+
update(4) unless @pointer1+3 < @buffer_length
|
210
|
+
buff = @buffer[@pointer...@pointer1+3]
|
211
|
+
index = buff.rindex(LINE_BR_REG)
|
212
|
+
@column = index ? 2-index : column+3
|
213
|
+
@pointer += 3
|
214
|
+
@pointer1 += 3
|
215
|
+
end
|
216
|
+
|
217
|
+
def forward4
|
218
|
+
update(5) unless @pointer1+4 < @buffer_length
|
219
|
+
buff = @buffer[@pointer...@pointer1+4]
|
220
|
+
index = buff.rindex(LINE_BR_REG)
|
221
|
+
@column = index ? 3-index : column+4
|
222
|
+
@pointer += 4
|
223
|
+
@pointer1 += 4
|
224
|
+
end
|
225
|
+
|
226
|
+
def forward5
|
227
|
+
update(6) unless @pointer1+5 < @buffer_length
|
228
|
+
buff = @buffer[@pointer...@pointer1+5]
|
229
|
+
index = buff.rindex(LINE_BR_REG)
|
230
|
+
@column = index ? 4-index : column+5
|
231
|
+
@pointer += 5
|
232
|
+
@pointer1 += 5
|
233
|
+
end
|
234
|
+
|
235
|
+
def forward6
|
236
|
+
update(7) unless @pointer1+6 < @buffer_length
|
237
|
+
buff = @buffer[@pointer...@pointer1+6]
|
238
|
+
index = buff.rindex(LINE_BR_REG)
|
239
|
+
@column = index ? 5-index : column+6
|
240
|
+
@pointer += 6
|
241
|
+
@pointer1 += 6
|
242
|
+
end
|
243
|
+
|
244
|
+
LINE_BR_REG = /[\n\x85]|(?:\r[^\n])/
|
245
|
+
def forwardn(length)
|
246
|
+
update(length + 1) unless @pointer1+length < @buffer_length
|
247
|
+
buff = @buffer[@pointer...@pointer+length]
|
248
|
+
index = buff.rindex(LINE_BR_REG)
|
249
|
+
@column = index ? (length-index)-1 : column+length
|
250
|
+
@pointer += length
|
251
|
+
@pointer1 += length
|
252
|
+
end
|
253
|
+
|
254
|
+
def get_mark
|
255
|
+
if @stream.nil?
|
256
|
+
Mark.new(@name,@column,@buffer,@pointer)
|
257
|
+
else
|
258
|
+
Mark.new(@name,@column,nil,nil)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/
|
263
|
+
def check_printable(data)
|
264
|
+
if NON_PRINTABLE =~ data
|
265
|
+
position = @buffer.length-@pointer+($~.offset(0)[0])
|
266
|
+
raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed"),"special characters are not allowed"
|
267
|
+
end
|
268
|
+
end
|
269
|
+
|
270
|
+
|
271
|
+
def update(length)
|
272
|
+
return if @raw_buffer.nil?
|
273
|
+
@buffer = @buffer[@pointer..-1]
|
274
|
+
@pointer = 0
|
275
|
+
while @buffer.length < length
|
276
|
+
unless @eof
|
277
|
+
data = @stream.read(1024)
|
278
|
+
if data && !data.empty?
|
279
|
+
@buffer << data
|
280
|
+
@stream_pointer += data.length
|
281
|
+
@raw_buffer = ""
|
282
|
+
else
|
283
|
+
@eof = true
|
284
|
+
@buffer << ?\0
|
285
|
+
@raw_buffer = nil
|
286
|
+
break
|
287
|
+
end
|
288
|
+
else
|
289
|
+
@buffer << @raw_buffer << ?\0
|
290
|
+
@raw_buffer = nil
|
291
|
+
break
|
292
|
+
end
|
293
|
+
end
|
294
|
+
@buffer_length = @buffer.length
|
295
|
+
if @eof
|
296
|
+
check_printable(@buffer[(-length)..-2])
|
297
|
+
else
|
298
|
+
check_printable(@buffer[(-length)..-1])
|
299
|
+
end
|
300
|
+
@pointer1 = @pointer+1
|
107
301
|
end
|
108
302
|
|
109
303
|
def check_token(*choices)
|
@@ -144,70 +338,51 @@ module RbYAML
|
|
144
338
|
|
145
339
|
def need_more_tokens
|
146
340
|
return false if @done
|
147
|
-
|
148
|
-
# The current token may be a potential simple key, so we
|
149
|
-
# need to look further.
|
150
|
-
stale_possible_simple_keys
|
151
|
-
return true if next_possible_simple_key == @tokens_taken
|
341
|
+
@tokens.empty? || next_possible_simple_key == @tokens_taken
|
152
342
|
end
|
153
343
|
|
344
|
+
ENDING = /^---[\0 \t\r\n\x85]$/
|
345
|
+
START = /^\.\.\.[\0 \t\r\n\x85]$/
|
346
|
+
NULL_OR_OTHER = "\0 \t\r\n\x85"
|
347
|
+
# BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@`]|([\-?:][^\0 \t\r\n\x85]))/ #Since current SYCK handles this one wrong, we have to allow backtick right now.
|
348
|
+
BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@]|([\-?:][^\0 \t\r\n\x85]))/
|
154
349
|
def fetch_more_tokens
|
155
350
|
# Eat whitespaces and comments until we reach the next token.
|
156
351
|
scan_to_next_token
|
157
352
|
|
158
353
|
# Remove obsolete possible simple keys.
|
159
|
-
stale_possible_simple_keys
|
354
|
+
# stale_possible_simple_keys
|
160
355
|
|
161
356
|
# Compare the current indentation and column. It may add some tokens
|
162
357
|
# and decrease the current indentation level.
|
163
358
|
unwind_indent(@column)
|
164
359
|
|
165
360
|
# Peek the next character.
|
166
|
-
ch =
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
# Is it the value indicator?
|
192
|
-
when ch == ?: && check_value: fetch_value
|
193
|
-
# Is it an alias?
|
194
|
-
when ch == ?*: fetch_alias
|
195
|
-
# Is it an anchor?
|
196
|
-
when ch == ?&: fetch_anchor
|
197
|
-
# Is it a tag?
|
198
|
-
when ch == ?!: fetch_tag
|
199
|
-
# Is it a literal scalar?
|
200
|
-
when ch == ?| && @flow_level==0: fetch_literal
|
201
|
-
# Is it a folded scalar?
|
202
|
-
when ch == ?> && @flow_level==0: fetch_folded
|
203
|
-
# Is it a single quoted scalar?
|
204
|
-
when ch == ?': fetch_single
|
205
|
-
# Is it a double quoted scalar?
|
206
|
-
when ch == ?": fetch_double
|
207
|
-
# It must be a plain scalar then.
|
208
|
-
when check_plain: fetch_plain
|
209
|
-
else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
|
210
|
-
end
|
361
|
+
ch = peek0
|
362
|
+
colz = @column == 0
|
363
|
+
|
364
|
+
case ch
|
365
|
+
when ?\0: return fetch_stream_end
|
366
|
+
when ?': return fetch_single
|
367
|
+
when ?": return fetch_double
|
368
|
+
when ??: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_key end
|
369
|
+
when ?:: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_value end
|
370
|
+
when ?%: if colz: return fetch_stream_end end
|
371
|
+
when ?-: if colz && ENDING =~ prefix(4): return fetch_document_start; elsif NULL_OR_OTHER.include?(peek1): return fetch_block_entry end
|
372
|
+
when ?.: if colz && START =~ prefix(4): return fetch_document_end end
|
373
|
+
when ?[: return fetch_flow_sequence_start
|
374
|
+
when ?{: return fetch_flow_mapping_start
|
375
|
+
when ?]: return fetch_flow_sequence_end
|
376
|
+
when ?}: return fetch_flow_mapping_end
|
377
|
+
when ?,: return fetch_flow_entry
|
378
|
+
when ?*: return fetch_alias
|
379
|
+
when ?&: return fetch_anchor
|
380
|
+
when ?!: return fetch_tag
|
381
|
+
when ?|: if @flow_zero: return fetch_literal end
|
382
|
+
when ?>: if @flow_zero: return fetch_folded end
|
383
|
+
end
|
384
|
+
return fetch_plain if BEG =~ prefix(2)
|
385
|
+
raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
|
211
386
|
end
|
212
387
|
|
213
388
|
# Simple keys treatment.
|
@@ -215,58 +390,22 @@ module RbYAML
|
|
215
390
|
def next_possible_simple_key
|
216
391
|
# Return the number of the nearest possible simple key. Actually we
|
217
392
|
# don't need to loop through the whole dictionary.
|
218
|
-
|
219
|
-
|
220
|
-
key = @possible_simple_keys[level]
|
221
|
-
if min_token_number.nil? || key.token_number < min_token_number
|
222
|
-
min_token_number = key.token_number
|
223
|
-
end
|
224
|
-
end
|
225
|
-
min_token_number
|
393
|
+
@possible_simple_keys.each_value {|key| return key.token_number if key.token_number}
|
394
|
+
nil
|
226
395
|
end
|
227
396
|
|
228
|
-
def stale_possible_simple_keys
|
229
|
-
# Remove entries that are no longer possible simple keys. According to
|
230
|
-
# the YAML specification, simple keys
|
231
|
-
# - should be limited to a single line,
|
232
|
-
# - should be no longer than 1024 characters.
|
233
|
-
# Disabling this procedure will allow simple keys of any length and
|
234
|
-
# height (may cause problems if indentation is broken though).
|
235
|
-
@possible_simple_keys.delete_if {|level,key|
|
236
|
-
if key.line != @line || @index-key.index > 1024
|
237
|
-
raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'",get_mark) if key.required
|
238
|
-
return true
|
239
|
-
end
|
240
|
-
return false
|
241
|
-
}
|
242
|
-
end
|
243
|
-
|
244
397
|
def save_possible_simple_key
|
245
398
|
# The next token may start a simple key. We check if it's possible
|
246
399
|
# and save its position. This function is called for
|
247
400
|
# ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
|
248
|
-
|
249
|
-
# Check if a simple key is required at the current position.
|
250
|
-
required = @flow_level==0 && @indent == @column
|
251
|
-
|
252
401
|
# The next token might be a simple key. Let's save it's number and
|
253
402
|
# position.
|
254
|
-
if @allow_simple_key
|
255
|
-
remove_possible_simple_key
|
256
|
-
token_number = @tokens_taken+@tokens.length
|
257
|
-
key = SimpleKey.new(token_number, required,@index,@line,@column,get_mark)
|
258
|
-
@possible_simple_keys[@flow_level] = key
|
259
|
-
end
|
403
|
+
@possible_simple_keys[@flow_level] = SimpleKey.new(@tokens_taken+@tokens.length, @flow_zero && @indent == @column,-1,-1,column,get_mark) if @allow_simple_key
|
260
404
|
end
|
261
405
|
|
262
|
-
def remove_possible_simple_key
|
263
|
-
# Remove the saved possible key position at the current flow level.
|
264
|
-
key = @possible_simple_keys[@flow_level] if @possible_simple_keys.member?(@flow_level)
|
265
|
-
end
|
266
|
-
|
267
406
|
# Indentation functions.
|
268
407
|
|
269
|
-
def unwind_indent(
|
408
|
+
def unwind_indent(col)
|
270
409
|
## In flow context, tokens should respect indentation.
|
271
410
|
## Actually the condition should be `@indent >= column` according to
|
272
411
|
## the spec. But this condition will prohibit intuitively correct
|
@@ -280,20 +419,20 @@ module RbYAML
|
|
280
419
|
|
281
420
|
# In the flow context, indentation is ignored. We make the scanner less
|
282
421
|
# restrictive then specification requires.
|
283
|
-
return nil if
|
422
|
+
return nil if !@flow_zero
|
284
423
|
# In block context, we may need to issue the BLOCK-END tokens.
|
285
|
-
while @indent >
|
424
|
+
while @indent > col
|
286
425
|
mark = get_mark
|
287
|
-
@indent = @indents.pop
|
426
|
+
@indent = @indents.pop
|
288
427
|
@tokens << BlockEndToken.new(mark, mark)
|
289
428
|
end
|
290
429
|
end
|
291
430
|
|
292
|
-
def add_indent(
|
431
|
+
def add_indent(col)
|
293
432
|
# Check if we need to increase indentation.
|
294
|
-
if @indent <
|
433
|
+
if @indent < col
|
295
434
|
@indents << @indent
|
296
|
-
@indent =
|
435
|
+
@indent = col
|
297
436
|
return true
|
298
437
|
end
|
299
438
|
return false
|
@@ -329,7 +468,6 @@ module RbYAML
|
|
329
468
|
# Set the current intendation to -1.
|
330
469
|
unwind_indent(-1)
|
331
470
|
# Reset simple keys.
|
332
|
-
remove_possible_simple_key
|
333
471
|
@allow_simple_key = false
|
334
472
|
# Scan and add DIRECTIVE.
|
335
473
|
@tokens << scan_directive
|
@@ -348,11 +486,10 @@ module RbYAML
|
|
348
486
|
unwind_indent(-1)
|
349
487
|
# Reset simple keys. Note that there could not be a block collection
|
350
488
|
# after '---'.
|
351
|
-
remove_possible_simple_key
|
352
489
|
@allow_simple_key = false
|
353
490
|
# Add DOCUMENT-START or DOCUMENT-END.
|
354
491
|
start_mark = get_mark
|
355
|
-
|
492
|
+
forward3
|
356
493
|
end_mark = get_mark
|
357
494
|
@tokens << token.new(start_mark, end_mark)
|
358
495
|
end
|
@@ -370,11 +507,12 @@ module RbYAML
|
|
370
507
|
save_possible_simple_key
|
371
508
|
# Increase the flow level.
|
372
509
|
@flow_level += 1
|
510
|
+
@flow_zero = false
|
373
511
|
# Simple keys are allowed after '[' and '{'.
|
374
512
|
@allow_simple_key = true
|
375
513
|
# Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
|
376
514
|
start_mark = get_mark
|
377
|
-
|
515
|
+
forward1
|
378
516
|
end_mark = get_mark
|
379
517
|
@tokens << token.new(start_mark, end_mark)
|
380
518
|
end
|
@@ -388,15 +526,16 @@ module RbYAML
|
|
388
526
|
end
|
389
527
|
|
390
528
|
def fetch_flow_collection_end(token)
|
391
|
-
# Reset possible simple key on the current level.
|
392
|
-
remove_possible_simple_key
|
393
529
|
# Decrease the flow level.
|
394
530
|
@flow_level -= 1
|
531
|
+
if @flow_level == 0
|
532
|
+
@flow_zero = true
|
533
|
+
end
|
395
534
|
# No simple keys after ']' or '}'.
|
396
535
|
@allow_simple_key = false
|
397
536
|
# Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
|
398
537
|
start_mark = get_mark
|
399
|
-
|
538
|
+
forward1
|
400
539
|
end_mark = get_mark
|
401
540
|
@tokens << token.new(start_mark, end_mark)
|
402
541
|
end
|
@@ -404,21 +543,19 @@ module RbYAML
|
|
404
543
|
def fetch_flow_entry
|
405
544
|
# Simple keys are allowed after ','.
|
406
545
|
@allow_simple_key = true
|
407
|
-
# Reset possible simple key on the current level.
|
408
|
-
remove_possible_simple_key
|
409
546
|
# Add FLOW-ENTRY.
|
410
547
|
start_mark = get_mark
|
411
|
-
|
548
|
+
forward1
|
412
549
|
end_mark = get_mark
|
413
550
|
@tokens << FlowEntryToken.new(start_mark, end_mark)
|
414
551
|
end
|
415
552
|
|
416
553
|
def fetch_block_entry
|
417
554
|
# Block context needs additional checks.
|
418
|
-
if @
|
555
|
+
if @flow_zero
|
419
556
|
raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
|
420
557
|
# We may need to add BLOCK-SEQUENCE-START.
|
421
|
-
if add_indent(
|
558
|
+
if add_indent(column)
|
422
559
|
mark = get_mark
|
423
560
|
@tokens << BlockSequenceStartToken.new(mark, mark)
|
424
561
|
end
|
@@ -427,67 +564,63 @@ module RbYAML
|
|
427
564
|
end
|
428
565
|
# Simple keys are allowed after '-'.
|
429
566
|
@allow_simple_key = true
|
430
|
-
# Reset possible simple key on the current level.
|
431
|
-
remove_possible_simple_key
|
432
567
|
# Add BLOCK-ENTRY.
|
433
568
|
start_mark = get_mark
|
434
|
-
|
569
|
+
forward1
|
435
570
|
end_mark = get_mark
|
436
571
|
@tokens << BlockEntryToken.new(start_mark, end_mark)
|
437
572
|
end
|
438
573
|
|
439
574
|
def fetch_key
|
440
575
|
# Block context needs additional checks.
|
441
|
-
if @
|
576
|
+
if @flow_zero
|
442
577
|
# Are we allowed to start a key (not nessesary a simple)?
|
443
578
|
raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
|
444
579
|
# We may need to add BLOCK-MAPPING-START.
|
445
|
-
if add_indent(
|
580
|
+
if add_indent(column)
|
446
581
|
mark = get_mark
|
447
582
|
@tokens << BlockMappingStartToken.new(mark, mark)
|
448
583
|
end
|
449
584
|
end
|
450
585
|
# Simple keys are allowed after '?' in the block context.
|
451
|
-
@allow_simple_key = @
|
452
|
-
# Reset possible simple key on the current level.
|
453
|
-
remove_possible_simple_key
|
586
|
+
@allow_simple_key = @flow_zero
|
454
587
|
# Add KEY.
|
455
588
|
start_mark = get_mark
|
456
|
-
|
589
|
+
forward1
|
457
590
|
end_mark = get_mark
|
458
591
|
@tokens << KeyToken.new(start_mark, end_mark)
|
459
592
|
end
|
460
593
|
|
461
594
|
def fetch_value
|
595
|
+
key = @possible_simple_keys[@flow_level]
|
462
596
|
# Do we determine a simple key?
|
463
|
-
if
|
464
|
-
# Add KEY.
|
465
|
-
key = @possible_simple_keys[@flow_level]
|
466
|
-
@possible_simple_keys.delete(@flow_level)
|
467
|
-
@tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
|
468
|
-
# If this key starts a new block mapping, we need to add
|
469
|
-
# BLOCK-MAPPING-START.
|
470
|
-
@tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
|
471
|
-
# There cannot be two simple keys one after another.
|
472
|
-
@allow_simple_key = false
|
473
|
-
# It must be a part of a complex key.
|
474
|
-
else
|
597
|
+
if key.nil?
|
475
598
|
# Block context needs additional checks.
|
476
599
|
# (Do we really need them? They will be catched by the parser
|
477
600
|
# anyway.)
|
478
|
-
if @
|
601
|
+
if @flow_zero
|
479
602
|
# We are allowed to start a complex value if and only if
|
480
603
|
# we can start a simple key.
|
481
604
|
raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
|
482
605
|
# Simple keys are allowed after ':' in the block context.
|
483
|
-
@allow_simple_key =
|
484
|
-
# Reset possible simple key on the current level.
|
485
|
-
remove_possible_simple_key
|
606
|
+
@allow_simple_key = true
|
486
607
|
end
|
608
|
+
else
|
609
|
+
# Add KEY.
|
610
|
+
@possible_simple_keys.delete(@flow_level)
|
611
|
+
|
612
|
+
# If this key starts a new block mapping, we need to add
|
613
|
+
# BLOCK-MAPPING-START.
|
614
|
+
se = (@flow_zero && add_indent(key.column)) ? [BlockMappingStartToken.new(key.mark, key.mark)] : []
|
615
|
+
se << KeyToken.new(key.mark, key.mark)
|
616
|
+
@tokens.insert(key.token_number-@tokens_taken,*se)
|
617
|
+
# There cannot be two simple keys one after another.
|
618
|
+
@allow_simple_key = false
|
619
|
+
# It must be a part of a complex key.
|
487
620
|
end
|
488
621
|
# Add VALUE.
|
489
622
|
start_mark = get_mark
|
490
|
-
|
623
|
+
forward1
|
491
624
|
end_mark = get_mark
|
492
625
|
@tokens << ValueToken.new(start_mark, end_mark)
|
493
626
|
end
|
@@ -530,8 +663,6 @@ module RbYAML
|
|
530
663
|
def fetch_block_scalar(style)
|
531
664
|
# A simple key may follow a block scalar.
|
532
665
|
@allow_simple_key = true
|
533
|
-
# Reset possible simple key on the current level.
|
534
|
-
remove_possible_simple_key
|
535
666
|
# Scan and add SCALAR.
|
536
667
|
@tokens << scan_block_scalar(style)
|
537
668
|
end
|
@@ -564,65 +695,9 @@ module RbYAML
|
|
564
695
|
@tokens << scan_plain
|
565
696
|
end
|
566
697
|
|
567
|
-
# Checkers.
|
568
|
-
|
569
|
-
def check_directive
|
570
|
-
# DIRECTIVE: ^ '%' ...
|
571
|
-
# The '%' indicator is already checked.
|
572
|
-
@column == 0
|
573
|
-
end
|
574
|
-
|
575
|
-
def check_document_start
|
576
|
-
# DOCUMENT-START: ^ '---' (' '|'\n')
|
577
|
-
@column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
|
578
|
-
end
|
579
|
-
|
580
|
-
def check_document_end
|
581
|
-
# DOCUMENT-END: ^ '...' (' '|'\n')
|
582
|
-
@column == 0 && prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
|
583
|
-
end
|
584
|
-
|
585
|
-
def check_block_entry
|
586
|
-
# BLOCK-ENTRY: '-' (' '|'\n')
|
587
|
-
"\0 \t\r\n\x85".include?(peek(1))
|
588
|
-
end
|
589
|
-
|
590
|
-
def check_key
|
591
|
-
# KEY(flow context): '?'
|
592
|
-
# KEY(block context): '?' (' '|'\n')
|
593
|
-
@flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
|
594
|
-
end
|
595
|
-
|
596
|
-
def check_value
|
597
|
-
# VALUE(flow context): ':'
|
598
|
-
# VALUE(block context): ':' (' '|'\n')
|
599
|
-
@flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
|
600
|
-
end
|
601
|
-
|
602
|
-
def check_plain
|
603
|
-
# A plain scalar may start with any non-space character except:
|
604
|
-
# '-', '?', ':', ',', '[', ']', '{', '}',
|
605
|
-
# '#', '&', '*', '!', '|', '>', '\'', '\"',
|
606
|
-
# '%', '@', '`'.
|
607
|
-
#
|
608
|
-
# It may also start with
|
609
|
-
# '-', '?', ':'
|
610
|
-
# if it is followed by a non-space character.
|
611
|
-
#
|
612
|
-
# Note that we limit the last rule to the block context (except the
|
613
|
-
# '-' character) because we want the flow context to be space
|
614
|
-
# independent.
|
615
|
-
ch = peek
|
616
|
-
!("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) || (!("\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level==0 && "?:".include?(ch)))))
|
617
|
-
end
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
698
|
|
624
699
|
# Scanners.
|
625
|
-
|
700
|
+
NULL_OR_LINEBR = "\0\r\n\x85"
|
626
701
|
def scan_to_next_token
|
627
702
|
# We ignore spaces, line breaks and comments.
|
628
703
|
# If we find a line break in the block context, we set the flag
|
@@ -638,18 +713,20 @@ module RbYAML
|
|
638
713
|
# We also need to add the check for `allow_simple_keys == true` to
|
639
714
|
# `unwind_indent` before issuing BLOCK-END.
|
640
715
|
# Scanners for block, flow, and plain scalars need to be modified.
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
forward
|
716
|
+
while true
|
717
|
+
while peek0 == 32
|
718
|
+
forward1
|
645
719
|
end
|
646
|
-
if
|
647
|
-
|
720
|
+
if peek0 == ?#
|
721
|
+
while !NULL_OR_LINEBR.include?(peek0)
|
722
|
+
forward1
|
723
|
+
end
|
648
724
|
end
|
725
|
+
|
649
726
|
if !scan_line_break.empty?
|
650
|
-
@allow_simple_key = true if @
|
727
|
+
@allow_simple_key = true if @flow_zero
|
651
728
|
else
|
652
|
-
|
729
|
+
break
|
653
730
|
end
|
654
731
|
end
|
655
732
|
end
|
@@ -657,7 +734,7 @@ module RbYAML
|
|
657
734
|
def scan_directive
|
658
735
|
# See the specification for details.
|
659
736
|
start_mark = get_mark
|
660
|
-
|
737
|
+
forward1
|
661
738
|
name = scan_directive_name(start_mark)
|
662
739
|
value = nil
|
663
740
|
if name == "YAML"
|
@@ -668,45 +745,50 @@ module RbYAML
|
|
668
745
|
end_mark = get_mark
|
669
746
|
else
|
670
747
|
end_mark = get_mark
|
671
|
-
|
748
|
+
forward1 while !NULL_OR_LINEBR.include?(peek0)
|
672
749
|
end
|
673
750
|
scan_directive_ignored_line(start_mark)
|
674
751
|
DirectiveToken.new(name, value, start_mark, end_mark)
|
675
752
|
end
|
676
753
|
|
754
|
+
ALPHA_REG = /[-0-9A-Za-z_]/
|
755
|
+
NULL_BL_LINEBR = "\0 \r\n\x85"
|
756
|
+
NULL_BL_T_LINEBR = "\0 \t\r\n\x85"
|
677
757
|
def scan_directive_name(start_mark)
|
678
758
|
# See the specification for details.
|
679
759
|
length = 0
|
680
760
|
ch = peek(length)
|
681
|
-
|
761
|
+
zlen = true
|
762
|
+
while ALPHA_REG =~ ch.chr
|
763
|
+
zlen = false
|
682
764
|
length += 1
|
683
765
|
ch = peek(length)
|
684
766
|
end
|
685
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if
|
767
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if zlen
|
686
768
|
value = prefix(length)
|
687
769
|
forward(length)
|
688
|
-
ch =
|
689
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !
|
770
|
+
ch = peek0
|
771
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !NULL_BL_LINEBR.include?(ch)
|
690
772
|
value
|
691
773
|
end
|
692
774
|
|
693
775
|
def scan_yaml_directive_value(start_mark)
|
694
776
|
# See the specification for details.
|
695
|
-
|
777
|
+
forward1 while peek0 == 32
|
696
778
|
major = scan_yaml_directive_number(start_mark)
|
697
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if
|
698
|
-
|
779
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek0 != ?.
|
780
|
+
forward1
|
699
781
|
minor = scan_yaml_directive_number(start_mark)
|
700
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !
|
782
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
|
701
783
|
[major, minor]
|
702
784
|
end
|
703
785
|
|
704
786
|
def scan_yaml_directive_number(start_mark)
|
705
787
|
# See the specification for details.
|
706
|
-
ch =
|
707
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !(
|
788
|
+
ch = peek0
|
789
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !(ch.__is_ascii_num)
|
708
790
|
length = 0
|
709
|
-
length += 1 while (
|
791
|
+
length += 1 while (peek(length).__is_ascii_num)
|
710
792
|
value = prefix(length)
|
711
793
|
forward(length)
|
712
794
|
value
|
@@ -714,9 +796,9 @@ module RbYAML
|
|
714
796
|
|
715
797
|
def scan_tag_directive_value(start_mark)
|
716
798
|
# See the specification for details.
|
717
|
-
|
799
|
+
forward1 while peek0 == 32
|
718
800
|
handle = scan_tag_directive_handle(start_mark)
|
719
|
-
|
801
|
+
forward1 while peek0 == 32
|
720
802
|
prefix = scan_tag_directive_prefix(start_mark)
|
721
803
|
[handle, prefix]
|
722
804
|
end
|
@@ -724,30 +806,30 @@ module RbYAML
|
|
724
806
|
def scan_tag_directive_handle(start_mark)
|
725
807
|
# See the specification for details.
|
726
808
|
value = scan_tag_handle("directive", start_mark)
|
727
|
-
|
728
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
|
809
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if peek0 != 32
|
729
810
|
value
|
730
811
|
end
|
731
812
|
|
732
813
|
def scan_tag_directive_prefix(start_mark)
|
733
814
|
# See the specification for details.
|
734
815
|
value = scan_tag_uri("directive", start_mark)
|
735
|
-
|
736
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
|
816
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if !NULL_BL_LINEBR.include?(peek0)
|
737
817
|
value
|
738
818
|
end
|
739
819
|
|
740
820
|
def scan_directive_ignored_line(start_mark)
|
741
821
|
# See the specification for details.
|
742
|
-
|
743
|
-
if
|
744
|
-
|
822
|
+
forward1 while peek0 == 32
|
823
|
+
if peek0 == ?#
|
824
|
+
forward1 while !NULL_OR_LINEBR.include?(peek0)
|
745
825
|
end
|
746
|
-
ch =
|
747
|
-
raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{
|
826
|
+
ch = peek0
|
827
|
+
raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{peek0.to_s}",get_mark()) if !NULL_OR_LINEBR.include?(peek0)
|
748
828
|
scan_line_break
|
749
829
|
end
|
750
|
-
|
830
|
+
|
831
|
+
NON_ALPHA = /[^-0-9A-Za-z_]/
|
832
|
+
NON_ALPHA_OR_NUM = "\0 \t\r\n\x85?:,]}%@`"
|
751
833
|
def scan_anchor(token)
|
752
834
|
# The specification does not restrict characters for anchors and
|
753
835
|
# aliases. This may lead to problems, for instance, the document:
|
@@ -758,45 +840,47 @@ module RbYAML
|
|
758
840
|
# [ *alias , "value" ]
|
759
841
|
# Therefore we restrict aliases to numbers and ASCII letters.
|
760
842
|
start_mark = get_mark
|
761
|
-
indicator =
|
843
|
+
indicator = peek0
|
762
844
|
name = (indicator == ?*) ? "alias":"anchor"
|
763
|
-
|
845
|
+
forward1
|
764
846
|
length = 0
|
765
|
-
|
766
|
-
while
|
767
|
-
|
768
|
-
|
847
|
+
chunk_size = 16
|
848
|
+
while true
|
849
|
+
chunk = prefix(chunk_size)
|
850
|
+
if length = (NON_ALPHA =~ chunk)
|
851
|
+
break
|
852
|
+
end
|
853
|
+
chunk_size += 16
|
769
854
|
end
|
770
|
-
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found
|
855
|
+
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found something else...",get_mark) if length==0
|
771
856
|
value = prefix(length)
|
772
857
|
forward(length)
|
773
|
-
|
774
|
-
|
775
|
-
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
|
858
|
+
if !NON_ALPHA_OR_NUM.include?(peek0)
|
859
|
+
raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{peek0}",get_mark)
|
776
860
|
end
|
777
861
|
end_mark = get_mark
|
778
862
|
token.new(value, start_mark, end_mark)
|
779
863
|
end
|
780
864
|
|
781
|
-
|
865
|
+
NULL_T_BL_LINEBR = "\0 \t\r\n\x85"
|
782
866
|
def scan_tag
|
783
867
|
# See the specification for details.
|
784
868
|
start_mark = get_mark
|
785
|
-
ch =
|
869
|
+
ch = peek1
|
786
870
|
if ch == ?<
|
787
871
|
handle = nil
|
788
|
-
|
872
|
+
forward2
|
789
873
|
suffix = scan_tag_uri("tag", start_mark)
|
790
|
-
raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if
|
791
|
-
|
792
|
-
elsif
|
874
|
+
raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek0 != ?>
|
875
|
+
forward1
|
876
|
+
elsif NULL_T_BL_LINEBR.include?(ch)
|
793
877
|
handle = nil
|
794
878
|
suffix = "!"
|
795
|
-
|
879
|
+
forward1
|
796
880
|
else
|
797
881
|
length = 1
|
798
882
|
use_handle = false
|
799
|
-
while !
|
883
|
+
while !NULL_T_BL_LINEBR.include?(ch)
|
800
884
|
if ch == ?!
|
801
885
|
use_handle = true
|
802
886
|
break
|
@@ -809,24 +893,24 @@ module RbYAML
|
|
809
893
|
handle = scan_tag_handle("tag", start_mark)
|
810
894
|
else
|
811
895
|
handle = "!"
|
812
|
-
|
896
|
+
forward1
|
813
897
|
end
|
814
898
|
suffix = scan_tag_uri("tag", start_mark)
|
815
899
|
end
|
816
|
-
|
817
|
-
raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
|
900
|
+
raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
|
818
901
|
value = [handle, suffix]
|
819
902
|
end_mark = get_mark
|
820
903
|
TagToken.new(value, start_mark, end_mark)
|
821
904
|
end
|
822
905
|
|
906
|
+
BLANK_T = " \t"
|
823
907
|
def scan_block_scalar(style)
|
824
908
|
# See the specification for details.
|
825
909
|
folded = style== ?>
|
826
910
|
chunks = []
|
827
911
|
start_mark = get_mark
|
828
912
|
# Scan the header.
|
829
|
-
|
913
|
+
forward1
|
830
914
|
chomping, increment = scan_block_scalar_indicators(start_mark)
|
831
915
|
scan_block_scalar_ignored_line(start_mark)
|
832
916
|
# Determine the indentation level and go to the first non-empty line.
|
@@ -841,20 +925,20 @@ module RbYAML
|
|
841
925
|
end
|
842
926
|
line_break = ''
|
843
927
|
# Scan the inner part of the block scalar.
|
844
|
-
while
|
928
|
+
while column == indent and peek0 != ?\0
|
845
929
|
chunks += breaks
|
846
|
-
leading_non_space = !
|
930
|
+
leading_non_space = !BLANK_T.include?(peek0)
|
847
931
|
length = 0
|
848
|
-
length += 1 while !
|
932
|
+
length += 1 while !NULL_OR_LINEBR.include?(peek(length))
|
849
933
|
chunks << prefix(length)
|
850
934
|
forward(length)
|
851
935
|
line_break = scan_line_break
|
852
936
|
breaks, end_mark = scan_block_scalar_breaks(indent)
|
853
|
-
if
|
937
|
+
if column == indent && peek0 != 0
|
854
938
|
# Unfortunately, folding rules are ambiguous.
|
855
939
|
#
|
856
940
|
# This is the folding according to the specification:
|
857
|
-
if folded && line_break ==
|
941
|
+
if folded && line_break == "\n" && leading_non_space && !BLANK_T.include?(peek0)
|
858
942
|
chunks << ' ' if breaks.empty?
|
859
943
|
else
|
860
944
|
chunks << line_break
|
@@ -882,76 +966,76 @@ module RbYAML
|
|
882
966
|
end
|
883
967
|
|
884
968
|
# We are done.
|
885
|
-
ScalarToken.new(chunks.
|
969
|
+
ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
|
886
970
|
end
|
887
971
|
|
972
|
+
PLUS_MIN = /[+-]/
|
888
973
|
def scan_block_scalar_indicators(start_mark)
|
889
974
|
# See the specification for details.
|
890
975
|
chomping = nil
|
891
976
|
increment = nil
|
892
|
-
ch =
|
893
|
-
if
|
977
|
+
ch = peek0
|
978
|
+
if PLUS_MIN =~ ch.chr
|
894
979
|
chomping = ch == ?+
|
895
|
-
|
896
|
-
ch =
|
897
|
-
if
|
898
|
-
increment = ch.to_i
|
980
|
+
forward1
|
981
|
+
ch = peek0
|
982
|
+
if ch.__is_ascii_num
|
983
|
+
increment = ch.chr.to_i
|
899
984
|
raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
|
900
|
-
|
985
|
+
forward1
|
901
986
|
end
|
902
|
-
elsif
|
903
|
-
increment = ch
|
987
|
+
elsif ch.__is_ascii_num
|
988
|
+
increment = ch.chr.to_i
|
904
989
|
raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
|
905
|
-
|
906
|
-
ch =
|
907
|
-
if
|
990
|
+
forward1
|
991
|
+
ch = peek0
|
992
|
+
if PLUS_MIN =~ ch.chr
|
908
993
|
chomping = ch == ?+
|
909
|
-
|
994
|
+
forward1
|
910
995
|
end
|
911
996
|
end
|
912
|
-
|
913
|
-
raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
|
997
|
+
raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
|
914
998
|
[chomping, increment]
|
915
999
|
end
|
916
1000
|
|
917
1001
|
def scan_block_scalar_ignored_line(start_mark)
|
918
1002
|
# See the specification for details.
|
919
|
-
|
920
|
-
if
|
921
|
-
|
1003
|
+
forward1 while peek0 == 32
|
1004
|
+
if peek0 == ?#
|
1005
|
+
forward1 while !NULL_OR_LINEBR.include?(peek0)
|
922
1006
|
end
|
923
|
-
|
924
|
-
|
925
|
-
raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark) if !"\0\r\n\x85".include?(ch)
|
1007
|
+
raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{peek0}",get_mark) if !NULL_OR_LINEBR.include?(peek0)
|
926
1008
|
scan_line_break
|
927
1009
|
end
|
928
1010
|
|
1011
|
+
BLANK_OR_LINEBR = " \r\n\x85"
|
929
1012
|
def scan_block_scalar_indentation
|
930
1013
|
# See the specification for details.
|
931
1014
|
chunks = []
|
932
1015
|
max_indent = 0
|
933
1016
|
end_mark = get_mark
|
934
|
-
while
|
935
|
-
if
|
1017
|
+
while BLANK_OR_LINEBR.include?(peek0)
|
1018
|
+
if peek0 != 32
|
936
1019
|
chunks << scan_line_break
|
937
1020
|
end_mark = get_mark
|
938
1021
|
else
|
939
|
-
|
940
|
-
max_indent =
|
1022
|
+
forward1
|
1023
|
+
max_indent = column if column > max_indent
|
941
1024
|
end
|
942
1025
|
end
|
943
1026
|
[chunks, max_indent, end_mark]
|
944
1027
|
end
|
945
1028
|
|
1029
|
+
FULL_LINEBR = "\r\n\x85"
|
946
1030
|
def scan_block_scalar_breaks(indent)
|
947
1031
|
# See the specification for details.
|
948
1032
|
chunks = []
|
949
1033
|
end_mark = get_mark
|
950
|
-
|
951
|
-
while
|
1034
|
+
forward1 while @column < indent && peek0 == 32
|
1035
|
+
while FULL_LINEBR.include?(peek0)
|
952
1036
|
chunks << scan_line_break
|
953
1037
|
end_mark = get_mark
|
954
|
-
|
1038
|
+
forward1 while @column < indent && peek0 == 32
|
955
1039
|
end
|
956
1040
|
[chunks, end_mark]
|
957
1041
|
end
|
@@ -966,16 +1050,16 @@ module RbYAML
|
|
966
1050
|
double = style == ?"
|
967
1051
|
chunks = []
|
968
1052
|
start_mark = get_mark
|
969
|
-
quote =
|
970
|
-
|
1053
|
+
quote = peek0
|
1054
|
+
forward1
|
971
1055
|
chunks += scan_flow_scalar_non_spaces(double, start_mark)
|
972
|
-
while
|
1056
|
+
while peek0 != quote
|
973
1057
|
chunks += scan_flow_scalar_spaces(double, start_mark)
|
974
1058
|
chunks += scan_flow_scalar_non_spaces(double, start_mark)
|
975
1059
|
end
|
976
|
-
|
1060
|
+
forward1
|
977
1061
|
end_mark = get_mark
|
978
|
-
ScalarToken.new(chunks.
|
1062
|
+
ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
|
979
1063
|
end
|
980
1064
|
|
981
1065
|
ESCAPE_REPLACEMENTS = {
|
@@ -1000,42 +1084,43 @@ module RbYAML
|
|
1000
1084
|
'x' => 2
|
1001
1085
|
}
|
1002
1086
|
|
1087
|
+
SPACES_AND_STUFF = "'\"\\\0 \t\r\n\x85"
|
1088
|
+
DOUBLE_ESC = "\"\\"
|
1089
|
+
NOT_HEXA = /[^0-9A-Fa-f]/
|
1003
1090
|
def scan_flow_scalar_non_spaces(double, start_mark)
|
1004
1091
|
# See the specification for details.
|
1005
1092
|
chunks = []
|
1006
1093
|
while true
|
1007
1094
|
length = 0
|
1008
|
-
length += 1 while !
|
1095
|
+
length += 1 while !SPACES_AND_STUFF.include?(peek(length))
|
1009
1096
|
if length!=0
|
1010
1097
|
chunks << prefix(length)
|
1011
1098
|
forward(length)
|
1012
1099
|
end
|
1013
|
-
ch =
|
1014
|
-
if !double && ch == ?' &&
|
1100
|
+
ch = peek0
|
1101
|
+
if !double && ch == ?' && peek1 == ?'
|
1015
1102
|
chunks << ?'
|
1016
|
-
|
1017
|
-
elsif (double && ch == ?') || (!double &&
|
1103
|
+
forward2
|
1104
|
+
elsif (double && ch == ?') || (!double && DOUBLE_ESC.include?(ch))
|
1018
1105
|
chunks << ch
|
1019
|
-
|
1106
|
+
forward1
|
1020
1107
|
elsif double && ch == ?\\
|
1021
|
-
|
1022
|
-
ch =
|
1108
|
+
forward1
|
1109
|
+
ch = peek0
|
1023
1110
|
if ESCAPE_REPLACEMENTS.member?(ch.chr)
|
1024
1111
|
chunks << ESCAPE_REPLACEMENTS[ch.chr]
|
1025
|
-
|
1112
|
+
forward1
|
1026
1113
|
elsif ESCAPE_CODES.member?(ch.chr)
|
1027
1114
|
length = ESCAPE_CODES[ch.chr]
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
"expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
|
1033
|
-
end
|
1115
|
+
forward1
|
1116
|
+
if NOT_HEXA =~ prefix(length)
|
1117
|
+
raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
|
1118
|
+
"expected escape sequence of #{length} hexdecimal numbers, but found something else: #{prefix(length)}}",get_mark)
|
1034
1119
|
end
|
1035
|
-
code = prefix(length).to_i
|
1120
|
+
code = prefix(length).to_i(16).to_s
|
1036
1121
|
chunks << code
|
1037
1122
|
forward(length)
|
1038
|
-
elsif
|
1123
|
+
elsif FULL_LINEBR.include?(ch)
|
1039
1124
|
scan_line_break
|
1040
1125
|
chunks += scan_flow_scalar_breaks(double, start_mark)
|
1041
1126
|
else
|
@@ -1051,16 +1136,16 @@ module RbYAML
|
|
1051
1136
|
# See the specification for details.
|
1052
1137
|
chunks = []
|
1053
1138
|
length = 0
|
1054
|
-
length += 1 while
|
1139
|
+
length += 1 while BLANK_T.include?(peek(length))
|
1055
1140
|
whitespaces = prefix(length)
|
1056
1141
|
forward(length)
|
1057
|
-
ch =
|
1142
|
+
ch = peek0
|
1058
1143
|
if ch == ?\0
|
1059
1144
|
raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
|
1060
|
-
elsif
|
1145
|
+
elsif FULL_LINEBR.include?(ch)
|
1061
1146
|
line_break = scan_line_break
|
1062
1147
|
breaks = scan_flow_scalar_breaks(double, start_mark)
|
1063
|
-
if line_break !=
|
1148
|
+
if line_break != "\n"
|
1064
1149
|
chunks << line_break
|
1065
1150
|
elsif breaks.empty?
|
1066
1151
|
chunks << ' '
|
@@ -1079,17 +1164,22 @@ module RbYAML
|
|
1079
1164
|
# Instead of checking indentation, we check for document
|
1080
1165
|
# separators.
|
1081
1166
|
prefix = prefix(3)
|
1082
|
-
if (prefix == "---" || prefix == "...") &&
|
1167
|
+
if (prefix == "---" || prefix == "...") &&NULL_BL_T_LINEBR.include?(peek3)
|
1083
1168
|
raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
|
1084
1169
|
end
|
1085
|
-
|
1086
|
-
if
|
1170
|
+
forward1 while BLANK_T.include?(peek0)
|
1171
|
+
if FULL_LINEBR.include?(peek0)
|
1087
1172
|
chunks << scan_line_break
|
1088
1173
|
else
|
1089
1174
|
return chunks
|
1090
1175
|
end
|
1091
1176
|
end
|
1092
1177
|
end
|
1178
|
+
|
1179
|
+
|
1180
|
+
R_flowzero = /[\0 \t\r\n\x85]|(:[\0 \t\r\n\x28])/
|
1181
|
+
R_flownonzero = /[\0 \t\r\n\x85\[\]{},:?]/
|
1182
|
+
S4 = "\0 \t\r\n\x28[]{}"
|
1093
1183
|
|
1094
1184
|
def scan_plain
|
1095
1185
|
# See the specification for details.
|
@@ -1098,25 +1188,25 @@ module RbYAML
|
|
1098
1188
|
# We also keep track of the `allow_simple_key` flag here.
|
1099
1189
|
# Indentation rules are loosed for the flow context.
|
1100
1190
|
chunks = []
|
1101
|
-
start_mark = get_mark
|
1102
|
-
end_mark = start_mark
|
1191
|
+
end_mark = start_mark = get_mark
|
1103
1192
|
indent = @indent+1
|
1104
1193
|
# We allow zero indentation for scalars, but then we need to check for
|
1105
1194
|
# document separators at the beginning of the line.
|
1106
1195
|
#if indent == 0
|
1107
1196
|
# indent = 1
|
1108
1197
|
spaces = []
|
1109
|
-
|
1198
|
+
if @flow_zero
|
1199
|
+
f_nzero, r_check = false, R_flowzero
|
1200
|
+
else
|
1201
|
+
f_nzero, r_check = true, R_flownonzero
|
1202
|
+
end
|
1203
|
+
|
1204
|
+
while peek0 != ?#
|
1110
1205
|
length = 0
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
break
|
1116
|
-
end
|
1117
|
-
length += 1
|
1118
|
-
end
|
1119
|
-
if @flow_level != 0 && ch == ?: && !"\0 \t\r\n\x28[]{}".include?(peek(length+1))
|
1206
|
+
chunk_size = 32
|
1207
|
+
chunk_size += 32 until length = (r_check =~ prefix(chunk_size))
|
1208
|
+
ch = peek(length)
|
1209
|
+
if f_nzero && ch == ?: && !S4.include?(peek(length+1))
|
1120
1210
|
forward(length)
|
1121
1211
|
raise ScannerError.new("while scanning a plain scalar",start_mark,"found unexpected ':'",get_mark,"Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
|
1122
1212
|
end
|
@@ -1127,11 +1217,12 @@ module RbYAML
|
|
1127
1217
|
forward(length)
|
1128
1218
|
end_mark = get_mark
|
1129
1219
|
spaces = scan_plain_spaces(indent, start_mark)
|
1130
|
-
break if spaces
|
1220
|
+
break if !spaces || (@flow_zero && @column < indent)
|
1131
1221
|
end
|
1132
|
-
return ScalarToken.new(chunks.
|
1222
|
+
return ScalarToken.new(chunks.to_s, true, start_mark, end_mark)
|
1133
1223
|
end
|
1134
1224
|
|
1225
|
+
END_OR_START = /^(---|\.\.\.)[\0 \t\r\n\x85]$/
|
1135
1226
|
def scan_plain_spaces(indent, start_mark)
|
1136
1227
|
# See the specification for details.
|
1137
1228
|
# The specification is really confusing about tabs in plain scalars.
|
@@ -1141,44 +1232,43 @@ module RbYAML
|
|
1141
1232
|
length += 1 while peek(length) == 32
|
1142
1233
|
whitespaces = prefix(length)
|
1143
1234
|
forward(length)
|
1144
|
-
ch =
|
1145
|
-
if
|
1235
|
+
ch = peek0
|
1236
|
+
if FULL_LINEBR.include?(ch)
|
1146
1237
|
line_break = scan_line_break
|
1147
1238
|
@allow_simple_key = true
|
1148
|
-
|
1149
|
-
return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
|
1239
|
+
return if END_OR_START =~ prefix(4)
|
1150
1240
|
breaks = []
|
1151
|
-
while
|
1152
|
-
if
|
1153
|
-
|
1241
|
+
while BLANK_OR_LINEBR.include?(peek0)
|
1242
|
+
if peek0 == 32
|
1243
|
+
forward1
|
1154
1244
|
else
|
1155
1245
|
breaks << scan_line_break
|
1156
|
-
|
1157
|
-
return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
|
1246
|
+
return if END_OR_START =~ prefix(4)
|
1158
1247
|
end
|
1159
1248
|
end
|
1160
|
-
if line_break !=
|
1249
|
+
if line_break != "\n"
|
1161
1250
|
chunks << line_break
|
1162
|
-
elsif breaks.empty?
|
1163
|
-
chunks <<
|
1251
|
+
elsif breaks.nil? || breaks.empty?
|
1252
|
+
chunks << " "
|
1164
1253
|
end
|
1165
1254
|
chunks += breaks
|
1166
|
-
|
1255
|
+
else
|
1167
1256
|
chunks << whitespaces
|
1168
1257
|
end
|
1169
1258
|
chunks
|
1170
1259
|
end
|
1171
1260
|
|
1261
|
+
|
1172
1262
|
def scan_tag_handle(name, start_mark)
|
1173
1263
|
# See the specification for details.
|
1174
1264
|
# For some strange reasons, the specification does not allow '_' in
|
1175
1265
|
# tag handles. I have allowed it anyway.
|
1176
|
-
ch =
|
1266
|
+
ch = peek0
|
1177
1267
|
raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) if ch != ?!
|
1178
1268
|
length = 1
|
1179
1269
|
ch = peek(length)
|
1180
1270
|
if ch != 32
|
1181
|
-
while
|
1271
|
+
while ALPHA_REG =~ ch.chr
|
1182
1272
|
length += 1
|
1183
1273
|
ch = peek(length)
|
1184
1274
|
end
|
@@ -1193,13 +1283,14 @@ module RbYAML
|
|
1193
1283
|
value
|
1194
1284
|
end
|
1195
1285
|
|
1286
|
+
STRANGE_CHR = /[\]\[\-';\/?:@&=+$,.!~*()%\w]/
|
1196
1287
|
def scan_tag_uri(name, start_mark)
|
1197
1288
|
# See the specification for details.
|
1198
1289
|
# Note: we do not check if URI is well-formed.
|
1199
1290
|
chunks = []
|
1200
1291
|
length = 0
|
1201
1292
|
ch = peek(length)
|
1202
|
-
while
|
1293
|
+
while STRANGE_CHR =~ ch.chr
|
1203
1294
|
if ch == ?%
|
1204
1295
|
chunks << prefix(length)
|
1205
1296
|
forward(length)
|
@@ -1213,29 +1304,27 @@ module RbYAML
|
|
1213
1304
|
if length!=0
|
1214
1305
|
chunks << prefix(length)
|
1215
1306
|
forward(length)
|
1216
|
-
length = 0
|
1217
1307
|
end
|
1218
1308
|
|
1219
1309
|
raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
|
1220
|
-
chunks.
|
1310
|
+
chunks.to_s
|
1221
1311
|
end
|
1222
1312
|
|
1313
|
+
HEXA_REG = /[0-9A-Fa-f]/
|
1223
1314
|
def scan_uri_escapes(name, start_mark)
|
1224
1315
|
# See the specification for details.
|
1225
1316
|
bytes = []
|
1226
1317
|
mark = get_mark
|
1227
|
-
while
|
1228
|
-
|
1229
|
-
2.
|
1230
|
-
|
1231
|
-
|
1232
|
-
end
|
1233
|
-
bytes << prefix(2).to_i.to_s(16)
|
1234
|
-
forward(2)
|
1318
|
+
while peek0 == ?%
|
1319
|
+
forward1
|
1320
|
+
raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek1} and #{peek2}",get_mark) if HEXA_REG !~ peek1.chr || HEXA_REG !~ peek2.chr
|
1321
|
+
bytes << prefix(2).to_i(16).to_s
|
1322
|
+
forward2
|
1235
1323
|
end
|
1236
|
-
bytes.
|
1324
|
+
bytes.to_s
|
1237
1325
|
end
|
1238
1326
|
|
1327
|
+
RN = "\r\n"
|
1239
1328
|
def scan_line_break
|
1240
1329
|
# Transforms:
|
1241
1330
|
# '\r\n' : '\n'
|
@@ -1243,12 +1332,11 @@ module RbYAML
|
|
1243
1332
|
# '\n' : '\n'
|
1244
1333
|
# '\x85' : '\n'
|
1245
1334
|
# default : ''
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
forward(2)
|
1335
|
+
if FULL_LINEBR.include?(peek0)
|
1336
|
+
if prefix2 == RN
|
1337
|
+
forward2
|
1250
1338
|
else
|
1251
|
-
|
1339
|
+
forward1
|
1252
1340
|
end
|
1253
1341
|
return "\n"
|
1254
1342
|
end
|