RbYAML 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/lib/rbyaml.rb +14 -256
  2. data/lib/rbyaml.rb.~1.2.~ +383 -0
  3. data/lib/rbyaml/composer.rb +9 -11
  4. data/lib/rbyaml/{composer.rb.~1.2.~ → composer.rb.~1.3.~} +28 -25
  5. data/lib/rbyaml/constants.rb +95 -0
  6. data/lib/rbyaml/constructor.rb +180 -89
  7. data/lib/rbyaml/{constructor.rb.~1.2.~ → constructor.rb.~1.9.~} +137 -95
  8. data/lib/rbyaml/dumper.rb +12 -9
  9. data/lib/rbyaml/dumper.rb.~1.3.~ +36 -0
  10. data/lib/rbyaml/emitter.rb +14 -28
  11. data/lib/rbyaml/{emitter.rb.~1.2.~ → emitter.rb.~1.6.~} +22 -33
  12. data/lib/rbyaml/error.rb +4 -57
  13. data/lib/rbyaml/error.rb.~1.2.~ +75 -0
  14. data/lib/rbyaml/events.rb +8 -14
  15. data/lib/rbyaml/{events.rb.~1.2.~ → events.rb.~1.4.~} +29 -6
  16. data/lib/rbyaml/nodes.rb +5 -5
  17. data/lib/rbyaml/{nodes.rb.~1.2.~ → nodes.rb.~1.3.~} +13 -9
  18. data/lib/rbyaml/parser.rb +70 -108
  19. data/lib/rbyaml/parser.rb.~1.4.~ +632 -0
  20. data/lib/rbyaml/representer.rb +19 -157
  21. data/lib/rbyaml/representer.rb.old +317 -0
  22. data/lib/rbyaml/{representer.rb.~1.2.~ → representer.rb.~1.5.~} +60 -26
  23. data/lib/rbyaml/resolver.rb +6 -6
  24. data/lib/rbyaml/{resolver.rb.~1.1.~ → resolver.rb.~1.6.~} +20 -20
  25. data/lib/rbyaml/rubytypes.rb +391 -0
  26. data/lib/rbyaml/scanner.rb +123 -225
  27. data/lib/rbyaml/{scanner.rb.~1.2.~ → scanner.rb.~1.5.~} +466 -378
  28. data/lib/rbyaml/serializer.rb +9 -9
  29. data/lib/rbyaml/{serializer.rb.~1.2.~ → serializer.rb.~1.4.~} +19 -17
  30. data/lib/rbyaml/stream.rb +48 -0
  31. data/lib/rbyaml/tag.rb +72 -0
  32. data/lib/rbyaml/tokens.rb +22 -16
  33. data/lib/rbyaml/{tokens.rb.~1.2.~ → tokens.rb.~1.3.~} +44 -4
  34. data/lib/rbyaml/types.rb +146 -0
  35. data/lib/rbyaml/util.rb.~1.3.~ +38 -0
  36. data/lib/rbyaml/yaml.rb +22 -32
  37. data/lib/rbyaml/{yaml.rb.~1.2.~ → yaml.rb.~1.5.~} +17 -17
  38. data/test/load_one.rb +6 -0
  39. data/test/load_one_yaml.rb +6 -0
  40. data/test/output_events.rb +9 -0
  41. data/test/test_add_ctor.rb +51 -0
  42. data/test/test_add_ctor.rb.~1.1.~ +30 -0
  43. data/test/test_bm.rb +2 -2
  44. data/test/test_bm.rb.~1.1.~ +28 -0
  45. data/test/test_gems.rb +10 -0
  46. data/test/test_one.rb.~1.1.~ +5 -0
  47. data/test/test_one_syck.rb +5 -0
  48. data/test/test_rbyaml.rb +63 -32
  49. data/test/test_rbyaml.rb.~1.6.~ +59 -0
  50. data/test/{test_rbyaml.rb.~1.2.~ → test_rbyaml_old.rb} +13 -4
  51. data/test/test_time_events.rb +24 -0
  52. data/test/test_time_nodes.rb +24 -0
  53. data/test/test_time_tokens.rb +24 -0
  54. data/test/yaml/gems_new.yml +147456 -0
  55. data/test/yaml/test1.rb +8 -0
  56. data/test/yaml/test10.rb +14 -0
  57. data/test/yaml/test11.rb +13 -0
  58. data/test/yaml/test12.rb +9 -0
  59. data/test/yaml/test13.rb +9 -0
  60. data/test/yaml/test14.rb +13 -0
  61. data/test/yaml/test15.rb +12 -0
  62. data/test/yaml/test16.rb +11 -0
  63. data/test/yaml/test16.rb.~1.1.~ +11 -0
  64. data/test/yaml/test17.rb +10 -0
  65. data/test/yaml/test18.rb +13 -0
  66. data/test/yaml/test19.rb +9 -0
  67. data/test/yaml/test19.yml +1 -1
  68. data/test/yaml/test2.rb +8 -0
  69. data/test/yaml/test20.rb +11 -0
  70. data/test/yaml/test20.rb.~1.1.~ +9 -0
  71. data/test/yaml/test20.yml +1 -1
  72. data/test/yaml/test3.rb +13 -0
  73. data/test/yaml/test4.rb +13 -0
  74. data/test/yaml/test5.rb +8 -0
  75. data/test/yaml/test6.rb +10 -0
  76. data/test/yaml/test7.rb +15 -0
  77. data/test/yaml/test8.rb +15 -0
  78. data/test/yaml/test9.rb +13 -0
  79. metadata +61 -16
  80. data/lib/rbyaml/dumper.rb.~1.2.~ +0 -43
  81. data/lib/rbyaml/parser.rb.~1.2.~ +0 -494
@@ -23,42 +23,43 @@
23
23
  # Read comments in the Scanner code for more details.
24
24
  #
25
25
 
26
+ require 'rbyaml/util'
26
27
  require 'rbyaml/error'
27
28
  require 'rbyaml/tokens'
28
29
 
29
30
  module RbYAML
30
31
  class ScannerError < MarkedYAMLError
31
32
  end
32
-
33
- class SimpleKey
34
- attr_reader :token_number, :required, :index, :line, :column, :mark
35
-
36
- def initialize(token_number,required,index,line,column,mark)
37
- @token_number = token_number
38
- @required = required
39
- @index = index
40
- @line = line
41
- @column = column
42
- @mark = mark
33
+ class ReaderError < YAMLError
34
+ def initialize(name, position, character, encoding, reason)
35
+ @name = name
36
+ @position = position
37
+ @character = character
38
+ @encoding = encoding
39
+ @reason = reason
40
+ end
41
+
42
+ def to_s
43
+ if @character.__is_str
44
+ "'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
45
+ else
46
+ "unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
47
+ end
43
48
  end
44
49
  end
45
50
 
46
- module Scanner
47
- def initialize_scanner
48
- # It is assumed that Scanner and Reader will mixin to the same point.
49
- # Reader do the dirty work of checking for BOM. It also adds NUL to the end.
50
- #
51
- # Reader supports the following methods
52
- # self.peek(i=0) # peek the next i-th character
53
- # self.prefix(l=1) # peek the next l characters
54
- # self.forward(l=1) # read the next l characters and move the pointer.
51
+ SimpleKey = Struct.new(:token_number, :required, :index, :line, :column, :mark)
55
52
 
53
+ class Scanner
54
+ attr_reader :column, :stream, :stream_pointer, :eof, :buffer, :pointer, :index, :line
55
+ def initialize(stream)
56
56
  # Had we reached the end of the stream?
57
57
  @done = false
58
58
 
59
59
  # The number of unclosed '{' and '['. `flow_level == 0` means block
60
60
  # context.
61
61
  @flow_level = 0
62
+ @flow_zero = true
62
63
 
63
64
  # List of processed tokens that are not yet emitted.
64
65
  @tokens = []
@@ -104,6 +105,199 @@ module RbYAML
104
105
  # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
105
106
  # '[', or '{' tokens.
106
107
  @possible_simple_keys = {}
108
+
109
+ @stream = nil
110
+ @stream_pointer = 0
111
+ @eof = true
112
+ @buffer = ""
113
+ @buffer_length = 0
114
+ @pointer = 0
115
+ @pointer1 = 1
116
+ @column = 0
117
+ if stream.__is_str
118
+ @name = "<string>"
119
+ @raw_buffer = stream
120
+ else
121
+ @stream = stream
122
+ @name = stream.respond_to?(:path) ? stream.path : stream.inspect
123
+ @eof = false
124
+ @raw_buffer = ""
125
+ end
126
+ end
127
+
128
+ def peek(index=0)
129
+ peekn(index)
130
+ end
131
+
132
+ def peek0
133
+ update(1) unless @pointer1 < @buffer_length
134
+ @buffer[@pointer]
135
+ end
136
+
137
+ def peek1
138
+ update(2) unless @pointer1+1 < @buffer_length
139
+ @buffer[@pointer1]
140
+ end
141
+
142
+ def peek2
143
+ update(3) unless @pointer1+2 < @buffer_length
144
+ @buffer[@pointer1+1]
145
+ end
146
+
147
+ def peek3
148
+ update(4) unless @pointer1+3 < @buffer_length
149
+ @buffer[@pointer1+2]
150
+ end
151
+
152
+ def peekn(index=0)
153
+ pix = @pointer1+index
154
+ unless pix < @buffer_length
155
+ update(index+1)
156
+ pix = @pointer1+index
157
+ end
158
+ @buffer[pix-1]
159
+ end
160
+
161
+ def prefix(length=1)
162
+ update(length) unless @pointer+length < @buffer_length
163
+ @buffer[@pointer...@pointer+length]
164
+ end
165
+
166
+ def prefix2()
167
+ update(2) unless @pointer1+1 < @buffer_length
168
+ @buffer[@pointer..@pointer1]
169
+ end
170
+
171
+ def forward(length=1)
172
+ case length
173
+ when 0: forward0
174
+ when 1: forward1
175
+ when 2: forward2
176
+ when 3: forward3
177
+ when 4: forward4
178
+ when 5: forward5
179
+ when 6: forward6
180
+ else forwardn(length)
181
+ end
182
+ end
183
+
184
+ def forward0
185
+ update(1) unless @pointer1 < @buffer_length
186
+ end
187
+
188
+ LINE_BR = "\n\x85"
189
+
190
+ def forward1
191
+ update(2) unless @pointer1+1 < @buffer_length
192
+ buff = @buffer[@pointer...@pointer1+1]
193
+ index = buff.rindex(LINE_BR_REG)
194
+ @column = index ? -index : column+1
195
+ @pointer += 1
196
+ @pointer1 += 1
197
+ end
198
+
199
+ def forward2
200
+ update(3) unless @pointer1+2 < @buffer_length
201
+ buff = @buffer[@pointer...@pointer1+2]
202
+ index = buff.rindex(LINE_BR_REG)
203
+ @column = index ? 1-index : column+2
204
+ @pointer += 2
205
+ @pointer1 += 2
206
+ end
207
+
208
+ def forward3
209
+ update(4) unless @pointer1+3 < @buffer_length
210
+ buff = @buffer[@pointer...@pointer1+3]
211
+ index = buff.rindex(LINE_BR_REG)
212
+ @column = index ? 2-index : column+3
213
+ @pointer += 3
214
+ @pointer1 += 3
215
+ end
216
+
217
+ def forward4
218
+ update(5) unless @pointer1+4 < @buffer_length
219
+ buff = @buffer[@pointer...@pointer1+4]
220
+ index = buff.rindex(LINE_BR_REG)
221
+ @column = index ? 3-index : column+4
222
+ @pointer += 4
223
+ @pointer1 += 4
224
+ end
225
+
226
+ def forward5
227
+ update(6) unless @pointer1+5 < @buffer_length
228
+ buff = @buffer[@pointer...@pointer1+5]
229
+ index = buff.rindex(LINE_BR_REG)
230
+ @column = index ? 4-index : column+5
231
+ @pointer += 5
232
+ @pointer1 += 5
233
+ end
234
+
235
+ def forward6
236
+ update(7) unless @pointer1+6 < @buffer_length
237
+ buff = @buffer[@pointer...@pointer1+6]
238
+ index = buff.rindex(LINE_BR_REG)
239
+ @column = index ? 5-index : column+6
240
+ @pointer += 6
241
+ @pointer1 += 6
242
+ end
243
+
244
+ LINE_BR_REG = /[\n\x85]|(?:\r[^\n])/
245
+ def forwardn(length)
246
+ update(length + 1) unless @pointer1+length < @buffer_length
247
+ buff = @buffer[@pointer...@pointer+length]
248
+ index = buff.rindex(LINE_BR_REG)
249
+ @column = index ? (length-index)-1 : column+length
250
+ @pointer += length
251
+ @pointer1 += length
252
+ end
253
+
254
+ def get_mark
255
+ if @stream.nil?
256
+ Mark.new(@name,@column,@buffer,@pointer)
257
+ else
258
+ Mark.new(@name,@column,nil,nil)
259
+ end
260
+ end
261
+
262
+ NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/
263
+ def check_printable(data)
264
+ if NON_PRINTABLE =~ data
265
+ position = @buffer.length-@pointer+($~.offset(0)[0])
266
+ raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed"),"special characters are not allowed"
267
+ end
268
+ end
269
+
270
+
271
+ def update(length)
272
+ return if @raw_buffer.nil?
273
+ @buffer = @buffer[@pointer..-1]
274
+ @pointer = 0
275
+ while @buffer.length < length
276
+ unless @eof
277
+ data = @stream.read(1024)
278
+ if data && !data.empty?
279
+ @buffer << data
280
+ @stream_pointer += data.length
281
+ @raw_buffer = ""
282
+ else
283
+ @eof = true
284
+ @buffer << ?\0
285
+ @raw_buffer = nil
286
+ break
287
+ end
288
+ else
289
+ @buffer << @raw_buffer << ?\0
290
+ @raw_buffer = nil
291
+ break
292
+ end
293
+ end
294
+ @buffer_length = @buffer.length
295
+ if @eof
296
+ check_printable(@buffer[(-length)..-2])
297
+ else
298
+ check_printable(@buffer[(-length)..-1])
299
+ end
300
+ @pointer1 = @pointer+1
107
301
  end
108
302
 
109
303
  def check_token(*choices)
@@ -144,70 +338,51 @@ module RbYAML
144
338
 
145
339
  def need_more_tokens
146
340
  return false if @done
147
- return true if @tokens.empty?
148
- # The current token may be a potential simple key, so we
149
- # need to look further.
150
- stale_possible_simple_keys
151
- return true if next_possible_simple_key == @tokens_taken
341
+ @tokens.empty? || next_possible_simple_key == @tokens_taken
152
342
  end
153
343
 
344
+ ENDING = /^---[\0 \t\r\n\x85]$/
345
+ START = /^\.\.\.[\0 \t\r\n\x85]$/
346
+ NULL_OR_OTHER = "\0 \t\r\n\x85"
347
+ # BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@`]|([\-?:][^\0 \t\r\n\x85]))/ #Since current SYCK handles this one wrong, we have to allow backtick right now.
348
+ BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@]|([\-?:][^\0 \t\r\n\x85]))/
154
349
  def fetch_more_tokens
155
350
  # Eat whitespaces and comments until we reach the next token.
156
351
  scan_to_next_token
157
352
 
158
353
  # Remove obsolete possible simple keys.
159
- stale_possible_simple_keys
354
+ # stale_possible_simple_keys
160
355
 
161
356
  # Compare the current indentation and column. It may add some tokens
162
357
  # and decrease the current indentation level.
163
358
  unwind_indent(@column)
164
359
 
165
360
  # Peek the next character.
166
- ch = peek
167
-
168
- return case
169
- # Is it the end of stream?
170
- when ch == ?\0: fetch_stream_end
171
- # Is it a directive?
172
- when ch == ?% && check_directive: fetch_directive
173
- # Is it the document start?
174
- when ch == ?- && check_document_start: fetch_document_start
175
- # Is it the document end?
176
- when ch == ?. && check_document_end: fetch_document_end
177
- # Is it the flow sequence start indicator?
178
- when ch == ?[: fetch_flow_sequence_start
179
- # Is it the flow mapping start indicator?
180
- when ch == ?{: fetch_flow_mapping_start
181
- # Is it the flow sequence end indicator?
182
- when ch == ?]: fetch_flow_sequence_end
183
- # Is it the flow mapping end indicator?
184
- when ch == ?}: fetch_flow_mapping_end
185
- # Is it the flow entry indicator?
186
- when ch == ?,: fetch_flow_entry
187
- # Is it the block entry indicator?
188
- when ch == ?- && check_block_entry: fetch_block_entry
189
- # Is it the key indicator?
190
- when ch == ?? && check_key: fetch_key
191
- # Is it the value indicator?
192
- when ch == ?: && check_value: fetch_value
193
- # Is it an alias?
194
- when ch == ?*: fetch_alias
195
- # Is it an anchor?
196
- when ch == ?&: fetch_anchor
197
- # Is it a tag?
198
- when ch == ?!: fetch_tag
199
- # Is it a literal scalar?
200
- when ch == ?| && @flow_level==0: fetch_literal
201
- # Is it a folded scalar?
202
- when ch == ?> && @flow_level==0: fetch_folded
203
- # Is it a single quoted scalar?
204
- when ch == ?': fetch_single
205
- # Is it a double quoted scalar?
206
- when ch == ?": fetch_double
207
- # It must be a plain scalar then.
208
- when check_plain: fetch_plain
209
- else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
210
- end
361
+ ch = peek0
362
+ colz = @column == 0
363
+
364
+ case ch
365
+ when ?\0: return fetch_stream_end
366
+ when ?': return fetch_single
367
+ when ?": return fetch_double
368
+ when ??: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_key end
369
+ when ?:: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_value end
370
+ when ?%: if colz: return fetch_stream_end end
371
+ when ?-: if colz && ENDING =~ prefix(4): return fetch_document_start; elsif NULL_OR_OTHER.include?(peek1): return fetch_block_entry end
372
+ when ?.: if colz && START =~ prefix(4): return fetch_document_end end
373
+ when ?[: return fetch_flow_sequence_start
374
+ when ?{: return fetch_flow_mapping_start
375
+ when ?]: return fetch_flow_sequence_end
376
+ when ?}: return fetch_flow_mapping_end
377
+ when ?,: return fetch_flow_entry
378
+ when ?*: return fetch_alias
379
+ when ?&: return fetch_anchor
380
+ when ?!: return fetch_tag
381
+ when ?|: if @flow_zero: return fetch_literal end
382
+ when ?>: if @flow_zero: return fetch_folded end
383
+ end
384
+ return fetch_plain if BEG =~ prefix(2)
385
+ raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
211
386
  end
212
387
 
213
388
  # Simple keys treatment.
@@ -215,58 +390,22 @@ module RbYAML
215
390
  def next_possible_simple_key
216
391
  # Return the number of the nearest possible simple key. Actually we
217
392
  # don't need to loop through the whole dictionary.
218
- min_token_number = nil
219
- for level in @possible_simple_keys.keys
220
- key = @possible_simple_keys[level]
221
- if min_token_number.nil? || key.token_number < min_token_number
222
- min_token_number = key.token_number
223
- end
224
- end
225
- min_token_number
393
+ @possible_simple_keys.each_value {|key| return key.token_number if key.token_number}
394
+ nil
226
395
  end
227
396
 
228
- def stale_possible_simple_keys
229
- # Remove entries that are no longer possible simple keys. According to
230
- # the YAML specification, simple keys
231
- # - should be limited to a single line,
232
- # - should be no longer than 1024 characters.
233
- # Disabling this procedure will allow simple keys of any length and
234
- # height (may cause problems if indentation is broken though).
235
- @possible_simple_keys.delete_if {|level,key|
236
- if key.line != @line || @index-key.index > 1024
237
- raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'",get_mark) if key.required
238
- return true
239
- end
240
- return false
241
- }
242
- end
243
-
244
397
  def save_possible_simple_key
245
398
  # The next token may start a simple key. We check if it's possible
246
399
  # and save its position. This function is called for
247
400
  # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
248
-
249
- # Check if a simple key is required at the current position.
250
- required = @flow_level==0 && @indent == @column
251
-
252
401
  # The next token might be a simple key. Let's save its number and
253
402
  # position.
254
- if @allow_simple_key
255
- remove_possible_simple_key
256
- token_number = @tokens_taken+@tokens.length
257
- key = SimpleKey.new(token_number, required,@index,@line,@column,get_mark)
258
- @possible_simple_keys[@flow_level] = key
259
- end
403
+ @possible_simple_keys[@flow_level] = SimpleKey.new(@tokens_taken+@tokens.length, @flow_zero && @indent == @column,-1,-1,column,get_mark) if @allow_simple_key
260
404
  end
261
405
 
262
- def remove_possible_simple_key
263
- # Remove the saved possible key position at the current flow level.
264
- key = @possible_simple_keys[@flow_level] if @possible_simple_keys.member?(@flow_level)
265
- end
266
-
267
406
  # Indentation functions.
268
407
 
269
- def unwind_indent(column)
408
+ def unwind_indent(col)
270
409
  ## In flow context, tokens should respect indentation.
271
410
  ## Actually the condition should be `@indent >= column` according to
272
411
  ## the spec. But this condition will prohibit intuitively correct
@@ -280,20 +419,20 @@ module RbYAML
280
419
 
281
420
  # In the flow context, indentation is ignored. We make the scanner less
282
421
  # restrictive than the specification requires.
283
- return nil if @flow_level != 0
422
+ return nil if !@flow_zero
284
423
  # In block context, we may need to issue the BLOCK-END tokens.
285
- while @indent > column
424
+ while @indent > col
286
425
  mark = get_mark
287
- @indent = @indents.pop()
426
+ @indent = @indents.pop
288
427
  @tokens << BlockEndToken.new(mark, mark)
289
428
  end
290
429
  end
291
430
 
292
- def add_indent(column)
431
+ def add_indent(col)
293
432
  # Check if we need to increase indentation.
294
- if @indent < column
433
+ if @indent < col
295
434
  @indents << @indent
296
- @indent = column
435
+ @indent = col
297
436
  return true
298
437
  end
299
438
  return false
@@ -329,7 +468,6 @@ module RbYAML
329
468
  # Set the current indentation to -1.
330
469
  unwind_indent(-1)
331
470
  # Reset simple keys.
332
- remove_possible_simple_key
333
471
  @allow_simple_key = false
334
472
  # Scan and add DIRECTIVE.
335
473
  @tokens << scan_directive
@@ -348,11 +486,10 @@ module RbYAML
348
486
  unwind_indent(-1)
349
487
  # Reset simple keys. Note that there could not be a block collection
350
488
  # after '---'.
351
- remove_possible_simple_key
352
489
  @allow_simple_key = false
353
490
  # Add DOCUMENT-START or DOCUMENT-END.
354
491
  start_mark = get_mark
355
- forward(3)
492
+ forward3
356
493
  end_mark = get_mark
357
494
  @tokens << token.new(start_mark, end_mark)
358
495
  end
@@ -370,11 +507,12 @@ module RbYAML
370
507
  save_possible_simple_key
371
508
  # Increase the flow level.
372
509
  @flow_level += 1
510
+ @flow_zero = false
373
511
  # Simple keys are allowed after '[' and '{'.
374
512
  @allow_simple_key = true
375
513
  # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
376
514
  start_mark = get_mark
377
- forward
515
+ forward1
378
516
  end_mark = get_mark
379
517
  @tokens << token.new(start_mark, end_mark)
380
518
  end
@@ -388,15 +526,16 @@ module RbYAML
388
526
  end
389
527
 
390
528
  def fetch_flow_collection_end(token)
391
- # Reset possible simple key on the current level.
392
- remove_possible_simple_key
393
529
  # Decrease the flow level.
394
530
  @flow_level -= 1
531
+ if @flow_level == 0
532
+ @flow_zero = true
533
+ end
395
534
  # No simple keys after ']' or '}'.
396
535
  @allow_simple_key = false
397
536
  # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
398
537
  start_mark = get_mark
399
- forward
538
+ forward1
400
539
  end_mark = get_mark
401
540
  @tokens << token.new(start_mark, end_mark)
402
541
  end
@@ -404,21 +543,19 @@ module RbYAML
404
543
  def fetch_flow_entry
405
544
  # Simple keys are allowed after ','.
406
545
  @allow_simple_key = true
407
- # Reset possible simple key on the current level.
408
- remove_possible_simple_key
409
546
  # Add FLOW-ENTRY.
410
547
  start_mark = get_mark
411
- forward
548
+ forward1
412
549
  end_mark = get_mark
413
550
  @tokens << FlowEntryToken.new(start_mark, end_mark)
414
551
  end
415
552
 
416
553
  def fetch_block_entry
417
554
  # Block context needs additional checks.
418
- if @flow_level==0
555
+ if @flow_zero
419
556
  raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
420
557
  # We may need to add BLOCK-SEQUENCE-START.
421
- if add_indent(@column)
558
+ if add_indent(column)
422
559
  mark = get_mark
423
560
  @tokens << BlockSequenceStartToken.new(mark, mark)
424
561
  end
@@ -427,67 +564,63 @@ module RbYAML
427
564
  end
428
565
  # Simple keys are allowed after '-'.
429
566
  @allow_simple_key = true
430
- # Reset possible simple key on the current level.
431
- remove_possible_simple_key
432
567
  # Add BLOCK-ENTRY.
433
568
  start_mark = get_mark
434
- forward
569
+ forward1
435
570
  end_mark = get_mark
436
571
  @tokens << BlockEntryToken.new(start_mark, end_mark)
437
572
  end
438
573
 
439
574
  def fetch_key
440
575
  # Block context needs additional checks.
441
- if @flow_level==0
576
+ if @flow_zero
442
577
  # Are we allowed to start a key (not necessarily a simple one)?
443
578
  raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
444
579
  # We may need to add BLOCK-MAPPING-START.
445
- if add_indent(@column)
580
+ if add_indent(column)
446
581
  mark = get_mark
447
582
  @tokens << BlockMappingStartToken.new(mark, mark)
448
583
  end
449
584
  end
450
585
  # Simple keys are allowed after '?' in the block context.
451
- @allow_simple_key = @flow_level==0
452
- # Reset possible simple key on the current level.
453
- remove_possible_simple_key
586
+ @allow_simple_key = @flow_zero
454
587
  # Add KEY.
455
588
  start_mark = get_mark
456
- forward
589
+ forward1
457
590
  end_mark = get_mark
458
591
  @tokens << KeyToken.new(start_mark, end_mark)
459
592
  end
460
593
 
461
594
  def fetch_value
595
+ key = @possible_simple_keys[@flow_level]
462
596
  # Do we determine a simple key?
463
- if @possible_simple_keys.include?(@flow_level)
464
- # Add KEY.
465
- key = @possible_simple_keys[@flow_level]
466
- @possible_simple_keys.delete(@flow_level)
467
- @tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
468
- # If this key starts a new block mapping, we need to add
469
- # BLOCK-MAPPING-START.
470
- @tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
471
- # There cannot be two simple keys one after another.
472
- @allow_simple_key = false
473
- # It must be a part of a complex key.
474
- else
597
+ if key.nil?
475
598
  # Block context needs additional checks.
476
599
  # (Do we really need them? They will be caught by the parser
477
600
  # anyway.)
478
- if @flow_level==0
601
+ if @flow_zero
479
602
  # We are allowed to start a complex value if and only if
480
603
  # we can start a simple key.
481
604
  raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
482
605
  # Simple keys are allowed after ':' in the block context.
483
- @allow_simple_key = @flow_level==0
484
- # Reset possible simple key on the current level.
485
- remove_possible_simple_key
606
+ @allow_simple_key = true
486
607
  end
608
+ else
609
+ # Add KEY.
610
+ @possible_simple_keys.delete(@flow_level)
611
+
612
+ # If this key starts a new block mapping, we need to add
613
+ # BLOCK-MAPPING-START.
614
+ se = (@flow_zero && add_indent(key.column)) ? [BlockMappingStartToken.new(key.mark, key.mark)] : []
615
+ se << KeyToken.new(key.mark, key.mark)
616
+ @tokens.insert(key.token_number-@tokens_taken,*se)
617
+ # There cannot be two simple keys one after another.
618
+ @allow_simple_key = false
619
+ # It must be a part of a complex key.
487
620
  end
488
621
  # Add VALUE.
489
622
  start_mark = get_mark
490
- forward
623
+ forward1
491
624
  end_mark = get_mark
492
625
  @tokens << ValueToken.new(start_mark, end_mark)
493
626
  end
@@ -530,8 +663,6 @@ module RbYAML
530
663
  def fetch_block_scalar(style)
531
664
  # A simple key may follow a block scalar.
532
665
  @allow_simple_key = true
533
- # Reset possible simple key on the current level.
534
- remove_possible_simple_key
535
666
  # Scan and add SCALAR.
536
667
  @tokens << scan_block_scalar(style)
537
668
  end
@@ -564,65 +695,9 @@ module RbYAML
564
695
  @tokens << scan_plain
565
696
  end
566
697
 
567
- # Checkers.
568
-
569
- def check_directive
570
- # DIRECTIVE: ^ '%' ...
571
- # The '%' indicator is already checked.
572
- @column == 0
573
- end
574
-
575
- def check_document_start
576
- # DOCUMENT-START: ^ '---' (' '|'\n')
577
- @column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
578
- end
579
-
580
- def check_document_end
581
- # DOCUMENT-END: ^ '...' (' '|'\n')
582
- @column == 0 && prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
583
- end
584
-
585
- def check_block_entry
586
- # BLOCK-ENTRY: '-' (' '|'\n')
587
- "\0 \t\r\n\x85".include?(peek(1))
588
- end
589
-
590
- def check_key
591
- # KEY(flow context): '?'
592
- # KEY(block context): '?' (' '|'\n')
593
- @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
594
- end
595
-
596
- def check_value
597
- # VALUE(flow context): ':'
598
- # VALUE(block context): ':' (' '|'\n')
599
- @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
600
- end
601
-
602
- def check_plain
603
- # A plain scalar may start with any non-space character except:
604
- # '-', '?', ':', ',', '[', ']', '{', '}',
605
- # '#', '&', '*', '!', '|', '>', '\'', '\"',
606
- # '%', '@', '`'.
607
- #
608
- # It may also start with
609
- # '-', '?', ':'
610
- # if it is followed by a non-space character.
611
- #
612
- # Note that we limit the last rule to the block context (except the
613
- # '-' character) because we want the flow context to be space
614
- # independent.
615
- ch = peek
616
- !("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) || (!("\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level==0 && "?:".include?(ch)))))
617
- end
618
-
619
-
620
-
621
-
622
-
623
698
 
624
699
  # Scanners.
625
-
700
+ NULL_OR_LINEBR = "\0\r\n\x85"
626
701
  def scan_to_next_token
627
702
  # We ignore spaces, line breaks and comments.
628
703
  # If we find a line break in the block context, we set the flag
@@ -638,18 +713,20 @@ module RbYAML
638
713
  # We also need to add the check for `allow_simple_keys == true` to
639
714
  # `unwind_indent` before issuing BLOCK-END.
640
715
  # Scanners for block, flow, and plain scalars need to be modified.
641
- found = false
642
- while !found
643
- while peek == 32
644
- forward
716
+ while true
717
+ while peek0 == 32
718
+ forward1
645
719
  end
646
- if peek == ?#
647
- forward while !"\0\r\n\x85".include?(peek)
720
+ if peek0 == ?#
721
+ while !NULL_OR_LINEBR.include?(peek0)
722
+ forward1
723
+ end
648
724
  end
725
+
649
726
  if !scan_line_break.empty?
650
- @allow_simple_key = true if @flow_level==0
727
+ @allow_simple_key = true if @flow_zero
651
728
  else
652
- found = true
729
+ break
653
730
  end
654
731
  end
655
732
  end
@@ -657,7 +734,7 @@ module RbYAML
657
734
  def scan_directive
658
735
  # See the specification for details.
659
736
  start_mark = get_mark
660
- forward
737
+ forward1
661
738
  name = scan_directive_name(start_mark)
662
739
  value = nil
663
740
  if name == "YAML"
@@ -668,45 +745,50 @@ module RbYAML
668
745
  end_mark = get_mark
669
746
  else
670
747
  end_mark = get_mark
671
- forward while !"\0\r\n\x85".include?(peek)
748
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
672
749
  end
673
750
  scan_directive_ignored_line(start_mark)
674
751
  DirectiveToken.new(name, value, start_mark, end_mark)
675
752
  end
676
753
 
754
+ ALPHA_REG = /[-0-9A-Za-z_]/
755
+ NULL_BL_LINEBR = "\0 \r\n\x85"
756
+ NULL_BL_T_LINEBR = "\0 \t\r\n\x85"
677
757
  def scan_directive_name(start_mark)
678
758
  # See the specification for details.
679
759
  length = 0
680
760
  ch = peek(length)
681
- while /[-0-9A-Za-z_]/ =~ ch.chr
761
+ zlen = true
762
+ while ALPHA_REG =~ ch.chr
763
+ zlen = false
682
764
  length += 1
683
765
  ch = peek(length)
684
766
  end
685
- raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if length==0
767
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if zlen
686
768
  value = prefix(length)
687
769
  forward(length)
688
- ch = peek()
689
- raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
770
+ ch = peek0
771
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !NULL_BL_LINEBR.include?(ch)
690
772
  value
691
773
  end
692
774
 
693
775
  def scan_yaml_directive_value(start_mark)
694
776
  # See the specification for details.
695
- forward while peek == 32
777
+ forward1 while peek0 == 32
696
778
  major = scan_yaml_directive_number(start_mark)
697
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek != ?.
698
- forward
779
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek0 != ?.
780
+ forward1
699
781
  minor = scan_yaml_directive_number(start_mark)
700
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !"\0 \r\n\x85".include?(peek)
782
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
701
783
  [major, minor]
702
784
  end
703
785
 
704
786
  def scan_yaml_directive_number(start_mark)
705
787
  # See the specification for details.
706
- ch = peek
707
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !((?0..?9) === ch)
788
+ ch = peek0
789
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !(ch.__is_ascii_num)
708
790
  length = 0
709
- length += 1 while ((?0..?9) === peek(length))
791
+ length += 1 while (peek(length).__is_ascii_num)
710
792
  value = prefix(length)
711
793
  forward(length)
712
794
  value
@@ -714,9 +796,9 @@ module RbYAML
714
796
 
715
797
  def scan_tag_directive_value(start_mark)
716
798
  # See the specification for details.
717
- forward while peek == 32
799
+ forward1 while peek0 == 32
718
800
  handle = scan_tag_directive_handle(start_mark)
719
- forward while peek == 32
801
+ forward1 while peek0 == 32
720
802
  prefix = scan_tag_directive_prefix(start_mark)
721
803
  [handle, prefix]
722
804
  end
@@ -724,30 +806,30 @@ module RbYAML
724
806
  def scan_tag_directive_handle(start_mark)
725
807
  # See the specification for details.
726
808
  value = scan_tag_handle("directive", start_mark)
727
- ch = peek
728
- raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
809
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if peek0 != 32
729
810
  value
730
811
  end
731
812
 
732
813
  def scan_tag_directive_prefix(start_mark)
733
814
  # See the specification for details.
734
815
  value = scan_tag_uri("directive", start_mark)
735
- ch = peek
736
- raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
816
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if !NULL_BL_LINEBR.include?(peek0)
737
817
  value
738
818
  end
739
819
 
740
820
  def scan_directive_ignored_line(start_mark)
741
821
  # See the specification for details.
742
- forward while peek == 32
743
- if peek == ?#
744
- forward while !"\0\r\n\x85".include?(peek)
822
+ forward1 while peek0 == 32
823
+ if peek0 == ?#
824
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
745
825
  end
746
- ch = peek
747
- raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark()) if !"\0\r\n\x85".include?(ch)
826
+ ch = peek0
827
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{peek0.to_s}",get_mark()) if !NULL_OR_LINEBR.include?(peek0)
748
828
  scan_line_break
749
829
  end
750
-
830
+
831
+ NON_ALPHA = /[^-0-9A-Za-z_]/
832
+ NON_ALPHA_OR_NUM = "\0 \t\r\n\x85?:,]}%@`"
751
833
  def scan_anchor(token)
752
834
  # The specification does not restrict characters for anchors and
753
835
  # aliases. This may lead to problems, for instance, the document:
@@ -758,45 +840,47 @@ module RbYAML
758
840
  # [ *alias , "value" ]
759
841
  # Therefore we restrict aliases to numbers and ASCII letters.
760
842
  start_mark = get_mark
761
- indicator = peek
843
+ indicator = peek0
762
844
  name = (indicator == ?*) ? "alias":"anchor"
763
- forward
845
+ forward1
764
846
  length = 0
765
- ch = peek(length)
766
- while /[-0-9A-Za-z_]/ =~ ch.chr
767
- length += 1
768
- ch = peek(length)
847
+ chunk_size = 16
848
+ while true
849
+ chunk = prefix(chunk_size)
850
+ if length = (NON_ALPHA =~ chunk)
851
+ break
852
+ end
853
+ chunk_size += 16
769
854
  end
770
- raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark) if length==0
855
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found something else...",get_mark) if length==0
771
856
  value = prefix(length)
772
857
  forward(length)
773
- ch = peek
774
- if !"\0 \t\r\n\x85?:,]}%@`".include?(ch)
775
- raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
858
+ if !NON_ALPHA_OR_NUM.include?(peek0)
859
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{peek0}",get_mark)
776
860
  end
777
861
  end_mark = get_mark
778
862
  token.new(value, start_mark, end_mark)
779
863
  end
780
864
 
781
-
865
+ NULL_T_BL_LINEBR = "\0 \t\r\n\x85"
782
866
  def scan_tag
783
867
  # See the specification for details.
784
868
  start_mark = get_mark
785
- ch = peek(1)
869
+ ch = peek1
786
870
  if ch == ?<
787
871
  handle = nil
788
- forward(2)
872
+ forward2
789
873
  suffix = scan_tag_uri("tag", start_mark)
790
- raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
791
- forward
792
- elsif "\0 \t\r\n\x85".include?(ch)
874
+ raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek0 != ?>
875
+ forward1
876
+ elsif NULL_T_BL_LINEBR.include?(ch)
793
877
  handle = nil
794
878
  suffix = "!"
795
- forward
879
+ forward1
796
880
  else
797
881
  length = 1
798
882
  use_handle = false
799
- while !"\0 \t\r\n\x85".include?(ch)
883
+ while !NULL_T_BL_LINEBR.include?(ch)
800
884
  if ch == ?!
801
885
  use_handle = true
802
886
  break
@@ -809,24 +893,24 @@ module RbYAML
809
893
  handle = scan_tag_handle("tag", start_mark)
810
894
  else
811
895
  handle = "!"
812
- forward
896
+ forward1
813
897
  end
814
898
  suffix = scan_tag_uri("tag", start_mark)
815
899
  end
816
- ch = peek
817
- raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
900
+ raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
818
901
  value = [handle, suffix]
819
902
  end_mark = get_mark
820
903
  TagToken.new(value, start_mark, end_mark)
821
904
  end
822
905
 
906
+ BLANK_T = " \t"
823
907
  def scan_block_scalar(style)
824
908
  # See the specification for details.
825
909
  folded = style== ?>
826
910
  chunks = []
827
911
  start_mark = get_mark
828
912
  # Scan the header.
829
- forward
913
+ forward1
830
914
  chomping, increment = scan_block_scalar_indicators(start_mark)
831
915
  scan_block_scalar_ignored_line(start_mark)
832
916
  # Determine the indentation level and go to the first non-empty line.
@@ -841,20 +925,20 @@ module RbYAML
841
925
  end
842
926
  line_break = ''
843
927
  # Scan the inner part of the block scalar.
844
- while @column == indent and peek != ?\0
928
+ while column == indent and peek0 != ?\0
845
929
  chunks += breaks
846
- leading_non_space = !" \t".include?(peek)
930
+ leading_non_space = !BLANK_T.include?(peek0)
847
931
  length = 0
848
- length += 1 while !"\0\r\n\x85".include?(peek(length))
932
+ length += 1 while !NULL_OR_LINEBR.include?(peek(length))
849
933
  chunks << prefix(length)
850
934
  forward(length)
851
935
  line_break = scan_line_break
852
936
  breaks, end_mark = scan_block_scalar_breaks(indent)
853
- if @column == indent && peek != 0
937
+ if column == indent && peek0 != 0
854
938
  # Unfortunately, folding rules are ambiguous.
855
939
  #
856
940
  # This is the folding according to the specification:
857
- if folded && line_break == ?\n && leading_non_space && !" \t".include?(peek())
941
+ if folded && line_break == "\n" && leading_non_space && !BLANK_T.include?(peek0)
858
942
  chunks << ' ' if breaks.empty?
859
943
  else
860
944
  chunks << line_break
@@ -882,76 +966,76 @@ module RbYAML
882
966
  end
883
967
 
884
968
  # We are done.
885
- ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
969
+ ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
886
970
  end
887
971
 
972
+ PLUS_MIN = /[+-]/
888
973
  def scan_block_scalar_indicators(start_mark)
889
974
  # See the specification for details.
890
975
  chomping = nil
891
976
  increment = nil
892
- ch = peek
893
- if /[+-]/ =~ ch.chr
977
+ ch = peek0
978
+ if PLUS_MIN =~ ch.chr
894
979
  chomping = ch == ?+
895
- forward
896
- ch = peek
897
- if (?0..?9) === ch
898
- increment = ch.to_i
980
+ forward1
981
+ ch = peek0
982
+ if ch.__is_ascii_num
983
+ increment = ch.chr.to_i
899
984
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
900
- forward
985
+ forward1
901
986
  end
902
- elsif (?0..?9) === ch
903
- increment = ch
987
+ elsif ch.__is_ascii_num
988
+ increment = ch.chr.to_i
904
989
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
905
- forward
906
- ch = peek
907
- if /[+-]/ =~ ch.chr
990
+ forward1
991
+ ch = peek0
992
+ if PLUS_MIN =~ ch.chr
908
993
  chomping = ch == ?+
909
- forward
994
+ forward1
910
995
  end
911
996
  end
912
- ch = peek
913
- raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
997
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
914
998
  [chomping, increment]
915
999
  end
916
1000
 
917
1001
  def scan_block_scalar_ignored_line(start_mark)
918
1002
  # See the specification for details.
919
- forward while peek == 32
920
- if peek == ?#
921
- forward while !"\0\r\n\x85".include?(peek)
1003
+ forward1 while peek0 == 32
1004
+ if peek0 == ?#
1005
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
922
1006
  end
923
- ch = peek
924
-
925
- raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark) if !"\0\r\n\x85".include?(ch)
1007
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{peek0}",get_mark) if !NULL_OR_LINEBR.include?(peek0)
926
1008
  scan_line_break
927
1009
  end
928
1010
 
1011
+ BLANK_OR_LINEBR = " \r\n\x85"
929
1012
  def scan_block_scalar_indentation
930
1013
  # See the specification for details.
931
1014
  chunks = []
932
1015
  max_indent = 0
933
1016
  end_mark = get_mark
934
- while " \r\n\x85".include?(peek)
935
- if peek != 32
1017
+ while BLANK_OR_LINEBR.include?(peek0)
1018
+ if peek0 != 32
936
1019
  chunks << scan_line_break
937
1020
  end_mark = get_mark
938
1021
  else
939
- forward
940
- max_indent = @column if @column > max_indent
1022
+ forward1
1023
+ max_indent = column if column > max_indent
941
1024
  end
942
1025
  end
943
1026
  [chunks, max_indent, end_mark]
944
1027
  end
945
1028
 
1029
+ FULL_LINEBR = "\r\n\x85"
946
1030
  def scan_block_scalar_breaks(indent)
947
1031
  # See the specification for details.
948
1032
  chunks = []
949
1033
  end_mark = get_mark
950
- forward while @column < indent && peek == 32
951
- while "\r\n\x85".include?(peek)
1034
+ forward1 while @column < indent && peek0 == 32
1035
+ while FULL_LINEBR.include?(peek0)
952
1036
  chunks << scan_line_break
953
1037
  end_mark = get_mark
954
- forward while @column < indent && peek == 32
1038
+ forward1 while @column < indent && peek0 == 32
955
1039
  end
956
1040
  [chunks, end_mark]
957
1041
  end
@@ -966,16 +1050,16 @@ module RbYAML
966
1050
  double = style == ?"
967
1051
  chunks = []
968
1052
  start_mark = get_mark
969
- quote = peek
970
- forward
1053
+ quote = peek0
1054
+ forward1
971
1055
  chunks += scan_flow_scalar_non_spaces(double, start_mark)
972
- while peek != quote
1056
+ while peek0 != quote
973
1057
  chunks += scan_flow_scalar_spaces(double, start_mark)
974
1058
  chunks += scan_flow_scalar_non_spaces(double, start_mark)
975
1059
  end
976
- forward
1060
+ forward1
977
1061
  end_mark = get_mark
978
- ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
1062
+ ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
979
1063
  end
980
1064
 
981
1065
  ESCAPE_REPLACEMENTS = {
@@ -1000,42 +1084,43 @@ module RbYAML
1000
1084
  'x' => 2
1001
1085
  }
1002
1086
 
1087
+ SPACES_AND_STUFF = "'\"\\\0 \t\r\n\x85"
1088
+ DOUBLE_ESC = "\"\\"
1089
+ NOT_HEXA = /[^0-9A-Fa-f]/
1003
1090
  def scan_flow_scalar_non_spaces(double, start_mark)
1004
1091
  # See the specification for details.
1005
1092
  chunks = []
1006
1093
  while true
1007
1094
  length = 0
1008
- length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
1095
+ length += 1 while !SPACES_AND_STUFF.include?(peek(length))
1009
1096
  if length!=0
1010
1097
  chunks << prefix(length)
1011
1098
  forward(length)
1012
1099
  end
1013
- ch = peek
1014
- if !double && ch == ?' && peek(1) == ?'
1100
+ ch = peek0
1101
+ if !double && ch == ?' && peek1 == ?'
1015
1102
  chunks << ?'
1016
- forward(2)
1017
- elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
1103
+ forward2
1104
+ elsif (double && ch == ?') || (!double && DOUBLE_ESC.include?(ch))
1018
1105
  chunks << ch
1019
- forward
1106
+ forward1
1020
1107
  elsif double && ch == ?\\
1021
- forward
1022
- ch = peek
1108
+ forward1
1109
+ ch = peek0
1023
1110
  if ESCAPE_REPLACEMENTS.member?(ch.chr)
1024
1111
  chunks << ESCAPE_REPLACEMENTS[ch.chr]
1025
- forward
1112
+ forward1
1026
1113
  elsif ESCAPE_CODES.member?(ch.chr)
1027
1114
  length = ESCAPE_CODES[ch.chr]
1028
- forward
1029
- length.times do |k|
1030
- if /[0-9A-Fa-f]/ !~ peek(k).chr
1031
- raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
1032
- "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
1033
- end
1115
+ forward1
1116
+ if NOT_HEXA =~ prefix(length)
1117
+ raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
1118
+ "expected escape sequence of #{length} hexdecimal numbers, but found something else: #{prefix(length)}}",get_mark)
1034
1119
  end
1035
- code = prefix(length).to_i.to_s(16)
1120
+ code = prefix(length).to_i(16).to_s
1036
1121
  chunks << code
1037
1122
  forward(length)
1038
- elsif "\r\n\x85".include?(ch)
1123
+ elsif FULL_LINEBR.include?(ch)
1039
1124
  scan_line_break
1040
1125
  chunks += scan_flow_scalar_breaks(double, start_mark)
1041
1126
  else
@@ -1051,16 +1136,16 @@ module RbYAML
1051
1136
  # See the specification for details.
1052
1137
  chunks = []
1053
1138
  length = 0
1054
- length += 1 while /[ \t]/ =~ peek(length).chr
1139
+ length += 1 while BLANK_T.include?(peek(length))
1055
1140
  whitespaces = prefix(length)
1056
1141
  forward(length)
1057
- ch = peek
1142
+ ch = peek0
1058
1143
  if ch == ?\0
1059
1144
  raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
1060
- elsif "\r\n\x85".include?(ch)
1145
+ elsif FULL_LINEBR.include?(ch)
1061
1146
  line_break = scan_line_break
1062
1147
  breaks = scan_flow_scalar_breaks(double, start_mark)
1063
- if line_break != ?\n
1148
+ if line_break != "\n"
1064
1149
  chunks << line_break
1065
1150
  elsif breaks.empty?
1066
1151
  chunks << ' '
@@ -1079,17 +1164,22 @@ module RbYAML
1079
1164
  # Instead of checking indentation, we check for document
1080
1165
  # separators.
1081
1166
  prefix = prefix(3)
1082
- if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1167
+ if (prefix == "---" || prefix == "...") &&NULL_BL_T_LINEBR.include?(peek3)
1083
1168
  raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
1084
1169
  end
1085
- forward while /[ \t]/ =~ peek.chr
1086
- if "\r\n\x85".include?(peek)
1170
+ forward1 while BLANK_T.include?(peek0)
1171
+ if FULL_LINEBR.include?(peek0)
1087
1172
  chunks << scan_line_break
1088
1173
  else
1089
1174
  return chunks
1090
1175
  end
1091
1176
  end
1092
1177
  end
1178
+
1179
+
1180
+ R_flowzero = /[\0 \t\r\n\x85]|(:[\0 \t\r\n\x28])/
1181
+ R_flownonzero = /[\0 \t\r\n\x85\[\]{},:?]/
1182
+ S4 = "\0 \t\r\n\x28[]{}"
1093
1183
 
1094
1184
  def scan_plain
1095
1185
  # See the specification for details.
@@ -1098,25 +1188,25 @@ module RbYAML
1098
1188
  # We also keep track of the `allow_simple_key` flag here.
1099
1189
  # Indentation rules are loosed for the flow context.
1100
1190
  chunks = []
1101
- start_mark = get_mark
1102
- end_mark = start_mark
1191
+ end_mark = start_mark = get_mark
1103
1192
  indent = @indent+1
1104
1193
  # We allow zero indentation for scalars, but then we need to check for
1105
1194
  # document separators at the beginning of the line.
1106
1195
  #if indent == 0
1107
1196
  # indent = 1
1108
1197
  spaces = []
1109
- while true
1198
+ if @flow_zero
1199
+ f_nzero, r_check = false, R_flowzero
1200
+ else
1201
+ f_nzero, r_check = true, R_flownonzero
1202
+ end
1203
+
1204
+ while peek0 != ?#
1110
1205
  length = 0
1111
- break if peek == ?#
1112
- while true
1113
- ch = peek(length)
1114
- if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x28".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
1115
- break
1116
- end
1117
- length += 1
1118
- end
1119
- if @flow_level != 0 && ch == ?: && !"\0 \t\r\n\x28[]{}".include?(peek(length+1))
1206
+ chunk_size = 32
1207
+ chunk_size += 32 until length = (r_check =~ prefix(chunk_size))
1208
+ ch = peek(length)
1209
+ if f_nzero && ch == ?: && !S4.include?(peek(length+1))
1120
1210
  forward(length)
1121
1211
  raise ScannerError.new("while scanning a plain scalar",start_mark,"found unexpected ':'",get_mark,"Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
1122
1212
  end
@@ -1127,11 +1217,12 @@ module RbYAML
1127
1217
  forward(length)
1128
1218
  end_mark = get_mark
1129
1219
  spaces = scan_plain_spaces(indent, start_mark)
1130
- break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
1220
+ break if !spaces || (@flow_zero && @column < indent)
1131
1221
  end
1132
- return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
1222
+ return ScalarToken.new(chunks.to_s, true, start_mark, end_mark)
1133
1223
  end
1134
1224
 
1225
+ END_OR_START = /^(---|\.\.\.)[\0 \t\r\n\x85]$/
1135
1226
  def scan_plain_spaces(indent, start_mark)
1136
1227
  # See the specification for details.
1137
1228
  # The specification is really confusing about tabs in plain scalars.
@@ -1141,44 +1232,43 @@ module RbYAML
1141
1232
  length += 1 while peek(length) == 32
1142
1233
  whitespaces = prefix(length)
1143
1234
  forward(length)
1144
- ch = peek
1145
- if "\r\n\x85".include?(ch)
1235
+ ch = peek0
1236
+ if FULL_LINEBR.include?(ch)
1146
1237
  line_break = scan_line_break
1147
1238
  @allow_simple_key = true
1148
- prefix = prefix(3)
1149
- return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1239
+ return if END_OR_START =~ prefix(4)
1150
1240
  breaks = []
1151
- while " \r\n\x85".include?(peek)
1152
- if peek == 32
1153
- forward
1241
+ while BLANK_OR_LINEBR.include?(peek0)
1242
+ if peek0 == 32
1243
+ forward1
1154
1244
  else
1155
1245
  breaks << scan_line_break
1156
- prefix = prefix(3)
1157
- return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1246
+ return if END_OR_START =~ prefix(4)
1158
1247
  end
1159
1248
  end
1160
- if line_break != '\n'
1249
+ if line_break != "\n"
1161
1250
  chunks << line_break
1162
- elsif breaks.empty?
1163
- chunks << ' '
1251
+ elsif breaks.nil? || breaks.empty?
1252
+ chunks << " "
1164
1253
  end
1165
1254
  chunks += breaks
1166
- elsif !whitespaces.empty?
1255
+ else
1167
1256
  chunks << whitespaces
1168
1257
  end
1169
1258
  chunks
1170
1259
  end
1171
1260
 
1261
+
1172
1262
  def scan_tag_handle(name, start_mark)
1173
1263
  # See the specification for details.
1174
1264
  # For some strange reasons, the specification does not allow '_' in
1175
1265
  # tag handles. I have allowed it anyway.
1176
- ch = peek
1266
+ ch = peek0
1177
1267
  raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) if ch != ?!
1178
1268
  length = 1
1179
1269
  ch = peek(length)
1180
1270
  if ch != 32
1181
- while /[-_0-9A-Za-z]/ =~ ch.chr
1271
+ while ALPHA_REG =~ ch.chr
1182
1272
  length += 1
1183
1273
  ch = peek(length)
1184
1274
  end
@@ -1193,13 +1283,14 @@ module RbYAML
1193
1283
  value
1194
1284
  end
1195
1285
 
1286
+ STRANGE_CHR = /[\]\[\-';\/?:@&=+$,.!~*()%\w]/
1196
1287
  def scan_tag_uri(name, start_mark)
1197
1288
  # See the specification for details.
1198
1289
  # Note: we do not check if URI is well-formed.
1199
1290
  chunks = []
1200
1291
  length = 0
1201
1292
  ch = peek(length)
1202
- while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
1293
+ while STRANGE_CHR =~ ch.chr
1203
1294
  if ch == ?%
1204
1295
  chunks << prefix(length)
1205
1296
  forward(length)
@@ -1213,29 +1304,27 @@ module RbYAML
1213
1304
  if length!=0
1214
1305
  chunks << prefix(length)
1215
1306
  forward(length)
1216
- length = 0
1217
1307
  end
1218
1308
 
1219
1309
  raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
1220
- chunks.join('')
1310
+ chunks.to_s
1221
1311
  end
1222
1312
 
1313
+ HEXA_REG = /[0-9A-Fa-f]/
1223
1314
  def scan_uri_escapes(name, start_mark)
1224
1315
  # See the specification for details.
1225
1316
  bytes = []
1226
1317
  mark = get_mark
1227
- while peek == ?%
1228
- forward
1229
- 2.times do |k|
1230
- raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
1231
- get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
1232
- end
1233
- bytes << prefix(2).to_i.to_s(16)
1234
- forward(2)
1318
+ while peek0 == ?%
1319
+ forward1
1320
+ raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek1} and #{peek2}",get_mark) if HEXA_REG !~ peek1.chr || HEXA_REG !~ peek2.chr
1321
+ bytes << prefix(2).to_i(16).to_s
1322
+ forward2
1235
1323
  end
1236
- bytes.join('')
1324
+ bytes.to_s
1237
1325
  end
1238
1326
 
1327
+ RN = "\r\n"
1239
1328
  def scan_line_break
1240
1329
  # Transforms:
1241
1330
  # '\r\n' : '\n'
@@ -1243,12 +1332,11 @@ module RbYAML
1243
1332
  # '\n' : '\n'
1244
1333
  # '\x85' : '\n'
1245
1334
  # default : ''
1246
- ch = peek
1247
- if "\r\n\x85".include?(ch)
1248
- if prefix(2) == "\r\n"
1249
- forward(2)
1335
+ if FULL_LINEBR.include?(peek0)
1336
+ if prefix2 == RN
1337
+ forward2
1250
1338
  else
1251
- forward
1339
+ forward1
1252
1340
  end
1253
1341
  return "\n"
1254
1342
  end