RbYAML 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. data/lib/rbyaml.rb +14 -256
  2. data/lib/rbyaml.rb.~1.2.~ +383 -0
  3. data/lib/rbyaml/composer.rb +9 -11
  4. data/lib/rbyaml/{composer.rb.~1.2.~ → composer.rb.~1.3.~} +28 -25
  5. data/lib/rbyaml/constants.rb +95 -0
  6. data/lib/rbyaml/constructor.rb +180 -89
  7. data/lib/rbyaml/{constructor.rb.~1.2.~ → constructor.rb.~1.9.~} +137 -95
  8. data/lib/rbyaml/dumper.rb +12 -9
  9. data/lib/rbyaml/dumper.rb.~1.3.~ +36 -0
  10. data/lib/rbyaml/emitter.rb +14 -28
  11. data/lib/rbyaml/{emitter.rb.~1.2.~ → emitter.rb.~1.6.~} +22 -33
  12. data/lib/rbyaml/error.rb +4 -57
  13. data/lib/rbyaml/error.rb.~1.2.~ +75 -0
  14. data/lib/rbyaml/events.rb +8 -14
  15. data/lib/rbyaml/{events.rb.~1.2.~ → events.rb.~1.4.~} +29 -6
  16. data/lib/rbyaml/nodes.rb +5 -5
  17. data/lib/rbyaml/{nodes.rb.~1.2.~ → nodes.rb.~1.3.~} +13 -9
  18. data/lib/rbyaml/parser.rb +70 -108
  19. data/lib/rbyaml/parser.rb.~1.4.~ +632 -0
  20. data/lib/rbyaml/representer.rb +19 -157
  21. data/lib/rbyaml/representer.rb.old +317 -0
  22. data/lib/rbyaml/{representer.rb.~1.2.~ → representer.rb.~1.5.~} +60 -26
  23. data/lib/rbyaml/resolver.rb +6 -6
  24. data/lib/rbyaml/{resolver.rb.~1.1.~ → resolver.rb.~1.6.~} +20 -20
  25. data/lib/rbyaml/rubytypes.rb +391 -0
  26. data/lib/rbyaml/scanner.rb +123 -225
  27. data/lib/rbyaml/{scanner.rb.~1.2.~ → scanner.rb.~1.5.~} +466 -378
  28. data/lib/rbyaml/serializer.rb +9 -9
  29. data/lib/rbyaml/{serializer.rb.~1.2.~ → serializer.rb.~1.4.~} +19 -17
  30. data/lib/rbyaml/stream.rb +48 -0
  31. data/lib/rbyaml/tag.rb +72 -0
  32. data/lib/rbyaml/tokens.rb +22 -16
  33. data/lib/rbyaml/{tokens.rb.~1.2.~ → tokens.rb.~1.3.~} +44 -4
  34. data/lib/rbyaml/types.rb +146 -0
  35. data/lib/rbyaml/util.rb.~1.3.~ +38 -0
  36. data/lib/rbyaml/yaml.rb +22 -32
  37. data/lib/rbyaml/{yaml.rb.~1.2.~ → yaml.rb.~1.5.~} +17 -17
  38. data/test/load_one.rb +6 -0
  39. data/test/load_one_yaml.rb +6 -0
  40. data/test/output_events.rb +9 -0
  41. data/test/test_add_ctor.rb +51 -0
  42. data/test/test_add_ctor.rb.~1.1.~ +30 -0
  43. data/test/test_bm.rb +2 -2
  44. data/test/test_bm.rb.~1.1.~ +28 -0
  45. data/test/test_gems.rb +10 -0
  46. data/test/test_one.rb.~1.1.~ +5 -0
  47. data/test/test_one_syck.rb +5 -0
  48. data/test/test_rbyaml.rb +63 -32
  49. data/test/test_rbyaml.rb.~1.6.~ +59 -0
  50. data/test/{test_rbyaml.rb.~1.2.~ → test_rbyaml_old.rb} +13 -4
  51. data/test/test_time_events.rb +24 -0
  52. data/test/test_time_nodes.rb +24 -0
  53. data/test/test_time_tokens.rb +24 -0
  54. data/test/yaml/gems_new.yml +147456 -0
  55. data/test/yaml/test1.rb +8 -0
  56. data/test/yaml/test10.rb +14 -0
  57. data/test/yaml/test11.rb +13 -0
  58. data/test/yaml/test12.rb +9 -0
  59. data/test/yaml/test13.rb +9 -0
  60. data/test/yaml/test14.rb +13 -0
  61. data/test/yaml/test15.rb +12 -0
  62. data/test/yaml/test16.rb +11 -0
  63. data/test/yaml/test16.rb.~1.1.~ +11 -0
  64. data/test/yaml/test17.rb +10 -0
  65. data/test/yaml/test18.rb +13 -0
  66. data/test/yaml/test19.rb +9 -0
  67. data/test/yaml/test19.yml +1 -1
  68. data/test/yaml/test2.rb +8 -0
  69. data/test/yaml/test20.rb +11 -0
  70. data/test/yaml/test20.rb.~1.1.~ +9 -0
  71. data/test/yaml/test20.yml +1 -1
  72. data/test/yaml/test3.rb +13 -0
  73. data/test/yaml/test4.rb +13 -0
  74. data/test/yaml/test5.rb +8 -0
  75. data/test/yaml/test6.rb +10 -0
  76. data/test/yaml/test7.rb +15 -0
  77. data/test/yaml/test8.rb +15 -0
  78. data/test/yaml/test9.rb +13 -0
  79. metadata +61 -16
  80. data/lib/rbyaml/dumper.rb.~1.2.~ +0 -43
  81. data/lib/rbyaml/parser.rb.~1.2.~ +0 -494
@@ -23,42 +23,43 @@
23
23
  # Read comments in the Scanner code for more details.
24
24
  #
25
25
 
26
+ require 'rbyaml/util'
26
27
  require 'rbyaml/error'
27
28
  require 'rbyaml/tokens'
28
29
 
29
30
  module RbYAML
30
31
  class ScannerError < MarkedYAMLError
31
32
  end
32
-
33
- class SimpleKey
34
- attr_reader :token_number, :required, :index, :line, :column, :mark
35
-
36
- def initialize(token_number,required,index,line,column,mark)
37
- @token_number = token_number
38
- @required = required
39
- @index = index
40
- @line = line
41
- @column = column
42
- @mark = mark
33
+ class ReaderError < YAMLError
34
+ def initialize(name, position, character, encoding, reason)
35
+ @name = name
36
+ @position = position
37
+ @character = character
38
+ @encoding = encoding
39
+ @reason = reason
40
+ end
41
+
42
+ def to_s
43
+ if @character.__is_str
44
+ "'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
45
+ else
46
+ "unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
47
+ end
43
48
  end
44
49
  end
45
50
 
46
- module Scanner
47
- def initialize_scanner
48
- # It is assumed that Scanner and Reader will mixin to the same point.
49
- # Reader do the dirty work of checking for BOM. It also adds NUL to the end.
50
- #
51
- # Reader supports the following methods
52
- # self.peek(i=0) # peek the next i-th character
53
- # self.prefix(l=1) # peek the next l characters
54
- # self.forward(l=1) # read the next l characters and move the pointer.
51
+ SimpleKey = Struct.new(:token_number, :required, :index, :line, :column, :mark)
55
52
 
53
+ class Scanner
54
+ attr_reader :column, :stream, :stream_pointer, :eof, :buffer, :pointer, :index, :line
55
+ def initialize(stream)
56
56
  # Had we reached the end of the stream?
57
57
  @done = false
58
58
 
59
59
  # The number of unclosed '{' and '['. `flow_level == 0` means block
60
60
  # context.
61
61
  @flow_level = 0
62
+ @flow_zero = true
62
63
 
63
64
  # List of processed tokens that are not yet emitted.
64
65
  @tokens = []
@@ -104,6 +105,199 @@ module RbYAML
104
105
  # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
105
106
  # '[', or '{' tokens.
106
107
  @possible_simple_keys = {}
108
+
109
+ @stream = nil
110
+ @stream_pointer = 0
111
+ @eof = true
112
+ @buffer = ""
113
+ @buffer_length = 0
114
+ @pointer = 0
115
+ @pointer1 = 1
116
+ @column = 0
117
+ if stream.__is_str
118
+ @name = "<string>"
119
+ @raw_buffer = stream
120
+ else
121
+ @stream = stream
122
+ @name = stream.respond_to?(:path) ? stream.path : stream.inspect
123
+ @eof = false
124
+ @raw_buffer = ""
125
+ end
126
+ end
127
+
128
+ def peek(index=0)
129
+ peekn(index)
130
+ end
131
+
132
+ def peek0
133
+ update(1) unless @pointer1 < @buffer_length
134
+ @buffer[@pointer]
135
+ end
136
+
137
+ def peek1
138
+ update(2) unless @pointer1+1 < @buffer_length
139
+ @buffer[@pointer1]
140
+ end
141
+
142
+ def peek2
143
+ update(3) unless @pointer1+2 < @buffer_length
144
+ @buffer[@pointer1+1]
145
+ end
146
+
147
+ def peek3
148
+ update(4) unless @pointer1+3 < @buffer_length
149
+ @buffer[@pointer1+2]
150
+ end
151
+
152
+ def peekn(index=0)
153
+ pix = @pointer1+index
154
+ unless pix < @buffer_length
155
+ update(index+1)
156
+ pix = @pointer1+index
157
+ end
158
+ @buffer[pix-1]
159
+ end
160
+
161
+ def prefix(length=1)
162
+ update(length) unless @pointer+length < @buffer_length
163
+ @buffer[@pointer...@pointer+length]
164
+ end
165
+
166
+ def prefix2()
167
+ update(2) unless @pointer1+1 < @buffer_length
168
+ @buffer[@pointer..@pointer1]
169
+ end
170
+
171
+ def forward(length=1)
172
+ case length
173
+ when 0: forward0
174
+ when 1: forward1
175
+ when 2: forward2
176
+ when 3: forward3
177
+ when 4: forward4
178
+ when 5: forward5
179
+ when 6: forward6
180
+ else forwardn(length)
181
+ end
182
+ end
183
+
184
+ def forward0
185
+ update(1) unless @pointer1 < @buffer_length
186
+ end
187
+
188
+ LINE_BR = "\n\x85"
189
+
190
+ def forward1
191
+ update(2) unless @pointer1+1 < @buffer_length
192
+ buff = @buffer[@pointer...@pointer1+1]
193
+ index = buff.rindex(LINE_BR_REG)
194
+ @column = index ? -index : column+1
195
+ @pointer += 1
196
+ @pointer1 += 1
197
+ end
198
+
199
+ def forward2
200
+ update(3) unless @pointer1+2 < @buffer_length
201
+ buff = @buffer[@pointer...@pointer1+2]
202
+ index = buff.rindex(LINE_BR_REG)
203
+ @column = index ? 1-index : column+2
204
+ @pointer += 2
205
+ @pointer1 += 2
206
+ end
207
+
208
+ def forward3
209
+ update(4) unless @pointer1+3 < @buffer_length
210
+ buff = @buffer[@pointer...@pointer1+3]
211
+ index = buff.rindex(LINE_BR_REG)
212
+ @column = index ? 2-index : column+3
213
+ @pointer += 3
214
+ @pointer1 += 3
215
+ end
216
+
217
+ def forward4
218
+ update(5) unless @pointer1+4 < @buffer_length
219
+ buff = @buffer[@pointer...@pointer1+4]
220
+ index = buff.rindex(LINE_BR_REG)
221
+ @column = index ? 3-index : column+4
222
+ @pointer += 4
223
+ @pointer1 += 4
224
+ end
225
+
226
+ def forward5
227
+ update(6) unless @pointer1+5 < @buffer_length
228
+ buff = @buffer[@pointer...@pointer1+5]
229
+ index = buff.rindex(LINE_BR_REG)
230
+ @column = index ? 4-index : column+5
231
+ @pointer += 5
232
+ @pointer1 += 5
233
+ end
234
+
235
+ def forward6
236
+ update(7) unless @pointer1+6 < @buffer_length
237
+ buff = @buffer[@pointer...@pointer1+6]
238
+ index = buff.rindex(LINE_BR_REG)
239
+ @column = index ? 5-index : column+6
240
+ @pointer += 6
241
+ @pointer1 += 6
242
+ end
243
+
244
+ LINE_BR_REG = /[\n\x85]|(?:\r[^\n])/
245
+ def forwardn(length)
246
+ update(length + 1) unless @pointer1+length < @buffer_length
247
+ buff = @buffer[@pointer...@pointer+length]
248
+ index = buff.rindex(LINE_BR_REG)
249
+ @column = index ? (length-index)-1 : column+length
250
+ @pointer += length
251
+ @pointer1 += length
252
+ end
253
+
254
+ def get_mark
255
+ if @stream.nil?
256
+ Mark.new(@name,@column,@buffer,@pointer)
257
+ else
258
+ Mark.new(@name,@column,nil,nil)
259
+ end
260
+ end
261
+
262
+ NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/
263
+ def check_printable(data)
264
+ if NON_PRINTABLE =~ data
265
+ position = @buffer.length-@pointer+($~.offset(0)[0])
266
+ raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed"),"special characters are not allowed"
267
+ end
268
+ end
269
+
270
+
271
+ def update(length)
272
+ return if @raw_buffer.nil?
273
+ @buffer = @buffer[@pointer..-1]
274
+ @pointer = 0
275
+ while @buffer.length < length
276
+ unless @eof
277
+ data = @stream.read(1024)
278
+ if data && !data.empty?
279
+ @buffer << data
280
+ @stream_pointer += data.length
281
+ @raw_buffer = ""
282
+ else
283
+ @eof = true
284
+ @buffer << ?\0
285
+ @raw_buffer = nil
286
+ break
287
+ end
288
+ else
289
+ @buffer << @raw_buffer << ?\0
290
+ @raw_buffer = nil
291
+ break
292
+ end
293
+ end
294
+ @buffer_length = @buffer.length
295
+ if @eof
296
+ check_printable(@buffer[(-length)..-2])
297
+ else
298
+ check_printable(@buffer[(-length)..-1])
299
+ end
300
+ @pointer1 = @pointer+1
107
301
  end
108
302
 
109
303
  def check_token(*choices)
@@ -144,70 +338,51 @@ module RbYAML
144
338
 
145
339
  def need_more_tokens
146
340
  return false if @done
147
- return true if @tokens.empty?
148
- # The current token may be a potential simple key, so we
149
- # need to look further.
150
- stale_possible_simple_keys
151
- return true if next_possible_simple_key == @tokens_taken
341
+ @tokens.empty? || next_possible_simple_key == @tokens_taken
152
342
  end
153
343
 
344
+ ENDING = /^---[\0 \t\r\n\x85]$/
345
+ START = /^\.\.\.[\0 \t\r\n\x85]$/
346
+ NULL_OR_OTHER = "\0 \t\r\n\x85"
347
+ # BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@`]|([\-?:][^\0 \t\r\n\x85]))/ #Since current SYCK handles this one wrong, we have to allow backtick right now.
348
+ BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@]|([\-?:][^\0 \t\r\n\x85]))/
154
349
  def fetch_more_tokens
155
350
  # Eat whitespaces and comments until we reach the next token.
156
351
  scan_to_next_token
157
352
 
158
353
  # Remove obsolete possible simple keys.
159
- stale_possible_simple_keys
354
+ # stale_possible_simple_keys
160
355
 
161
356
  # Compare the current indentation and column. It may add some tokens
162
357
  # and decrease the current indentation level.
163
358
  unwind_indent(@column)
164
359
 
165
360
  # Peek the next character.
166
- ch = peek
167
-
168
- return case
169
- # Is it the end of stream?
170
- when ch == ?\0: fetch_stream_end
171
- # Is it a directive?
172
- when ch == ?% && check_directive: fetch_directive
173
- # Is it the document start?
174
- when ch == ?- && check_document_start: fetch_document_start
175
- # Is it the document end?
176
- when ch == ?. && check_document_end: fetch_document_end
177
- # Is it the flow sequence start indicator?
178
- when ch == ?[: fetch_flow_sequence_start
179
- # Is it the flow mapping start indicator?
180
- when ch == ?{: fetch_flow_mapping_start
181
- # Is it the flow sequence end indicator?
182
- when ch == ?]: fetch_flow_sequence_end
183
- # Is it the flow mapping end indicator?
184
- when ch == ?}: fetch_flow_mapping_end
185
- # Is it the flow entry indicator?
186
- when ch == ?,: fetch_flow_entry
187
- # Is it the block entry indicator?
188
- when ch == ?- && check_block_entry: fetch_block_entry
189
- # Is it the key indicator?
190
- when ch == ?? && check_key: fetch_key
191
- # Is it the value indicator?
192
- when ch == ?: && check_value: fetch_value
193
- # Is it an alias?
194
- when ch == ?*: fetch_alias
195
- # Is it an anchor?
196
- when ch == ?&: fetch_anchor
197
- # Is it a tag?
198
- when ch == ?!: fetch_tag
199
- # Is it a literal scalar?
200
- when ch == ?| && @flow_level==0: fetch_literal
201
- # Is it a folded scalar?
202
- when ch == ?> && @flow_level==0: fetch_folded
203
- # Is it a single quoted scalar?
204
- when ch == ?': fetch_single
205
- # Is it a double quoted scalar?
206
- when ch == ?": fetch_double
207
- # It must be a plain scalar then.
208
- when check_plain: fetch_plain
209
- else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
210
- end
361
+ ch = peek0
362
+ colz = @column == 0
363
+
364
+ case ch
365
+ when ?\0: return fetch_stream_end
366
+ when ?': return fetch_single
367
+ when ?": return fetch_double
368
+ when ??: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_key end
369
+ when ?:: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_value end
370
+ when ?%: if colz: return fetch_stream_end end
371
+ when ?-: if colz && ENDING =~ prefix(4): return fetch_document_start; elsif NULL_OR_OTHER.include?(peek1): return fetch_block_entry end
372
+ when ?.: if colz && START =~ prefix(4): return fetch_document_end end
373
+ when ?[: return fetch_flow_sequence_start
374
+ when ?{: return fetch_flow_mapping_start
375
+ when ?]: return fetch_flow_sequence_end
376
+ when ?}: return fetch_flow_mapping_end
377
+ when ?,: return fetch_flow_entry
378
+ when ?*: return fetch_alias
379
+ when ?&: return fetch_anchor
380
+ when ?!: return fetch_tag
381
+ when ?|: if @flow_zero: return fetch_literal end
382
+ when ?>: if @flow_zero: return fetch_folded end
383
+ end
384
+ return fetch_plain if BEG =~ prefix(2)
385
+ raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
211
386
  end
212
387
 
213
388
  # Simple keys treatment.
@@ -215,58 +390,22 @@ module RbYAML
215
390
  def next_possible_simple_key
216
391
  # Return the number of the nearest possible simple key. Actually we
217
392
  # don't need to loop through the whole dictionary.
218
- min_token_number = nil
219
- for level in @possible_simple_keys.keys
220
- key = @possible_simple_keys[level]
221
- if min_token_number.nil? || key.token_number < min_token_number
222
- min_token_number = key.token_number
223
- end
224
- end
225
- min_token_number
393
+ @possible_simple_keys.each_value {|key| return key.token_number if key.token_number}
394
+ nil
226
395
  end
227
396
 
228
- def stale_possible_simple_keys
229
- # Remove entries that are no longer possible simple keys. According to
230
- # the YAML specification, simple keys
231
- # - should be limited to a single line,
232
- # - should be no longer than 1024 characters.
233
- # Disabling this procedure will allow simple keys of any length and
234
- # height (may cause problems if indentation is broken though).
235
- @possible_simple_keys.delete_if {|level,key|
236
- if key.line != @line || @index-key.index > 1024
237
- raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'",get_mark) if key.required
238
- return true
239
- end
240
- return false
241
- }
242
- end
243
-
244
397
  def save_possible_simple_key
245
398
  # The next token may start a simple key. We check if it's possible
246
399
  # and save its position. This function is called for
247
400
  # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
248
-
249
- # Check if a simple key is required at the current position.
250
- required = @flow_level==0 && @indent == @column
251
-
252
401
  # The next token might be a simple key. Let's save its number and
253
402
  # position.
254
- if @allow_simple_key
255
- remove_possible_simple_key
256
- token_number = @tokens_taken+@tokens.length
257
- key = SimpleKey.new(token_number, required,@index,@line,@column,get_mark)
258
- @possible_simple_keys[@flow_level] = key
259
- end
403
+ @possible_simple_keys[@flow_level] = SimpleKey.new(@tokens_taken+@tokens.length, @flow_zero && @indent == @column,-1,-1,column,get_mark) if @allow_simple_key
260
404
  end
261
405
 
262
- def remove_possible_simple_key
263
- # Remove the saved possible key position at the current flow level.
264
- key = @possible_simple_keys[@flow_level] if @possible_simple_keys.member?(@flow_level)
265
- end
266
-
267
406
  # Indentation functions.
268
407
 
269
- def unwind_indent(column)
408
+ def unwind_indent(col)
270
409
  ## In flow context, tokens should respect indentation.
271
410
  ## Actually the condition should be `@indent >= column` according to
272
411
  ## the spec. But this condition will prohibit intuitively correct
@@ -280,20 +419,20 @@ module RbYAML
280
419
 
281
420
  # In the flow context, indentation is ignored. We make the scanner less
282
421
  # restrictive than the specification requires.
283
- return nil if @flow_level != 0
422
+ return nil if !@flow_zero
284
423
  # In block context, we may need to issue the BLOCK-END tokens.
285
- while @indent > column
424
+ while @indent > col
286
425
  mark = get_mark
287
- @indent = @indents.pop()
426
+ @indent = @indents.pop
288
427
  @tokens << BlockEndToken.new(mark, mark)
289
428
  end
290
429
  end
291
430
 
292
- def add_indent(column)
431
+ def add_indent(col)
293
432
  # Check if we need to increase indentation.
294
- if @indent < column
433
+ if @indent < col
295
434
  @indents << @indent
296
- @indent = column
435
+ @indent = col
297
436
  return true
298
437
  end
299
438
  return false
@@ -329,7 +468,6 @@ module RbYAML
329
468
  # Set the current indentation to -1.
330
469
  unwind_indent(-1)
331
470
  # Reset simple keys.
332
- remove_possible_simple_key
333
471
  @allow_simple_key = false
334
472
  # Scan and add DIRECTIVE.
335
473
  @tokens << scan_directive
@@ -348,11 +486,10 @@ module RbYAML
348
486
  unwind_indent(-1)
349
487
  # Reset simple keys. Note that there could not be a block collection
350
488
  # after '---'.
351
- remove_possible_simple_key
352
489
  @allow_simple_key = false
353
490
  # Add DOCUMENT-START or DOCUMENT-END.
354
491
  start_mark = get_mark
355
- forward(3)
492
+ forward3
356
493
  end_mark = get_mark
357
494
  @tokens << token.new(start_mark, end_mark)
358
495
  end
@@ -370,11 +507,12 @@ module RbYAML
370
507
  save_possible_simple_key
371
508
  # Increase the flow level.
372
509
  @flow_level += 1
510
+ @flow_zero = false
373
511
  # Simple keys are allowed after '[' and '{'.
374
512
  @allow_simple_key = true
375
513
  # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
376
514
  start_mark = get_mark
377
- forward
515
+ forward1
378
516
  end_mark = get_mark
379
517
  @tokens << token.new(start_mark, end_mark)
380
518
  end
@@ -388,15 +526,16 @@ module RbYAML
388
526
  end
389
527
 
390
528
  def fetch_flow_collection_end(token)
391
- # Reset possible simple key on the current level.
392
- remove_possible_simple_key
393
529
  # Decrease the flow level.
394
530
  @flow_level -= 1
531
+ if @flow_level == 0
532
+ @flow_zero = true
533
+ end
395
534
  # No simple keys after ']' or '}'.
396
535
  @allow_simple_key = false
397
536
  # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
398
537
  start_mark = get_mark
399
- forward
538
+ forward1
400
539
  end_mark = get_mark
401
540
  @tokens << token.new(start_mark, end_mark)
402
541
  end
@@ -404,21 +543,19 @@ module RbYAML
404
543
  def fetch_flow_entry
405
544
  # Simple keys are allowed after ','.
406
545
  @allow_simple_key = true
407
- # Reset possible simple key on the current level.
408
- remove_possible_simple_key
409
546
  # Add FLOW-ENTRY.
410
547
  start_mark = get_mark
411
- forward
548
+ forward1
412
549
  end_mark = get_mark
413
550
  @tokens << FlowEntryToken.new(start_mark, end_mark)
414
551
  end
415
552
 
416
553
  def fetch_block_entry
417
554
  # Block context needs additional checks.
418
- if @flow_level==0
555
+ if @flow_zero
419
556
  raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
420
557
  # We may need to add BLOCK-SEQUENCE-START.
421
- if add_indent(@column)
558
+ if add_indent(column)
422
559
  mark = get_mark
423
560
  @tokens << BlockSequenceStartToken.new(mark, mark)
424
561
  end
@@ -427,67 +564,63 @@ module RbYAML
427
564
  end
428
565
  # Simple keys are allowed after '-'.
429
566
  @allow_simple_key = true
430
- # Reset possible simple key on the current level.
431
- remove_possible_simple_key
432
567
  # Add BLOCK-ENTRY.
433
568
  start_mark = get_mark
434
- forward
569
+ forward1
435
570
  end_mark = get_mark
436
571
  @tokens << BlockEntryToken.new(start_mark, end_mark)
437
572
  end
438
573
 
439
574
  def fetch_key
440
575
  # Block context needs additional checks.
441
- if @flow_level==0
576
+ if @flow_zero
442
577
  # Are we allowed to start a key (not necessarily a simple one)?
443
578
  raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
444
579
  # We may need to add BLOCK-MAPPING-START.
445
- if add_indent(@column)
580
+ if add_indent(column)
446
581
  mark = get_mark
447
582
  @tokens << BlockMappingStartToken.new(mark, mark)
448
583
  end
449
584
  end
450
585
  # Simple keys are allowed after '?' in the block context.
451
- @allow_simple_key = @flow_level==0
452
- # Reset possible simple key on the current level.
453
- remove_possible_simple_key
586
+ @allow_simple_key = @flow_zero
454
587
  # Add KEY.
455
588
  start_mark = get_mark
456
- forward
589
+ forward1
457
590
  end_mark = get_mark
458
591
  @tokens << KeyToken.new(start_mark, end_mark)
459
592
  end
460
593
 
461
594
  def fetch_value
595
+ key = @possible_simple_keys[@flow_level]
462
596
  # Do we determine a simple key?
463
- if @possible_simple_keys.include?(@flow_level)
464
- # Add KEY.
465
- key = @possible_simple_keys[@flow_level]
466
- @possible_simple_keys.delete(@flow_level)
467
- @tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
468
- # If this key starts a new block mapping, we need to add
469
- # BLOCK-MAPPING-START.
470
- @tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
471
- # There cannot be two simple keys one after another.
472
- @allow_simple_key = false
473
- # It must be a part of a complex key.
474
- else
597
+ if key.nil?
475
598
  # Block context needs additional checks.
476
599
  # (Do we really need them? They will be caught by the parser
477
600
  # anyway.)
478
- if @flow_level==0
601
+ if @flow_zero
479
602
  # We are allowed to start a complex value if and only if
480
603
  # we can start a simple key.
481
604
  raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
482
605
  # Simple keys are allowed after ':' in the block context.
483
- @allow_simple_key = @flow_level==0
484
- # Reset possible simple key on the current level.
485
- remove_possible_simple_key
606
+ @allow_simple_key = true
486
607
  end
608
+ else
609
+ # Add KEY.
610
+ @possible_simple_keys.delete(@flow_level)
611
+
612
+ # If this key starts a new block mapping, we need to add
613
+ # BLOCK-MAPPING-START.
614
+ se = (@flow_zero && add_indent(key.column)) ? [BlockMappingStartToken.new(key.mark, key.mark)] : []
615
+ se << KeyToken.new(key.mark, key.mark)
616
+ @tokens.insert(key.token_number-@tokens_taken,*se)
617
+ # There cannot be two simple keys one after another.
618
+ @allow_simple_key = false
619
+ # It must be a part of a complex key.
487
620
  end
488
621
  # Add VALUE.
489
622
  start_mark = get_mark
490
- forward
623
+ forward1
491
624
  end_mark = get_mark
492
625
  @tokens << ValueToken.new(start_mark, end_mark)
493
626
  end
@@ -530,8 +663,6 @@ module RbYAML
530
663
  def fetch_block_scalar(style)
531
664
  # A simple key may follow a block scalar.
532
665
  @allow_simple_key = true
533
- # Reset possible simple key on the current level.
534
- remove_possible_simple_key
535
666
  # Scan and add SCALAR.
536
667
  @tokens << scan_block_scalar(style)
537
668
  end
@@ -564,65 +695,9 @@ module RbYAML
564
695
  @tokens << scan_plain
565
696
  end
566
697
 
567
- # Checkers.
568
-
569
- def check_directive
570
- # DIRECTIVE: ^ '%' ...
571
- # The '%' indicator is already checked.
572
- @column == 0
573
- end
574
-
575
- def check_document_start
576
- # DOCUMENT-START: ^ '---' (' '|'\n')
577
- @column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
578
- end
579
-
580
- def check_document_end
581
- # DOCUMENT-END: ^ '...' (' '|'\n')
582
- @column == 0 && prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
583
- end
584
-
585
- def check_block_entry
586
- # BLOCK-ENTRY: '-' (' '|'\n')
587
- "\0 \t\r\n\x85".include?(peek(1))
588
- end
589
-
590
- def check_key
591
- # KEY(flow context): '?'
592
- # KEY(block context): '?' (' '|'\n')
593
- @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
594
- end
595
-
596
- def check_value
597
- # VALUE(flow context): ':'
598
- # VALUE(block context): ':' (' '|'\n')
599
- @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
600
- end
601
-
602
- def check_plain
603
- # A plain scalar may start with any non-space character except:
604
- # '-', '?', ':', ',', '[', ']', '{', '}',
605
- # '#', '&', '*', '!', '|', '>', '\'', '\"',
606
- # '%', '@', '`'.
607
- #
608
- # It may also start with
609
- # '-', '?', ':'
610
- # if it is followed by a non-space character.
611
- #
612
- # Note that we limit the last rule to the block context (except the
613
- # '-' character) because we want the flow context to be space
614
- # independent.
615
- ch = peek
616
- !("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) || (!("\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level==0 && "?:".include?(ch)))))
617
- end
618
-
619
-
620
-
621
-
622
-
623
698
 
624
699
  # Scanners.
625
-
700
+ NULL_OR_LINEBR = "\0\r\n\x85"
626
701
  def scan_to_next_token
627
702
  # We ignore spaces, line breaks and comments.
628
703
  # If we find a line break in the block context, we set the flag
@@ -638,18 +713,20 @@ module RbYAML
638
713
  # We also need to add the check for `allow_simple_keys == true` to
639
714
  # `unwind_indent` before issuing BLOCK-END.
640
715
  # Scanners for block, flow, and plain scalars need to be modified.
641
- found = false
642
- while !found
643
- while peek == 32
644
- forward
716
+ while true
717
+ while peek0 == 32
718
+ forward1
645
719
  end
646
- if peek == ?#
647
- forward while !"\0\r\n\x85".include?(peek)
720
+ if peek0 == ?#
721
+ while !NULL_OR_LINEBR.include?(peek0)
722
+ forward1
723
+ end
648
724
  end
725
+
649
726
  if !scan_line_break.empty?
650
- @allow_simple_key = true if @flow_level==0
727
+ @allow_simple_key = true if @flow_zero
651
728
  else
652
- found = true
729
+ break
653
730
  end
654
731
  end
655
732
  end
@@ -657,7 +734,7 @@ module RbYAML
657
734
  def scan_directive
658
735
  # See the specification for details.
659
736
  start_mark = get_mark
660
- forward
737
+ forward1
661
738
  name = scan_directive_name(start_mark)
662
739
  value = nil
663
740
  if name == "YAML"
@@ -668,45 +745,50 @@ module RbYAML
668
745
  end_mark = get_mark
669
746
  else
670
747
  end_mark = get_mark
671
- forward while !"\0\r\n\x85".include?(peek)
748
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
672
749
  end
673
750
  scan_directive_ignored_line(start_mark)
674
751
  DirectiveToken.new(name, value, start_mark, end_mark)
675
752
  end
676
753
 
754
+ ALPHA_REG = /[-0-9A-Za-z_]/
755
+ NULL_BL_LINEBR = "\0 \r\n\x85"
756
+ NULL_BL_T_LINEBR = "\0 \t\r\n\x85"
677
757
  def scan_directive_name(start_mark)
678
758
  # See the specification for details.
679
759
  length = 0
680
760
  ch = peek(length)
681
- while /[-0-9A-Za-z_]/ =~ ch.chr
761
+ zlen = true
762
+ while ALPHA_REG =~ ch.chr
763
+ zlen = false
682
764
  length += 1
683
765
  ch = peek(length)
684
766
  end
685
- raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if length==0
767
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if zlen
686
768
  value = prefix(length)
687
769
  forward(length)
688
- ch = peek()
689
- raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
770
+ ch = peek0
771
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !NULL_BL_LINEBR.include?(ch)
690
772
  value
691
773
  end
692
774
 
693
775
  def scan_yaml_directive_value(start_mark)
694
776
  # See the specification for details.
695
- forward while peek == 32
777
+ forward1 while peek0 == 32
696
778
  major = scan_yaml_directive_number(start_mark)
697
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek != ?.
698
- forward
779
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek0 != ?.
780
+ forward1
699
781
  minor = scan_yaml_directive_number(start_mark)
700
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !"\0 \r\n\x85".include?(peek)
782
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
701
783
  [major, minor]
702
784
  end
703
785
 
704
786
  def scan_yaml_directive_number(start_mark)
705
787
  # See the specification for details.
706
- ch = peek
707
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !((?0..?9) === ch)
788
+ ch = peek0
789
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !(ch.__is_ascii_num)
708
790
  length = 0
709
- length += 1 while ((?0..?9) === peek(length))
791
+ length += 1 while (peek(length).__is_ascii_num)
710
792
  value = prefix(length)
711
793
  forward(length)
712
794
  value
@@ -714,9 +796,9 @@ module RbYAML
714
796
 
715
797
  def scan_tag_directive_value(start_mark)
716
798
  # See the specification for details.
717
- forward while peek == 32
799
+ forward1 while peek0 == 32
718
800
  handle = scan_tag_directive_handle(start_mark)
719
- forward while peek == 32
801
+ forward1 while peek0 == 32
720
802
  prefix = scan_tag_directive_prefix(start_mark)
721
803
  [handle, prefix]
722
804
  end
@@ -724,30 +806,30 @@ module RbYAML
724
806
  def scan_tag_directive_handle(start_mark)
725
807
  # See the specification for details.
726
808
  value = scan_tag_handle("directive", start_mark)
727
- ch = peek
728
- raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
809
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if peek0 != 32
729
810
  value
730
811
  end
731
812
 
732
813
  def scan_tag_directive_prefix(start_mark)
733
814
  # See the specification for details.
734
815
  value = scan_tag_uri("directive", start_mark)
735
- ch = peek
736
- raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
816
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if !NULL_BL_LINEBR.include?(peek0)
737
817
  value
738
818
  end
739
819
 
740
820
  def scan_directive_ignored_line(start_mark)
741
821
  # See the specification for details.
742
- forward while peek == 32
743
- if peek == ?#
744
- forward while !"\0\r\n\x85".include?(peek)
822
+ forward1 while peek0 == 32
823
+ if peek0 == ?#
824
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
745
825
  end
746
- ch = peek
747
- raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark()) if !"\0\r\n\x85".include?(ch)
826
+ ch = peek0
827
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{peek0.to_s}",get_mark()) if !NULL_OR_LINEBR.include?(peek0)
748
828
  scan_line_break
749
829
  end
750
-
830
+
831
+ NON_ALPHA = /[^-0-9A-Za-z_]/
832
+ NON_ALPHA_OR_NUM = "\0 \t\r\n\x85?:,]}%@`"
751
833
  def scan_anchor(token)
752
834
  # The specification does not restrict characters for anchors and
753
835
  # aliases. This may lead to problems, for instance, the document:
@@ -758,45 +840,47 @@ module RbYAML
758
840
  # [ *alias , "value" ]
759
841
  # Therefore we restrict aliases to numbers and ASCII letters.
760
842
  start_mark = get_mark
761
- indicator = peek
843
+ indicator = peek0
762
844
  name = (indicator == ?*) ? "alias":"anchor"
763
- forward
845
+ forward1
764
846
  length = 0
765
- ch = peek(length)
766
- while /[-0-9A-Za-z_]/ =~ ch.chr
767
- length += 1
768
- ch = peek(length)
847
+ chunk_size = 16
848
+ while true
849
+ chunk = prefix(chunk_size)
850
+ if length = (NON_ALPHA =~ chunk)
851
+ break
852
+ end
853
+ chunk_size += 16
769
854
  end
770
- raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark) if length==0
855
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found something else...",get_mark) if length==0
771
856
  value = prefix(length)
772
857
  forward(length)
773
- ch = peek
774
- if !"\0 \t\r\n\x85?:,]}%@`".include?(ch)
775
- raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
858
+ if !NON_ALPHA_OR_NUM.include?(peek0)
859
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{peek0}",get_mark)
776
860
  end
777
861
  end_mark = get_mark
778
862
  token.new(value, start_mark, end_mark)
779
863
  end
780
864
 
781
-
865
+ NULL_T_BL_LINEBR = "\0 \t\r\n\x85"
782
866
  def scan_tag
783
867
  # See the specification for details.
784
868
  start_mark = get_mark
785
- ch = peek(1)
869
+ ch = peek1
786
870
  if ch == ?<
787
871
  handle = nil
788
- forward(2)
872
+ forward2
789
873
  suffix = scan_tag_uri("tag", start_mark)
790
- raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
791
- forward
792
- elsif "\0 \t\r\n\x85".include?(ch)
874
+ raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek0 != ?>
875
+ forward1
876
+ elsif NULL_T_BL_LINEBR.include?(ch)
793
877
  handle = nil
794
878
  suffix = "!"
795
- forward
879
+ forward1
796
880
  else
797
881
  length = 1
798
882
  use_handle = false
799
- while !"\0 \t\r\n\x85".include?(ch)
883
+ while !NULL_T_BL_LINEBR.include?(ch)
800
884
  if ch == ?!
801
885
  use_handle = true
802
886
  break
@@ -809,24 +893,24 @@ module RbYAML
809
893
  handle = scan_tag_handle("tag", start_mark)
810
894
  else
811
895
  handle = "!"
812
- forward
896
+ forward1
813
897
  end
814
898
  suffix = scan_tag_uri("tag", start_mark)
815
899
  end
816
- ch = peek
817
- raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
900
+ raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
818
901
  value = [handle, suffix]
819
902
  end_mark = get_mark
820
903
  TagToken.new(value, start_mark, end_mark)
821
904
  end
822
905
 
906
+ BLANK_T = " \t"
823
907
  def scan_block_scalar(style)
824
908
  # See the specification for details.
825
909
  folded = style== ?>
826
910
  chunks = []
827
911
  start_mark = get_mark
828
912
  # Scan the header.
829
- forward
913
+ forward1
830
914
  chomping, increment = scan_block_scalar_indicators(start_mark)
831
915
  scan_block_scalar_ignored_line(start_mark)
832
916
  # Determine the indentation level and go to the first non-empty line.
@@ -841,20 +925,20 @@ module RbYAML
841
925
  end
842
926
  line_break = ''
843
927
  # Scan the inner part of the block scalar.
844
- while @column == indent and peek != ?\0
928
+ while column == indent and peek0 != ?\0
845
929
  chunks += breaks
846
- leading_non_space = !" \t".include?(peek)
930
+ leading_non_space = !BLANK_T.include?(peek0)
847
931
  length = 0
848
- length += 1 while !"\0\r\n\x85".include?(peek(length))
932
+ length += 1 while !NULL_OR_LINEBR.include?(peek(length))
849
933
  chunks << prefix(length)
850
934
  forward(length)
851
935
  line_break = scan_line_break
852
936
  breaks, end_mark = scan_block_scalar_breaks(indent)
853
- if @column == indent && peek != 0
937
+ if column == indent && peek0 != 0
854
938
  # Unfortunately, folding rules are ambiguous.
855
939
  #
856
940
  # This is the folding according to the specification:
857
- if folded && line_break == ?\n && leading_non_space && !" \t".include?(peek())
941
+ if folded && line_break == "\n" && leading_non_space && !BLANK_T.include?(peek0)
858
942
  chunks << ' ' if breaks.empty?
859
943
  else
860
944
  chunks << line_break
@@ -882,76 +966,76 @@ module RbYAML
882
966
  end
883
967
 
884
968
  # We are done.
885
- ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
969
+ ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
886
970
  end
887
971
 
972
+ PLUS_MIN = /[+-]/
888
973
  def scan_block_scalar_indicators(start_mark)
889
974
  # See the specification for details.
890
975
  chomping = nil
891
976
  increment = nil
892
- ch = peek
893
- if /[+-]/ =~ ch.chr
977
+ ch = peek0
978
+ if PLUS_MIN =~ ch.chr
894
979
  chomping = ch == ?+
895
- forward
896
- ch = peek
897
- if (?0..?9) === ch
898
- increment = ch.to_i
980
+ forward1
981
+ ch = peek0
982
+ if ch.__is_ascii_num
983
+ increment = ch.chr.to_i
899
984
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
900
- forward
985
+ forward1
901
986
  end
902
- elsif (?0..?9) === ch
903
- increment = ch
987
+ elsif ch.__is_ascii_num
988
+ increment = ch.chr.to_i
904
989
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
905
- forward
906
- ch = peek
907
- if /[+-]/ =~ ch.chr
990
+ forward1
991
+ ch = peek0
992
+ if PLUS_MIN =~ ch.chr
908
993
  chomping = ch == ?+
909
- forward
994
+ forward1
910
995
  end
911
996
  end
912
- ch = peek
913
- raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
997
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
914
998
  [chomping, increment]
915
999
  end
916
1000
 
917
1001
  def scan_block_scalar_ignored_line(start_mark)
918
1002
  # See the specification for details.
919
- forward while peek == 32
920
- if peek == ?#
921
- forward while !"\0\r\n\x85".include?(peek)
1003
+ forward1 while peek0 == 32
1004
+ if peek0 == ?#
1005
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
922
1006
  end
923
- ch = peek
924
-
925
- raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark) if !"\0\r\n\x85".include?(ch)
1007
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{peek0}",get_mark) if !NULL_OR_LINEBR.include?(peek0)
926
1008
  scan_line_break
927
1009
  end
928
1010
 
1011
+ BLANK_OR_LINEBR = " \r\n\x85"
929
1012
  def scan_block_scalar_indentation
930
1013
  # See the specification for details.
931
1014
  chunks = []
932
1015
  max_indent = 0
933
1016
  end_mark = get_mark
934
- while " \r\n\x85".include?(peek)
935
- if peek != 32
1017
+ while BLANK_OR_LINEBR.include?(peek0)
1018
+ if peek0 != 32
936
1019
  chunks << scan_line_break
937
1020
  end_mark = get_mark
938
1021
  else
939
- forward
940
- max_indent = @column if @column > max_indent
1022
+ forward1
1023
+ max_indent = column if column > max_indent
941
1024
  end
942
1025
  end
943
1026
  [chunks, max_indent, end_mark]
944
1027
  end
945
1028
 
1029
+ FULL_LINEBR = "\r\n\x85"
946
1030
  def scan_block_scalar_breaks(indent)
947
1031
  # See the specification for details.
948
1032
  chunks = []
949
1033
  end_mark = get_mark
950
- forward while @column < indent && peek == 32
951
- while "\r\n\x85".include?(peek)
1034
+ forward1 while @column < indent && peek0 == 32
1035
+ while FULL_LINEBR.include?(peek0)
952
1036
  chunks << scan_line_break
953
1037
  end_mark = get_mark
954
- forward while @column < indent && peek == 32
1038
+ forward1 while @column < indent && peek0 == 32
955
1039
  end
956
1040
  [chunks, end_mark]
957
1041
  end
@@ -966,16 +1050,16 @@ module RbYAML
966
1050
  double = style == ?"
967
1051
  chunks = []
968
1052
  start_mark = get_mark
969
- quote = peek
970
- forward
1053
+ quote = peek0
1054
+ forward1
971
1055
  chunks += scan_flow_scalar_non_spaces(double, start_mark)
972
- while peek != quote
1056
+ while peek0 != quote
973
1057
  chunks += scan_flow_scalar_spaces(double, start_mark)
974
1058
  chunks += scan_flow_scalar_non_spaces(double, start_mark)
975
1059
  end
976
- forward
1060
+ forward1
977
1061
  end_mark = get_mark
978
- ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
1062
+ ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
979
1063
  end
980
1064
 
981
1065
  ESCAPE_REPLACEMENTS = {
@@ -1000,42 +1084,43 @@ module RbYAML
1000
1084
  'x' => 2
1001
1085
  }
1002
1086
 
1087
+ SPACES_AND_STUFF = "'\"\\\0 \t\r\n\x85"
1088
+ DOUBLE_ESC = "\"\\"
1089
+ NOT_HEXA = /[^0-9A-Fa-f]/
1003
1090
  def scan_flow_scalar_non_spaces(double, start_mark)
1004
1091
  # See the specification for details.
1005
1092
  chunks = []
1006
1093
  while true
1007
1094
  length = 0
1008
- length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
1095
+ length += 1 while !SPACES_AND_STUFF.include?(peek(length))
1009
1096
  if length!=0
1010
1097
  chunks << prefix(length)
1011
1098
  forward(length)
1012
1099
  end
1013
- ch = peek
1014
- if !double && ch == ?' && peek(1) == ?'
1100
+ ch = peek0
1101
+ if !double && ch == ?' && peek1 == ?'
1015
1102
  chunks << ?'
1016
- forward(2)
1017
- elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
1103
+ forward2
1104
+ elsif (double && ch == ?') || (!double && DOUBLE_ESC.include?(ch))
1018
1105
  chunks << ch
1019
- forward
1106
+ forward1
1020
1107
  elsif double && ch == ?\\
1021
- forward
1022
- ch = peek
1108
+ forward1
1109
+ ch = peek0
1023
1110
  if ESCAPE_REPLACEMENTS.member?(ch.chr)
1024
1111
  chunks << ESCAPE_REPLACEMENTS[ch.chr]
1025
- forward
1112
+ forward1
1026
1113
  elsif ESCAPE_CODES.member?(ch.chr)
1027
1114
  length = ESCAPE_CODES[ch.chr]
1028
- forward
1029
- length.times do |k|
1030
- if /[0-9A-Fa-f]/ !~ peek(k).chr
1031
- raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
1032
- "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
1033
- end
1115
+ forward1
1116
+ if NOT_HEXA =~ prefix(length)
1117
+ raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
1118
+ "expected escape sequence of #{length} hexdecimal numbers, but found something else: #{prefix(length)}}",get_mark)
1034
1119
  end
1035
- code = prefix(length).to_i.to_s(16)
1120
+ code = prefix(length).to_i(16).to_s
1036
1121
  chunks << code
1037
1122
  forward(length)
1038
- elsif "\r\n\x85".include?(ch)
1123
+ elsif FULL_LINEBR.include?(ch)
1039
1124
  scan_line_break
1040
1125
  chunks += scan_flow_scalar_breaks(double, start_mark)
1041
1126
  else
@@ -1051,16 +1136,16 @@ module RbYAML
1051
1136
  # See the specification for details.
1052
1137
  chunks = []
1053
1138
  length = 0
1054
- length += 1 while /[ \t]/ =~ peek(length).chr
1139
+ length += 1 while BLANK_T.include?(peek(length))
1055
1140
  whitespaces = prefix(length)
1056
1141
  forward(length)
1057
- ch = peek
1142
+ ch = peek0
1058
1143
  if ch == ?\0
1059
1144
  raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
1060
- elsif "\r\n\x85".include?(ch)
1145
+ elsif FULL_LINEBR.include?(ch)
1061
1146
  line_break = scan_line_break
1062
1147
  breaks = scan_flow_scalar_breaks(double, start_mark)
1063
- if line_break != ?\n
1148
+ if line_break != "\n"
1064
1149
  chunks << line_break
1065
1150
  elsif breaks.empty?
1066
1151
  chunks << ' '
@@ -1079,17 +1164,22 @@ module RbYAML
1079
1164
  # Instead of checking indentation, we check for document
1080
1165
  # separators.
1081
1166
  prefix = prefix(3)
1082
- if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1167
+ if (prefix == "---" || prefix == "...") &&NULL_BL_T_LINEBR.include?(peek3)
1083
1168
  raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
1084
1169
  end
1085
- forward while /[ \t]/ =~ peek.chr
1086
- if "\r\n\x85".include?(peek)
1170
+ forward1 while BLANK_T.include?(peek0)
1171
+ if FULL_LINEBR.include?(peek0)
1087
1172
  chunks << scan_line_break
1088
1173
  else
1089
1174
  return chunks
1090
1175
  end
1091
1176
  end
1092
1177
  end
1178
+
1179
+
1180
+ R_flowzero = /[\0 \t\r\n\x85]|(:[\0 \t\r\n\x28])/
1181
+ R_flownonzero = /[\0 \t\r\n\x85\[\]{},:?]/
1182
+ S4 = "\0 \t\r\n\x28[]{}"
1093
1183
 
1094
1184
  def scan_plain
1095
1185
  # See the specification for details.
@@ -1098,25 +1188,25 @@ module RbYAML
1098
1188
  # We also keep track of the `allow_simple_key` flag here.
1099
1189
  # Indentation rules are loosed for the flow context.
1100
1190
  chunks = []
1101
- start_mark = get_mark
1102
- end_mark = start_mark
1191
+ end_mark = start_mark = get_mark
1103
1192
  indent = @indent+1
1104
1193
  # We allow zero indentation for scalars, but then we need to check for
1105
1194
  # document separators at the beginning of the line.
1106
1195
  #if indent == 0
1107
1196
  # indent = 1
1108
1197
  spaces = []
1109
- while true
1198
+ if @flow_zero
1199
+ f_nzero, r_check = false, R_flowzero
1200
+ else
1201
+ f_nzero, r_check = true, R_flownonzero
1202
+ end
1203
+
1204
+ while peek0 != ?#
1110
1205
  length = 0
1111
- break if peek == ?#
1112
- while true
1113
- ch = peek(length)
1114
- if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x28".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
1115
- break
1116
- end
1117
- length += 1
1118
- end
1119
- if @flow_level != 0 && ch == ?: && !"\0 \t\r\n\x28[]{}".include?(peek(length+1))
1206
+ chunk_size = 32
1207
+ chunk_size += 32 until length = (r_check =~ prefix(chunk_size))
1208
+ ch = peek(length)
1209
+ if f_nzero && ch == ?: && !S4.include?(peek(length+1))
1120
1210
  forward(length)
1121
1211
  raise ScannerError.new("while scanning a plain scalar",start_mark,"found unexpected ':'",get_mark,"Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
1122
1212
  end
@@ -1127,11 +1217,12 @@ module RbYAML
1127
1217
  forward(length)
1128
1218
  end_mark = get_mark
1129
1219
  spaces = scan_plain_spaces(indent, start_mark)
1130
- break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
1220
+ break if !spaces || (@flow_zero && @column < indent)
1131
1221
  end
1132
- return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
1222
+ return ScalarToken.new(chunks.to_s, true, start_mark, end_mark)
1133
1223
  end
1134
1224
 
1225
+ END_OR_START = /^(---|\.\.\.)[\0 \t\r\n\x85]$/
1135
1226
  def scan_plain_spaces(indent, start_mark)
1136
1227
  # See the specification for details.
1137
1228
  # The specification is really confusing about tabs in plain scalars.
@@ -1141,44 +1232,43 @@ module RbYAML
1141
1232
  length += 1 while peek(length) == 32
1142
1233
  whitespaces = prefix(length)
1143
1234
  forward(length)
1144
- ch = peek
1145
- if "\r\n\x85".include?(ch)
1235
+ ch = peek0
1236
+ if FULL_LINEBR.include?(ch)
1146
1237
  line_break = scan_line_break
1147
1238
  @allow_simple_key = true
1148
- prefix = prefix(3)
1149
- return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1239
+ return if END_OR_START =~ prefix(4)
1150
1240
  breaks = []
1151
- while " \r\n\x85".include?(peek)
1152
- if peek == 32
1153
- forward
1241
+ while BLANK_OR_LINEBR.include?(peek0)
1242
+ if peek0 == 32
1243
+ forward1
1154
1244
  else
1155
1245
  breaks << scan_line_break
1156
- prefix = prefix(3)
1157
- return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1246
+ return if END_OR_START =~ prefix(4)
1158
1247
  end
1159
1248
  end
1160
- if line_break != '\n'
1249
+ if line_break != "\n"
1161
1250
  chunks << line_break
1162
- elsif breaks.empty?
1163
- chunks << ' '
1251
+ elsif breaks.nil? || breaks.empty?
1252
+ chunks << " "
1164
1253
  end
1165
1254
  chunks += breaks
1166
- elsif !whitespaces.empty?
1255
+ else
1167
1256
  chunks << whitespaces
1168
1257
  end
1169
1258
  chunks
1170
1259
  end
1171
1260
 
1261
+
1172
1262
  def scan_tag_handle(name, start_mark)
1173
1263
  # See the specification for details.
1174
1264
  # For some strange reasons, the specification does not allow '_' in
1175
1265
  # tag handles. I have allowed it anyway.
1176
- ch = peek
1266
+ ch = peek0
1177
1267
  raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) if ch != ?!
1178
1268
  length = 1
1179
1269
  ch = peek(length)
1180
1270
  if ch != 32
1181
- while /[-_0-9A-Za-z]/ =~ ch.chr
1271
+ while ALPHA_REG =~ ch.chr
1182
1272
  length += 1
1183
1273
  ch = peek(length)
1184
1274
  end
@@ -1193,13 +1283,14 @@ module RbYAML
1193
1283
  value
1194
1284
  end
1195
1285
 
1286
+ STRANGE_CHR = /[\]\[\-';\/?:@&=+$,.!~*()%\w]/
1196
1287
  def scan_tag_uri(name, start_mark)
1197
1288
  # See the specification for details.
1198
1289
  # Note: we do not check if URI is well-formed.
1199
1290
  chunks = []
1200
1291
  length = 0
1201
1292
  ch = peek(length)
1202
- while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
1293
+ while STRANGE_CHR =~ ch.chr
1203
1294
  if ch == ?%
1204
1295
  chunks << prefix(length)
1205
1296
  forward(length)
@@ -1213,29 +1304,27 @@ module RbYAML
1213
1304
  if length!=0
1214
1305
  chunks << prefix(length)
1215
1306
  forward(length)
1216
- length = 0
1217
1307
  end
1218
1308
 
1219
1309
  raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
1220
- chunks.join('')
1310
+ chunks.to_s
1221
1311
  end
1222
1312
 
1313
+ HEXA_REG = /[0-9A-Fa-f]/
1223
1314
  def scan_uri_escapes(name, start_mark)
1224
1315
  # See the specification for details.
1225
1316
  bytes = []
1226
1317
  mark = get_mark
1227
- while peek == ?%
1228
- forward
1229
- 2.times do |k|
1230
- raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
1231
- get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
1232
- end
1233
- bytes << prefix(2).to_i.to_s(16)
1234
- forward(2)
1318
+ while peek0 == ?%
1319
+ forward1
1320
+ raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek1} and #{peek2}",get_mark) if HEXA_REG !~ peek1.chr || HEXA_REG !~ peek2.chr
1321
+ bytes << prefix(2).to_i(16).to_s
1322
+ forward2
1235
1323
  end
1236
- bytes.join('')
1324
+ bytes.to_s
1237
1325
  end
1238
1326
 
1327
+ RN = "\r\n"
1239
1328
  def scan_line_break
1240
1329
  # Transforms:
1241
1330
  # '\r\n' : '\n'
@@ -1243,12 +1332,11 @@ module RbYAML
1243
1332
  # '\n' : '\n'
1244
1333
  # '\x85' : '\n'
1245
1334
  # default : ''
1246
- ch = peek
1247
- if "\r\n\x85".include?(ch)
1248
- if prefix(2) == "\r\n"
1249
- forward(2)
1335
+ if FULL_LINEBR.include?(peek0)
1336
+ if prefix2 == RN
1337
+ forward2
1250
1338
  else
1251
- forward
1339
+ forward1
1252
1340
  end
1253
1341
  return "\n"
1254
1342
  end