RbYAML 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/README +1 -1
  2. data/lib/rbyaml/composer.rb +28 -25
  3. data/lib/rbyaml/composer.rb.~1.2.~ +109 -0
  4. data/lib/rbyaml/constructor.rb +94 -84
  5. data/lib/rbyaml/constructor.rb.~1.2.~ +381 -0
  6. data/lib/rbyaml/dumper.rb +10 -17
  7. data/lib/rbyaml/dumper.rb.~1.2.~ +43 -0
  8. data/lib/rbyaml/emitter.rb +13 -26
  9. data/lib/rbyaml/emitter.rb.~1.2.~ +1116 -0
  10. data/lib/rbyaml/error.rb +15 -21
  11. data/lib/rbyaml/events.rb +29 -5
  12. data/lib/rbyaml/events.rb.~1.2.~ +93 -0
  13. data/lib/rbyaml/loader.rb +11 -23
  14. data/lib/rbyaml/loader.rb.~1.2.~ +52 -0
  15. data/lib/rbyaml/nodes.rb +13 -9
  16. data/lib/rbyaml/nodes.rb.~1.2.~ +52 -0
  17. data/lib/rbyaml/parser.rb +481 -343
  18. data/lib/rbyaml/parser.rb.old +531 -0
  19. data/lib/rbyaml/parser.rb.~1.2.~ +494 -0
  20. data/lib/rbyaml/reader.rb.~1.1.1.1.~ +127 -0
  21. data/lib/rbyaml/representer.rb +26 -17
  22. data/lib/rbyaml/representer.rb.~1.2.~ +239 -0
  23. data/lib/rbyaml/resolver.rb +15 -15
  24. data/lib/rbyaml/resolver.rb.~1.1.~ +163 -0
  25. data/lib/rbyaml/scanner.rb +457 -366
  26. data/lib/rbyaml/scanner.rb.~1.2.~ +1259 -0
  27. data/lib/rbyaml/serializer.rb +19 -17
  28. data/lib/rbyaml/serializer.rb.~1.2.~ +115 -0
  29. data/lib/rbyaml/tokens.rb +44 -4
  30. data/lib/rbyaml/tokens.rb.~1.2.~ +164 -0
  31. data/lib/rbyaml/util.rb +28 -0
  32. data/lib/rbyaml/yaml.rb +12 -12
  33. data/lib/rbyaml/yaml.rb.~1.2.~ +136 -0
  34. data/test/test_bm.rb +28 -0
  35. data/test/test_bm_syck.rb +28 -0
  36. data/test/test_invoke.rb +31 -0
  37. data/test/test_one.rb +5 -0
  38. data/test/test_profile.rb +32 -0
  39. data/test/test_rbyaml.rb +2 -1
  40. data/test/test_rbyaml.rb.~1.2.~ +31 -0
  41. data/test/test_time.rb +13 -8
  42. data/test/test_time.rb.~1.1.~ +29 -0
  43. data/test/yamlx.rb +3563 -0
  44. metadata +27 -2
@@ -1,3 +1,4 @@
1
+ require 'rbyaml/util'
1
2
  require 'rbyaml/nodes'
2
3
  require 'rbyaml/error'
3
4
 
@@ -9,11 +10,11 @@ module RbYAML
9
10
  DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
10
11
  DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'
11
12
 
12
- module BaseResolver
13
+ class BaseResolver
13
14
  @@yaml_implicit_resolvers = {}
14
15
  @@yaml_path_resolvers = {}
15
16
 
16
- def initialize_resolver
17
+ def initialize
17
18
  @resolver_exact_paths = []
18
19
  @resolver_prefix_paths = []
19
20
  end
@@ -31,7 +32,7 @@ module RbYAML
31
32
  def self.add_path_resolver(tag, path, kind=nil)
32
33
  new_path = []
33
34
  for element in path
34
- if Array === element
35
+ if element.__is_a
35
36
  if element.length == 2
36
37
  node_check, index_check = element
37
38
  elsif element.length == 1
@@ -50,10 +51,10 @@ module RbYAML
50
51
  node_check = SequenceNode
51
52
  elsif Hash == node_check
52
53
  node_check = MappingNode
53
- elsif ![ScalarNode, SequenceNode, MappingNode].include?(node_check) && !Symbol === node_check && !node_check.nil?
54
+ elsif ![ScalarNode, SequenceNode, MappingNode].include?(node_check) && !node_check.__is_sym && !node_check.nil?
54
55
  raise ResolverError.new("Invalid node checker: #{node_check}")
55
56
  end
56
- if !(String === index_check || Integer === index_check) && !index_check.nil?
57
+ if !(index_check.__is_str || index_check.__is_int) && !index_check.nil?
57
58
  raise ResolverError.new("Invalid index checker: #{index_check}")
58
59
  end
59
60
  new_path << [node_check, index_check]
@@ -104,16 +105,16 @@ module RbYAML
104
105
 
105
106
  def check_resolver_prefix(depth, path, kind, current_node, current_index)
106
107
  node_check, index_check = path[depth-1]
107
- if String === node_check
108
+ if node_check.__is_str
108
109
  return false if current_node.tag != node_check
109
110
  elsif !node_check.nil?
110
111
  return false if !node_check === current_node
111
112
  end
112
113
  return false if index_check==true && !current_index.nil?
113
114
  return false if !index_check && current_index.nil?
114
- if String === index_check
115
- return false if !(ScalarNode === current_index && index_check == current_index.value)
116
- elsif Integer === index_check
115
+ if index_check.__is_str
116
+ return false if !(current_index.__is_scalar && index_check == current_index.value)
117
+ elsif index_check.__is_int
117
118
  return false if index_check != current_index
118
119
  end
119
120
  true
@@ -122,11 +123,11 @@ module RbYAML
122
123
  def resolve(kind, value, implicit)
123
124
  if ScalarNode == kind && implicit[0]
124
125
  if value == ""
125
- resolvers = @@yaml_implicit_resolvers.fetch("", [])
126
+ resolvers = @@yaml_implicit_resolvers.fetch("",[])
126
127
  else
127
- resolvers = @@yaml_implicit_resolvers.fetch(value[0], [])
128
+ resolvers = @@yaml_implicit_resolvers.fetch(value[0],[])
128
129
  end
129
- resolvers += @@yaml_implicit_resolvers.fetch(nil, [])
130
+ resolvers += @@yaml_implicit_resolvers.fetch(nil,[])
130
131
  for tag, regexp in resolvers
131
132
  return tag if regexp =~ value
132
133
  end
@@ -140,13 +141,12 @@ module RbYAML
140
141
  elsif SequenceNode == kind
141
142
  return RbYAML::DEFAULT_SEQUENCE_TAG
142
143
  elsif MappingNode == kind
143
- return RbYAML::DEFAULT_MAPPING_TAG
144
+ return RbYAML::DEFAULT_MAPPING_TAG
144
145
  end
145
146
  end
146
147
  end
147
148
 
148
- module Resolver
149
- include BaseResolver
149
+ class Resolver < BaseResolver
150
150
  end
151
151
 
152
152
  BaseResolver.add_implicit_resolver('tag:yaml.org,2002:bool',/^(?:y|Y|yes|Yes|YES|n|N|no|No|NO|true|True|TRUE|false|False|FALSE|on|On|ON|off|Off|OFF)$/,'yYnNtTfFoO')
@@ -0,0 +1,163 @@
1
+ require 'rbyaml/nodes'
2
+ require 'rbyaml/error'
3
+
4
+ module RbYAML
5
+ class ResolverError < MarkedYAMLError
6
+ end
7
+
8
+ DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str'
9
+ DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
10
+ DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'
11
+
12
+ module BaseResolver
13
+ @@yaml_implicit_resolvers = {}
14
+ @@yaml_path_resolvers = {}
15
+
16
+ def initialize_resolver
17
+ @resolver_exact_paths = []
18
+ @resolver_prefix_paths = []
19
+ end
20
+
21
+ def self.add_implicit_resolver(tag, regexp, first)
22
+ if first.nil?
23
+ first = ""
24
+ end
25
+ first.each_byte { |ch|
26
+ @@yaml_implicit_resolvers[ch] ||= []
27
+ @@yaml_implicit_resolvers[ch] << [tag,regexp]
28
+ }
29
+ end
30
+
31
+ def self.add_path_resolver(tag, path, kind=nil)
32
+ new_path = []
33
+ for element in path
34
+ if Array === element
35
+ if element.length == 2
36
+ node_check, index_check = element
37
+ elsif element.length == 1
38
+ node_check = element[0]
39
+ index_check = true
40
+ else
41
+ raise ResolverError.new("Invalid path element: #{element}")
42
+ end
43
+ else
44
+ node_check = nil
45
+ index_check = element
46
+ end
47
+ if String == node_check
48
+ node_check = ScalarNode
49
+ elsif Array == node_check
50
+ node_check = SequenceNode
51
+ elsif Hash == node_check
52
+ node_check = MappingNode
53
+ elsif ![ScalarNode, SequenceNode, MappingNode].include?(node_check) && !Symbol === node_check && !node_check.nil?
54
+ raise ResolverError.new("Invalid node checker: #{node_check}")
55
+ end
56
+ if !(String === index_check || Integer === index_check) && !index_check.nil?
57
+ raise ResolverError.new("Invalid index checker: #{index_check}")
58
+ end
59
+ new_path << [node_check, index_check]
60
+ end
61
+ if String == kind
62
+ kind = ScalarNode
63
+ elsif Array == kind
64
+ kind = SequenceNode
65
+ elsif Hash == kind
66
+ kind = MappingNode
67
+ elsif ![ScalarNode, SequenceNode, MappingNode].include?(kind) && !kind.nil?
68
+ raise ResolverError.new("Invalid node kind: #{kind}")
69
+ end
70
+ @@yaml_path_resolvers[[[new_path], kind]] = tag
71
+ end
72
+
73
+ def descend_resolver(current_node, current_index)
74
+ exact_paths = {}
75
+ prefix_paths = []
76
+ if current_node
77
+ depth = @resolver_prefix_paths.length
78
+ for path, kind in @resolver_prefix_paths[-1]
79
+ if check_resolver_prefix(depth, path, kind,current_node, current_index)
80
+ if path.length > depth
81
+ prefix_paths << [path, kind]
82
+ else
83
+ exact_paths[kind] = @@yaml_path_resolvers[[path, kind]]
84
+ end
85
+ end
86
+ end
87
+ else
88
+ for path, kind in @@yaml_path_resolvers
89
+ if !path
90
+ exact_paths[kind] = @@yaml_path_resolvers[[path, kind]]
91
+ else
92
+ prefix_paths << [path, kind]
93
+ end
94
+ end
95
+ end
96
+ @resolver_exact_paths << exact_paths
97
+ @resolver_prefix_paths << prefix_paths
98
+ end
99
+
100
+ def ascend_resolver
101
+ @resolver_exact_paths.pop
102
+ @resolver_prefix_paths.pop
103
+ end
104
+
105
+ def check_resolver_prefix(depth, path, kind, current_node, current_index)
106
+ node_check, index_check = path[depth-1]
107
+ if String === node_check
108
+ return false if current_node.tag != node_check
109
+ elsif !node_check.nil?
110
+ return false if !node_check === current_node
111
+ end
112
+ return false if index_check==true && !current_index.nil?
113
+ return false if !index_check && current_index.nil?
114
+ if String === index_check
115
+ return false if !(ScalarNode === current_index && index_check == current_index.value)
116
+ elsif Integer === index_check
117
+ return false if index_check != current_index
118
+ end
119
+ true
120
+ end
121
+
122
+ def resolve(kind, value, implicit)
123
+ if ScalarNode == kind && implicit[0]
124
+ if value == ""
125
+ resolvers = @@yaml_implicit_resolvers.fetch("", [])
126
+ else
127
+ resolvers = @@yaml_implicit_resolvers.fetch(value[0], [])
128
+ end
129
+ resolvers += @@yaml_implicit_resolvers.fetch(nil, [])
130
+ for tag, regexp in resolvers
131
+ return tag if regexp =~ value
132
+ end
133
+ implicit = implicit[1]
134
+ end
135
+ exact_paths = @resolver_exact_paths[-1]
136
+ return exact_paths[kind] if exact_paths.include?(kind)
137
+ return exact_paths[nil] if exact_paths.include?(nil)
138
+ if ScalarNode == kind
139
+ return RbYAML::DEFAULT_SCALAR_TAG
140
+ elsif SequenceNode == kind
141
+ return RbYAML::DEFAULT_SEQUENCE_TAG
142
+ elsif MappingNode == kind
143
+ return RbYAML::DEFAULT_MAPPING_TAG
144
+ end
145
+ end
146
+ end
147
+
148
+ module Resolver
149
+ include BaseResolver
150
+ end
151
+
152
+ BaseResolver.add_implicit_resolver('tag:yaml.org,2002:bool',/^(?:y|Y|yes|Yes|YES|n|N|no|No|NO|true|True|TRUE|false|False|FALSE|on|On|ON|off|Off|OFF)$/,'yYnNtTfFoO')
153
+ BaseResolver.add_implicit_resolver('tag:yaml.org,2002:float',/^(?:[-+]?(?:[0-9][0-9_]*)?\.[0-9_]*(?:[eE][-+][0-9]+)?|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*|[-+]?\.(?:inf|Inf|INF)|\.(?:nan|NaN|NAN))$/,'-+0123456789.')
154
+ BaseResolver.add_implicit_resolver('tag:yaml.org,2002:int',/^(?:[-+]?0b[0-1_]+|[-+]?0[0-7_]+|[-+]?(?:0|[1-9][0-9_]*)|[-+]?0x[0-9a-fA-F_]+|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$/,'-+0123456789')
155
+ BaseResolver.add_implicit_resolver('tag:yaml.org,2002:merge',/^(?:<<)$/,'<')
156
+ BaseResolver.add_implicit_resolver('tag:yaml.org,2002:null',/^(?: ~|null|Null|NULL| )$/,'~nN' + ?\0.chr)
157
+ BaseResolver.add_implicit_resolver('tag:yaml.org,2002:timestamp',/^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9][0-9]?-[0-9][0-9]?(?:[Tt]|[ \t]+)[0-9][0-9]?:[0-9][0-9]:[0-9][0-9](?:\.[0-9]*)?(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$/,'0123456789')
158
+ BaseResolver.add_implicit_resolver('tag:yaml.org,2002:value',/^(?:=)$/,'=')
159
+ # The following implicit resolver is only for documentation purposes. It cannot work
160
+ # because plain scalars cannot start with '!', '&', or '*'.
161
+ BaseResolver.add_implicit_resolver('tag:yaml.org,2002:yaml',/^(?:!|&|\*)$/,'!&*')
162
+ end
163
+
@@ -23,42 +23,43 @@
23
23
  # Read comments in the Scanner code for more details.
24
24
  #
25
25
 
26
+ require 'rbyaml/util'
26
27
  require 'rbyaml/error'
27
28
  require 'rbyaml/tokens'
28
29
 
29
30
  module RbYAML
30
31
  class ScannerError < MarkedYAMLError
31
32
  end
32
-
33
- class SimpleKey
34
- attr_reader :token_number, :required, :index, :line, :column, :mark
35
-
36
- def initialize(token_number,required,index,line,column,mark)
37
- @token_number = token_number
38
- @required = required
39
- @index = index
40
- @line = line
41
- @column = column
42
- @mark = mark
33
+ class ReaderError < YAMLError
34
+ def initialize(name, position, character, encoding, reason)
35
+ @name = name
36
+ @position = position
37
+ @character = character
38
+ @encoding = encoding
39
+ @reason = reason
40
+ end
41
+
42
+ def to_s
43
+ if @character.__is_str
44
+ "'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
45
+ else
46
+ "unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
47
+ end
43
48
  end
44
49
  end
45
50
 
46
- module Scanner
47
- def initialize_scanner
48
- # It is assumed that Scanner and Reader will mixin to the same point.
49
- # Reader do the dirty work of checking for BOM. It also adds NUL to the end.
50
- #
51
- # Reader supports the following methods
52
- # self.peek(i=0) # peek the next i-th character
53
- # self.prefix(l=1) # peek the next l characters
54
- # self.forward(l=1) # read the next l characters and move the pointer.
51
+ SimpleKey = Struct.new(:token_number, :required, :index, :line, :column, :mark)
55
52
 
53
+ class Scanner
54
+ attr_reader :column, :stream, :stream_pointer, :eof, :buffer, :pointer, :index, :line
55
+ def initialize(stream)
56
56
  # Had we reached the end of the stream?
57
57
  @done = false
58
58
 
59
59
  # The number of unclosed '{' and '['. `flow_level == 0` means block
60
60
  # context.
61
61
  @flow_level = 0
62
+ @flow_zero = true
62
63
 
63
64
  # List of processed tokens that are not yet emitted.
64
65
  @tokens = []
@@ -104,6 +105,199 @@ module RbYAML
104
105
  # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
105
106
  # '[', or '{' tokens.
106
107
  @possible_simple_keys = {}
108
+
109
+ @stream = nil
110
+ @stream_pointer = 0
111
+ @eof = true
112
+ @buffer = ""
113
+ @buffer_length = 0
114
+ @pointer = 0
115
+ @pointer1 = 1
116
+ @column = 0
117
+ if stream.__is_str
118
+ @name = "<string>"
119
+ @raw_buffer = stream
120
+ else
121
+ @stream = stream
122
+ @name = stream.respond_to?(:path) ? stream.path : stream.inspect
123
+ @eof = false
124
+ @raw_buffer = ""
125
+ end
126
+ end
127
+
128
+ def peek(index=0)
129
+ peekn(index)
130
+ end
131
+
132
+ def peek0
133
+ update(1) unless @pointer1 < @buffer_length
134
+ @buffer[@pointer]
135
+ end
136
+
137
+ def peek1
138
+ update(2) unless @pointer1+1 < @buffer_length
139
+ @buffer[@pointer1]
140
+ end
141
+
142
+ def peek2
143
+ update(3) unless @pointer1+2 < @buffer_length
144
+ @buffer[@pointer1+1]
145
+ end
146
+
147
+ def peek3
148
+ update(4) unless @pointer1+3 < @buffer_length
149
+ @buffer[@pointer1+2]
150
+ end
151
+
152
+ def peekn(index=0)
153
+ pix = @pointer1+index
154
+ unless pix < @buffer_length
155
+ update(index+1)
156
+ pix = @pointer1+index
157
+ end
158
+ @buffer[pix-1]
159
+ end
160
+
161
+ def prefix(length=1)
162
+ update(length) unless @pointer+length < @buffer_length
163
+ @buffer[@pointer...@pointer+length]
164
+ end
165
+
166
+ def prefix2()
167
+ update(2) unless @pointer1+1 < @buffer_length
168
+ @buffer[@pointer..@pointer1]
169
+ end
170
+
171
+ def forward(length=1)
172
+ case length
173
+ when 0: forward0
174
+ when 1: forward1
175
+ when 2: forward2
176
+ when 3: forward3
177
+ when 4: forward4
178
+ when 5: forward5
179
+ when 6: forward6
180
+ else forwardn(length)
181
+ end
182
+ end
183
+
184
+ def forward0
185
+ update(1) unless @pointer1 < @buffer_length
186
+ end
187
+
188
+ LINE_BR = "\n\x85"
189
+
190
+ def forward1
191
+ update(2) unless @pointer1+1 < @buffer_length
192
+ buff = @buffer[@pointer...@pointer1+1]
193
+ index = buff.rindex(LINE_BR_REG)
194
+ @column = index ? -index : column+1
195
+ @pointer += 1
196
+ @pointer1 += 1
197
+ end
198
+
199
+ def forward2
200
+ update(3) unless @pointer1+2 < @buffer_length
201
+ buff = @buffer[@pointer...@pointer1+2]
202
+ index = buff.rindex(LINE_BR_REG)
203
+ @column = index ? 1-index : column+2
204
+ @pointer += 2
205
+ @pointer1 += 2
206
+ end
207
+
208
+ def forward3
209
+ update(4) unless @pointer1+3 < @buffer_length
210
+ buff = @buffer[@pointer...@pointer1+3]
211
+ index = buff.rindex(LINE_BR_REG)
212
+ @column = index ? 2-index : column+3
213
+ @pointer += 3
214
+ @pointer1 += 3
215
+ end
216
+
217
+ def forward4
218
+ update(5) unless @pointer1+4 < @buffer_length
219
+ buff = @buffer[@pointer...@pointer1+4]
220
+ index = buff.rindex(LINE_BR_REG)
221
+ @column = index ? 3-index : column+4
222
+ @pointer += 4
223
+ @pointer1 += 4
224
+ end
225
+
226
+ def forward5
227
+ update(6) unless @pointer1+5 < @buffer_length
228
+ buff = @buffer[@pointer...@pointer1+5]
229
+ index = buff.rindex(LINE_BR_REG)
230
+ @column = index ? 4-index : column+5
231
+ @pointer += 5
232
+ @pointer1 += 5
233
+ end
234
+
235
+ def forward6
236
+ update(7) unless @pointer1+6 < @buffer_length
237
+ buff = @buffer[@pointer...@pointer1+6]
238
+ index = buff.rindex(LINE_BR_REG)
239
+ @column = index ? 5-index : column+6
240
+ @pointer += 6
241
+ @pointer1 += 6
242
+ end
243
+
244
+ LINE_BR_REG = /[\n\x85]|(?:\r[^\n])/
245
+ def forwardn(length)
246
+ update(length + 1) unless @pointer1+length < @buffer_length
247
+ buff = @buffer[@pointer...@pointer+length]
248
+ index = buff.rindex(LINE_BR_REG)
249
+ @column = index ? (length-index)-1 : column+length
250
+ @pointer += length
251
+ @pointer1 += length
252
+ end
253
+
254
+ def get_mark
255
+ if @stream.nil?
256
+ Mark.new(@name,@column,@buffer,@pointer)
257
+ else
258
+ Mark.new(@name,@column,nil,nil)
259
+ end
260
+ end
261
+
262
+ NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/
263
+ def check_printable(data)
264
+ if NON_PRINTABLE =~ data
265
+ position = @buffer.length-@pointer+($~.offset(0)[0])
266
+ raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed"),"special characters are not allowed"
267
+ end
268
+ end
269
+
270
+
271
+ def update(length)
272
+ return if @raw_buffer.nil?
273
+ @buffer = @buffer[@pointer..-1]
274
+ @pointer = 0
275
+ while @buffer.length < length
276
+ unless @eof
277
+ data = @stream.read(1024)
278
+ if data && !data.empty?
279
+ @buffer << data
280
+ @stream_pointer += data.length
281
+ @raw_buffer = ""
282
+ else
283
+ @eof = true
284
+ @buffer << ?\0
285
+ @raw_buffer = nil
286
+ break
287
+ end
288
+ else
289
+ @buffer << @raw_buffer << ?\0
290
+ @raw_buffer = nil
291
+ break
292
+ end
293
+ end
294
+ @buffer_length = @buffer.length
295
+ if @eof
296
+ check_printable(@buffer[(-length)..-2])
297
+ else
298
+ check_printable(@buffer[(-length)..-1])
299
+ end
300
+ @pointer1 = @pointer+1
107
301
  end
108
302
 
109
303
  def check_token(*choices)
@@ -144,70 +338,50 @@ module RbYAML
144
338
 
145
339
  def need_more_tokens
146
340
  return false if @done
147
- return true if @tokens.empty?
148
- # The current token may be a potential simple key, so we
149
- # need to look further.
150
- stale_possible_simple_keys
151
- return true if next_possible_simple_key == @tokens_taken
341
+ @tokens.empty? || next_possible_simple_key == @tokens_taken
152
342
  end
153
343
 
344
+ ENDING = /^---[\0 \t\r\n\x85]$/
345
+ START = /^\.\.\.[\0 \t\r\n\x85]$/
346
+ NULL_OR_OTHER = "\0 \t\r\n\x85"
347
+ BEG = /^([^\0 \t\r\n\x85\-?:,\[\]{}#&*!|>'"%@`]|([\-?:][^\0 \t\r\n\x85]))/
154
348
  def fetch_more_tokens
155
349
  # Eat whitespaces and comments until we reach the next token.
156
350
  scan_to_next_token
157
351
 
158
352
  # Remove obsolete possible simple keys.
159
- stale_possible_simple_keys
353
+ # stale_possible_simple_keys
160
354
 
161
355
  # Compare the current indentation and column. It may add some tokens
162
356
  # and decrease the current indentation level.
163
357
  unwind_indent(@column)
164
358
 
165
359
  # Peek the next character.
166
- ch = peek
167
-
168
- return case
169
- # Is it the end of stream?
170
- when ch == ?\0: fetch_stream_end
171
- # Is it a directive?
172
- when ch == ?% && check_directive: fetch_directive
173
- # Is it the document start?
174
- when ch == ?- && check_document_start: fetch_document_start
175
- # Is it the document end?
176
- when ch == ?. && check_document_end: fetch_document_end
177
- # Is it the flow sequence start indicator?
178
- when ch == ?[: fetch_flow_sequence_start
179
- # Is it the flow mapping start indicator?
180
- when ch == ?{: fetch_flow_mapping_start
181
- # Is it the flow sequence end indicator?
182
- when ch == ?]: fetch_flow_sequence_end
183
- # Is it the flow mapping end indicator?
184
- when ch == ?}: fetch_flow_mapping_end
185
- # Is it the flow entry indicator?
186
- when ch == ?,: fetch_flow_entry
187
- # Is it the block entry indicator?
188
- when ch == ?- && check_block_entry: fetch_block_entry
189
- # Is it the key indicator?
190
- when ch == ?? && check_key: fetch_key
191
- # Is it the value indicator?
192
- when ch == ?: && check_value: fetch_value
193
- # Is it an alias?
194
- when ch == ?*: fetch_alias
195
- # Is it an anchor?
196
- when ch == ?&: fetch_anchor
197
- # Is it a tag?
198
- when ch == ?!: fetch_tag
199
- # Is it a literal scalar?
200
- when ch == ?| && @flow_level==0: fetch_literal
201
- # Is it a folded scalar?
202
- when ch == ?> && @flow_level==0: fetch_folded
203
- # Is it a single quoted scalar?
204
- when ch == ?': fetch_single
205
- # Is it a double quoted scalar?
206
- when ch == ?": fetch_double
207
- # It must be a plain scalar then.
208
- when check_plain: fetch_plain
209
- else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
210
- end
360
+ ch = peek0
361
+ colz = @column == 0
362
+
363
+ case ch
364
+ when ?\0: return fetch_stream_end
365
+ when ?': return fetch_single
366
+ when ?": return fetch_double
367
+ when ??: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_key end
368
+ when ?:: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_value end
369
+ when ?%: if colz: return fetch_stream_end end
370
+ when ?-: if colz && ENDING =~ prefix(4): return fetch_document_start; elsif NULL_OR_OTHER.include?(peek1): return fetch_block_entry end
371
+ when ?.: if colz && START =~ prefix(4): return fetch_document_end end
372
+ when ?[: return fetch_flow_sequence_start
373
+ when ?{: return fetch_flow_mapping_start
374
+ when ?]: return fetch_flow_sequence_end
375
+ when ?}: return fetch_flow_mapping_end
376
+ when ?,: return fetch_flow_entry
377
+ when ?*: return fetch_alias
378
+ when ?&: return fetch_anchor
379
+ when ?!: return fetch_tag
380
+ when ?|: if @flow_zero: return fetch_literal end
381
+ when ?>: if @flow_zero: return fetch_folded end
382
+ end
383
+ return fetch_plain if BEG =~ prefix(2)
384
+ raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
211
385
  end
212
386
 
213
387
  # Simple keys treatment.
@@ -215,58 +389,22 @@ module RbYAML
215
389
  def next_possible_simple_key
216
390
  # Return the number of the nearest possible simple key. Actually we
217
391
  # don't need to loop through the whole dictionary.
218
- min_token_number = nil
219
- for level in @possible_simple_keys.keys
220
- key = @possible_simple_keys[level]
221
- if min_token_number.nil? || key.token_number < min_token_number
222
- min_token_number = key.token_number
223
- end
224
- end
225
- min_token_number
392
+ @possible_simple_keys.each_value {|key| return key.token_number if key.token_number}
393
+ nil
226
394
  end
227
395
 
228
- def stale_possible_simple_keys
229
- # Remove entries that are no longer possible simple keys. According to
230
- # the YAML specification, simple keys
231
- # - should be limited to a single line,
232
- # - should be no longer than 1024 characters.
233
- # Disabling this procedure will allow simple keys of any length and
234
- # height (may cause problems if indentation is broken though).
235
- @possible_simple_keys.delete_if {|level,key|
236
- if key.line != @line || @index-key.index > 1024
237
- raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'",get_mark) if key.required
238
- return true
239
- end
240
- return false
241
- }
242
- end
243
-
244
396
  def save_possible_simple_key
245
397
  # The next token may start a simple key. We check if it's possible
246
398
  # and save its position. This function is called for
247
399
  # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
248
-
249
- # Check if a simple key is required at the current position.
250
- required = @flow_level==0 && @indent == @column
251
-
252
400
  # The next token might be a simple key. Let's save its number and
253
401
  # position.
254
- if @allow_simple_key
255
- remove_possible_simple_key
256
- token_number = @tokens_taken+@tokens.length
257
- key = SimpleKey.new(token_number, required,@index,@line,@column,get_mark)
258
- @possible_simple_keys[@flow_level] = key
259
- end
402
+ @possible_simple_keys[@flow_level] = SimpleKey.new(@tokens_taken+@tokens.length, @flow_zero && @indent == @column,index,line,column,get_mark) if @allow_simple_key
260
403
  end
261
404
 
262
- def remove_possible_simple_key
263
- # Remove the saved possible key position at the current flow level.
264
- key = @possible_simple_keys[@flow_level] if @possible_simple_keys.member?(@flow_level)
265
- end
266
-
267
405
  # Indentation functions.
268
406
 
269
- def unwind_indent(column)
407
+ def unwind_indent(col)
270
408
  ## In flow context, tokens should respect indentation.
271
409
  ## Actually the condition should be `@indent >= column` according to
272
410
  ## the spec. But this condition will prohibit intuitively correct
@@ -280,11 +418,11 @@ module RbYAML
280
418
 
281
419
  # In the flow context, indentation is ignored. We make the scanner less
282
420
  # restrictive than the specification requires.
283
- return nil if @flow_level != 0
421
+ return nil if !@flow_zero
284
422
  # In block context, we may need to issue the BLOCK-END tokens.
285
- while @indent > column
423
+ while @indent > col
286
424
  mark = get_mark
287
- @indent = @indents.pop()
425
+ @indent = @indents.pop
288
426
  @tokens << BlockEndToken.new(mark, mark)
289
427
  end
290
428
  end
@@ -329,7 +467,6 @@ module RbYAML
329
467
  # Set the current indentation to -1.
330
468
  unwind_indent(-1)
331
469
  # Reset simple keys.
332
- remove_possible_simple_key
333
470
  @allow_simple_key = false
334
471
  # Scan and add DIRECTIVE.
335
472
  @tokens << scan_directive
@@ -348,11 +485,10 @@ module RbYAML
348
485
  unwind_indent(-1)
349
486
  # Reset simple keys. Note that there could not be a block collection
350
487
  # after '---'.
351
- remove_possible_simple_key
352
488
  @allow_simple_key = false
353
489
  # Add DOCUMENT-START or DOCUMENT-END.
354
490
  start_mark = get_mark
355
- forward(3)
491
+ forward3
356
492
  end_mark = get_mark
357
493
  @tokens << token.new(start_mark, end_mark)
358
494
  end
@@ -370,11 +506,12 @@ module RbYAML
370
506
  save_possible_simple_key
371
507
  # Increase the flow level.
372
508
  @flow_level += 1
509
+ @flow_zero = false
373
510
  # Simple keys are allowed after '[' and '{'.
374
511
  @allow_simple_key = true
375
512
  # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
376
513
  start_mark = get_mark
377
- forward
514
+ forward1
378
515
  end_mark = get_mark
379
516
  @tokens << token.new(start_mark, end_mark)
380
517
  end
@@ -388,15 +525,16 @@ module RbYAML
388
525
  end
389
526
 
390
527
  def fetch_flow_collection_end(token)
391
- # Reset possible simple key on the current level.
392
- remove_possible_simple_key
393
528
  # Decrease the flow level.
394
529
  @flow_level -= 1
530
+ if @flow_level == 0
531
+ @flow_zero = true
532
+ end
395
533
  # No simple keys after ']' or '}'.
396
534
  @allow_simple_key = false
397
535
  # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
398
536
  start_mark = get_mark
399
- forward
537
+ forward1
400
538
  end_mark = get_mark
401
539
  @tokens << token.new(start_mark, end_mark)
402
540
  end
@@ -404,21 +542,19 @@ module RbYAML
404
542
  def fetch_flow_entry
405
543
  # Simple keys are allowed after ','.
406
544
  @allow_simple_key = true
407
- # Reset possible simple key on the current level.
408
- remove_possible_simple_key
409
545
  # Add FLOW-ENTRY.
410
546
  start_mark = get_mark
411
- forward
547
+ forward1
412
548
  end_mark = get_mark
413
549
  @tokens << FlowEntryToken.new(start_mark, end_mark)
414
550
  end
415
551
 
416
552
  def fetch_block_entry
417
553
  # Block context needs additional checks.
418
- if @flow_level==0
554
+ if @flow_zero
419
555
  raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
420
556
  # We may need to add BLOCK-SEQUENCE-START.
421
- if add_indent(@column)
557
+ if add_indent(column)
422
558
  mark = get_mark
423
559
  @tokens << BlockSequenceStartToken.new(mark, mark)
424
560
  end
@@ -427,67 +563,63 @@ module RbYAML
427
563
  end
428
564
  # Simple keys are allowed after '-'.
429
565
  @allow_simple_key = true
430
- # Reset possible simple key on the current level.
431
- remove_possible_simple_key
432
566
  # Add BLOCK-ENTRY.
433
567
  start_mark = get_mark
434
- forward
568
+ forward1
435
569
  end_mark = get_mark
436
570
  @tokens << BlockEntryToken.new(start_mark, end_mark)
437
571
  end
438
572
 
439
573
  def fetch_key
440
574
  # Block context needs additional checks.
441
- if @flow_level==0
575
+ if @flow_zero
442
576
  # Are we allowed to start a key (not necessarily a simple one)?
443
577
  raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
444
578
  # We may need to add BLOCK-MAPPING-START.
445
- if add_indent(@column)
579
+ if add_indent(column)
446
580
  mark = get_mark
447
581
  @tokens << BlockMappingStartToken.new(mark, mark)
448
582
  end
449
583
  end
450
584
  # Simple keys are allowed after '?' in the block context.
451
- @allow_simple_key = @flow_level==0
452
- # Reset possible simple key on the current level.
453
- remove_possible_simple_key
585
+ @allow_simple_key = @flow_zero
454
586
  # Add KEY.
455
587
  start_mark = get_mark
456
- forward
588
+ forward1
457
589
  end_mark = get_mark
458
590
  @tokens << KeyToken.new(start_mark, end_mark)
459
591
  end
460
592
 
461
593
  def fetch_value
594
+ key = @possible_simple_keys[@flow_level]
462
595
  # Do we determine a simple key?
463
- if @possible_simple_keys.include?(@flow_level)
464
- # Add KEY.
465
- key = @possible_simple_keys[@flow_level]
466
- @possible_simple_keys.delete(@flow_level)
467
- @tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
468
- # If this key starts a new block mapping, we need to add
469
- # BLOCK-MAPPING-START.
470
- @tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
471
- # There cannot be two simple keys one after another.
472
- @allow_simple_key = false
473
- # It must be a part of a complex key.
474
- else
596
+ if key.nil?
475
597
  # Block context needs additional checks.
476
598
  # (Do we really need them? They will be caught by the parser
477
599
  # anyway.)
478
- if @flow_level==0
600
+ if @flow_zero
479
601
  # We are allowed to start a complex value if and only if
480
602
  # we can start a simple key.
481
603
  raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
482
604
  # Simple keys are allowed after ':' in the block context.
483
- @allow_simple_key = @flow_level==0
484
- # Reset possible simple key on the current level.
485
- remove_possible_simple_key
605
+ @allow_simple_key = true
486
606
  end
607
+ else
608
+ # Add KEY.
609
+ @possible_simple_keys.delete(@flow_level)
610
+
611
+ # If this key starts a new block mapping, we need to add
612
+ # BLOCK-MAPPING-START.
613
+ se = (@flow_zero && add_indent(key.column)) ? [BlockMappingStartToken.new(key.mark, key.mark)] : []
614
+ se << KeyToken.new(key.mark, key.mark)
615
+ @tokens.insert(key.token_number-@tokens_taken,*se)
616
+ # There cannot be two simple keys one after another.
617
+ @allow_simple_key = false
618
+ # It must be a part of a complex key.
487
619
  end
488
620
  # Add VALUE.
489
621
  start_mark = get_mark
490
- forward
622
+ forward1
491
623
  end_mark = get_mark
492
624
  @tokens << ValueToken.new(start_mark, end_mark)
493
625
  end
@@ -530,8 +662,6 @@ module RbYAML
530
662
  def fetch_block_scalar(style)
531
663
  # A simple key may follow a block scalar.
532
664
  @allow_simple_key = true
533
- # Reset possible simple key on the current level.
534
- remove_possible_simple_key
535
665
  # Scan and add SCALAR.
536
666
  @tokens << scan_block_scalar(style)
537
667
  end
@@ -564,65 +694,9 @@ module RbYAML
564
694
  @tokens << scan_plain
565
695
  end
566
696
 
567
- # Checkers.
568
-
569
- def check_directive
570
- # DIRECTIVE: ^ '%' ...
571
- # The '%' indicator is already checked.
572
- @column == 0
573
- end
574
-
575
- def check_document_start
576
- # DOCUMENT-START: ^ '---' (' '|'\n')
577
- @column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
578
- end
579
-
580
- def check_document_end
581
- # DOCUMENT-END: ^ '...' (' '|'\n')
582
- @column == 0 && prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
583
- end
584
-
585
- def check_block_entry
586
- # BLOCK-ENTRY: '-' (' '|'\n')
587
- "\0 \t\r\n\x85".include?(peek(1))
588
- end
589
-
590
- def check_key
591
- # KEY(flow context): '?'
592
- # KEY(block context): '?' (' '|'\n')
593
- @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
594
- end
595
-
596
- def check_value
597
- # VALUE(flow context): ':'
598
- # VALUE(block context): ':' (' '|'\n')
599
- @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
600
- end
601
-
602
- def check_plain
603
- # A plain scalar may start with any non-space character except:
604
- # '-', '?', ':', ',', '[', ']', '{', '}',
605
- # '#', '&', '*', '!', '|', '>', '\'', '\"',
606
- # '%', '@', '`'.
607
- #
608
- # It may also start with
609
- # '-', '?', ':'
610
- # if it is followed by a non-space character.
611
- #
612
- # Note that we limit the last rule to the block context (except the
613
- # '-' character) because we want the flow context to be space
614
- # independent.
615
- ch = peek
616
- !("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) || (!("\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level==0 && "?:".include?(ch)))))
617
- end
618
-
619
-
620
-
621
-
622
-
623
697
 
624
698
  # Scanners.
625
-
699
+ NULL_OR_LINEBR = "\0\r\n\x85"
626
700
  def scan_to_next_token
627
701
  # We ignore spaces, line breaks and comments.
628
702
  # If we find a line break in the block context, we set the flag
@@ -638,18 +712,23 @@ module RbYAML
638
712
  # We also need to add the check for `allow_simple_keys == true` to
639
713
  # `unwind_indent` before issuing BLOCK-END.
640
714
  # Scanners for block, flow, and plain scalars need to be modified.
641
- found = false
642
- while !found
643
- while peek == 32
644
- forward
715
+ while true
716
+ peek_0 = peek0
717
+ while peek_0 == 32
718
+ forward1
719
+ peek_0 = peek0
645
720
  end
646
- if peek == ?#
647
- forward while !"\0\r\n\x85".include?(peek)
721
+ if peek_0 == ?#
722
+ while !NULL_OR_LINEBR.include?(peek0)
723
+ forward1
724
+ peek_0 = peek0
725
+ end
648
726
  end
727
+
649
728
  if !scan_line_break.empty?
650
- @allow_simple_key = true if @flow_level==0
729
+ @allow_simple_key = true if @flow_zero
651
730
  else
652
- found = true
731
+ break
653
732
  end
654
733
  end
655
734
  end
@@ -657,7 +736,7 @@ module RbYAML
657
736
  def scan_directive
658
737
  # See the specification for details.
659
738
  start_mark = get_mark
660
- forward
739
+ forward1
661
740
  name = scan_directive_name(start_mark)
662
741
  value = nil
663
742
  if name == "YAML"
@@ -668,45 +747,50 @@ module RbYAML
668
747
  end_mark = get_mark
669
748
  else
670
749
  end_mark = get_mark
671
- forward while !"\0\r\n\x85".include?(peek)
750
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
672
751
  end
673
752
  scan_directive_ignored_line(start_mark)
674
753
  DirectiveToken.new(name, value, start_mark, end_mark)
675
754
  end
676
755
 
756
+ ALPHA_REG = /[-0-9A-Za-z_]/
757
+ NULL_BL_LINEBR = "\0 \r\n\x85"
758
+ NULL_BL_T_LINEBR = "\0 \t\r\n\x85"
677
759
  def scan_directive_name(start_mark)
678
760
  # See the specification for details.
679
761
  length = 0
680
762
  ch = peek(length)
681
- while /[-0-9A-Za-z_]/ =~ ch.chr
763
+ zlen = false
764
+ while ALPHA_REG =~ ch.chr
765
+ zlen = true
682
766
  length += 1
683
767
  ch = peek(length)
684
768
  end
685
- raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if length==0
769
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if zlen
686
770
  value = prefix(length)
687
771
  forward(length)
688
- ch = peek()
689
- raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
772
+ ch = peek0
773
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !NULL_BL_LINEBR.include?(ch)
690
774
  value
691
775
  end
692
776
 
693
777
  def scan_yaml_directive_value(start_mark)
694
778
  # See the specification for details.
695
- forward while peek == 32
779
+ forward1 while peek0 == 32
696
780
  major = scan_yaml_directive_number(start_mark)
697
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek != ?.
698
- forward
781
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek0 != ?.
782
+ forward1
699
783
  minor = scan_yaml_directive_number(start_mark)
700
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !"\0 \r\n\x85".include?(peek)
784
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
701
785
  [major, minor]
702
786
  end
703
787
 
704
788
  def scan_yaml_directive_number(start_mark)
705
789
  # See the specification for details.
706
- ch = peek
707
- raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !((?0..?9) === ch)
790
+ ch = peek0
791
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !(ch.__is_ascii_num)
708
792
  length = 0
709
- length += 1 while ((?0..?9) === peek(length))
793
+ length += 1 while (peek(length).__is_ascii_num)
710
794
  value = prefix(length)
711
795
  forward(length)
712
796
  value
@@ -714,9 +798,9 @@ module RbYAML
714
798
 
715
799
  def scan_tag_directive_value(start_mark)
716
800
  # See the specification for details.
717
- forward while peek == 32
801
+ forward1 while peek0 == 32
718
802
  handle = scan_tag_directive_handle(start_mark)
719
- forward while peek == 32
803
+ forward1 while peek0 == 32
720
804
  prefix = scan_tag_directive_prefix(start_mark)
721
805
  [handle, prefix]
722
806
  end
@@ -724,30 +808,30 @@ module RbYAML
724
808
  def scan_tag_directive_handle(start_mark)
725
809
  # See the specification for details.
726
810
  value = scan_tag_handle("directive", start_mark)
727
- ch = peek
728
- raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
811
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if peek0 != 32
729
812
  value
730
813
  end
731
814
 
732
815
  def scan_tag_directive_prefix(start_mark)
733
816
  # See the specification for details.
734
817
  value = scan_tag_uri("directive", start_mark)
735
- ch = peek
736
- raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
818
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{peek0}",get_mark()) if !NULL_BL_LINEBR.include?(peek0)
737
819
  value
738
820
  end
739
821
 
740
822
  def scan_directive_ignored_line(start_mark)
741
823
  # See the specification for details.
742
- forward while peek == 32
743
- if peek == ?#
744
- forward while !"\0\r\n\x85".include?(peek)
824
+ forward1 while peek0 == 32
825
+ if peek0 == ?#
826
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
745
827
  end
746
- ch = peek
747
- raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark()) if !"\0\r\n\x85".include?(ch)
828
+ ch = peek0
829
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{peek0.to_s}",get_mark()) if !NULL_OR_LINEBR.include?(peek0)
748
830
  scan_line_break
749
831
  end
750
-
832
+
833
+ NON_ALPHA = /[^-0-9A-Za-z_]/
834
+ NON_ALPHA_OR_NUM = "\0 \t\r\n\x85?:,]}%@`"
751
835
  def scan_anchor(token)
752
836
  # The specification does not restrict characters for anchors and
753
837
  # aliases. This may lead to problems, for instance, the document:
@@ -758,45 +842,47 @@ module RbYAML
758
842
  # [ *alias , "value" ]
759
843
  # Therefore we restrict aliases to numbers and ASCII letters.
760
844
  start_mark = get_mark
761
- indicator = peek
845
+ indicator = peek0
762
846
  name = (indicator == ?*) ? "alias":"anchor"
763
- forward
847
+ forward1
764
848
  length = 0
765
- ch = peek(length)
766
- while /[-0-9A-Za-z_]/ =~ ch.chr
767
- length += 1
768
- ch = peek(length)
849
+ chunk_size = 16
850
+ while true
851
+ chunk = prefix(chunk_size)
852
+ if length = (NON_ALPHA =~ chunk)
853
+ break
854
+ end
855
+ chunk_size += 16
769
856
  end
770
- raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark) if length==0
857
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found something else...",get_mark) if length==0
771
858
  value = prefix(length)
772
859
  forward(length)
773
- ch = peek
774
- if !"\0 \t\r\n\x85?:,]}%@`".include?(ch)
775
- raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
860
+ if !NON_ALPHA_OR_NUM.include?(peek0)
861
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{peek0}",get_mark)
776
862
  end
777
863
  end_mark = get_mark
778
864
  token.new(value, start_mark, end_mark)
779
865
  end
780
866
 
781
-
867
+ NULL_T_BL_LINEBR = "\0 \t\r\n\x85"
782
868
  def scan_tag
783
869
  # See the specification for details.
784
870
  start_mark = get_mark
785
- ch = peek(1)
871
+ ch = peek1
786
872
  if ch == ?<
787
873
  handle = nil
788
- forward(2)
874
+ forward2
789
875
  suffix = scan_tag_uri("tag", start_mark)
790
- raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
791
- forward
792
- elsif "\0 \t\r\n\x85".include?(ch)
876
+ raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek0 != ?>
877
+ forward1
878
+ elsif NULL_T_BL_LINEBR.include?(ch)
793
879
  handle = nil
794
880
  suffix = "!"
795
- forward
881
+ forward1
796
882
  else
797
883
  length = 1
798
884
  use_handle = false
799
- while !"\0 \t\r\n\x85".include?(ch)
885
+ while !NULL_T_BL_LINEBR.include?(ch)
800
886
  if ch == ?!
801
887
  use_handle = true
802
888
  break
@@ -809,24 +895,24 @@ module RbYAML
809
895
  handle = scan_tag_handle("tag", start_mark)
810
896
  else
811
897
  handle = "!"
812
- forward
898
+ forward1
813
899
  end
814
900
  suffix = scan_tag_uri("tag", start_mark)
815
901
  end
816
- ch = peek
817
- raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
902
+ raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
818
903
  value = [handle, suffix]
819
904
  end_mark = get_mark
820
905
  TagToken.new(value, start_mark, end_mark)
821
906
  end
822
907
 
908
+ BLANK_T = " \t"
823
909
  def scan_block_scalar(style)
824
910
  # See the specification for details.
825
911
  folded = style== ?>
826
912
  chunks = []
827
913
  start_mark = get_mark
828
914
  # Scan the header.
829
- forward
915
+ forward1
830
916
  chomping, increment = scan_block_scalar_indicators(start_mark)
831
917
  scan_block_scalar_ignored_line(start_mark)
832
918
  # Determine the indentation level and go to the first non-empty line.
@@ -841,20 +927,20 @@ module RbYAML
841
927
  end
842
928
  line_break = ''
843
929
  # Scan the inner part of the block scalar.
844
- while @column == indent and peek != ?\0
930
+ while column == indent and peek0 != ?\0
845
931
  chunks += breaks
846
- leading_non_space = !" \t".include?(peek)
932
+ leading_non_space = !BLANK_T.include?(peek0)
847
933
  length = 0
848
- length += 1 while !"\0\r\n\x85".include?(peek(length))
934
+ length += 1 while !NULL_OR_LINEBR.include?(peek(length))
849
935
  chunks << prefix(length)
850
936
  forward(length)
851
937
  line_break = scan_line_break
852
938
  breaks, end_mark = scan_block_scalar_breaks(indent)
853
- if @column == indent && peek != 0
939
+ if column == indent && peek0 != 0
854
940
  # Unfortunately, folding rules are ambiguous.
855
941
  #
856
942
  # This is the folding according to the specification:
857
- if folded && line_break == ?\n && leading_non_space && !" \t".include?(peek())
943
+ if folded && line_break == ?\n && leading_non_space && !BLANK_T.include?(peek0)
858
944
  chunks << ' ' if breaks.empty?
859
945
  else
860
946
  chunks << line_break
@@ -882,76 +968,76 @@ module RbYAML
882
968
  end
883
969
 
884
970
  # We are done.
885
- ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
971
+ ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
886
972
  end
887
973
 
974
+ PLUS_MIN = /[+-]/
888
975
  def scan_block_scalar_indicators(start_mark)
889
976
  # See the specification for details.
890
977
  chomping = nil
891
978
  increment = nil
892
- ch = peek
893
- if /[+-]/ =~ ch.chr
979
+ ch = peek0
980
+ if PLUS_MIN =~ ch.chr
894
981
  chomping = ch == ?+
895
- forward
896
- ch = peek
897
- if (?0..?9) === ch
982
+ forward1
983
+ ch = peek0
984
+ if ch.__is_ascii_num
898
985
  increment = ch.to_i
899
986
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
900
- forward
987
+ forward1
901
988
  end
902
- elsif (?0..?9) === ch
989
+ elsif ch.__is_ascii_num
903
990
  increment = ch
904
991
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
905
- forward
906
- ch = peek
907
- if /[+-]/ =~ ch.chr
992
+ forward1
993
+ ch = peek0
994
+ if PLUS_MIN =~ ch.chr
908
995
  chomping = ch == ?+
909
- forward
996
+ forward1
910
997
  end
911
998
  end
912
- ch = peek
913
- raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
999
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{peek0}",get_mark) if !NULL_BL_LINEBR.include?(peek0)
914
1000
  [chomping, increment]
915
1001
  end
916
1002
 
917
1003
  def scan_block_scalar_ignored_line(start_mark)
918
1004
  # See the specification for details.
919
- forward while peek == 32
920
- if peek == ?#
921
- forward while !"\0\r\n\x85".include?(peek)
1005
+ forward1 while peek0 == 32
1006
+ if peek0 == ?#
1007
+ forward1 while !NULL_OR_LINEBR.include?(peek0)
922
1008
  end
923
- ch = peek
924
-
925
- raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark) if !"\0\r\n\x85".include?(ch)
1009
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{peek0}",get_mark) if !NULL_OR_LINEBR.include?(peek0)
926
1010
  scan_line_break
927
1011
  end
928
1012
 
1013
+ BLANK_OR_LINEBR = " \r\n\x85"
929
1014
  def scan_block_scalar_indentation
930
1015
  # See the specification for details.
931
1016
  chunks = []
932
1017
  max_indent = 0
933
1018
  end_mark = get_mark
934
- while " \r\n\x85".include?(peek)
935
- if peek != 32
1019
+ while BLANK_OR_LINEBR.include?(peek0)
1020
+ if peek0 != 32
936
1021
  chunks << scan_line_break
937
1022
  end_mark = get_mark
938
1023
  else
939
- forward
940
- max_indent = @column if @column > max_indent
1024
+ forward1
1025
+ max_indent = column if column > max_indent
941
1026
  end
942
1027
  end
943
1028
  [chunks, max_indent, end_mark]
944
1029
  end
945
1030
 
1031
+ FULL_LINEBR = "\r\n\x85"
946
1032
  def scan_block_scalar_breaks(indent)
947
1033
  # See the specification for details.
948
1034
  chunks = []
949
1035
  end_mark = get_mark
950
- forward while @column < indent && peek == 32
951
- while "\r\n\x85".include?(peek)
1036
+ forward1 while @column < indent && peek0 == 32
1037
+ while FULL_LINEBR.include?(peek0)
952
1038
  chunks << scan_line_break
953
1039
  end_mark = get_mark
954
- forward while @column < indent && peek == 32
1040
+ forward1 while @column < indent && peek0 == 32
955
1041
  end
956
1042
  [chunks, end_mark]
957
1043
  end
@@ -966,16 +1052,16 @@ module RbYAML
966
1052
  double = style == ?"
967
1053
  chunks = []
968
1054
  start_mark = get_mark
969
- quote = peek
970
- forward
1055
+ quote = peek0
1056
+ forward1
971
1057
  chunks += scan_flow_scalar_non_spaces(double, start_mark)
972
- while peek != quote
1058
+ while peek0 != quote
973
1059
  chunks += scan_flow_scalar_spaces(double, start_mark)
974
1060
  chunks += scan_flow_scalar_non_spaces(double, start_mark)
975
1061
  end
976
- forward
1062
+ forward1
977
1063
  end_mark = get_mark
978
- ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
1064
+ ScalarToken.new(chunks.to_s, false, start_mark, end_mark,style)
979
1065
  end
980
1066
 
981
1067
  ESCAPE_REPLACEMENTS = {
@@ -1000,42 +1086,43 @@ module RbYAML
1000
1086
  'x' => 2
1001
1087
  }
1002
1088
 
1089
+ SPACES_AND_STUFF = "'\"\\\0 \t\r\n\x85"
1090
+ DOUBLE_ESC = "\"\\"
1091
+ NOT_HEXA = /[^0-9A-Fa-f]/
1003
1092
  def scan_flow_scalar_non_spaces(double, start_mark)
1004
1093
  # See the specification for details.
1005
1094
  chunks = []
1006
1095
  while true
1007
1096
  length = 0
1008
- length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
1097
+ length += 1 while !SPACES_AND_STUFF.include?(peek(length))
1009
1098
  if length!=0
1010
1099
  chunks << prefix(length)
1011
1100
  forward(length)
1012
1101
  end
1013
- ch = peek
1014
- if !double && ch == ?' && peek(1) == ?'
1102
+ ch = peek0
1103
+ if !double && ch == ?' && peek1 == ?'
1015
1104
  chunks << ?'
1016
- forward(2)
1017
- elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
1105
+ forward2
1106
+ elsif (double && ch == ?') || (!double && DOUBLE_ESC.include?(ch))
1018
1107
  chunks << ch
1019
- forward
1108
+ forward1
1020
1109
  elsif double && ch == ?\\
1021
- forward
1022
- ch = peek
1110
+ forward1
1111
+ ch = peek0
1023
1112
  if ESCAPE_REPLACEMENTS.member?(ch.chr)
1024
1113
  chunks << ESCAPE_REPLACEMENTS[ch.chr]
1025
- forward
1114
+ forward1
1026
1115
  elsif ESCAPE_CODES.member?(ch.chr)
1027
1116
  length = ESCAPE_CODES[ch.chr]
1028
- forward
1029
- length.times do |k|
1030
- if /[0-9A-Fa-f]/ !~ peek(k).chr
1031
- raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
1032
- "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
1033
- end
1117
+ forward1
1118
+ if NOT_HEXA =~ prefix(length)
1119
+ raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
1120
+ "expected escape sequence of #{length} hexdecimal numbers, but found something else: #{prefix(length)}}",get_mark)
1034
1121
  end
1035
1122
  code = prefix(length).to_i.to_s(16)
1036
1123
  chunks << code
1037
1124
  forward(length)
1038
- elsif "\r\n\x85".include?(ch)
1125
+ elsif FULL_LINEBR.include?(ch)
1039
1126
  scan_line_break
1040
1127
  chunks += scan_flow_scalar_breaks(double, start_mark)
1041
1128
  else
@@ -1051,13 +1138,13 @@ module RbYAML
1051
1138
  # See the specification for details.
1052
1139
  chunks = []
1053
1140
  length = 0
1054
- length += 1 while /[ \t]/ =~ peek(length).chr
1141
+ length += 1 while BLANK_T.include?(peek(length))
1055
1142
  whitespaces = prefix(length)
1056
1143
  forward(length)
1057
- ch = peek
1144
+ ch = peek0
1058
1145
  if ch == ?\0
1059
1146
  raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
1060
- elsif "\r\n\x85".include?(ch)
1147
+ elsif FULL_LINEBR.include?(ch)
1061
1148
  line_break = scan_line_break
1062
1149
  breaks = scan_flow_scalar_breaks(double, start_mark)
1063
1150
  if line_break != ?\n
@@ -1079,17 +1166,22 @@ module RbYAML
1079
1166
  # Instead of checking indentation, we check for document
1080
1167
  # separators.
1081
1168
  prefix = prefix(3)
1082
- if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1169
+ if (prefix == "---" || prefix == "...") &&NULL_BL_T_LINEBR.include?(peek3)
1083
1170
  raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
1084
1171
  end
1085
- forward while /[ \t]/ =~ peek.chr
1086
- if "\r\n\x85".include?(peek)
1172
+ forward1 while BLANK_T.include?(peek0)
1173
+ if FULL_LINEBR.include?(peek0)
1087
1174
  chunks << scan_line_break
1088
1175
  else
1089
1176
  return chunks
1090
1177
  end
1091
1178
  end
1092
1179
  end
1180
+
1181
+
1182
+ R_flowzero = /[\0 \t\r\n\x85]|(:[\0 \t\r\n\x28])/
1183
+ R_flownonzero = /[\0 \t\r\n\x85\[\]{},:?]/
1184
+ S4 = "\0 \t\r\n\x28[]{}"
1093
1185
 
1094
1186
  def scan_plain
1095
1187
  # See the specification for details.
@@ -1098,25 +1190,25 @@ module RbYAML
1098
1190
  # We also keep track of the `allow_simple_key` flag here.
1099
1191
  # Indentation rules are loosed for the flow context.
1100
1192
  chunks = []
1101
- start_mark = get_mark
1102
- end_mark = start_mark
1193
+ end_mark = start_mark = get_mark
1103
1194
  indent = @indent+1
1104
1195
  # We allow zero indentation for scalars, but then we need to check for
1105
1196
  # document separators at the beginning of the line.
1106
1197
  #if indent == 0
1107
1198
  # indent = 1
1108
1199
  spaces = []
1109
- while true
1200
+ if @flow_zero
1201
+ f_nzero, r_check = false, R_flowzero
1202
+ else
1203
+ f_nzero, r_check = true, R_flownonzero
1204
+ end
1205
+
1206
+ while peek0 != ?#
1110
1207
  length = 0
1111
- break if peek == ?#
1112
- while true
1113
- ch = peek(length)
1114
- if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x28".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
1115
- break
1116
- end
1117
- length += 1
1118
- end
1119
- if @flow_level != 0 && ch == ?: && !"\0 \t\r\n\x28[]{}".include?(peek(length+1))
1208
+ chunk_size = 32
1209
+ chunk_size += 32 until length = (r_check =~ prefix(chunk_size))
1210
+ ch = peek(length)
1211
+ if f_nzero && ch == ?: && !S4.include?(peek(length+1))
1120
1212
  forward(length)
1121
1213
  raise ScannerError.new("while scanning a plain scalar",start_mark,"found unexpected ':'",get_mark,"Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
1122
1214
  end
@@ -1127,11 +1219,12 @@ module RbYAML
1127
1219
  forward(length)
1128
1220
  end_mark = get_mark
1129
1221
  spaces = scan_plain_spaces(indent, start_mark)
1130
- break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
1222
+ break if !spaces || (@flow_zero && @column < indent)
1131
1223
  end
1132
- return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
1224
+ return ScalarToken.new(chunks.to_s, true, start_mark, end_mark)
1133
1225
  end
1134
1226
 
1227
+ END_OR_START = /^(---|\.\.\.)[\0 \t\r\n\x85]$/
1135
1228
  def scan_plain_spaces(indent, start_mark)
1136
1229
  # See the specification for details.
1137
1230
  # The specification is really confusing about tabs in plain scalars.
@@ -1141,20 +1234,18 @@ module RbYAML
1141
1234
  length += 1 while peek(length) == 32
1142
1235
  whitespaces = prefix(length)
1143
1236
  forward(length)
1144
- ch = peek
1145
- if "\r\n\x85".include?(ch)
1237
+ ch = peek0
1238
+ if FULL_LINEBR.include?(ch)
1146
1239
  line_break = scan_line_break
1147
1240
  @allow_simple_key = true
1148
- prefix = prefix(3)
1149
- return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1241
+ return if END_OR_START =~ prefix(4)
1150
1242
  breaks = []
1151
- while " \r\n\x85".include?(peek)
1152
- if peek == 32
1153
- forward
1243
+ while BLANK_OR_LINEBR.include?(peek0)
1244
+ if peek0 == 32
1245
+ forward1
1154
1246
  else
1155
1247
  breaks << scan_line_break
1156
- prefix = prefix(3)
1157
- return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1248
+ return if END_OR_START =~ prefix(4)
1158
1249
  end
1159
1250
  end
1160
1251
  if line_break != '\n'
@@ -1163,22 +1254,23 @@ module RbYAML
1163
1254
  chunks << ' '
1164
1255
  end
1165
1256
  chunks += breaks
1166
- elsif !whitespaces.empty?
1257
+ else
1167
1258
  chunks << whitespaces
1168
1259
  end
1169
1260
  chunks
1170
1261
  end
1171
1262
 
1263
+
1172
1264
  def scan_tag_handle(name, start_mark)
1173
1265
  # See the specification for details.
1174
1266
  # For some strange reasons, the specification does not allow '_' in
1175
1267
  # tag handles. I have allowed it anyway.
1176
- ch = peek
1268
+ ch = peek0
1177
1269
  raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) if ch != ?!
1178
1270
  length = 1
1179
1271
  ch = peek(length)
1180
1272
  if ch != 32
1181
- while /[-_0-9A-Za-z]/ =~ ch.chr
1273
+ while ALPHA_REG =~ ch.chr
1182
1274
  length += 1
1183
1275
  ch = peek(length)
1184
1276
  end
@@ -1193,13 +1285,14 @@ module RbYAML
1193
1285
  value
1194
1286
  end
1195
1287
 
1288
+ STRANGE_CHR = /[\]\[\-';\/?:@&=+$,.!~*()%\w]/
1196
1289
  def scan_tag_uri(name, start_mark)
1197
1290
  # See the specification for details.
1198
1291
  # Note: we do not check if URI is well-formed.
1199
1292
  chunks = []
1200
1293
  length = 0
1201
1294
  ch = peek(length)
1202
- while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
1295
+ while STRANGE_CHR =~ ch.chr
1203
1296
  if ch == ?%
1204
1297
  chunks << prefix(length)
1205
1298
  forward(length)
@@ -1217,25 +1310,24 @@ module RbYAML
1217
1310
  end
1218
1311
 
1219
1312
  raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
1220
- chunks.join('')
1313
+ chunks.to_s
1221
1314
  end
1222
1315
 
1316
+ HEXA_REG = /[0-9A-Fa-f]/
1223
1317
  def scan_uri_escapes(name, start_mark)
1224
1318
  # See the specification for details.
1225
1319
  bytes = []
1226
1320
  mark = get_mark
1227
- while peek == ?%
1228
- forward
1229
- 2.times do |k|
1230
- raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
1231
- get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
1232
- end
1321
+ while peek0 == ?%
1322
+ forward1
1323
+ raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek1} and #{peek2}",get_mark) if HEXA_REG !~ peek1.chr || HEXA_REG !~ peek2.chr
1233
1324
  bytes << prefix(2).to_i.to_s(16)
1234
- forward(2)
1325
+ forward2
1235
1326
  end
1236
- bytes.join('')
1327
+ bytes.to_s
1237
1328
  end
1238
1329
 
1330
+ RN = "\r\n"
1239
1331
  def scan_line_break
1240
1332
  # Transforms:
1241
1333
  # '\r\n' : '\n'
@@ -1243,12 +1335,11 @@ module RbYAML
1243
1335
  # '\n' : '\n'
1244
1336
  # '\x85' : '\n'
1245
1337
  # default : ''
1246
- ch = peek
1247
- if "\r\n\x85".include?(ch)
1248
- if prefix(2) == "\r\n"
1249
- forward(2)
1338
+ if FULL_LINEBR.include?(peek0)
1339
+ if prefix2 == RN
1340
+ forward2
1250
1341
  else
1251
- forward
1342
+ forward1
1252
1343
  end
1253
1344
  return "\n"
1254
1345
  end