RbYAML 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66)
  1. data/LICENSE +19 -0
  2. data/README +31 -0
  3. data/lib/rbyaml.rb +378 -0
  4. data/lib/rbyaml/composer.rb +189 -0
  5. data/lib/rbyaml/constructor.rb +374 -0
  6. data/lib/rbyaml/detector.rb +44 -0
  7. data/lib/rbyaml/dumper.rb +40 -0
  8. data/lib/rbyaml/emitter.rb +1116 -0
  9. data/lib/rbyaml/error.rb +81 -0
  10. data/lib/rbyaml/events.rb +92 -0
  11. data/lib/rbyaml/loader.rb +49 -0
  12. data/lib/rbyaml/nodes.rb +69 -0
  13. data/lib/rbyaml/parser.rb +488 -0
  14. data/lib/rbyaml/reader.rb +127 -0
  15. data/lib/rbyaml/representer.rb +183 -0
  16. data/lib/rbyaml/scanner.rb +1258 -0
  17. data/lib/rbyaml/serializer.rb +120 -0
  18. data/lib/rbyaml/test.rb +56 -0
  19. data/lib/rbyaml/tokens.rb +163 -0
  20. data/lib/rbyaml/yaml.rb +143 -0
  21. data/test/test_rbyaml.rb +18 -0
  22. data/test/yaml/gems.yml +130951 -0
  23. data/test/yaml/gems2.yml +113 -0
  24. data/test/yaml/test1.yml +3 -0
  25. data/test/yaml/test10.yml +8 -0
  26. data/test/yaml/test12.yml +8 -0
  27. data/test/yaml/test13.yml +4 -0
  28. data/test/yaml/test14.yml +4 -0
  29. data/test/yaml/test15.yml +8 -0
  30. data/test/yaml/test16.yml +7 -0
  31. data/test/yaml/test18.yml +6 -0
  32. data/test/yaml/test19.yml +5 -0
  33. data/test/yaml/test2.yml +3 -0
  34. data/test/yaml/test20.yml +6 -0
  35. data/test/yaml/test21.yml +4 -0
  36. data/test/yaml/test22.yml +4 -0
  37. data/test/yaml/test23.yml +13 -0
  38. data/test/yaml/test24.yml +14 -0
  39. data/test/yaml/test25.yml +7 -0
  40. data/test/yaml/test26.yml +7 -0
  41. data/test/yaml/test27.yml +29 -0
  42. data/test/yaml/test28.yml +26 -0
  43. data/test/yaml/test29.yml +13 -0
  44. data/test/yaml/test3.yml +8 -0
  45. data/test/yaml/test30.yml +7 -0
  46. data/test/yaml/test31.yml +2 -0
  47. data/test/yaml/test32.yml +13 -0
  48. data/test/yaml/test33.yml +2 -0
  49. data/test/yaml/test34.yml +8 -0
  50. data/test/yaml/test35.yml +4 -0
  51. data/test/yaml/test36.yml +8 -0
  52. data/test/yaml/test37.yml +2 -0
  53. data/test/yaml/test38.yml +8 -0
  54. data/test/yaml/test39.yml +2 -0
  55. data/test/yaml/test4.yml +8 -0
  56. data/test/yaml/test40.yml +3 -0
  57. data/test/yaml/test41.yml +5 -0
  58. data/test/yaml/test42.yml +12 -0
  59. data/test/yaml/test43.yml +15 -0
  60. data/test/yaml/test44.yml +23 -0
  61. data/test/yaml/test5.yml +3 -0
  62. data/test/yaml/test6.yml +5 -0
  63. data/test/yaml/test7.yml +10 -0
  64. data/test/yaml/test8.yml +10 -0
  65. data/test/yaml/test9.yml +8 -0
  66. metadata +111 -0
@@ -0,0 +1,127 @@
1
+ # This is a more or less straight translation of PyYAML3000 to Ruby
2
+
3
+ # the big difference in this implementation is that unicode support is not here...
4
+
5
+ require 'rbyaml/error'
6
+
7
+ module RbYAML
8
+
9
+ # Reader:
10
+ # - checks if characters are in allowed range,
11
+ # - adds '\0' to the end.
12
+ # Reader accepts
13
+ # - a String object
14
+ # - a duck-typed IO object
15
+ module Reader
16
+ def initialize_reader(stream)
17
+ @stream = nil
18
+ @stream_pointer = 0
19
+ @eof = true
20
+ @buffer = ""
21
+ @pointer = 0
22
+ @index = 0
23
+ @line = 0
24
+ @column = 0
25
+ if String === stream
26
+ @name = "<string>"
27
+ @raw_buffer = stream
28
+ else
29
+ @stream = stream
30
+ @name = stream.respond_to?(:path) ? stream.path : stream.inspect
31
+ @eof = false
32
+ @raw_buffer = ""
33
+ end
34
+ end
35
+
36
+ def peek(index=0)
37
+ update(index+1) if @pointer+index+1 >= @buffer.length
38
+ @buffer[@pointer+index]
39
+ end
40
+
41
+ def prefix(length=1)
42
+ update(length) if @pointer+length >= @buffer.length
43
+ @buffer[@pointer...@pointer+length]
44
+ end
45
+
46
+ def forward(length=1)
47
+ update(length+1) if @pointer+length+1 >= @buffer.length
48
+ length.times { |k|
49
+ ch = @buffer[@pointer]
50
+ @pointer += 1
51
+ @index += 1
52
+ if "\n\x85".include?(ch) || (ch == ?\r && @buffer[@pointer+1] != ?\n)
53
+ @line += 1
54
+ @column = 0
55
+ else
56
+ @column += 1
57
+ end
58
+ }
59
+ end
60
+
61
+ def get_mark
62
+ if @stream.nil?
63
+ Mark.new(@name,@index,@line,@column,@buffer,@pointer)
64
+ else
65
+ Mark.new(@name,@index,@line,@column,nil,nil)
66
+ end
67
+ end
68
+
69
+ NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/
70
+ def check_printable(data)
71
+ if NON_PRINTABLE =~ data
72
+ position = @index+@buffer.length-@pointer+($~.offset(0)[0])
73
+ raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed"),"special characters are not allowed"
74
+ end
75
+ end
76
+
77
+ def update(length)
78
+ return if @raw_buffer.nil?
79
+ @buffer = @buffer[@pointer..-1]
80
+ @pointer = 0
81
+ while @buffer.length < length
82
+ unless @eof
83
+ update_raw
84
+ end
85
+ data = @raw_buffer
86
+ converted = data.length
87
+ check_printable(data)
88
+ @buffer << data
89
+ @raw_buffer = @raw_buffer[converted..-1]
90
+ if @eof
91
+ @buffer << ?\0
92
+ @raw_buffer = nil
93
+ break
94
+ end
95
+ end
96
+ end
97
+
98
+ def update_raw(size=1024)
99
+ data = @stream.read(size)
100
+ if data && !data.empty?
101
+ @raw_buffer << data
102
+ @stream_pointer += data.length
103
+ else
104
+ @eof = true
105
+ end
106
+ end
107
+ end
108
+
109
# Raised when the stream contains characters outside the printable range.
class ReaderError < YAMLError
  def initialize(name, position, character, encoding, reason)
    @name = name
    @position = position
    @character = character
    @encoding = encoding
    @reason = reason
  end

  def to_s
    if String === @character
      # BUG FIX: format the *byte value* of the offending character.
      # The original used @character.to_i, which is always 0 for a
      # non-numeric string, so every message read "byte #x00".
      "'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.unpack("C").first
    else
      "unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
    end
  end
end
126
+ end
127
+
@@ -0,0 +1,183 @@
1
+
2
+ require 'set'
3
+
4
+ require 'rbyaml/error'
5
+ require 'rbyaml/nodes'
6
+ require 'rbyaml/detector'
7
+
8
+ module RbYAML
9
# Raised when an object cannot be represented as a YAML node tree.
class RepresenterError < YAMLError
end
11
+
12
# Maps native Ruby objects to RbYAML node objects. Representer methods
# are registered per-class via add_representer; lookup walks the
# object's ancestor chain and falls back to the nil registration.
module BaseRepresenter
  @@yaml_representers = {}

  def initialize_representer
    # object_id => node, so shared objects become aliases and recursion
    # is detected (a nil entry means "in progress").
    @represented_objects = {}
  end

  # Represent `data` as a node tree, serialize it, and reset alias state.
  def represent(data)
    node = represent_object(data)
    serialize(node)
    # BUG FIX: reset the *instance variable*. The original assigned a
    # local `represented_objects`, so alias state leaked across documents.
    @represented_objects = {}
  end

  # Build (or fetch the cached) node for `data`.
  def represent_object(data)
    alias_key = ignore_aliases(data) ? nil : data.object_id

    if !alias_key.nil?
      if @represented_objects.include?(alias_key)
        node = @represented_objects[alias_key]
        raise RepresenterError.new("recursive objects are not allowed: #{data}") if node.nil?
        return node
      end
      # Mark as in-progress; seeing this nil above means recursion.
      @represented_objects[alias_key] = nil
    end

    node = nil
    found = false
    # BUG FIX: the original set its `rerun` flag on *every* iteration,
    # so the fallback branch below was unreachable whenever the ancestor
    # list was non-empty (i.e. always). Set the flag only on a match,
    # emulating Python's for/else from the PyYAML original.
    data.class.ancestors.each do |data_type|
      if @@yaml_representers.include?(data_type)
        node = send(@@yaml_representers[data_type], data)
        found = true
        break
      end
    end
    unless found
      if @@yaml_representers.include?(nil)
        node = send(@@yaml_representers[nil], data)
      else
        node = ScalarNode.new(data.taguri, data)
      end
    end
    @represented_objects[alias_key] = node if !alias_key.nil?
    node
  end

  # Register `representer` (a method name) for objects of `data_type`.
  def self.add_representer(data_type, representer)
    @@yaml_representers[data_type] = representer
  end

  def represent_scalar(tag, value, style=nil)
    ScalarNode.new(tag, value, style)
  end

  def represent_sequence(tag, sequence, flow_style=nil)
    value = sequence.map { |item| represent_object(item) }
    SequenceNode.new(tag, value, flow_style)
  end

  def represent_mapping(tag, mapping, flow_style=nil)
    value = {}
    mapping.each { |item_key, item_value| value[represent_object(item_key)] = represent_object(item_value) }
    MappingNode.new(tag, value, flow_style)
  end

  # Subclasses override; returning true skips anchor/alias bookkeeping.
  def ignore_aliases(data)
    false
  end
end
84
+
85
# Representers for Ruby's core value types. Anything registered against
# these classes is considered safe to emit; unknown objects fall through
# to represent_undefined via the nil registration below.
module SafeRepresenter
  include BaseRepresenter

  # Plain value types never get anchors/aliases.
  def ignore_aliases(data)
    data.nil? || String === data || TrueClass === data || FalseClass === data || Integer === data || Float === data
  end

  def represent_none(data)
    represent_scalar(data.taguri, "null")
  end

  def represent_str(data)
    represent_scalar(data.taguri, data)
  end

  def represent_symbol(data)
    represent_scalar(data.taguri, data.to_s)
  end

  def represent_bool(data)
    value = data ? "true" : "false"
    represent_scalar(data.taguri, value)
  end

  def represent_int(data)
    represent_scalar(data.taguri, data.to_s)
  end

  def represent_float(data)
    if data.infinite? == 1
      value = ".inf"
    elsif data.infinite? == -1
      value = "-.inf"
    elsif data.nan? || data != data
      value = ".nan"
    else
      value = data.to_s
    end
    represent_scalar(data.taguri, value)
  end

  def represent_list(data)
    represent_sequence(data.taguri, data)
  end

  def represent_dict(data)
    represent_mapping(data.taguri, data)
  end

  # Sets are emitted as a mapping whose values are all nil.
  def represent_set(data)
    value = {}
    data.each { |key| value[key] = nil }
    represent_mapping(data.taguri, value)
  end

  def represent_datetime(data)
    value = "%04d-%02d-%02d %02d:%02d:%02d" % [data.year, data.month, data.day, data.hour, data.min, data.sec]
    if data.usec != 0
      # BUG FIX: format the fraction with %06d and strip trailing zeros.
      # The original divided by 1e6 and split on ".", which breaks when
      # Float#to_s switches to scientific notation (e.g. usec=1 ->
      # "1.0e-06" -> fraction "0e-06").
      value += "." + ("%06d" % data.usec).sub(/0+\z/, "")
    end
    if data.utc_offset != 0
      # NOTE(review): this appends the raw offset in *seconds* (e.g.
      # "3600"), not an ISO-8601 "+01:00" offset -- kept as-is, but
      # verify against the emitter's expectations.
      value += data.utc_offset.to_s
    end
    represent_scalar(data.taguri, value)
  end

  # Represent an arbitrary object via its to_yaml_properties ivar list.
  def represent_yaml_object(tag, data, flow_style=nil)
    state = data.to_yaml_properties
    mapping = {}
    state.each do |m|
      # BUG FIX: the original wrote into an undefined local `map`,
      # raising NameError; populate `mapping`, which is what gets
      # passed to represent_mapping below.
      mapping[m[1..-1]] = data.instance_variable_get(m)
    end
    represent_mapping(tag, mapping, flow_style)
  end

  def represent_undefined(data)
    raise RepresenterError.new("cannot represent an object: #{data}")
  end
end
166
+
167
# Default registrations: map core Ruby types to the SafeRepresenter
# methods that know how to emit them. The nil key is the fallback for
# otherwise-unknown objects.
{
  NilClass   => :represent_none,
  String     => :represent_str,
  Symbol     => :represent_symbol,
  TrueClass  => :represent_bool,
  FalseClass => :represent_bool,
  Integer    => :represent_int,
  Float      => :represent_float,
  Array      => :represent_list,
  Hash       => :represent_dict,
  Set        => :represent_set,
  Time       => :represent_datetime,
  nil        => :represent_undefined
}.each { |data_type, representer| BaseRepresenter.add_representer(data_type, representer) }
179
+
180
# Public representer mix-in; currently identical to SafeRepresenter.
module Representer
  include SafeRepresenter
end
183
+ end
@@ -0,0 +1,1258 @@
1
+ # Scanner produces tokens of the following types:
2
+ # STREAM-START
3
+ # STREAM-END
4
+ # DIRECTIVE(name, value)
5
+ # DOCUMENT-START
6
+ # DOCUMENT-END
7
+ # BLOCK-SEQUENCE-START
8
+ # BLOCK-MAPPING-START
9
+ # BLOCK-END
10
+ # FLOW-SEQUENCE-START
11
+ # FLOW-MAPPING-START
12
+ # FLOW-SEQUENCE-END
13
+ # FLOW-MAPPING-END
14
+ # BLOCK-ENTRY
15
+ # FLOW-ENTRY
16
+ # KEY
17
+ # VALUE
18
+ # ALIAS(value)
19
+ # ANCHOR(value)
20
+ # TAG(value)
21
+ # SCALAR(value, plain)
22
+ #
23
+ # Read comments in the Scanner code for more details.
24
+ #
25
+
26
+ require 'rbyaml/error'
27
+ require 'rbyaml/tokens'
28
+
29
+ module RbYAML
30
# Raised when the scanner meets input it cannot tokenize; carries the
# context/problem marks inherited from MarkedYAMLError.
class ScannerError < MarkedYAMLError
end
32
+
33
# Record describing a potential simple key: the token number it would
# become, whether a key is *required* at that position, and where it
# starts (index/line/column/mark).
SimpleKey = Struct.new(:token_number, :required, :index, :line, :column, :mark)
45
+
46
+ module Scanner
47
def initialize_scanner
  # The Reader mix-in (assumed included at the same point) supplies
  # peek/prefix/forward and NUL-terminates the stream.

  # True once STREAM-END has been emitted.
  @done = false

  # Number of unclosed '{' / '['; zero means block context.
  @flow_level = 0

  # Tokens scanned but not yet handed out.
  @tokens = []

  # STREAM-START is always the first token.
  fetch_stream_start

  # Count of tokens consumed via get_token.
  @tokens_taken = 0

  # Current indentation level and the stack of enclosing levels.
  @indent = -1
  @indents = []

  # Simple-key bookkeeping. A simple key is one not introduced by '?';
  # since KEY is emitted before every key, each candidate is tracked
  # until its ':' shows up, and must stay on one line within 1024 chars.

  # May a simple key start at the current position? (Also gates block
  # collection starts in block context: after '{', '[', ',' in flow, or
  # '?', ':', '-' / start of line in block context.)
  @allow_simple_key = true

  # flow_level => SimpleKey; at most one candidate key per flow level.
  # Candidates can begin with ALIAS, ANCHOR, TAG, flow SCALAR, '[', '{'.
  @possible_simple_keys = {}
end
108
+
109
# True when a next token exists and (if any `choices` are given) it
# matches one of them via ===.
def check_token(*choices)
  fetch_more_tokens while need_more_tokens
  unless @tokens.empty?
    return true if choices.empty?
    choices.each { |choice| return true if choice === @tokens.first }
  end
  false
end
120
+
121
# Return the next token without removing it from the queue.
def peek_token
  fetch_more_tokens while need_more_tokens
  @tokens.first unless @tokens.empty?
end
126
+
127
# Remove and return the next token (nil when the queue stays empty).
def get_token
  fetch_more_tokens while need_more_tokens
  return if @tokens.empty?
  @tokens_taken += 1
  @tokens.shift
end
135
+
136
# Yield every remaining token in stream order, scanning lazily as the
# queue drains.
def each_token
  fetch_more_tokens while need_more_tokens
  until @tokens.empty?
    @tokens_taken += 1
    yield @tokens.shift
    fetch_more_tokens while need_more_tokens
  end
end
144
+
145
# Decide whether more tokens must be scanned before the head of the
# queue can be handed out safely.
def need_more_tokens
  if @done
    false
  elsif @tokens.empty?
    true
  else
    # The queued token may still be a pending simple key, so look further.
    stale_possible_simple_keys
    true if next_possible_simple_key == @tokens_taken
  end
end
153
+
154
# Scan forward and append the next token to @tokens, dispatching on the
# next character. Raises ScannerError for a character that cannot start
# any token.
def fetch_more_tokens
  # Eat whitespace and comments until we reach the next token.
  scan_to_next_token

  # Remove obsolete possible simple keys.
  stale_possible_simple_keys

  # Compare the current indentation and column; this may emit BLOCK-END
  # tokens and decrease the current indentation level.
  unwind_indent(@column)

  # Peek the next character.
  ch = peek

  # FIX: the original used the `when cond: expr` colon form, which is
  # Ruby 1.8-only syntax (a parse error on 1.9+); `then` is equivalent
  # and accepted by every Ruby version.
  case
  when ch == ?\0 then fetch_stream_end                               # end of stream
  when ch == ?% && check_directive then fetch_directive              # directive
  when ch == ?- && check_document_start then fetch_document_start    # '---'
  when ch == ?. && check_document_end then fetch_document_end        # '...'
  when ch == ?[ then fetch_flow_sequence_start
  when ch == ?{ then fetch_flow_mapping_start
  when ch == ?] then fetch_flow_sequence_end
  when ch == ?} then fetch_flow_mapping_end
  when ch == ?, then fetch_flow_entry
  when ch == ?- && check_block_entry then fetch_block_entry
  when ch == ?? && check_key then fetch_key
  when ch == ?: && check_value then fetch_value
  when ch == ?* then fetch_alias
  when ch == ?& then fetch_anchor
  when ch == ?! then fetch_tag
  when ch == ?| && @flow_level==0 then fetch_literal                 # literal scalar (block only)
  when ch == ?> && @flow_level==0 then fetch_folded                  # folded scalar (block only)
  when ch == ?' then fetch_single
  when ch == ?" then fetch_double
  when check_plain then fetch_plain
  else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
  end
end
212
+
213
+ # Simple keys treatment.
214
+
215
# Token number of the earliest pending simple key, or nil when there
# are no candidates.
def next_possible_simple_key
  numbers = @possible_simple_keys.values.map { |key| key.token_number }
  numbers.min
end
227
+
228
# Drop pending simple keys that can no longer be completed. Per the YAML
# spec a simple key must stay on a single line and within 1024
# characters; a *required* key going stale is a scanner error.
def stale_possible_simple_keys
  @possible_simple_keys.delete_if do |level, key|
    if key.line != @line || @index - key.index > 1024
      # BUG FIX: the original used `return true`/`return false` inside
      # the block; in Ruby, `return` in a block exits the enclosing
      # *method*, so delete_if aborted on the first entry and never
      # removed anything. Yield plain true/false instead.
      raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'", get_mark) if key.required
      true
    else
      false
    end
  end
end
243
+
244
# Record the current position as a candidate simple key. Called before
# ALIAS, ANCHOR, TAG, flow SCALAR, '[' and '{'.
def save_possible_simple_key
  # A key is *required* here when we sit exactly at the block indent.
  required = @flow_level == 0 && @indent == @column
  return unless @allow_simple_key
  remove_possible_simple_key
  token_number = @tokens_taken + @tokens.length
  @possible_simple_keys[@flow_level] =
    SimpleKey.new(token_number, required, @index, @line, @column, get_mark)
end
261
+
262
# Forget the pending simple key at the current flow level.
# BUG FIX: the original only *read* the entry into a discarded local
# (`key = @possible_simple_keys[@flow_level] if ...member?`) and never
# deleted it, so stale candidates lingered in the table. Hash#delete
# also preserves the original's return value (the entry or nil).
def remove_possible_simple_key
  @possible_simple_keys.delete(@flow_level)
end
266
+
267
+ # Indentation functions.
268
+
269
# Pop indentation levels deeper than `column`, emitting BLOCK-END for
# each. In flow context indentation is ignored entirely -- we are laxer
# than the spec here, which keeps constructs like "key : {\n}" legal.
def unwind_indent(column)
  return nil if @flow_level != 0
  while @indent > column
    mark = get_mark
    @indent = @indents.pop
    @tokens << BlockEndToken.new(mark, mark)
  end
end
291
+
292
# Push the current indent and move to `column` when it is deeper.
# Returns true only when the indentation actually increased.
def add_indent(column)
  return false unless @indent < column
  @indents << @indent
  @indent = column
  true
end
301
+
302
+ # Fetchers.
303
+
304
# Emit STREAM-START as the very first token (STREAM-END is always last).
def fetch_stream_start
  mark = get_mark
  @tokens << StreamStartToken.new(mark, mark, @encoding)
end
312
+
313
+
314
# Emit STREAM-END, reset key state, and mark the scanner finished.
def fetch_stream_end
  # Unwind all indentation back to -1.
  unwind_indent(-1)
  # Reset everything (not strictly needed).
  @allow_simple_key = false
  @possible_simple_keys = {}
  mark = get_mark
  @tokens << StreamEndToken.new(mark, mark)
  # The stream is finished.
  @done = true
end
327
+
328
# '%' at column 0: unwind indentation and scan a DIRECTIVE token.
def fetch_directive
  unwind_indent(-1)
  # No simple keys across a directive line.
  remove_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_directive
end
337
+
338
# '---' document start marker.
def fetch_document_start
  fetch_document_indicator(DocumentStartToken)
end
341
+
342
# '...' document end marker.
def fetch_document_end
  fetch_document_indicator(DocumentEndToken)
end
345
+
346
# Shared scan for '---' / '...': unwind indentation, forbid simple keys
# (no block collection may follow the marker), consume the three marker
# characters, and emit the given token class.
def fetch_document_indicator(token)
  unwind_indent(-1)
  remove_possible_simple_key
  @allow_simple_key = false
  start_mark = get_mark
  forward(3)
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end
359
+
360
# '[' opens a flow sequence.
def fetch_flow_sequence_start
  fetch_flow_collection_start(FlowSequenceStartToken)
end
363
+
364
# '{' opens a flow mapping.
def fetch_flow_mapping_start
  fetch_flow_collection_start(FlowMappingStartToken)
end
367
+
368
# Shared handler for '[' / '{': the bracket may itself be a simple key,
# it opens a new flow level, and a simple key may follow it.
def fetch_flow_collection_start(token)
  save_possible_simple_key
  @flow_level += 1
  @allow_simple_key = true
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end
381
+
382
# ']' closes a flow sequence.
def fetch_flow_sequence_end
  fetch_flow_collection_end(FlowSequenceEndToken)
end
385
+
386
# '}' closes a flow mapping.
def fetch_flow_mapping_end
  fetch_flow_collection_end(FlowMappingEndToken)
end
389
+
390
# Shared handler for ']' / '}': close the current flow level; no simple
# key may immediately follow the closing bracket.
def fetch_flow_collection_end(token)
  remove_possible_simple_key
  @flow_level -= 1
  @allow_simple_key = false
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end
403
+
404
# ',' inside flow context; a simple key may follow it.
def fetch_flow_entry
  @allow_simple_key = true
  remove_possible_simple_key
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << FlowEntryToken.new(start_mark, end_mark)
end
415
+
416
# '-' starting a block sequence entry.
def fetch_block_entry
  if @flow_level == 0
    # Block context: '-' is only legal where a simple key could start.
    raise ScannerError.new(nil, nil, "sequence entries are not allowed here", get_mark) unless @allow_simple_key
    # A first entry at a deeper indent opens a BLOCK-SEQUENCE.
    if add_indent(@column)
      mark = get_mark
      @tokens << BlockSequenceStartToken.new(mark, mark)
    end
  end
  # (A stray '-' in flow context is left for the parser to reject.)
  # Simple keys are allowed after '-'.
  @allow_simple_key = true
  remove_possible_simple_key
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << BlockEntryToken.new(start_mark, end_mark)
end
438
+
439
# '?' explicit key indicator.
def fetch_key
  if @flow_level == 0
    # Block context: a key (simple or not) may only start where a
    # simple key could.
    raise ScannerError.new(nil, nil, "mapping keys are not allowed here", get_mark) unless @allow_simple_key
    # A key at a deeper indent opens a BLOCK-MAPPING.
    if add_indent(@column)
      mark = get_mark
      @tokens << BlockMappingStartToken.new(mark, mark)
    end
  end
  # After '?' a simple key is possible only in block context.
  @allow_simple_key = @flow_level == 0
  remove_possible_simple_key
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << KeyToken.new(start_mark, end_mark)
end
460
+
461
# ':' value indicator. If a simple key is pending at this flow level,
# the ':' completes it, and KEY (plus possibly BLOCK-MAPPING-START) is
# spliced back at the key's recorded token position.
def fetch_value
  if @possible_simple_keys.include?(@flow_level)
    key = @possible_simple_keys[@flow_level]
    @possible_simple_keys.delete(@flow_level)
    @tokens.insert(key.token_number - @tokens_taken, KeyToken.new(key.mark, key.mark))
    # The key may also open a new block mapping at its column.
    @tokens.insert(key.token_number - @tokens_taken, BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level == 0 && add_indent(key.column)
    # Two simple keys cannot follow one another.
    @allow_simple_key = false
  else
    # Part of a complex key. In block context ':' must appear where a
    # simple key could start (the parser would catch this anyway).
    if @flow_level == 0
      raise ScannerError.new(nil, nil, "mapping values are not allowed here", get_mark) unless @allow_simple_key
      # Simple keys are allowed after ':' in block context.
      @allow_simple_key = @flow_level == 0
      remove_possible_simple_key
    end
  end
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << ValueToken.new(start_mark, end_mark)
end
494
+
495
# '*alias' -- may itself be a simple key; nothing simple may follow it.
def fetch_alias
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_anchor(AliasToken)
end
503
+
504
# '&anchor' -- may itself be a simple key; nothing simple may follow it.
def fetch_anchor
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_anchor(AnchorToken)
end
512
+
513
# '!tag' -- may itself be a simple key; nothing simple may follow it.
def fetch_tag
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_tag
end
521
+
522
# '|' literal block scalar (block context only).
def fetch_literal
  fetch_block_scalar(?|)
end
525
+
526
# '>' folded block scalar (block context only).
def fetch_folded
  fetch_block_scalar(?>)
end
529
+
530
# Shared handler for '|' and '>' scalars; a simple key may follow one.
def fetch_block_scalar(style)
  @allow_simple_key = true
  remove_possible_simple_key
  @tokens << scan_block_scalar(style)
end
538
+
539
# Single-quoted flow scalar.
def fetch_single
  fetch_flow_scalar(?')
end
542
+
543
# Double-quoted flow scalar.
def fetch_double
  fetch_flow_scalar(?")
end
546
+
547
# Shared handler for quoted scalars; the scalar itself can be a simple
# key, but no simple key may follow it.
def fetch_flow_scalar(style)
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_flow_scalar(style)
end
555
+
556
# Plain scalar; it can be a simple key. No simple key may follow --
# though scan_plain flips @allow_simple_key back on when scanning stops
# at the start of a line.
def fetch_plain
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_plain
end
566
+
567
+ # Checkers.
568
+
569
# DIRECTIVE begins with '%' at column 0 (the '%' is already verified by
# the dispatcher).
def check_directive
  @column == 0
end
574
+
575
# DOCUMENT-START: '---' at column 0, followed by space/break/end.
def check_document_start
  @column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
end
579
+
580
# DOCUMENT-END: '...' at column 0, followed by space/break/end.
# FIX: dropped the original's dead `prefix = peek(4)` -- it shadowed
# the Reader#prefix helper with an unused local (an apparent
# mistranslation of the PyYAML original) and contributed nothing.
def check_document_end
  if @column == 0
    return true if prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
  end
end
587
+
588
# BLOCK-ENTRY: '-' followed by space/break/end (the '-' is already seen).
def check_block_entry
  "\0 \t\r\n\x85".include?(peek(1))
end
592
+
593
# KEY: '?' alone suffices in flow context; in block context it must be
# followed by space/break/end.
def check_key
  return true if @flow_level != 0
  "\0 \t\r\n\x85".include?(peek(1))
end
598
+
599
# VALUE: ':' alone suffices in flow context; in block context it must be
# followed by space/break/end.
def check_value
  return true if @flow_level != 0
  "\0 \t\r\n\x85".include?(peek(1))
end
604
+
605
# A plain scalar may start with any non-space character except the
# indicators '-?:,[]{}#&*!|>'"%@`' -- but '-' (and, in block context
# only, '?' and ':') may still start one when followed by a non-space
# character.
# BUG FIX: the original negated the whole conjunction
# `!(space.include?(peek(1)) && (...))`, which made almost *every*
# indicator (',', '[', '#', ...) pass the check. The negation belongs
# to the peek(1) space test alone, matching the PyYAML original:
#   peek(1) not in spaces AND (ch == '-' OR (block and ch in '?:')).
def check_plain
  ch = peek
  !("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) ||
    (!"\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level == 0 && "?:".include?(ch))))
end
621
+
622
+
623
+
624
+
625
+
626
+
627
+ # Scanners.
628
+
629
# Skip spaces, comments and line breaks until something scannable.
# A line break seen in block context re-enables simple keys.
# (Comparison with 32 is byte-oriented, like the rest of this scanner.)
# TODO (from the original): tab handling should be tightened -- tabs
# must not precede block-structure tokens (BLOCK-SEQUENCE-START,
# BLOCK-MAPPING-START, BLOCK-END, KEY/VALUE/BLOCK-ENTRY in block
# context); that also needs an @allow_simple_keys check in
# unwind_indent and changes to the scalar scanners.
def scan_to_next_token
  loop do
    forward while peek == 32
    if peek == ?#
      forward until "\0\r\n\x85".include?(peek)
    end
    if scan_line_break.empty?
      break
    else
      @allow_simple_key = true if @flow_level == 0
    end
  end
end
659
+
660
# DIRECTIVE: '%' name value? comment? break (see the specification).
def scan_directive
  start_mark = get_mark
  forward # past the '%'
  name = scan_directive_name(start_mark)
  value = nil
  case name
  when "YAML"
    value = scan_yaml_directive_value(start_mark)
    end_mark = get_mark
  when "TAG"
    value = scan_tag_directive_value(start_mark)
    end_mark = get_mark
  else
    # Unknown directive: record its end, then skip the rest of the line.
    end_mark = get_mark
    forward until "\0\r\n\x85".include?(peek)
  end
  scan_directive_ignored_line(start_mark)
  DirectiveToken.new(name, value, start_mark, end_mark)
end
679
+
680
+ def scan_directive_name(start_mark)
681
+ # See the specification for details.
682
+ length = 0
683
+ ch = peek(length)
684
+ while /[-0-9A-Za-z_]/ =~ ch.chr
685
+ length += 1
686
+ ch = peek(length)
687
+ end
688
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if length==0
689
+ value = prefix(length)
690
+ forward(length)
691
+ ch = peek()
692
+ raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
693
+ value
694
+ end
695
+
696
+ def scan_yaml_directive_value(start_mark)
697
+ # See the specification for details.
698
+ forward while peek == 32
699
+ major = scan_yaml_directive_number(start_mark)
700
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek != ?.
701
+ forward
702
+ minor = scan_yaml_directive_number(start_mark)
703
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !"\0 \r\n\x85".include?(peek)
704
+ [major, minor]
705
+ end
706
+
707
+ def scan_yaml_directive_number(start_mark)
708
+ # See the specification for details.
709
+ ch = peek
710
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !((?0..?9) === ch)
711
+ length = 0
712
+ length += 1 while ((?0..?9) === peek(length))
713
+ value = prefix(length)
714
+ forward(length)
715
+ value
716
+ end
717
+
718
+ def scan_tag_directive_value(start_mark)
719
+ # See the specification for details.
720
+ forward while peek == 32
721
+ handle = scan_tag_directive_handle(start_mark)
722
+ forward while peek == 32
723
+ prefix = scan_tag_directive_prefix(start_mark)
724
+ [handle, prefix]
725
+ end
726
+
727
+ def scan_tag_directive_handle(start_mark)
728
+ # See the specification for details.
729
+ value = scan_tag_handle("directive", start_mark)
730
+ ch = peek
731
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
732
+ value
733
+ end
734
+
735
+ def scan_tag_directive_prefix(start_mark)
736
+ # See the specification for details.
737
+ value = scan_tag_uri("directive", start_mark)
738
+ ch = peek
739
+ raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
740
+ value
741
+ end
742
+
743
+ def scan_directive_ignored_line(start_mark)
744
+ # See the specification for details.
745
+ forward while peek == 32
746
+ if peek == ?#
747
+ forward while !"\0\r\n\x85".include?(peek)
748
+ end
749
+ ch = peek
750
+ raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark()) if !"\0\r\n\x85".include?(ch)
751
+ scan_line_break
752
+ end
753
+
754
+ def scan_anchor(token)
755
+ # The specification does not restrict characters for anchors and
756
+ # aliases. This may lead to problems, for instance, the document:
757
+ # [ *alias, value ]
758
+ # can be interpteted in two ways, as
759
+ # [ "value" ]
760
+ # and
761
+ # [ *alias , "value" ]
762
+ # Therefore we restrict aliases to numbers and ASCII letters.
763
+ start_mark = get_mark
764
+ indicator = peek
765
+ name = (indicator == ?*) ? "alias":"anchor"
766
+ forward
767
+ length = 0
768
+ ch = peek(length)
769
+ while /[-0-9A-Za-z_]/ =~ ch.chr
770
+ length += 1
771
+ ch = peek(length)
772
+ end
773
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark) if length==0
774
+ value = prefix(length)
775
+ forward(length)
776
+ ch = peek
777
+ if !"\0 \t\r\n\x85?:,]}%@`".include?(ch)
778
+ raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
779
+ end
780
+ end_mark = get_mark
781
+ token.new(value, start_mark, end_mark)
782
+ end
783
+
784
+
785
+ def scan_tag
786
+ # See the specification for details.
787
+ start_mark = get_mark
788
+ ch = peek(1)
789
+ if ch == ?<
790
+ handle = nil
791
+ forward(2)
792
+ suffix = scan_tag_uri("tag", start_mark)
793
+ raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
794
+ forward
795
+ elsif "\0 \t\r\n\x85".include?(ch)
796
+ handle = nil
797
+ suffix = "!"
798
+ forward
799
+ else
800
+ length = 1
801
+ use_handle = false
802
+ while !"\0 \t\r\n\x85".include?(ch)
803
+ if ch == ?!
804
+ use_handle = true
805
+ break
806
+ end
807
+ length += 1
808
+ ch = peek(length)
809
+ end
810
+ handle = "!"
811
+ if use_handle
812
+ handle = scan_tag_handle("tag", start_mark)
813
+ else
814
+ handle = "!"
815
+ forward
816
+ end
817
+ suffix = scan_tag_uri("tag", start_mark)
818
+ end
819
+ ch = peek
820
+ raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
821
+ value = [handle, suffix]
822
+ end_mark = get_mark
823
+ TagToken.new(value, start_mark, end_mark)
824
+ end
825
+
826
+ def scan_block_scalar(style)
827
+ # See the specification for details.
828
+ folded = style== ?>
829
+ chunks = []
830
+ start_mark = get_mark
831
+ # Scan the header.
832
+ forward
833
+ chomping, increment = scan_block_scalar_indicators(start_mark)
834
+ scan_block_scalar_ignored_line(start_mark)
835
+ # Determine the indentation level and go to the first non-empty line.
836
+ min_indent = @indent+1
837
+ min_indent = 1 if min_indent < 1
838
+ if increment.nil?
839
+ breaks, max_indent, end_mark = scan_block_scalar_indentation
840
+ indent = [min_indent, max_indent].max
841
+ else
842
+ indent = min_indent+increment-1
843
+ breaks, end_mark = scan_block_scalar_breaks(indent)
844
+ end
845
+ line_break = ''
846
+ # Scan the inner part of the block scalar.
847
+ while @column == indent and peek != ?\0
848
+ chunks += breaks
849
+ leading_non_space = !" \t".include?(peek)
850
+ length = 0
851
+ length += 1 while !"\0\r\n\x85".include?(peek(length))
852
+ chunks << prefix(length)
853
+ forward(length)
854
+ line_break = scan_line_break
855
+ breaks, end_mark = scan_block_scalar_breaks(indent)
856
+ if @column == indent && peek != 0
857
+ # Unfortunately, folding rules are ambiguous.
858
+ #
859
+ # This is the folding according to the specification:
860
+ if folded && line_break == ?\n && leading_non_space && !" \t".include?(peek())
861
+ chunks << ' ' if breaks.empty?
862
+ else
863
+ chunks << line_break
864
+ end
865
+ # This is Clark Evans's interpretation (also in the spec
866
+ # examples):
867
+ #
868
+ #if folded and line_break == u'\n':
869
+ # if not breaks:
870
+ # if self.peek() not in ' \t':
871
+ # chunks.append(u' ')
872
+ # else:
873
+ # chunks.append(line_break)
874
+ #else:
875
+ # chunks.append(line_break)
876
+ else
877
+ break
878
+ end
879
+ end
880
+
881
+ # Chomp the tail.
882
+ if chomping
883
+ chunks << line_break
884
+ chunks += breaks
885
+ end
886
+
887
+ # We are done.
888
+ ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
889
+ end
890
+
891
+ def scan_block_scalar_indicators(start_mark)
892
+ # See the specification for details.
893
+ chomping = nil
894
+ increment = nil
895
+ ch = peek
896
+ if /[+-]/ =~ ch.chr
897
+ chomping = ch == ?+
898
+ forward
899
+ ch = peek
900
+ if (?0..?9) === ch
901
+ increment = ch.to_i
902
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
903
+ forward
904
+ end
905
+ elsif (?0..?9) === ch
906
+ increment = ch
907
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
908
+ forward
909
+ ch = peek
910
+ if /[+-]/ =~ ch.chr
911
+ chomping = ch == ?+
912
+ forward
913
+ end
914
+ end
915
+ ch = peek
916
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
917
+ [chomping, increment]
918
+ end
919
+
920
+ def scan_block_scalar_ignored_line(start_mark)
921
+ # See the specification for details.
922
+ forward while peek == 32
923
+ if peek == ?#
924
+ forward while !"\0\r\n\x85".include?(peek)
925
+ end
926
+ ch = peek
927
+
928
+ raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark) if !"\0\r\n\x85".include?(ch)
929
+ scan_line_break
930
+ end
931
+
932
+ def scan_block_scalar_indentation
933
+ # See the specification for details.
934
+ chunks = []
935
+ max_indent = 0
936
+ end_mark = get_mark
937
+ while " \r\n\x85".include?(peek)
938
+ if peek != 32
939
+ chunks << scan_line_break
940
+ end_mark = get_mark
941
+ else
942
+ forward
943
+ max_indent = @column if @column > max_indent
944
+ end
945
+ end
946
+ [chunks, max_indent, end_mark]
947
+ end
948
+
949
+ def scan_block_scalar_breaks(indent)
950
+ # See the specification for details.
951
+ chunks = []
952
+ end_mark = get_mark
953
+ forward while @column < indent && peek == 32
954
+ while "\r\n\x85".include?(peek)
955
+ chunks << scan_line_break
956
+ end_mark = get_mark
957
+ forward while @column < indent && peek == 32
958
+ end
959
+ [chunks, end_mark]
960
+ end
961
+
962
+ def scan_flow_scalar(style)
963
+ # See the specification for details.
964
+ # Note that we loose indentation rules for quoted scalars. Quoted
965
+ # scalars don't need to adhere indentation because " and ' clearly
966
+ # mark the beginning and the end of them. Therefore we are less
967
+ # restrictive then the specification requires. We only need to check
968
+ # that document separators are not included in scalars.
969
+ double = style == ?"
970
+ chunks = []
971
+ start_mark = get_mark
972
+ quote = peek
973
+ forward
974
+ chunks += scan_flow_scalar_non_spaces(double, start_mark)
975
+ while peek != quote
976
+ chunks += scan_flow_scalar_spaces(double, start_mark)
977
+ chunks += scan_flow_scalar_non_spaces(double, start_mark)
978
+ end
979
+ forward
980
+ end_mark = get_mark
981
+ ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
982
+ end
983
+
984
  # Replacement text for single-character escape sequences in
  # double-quoted scalars (the character following a backslash).
  # NOTE(review): YAML also defines \L (U+2028) and \P (U+2029) escapes,
  # which are absent here — confirm whether they are handled elsewhere.
  ESCAPE_REPLACEMENTS = {
    "0" => "\0",       # null
    "a" => "\x07",     # bell
    "b" => "\x08",     # backspace
    "t" => "\x09",     # horizontal tab
    "\t" => "\x09",    # a literal tab after '\' also means tab
    "n" => "\x0A",     # line feed
    "v" => "\x0B",     # vertical tab
    "f" => "\x0C",     # form feed
    "r" => "\x0D",     # carriage return
    "e" => "\x1B",     # escape
    " " => "\x20",     # space
    '"' => '"',        # double quote
    "\\" => "\\",      # backslash
    "N" => "\x85",     # next line (NEL)
    "_" => "\xA0"      # non-breaking space
  }
1001
+
1002
  # Escape letters that introduce a fixed-length hexadecimal escape,
  # mapped to the number of hex digits that follow.
  # NOTE(review): YAML also defines 'u' => 4 and 'U' => 8; only \xNN is
  # supported here — confirm this limitation is intentional.
  ESCAPE_CODES = {
    'x' => 2
  }
1005
+
1006
+ def scan_flow_scalar_non_spaces(double, start_mark)
1007
+ # See the specification for details.
1008
+ chunks = []
1009
+ while true
1010
+ length = 0
1011
+ length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
1012
+ if length!=0
1013
+ chunks << prefix(length)
1014
+ forward(length)
1015
+ end
1016
+ ch = peek
1017
+ if !double && ch == ?' && peek(1) == ?'
1018
+ chunks << ?'
1019
+ forward(2)
1020
+ elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
1021
+ chunks << ch
1022
+ forward
1023
+ elsif double && ch == ?\\
1024
+ forward
1025
+ ch = peek
1026
+ if ESCAPE_REPLACEMENTS.member?(ch.chr)
1027
+ chunks << ESCAPE_REPLACEMENTS[ch.chr]
1028
+ forward
1029
+ elsif ESCAPE_CODES.member?(ch.chr)
1030
+ length = ESCAPE_CODES[ch.chr]
1031
+ forward
1032
+ length.times do |k|
1033
+ if /[0-9A-Fa-f]/ !~ peek(k).chr
1034
+ raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
1035
+ "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
1036
+ end
1037
+ end
1038
+ code = prefix(length).to_i.to_s(16)
1039
+ chunks << code
1040
+ forward(length)
1041
+ elsif "\r\n\x85".include?(ch)
1042
+ scan_line_break
1043
+ chunks += scan_flow_scalar_breaks(double, start_mark)
1044
+ else
1045
+ raise ScannerError.new("while scanning a double-quoted scalar", start_mark,"found unknown escape character #{ch}",get_mark)
1046
+ end
1047
+ else
1048
+ return chunks
1049
+ end
1050
+ end
1051
+ end
1052
+
1053
+ def scan_flow_scalar_spaces(double, start_mark)
1054
+ # See the specification for details.
1055
+ chunks = []
1056
+ length = 0
1057
+ length += 1 while /[ \t]/ =~ peek(length).chr
1058
+ whitespaces = prefix(length)
1059
+ forward(length)
1060
+ ch = peek
1061
+ if ch == ?\0
1062
+ raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
1063
+ elsif "\r\n\x85".include?(ch)
1064
+ line_break = scan_line_break
1065
+ breaks = scan_flow_scalar_breaks(double, start_mark)
1066
+ if line_break != ?\n
1067
+ chunks << line_break
1068
+ elsif breaks.empty?
1069
+ chunks << ' '
1070
+ end
1071
+ chunks += breaks
1072
+ else
1073
+ chunks << whitespaces
1074
+ end
1075
+ chunks
1076
+ end
1077
+
1078
+ def scan_flow_scalar_breaks(double, start_mark)
1079
+ # See the specification for details.
1080
+ chunks = []
1081
+ while true
1082
+ # Instead of checking indentation, we check for document
1083
+ # separators.
1084
+ prefix = prefix(3)
1085
+ if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1086
+ raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
1087
+ end
1088
+ forward while /[ \t]/ =~ peek.chr
1089
+ if "\r\n\x85".include?(peek)
1090
+ chunks << scan_line_break
1091
+ else
1092
+ return chunks
1093
+ end
1094
+ end
1095
+ end
1096
+
1097
+ def scan_plain
1098
+ # See the specification for details.
1099
+ # We add an additional restriction for the flow context:
1100
+ # plain scalars in the flow context cannot contain ',', ':' and '?'.
1101
+ # We also keep track of the `allow_simple_key` flag here.
1102
+ # Indentation rules are loosed for the flow context.
1103
+ chunks = []
1104
+ start_mark = get_mark
1105
+ end_mark = start_mark
1106
+ indent = @indent+1
1107
+ # We allow zero indentation for scalars, but then we need to check for
1108
+ # document separators at the beginning of the line.
1109
+ #if indent == 0
1110
+ # indent = 1
1111
+ spaces = []
1112
+ while true
1113
+ length = 0
1114
+ break if peek == ?#
1115
+ while true
1116
+ ch = peek(length)
1117
+ if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x28".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
1118
+ break
1119
+ end
1120
+ length += 1
1121
+ end
1122
+ break if length == 0
1123
+ @allow_simple_key = false
1124
+ chunks += spaces
1125
+ chunks << prefix(length)
1126
+ forward(length)
1127
+ end_mark = get_mark
1128
+ spaces = scan_plain_spaces(indent, start_mark)
1129
+ break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
1130
+ end
1131
+ return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
1132
+ end
1133
+
1134
+ def scan_plain_spaces(indent, start_mark)
1135
+ # See the specification for details.
1136
+ # The specification is really confusing about tabs in plain scalars.
1137
+ # We just forbid them completely. Do not use tabs in YAML!
1138
+ chunks = []
1139
+ length = 0
1140
+ length += 1 while peek(length) == 32
1141
+ whitespaces = prefix(length)
1142
+ forward(length)
1143
+ ch = peek
1144
+ if "\r\n\x85".include?(ch)
1145
+ line_break = scan_line_break
1146
+ @allow_simple_key = true
1147
+ prefix = prefix(3)
1148
+ return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1149
+ breaks = []
1150
+ while " \r\n\x85".include?(peek)
1151
+ if peek == 32
1152
+ forward
1153
+ else
1154
+ breaks << scan_line_break
1155
+ prefix = prefix(3)
1156
+ return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
1157
+ end
1158
+ end
1159
+ if line_break != '\n'
1160
+ chunks << line_break
1161
+ elsif breaks.empty?
1162
+ chunks << ' '
1163
+ end
1164
+ chunks += breaks
1165
+ elsif !whitespaces.empty?
1166
+ chunks << whitespaces
1167
+ end
1168
+ chunks
1169
+ end
1170
+
1171
  # Scans a tag handle: '!', '!!' or '!word!'.
  # For some strange reason the specification does not allow '_' in tag
  # handles; we allow it anyway.
  # Raises ScannerError (prefixed with `name`, e.g. "tag"/"directive")
  # if the handle does not start with '!' or a named handle is not
  # closed by '!'.
  def scan_tag_handle(name, start_mark)
    ch = peek
    raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) if ch != ?!
    length = 1
    ch = peek(length)
    if ch != 32
      # Scan the word of a named handle; it must be closed by '!'.
      while /[-_0-9A-Za-z]/ =~ ch.chr
        length += 1
        ch = peek(length)
      end
      if ch != ?!
        forward(length)
        raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
      end
      # Include the closing '!'.
      length += 1
    end
    value = prefix(length)
    forward(length)
    value
  end
1194
+
1195
  # Scans a tag URI (the suffix of a tag, or the prefix of a %TAG
  # directive), decoding %XX escapes via scan_uri_escapes.
  # Note: we do not check if the URI is well-formed.
  def scan_tag_uri(name, start_mark)
    chunks = []
    length = 0
    ch = peek(length)
    # Accumulate runs of plain URI characters; flush the pending run
    # whenever a '%' escape is met so decoded bytes land in order.
    while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
      if ch == ?%
        chunks << prefix(length)
        forward(length)
        length = 0
        chunks << scan_uri_escapes(name, start_mark)
      else
        length += 1
      end
      ch = peek(length)
    end
    # Flush any trailing run.
    if length!=0
      chunks << prefix(length)
      forward(length)
      length = 0
    end

    raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
    chunks.join('')
  end
1221
+
1222
+ def scan_uri_escapes(name, start_mark)
1223
+ # See the specification for details.
1224
+ bytes = []
1225
+ mark = get_mark
1226
+ while peek == ?%
1227
+ forward
1228
+ 2.times do |k|
1229
+ raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
1230
+ get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
1231
+ end
1232
+ bytes << prefix(2).to_i.to_s(16)
1233
+ forward(2)
1234
+ end
1235
+ bytes.join('')
1236
+ end
1237
+
1238
+ def scan_line_break
1239
+ # Transforms:
1240
+ # '\r\n' : '\n'
1241
+ # '\r' : '\n'
1242
+ # '\n' : '\n'
1243
+ # '\x85' : '\n'
1244
+ # default : ''
1245
+ ch = peek
1246
+ if "\r\n\x85".include?(ch)
1247
+ if prefix(2) == "\r\n"
1248
+ forward(2)
1249
+ else
1250
+ forward
1251
+ end
1252
+ return "\n"
1253
+ end
1254
+ ""
1255
+ end
1256
+ end
1257
+ end
1258
+