RbYAML 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. data/LICENSE +19 -0
  2. data/README +31 -0
  3. data/lib/rbyaml.rb +378 -0
  4. data/lib/rbyaml/composer.rb +189 -0
  5. data/lib/rbyaml/constructor.rb +374 -0
  6. data/lib/rbyaml/detector.rb +44 -0
  7. data/lib/rbyaml/dumper.rb +40 -0
  8. data/lib/rbyaml/emitter.rb +1116 -0
  9. data/lib/rbyaml/error.rb +81 -0
  10. data/lib/rbyaml/events.rb +92 -0
  11. data/lib/rbyaml/loader.rb +49 -0
  12. data/lib/rbyaml/nodes.rb +69 -0
  13. data/lib/rbyaml/parser.rb +488 -0
  14. data/lib/rbyaml/reader.rb +127 -0
  15. data/lib/rbyaml/representer.rb +183 -0
  16. data/lib/rbyaml/scanner.rb +1258 -0
  17. data/lib/rbyaml/serializer.rb +120 -0
  18. data/lib/rbyaml/test.rb +56 -0
  19. data/lib/rbyaml/tokens.rb +163 -0
  20. data/lib/rbyaml/yaml.rb +143 -0
  21. data/test/test_rbyaml.rb +18 -0
  22. data/test/yaml/gems.yml +130951 -0
  23. data/test/yaml/gems2.yml +113 -0
  24. data/test/yaml/test1.yml +3 -0
  25. data/test/yaml/test10.yml +8 -0
  26. data/test/yaml/test12.yml +8 -0
  27. data/test/yaml/test13.yml +4 -0
  28. data/test/yaml/test14.yml +4 -0
  29. data/test/yaml/test15.yml +8 -0
  30. data/test/yaml/test16.yml +7 -0
  31. data/test/yaml/test18.yml +6 -0
  32. data/test/yaml/test19.yml +5 -0
  33. data/test/yaml/test2.yml +3 -0
  34. data/test/yaml/test20.yml +6 -0
  35. data/test/yaml/test21.yml +4 -0
  36. data/test/yaml/test22.yml +4 -0
  37. data/test/yaml/test23.yml +13 -0
  38. data/test/yaml/test24.yml +14 -0
  39. data/test/yaml/test25.yml +7 -0
  40. data/test/yaml/test26.yml +7 -0
  41. data/test/yaml/test27.yml +29 -0
  42. data/test/yaml/test28.yml +26 -0
  43. data/test/yaml/test29.yml +13 -0
  44. data/test/yaml/test3.yml +8 -0
  45. data/test/yaml/test30.yml +7 -0
  46. data/test/yaml/test31.yml +2 -0
  47. data/test/yaml/test32.yml +13 -0
  48. data/test/yaml/test33.yml +2 -0
  49. data/test/yaml/test34.yml +8 -0
  50. data/test/yaml/test35.yml +4 -0
  51. data/test/yaml/test36.yml +8 -0
  52. data/test/yaml/test37.yml +2 -0
  53. data/test/yaml/test38.yml +8 -0
  54. data/test/yaml/test39.yml +2 -0
  55. data/test/yaml/test4.yml +8 -0
  56. data/test/yaml/test40.yml +3 -0
  57. data/test/yaml/test41.yml +5 -0
  58. data/test/yaml/test42.yml +12 -0
  59. data/test/yaml/test43.yml +15 -0
  60. data/test/yaml/test44.yml +23 -0
  61. data/test/yaml/test5.yml +3 -0
  62. data/test/yaml/test6.yml +5 -0
  63. data/test/yaml/test7.yml +10 -0
  64. data/test/yaml/test8.yml +10 -0
  65. data/test/yaml/test9.yml +8 -0
  66. metadata +111 -0
@@ -0,0 +1,127 @@
1
+ # This is a more or less straight translation of PyYAML3000 to Ruby
2
+
3
+ # the big difference in this implementation is that unicode support is not here...
4
+
5
+ require 'rbyaml/error'
6
+
7
+ module RbYAML
8
+
9
+ # Reader:
10
+ # - checks if characters are in allowed range,
11
+ # - adds '\0' to the end.
12
+ # Reader accepts
13
+ # - a String object
14
+ # - a duck-typed IO object
15
+ module Reader
16
def initialize_reader(stream)
  # Set up reader state for either an in-memory String or a duck-typed
  # IO object (anything responding to #read, optionally #path).
  @stream = nil
  @stream_pointer = 0
  @eof = true
  @buffer = ""
  @pointer = 0
  @index = 0
  @line = 0
  @column = 0
  if stream.kind_of?(String)
    # Whole document already in memory; nothing left to read.
    @name = "<string>"
    @raw_buffer = stream
  else
    # Streaming input: pull chunks lazily via update_raw.
    @stream = stream
    @name = stream.respond_to?(:path) ? stream.path : stream.inspect
    @eof = false
    @raw_buffer = ""
  end
end
35
+
36
def peek(index=0)
  # Look at the character `index` positions ahead without consuming it,
  # refilling the buffer first when it does not reach that far.
  wanted = @pointer + index + 1
  update(index + 1) if wanted >= @buffer.length
  @buffer[@pointer + index]
end
40
+
41
def prefix(length=1)
  # Return the next `length` characters without advancing the pointer,
  # refilling the buffer first when needed.
  update(length) if @pointer + length >= @buffer.length
  @buffer[@pointer, length]
end
45
+
46
def forward(length=1)
  # Consume `length` characters while maintaining @index/@line/@column.
  # A line break is LF, NEL (\x85), or a CR that is NOT followed by LF
  # (a CR LF pair counts as one break).
  #
  # FIX: after `@pointer += 1` the character following the CR is at
  # @buffer[@pointer], not @buffer[@pointer+1]; the original's off-by-one
  # made every CRLF count as two line breaks.
  update(length+1) if @pointer+length+1 >= @buffer.length
  length.times {
    ch = @buffer[@pointer]
    @pointer += 1
    @index += 1
    if "\n\x85".include?(ch) || (ch == ?\r && @buffer[@pointer] != ?\n)
      @line += 1
      @column = 0
    else
      @column += 1
    end
  }
end
60
+
61
def get_mark
  # Snapshot the current position. For string input the buffer is kept
  # alive, so the Mark can carry a snippet; for streamed input only the
  # coordinates are recorded.
  snippet, cursor = @stream.nil? ? [@buffer, @pointer] : [nil, nil]
  Mark.new(@name, @index, @line, @column, snippet, cursor)
end
68
+
69
NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/
def check_printable(data)
  # Reject any byte outside the printable range this (non-unicode)
  # reader accepts; reports the absolute stream position of the match.
  match = NON_PRINTABLE.match(data)
  return if match.nil?
  position = @index + @buffer.length - @pointer + match.begin(0)
  raise ReaderError.new(@name, position, match[0], "unicode", "special characters are not allowed"), "special characters are not allowed"
end
76
+
77
def update(length)
  # Refill @buffer so at least `length` characters are available past
  # @pointer. On end of input a NUL sentinel is appended and the raw
  # buffer is released (nil marks the stream as fully drained).
  return if @raw_buffer.nil?
  # Drop already-consumed characters before growing the buffer.
  @buffer = @buffer[@pointer..-1]
  @pointer = 0
  until @buffer.length >= length
    update_raw unless @eof
    chunk = @raw_buffer
    taken = chunk.length
    check_printable(chunk)
    @buffer << chunk
    @raw_buffer = @raw_buffer[taken..-1]
    if @eof
      @buffer << ?\0
      @raw_buffer = nil
      break
    end
  end
end
97
+
98
def update_raw(size=1024)
  # Pull up to `size` bytes from the underlying stream into @raw_buffer;
  # a nil or empty read marks the stream as exhausted.
  chunk = @stream.read(size)
  if chunk.nil? || chunk.empty?
    @eof = true
  else
    @raw_buffer << chunk
    @stream_pointer += chunk.length
  end
end
107
+ end
108
+
109
class ReaderError < YAMLError
  # Raised by check_printable when the input contains a byte outside
  # the accepted printable range.
  def initialize(name, position, character, encoding, reason)
    @name = name
    @position = position
    @character = character
    @encoding = encoding
    @reason = reason
  end

  def to_s
    # NOTE(review): String#to_i yields 0 for non-numeric characters, so
    # the %02x placeholder renders 00 in the String branch — preserved
    # as-is to keep the original message output.
    template = if String === @character
      "'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}"
    else
      "unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}"
    end
    template % @character.to_i
  end
end
126
+ end
127
+
@@ -0,0 +1,183 @@
1
+
2
+ require 'set'
3
+
4
+ require 'rbyaml/error'
5
+ require 'rbyaml/nodes'
6
+ require 'rbyaml/detector'
7
+
8
+ module RbYAML
9
+ class RepresenterError < YAMLError
10
+ end
11
+
12
+ module BaseRepresenter
13
+ @@yaml_representers = {}
14
+
15
+ def initialize_representer
16
+ @represented_objects = {}
17
+ end
18
+
19
def represent(data)
  # Build the node tree for `data`, serialize it, then clear the alias
  # table so anchors from this document do not leak into the next.
  #
  # FIX: the original assigned a fresh *local* `represented_objects`,
  # leaving @represented_objects dirty between documents.
  node = represent_object(data)
  serialize(node)
  @represented_objects = {}
end
24
+
25
+ def represent_object(data)
26
+ if ignore_aliases(data)
27
+ alias_key = nil
28
+ else
29
+ alias_key = data.object_id
30
+ end
31
+
32
+ if !alias_key.nil?
33
+ if @represented_objects.include?(alias_key)
34
+ node = @represented_objects[alias_key]
35
+ raise RepresenterError.new("recursive objects are not allowed: #{data}") if node.nil?
36
+ return node
37
+ end
38
+ @represented_objects[alias_key] = nil
39
+ end
40
+
41
+ rerun = false
42
+
43
+ for data_type in data.class.ancestors
44
+ rerun = true
45
+ if @@yaml_representers.include?(data_type)
46
+ node = send(@@yaml_representers[data_type],data)
47
+ break
48
+ end
49
+ end
50
+ if !rerun
51
+ if @@yaml_representers.include?(nil)
52
+ node = send(@@yaml_representers[nil], data)
53
+ else
54
+ node = ScalarNode.new(data.taguri, data)
55
+ end
56
+ end
57
+ @represented_objects[alias_key] = node if !alias_key.nil?
58
+ node
59
+ end
60
+
61
+ def self.add_representer(data_type, representer)
62
+ @@yaml_representers[data_type] = representer
63
+ end
64
+
65
+ def represent_scalar(tag, value, style=nil)
66
+ ScalarNode.new(tag, value, style)
67
+ end
68
+
69
+ def represent_sequence(tag, sequence, flow_style=nil)
70
+ value = sequence.map {|item| represent_object(item)}
71
+ SequenceNode.new(tag, value, flow_style)
72
+ end
73
+
74
+ def represent_mapping(tag, mapping, flow_style=nil)
75
+ value = {}
76
+ mapping.each { |item_key,item_value| value[represent_object(item_key)] = represent_object(item_value) }
77
+ MappingNode.new(tag, value, flow_style)
78
+ end
79
+
80
+ def ignore_aliases(data)
81
+ false
82
+ end
83
+ end
84
+
85
+ module SafeRepresenter
86
+ include BaseRepresenter
87
+
88
+ def ignore_aliases(data)
89
+ data.nil? || String === data || TrueClass === data || FalseClass === data || Integer === data || Float === data
90
+ end
91
+
92
+ def represent_none(data)
93
+ represent_scalar(data.taguri,"null")
94
+ end
95
+
96
+ def represent_str(data)
97
+ represent_scalar(data.taguri,data)
98
+ end
99
+
100
+ def represent_symbol(data)
101
+ represent_scalar(data.taguri,data.to_s)
102
+ end
103
+
104
def represent_bool(data)
  # Booleans map onto the canonical "true"/"false" scalar spellings.
  represent_scalar(data.taguri, data ? "true" : "false")
end
108
+
109
+ def represent_int(data)
110
+ represent_scalar(data.taguri,data.to_s)
111
+ end
112
+
113
def represent_float(data)
  # Render a Float using YAML's canonical spellings for the non-finite
  # values (.inf / -.inf / .nan), Ruby's default formatting otherwise.
  text = if data.nan? || data != data
           ".nan"
         elsif data.infinite? == 1
           ".inf"
         elsif data.infinite? == -1
           "-.inf"
         else
           data.to_s
         end
  represent_scalar(data.taguri, text)
end
125
+
126
+ def represent_list(data)
127
+ represent_sequence(data.taguri, data)
128
+ end
129
+
130
+ def represent_dict(data)
131
+ represent_mapping(data.taguri, data)
132
+ end
133
+
134
def represent_set(data)
  # A set is emitted as a mapping whose keys are the members and whose
  # values are all null.
  pairs = {}
  data.each { |member| pairs[member] = nil }
  represent_mapping(data.taguri, pairs)
end
141
+
142
+ def represent_datetime(data)
143
+ value = "%04d-%02d-%02d %02d:%02d:%02d" % [data.year, data.month, data.day, data.hour, data.min, data.sec]
144
+ if data.usec != 0
145
+ value += "." + (data.usec/1000000.0).to_s.split(/\./)[1]
146
+ end
147
+ if data.utc_offset != 0
148
+ value += data.utc_offset.to_s
149
+ end
150
+ represent_scalar(data.taguri, value)
151
+ end
152
+
153
def represent_yaml_object(tag, data, flow_style=nil)
  # Build a mapping node from the object's to_yaml_properties: each
  # "@name" instance variable becomes a "name" key.
  #
  # FIX: the original wrote into an undefined `map` instead of the
  # local `mapping`, raising NameError for any yaml-object.
  mapping = {}
  data.to_yaml_properties.each do |ivar|
    mapping[ivar[1..-1]] = data.instance_variable_get(ivar)
  end
  represent_mapping(tag, mapping, flow_style)
end
161
+
162
+ def represent_undefined(data)
163
+ raise RepresenterError.new("cannot represent an object: #{data}")
164
+ end
165
+ end
166
+
167
+ BaseRepresenter.add_representer(NilClass,:represent_none)
168
+ BaseRepresenter.add_representer(String,:represent_str)
169
+ BaseRepresenter.add_representer(Symbol,:represent_symbol)
170
+ BaseRepresenter.add_representer(TrueClass,:represent_bool)
171
+ BaseRepresenter.add_representer(FalseClass,:represent_bool)
172
+ BaseRepresenter.add_representer(Integer,:represent_int)
173
+ BaseRepresenter.add_representer(Float,:represent_float)
174
+ BaseRepresenter.add_representer(Array,:represent_list)
175
+ BaseRepresenter.add_representer(Hash,:represent_dict)
176
+ BaseRepresenter.add_representer(Set,:represent_set)
177
+ BaseRepresenter.add_representer(Time,:represent_datetime)
178
+ BaseRepresenter.add_representer(nil,:represent_undefined)
179
+
180
+ module Representer
181
+ include SafeRepresenter
182
+ end
183
+ end
@@ -0,0 +1,1258 @@
1
+ # Scanner produces tokens of the following types:
2
+ # STREAM-START
3
+ # STREAM-END
4
+ # DIRECTIVE(name, value)
5
+ # DOCUMENT-START
6
+ # DOCUMENT-END
7
+ # BLOCK-SEQUENCE-START
8
+ # BLOCK-MAPPING-START
9
+ # BLOCK-END
10
+ # FLOW-SEQUENCE-START
11
+ # FLOW-MAPPING-START
12
+ # FLOW-SEQUENCE-END
13
+ # FLOW-MAPPING-END
14
+ # BLOCK-ENTRY
15
+ # FLOW-ENTRY
16
+ # KEY
17
+ # VALUE
18
+ # ALIAS(value)
19
+ # ANCHOR(value)
20
+ # TAG(value)
21
+ # SCALAR(value, plain)
22
+ #
23
+ # Read comments in the Scanner code for more details.
24
+ #
25
+
26
+ require 'rbyaml/error'
27
+ require 'rbyaml/tokens'
28
+
29
+ module RbYAML
30
+ class ScannerError < MarkedYAMLError
31
+ end
32
+
33
class SimpleKey
  # Plain record describing a potential simple key: the token number it
  # would get, whether a key is mandatory at that position, and where it
  # starts (index/line/column plus a Mark for error reporting).
  attr_reader :token_number, :required, :index, :line, :column, :mark

  def initialize(token_number, required, index, line, column, mark)
    @token_number, @required, @index, @line, @column, @mark =
      token_number, required, index, line, column, mark
  end
end
45
+
46
+ module Scanner
47
+ def initialize_scanner
48
+ # It is assumed that Scanner and Reader will mixin to the same point.
49
+ # Reader do the dirty work of checking for BOM. It also adds NUL to the end.
50
+ #
51
+ # Reader supports the following methods
52
+ # self.peek(i=0) # peek the next i-th character
53
+ # self.prefix(l=1) # peek the next l characters
54
+ # self.forward(l=1) # read the next l characters and move the pointer.
55
+
56
+ # Had we reached the end of the stream?
57
+ @done = false
58
+
59
+ # The number of unclosed '{' and '['. `flow_level == 0` means block
60
+ # context.
61
+ @flow_level = 0
62
+
63
+ # List of processed tokens that are not yet emitted.
64
+ @tokens = []
65
+
66
+ # Add the STREAM-START token.
67
+ fetch_stream_start
68
+
69
+ # Number of tokens that were emitted through the `get_token` method.
70
+ @tokens_taken = 0
71
+
72
+ # The current indentation level.
73
+ @indent = -1
74
+
75
+ # Past indentation levels.
76
+ @indents = []
77
+
78
+ # Variables related to simple keys treatment.
79
+
80
+ # A simple key is a key that is not denoted by the '?' indicator.
81
+ # Example of simple keys:
82
+ # ---
83
+ # block simple key: value
84
+ # ? not a simple key:
85
+ # : { flow simple key: value }
86
+ # We emit the KEY token before all keys, so when we find a potential
87
+ # simple key, we try to locate the corresponding ':' indicator.
88
+ # Simple keys should be limited to a single line and 1024 characters.
89
+
90
+ # Can a simple key start at the current position? A simple key may
91
+ # start:
92
+ # - at the beginning of the line, not counting indentation spaces
93
+ # (in block context),
94
+ # - after '{', '[', ',' (in the flow context),
95
+ # - after '?', ':', '-' (in the block context).
96
+ # In the block context, this flag also signifies if a block collection
97
+ # may start at the current position.
98
+ @allow_simple_key = true
99
+
100
+ # Keep track of possible simple keys. This is a dictionary. The key
101
+ # is `flow_level`; there can be no more that one possible simple key
102
+ # for each level. The value is a SimpleKey record:
103
+ # (token_number, required, index, line, column, mark)
104
+ # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
105
+ # '[', or '{' tokens.
106
+ @possible_simple_keys = {}
107
+ end
108
+
109
+ def check_token(*choices)
110
+ # Check if the next token is one of the given types.
111
+ fetch_more_tokens while need_more_tokens
112
+ unless @tokens.empty?
113
+ return true if choices.empty?
114
+ for choice in choices
115
+ return true if choice === @tokens[0]
116
+ end
117
+ end
118
+ return false
119
+ end
120
+
121
+ def peek_token
122
+ # Return the next token, but do not delete if from the queue.
123
+ fetch_more_tokens while need_more_tokens
124
+ return @tokens[0] unless @tokens.empty?
125
+ end
126
+
127
+ def get_token
128
+ # Return the next token.
129
+ fetch_more_tokens while need_more_tokens
130
+ unless @tokens.empty?
131
+ @tokens_taken += 1
132
+ @tokens.shift
133
+ end
134
+ end
135
+
136
+ def each_token
137
+ fetch_more_tokens while need_more_tokens
138
+ while !@tokens.empty?
139
+ @tokens_taken += 1
140
+ yield @tokens.shift
141
+ fetch_more_tokens while need_more_tokens
142
+ end
143
+ end
144
+
145
+ def need_more_tokens
146
+ return false if @done
147
+ return true if @tokens.empty?
148
+ # The current token may be a potential simple key, so we
149
+ # need to look further.
150
+ stale_possible_simple_keys
151
+ return true if next_possible_simple_key == @tokens_taken
152
+ end
153
+
154
+ def fetch_more_tokens
155
+ # Eat whitespaces and comments until we reach the next token.
156
+ scan_to_next_token
157
+
158
+ # Remove obsolete possible simple keys.
159
+ stale_possible_simple_keys
160
+
161
+ # Compare the current indentation and column. It may add some tokens
162
+ # and decrease the current indentation level.
163
+ unwind_indent(@column)
164
+
165
+ # Peek the next character.
166
+ ch = peek
167
+
168
+ return case
169
+ # Is it the end of stream?
170
+ when ch == ?\0: fetch_stream_end
171
+ # Is it a directive?
172
+ when ch == ?% && check_directive: fetch_directive
173
+ # Is it the document start?
174
+ when ch == ?- && check_document_start: fetch_document_start
175
+ # Is it the document end?
176
+ when ch == ?. && check_document_end: fetch_document_end
177
+ # Is it the flow sequence start indicator?
178
+ when ch == ?[: fetch_flow_sequence_start
179
+ # Is it the flow mapping start indicator?
180
+ when ch == ?{: fetch_flow_mapping_start
181
+ # Is it the flow sequence end indicator?
182
+ when ch == ?]: fetch_flow_sequence_end
183
+ # Is it the flow mapping end indicator?
184
+ when ch == ?}: fetch_flow_mapping_end
185
+ # Is it the flow entry indicator?
186
+ when ch == ?,: fetch_flow_entry
187
+ # Is it the block entry indicator?
188
+ when ch == ?- && check_block_entry: fetch_block_entry
189
+ # Is it the key indicator?
190
+ when ch == ?? && check_key: fetch_key
191
+ # Is it the value indicator?
192
+ when ch == ?: && check_value: fetch_value
193
+ # Is it an alias?
194
+ when ch == ?*: fetch_alias
195
+ # Is it an anchor?
196
+ when ch == ?&: fetch_anchor
197
+ # Is it a tag?
198
+ when ch == ?!: fetch_tag
199
+ # Is it a literal scalar?
200
+ when ch == ?| && @flow_level==0: fetch_literal
201
+ # Is it a folded scalar?
202
+ when ch == ?> && @flow_level==0: fetch_folded
203
+ # Is it a single quoted scalar?
204
+ when ch == ?': fetch_single
205
+ # Is it a double quoted scalar?
206
+ when ch == ?": fetch_double
207
+ # It must be a plain scalar then.
208
+ when check_plain: fetch_plain
209
+ else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
210
+ end
211
+ end
212
+
213
+ # Simple keys treatment.
214
+
215
def next_possible_simple_key
  # Token number of the earliest pending simple-key candidate, or nil
  # when no candidate is pending.
  @possible_simple_keys.values.map { |key| key.token_number }.min
end
227
+
228
def stale_possible_simple_keys
  # Drop candidates that can no longer be simple keys: per the YAML
  # spec a simple key must sit on the current line and span at most
  # 1024 characters. A *required* key that goes stale is a syntax error.
  #
  # FIX: the original used `return true` / `return false` inside the
  # delete_if block, which returns from this METHOD on the very first
  # entry, so delete_if never actually pruned anything.
  @possible_simple_keys.delete_if { |level, key|
    stale = key.line != @line || @index - key.index > 1024
    if stale && key.required
      raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'", get_mark)
    end
    stale
  }
end
243
+
244
+ def save_possible_simple_key
245
+ # The next token may start a simple key. We check if it's possible
246
+ # and save its position. This function is called for
247
+ # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
248
+
249
+ # Check if a simple key is required at the current position.
250
+ required = @flow_level==0 && @indent == @column
251
+
252
+ # The next token might be a simple key. Let's save it's number and
253
+ # position.
254
+ if @allow_simple_key
255
+ remove_possible_simple_key
256
+ token_number = @tokens_taken+@tokens.length
257
+ key = SimpleKey.new(token_number, required,@index,@line,@column,get_mark)
258
+ @possible_simple_keys[@flow_level] = key
259
+ end
260
+ end
261
+
262
def remove_possible_simple_key
  # Forget the saved simple-key candidate at the current flow level.
  #
  # FIX: the original only *read* the entry into a throwaway local, so
  # candidates were never actually removed and could later be emitted
  # as spurious KEY tokens by fetch_value.
  @possible_simple_keys.delete(@flow_level)
end
266
+
267
+ # Indentation functions.
268
+
269
+ def unwind_indent(column)
270
+ ## In flow context, tokens should respect indentation.
271
+ ## Actually the condition should be `@indent >= column` according to
272
+ ## the spec. But this condition will prohibit intuitively correct
273
+ ## constructions such as
274
+ ## key : {
275
+ ## }
276
+ #if @flow_level and @indent > column
277
+ # raise ScannerError(nil, nil,
278
+ # "invalid intendation or unclosed '[' or '{'",
279
+ # get_mark)
280
+
281
+ # In the flow context, indentation is ignored. We make the scanner less
282
+ # restrictive then specification requires.
283
+ return nil if @flow_level != 0
284
+ # In block context, we may need to issue the BLOCK-END tokens.
285
+ while @indent > column
286
+ mark = get_mark
287
+ @indent = @indents.pop()
288
+ @tokens << BlockEndToken.new(mark, mark)
289
+ end
290
+ end
291
+
292
def add_indent(column)
  # Enter a deeper indentation level, remembering the previous one.
  # Returns true when the indent actually changed.
  return false unless @indent < column
  @indents.push(@indent)
  @indent = column
  true
end
301
+
302
+ # Fetchers.
303
+
304
+ def fetch_stream_start
305
+ # We always add STREAM-START as the first token and STREAM-END as the
306
+ # last token.
307
+ # Read the token.
308
+ mark = get_mark
309
+ # Add STREAM-START.
310
+ @tokens << StreamStartToken.new(mark, mark, @encoding)
311
+ end
312
+
313
+
314
+ def fetch_stream_end
315
+ # Set the current intendation to -1.
316
+ unwind_indent(-1)
317
+ # Reset everything (not really needed).
318
+ @allow_simple_key = false
319
+ @possible_simple_keys = {}
320
+ # Read the token.
321
+ mark = get_mark
322
+ # Add STREAM-END.
323
+ @tokens << StreamEndToken.new(mark, mark)
324
+ # The stream is finished.
325
+ @done = true
326
+ end
327
+
328
+ def fetch_directive
329
+ # Set the current intendation to -1.
330
+ unwind_indent(-1)
331
+ # Reset simple keys.
332
+ remove_possible_simple_key
333
+ @allow_simple_key = false
334
+ # Scan and add DIRECTIVE.
335
+ @tokens << scan_directive
336
+ end
337
+
338
+ def fetch_document_start
339
+ fetch_document_indicator(DocumentStartToken)
340
+ end
341
+
342
+ def fetch_document_end
343
+ fetch_document_indicator(DocumentEndToken)
344
+ end
345
+
346
+ def fetch_document_indicator(token)
347
+ # Set the current intendation to -1.
348
+ unwind_indent(-1)
349
+ # Reset simple keys. Note that there could not be a block collection
350
+ # after '---'.
351
+ remove_possible_simple_key
352
+ @allow_simple_key = false
353
+ # Add DOCUMENT-START or DOCUMENT-END.
354
+ start_mark = get_mark
355
+ forward(3)
356
+ end_mark = get_mark
357
+ @tokens << token.new(start_mark, end_mark)
358
+ end
359
+
360
+ def fetch_flow_sequence_start
361
+ fetch_flow_collection_start(FlowSequenceStartToken)
362
+ end
363
+
364
+ def fetch_flow_mapping_start
365
+ fetch_flow_collection_start(FlowMappingStartToken)
366
+ end
367
+
368
+ def fetch_flow_collection_start(token)
369
+ # '[' and '{' may start a simple key.
370
+ save_possible_simple_key
371
+ # Increase the flow level.
372
+ @flow_level += 1
373
+ # Simple keys are allowed after '[' and '{'.
374
+ @allow_simple_key = true
375
+ # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
376
+ start_mark = get_mark
377
+ forward
378
+ end_mark = get_mark
379
+ @tokens << token.new(start_mark, end_mark)
380
+ end
381
+
382
+ def fetch_flow_sequence_end
383
+ fetch_flow_collection_end(FlowSequenceEndToken)
384
+ end
385
+
386
+ def fetch_flow_mapping_end
387
+ fetch_flow_collection_end(FlowMappingEndToken)
388
+ end
389
+
390
+ def fetch_flow_collection_end(token)
391
+ # Reset possible simple key on the current level.
392
+ remove_possible_simple_key
393
+ # Decrease the flow level.
394
+ @flow_level -= 1
395
+ # No simple keys after ']' or '}'.
396
+ @allow_simple_key = false
397
+ # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
398
+ start_mark = get_mark
399
+ forward
400
+ end_mark = get_mark
401
+ @tokens << token.new(start_mark, end_mark)
402
+ end
403
+
404
+ def fetch_flow_entry
405
+ # Simple keys are allowed after ','.
406
+ @allow_simple_key = true
407
+ # Reset possible simple key on the current level.
408
+ remove_possible_simple_key
409
+ # Add FLOW-ENTRY.
410
+ start_mark = get_mark
411
+ forward
412
+ end_mark = get_mark
413
+ @tokens << FlowEntryToken.new(start_mark, end_mark)
414
+ end
415
+
416
+ def fetch_block_entry
417
+ # Block context needs additional checks.
418
+ if @flow_level==0
419
+ raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
420
+ # We may need to add BLOCK-SEQUENCE-START.
421
+ if add_indent(@column)
422
+ mark = get_mark
423
+ @tokens << BlockSequenceStartToken.new(mark, mark)
424
+ end
425
+ # It's an error for the block entry to occur in the flow context,
426
+ # but we let the parser detect this.
427
+ end
428
+ # Simple keys are allowed after '-'.
429
+ @allow_simple_key = true
430
+ # Reset possible simple key on the current level.
431
+ remove_possible_simple_key
432
+ # Add BLOCK-ENTRY.
433
+ start_mark = get_mark
434
+ forward
435
+ end_mark = get_mark
436
+ @tokens << BlockEntryToken.new(start_mark, end_mark)
437
+ end
438
+
439
+ def fetch_key
440
+ # Block context needs additional checks.
441
+ if @flow_level==0
442
+ # Are we allowed to start a key (not nessesary a simple)?
443
+ raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
444
+ # We may need to add BLOCK-MAPPING-START.
445
+ if add_indent(@column)
446
+ mark = get_mark
447
+ @tokens << BlockMappingStartToken.new(mark, mark)
448
+ end
449
+ end
450
+ # Simple keys are allowed after '?' in the block context.
451
+ @allow_simple_key = @flow_level==0
452
+ # Reset possible simple key on the current level.
453
+ remove_possible_simple_key
454
+ # Add KEY.
455
+ start_mark = get_mark
456
+ forward
457
+ end_mark = get_mark
458
+ @tokens << KeyToken.new(start_mark, end_mark)
459
+ end
460
+
461
+ def fetch_value
462
+ # Do we determine a simple key?
463
+ if @possible_simple_keys.include?(@flow_level)
464
+ # Add KEY.
465
+ key = @possible_simple_keys[@flow_level]
466
+ @possible_simple_keys.delete(@flow_level)
467
+ @tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
468
+ # If this key starts a new block mapping, we need to add
469
+ # BLOCK-MAPPING-START.
470
+ @tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
471
+ # There cannot be two simple keys one after another.
472
+ @allow_simple_key = false
473
+ # It must be a part of a complex key.
474
+ else
475
+ # Block context needs additional checks.
476
+ # (Do we really need them? They will be catched by the parser
477
+ # anyway.)
478
+ if @flow_level==0
479
+ # We are allowed to start a complex value if and only if
480
+ # we can start a simple key.
481
+ raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
482
+ # Simple keys are allowed after ':' in the block context.
483
+ @allow_simple_key = @flow_level==0
484
+ # Reset possible simple key on the current level.
485
+ remove_possible_simple_key
486
+ end
487
+ end
488
+ # Add VALUE.
489
+ start_mark = get_mark
490
+ forward
491
+ end_mark = get_mark
492
+ @tokens << ValueToken.new(start_mark, end_mark)
493
+ end
494
+
495
+ def fetch_alias
496
+ # ALIAS could be a simple key.
497
+ save_possible_simple_key
498
+ # No simple keys after ALIAS.
499
+ @allow_simple_key = false
500
+ # Scan and add ALIAS.
501
+ @tokens << scan_anchor(AliasToken)
502
+ end
503
+
504
+ def fetch_anchor
505
+ # ANCHOR could start a simple key.
506
+ save_possible_simple_key
507
+ # No simple keys after ANCHOR.
508
+ @allow_simple_key = false
509
+ # Scan and add ANCHOR.
510
+ @tokens << scan_anchor(AnchorToken)
511
+ end
512
+
513
+ def fetch_tag
514
+ # TAG could start a simple key.
515
+ save_possible_simple_key
516
+ # No simple keys after TAG.
517
+ @allow_simple_key = false
518
+ # Scan and add TAG.
519
+ @tokens << scan_tag
520
+ end
521
+
522
+ def fetch_literal
523
+ fetch_block_scalar(?|)
524
+ end
525
+
526
+ def fetch_folded
527
+ fetch_block_scalar(?>)
528
+ end
529
+
530
+ def fetch_block_scalar(style)
531
+ # A simple key may follow a block scalar.
532
+ @allow_simple_key = true
533
+ # Reset possible simple key on the current level.
534
+ remove_possible_simple_key
535
+ # Scan and add SCALAR.
536
+ @tokens << scan_block_scalar(style)
537
+ end
538
+
539
+ def fetch_single
540
+ fetch_flow_scalar(?')
541
+ end
542
+
543
+ def fetch_double
544
+ fetch_flow_scalar(?")
545
+ end
546
+
547
+ def fetch_flow_scalar(style)
548
+ # A flow scalar could be a simple key.
549
+ save_possible_simple_key
550
+ # No simple keys after flow scalars.
551
+ @allow_simple_key = false
552
+ # Scan and add SCALAR.
553
+ @tokens << scan_flow_scalar(style)
554
+ end
555
+
556
+ def fetch_plain
557
+ # A plain scalar could be a simple key.
558
+ save_possible_simple_key
559
+ # No simple keys after plain scalars. But note that `scan_plain` will
560
+ # change this flag if the scan is finished at the beginning of the
561
+ # line.
562
+ @allow_simple_key = false
563
+ # Scan and add SCALAR. May change `allow_simple_key`.
564
+ @tokens << scan_plain
565
+ end
566
+
567
+ # Checkers.
568
+
569
def check_directive
  # DIRECTIVE: ^ '%' ...
  # The '%' itself was already checked by the caller; a directive is
  # only recognised at column zero.
  @column.zero?
end
574
+
575
+ def check_document_start
576
+ # DOCUMENT-START: ^ '---' (' '|'\n')
577
+ @column == 0 && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
578
+ end
579
+
580
def check_document_end
  # DOCUMENT-END: ^ '...' (' '|'\n')
  # FIX: dropped the stray `prefix = peek(4)` local — an unused leftover
  # from the PyYAML original (`prefix = self.prefix(4)`); the parenthesised
  # `prefix(3)` below always called the Reader method regardless.
  if @column == 0
    return true if prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
  end
end
587
+
588
def check_block_entry
  # BLOCK-ENTRY: '-' (' '|'\n') — the dash must be followed by
  # whitespace, a line break, or the NUL sentinel.
  following = peek(1)
  "\0 \t\r\n\x85".include?(following)
end
592
+
593
+ def check_key
594
+ # KEY(flow context): '?'
595
+ # KEY(block context): '?' (' '|'\n')
596
+ @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
597
+ end
598
+
599
+ def check_value
600
+ # VALUE(flow context): ':'
601
+ # VALUE(block context): ':' (' '|'\n')
602
+ @flow_level!=0 || "\0 \t\r\n\x85".include?(peek(1))
603
+ end
604
+
605
+ def check_plain
606
+ # A plain scalar may start with any non-space character except:
607
+ # '-', '?', ':', ',', '[', ']', '{', '}',
608
+ # '#', '&', '*', '!', '|', '>', '\'', '\"',
609
+ # '%', '@', '`'.
610
+ #
611
+ # It may also start with
612
+ # '-', '?', ':'
613
+ # if it is followed by a non-space character.
614
+ #
615
+ # Note that we limit the last rule to the block context (except the
616
+ # '-' character) because we want the flow context to be space
617
+ # independent.
618
+ ch = peek
619
+ !("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) || (!("\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level==0 && "?:".include?(ch)))))
620
+ end
621
+
622
+
623
+
624
+
625
+
626
+
627
+ # Scanners.
628
+
629
# Advances the cursor past insignificant content: spaces, '#' comments,
# and line breaks. A line break seen in block context re-enables
# `allow_simple_key`.
#
# TODO: tab handling needs to be made saner; tabs should not be allowed
# to precede block-structure tokens (BLOCK-SEQUENCE-START,
# BLOCK-MAPPING-START, BLOCK-END, KEY(block), VALUE(block),
# BLOCK-ENTRY), which would require clearing `allow_simple_keys` on tab
# and checking the flag in `unwind_indent` before issuing BLOCK-END.
def scan_to_next_token
  found = false
  until found
    # Eat indentation spaces (peek yields character codes; 32 == ' ').
    forward while peek == 32
    # Eat a comment through to end-of-line.
    if peek == ?#
      forward until "\0\r\n\x85".include?(peek)
    end
    if scan_line_break.empty?
      # No break consumed: we are on a real token.
      found = true
    else
      @allow_simple_key = true if @flow_level == 0
    end
  end
end
659
+
660
# Scans a '%' directive line and returns a DirectiveToken.
# %YAML and %TAG get parsed values; any other directive's body is
# skipped and its value stays nil.
def scan_directive
  # See the specification for details.
  start_mark = get_mark
  forward
  name = scan_directive_name(start_mark)
  value = nil
  if name == "YAML"
    value = scan_yaml_directive_value(start_mark)
    end_mark = get_mark
  elsif name == "TAG"
    value = scan_tag_directive_value(start_mark)
    end_mark = get_mark
  else
    end_mark = get_mark
    # Unknown directive: consume the rest of the line.
    forward while !"\0\r\n\x85".include?(peek)
  end
  # Trailing spaces/comment and the line break.
  scan_directive_ignored_line(start_mark)
  DirectiveToken.new(name, value, start_mark, end_mark)
end
679
+
680
# Reads the directive name: a non-empty run of [-0-9A-Za-z_] characters
# terminated by NUL, space, or a line break. Raises ScannerError
# otherwise.
def scan_directive_name(start_mark)
  # See the specification for details.
  length = 0
  ch = peek(length)
  # peek returns a character code (Ruby 1.8 style); .chr converts it for
  # the regexp match.
  while /[-0-9A-Za-z_]/ =~ ch.chr
    length += 1
    ch = peek(length)
  end
  raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if length==0
  value = prefix(length)
  forward(length)
  ch = peek()
  raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
  value
end
695
+
696
# Parses the "major.minor" version after %YAML and returns the pair
# [major, minor].
def scan_yaml_directive_value(start_mark)
  # See the specification for details.
  forward while peek == 32
  major = scan_yaml_directive_number(start_mark)
  raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark) if peek != ?.
  forward
  minor = scan_yaml_directive_number(start_mark)
  raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark) if !"\0 \r\n\x85".include?(peek)
  [major, minor]
end
706
+
707
# Reads one run of decimal digits from the %YAML version.
# NOTE(review): returns the digits as a String (e.g. "1"), whereas the
# PyYAML original converts to an integer -- confirm what consumers of
# the DirectiveToken value expect.
def scan_yaml_directive_number(start_mark)
  # See the specification for details.
  ch = peek
  raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !((?0..?9) === ch)
  length = 0
  length += 1 while ((?0..?9) === peek(length))
  value = prefix(length)
  forward(length)
  value
end
717
+
718
# Parses the "!handle! prefix" pair after %TAG and returns
# [handle, prefix].
def scan_tag_directive_value(start_mark)
  forward while peek == 32
  tag_handle = scan_tag_directive_handle(start_mark)
  forward while peek == 32
  tag_prefix = scan_tag_directive_prefix(start_mark)
  [tag_handle, tag_prefix]
end
726
+
727
# Reads the handle part of a %TAG directive; it must be followed by a
# space (32 is the code for ' ').
def scan_tag_directive_handle(start_mark)
  # See the specification for details.
  value = scan_tag_handle("directive", start_mark)
  ch = peek
  raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if ch != 32
  value
end
734
+
735
# Reads the prefix part of a %TAG directive; it must be followed by
# NUL, space, or a line break.
def scan_tag_directive_prefix(start_mark)
  # See the specification for details.
  value = scan_tag_uri("directive", start_mark)
  ch = peek
  raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark()) if !"\0 \r\n\x85".include?(ch)
  value
end
742
+
743
# After a directive's value: skip trailing spaces and an optional '#'
# comment, require end-of-line, and consume the line break.
def scan_directive_ignored_line(start_mark)
  forward while peek == 32
  if peek == ?#
    forward until "\0\r\n\x85".include?(peek)
  end
  ch = peek
  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark())
  end
  scan_line_break
end
753
+
754
# Scans an anchor (&name) or alias (*name) and returns an instance of
# the token class passed in as `token` (AnchorToken or AliasToken).
#
# The specification does not restrict characters for anchors and
# aliases. This may lead to problems, for instance, the document:
#   [ *alias, value ]
# can be interpteted in two ways, as
#   [ "value" ]
# and
#   [ *alias , "value" ]
# Therefore we restrict aliases to numbers and ASCII letters.
def scan_anchor(token)
  start_mark = get_mark
  indicator = peek
  # Pick the noun used in error messages from the indicator character.
  name = (indicator == ?*) ? "alias":"anchor"
  forward
  length = 0
  ch = peek(length)
  while /[-0-9A-Za-z_]/ =~ ch.chr
    length += 1
    ch = peek(length)
  end
  raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark) if length==0
  value = prefix(length)
  forward(length)
  ch = peek
  # The name must end at whitespace or a flow/structure indicator.
  if !"\0 \t\r\n\x85?:,]}%@`".include?(ch)
    raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
  end
  end_mark = get_mark
  token.new(value, start_mark, end_mark)
end
783
+
784
+
785
# Scans a tag token in one of three forms and returns a TagToken whose
# value is [handle, suffix]:
#   !<verbatim-uri>   -> handle nil, suffix the URI
#   ! (then space)    -> handle nil, suffix "!"  (non-specific tag)
#   !handle!suffix or !suffix
def scan_tag
  # See the specification for details.
  start_mark = get_mark
  ch = peek(1)
  if ch == ?<
    # Verbatim form: skip '!<', read the URI, require the closing '>'.
    handle = nil
    forward(2)
    suffix = scan_tag_uri("tag", start_mark)
    raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
    forward
  elsif "\0 \t\r\n\x85".include?(ch)
    # A lone '!' -- the non-specific tag.
    handle = nil
    suffix = "!"
    forward
  else
    # Look ahead: a second '!' before whitespace means an explicit
    # handle; otherwise the primary handle '!' is implied.
    length = 1
    use_handle = false
    while !"\0 \t\r\n\x85".include?(ch)
      if ch == ?!
        use_handle = true
        break
      end
      length += 1
      ch = peek(length)
    end
    # BUG FIX (dead code): a `handle = "!"` initialization here was
    # unconditionally overwritten by both branches below; removed.
    if use_handle
      handle = scan_tag_handle("tag", start_mark)
    else
      handle = "!"
      forward
    end
    suffix = scan_tag_uri("tag", start_mark)
  end
  ch = peek
  raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
  value = [handle, suffix]
  end_mark = get_mark
  TagToken.new(value, start_mark, end_mark)
end
825
+
826
# Scans a literal (|) or folded (>) block scalar; `style` is the
# indicator character code. Returns a non-plain ScalarToken.
def scan_block_scalar(style)
  # See the specification for details.
  folded = style== ?>
  chunks = []
  start_mark = get_mark
  # Scan the header.
  forward
  chomping, increment = scan_block_scalar_indicators(start_mark)
  scan_block_scalar_ignored_line(start_mark)
  # Determine the indentation level and go to the first non-empty line.
  min_indent = @indent+1
  min_indent = 1 if min_indent < 1
  if increment.nil?
    # No explicit indicator: deduce the indent from the first content.
    breaks, max_indent, end_mark = scan_block_scalar_indentation
    indent = [min_indent, max_indent].max
  else
    indent = min_indent+increment-1
    breaks, end_mark = scan_block_scalar_breaks(indent)
  end
  line_break = ''
  # Scan the inner part of the block scalar.
  while @column == indent and peek != ?\0
    chunks += breaks
    leading_non_space = !" \t".include?(peek)
    length = 0
    length += 1 while !"\0\r\n\x85".include?(peek(length))
    chunks << prefix(length)
    forward(length)
    line_break = scan_line_break
    breaks, end_mark = scan_block_scalar_breaks(indent)
    if @column == indent && peek != 0
      # Unfortunately, folding rules are ambiguous.
      #
      # This is the folding according to the specification:
      # BUG FIX: scan_line_break returns the String "\n"; the old
      # comparison `line_break == ?\n` matched a String against a
      # character code and was always false, so folded scalars never
      # folded a break into a space.
      if folded && line_break == "\n" && leading_non_space && !" \t".include?(peek())
        chunks << ' ' if breaks.empty?
      else
        chunks << line_break
      end
      # This is Clark Evans's interpretation (also in the spec
      # examples):
      #
      #if folded and line_break == u'\n':
      #  if not breaks:
      #    if self.peek() not in ' \t':
      #      chunks.append(u' ')
      #    else:
      #      chunks.append(line_break)
      #else:
      #  chunks.append(line_break)
    else
      break
    end
  end

  # Chomp the tail (keep the final break and trailing blank lines only
  # when chomping is '+').
  if chomping
    chunks << line_break
    chunks += breaks
  end

  # We are done.
  ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
end
890
+
891
# Parses the optional chomping ('+'/'-') and indentation (1-9)
# indicators that follow a block scalar's style character, accepted in
# either order. Returns [chomping, increment]; each is nil when absent.
def scan_block_scalar_indicators(start_mark)
  # See the specification for details.
  chomping = nil
  increment = nil
  ch = peek
  if /[+-]/ =~ ch.chr
    chomping = ch == ?+
    forward
    ch = peek
    if (?0..?9) === ch
      # BUG FIX: ch is a character code and Fixnum#to_i is the
      # identity, so the old `ch.to_i` stored the code point (e.g. 51
      # for '3') instead of the digit value. Convert via the character.
      increment = ch.chr.to_i
      raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
      forward
    end
  elsif (?0..?9) === ch
    # BUG FIX: this branch stored the raw character code (`increment =
    # ch`), so the zero check below could never fire and the indent
    # arithmetic in scan_block_scalar was wildly wrong.
    increment = ch.chr.to_i
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
    forward
    ch = peek
    if /[+-]/ =~ ch.chr
      chomping = ch == ?+
      forward
    end
  end
  ch = peek
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
  [chomping, increment]
end
919
+
920
# After the block scalar header: skip trailing spaces and an optional
# '#' comment, require end-of-line, and consume the line break.
def scan_block_scalar_ignored_line(start_mark)
  forward while peek == 32
  if peek == ?#
    forward until "\0\r\n\x85".include?(peek)
  end
  ch = peek
  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark)
  end
  scan_line_break
end
931
+
932
# Skims leading blank lines of a block scalar to find the deepest
# indentation among them. Returns [line-break chunks, max_indent,
# end_mark].
def scan_block_scalar_indentation
  # See the specification for details.
  chunks = []
  max_indent = 0
  end_mark = get_mark
  while " \r\n\x85".include?(peek)
    if peek != 32
      # A line break: record it and remember where it ended.
      chunks << scan_line_break
      end_mark = get_mark
    else
      forward
      # Track the deepest column reached by indentation spaces.
      max_indent = @column if @column > max_indent
    end
  end
  [chunks, max_indent, end_mark]
end
948
+
949
# Consumes indentation spaces (up to `indent` columns) and any blank
# lines that follow a block scalar line. Returns [breaks, end_mark].
def scan_block_scalar_breaks(indent)
  collected = []
  mark = get_mark
  forward while @column < indent && peek == 32
  while "\r\n\x85".include?(peek)
    collected << scan_line_break
    mark = get_mark
    forward while @column < indent && peek == 32
  end
  [collected, mark]
end
961
+
962
# Scans a single- or double-quoted scalar; `style` is the quote
# character code. Returns a non-plain ScalarToken.
#
# Note that we loose indentation rules for quoted scalars. Quoted
# scalars don't need to adhere indentation because " and ' clearly
# mark the beginning and the end of them. Therefore we are less
# restrictive then the specification requires. We only need to check
# that document separators are not included in scalars.
def scan_flow_scalar(style)
  double = style == ?"
  chunks = []
  start_mark = get_mark
  quote = peek
  forward
  # Alternate non-space and space runs until the closing quote.
  chunks += scan_flow_scalar_non_spaces(double, start_mark)
  while peek != quote
    chunks += scan_flow_scalar_spaces(double, start_mark)
    chunks += scan_flow_scalar_non_spaces(double, start_mark)
  end
  forward
  end_mark = get_mark
  ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
end
983
+
984
# Single-character escapes for double-quoted scalars: maps the
# character after '\' to its replacement text.
ESCAPE_REPLACEMENTS = {
  "0" => "\0",
  "a" => "\x07",
  "b" => "\x08",
  "t" => "\x09",
  "\t" => "\x09",
  "n" => "\x0A",
  "v" => "\x0B",
  "f" => "\x0C",
  "r" => "\x0D",
  "e" => "\x1B",
  " " => "\x20",
  '"' => '"',
  "\\" => "\\",
  "N" => "\x85",
  "_" => "\xA0"
}
1001
+
1002
# Numeric escapes: maps the introducer character to the number of hex
# digits that follow it.
# NOTE(review): only \xNN is supported; YAML also defines \uNNNN and
# \UNNNNNNNN -- confirm whether their omission is intentional here.
ESCAPE_CODES = {
  'x' => 2
}
1005
+
1006
# Scans the non-whitespace portions of a quoted scalar: literal runs,
# the '' doubling inside single quotes, and backslash escapes inside
# double quotes. Returns the collected chunks; stops (returning) at
# whitespace, a line break, or the closing quote.
def scan_flow_scalar_non_spaces(double, start_mark)
  # See the specification for details.
  chunks = []
  while true
    length = 0
    length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
    if length!=0
      chunks << prefix(length)
      forward(length)
    end
    ch = peek
    if !double && ch == ?' && peek(1) == ?'
      # '' inside a single-quoted scalar is an escaped apostrophe.
      chunks << ?'
      forward(2)
    elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
      # Quote/backslash characters that are literal in this style.
      chunks << ch
      forward
    elsif double && ch == ?\\
      forward
      ch = peek
      if ESCAPE_REPLACEMENTS.member?(ch.chr)
        chunks << ESCAPE_REPLACEMENTS[ch.chr]
        forward
      elsif ESCAPE_CODES.member?(ch.chr)
        length = ESCAPE_CODES[ch.chr]
        forward
        length.times do |k|
          if /[0-9A-Fa-f]/ !~ peek(k).chr
            raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
              "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
          end
        end
        # BUG FIX: the digits are a hexadecimal character code. The old
        # code parsed them as decimal and appended that number's hex
        # STRING ("\x41" produced "29"); decode the code point instead.
        chunks << prefix(length).to_i(16).chr
        forward(length)
      elsif "\r\n\x85".include?(ch)
        # Escaped line break: fold it away, keeping subsequent breaks.
        scan_line_break
        chunks += scan_flow_scalar_breaks(double, start_mark)
      else
        raise ScannerError.new("while scanning a double-quoted scalar", start_mark,"found unknown escape character #{ch}",get_mark)
      end
    else
      return chunks
    end
  end
end
1052
+
1053
# Scans whitespace inside a quoted scalar, applying the folding rules:
# a single line break folds to one space; extra blank lines are kept as
# breaks; plain spaces/tabs are kept verbatim.
def scan_flow_scalar_spaces(double, start_mark)
  # See the specification for details.
  chunks = []
  length = 0
  length += 1 while /[ \t]/ =~ peek(length).chr
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if ch == ?\0
    raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
  elsif "\r\n\x85".include?(ch)
    line_break = scan_line_break
    breaks = scan_flow_scalar_breaks(double, start_mark)
    # BUG FIX: scan_line_break returns the String "\n"; comparing it to
    # the character code ?\n was always unequal, so a normalized break
    # was appended verbatim instead of being folded into a space.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  else
    chunks << whitespaces
  end
  chunks
end
1077
+
1078
# Consumes blank lines inside a quoted scalar, returning the collected
# line breaks. Raises if a document separator appears inside the
# scalar.
def scan_flow_scalar_breaks(double, start_mark)
  # See the specification for details.
  chunks = []
  while true
    # Instead of checking indentation, we check for document
    # separators.
    prefix = prefix(3)
    if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
      raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
    end
    forward while /[ \t]/ =~ peek.chr
    if "\r\n\x85".include?(peek)
      chunks << scan_line_break
    else
      return chunks
    end
  end
end
1096
+
1097
# Scans a plain (unquoted) scalar and returns a plain ScalarToken.
# We add an additional restriction for the flow context:
#   plain scalars in the flow context cannot contain ',', ':' and '?'.
# We also keep track of the `allow_simple_key` flag here.
# Indentation rules are loosed for the flow context.
def scan_plain
  # See the specification for details.
  chunks = []
  start_mark = get_mark
  end_mark = start_mark
  indent = @indent+1
  # We allow zero indentation for scalars, but then we need to check for
  # document separators at the beginning of the line.
  #if indent == 0
  #  indent = 1
  spaces = []
  while true
    length = 0
    break if peek == ?#
    while true
      ch = peek(length)
      # BUG FIX: the ': ' lookahead set read "\0 \t\r\n\x28"; \x28 is
      # '(' -- a typo for \x85 (NEL), the break set used everywhere
      # else in this scanner.
      if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x85".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
        break
      end
      length += 1
    end
    break if length == 0
    # Once a plain scalar has content it can no longer be a simple key.
    @allow_simple_key = false
    chunks += spaces
    chunks << prefix(length)
    forward(length)
    end_mark = get_mark
    spaces = scan_plain_spaces(indent, start_mark)
    break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
  end
  return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
end
1133
+
1134
# Scans whitespace/line breaks between plain-scalar words, folding a
# single break to one space. Returns nil when a document separator
# terminates the scalar.
# The specification is really confusing about tabs in plain scalars.
# We just forbid them completely. Do not use tabs in YAML!
def scan_plain_spaces(indent, start_mark)
  # See the specification for details.
  chunks = []
  length = 0
  length += 1 while peek(length) == 32
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if "\r\n\x85".include?(ch)
    line_break = scan_line_break
    # A line break inside a plain scalar re-enables simple keys.
    @allow_simple_key = true
    prefix = prefix(3)
    return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
    breaks = []
    while " \r\n\x85".include?(peek)
      if peek == 32
        forward
      else
        breaks << scan_line_break
        prefix = prefix(3)
        return if (prefix == "---" || prefix == "...") && "\0 \t\r\n\x85".include?(peek(3))
      end
    end
    # BUG FIX: the old comparison used single-quoted '\n', which is the
    # two-character string backslash-n and never equals the "\n" that
    # scan_line_break returns, so the break was never folded to a space.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  elsif !whitespaces.empty?
    chunks << whitespaces
  end
  chunks
end
1170
+
1171
# Reads a tag handle: '!', optionally followed by word characters and a
# closing '!'. Raises ScannerError on malformed handles.
# For some strange reasons, the specification does not allow '_' in
# tag handles. I have allowed it anyway.
def scan_tag_handle(name, start_mark)
  ch = peek
  raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark) if ch != ?!
  length = 1
  ch = peek(length)
  # 32 is the character code for ' '; a lone '!' (followed by a space)
  # is the primary handle.
  if ch != 32
    while /[-_0-9A-Za-z]/ =~ ch.chr
      length += 1
      ch = peek(length)
    end
    # A named handle must be closed by a second '!'.
    if ch != ?!
      forward(length)
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
    end
    length += 1
  end
  value = prefix(length)
  forward(length)
  value
end
1194
+
1195
# Reads a tag URI, decoding %XX escapes as it goes. Raises if the URI
# is empty.
# Note: we do not check if URI is well-formed.
def scan_tag_uri(name, start_mark)
  chunks = []
  length = 0
  ch = peek(length)
  while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
    if ch == ?%
      # Flush the literal run scanned so far, then decode the escapes.
      chunks << prefix(length)
      forward(length)
      length = 0
      chunks << scan_uri_escapes(name, start_mark)
    else
      length += 1
    end
    ch = peek(length)
  end
  if length!=0
    chunks << prefix(length)
    forward(length)
    length = 0
  end

  raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
  chunks.join('')
end
1221
+
1222
# Decodes a run of %XX URI escape sequences into the characters they
# name. Raises ScannerError when a '%' is not followed by two hex
# digits.
def scan_uri_escapes(name, start_mark)
  # See the specification for details.
  bytes = []
  mark = get_mark
  while peek == ?%
    forward
    2.times do |k|
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
        get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
    end
    # BUG FIX: the two digits are hexadecimal. The old code parsed them
    # as decimal and appended that number's hex STRING ("%20" produced
    # "14"); decode the byte value instead.
    bytes << prefix(2).to_i(16).chr
    forward(2)
  end
  bytes.join('')
end
1237
+
1238
# Consumes one line break, normalizing it:
#   '\r\n' : '\n'
#   '\r'   : '\n'
#   '\n'   : '\n'
#   '\x85' : '\n'
#   default: ''  (nothing consumed)
def scan_line_break
  ch = peek
  return "" unless "\r\n\x85".include?(ch)
  # CRLF is a single break; everything else is one character wide.
  if prefix(2) == "\r\n"
    forward(2)
  else
    forward
  end
  "\n"
end
1256
+ end
1257
+ end
1258
+