RbYAML 0.0.1
Contents of the rbyaml 0.0.1 gem; each entry below lists a file and its added/removed line counts.
- data/LICENSE +19 -0
- data/README +31 -0
- data/lib/rbyaml.rb +378 -0
- data/lib/rbyaml/composer.rb +189 -0
- data/lib/rbyaml/constructor.rb +374 -0
- data/lib/rbyaml/detector.rb +44 -0
- data/lib/rbyaml/dumper.rb +40 -0
- data/lib/rbyaml/emitter.rb +1116 -0
- data/lib/rbyaml/error.rb +81 -0
- data/lib/rbyaml/events.rb +92 -0
- data/lib/rbyaml/loader.rb +49 -0
- data/lib/rbyaml/nodes.rb +69 -0
- data/lib/rbyaml/parser.rb +488 -0
- data/lib/rbyaml/reader.rb +127 -0
- data/lib/rbyaml/representer.rb +183 -0
- data/lib/rbyaml/scanner.rb +1258 -0
- data/lib/rbyaml/serializer.rb +120 -0
- data/lib/rbyaml/test.rb +56 -0
- data/lib/rbyaml/tokens.rb +163 -0
- data/lib/rbyaml/yaml.rb +143 -0
- data/test/test_rbyaml.rb +18 -0
- data/test/yaml/gems.yml +130951 -0
- data/test/yaml/gems2.yml +113 -0
- data/test/yaml/test1.yml +3 -0
- data/test/yaml/test10.yml +8 -0
- data/test/yaml/test12.yml +8 -0
- data/test/yaml/test13.yml +4 -0
- data/test/yaml/test14.yml +4 -0
- data/test/yaml/test15.yml +8 -0
- data/test/yaml/test16.yml +7 -0
- data/test/yaml/test18.yml +6 -0
- data/test/yaml/test19.yml +5 -0
- data/test/yaml/test2.yml +3 -0
- data/test/yaml/test20.yml +6 -0
- data/test/yaml/test21.yml +4 -0
- data/test/yaml/test22.yml +4 -0
- data/test/yaml/test23.yml +13 -0
- data/test/yaml/test24.yml +14 -0
- data/test/yaml/test25.yml +7 -0
- data/test/yaml/test26.yml +7 -0
- data/test/yaml/test27.yml +29 -0
- data/test/yaml/test28.yml +26 -0
- data/test/yaml/test29.yml +13 -0
- data/test/yaml/test3.yml +8 -0
- data/test/yaml/test30.yml +7 -0
- data/test/yaml/test31.yml +2 -0
- data/test/yaml/test32.yml +13 -0
- data/test/yaml/test33.yml +2 -0
- data/test/yaml/test34.yml +8 -0
- data/test/yaml/test35.yml +4 -0
- data/test/yaml/test36.yml +8 -0
- data/test/yaml/test37.yml +2 -0
- data/test/yaml/test38.yml +8 -0
- data/test/yaml/test39.yml +2 -0
- data/test/yaml/test4.yml +8 -0
- data/test/yaml/test40.yml +3 -0
- data/test/yaml/test41.yml +5 -0
- data/test/yaml/test42.yml +12 -0
- data/test/yaml/test43.yml +15 -0
- data/test/yaml/test44.yml +23 -0
- data/test/yaml/test5.yml +3 -0
- data/test/yaml/test6.yml +5 -0
- data/test/yaml/test7.yml +10 -0
- data/test/yaml/test8.yml +10 -0
- data/test/yaml/test9.yml +8 -0
- metadata +111 -0
@@ -0,0 +1,127 @@
|
|
1
|
+
# This is a more or less straight translation of PyYAML3000 to Ruby
|
2
|
+
|
3
|
+
# the big difference in this implementation is that unicode support is not here...
|
4
|
+
|
5
|
+
require 'rbyaml/error'
|
6
|
+
|
7
|
+
module RbYAML
|
8
|
+
|
9
|
+
# Reader:
|
10
|
+
# - checks if characters are in allowed range,
|
11
|
+
# - adds '\0' to the end.
|
12
|
+
# Reader accepts
|
13
|
+
# - a String object
|
14
|
+
# - a duck-typed IO object
|
15
|
+
# Reader feeds characters to the scanner:
# - checks that characters are in the allowed (printable) range,
# - appends a terminating '\0' to the end of the stream.
# Accepts either a String or a duck-typed IO object (anything with
# #read; #path, when available, is used as the stream name in errors).
module Reader
  # Prepare to read from +stream+ (String or IO-like object).
  def initialize_reader(stream)
    @stream = nil
    @stream_pointer = 0
    @eof = true
    @buffer = ""        # characters available to the scanner
    @pointer = 0        # index of the next character inside @buffer
    @index = 0          # absolute character index in the stream
    @line = 0
    @column = 0
    if String === stream
      @name = "<string>"
      @raw_buffer = stream
    else
      @stream = stream
      @name = stream.respond_to?(:path) ? stream.path : stream.inspect
      @eof = false
      @raw_buffer = ""
    end
  end

  # Peek at the character +index+ positions ahead without consuming it.
  def peek(index=0)
    update(index+1) if @pointer+index+1 >= @buffer.length
    @buffer[@pointer+index]
  end

  # Return (without consuming) the next +length+ characters.
  def prefix(length=1)
    update(length) if @pointer+length >= @buffer.length
    @buffer[@pointer...@pointer+length]
  end

  # Consume +length+ characters, keeping @line/@column up to date.
  def forward(length=1)
    update(length+1) if @pointer+length+1 >= @buffer.length
    length.times { |k|
      ch = @buffer[@pointer]
      @pointer += 1
      @index += 1
      # A line break is '\n', NEL (\x85), or a '\r' that is NOT part of
      # a "\r\n" pair (the '\n' of the pair is counted instead).
      # BUGFIX: after the increment the character following +ch+ sits at
      # @pointer, not @pointer+1 — the old check looked one character too
      # far and double-counted lines for "\r\n" sequences.
      if "\n\x85".include?(ch) || (ch == ?\r && @buffer[@pointer] != ?\n)
        @line += 1
        @column = 0
      else
        @column += 1
      end
    }
  end

  # Build a Mark for error reporting; the surrounding buffer snippet is
  # only available when reading from an in-memory String.
  def get_mark
    if @stream.nil?
      Mark.new(@name,@index,@line,@column,@buffer,@pointer)
    else
      Mark.new(@name,@index,@line,@column,nil,nil)
    end
  end

  # Bytes outside this set are rejected (no unicode support here).
  # BUGFIX: the /n flag keeps the \x85 and \xA0-\xFF escapes
  # byte-oriented, matching the original Ruby 1.8 semantics; without it
  # the literal is a syntax error on Ruby >= 1.9.
  NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/n

  # Raise ReaderError if +data+ contains a non-printable character.
  def check_printable(data)
    if NON_PRINTABLE =~ data
      position = @index+@buffer.length-@pointer+($~.offset(0)[0])
      raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed"),"special characters are not allowed"
    end
  end

  # Ensure at least +length+ characters are available past @pointer,
  # pulling more raw data from the stream as needed. Appends '\0' once
  # the underlying stream is exhausted.
  def update(length)
    return if @raw_buffer.nil?
    @buffer = @buffer[@pointer..-1]
    @pointer = 0
    while @buffer.length < length
      update_raw unless @eof
      data = @raw_buffer
      converted = data.length
      check_printable(data)
      @buffer << data
      @raw_buffer = @raw_buffer[converted..-1]
      if @eof
        @buffer << ?\0
        @raw_buffer = nil
        break
      end
    end
  end

  # Read up to +size+ bytes from the stream into @raw_buffer; set @eof
  # when the stream is exhausted.
  def update_raw(size=1024)
    data = @stream.read(size)
    if data && !data.empty?
      @raw_buffer << data
      @stream_pointer += data.length
    else
      @eof = true
    end
  end
end
|
108
|
+
|
109
|
+
# Raised by Reader when the input contains characters outside the
# printable range. Extends YAMLError (defined in rbyaml/error).
class ReaderError < YAMLError
  def initialize(name, position, character, encoding, reason)
    @name = name            # stream name ("<string>" or IO path)
    @position = position    # absolute character offset of the offender
    @character = character  # offending character (String) or code (Integer)
    @encoding = encoding
    @reason = reason
  end

  def to_s
    if String === @character
      # BUGFIX: format the byte value of the offending character;
      # String#to_i on a non-numeric character always yielded 0, so the
      # message reported "#x00" for every bad byte.
      "'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.unpack('C').first
    else
      "unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
    end
  end
end
|
126
|
+
end
|
127
|
+
|
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
require 'rbyaml/error'
|
5
|
+
require 'rbyaml/nodes'
|
6
|
+
require 'rbyaml/detector'
|
7
|
+
|
8
|
+
module RbYAML
|
9
|
+
# Raised when an object cannot be converted to a YAML node tree
# (recursive structures, or objects with no registered representer).
class RepresenterError < YAMLError
end
|
11
|
+
|
12
|
+
# Shared machinery for turning live Ruby objects into YAML node trees.
# Representer methods are registered per class via add_representer;
# lookup walks the object's ancestor chain and falls back to the
# nil-keyed default representer when nothing matches.
module BaseRepresenter
  # Class (or nil for the fallback) => Symbol of the method to call.
  @@yaml_representers = {}

  def initialize_representer
    # Tracks object_id => node, used for alias sharing and to detect
    # recursive structures.
    @represented_objects = {}
  end

  # Represent +data+ and hand the resulting node to the serializer
  # (#serialize is provided by the Serializer mixin).
  def represent(data)
    node = represent_object(data)
    serialize(node)
    # BUGFIX: reset the instance variable between documents; the old
    # code assigned a throwaway local, leaking node state across
    # successive documents.
    @represented_objects = {}
  end

  def represent_object(data)
    # Objects whose aliases we ignore are re-represented every time.
    if ignore_aliases(data)
      alias_key = nil
    else
      alias_key = data.object_id
    end

    if !alias_key.nil?
      if @represented_objects.include?(alias_key)
        node = @represented_objects[alias_key]
        # A nil entry means we are still inside this object's own
        # representation, i.e. the structure is recursive.
        raise RepresenterError.new("recursive objects are not allowed: #{data}") if node.nil?
        return node
      end
      @represented_objects[alias_key] = nil
    end

    # Find the most specific registered representer along the ancestor
    # chain.
    # BUGFIX: the old code set its loop flag unconditionally on every
    # iteration, which made the nil-keyed fallback below unreachable
    # and produced a nil node for unregistered objects.
    rep_type = data.class.ancestors.find { |type| @@yaml_representers.include?(type) }
    if rep_type
      node = send(@@yaml_representers[rep_type], data)
    elsif @@yaml_representers.include?(nil)
      node = send(@@yaml_representers[nil], data)
    else
      node = ScalarNode.new(data.taguri, data)
    end
    @represented_objects[alias_key] = node if !alias_key.nil?
    node
  end

  # Register +representer+ (a method name Symbol) for +data_type+;
  # pass data_type=nil to install the fallback representer.
  def self.add_representer(data_type, representer)
    @@yaml_representers[data_type] = representer
  end

  def represent_scalar(tag, value, style=nil)
    ScalarNode.new(tag, value, style)
  end

  def represent_sequence(tag, sequence, flow_style=nil)
    value = sequence.map {|item| represent_object(item)}
    SequenceNode.new(tag, value, flow_style)
  end

  def represent_mapping(tag, mapping, flow_style=nil)
    value = {}
    mapping.each { |item_key,item_value| value[represent_object(item_key)] = represent_object(item_value) }
    MappingNode.new(tag, value, flow_style)
  end

  # Subclasses override this to skip anchor/alias bookkeeping for
  # simple immutable values.
  def ignore_aliases(data)
    false
  end
end
|
84
|
+
|
85
|
+
# Representers for the safe subset of Ruby types. Each method receives
# the live object and returns a node; +taguri+ is expected to be
# defined on represented classes (rbyaml's core-class extensions).
module SafeRepresenter
  include BaseRepresenter

  # Plain immutable values never need anchors/aliases.
  def ignore_aliases(data)
    data.nil? || String === data || TrueClass === data || FalseClass === data || Integer === data || Float === data
  end

  def represent_none(data)
    represent_scalar(data.taguri,"null")
  end

  def represent_str(data)
    represent_scalar(data.taguri,data)
  end

  def represent_symbol(data)
    represent_scalar(data.taguri,data.to_s)
  end

  def represent_bool(data)
    value = data ? "true" : "false"
    represent_scalar(data.taguri,value)
  end

  def represent_int(data)
    represent_scalar(data.taguri,data.to_s)
  end

  def represent_float(data)
    if data.infinite? == 1
      value = ".inf"
    elsif data.infinite? == -1
      value = "-.inf"
    elsif data.nan?
      # (the former redundant `data != data` NaN check is covered
      # by Float#nan?)
      value = ".nan"
    else
      value = data.to_s
    end
    represent_scalar(data.taguri, value)
  end

  def represent_list(data)
    represent_sequence(data.taguri, data)
  end

  def represent_dict(data)
    represent_mapping(data.taguri, data)
  end

  # Sets are emitted as mappings whose values are all null.
  def represent_set(data)
    value = {}
    for key in data
      value[key] = nil
    end
    represent_mapping(data.taguri, value)
  end

  # "YYYY-MM-DD hh:mm:ss[.frac][utc_offset]"
  def represent_datetime(data)
    value = "%04d-%02d-%02d %02d:%02d:%02d" % [data.year, data.month, data.day, data.hour, data.min, data.sec]
    if data.usec != 0
      value += "." + (data.usec/1000000.0).to_s.split(/\./)[1]
    end
    if data.utc_offset != 0
      value += data.utc_offset.to_s
    end
    represent_scalar(data.taguri, value)
  end

  # Represent an arbitrary object through its to_yaml_properties
  # instance variables (ivar name minus the '@' => value).
  def represent_yaml_object(tag, data, flow_style=nil)
    state = data.to_yaml_properties
    mapping = {}
    state.each do |m|
      # BUGFIX: the hash built here is +mapping+; the old code wrote
      # into an undefined name (+map+) and raised NameError when called.
      mapping[m[1..-1]] = data.instance_variable_get(m)
    end
    represent_mapping(tag, mapping, flow_style)
  end

  def represent_undefined(data)
    raise RepresenterError.new("cannot represent an object: #{data}")
  end
end
|
166
|
+
|
167
|
+
# Default representer registrations: map each core class to the
# SafeRepresenter method that renders it. Integer covers both Fixnum
# and Bignum; the nil key installs the fallback used when no class in
# an object's ancestry is registered (it raises RepresenterError).
BaseRepresenter.add_representer(NilClass,:represent_none)
BaseRepresenter.add_representer(String,:represent_str)
BaseRepresenter.add_representer(Symbol,:represent_symbol)
BaseRepresenter.add_representer(TrueClass,:represent_bool)
BaseRepresenter.add_representer(FalseClass,:represent_bool)
BaseRepresenter.add_representer(Integer,:represent_int)
BaseRepresenter.add_representer(Float,:represent_float)
BaseRepresenter.add_representer(Array,:represent_list)
BaseRepresenter.add_representer(Hash,:represent_dict)
BaseRepresenter.add_representer(Set,:represent_set)
BaseRepresenter.add_representer(Time,:represent_datetime)
BaseRepresenter.add_representer(nil,:represent_undefined)
|
179
|
+
|
180
|
+
# Public representer mixin; currently identical to SafeRepresenter.
module Representer
  include SafeRepresenter
end
|
183
|
+
end
|
@@ -0,0 +1,1258 @@
|
|
1
|
+
# Scanner produces tokens of the following types:
|
2
|
+
# STREAM-START
|
3
|
+
# STREAM-END
|
4
|
+
# DIRECTIVE(name, value)
|
5
|
+
# DOCUMENT-START
|
6
|
+
# DOCUMENT-END
|
7
|
+
# BLOCK-SEQUENCE-START
|
8
|
+
# BLOCK-MAPPING-START
|
9
|
+
# BLOCK-END
|
10
|
+
# FLOW-SEQUENCE-START
|
11
|
+
# FLOW-MAPPING-START
|
12
|
+
# FLOW-SEQUENCE-END
|
13
|
+
# FLOW-MAPPING-END
|
14
|
+
# BLOCK-ENTRY
|
15
|
+
# FLOW-ENTRY
|
16
|
+
# KEY
|
17
|
+
# VALUE
|
18
|
+
# ALIAS(value)
|
19
|
+
# ANCHOR(value)
|
20
|
+
# TAG(value)
|
21
|
+
# SCALAR(value, plain)
|
22
|
+
#
|
23
|
+
# Read comments in the Scanner code for more details.
|
24
|
+
#
|
25
|
+
|
26
|
+
require 'rbyaml/error'
|
27
|
+
require 'rbyaml/tokens'
|
28
|
+
|
29
|
+
module RbYAML
|
30
|
+
# Raised on malformed input while tokenizing; inherits context/problem
# mark handling from MarkedYAMLError (rbyaml/error).
class ScannerError < MarkedYAMLError
end
|
32
|
+
|
33
|
+
# Immutable record of a potential simple-key position in the token
# stream (see Scanner#save_possible_simple_key).
class SimpleKey
  attr_reader :token_number, :required, :index, :line, :column, :mark

  def initialize(token_number, required, index, line, column, mark)
    @token_number, @required, @index, @line, @column, @mark =
      token_number, required, index, line, column, mark
  end
end
|
45
|
+
|
46
|
+
module Scanner
|
47
|
+
# Set up tokenizer state and queue the initial STREAM-START token.
# Assumes the Reader mixin shares this object (peek/prefix/forward).
def initialize_scanner
  @done = false                # end of stream reached?
  @flow_level = 0              # unclosed '{'/'[' count; 0 == block context
  @tokens = []                 # produced tokens not yet handed out
  fetch_stream_start           # STREAM-START is always queued first
  @tokens_taken = 0            # tokens already emitted via get_token
  @indent = -1                 # current indentation level
  @indents = []                # enclosing indentation levels
  # Simple keys are keys not introduced by '?'. The KEY token must be
  # emitted *before* the key, so potential simple-key positions are
  # remembered until the matching ':' is found (a simple key is limited
  # to a single line and 1024 characters). A simple key may start at
  # the beginning of a line, after '{'/'['/',' (flow context) or after
  # '?'/':'/'-' (block context).
  @allow_simple_key = true     # may a simple key start here?
  @possible_simple_keys = {}   # flow_level => SimpleKey (at most one per level)
end
|
108
|
+
|
109
|
+
# True when the next token exists and matches one of +choices+
# (matched with ===, so token classes work); with no arguments, true
# when any token remains.
def check_token(*choices)
  fetch_more_tokens while need_more_tokens
  return false if @tokens.empty?
  return true if choices.empty?
  choices.any? { |choice| choice === @tokens.first }
end

# Return the next token without removing it from the queue (nil when
# the stream is exhausted).
def peek_token
  fetch_more_tokens while need_more_tokens
  @tokens.first
end

# Remove and return the next token, counting it as taken.
def get_token
  fetch_more_tokens while need_more_tokens
  return if @tokens.empty?
  @tokens_taken += 1
  @tokens.shift
end

# Yield every remaining token in stream order.
def each_token
  fetch_more_tokens while need_more_tokens
  until @tokens.empty?
    @tokens_taken += 1
    yield @tokens.shift
    fetch_more_tokens while need_more_tokens
  end
end
|
144
|
+
|
145
|
+
# Decide whether the scanner must produce more tokens before the head
# of the queue can safely be handed out.
def need_more_tokens
  if @done
    false
  elsif @tokens.empty?
    true
  else
    # The queued token may still become a simple KEY, so we need to
    # look further ahead until that possibility is resolved.
    stale_possible_simple_keys
    true if next_possible_simple_key == @tokens_taken
  end
end
|
153
|
+
|
154
|
+
# Scan forward and queue the next token(s): skip whitespace/comments,
# close indentation levels the new column terminates, then dispatch on
# the next character.
# BUGFIX: rewritten from `when cond: expr` to `when cond then expr` —
# the colon form was removed in Ruby 1.9, so the old body fails to
# parse on any modern Ruby.
def fetch_more_tokens
  # Eat whitespaces and comments until we reach the next token.
  scan_to_next_token
  # Remove obsolete possible simple keys.
  stale_possible_simple_keys
  # Compare the current indentation and column. This may queue BLOCK-END
  # tokens and decrease the current indentation level.
  unwind_indent(@column)
  # Peek the next character and dispatch.
  ch = peek
  return case
         when ch == ?\0 then fetch_stream_end                            # end of stream
         when ch == ?% && check_directive then fetch_directive           # '%' at column 0
         when ch == ?- && check_document_start then fetch_document_start # '---'
         when ch == ?. && check_document_end then fetch_document_end     # '...'
         when ch == ?[ then fetch_flow_sequence_start
         when ch == ?{ then fetch_flow_mapping_start
         when ch == ?] then fetch_flow_sequence_end
         when ch == ?} then fetch_flow_mapping_end
         when ch == ?, then fetch_flow_entry
         when ch == ?- && check_block_entry then fetch_block_entry
         when ch == ?? && check_key then fetch_key
         when ch == ?: && check_value then fetch_value
         when ch == ?* then fetch_alias
         when ch == ?& then fetch_anchor
         when ch == ?! then fetch_tag
         # Block scalars are only recognized in block context.
         when ch == ?| && @flow_level==0 then fetch_literal
         when ch == ?> && @flow_level==0 then fetch_folded
         when ch == ?' then fetch_single
         when ch == ?" then fetch_double
         # It must be a plain scalar then.
         when check_plain then fetch_plain
         else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
         end
end
|
212
|
+
|
213
|
+
# Simple keys treatment.
|
214
|
+
|
215
|
+
# Token number of the earliest pending simple key, or nil when none
# are pending.
def next_possible_simple_key
  earliest = @possible_simple_keys.values.min_by { |key| key.token_number }
  earliest && earliest.token_number
end
|
227
|
+
|
228
|
+
# Drop pending simple keys that can no longer be completed: per the
# YAML spec a simple key must stay on one line and within 1024
# characters. Raises when a *required* key goes stale.
# BUGFIX: the old implementation used `return true/false` inside the
# delete_if block; `return` in a block is a non-local return that exits
# the whole method, so no entry was ever actually deleted.
def stale_possible_simple_keys
  @possible_simple_keys.delete_if { |level,key|
    if key.line != @line || @index-key.index > 1024
      raise ScannerError.new("while scanning a simple key", key.mark, "could not found expected ':'",get_mark) if key.required
      true
    else
      false
    end
  }
end
|
243
|
+
|
244
|
+
# Record the current position as a potential simple-key start. Called
# before ALIAS, ANCHOR, TAG, flow SCALAR, '[' and '{'.
def save_possible_simple_key
  # A key is *required* when a block mapping is expected exactly at
  # this indentation column.
  required = @flow_level==0 && @indent == @column
  return unless @allow_simple_key
  remove_possible_simple_key
  token_number = @tokens_taken + @tokens.length
  @possible_simple_keys[@flow_level] =
    SimpleKey.new(token_number, required, @index, @line, @column, get_mark)
end
|
261
|
+
|
262
|
+
# Forget the pending simple key at the current flow level, if any.
# BUGFIX: the old body only *read* the entry into a throwaway local and
# never deleted it, so stale keys lingered in the table and could be
# retro-inserted as KEY tokens by fetch_value later.
def remove_possible_simple_key
  @possible_simple_keys.delete(@flow_level)
end
|
266
|
+
|
267
|
+
# Indentation functions.
|
268
|
+
|
269
|
+
# Close indentation levels deeper than +column+, queueing a BLOCK-END
# token for each level popped. Indentation is ignored entirely in flow
# context — deliberately laxer than the spec (whose `@indent >= column`
# rule would reject intuitively correct constructs like "key : {\n}").
def unwind_indent(column)
  return nil unless @flow_level == 0
  while @indent > column
    mark = get_mark
    @indent = @indents.pop
    @tokens << BlockEndToken.new(mark, mark)
  end
end
|
291
|
+
|
292
|
+
# Push the current indent and adopt +column+ when it is deeper;
# returns true when a new indentation level was opened.
def add_indent(column)
  return false unless @indent < column
  @indents << @indent
  @indent = column
  true
end
|
301
|
+
|
302
|
+
# Fetchers.
|
303
|
+
|
304
|
+
# Queue the STREAM-START token; always the very first token emitted.
# NOTE(review): @encoding is never assigned anywhere in this module, so
# it reads as nil here — presumably a leftover from the PyYAML port;
# confirm before relying on it.
def fetch_stream_start
  mark = get_mark
  @tokens << StreamStartToken.new(mark, mark, @encoding)
end

# Queue the final STREAM-END token and mark the scanner as done.
def fetch_stream_end
  # Close all open indentation levels first.
  unwind_indent(-1)
  # Reset simple-key state (not strictly needed, stream is over).
  @allow_simple_key = false
  @possible_simple_keys = {}
  mark = get_mark
  @tokens << StreamEndToken.new(mark, mark)
  @done = true
end
|
327
|
+
|
328
|
+
# Queue a DIRECTIVE token ('%...' at column 0).
def fetch_directive
  unwind_indent(-1)            # a directive terminates the document body
  remove_possible_simple_key   # no simple key may follow
  @allow_simple_key = false
  @tokens << scan_directive
end
|
337
|
+
|
338
|
+
def fetch_document_start
  fetch_document_indicator(DocumentStartToken)
end

def fetch_document_end
  fetch_document_indicator(DocumentEndToken)
end

# Queue a DOCUMENT-START/END token for the three-character indicator
# ('---' or '...') at the current position.
def fetch_document_indicator(token)
  unwind_indent(-1)
  # No block collection or simple key may follow '---'/'...'.
  remove_possible_simple_key
  @allow_simple_key = false
  start_mark = get_mark
  forward(3)
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end
|
359
|
+
|
360
|
+
def fetch_flow_sequence_start
  fetch_flow_collection_start(FlowSequenceStartToken)
end

def fetch_flow_mapping_start
  fetch_flow_collection_start(FlowMappingStartToken)
end

# Queue the token for '[' or '{': enters flow context, where a simple
# key may immediately follow.
def fetch_flow_collection_start(token)
  save_possible_simple_key   # '[' / '{' may themselves start a simple key
  @flow_level += 1
  @allow_simple_key = true
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end

def fetch_flow_sequence_end
  fetch_flow_collection_end(FlowSequenceEndToken)
end

def fetch_flow_mapping_end
  fetch_flow_collection_end(FlowMappingEndToken)
end

# Queue the token for ']' or '}': leaves flow context; no simple key
# may follow a closing bracket.
def fetch_flow_collection_end(token)
  remove_possible_simple_key
  @flow_level -= 1
  @allow_simple_key = false
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << token.new(start_mark, end_mark)
end
|
403
|
+
|
404
|
+
# Queue a FLOW-ENTRY token for ','; a simple key may follow it.
def fetch_flow_entry
  @allow_simple_key = true
  remove_possible_simple_key
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << FlowEntryToken.new(start_mark, end_mark)
end
|
415
|
+
|
416
|
+
# Queue a BLOCK-ENTRY token for the '-' sequence-entry indicator,
# opening a BLOCK-SEQUENCE-START first when this entry begins a new
# block sequence at a deeper indentation.
def fetch_block_entry
  # Block context needs additional checks.
  if @flow_level==0
    # A '-' entry is only legal where a simple key could start.
    raise ScannerError.new(nil,nil,"sequence entries are not allowed here",get_mark) if !@allow_simple_key
    # We may need to add BLOCK-SEQUENCE-START.
    if add_indent(@column)
      mark = get_mark
      @tokens << BlockSequenceStartToken.new(mark, mark)
    end
    # It's an error for the block entry to occur in the flow context,
    # but we let the parser detect this.
  end
  # Simple keys are allowed after '-'.
  @allow_simple_key = true
  # Reset possible simple key on the current level.
  remove_possible_simple_key
  # Add BLOCK-ENTRY.
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << BlockEntryToken.new(start_mark, end_mark)
end

# Queue a KEY token for the explicit '?' key indicator, opening a
# BLOCK-MAPPING-START first when it begins a new block mapping.
def fetch_key
  # Block context needs additional checks.
  if @flow_level==0
    # Are we allowed to start a key (not necessarily a simple one)?
    raise ScannerError.new(nil,nil,"mapping keys are not allowed here",get_mark) if !@allow_simple_key
    # We may need to add BLOCK-MAPPING-START.
    if add_indent(@column)
      mark = get_mark
      @tokens << BlockMappingStartToken.new(mark, mark)
    end
  end
  # Simple keys are allowed after '?' in the block context only.
  @allow_simple_key = @flow_level==0
  # Reset possible simple key on the current level.
  remove_possible_simple_key
  # Add KEY.
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << KeyToken.new(start_mark, end_mark)
end
|
460
|
+
|
461
|
+
# Queue a VALUE token for ':'. If a simple key is pending at this flow
# level, the matching KEY token (and possibly BLOCK-MAPPING-START) is
# retro-inserted into the token queue at the position recorded for
# that key.
def fetch_value
  # Do we determine a simple key?
  if @possible_simple_keys.include?(@flow_level)
    # Add KEY. key.token_number is absolute; subtracting @tokens_taken
    # converts it to an index into the pending queue, i.e. the spot
    # where the key's first token sits.
    key = @possible_simple_keys[@flow_level]
    @possible_simple_keys.delete(@flow_level)
    @tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
    # If this key starts a new block mapping, we need to add
    # BLOCK-MAPPING-START (inserted at the same index, so it lands
    # just before the KEY we inserted above).
    @tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
    # There cannot be two simple keys one after another.
    @allow_simple_key = false
    # It must be a part of a complex key.
  else
    # Block context needs additional checks.
    # (Do we really need them? They will be caught by the parser
    # anyway.)
    if @flow_level==0
      # We are allowed to start a complex value if and only if
      # we can start a simple key.
      raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
      # Simple keys are allowed after ':' in the block context.
      @allow_simple_key = @flow_level==0
      # Reset possible simple key on the current level.
      remove_possible_simple_key
    end
  end
  # Add VALUE.
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << ValueToken.new(start_mark, end_mark)
end
|
494
|
+
|
495
|
+
# Queue an ALIAS token ('*name'); the alias itself may be a simple key,
# but no simple key may start right after it.
def fetch_alias
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_anchor(AliasToken)
end

# Queue an ANCHOR token ('&name'); same simple-key rules as ALIAS.
def fetch_anchor
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_anchor(AnchorToken)
end

# Queue a TAG token ('!...'); same simple-key rules as ALIAS.
def fetch_tag
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_tag
end
|
521
|
+
|
522
|
+
def fetch_literal
  fetch_block_scalar(?|)
end

def fetch_folded
  fetch_block_scalar(?>)
end

# Queue a block SCALAR ('|' literal or '>' folded). A simple key may
# follow a block scalar.
def fetch_block_scalar(style)
  @allow_simple_key = true
  remove_possible_simple_key
  @tokens << scan_block_scalar(style)
end

def fetch_single
  fetch_flow_scalar(?')
end

def fetch_double
  fetch_flow_scalar(?")
end

# Queue a quoted SCALAR; the scalar itself may be a simple key, but no
# simple key may start right after it.
def fetch_flow_scalar(style)
  save_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_flow_scalar(style)
end
|
555
|
+
|
556
|
+
# Fetch a plain (unquoted) SCALAR token.
def fetch_plain
  # A plain scalar could itself be a simple key.
  save_possible_simple_key
  # Disallow simple keys after the scalar; note that `scan_plain`
  # re-enables the flag when its scan stops at the start of a line.
  @allow_simple_key = false
  # Scan and emit the token; may flip `allow_simple_key` back on.
  @tokens.push(scan_plain)
end
|
566
|
+
|
567
|
+
# Checkers.
|
568
|
+
|
569
|
+
# DIRECTIVE: ^ '%' ...
# The '%' indicator itself was already checked by the caller; a
# directive is only valid at the very start of a line.
def check_directive
  @column.zero?
end
|
574
|
+
|
575
|
+
# DOCUMENT-START: ^ '---' (' '|'\n')
# True when the cursor is at column 0 on "---" followed by
# whitespace, a line break, or end of stream.
def check_document_start
  @column.zero? && prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
end
|
579
|
+
|
580
|
+
# DOCUMENT-END: ^ '...' (' '|'\n')
# Returns true when the cursor is at column 0 on "..." followed by
# whitespace/EOF; a falsy value otherwise.
def check_document_end
  if @column == 0
    # A dead `prefix = peek(4)` lookahead was removed here: its result
    # was never used (leftover from the PyYAML port).
    return true if prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
  end
end
|
587
|
+
|
588
|
+
# BLOCK-ENTRY: '-' (' '|'\n')
# The '-' indicator itself was already checked by the caller.
def check_block_entry
  following = peek(1)
  "\0 \t\r\n\x85".include?(following)
end
|
592
|
+
|
593
|
+
# KEY(flow context):  '?' alone suffices.
# KEY(block context): '?' must be followed by whitespace or a break.
def check_key
  return true if @flow_level != 0
  "\0 \t\r\n\x85".include?(peek(1))
end
|
598
|
+
|
599
|
+
# VALUE(flow context):  ':' alone suffices.
# VALUE(block context): ':' must be followed by whitespace or a break.
def check_value
  return true if @flow_level != 0
  "\0 \t\r\n\x85".include?(peek(1))
end
|
604
|
+
|
605
|
+
# A plain scalar may start with any non-space character except the
# indicators '-', '?', ':', ',', '[', ']', '{', '}', '#', '&', '*',
# '!', '|', '>', '\'', '"', '%', '@', '`'.
#
# It may also start with '-', '?' or ':' when the indicator is
# followed by a non-space character — the '?'/':' case only in the
# block context, so that the flow context stays space independent.
def check_plain
  ch = peek
  # BUGFIX: the second alternative previously negated the whole
  # conjunction (`!(lookahead && indicator)`), which wrongly accepted
  # e.g. '?x' as a plain-scalar start in the flow context. Only the
  # lookahead membership test must be negated (PyYAML:
  # `peek(1) not in '\0 \t\r\n\x85' and (...)`).
  !("\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)) ||
    (!"\0 \t\r\n\x85".include?(peek(1)) &&
      (ch == ?- || (@flow_level == 0 && "?:".include?(ch))))
end
|
621
|
+
|
622
|
+
|
623
|
+
|
624
|
+
|
625
|
+
|
626
|
+
|
627
|
+
# Scanners.
|
628
|
+
|
629
|
+
# Skip spaces, comments and line breaks until the next token.
# A line break in the block context re-enables simple keys.
#
# TODO: tab handling should be tightened: tabs preceding block
# structure tokens (BLOCK-SEQUENCE-START, BLOCK-MAPPING-START,
# BLOCK-END, KEY/VALUE(block), BLOCK-ENTRY) ought to disable simple
# keys, and `unwind_indent` should check `allow_simple_keys` before
# issuing BLOCK-END.
def scan_to_next_token
  loop do
    # Skip plain spaces.
    forward while peek == 32
    # Skip a comment to the end of the line.
    if peek == ?#
      forward until "\0\r\n\x85".include?(peek)
    end
    # Stop once no line break follows; otherwise loop for more blank
    # lines, re-enabling simple keys in the block context.
    break if scan_line_break.empty?
    @allow_simple_key = true if @flow_level == 0
  end
end
|
659
|
+
|
660
|
+
# Scan a '%' directive line and return a DirectiveToken.
# Known directives (YAML, TAG) get a parsed value; for unknown
# directives the rest of the line is skipped and value stays nil.
def scan_directive
  start_mark = get_mark
  forward # consume the '%'
  name = scan_directive_name(start_mark)
  value = nil
  case name
  when "YAML"
    value = scan_yaml_directive_value(start_mark)
    end_mark = get_mark
  when "TAG"
    value = scan_tag_directive_value(start_mark)
    end_mark = get_mark
  else
    # Unknown directive: mark its end, then skip to the line end.
    end_mark = get_mark
    forward until "\0\r\n\x85".include?(peek)
  end
  scan_directive_ignored_line(start_mark)
  DirectiveToken.new(name, value, start_mark, end_mark)
end
|
679
|
+
|
680
|
+
# Scan the word following the '%' of a directive.
# Raises ScannerError when the name is empty or not terminated by
# whitespace/EOL.
def scan_directive_name(start_mark)
  # Measure the run of name characters.
  length = 0
  ch = peek(length)
  until /[-0-9A-Za-z_]/ !~ ch.chr
    length += 1
    ch = peek(length)
  end
  if length == 0
    raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark)
  end
  name = prefix(length)
  forward(length)
  ch = peek()
  unless "\0 \r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark)
  end
  name
end
|
695
|
+
|
696
|
+
# Scan "<major> '.' <minor>" after a %YAML directive name and return
# the pair [major, minor].
def scan_yaml_directive_value(start_mark)
  forward while peek == 32
  major = scan_yaml_directive_number(start_mark)
  unless peek == ?.
    raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark)
  end
  forward # consume the '.'
  minor = scan_yaml_directive_number(start_mark)
  unless "\0 \r\n\x85".include?(peek)
    raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark)
  end
  [major, minor]
end
|
706
|
+
|
707
|
+
# Scan one decimal component of a %YAML version.
# Returns the value as an Integer so that downstream numeric
# comparisons against the supported version work.
def scan_yaml_directive_number(start_mark)
  ch = peek
  raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark) if !((?0..?9) === ch)
  length = 0
  length += 1 while ((?0..?9) === peek(length))
  value = prefix(length)
  forward(length)
  # BUGFIX: previously the raw digit String was returned, which broke
  # numeric version checks in the parser; convert to Integer.
  value.to_i
end
|
717
|
+
|
718
|
+
# Scan "<handle> <prefix>" after a %TAG directive name and return
# the pair [handle, prefix].
def scan_tag_directive_value(start_mark)
  forward while peek == 32
  handle = scan_tag_directive_handle(start_mark)
  forward while peek == 32
  [handle, scan_tag_directive_prefix(start_mark)]
end
|
726
|
+
|
727
|
+
# Scan the handle part of a %TAG directive; it must be followed by a
# single space.
def scan_tag_directive_handle(start_mark)
  handle = scan_tag_handle("directive", start_mark)
  ch = peek
  unless ch == 32
    raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark())
  end
  handle
end
|
734
|
+
|
735
|
+
# Scan the prefix part of a %TAG directive; it must be terminated by
# whitespace, a line break, or end of stream.
def scan_tag_directive_prefix(start_mark)
  uri = scan_tag_uri("directive", start_mark)
  ch = peek
  unless "\0 \r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark())
  end
  uri
end
|
742
|
+
|
743
|
+
# Consume trailing spaces, an optional comment and the terminating
# line break at the end of a directive line.
def scan_directive_ignored_line(start_mark)
  forward while peek == 32
  if peek == ?#
    forward until "\0\r\n\x85".include?(peek)
  end
  ch = peek
  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark())
  end
  scan_line_break
end
|
753
|
+
|
754
|
+
# Scan an anchor (&name) or alias (*name) and build an instance of
# the given token class.
#
# The specification does not restrict characters for anchors and
# aliases, which makes documents such as
#   [ *alias, value ]
# ambiguous ("value" alone vs. alias followed by "value"). We
# therefore restrict names to ASCII letters, digits, '-' and '_'.
def scan_anchor(token)
  start_mark = get_mark
  kind = (peek == ?*) ? "alias" : "anchor"
  forward # consume '&' or '*'
  # Measure the run of name characters.
  length = 0
  ch = peek(length)
  until /[-0-9A-Za-z_]/ !~ ch.chr
    length += 1
    ch = peek(length)
  end
  if length == 0
    raise ScannerError.new("while scanning an #{kind}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
  end
  value = prefix(length)
  forward(length)
  ch = peek
  unless "\0 \t\r\n\x85?:,]}%@`".include?(ch)
    raise ScannerError.new("while scanning an #{kind}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
  end
  token.new(value, start_mark, get_mark)
end
|
783
|
+
|
784
|
+
|
785
|
+
# Scan a tag: verbatim (!<uri>), the non-specific '!', or
# handle+suffix form. Returns a TagToken whose value is
# [handle, suffix] (handle is nil for the verbatim form).
def scan_tag
  start_mark = get_mark
  ch = peek(1)
  if ch == ?<
    # Verbatim tag: !<uri>
    handle = nil
    forward(2)
    suffix = scan_tag_uri("tag", start_mark)
    raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark) if peek != ?>
    forward
  elsif "\0 \t\r\n\x85".include?(ch)
    # Lone '!' — the non-specific tag.
    handle = nil
    suffix = "!"
    forward
  else
    # Scan forward to decide whether a second '!' closes a handle.
    length = 1
    use_handle = false
    while !"\0 \t\r\n\x85".include?(ch)
      if ch == ?!
        use_handle = true
        break
      end
      length += 1
      ch = peek(length)
    end
    # (A dead `handle = "!"` assignment was removed here; both
    # branches below assign handle unconditionally.)
    if use_handle
      handle = scan_tag_handle("tag", start_mark)
    else
      handle = "!"
      forward
    end
    suffix = scan_tag_uri("tag", start_mark)
  end
  ch = peek
  raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark) if !"\0 \r\n\x85".include?(ch)
  value = [handle, suffix]
  end_mark = get_mark
  TagToken.new(value, start_mark, end_mark)
end
|
825
|
+
|
826
|
+
# Scan a literal ('|') or folded ('>') block scalar and return a
# (non-plain) ScalarToken. `style` is the indicator character code.
def scan_block_scalar(style)
  folded = style == ?>
  chunks = []
  start_mark = get_mark
  # Scan the header: the indicator, then chomping/indent indicators.
  forward
  chomping, increment = scan_block_scalar_indicators(start_mark)
  scan_block_scalar_ignored_line(start_mark)
  # Determine the indentation level and go to the first non-empty line.
  min_indent = @indent+1
  min_indent = 1 if min_indent < 1
  if increment.nil?
    breaks, max_indent, end_mark = scan_block_scalar_indentation
    indent = [min_indent, max_indent].max
  else
    indent = min_indent+increment-1
    breaks, end_mark = scan_block_scalar_breaks(indent)
  end
  line_break = ''
  # Scan the inner part of the block scalar.
  while @column == indent and peek != ?\0
    chunks += breaks
    leading_non_space = !" \t".include?(peek)
    length = 0
    length += 1 while !"\0\r\n\x85".include?(peek(length))
    chunks << prefix(length)
    forward(length)
    line_break = scan_line_break
    breaks, end_mark = scan_block_scalar_breaks(indent)
    if @column == indent && peek != 0
      # Folding per the specification: a lone break between two
      # non-space lines of a folded scalar becomes one space.
      # BUGFIX: scan_line_break returns the String "\n"; comparing it
      # against the character code ?\n never matched, so folded
      # scalars were never actually folded.
      if folded && line_break == "\n" && leading_non_space && !" \t".include?(peek())
        chunks << ' ' if breaks.empty?
      else
        chunks << line_break
      end
    else
      break
    end
  end

  # Chomp the tail. chomping is nil for clip (keep exactly one final
  # break), true for '+' (keep all trailing breaks), false for '-'
  # (strip everything).
  # BUGFIX: the clip (default) case previously stripped the final
  # line break because only `if chomping` was tested.
  if chomping != false
    chunks << line_break
    chunks += breaks if chomping
  end

  # We are done.
  ScalarToken.new(chunks.join(''), false, start_mark, end_mark,style)
end
|
890
|
+
|
891
|
+
# Scan the optional chomping ('+'/'-') and indentation (1-9)
# indicators of a block scalar header, accepted in either order.
# Returns [chomping, increment]: chomping is true ('+'), false ('-')
# or nil (absent); increment is an Integer 1-9 or nil.
def scan_block_scalar_indicators(start_mark)
  chomping = nil
  increment = nil
  ch = peek
  if /[+-]/ =~ ch.chr
    chomping = ch == ?+
    forward
    ch = peek
    if (?0..?9) === ch
      # BUGFIX: ch is a character code; `ch.to_i` yielded the code
      # itself (51 for '3'), not the digit value, so the indent
      # increment and the `== 0` guard were both wrong.
      increment = ch - ?0
      raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
      forward
    end
  elsif (?0..?9) === ch
    # BUGFIX: was `increment = ch`, storing the raw character code.
    increment = ch - ?0
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
    forward
    ch = peek
    if /[+-]/ =~ ch.chr
      chomping = ch == ?+
      forward
    end
  end
  ch = peek
  raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark) if !"\0 \r\n\x85".include?(ch)
  [chomping, increment]
end
|
919
|
+
|
920
|
+
# Consume trailing spaces, an optional comment and the line break at
# the end of a block scalar header line.
def scan_block_scalar_ignored_line(start_mark)
  forward while peek == 32
  if peek == ?#
    forward until "\0\r\n\x85".include?(peek)
  end
  ch = peek
  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark)
  end
  scan_line_break
end
|
931
|
+
|
932
|
+
# Skip the leading blank lines of a block scalar, collecting their
# line breaks and tracking the deepest column reached.
# Returns [breaks, max_indent, end_mark].
def scan_block_scalar_indentation
  breaks = []
  max_indent = 0
  end_mark = get_mark
  while " \r\n\x85".include?(peek)
    if peek == 32
      forward
      max_indent = @column if @column > max_indent
    else
      breaks << scan_line_break
      end_mark = get_mark
    end
  end
  [breaks, max_indent, end_mark]
end
|
948
|
+
|
949
|
+
# Consume blank (or shorter-than-indent) lines inside a block
# scalar, collecting their line breaks.
# Returns [breaks, end_mark].
def scan_block_scalar_breaks(indent)
  breaks = []
  end_mark = get_mark
  # Eat indentation spaces up to the scalar's indent level.
  forward while @column < indent && peek == 32
  while "\r\n\x85".include?(peek)
    breaks << scan_line_break
    end_mark = get_mark
    forward while @column < indent && peek == 32
  end
  [breaks, end_mark]
end
|
961
|
+
|
962
|
+
# Scan a single- or double-quoted scalar.
#
# Indentation rules are deliberately relaxed for quoted scalars: the
# quotes clearly mark their extent, so we only guard against document
# separators appearing inside them (see scan_flow_scalar_breaks).
def scan_flow_scalar(style)
  double = style == ?"
  start_mark = get_mark
  quote = peek
  forward # consume the opening quote
  pieces = scan_flow_scalar_non_spaces(double, start_mark)
  until peek == quote
    pieces += scan_flow_scalar_spaces(double, start_mark)
    pieces += scan_flow_scalar_non_spaces(double, start_mark)
  end
  forward # consume the closing quote
  ScalarToken.new(pieces.join(''), false, start_mark, get_mark, style)
end
|
983
|
+
|
984
|
+
# Single-character escapes: the character following '\' in a
# double-quoted scalar, mapped to its replacement text.
ESCAPE_REPLACEMENTS = {
  "0"  => "\0",    # null
  "a"  => "\x07",  # bell
  "b"  => "\x08",  # backspace
  "t"  => "\x09",  # horizontal tab
  "\t" => "\x09",  # a literal tab escapes to itself
  "n"  => "\x0A",  # line feed
  "v"  => "\x0B",  # vertical tab
  "f"  => "\x0C",  # form feed
  "r"  => "\x0D",  # carriage return
  "e"  => "\x1B",  # escape
  " "  => "\x20",  # space
  '"'  => '"',     # double quote
  "\\" => "\\",    # backslash
  "N"  => "\x85",  # next line (NEL)
  "_"  => "\xA0"   # non-breaking space
}
|
1001
|
+
|
1002
|
+
# Escape characters that introduce a fixed-length hexadecimal
# sequence, mapped to the number of hex digits that follow.
ESCAPE_CODES = {
  "x" => 2 # \xNN — one byte
}
|
1005
|
+
|
1006
|
+
# Scan a run of non-space content inside a quoted scalar, handling
# the '' escape in single-quoted style and backslash escapes in
# double-quoted style. Returns the collected string chunks.
def scan_flow_scalar_non_spaces(double, start_mark)
  chunks = []
  while true
    length = 0
    length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
    if length!=0
      chunks << prefix(length)
      forward(length)
    end
    ch = peek
    if !double && ch == ?' && peek(1) == ?'
      # '' inside a single-quoted scalar is an escaped quote.
      # BUGFIX: append the character, not the Fixnum code ?' — join()
      # rendered the code as the digits "39".
      chunks << "'"
      forward(2)
    elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
      # BUGFIX: append ch.chr, not the raw character code.
      chunks << ch.chr
      forward
    elsif double && ch == ?\\
      forward
      ch = peek
      if ESCAPE_REPLACEMENTS.member?(ch.chr)
        chunks << ESCAPE_REPLACEMENTS[ch.chr]
        forward
      elsif ESCAPE_CODES.member?(ch.chr)
        length = ESCAPE_CODES[ch.chr]
        forward
        length.times do |k|
          if /[0-9A-Fa-f]/ !~ peek(k).chr
            raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
              "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
          end
        end
        # BUGFIX: the digits are hexadecimal; decode with base 16 and
        # append the resulting character (was `to_i.to_s(16)`, which
        # parsed base 10 and appended a hex *string*).
        chunks << prefix(length).to_i(16).chr
        forward(length)
      elsif "\r\n\x85".include?(ch)
        scan_line_break
        chunks += scan_flow_scalar_breaks(double, start_mark)
      else
        raise ScannerError.new("while scanning a double-quoted scalar", start_mark,"found unknown escape character #{ch}",get_mark)
      end
    else
      return chunks
    end
  end
end
|
1052
|
+
|
1053
|
+
# Scan the run of spaces/tabs between non-space chunks of a quoted
# scalar, applying line folding across breaks. Returns string chunks.
def scan_flow_scalar_spaces(double, start_mark)
  chunks = []
  length = 0
  length += 1 while /[ \t]/ =~ peek(length).chr
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if ch == ?\0
    raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
  elsif "\r\n\x85".include?(ch)
    line_break = scan_line_break
    breaks = scan_flow_scalar_breaks(double, start_mark)
    # BUGFIX: scan_line_break returns the String "\n"; the old
    # comparison against the character code ?\n was always unequal,
    # so a single break was never folded into a space.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  else
    chunks << whitespaces
  end
  chunks
end
|
1077
|
+
|
1078
|
+
# Consume blank lines inside a quoted scalar and return their line
# breaks; raises when a document separator appears.
def scan_flow_scalar_breaks(double, start_mark)
  chunks = []
  loop do
    # Instead of checking indentation, we check for document
    # separators.
    head = prefix(3)
    if (head == "---" || head == "...") && "\0 \t\r\n\x85".include?(peek(3))
      raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
    end
    forward while /[ \t]/ =~ peek.chr
    return chunks unless "\r\n\x85".include?(peek)
    chunks << scan_line_break
  end
end
|
1096
|
+
|
1097
|
+
# Scan a plain (unquoted) scalar and return a ScalarToken
# (plain=true). In the flow context plain scalars additionally may
# not contain ',', ':', '?', '[', ']', '{', '}'. Keeps the
# `allow_simple_key` flag up to date. Indentation rules are loosened
# for the flow context.
def scan_plain
  chunks = []
  start_mark = get_mark
  end_mark = start_mark
  indent = @indent+1
  # We allow zero indentation for scalars, but then we need to check for
  # document separators at the beginning of the line.
  #if indent == 0
  #  indent = 1
  spaces = []
  while true
    length = 0
    break if peek == ?#
    while true
      ch = peek(length)
      # BUGFIX: the block-context ':' lookahead set contained "\x28"
      # ('(') instead of the intended "\x85" (NEL line break).
      if "\0 \t\r\n\x85".include?(ch) || (@flow_level==0 && ch == ?: && "\0 \t\r\n\x85".include?(peek(length+1))) || (@flow_level!=0 && ",:?[]{}".include?(ch))
        break
      end
      length += 1
    end
    break if length == 0
    @allow_simple_key = false
    chunks += spaces
    chunks << prefix(length)
    forward(length)
    end_mark = get_mark
    spaces = scan_plain_spaces(indent, start_mark)
    break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level==0 && @column < indent)
  end
  return ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
end
|
1133
|
+
|
1134
|
+
# Scan spaces and possible line breaks following a plain-scalar
# chunk, folding breaks per the spec. Tabs are deliberately
# forbidden in plain scalars. Returns the collected chunks, or nil
# when a document separator ends the scalar.
def scan_plain_spaces(indent, start_mark)
  chunks = []
  length = 0
  length += 1 while peek(length) == 32
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if "\r\n\x85".include?(ch)
    line_break = scan_line_break
    @allow_simple_key = true
    head = prefix(3)
    return if (head == "---" || head == "...") && "\0 \t\r\n\x85".include?(peek(3))
    breaks = []
    while " \r\n\x85".include?(peek)
      if peek == 32
        forward
      else
        breaks << scan_line_break
        head = prefix(3)
        return if (head == "---" || head == "...") && "\0 \t\r\n\x85".include?(peek(3))
      end
    end
    # BUGFIX: the old comparison used single-quoted '\n' — the two
    # characters backslash+n — so it never matched an actual newline
    # and folding to a single space never happened.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  elsif !whitespaces.empty?
    chunks << whitespaces
  end
  chunks
end
|
1170
|
+
|
1171
|
+
# Scan a tag handle: '!', optionally followed by a word and a second
# '!'. For some strange reason the specification does not allow '_'
# in tag handles; it is permitted here anyway.
def scan_tag_handle(name, start_mark)
  ch = peek
  if ch != ?!
    raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
  end
  length = 1
  ch = peek(length)
  if ch != 32
    # Consume the handle body, which must end with '!'.
    while /[-_0-9A-Za-z]/ =~ ch.chr
      length += 1
      ch = peek(length)
    end
    unless ch == ?!
      forward(length)
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
    end
    length += 1
  end
  handle = prefix(length)
  forward(length)
  handle
end
|
1194
|
+
|
1195
|
+
# Scan a tag URI, decoding %xx escapes as they occur.
# Note: we do not check that the URI is well-formed.
def scan_tag_uri(name, start_mark)
  chunks = []
  length = 0
  ch = peek(length)
  while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
    if ch == ?%
      # Flush the plain text collected so far, then decode escapes.
      chunks << prefix(length)
      forward(length)
      length = 0
      chunks << scan_uri_escapes(name, start_mark)
    else
      length += 1
    end
    ch = peek(length)
  end
  unless length == 0
    chunks << prefix(length)
    forward(length)
    length = 0
  end

  raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark) if chunks.empty?
  chunks.join('')
end
|
1221
|
+
|
1222
|
+
# Decode a run of %xx escapes in a tag URI into the raw bytes they
# represent.
def scan_uri_escapes(name, start_mark)
  bytes = []
  mark = get_mark
  while peek == ?%
    forward
    2.times do |k|
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
        get_mark) if /[0-9A-Fa-f]/ !~ peek(k).chr
    end
    # BUGFIX: the two digits are hexadecimal; decode with base 16 and
    # append the byte (was `to_i.to_s(16)` — base-10 parse re-rendered
    # as a hex string, e.g. "%20" became "14" instead of " ").
    bytes << prefix(2).to_i(16).chr
    forward(2)
  end
  bytes.join('')
end
|
1237
|
+
|
1238
|
+
# Consume one line break and normalize it:
#   '\r\n', '\r', '\n', '\x85'  ->  "\n"
#   anything else               ->  ""   (nothing consumed)
def scan_line_break
  ch = peek
  return "" unless "\r\n\x85".include?(ch)
  if prefix(2) == "\r\n"
    forward(2)
  else
    forward
  end
  "\n"
end
|
1256
|
+
end
|
1257
|
+
end
|
1258
|
+
|