RbYAML 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +19 -0
- data/README +31 -0
- data/lib/rbyaml.rb +378 -0
- data/lib/rbyaml/composer.rb +189 -0
- data/lib/rbyaml/constructor.rb +374 -0
- data/lib/rbyaml/detector.rb +44 -0
- data/lib/rbyaml/dumper.rb +40 -0
- data/lib/rbyaml/emitter.rb +1116 -0
- data/lib/rbyaml/error.rb +81 -0
- data/lib/rbyaml/events.rb +92 -0
- data/lib/rbyaml/loader.rb +49 -0
- data/lib/rbyaml/nodes.rb +69 -0
- data/lib/rbyaml/parser.rb +488 -0
- data/lib/rbyaml/reader.rb +127 -0
- data/lib/rbyaml/representer.rb +183 -0
- data/lib/rbyaml/scanner.rb +1258 -0
- data/lib/rbyaml/serializer.rb +120 -0
- data/lib/rbyaml/test.rb +56 -0
- data/lib/rbyaml/tokens.rb +163 -0
- data/lib/rbyaml/yaml.rb +143 -0
- data/test/test_rbyaml.rb +18 -0
- data/test/yaml/gems.yml +130951 -0
- data/test/yaml/gems2.yml +113 -0
- data/test/yaml/test1.yml +3 -0
- data/test/yaml/test10.yml +8 -0
- data/test/yaml/test12.yml +8 -0
- data/test/yaml/test13.yml +4 -0
- data/test/yaml/test14.yml +4 -0
- data/test/yaml/test15.yml +8 -0
- data/test/yaml/test16.yml +7 -0
- data/test/yaml/test18.yml +6 -0
- data/test/yaml/test19.yml +5 -0
- data/test/yaml/test2.yml +3 -0
- data/test/yaml/test20.yml +6 -0
- data/test/yaml/test21.yml +4 -0
- data/test/yaml/test22.yml +4 -0
- data/test/yaml/test23.yml +13 -0
- data/test/yaml/test24.yml +14 -0
- data/test/yaml/test25.yml +7 -0
- data/test/yaml/test26.yml +7 -0
- data/test/yaml/test27.yml +29 -0
- data/test/yaml/test28.yml +26 -0
- data/test/yaml/test29.yml +13 -0
- data/test/yaml/test3.yml +8 -0
- data/test/yaml/test30.yml +7 -0
- data/test/yaml/test31.yml +2 -0
- data/test/yaml/test32.yml +13 -0
- data/test/yaml/test33.yml +2 -0
- data/test/yaml/test34.yml +8 -0
- data/test/yaml/test35.yml +4 -0
- data/test/yaml/test36.yml +8 -0
- data/test/yaml/test37.yml +2 -0
- data/test/yaml/test38.yml +8 -0
- data/test/yaml/test39.yml +2 -0
- data/test/yaml/test4.yml +8 -0
- data/test/yaml/test40.yml +3 -0
- data/test/yaml/test41.yml +5 -0
- data/test/yaml/test42.yml +12 -0
- data/test/yaml/test43.yml +15 -0
- data/test/yaml/test44.yml +23 -0
- data/test/yaml/test5.yml +3 -0
- data/test/yaml/test6.yml +5 -0
- data/test/yaml/test7.yml +10 -0
- data/test/yaml/test8.yml +10 -0
- data/test/yaml/test9.yml +8 -0
- metadata +111 -0
@@ -0,0 +1,127 @@
|
|
1
|
+
# This is a more or less straight translation of PyYAML3000 to Ruby
|
2
|
+
|
3
|
+
# the big difference in this implementation is that unicode support is not here...
|
4
|
+
|
5
|
+
require 'rbyaml/error'
|
6
|
+
|
7
|
+
module RbYAML
|
8
|
+
|
9
|
+
# Reader:
|
10
|
+
# - checks if characters are in allowed range,
|
11
|
+
# - adds '\0' to the end.
|
12
|
+
# Reader accepts
|
13
|
+
# - a String object
|
14
|
+
# - a duck-typed IO object
|
15
|
+
module Reader
|
16
|
+
# Prepare the reader for a new input source. A String is consumed
# directly from memory; anything else is treated as an IO-like stream
# that will be drained incrementally by update_raw.
def initialize_reader(stream)
  @stream = nil
  @stream_pointer = 0
  @eof = true
  @buffer = ""
  @pointer = 0
  @index = 0
  @line = 0
  @column = 0
  case stream
  when String
    @name = "<string>"
    @raw_buffer = stream
  else
    @stream = stream
    @name = stream.respond_to?(:path) ? stream.path : stream.inspect
    @eof = false
    @raw_buffer = ""
  end
end
|
35
|
+
|
36
|
+
# Look at the character `index` positions ahead of the cursor without
# consuming anything; refills the buffer on demand.
def peek(index=0)
  update(index + 1) if @pointer + index + 1 >= @buffer.length
  @buffer[@pointer + index]
end

# Return the next `length` characters as a string without moving the
# cursor; refills the buffer on demand.
def prefix(length=1)
  update(length) if @pointer + length >= @buffer.length
  @buffer[@pointer, length]
end
|
45
|
+
|
46
|
+
# Consume `length` characters, maintaining the absolute @index and the
# @line/@column coordinates used for error marks.
#
# Bug fix: the carriage-return lookahead used to test
# @buffer[@pointer+1] *after* @pointer had already been advanced past
# the "\r", which inspected the character two positions after it. As a
# result a "\r\n" pair was counted as two line breaks. Once the pointer
# has been incremented, the character following the consumed "\r" is
# @buffer[@pointer].
def forward(length=1)
  update(length + 1) if @pointer + length + 1 >= @buffer.length
  length.times {
    ch = @buffer[@pointer]
    @pointer += 1
    @index += 1
    # LF, NEL, or a CR that is not part of a CRLF pair start a new line.
    if "\n\x85".include?(ch) || (ch == ?\r && @buffer[@pointer] != ?\n)
      @line += 1
      @column = 0
    else
      @column += 1
    end
  }
end
|
60
|
+
|
61
|
+
# Build a Mark describing the current cursor position. A snapshot of
# the buffer is attached only when reading from an in-memory string;
# for streams the surrounding text is not retained.
def get_mark
  snapshot, cursor = @stream.nil? ? [@buffer, @pointer] : [nil, nil]
  Mark.new(@name, @index, @line, @column, snapshot, cursor)
end
|
68
|
+
|
69
|
+
# Bytes this (latin-1 era) reader accepts: TAB, LF, CR, printable
# ASCII, NEL (0x85) and 0xA0-0xFF. Anything else is rejected.
NON_PRINTABLE = /[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\xFF]/
# Raise ReaderError on the first non-printable byte of +data+.
# The reported position is the absolute stream index of the offending
# byte: characters already consumed (@index plus the unread tail of
# @buffer) plus the match offset inside the incoming chunk.
# NOTE(review): `raise obj, msg` passes the message to obj's exception
# hook; the same text is also baked into the ReaderError — presumably
# intentional duplication, confirm before changing.
def check_printable(data)
  if NON_PRINTABLE =~ data
    position = @index+@buffer.length-@pointer+($~.offset(0)[0])
    raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed"),"special characters are not allowed"
  end
end
|
76
|
+
|
77
|
+
# Ensure at least +length+ unread characters are available in @buffer,
# pulling more data from @raw_buffer / the stream as needed. When the
# stream is exhausted a NUL sentinel is appended exactly once and
# @raw_buffer is set to nil, which makes later calls no-ops.
def update(length)
  # nil @raw_buffer means EOF was already reached and the sentinel added.
  return if @raw_buffer.nil?
  # Drop the characters already consumed and rebase the cursor.
  @buffer = @buffer[@pointer..-1]
  @pointer = 0
  while @buffer.length < length
    # Refill the raw buffer from the stream unless it has run dry.
    unless @eof
      update_raw
    end
    data = @raw_buffer
    converted = data.length
    # Reject non-printable bytes before exposing them to the scanner.
    check_printable(data)
    @buffer << data
    @raw_buffer = @raw_buffer[converted..-1]
    if @eof
      # Append the NUL terminator the scanner relies on, then disable
      # any further refills.
      @buffer << ?\0
      @raw_buffer = nil
      break
    end
  end
end
|
97
|
+
|
98
|
+
# Read up to +size+ bytes from the underlying stream into @raw_buffer.
# A nil or empty read marks the stream as exhausted.
def update_raw(size=1024)
  chunk = @stream.read(size)
  if chunk.nil? || chunk.empty?
    @eof = true
  else
    @raw_buffer << chunk
    @stream_pointer += chunk.length
  end
end
|
107
|
+
end
|
108
|
+
|
109
|
+
# Raised by Reader when the input contains bytes outside the printable
# range. Carries enough context to point at the offending character.
class ReaderError < YAMLError
  def initialize(name, position, character, encoding, reason)
    @name = name          # source name ("<string>" or stream path)
    @position = position  # absolute index of the offending character
    @character = character # matched String, or an integer code point
    @encoding = encoding
    @reason = reason
  end

  # Human-readable description of the error.
  #
  # Bug fix: the format directive used to be applied with `%` to the
  # whole interpolated message, which broke whenever the name or the
  # reason contained a literal '%'; worse, String#to_i on the captured
  # character was always 0, so the offending byte was reported as 00.
  # The code point is now derived explicitly and formatted on its own.
  def to_s
    code = String === @character ? @character.unpack("C").first : @character.to_i
    if String === @character
      "'#{@encoding}' codec can't decode byte #x#{"%02x" % code}: #{@reason}\n in \"#{@name}\", position #{@position}"
    else
      "unacceptable character #x#{"%04x" % code}: #{@reason}\n in \"#{@name}\", position #{@position}"
    end
  end
end
|
126
|
+
end
|
127
|
+
|
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
require 'set'
|
3
|
+
|
4
|
+
require 'rbyaml/error'
|
5
|
+
require 'rbyaml/nodes'
|
6
|
+
require 'rbyaml/detector'
|
7
|
+
|
8
|
+
module RbYAML
|
9
|
+
# Raised when an object cannot be turned into a YAML node
# (no representer registered, recursive structure, ...).
class RepresenterError < YAMLError
end
|
11
|
+
|
12
|
+
# Core machinery for turning Ruby objects into YAML node trees.
# Representer methods are registered per class in a registry shared by
# every includer; the nil key installs the catch-all fallback.
module BaseRepresenter
  @@yaml_representers = {}

  # Reset per-document state; must be called before represent.
  def initialize_representer
    # object_id -> node, used to alias shared objects and to detect
    # recursion (a stored nil marks "representation in progress").
    @represented_objects = {}
  end

  # Represent +data+ as a node tree and hand it to the serializer.
  #
  # Bug fix: the original assigned a *local* `represented_objects = {}`
  # after serializing, so the @represented_objects instance variable
  # was never cleared and alias bookkeeping leaked across documents.
  def represent(data)
    node = represent_object(data)
    serialize(node)
    @represented_objects = {}
  end

  # Build (or reuse) the node for +data+, dispatching on the most
  # specific registered representer among data's ancestors.
  #
  # Bug fix: the original set its `rerun` flag whenever the class had
  # any ancestors at all (always true), so the nil-representer
  # fallback was unreachable and objects of unregistered types
  # silently produced a nil node. The fallback now fires whenever no
  # ancestor matched.
  def represent_object(data)
    alias_key = ignore_aliases(data) ? nil : data.object_id

    if !alias_key.nil?
      if @represented_objects.include?(alias_key)
        node = @represented_objects[alias_key]
        raise RepresenterError.new("recursive objects are not allowed: #{data}") if node.nil?
        return node
      end
      # Mark as in-progress so a cycle is detected, not looped on.
      @represented_objects[alias_key] = nil
    end

    node = nil
    matched = false
    data.class.ancestors.each do |data_type|
      if @@yaml_representers.include?(data_type)
        node = send(@@yaml_representers[data_type], data)
        matched = true
        break
      end
    end
    unless matched
      if @@yaml_representers.include?(nil)
        node = send(@@yaml_representers[nil], data)
      else
        node = ScalarNode.new(data.taguri, data)
      end
    end
    @represented_objects[alias_key] = node if !alias_key.nil?
    node
  end

  # Register +representer+ (a method name) for instances of
  # +data_type+; nil installs the fallback for unhandled objects.
  def self.add_representer(data_type, representer)
    @@yaml_representers[data_type] = representer
  end

  def represent_scalar(tag, value, style=nil)
    ScalarNode.new(tag, value, style)
  end

  def represent_sequence(tag, sequence, flow_style=nil)
    SequenceNode.new(tag, sequence.map { |item| represent_object(item) }, flow_style)
  end

  def represent_mapping(tag, mapping, flow_style=nil)
    value = {}
    mapping.each { |item_key, item_value| value[represent_object(item_key)] = represent_object(item_value) }
    MappingNode.new(tag, value, flow_style)
  end

  # Whether +data+ may be emitted without anchor/alias bookkeeping.
  # Subclasses override this for immutable scalar types.
  def ignore_aliases(data)
    false
  end
end
|
84
|
+
|
85
|
+
# Representers for the "safe schema" subset of Ruby types: nil,
# booleans, numbers, strings, symbols, arrays, hashes, sets and times.
# Each method receives the object and returns a node, taking the YAML
# tag from the object's taguri.
module SafeRepresenter
  include BaseRepresenter

  # Scalar value types never need anchors/aliases.
  def ignore_aliases(data)
    data.nil? || String === data || TrueClass === data || FalseClass === data || Integer === data || Float === data
  end

  def represent_none(data)
    represent_scalar(data.taguri,"null")
  end

  def represent_str(data)
    represent_scalar(data.taguri,data)
  end

  def represent_symbol(data)
    represent_scalar(data.taguri,data.to_s)
  end

  def represent_bool(data)
    value = data ? "true" : "false"
    represent_scalar(data.taguri,value)
  end

  def represent_int(data)
    represent_scalar(data.taguri,data.to_s)
  end

  # Floats need the YAML spellings for the IEEE special values.
  def represent_float(data)
    if data.infinite? == 1
      value = ".inf"
    elsif data.infinite? == -1
      value = "-.inf"
    elsif data.nan? || data != data
      value = ".nan"
    else
      value = data.to_s
    end
    represent_scalar(data.taguri, value)
  end

  def represent_list(data)
    represent_sequence(data.taguri, data)
  end

  def represent_dict(data)
    represent_mapping(data.taguri, data)
  end

  # Sets are represented as a mapping with null values.
  def represent_set(data)
    value = {}
    data.each { |key| value[key] = nil }
    represent_mapping(data.taguri, value)
  end

  # "YYYY-MM-DD hh:mm:ss[.fraction][utc_offset]"
  def represent_datetime(data)
    value = "%04d-%02d-%02d %02d:%02d:%02d" % [data.year, data.month, data.day, data.hour, data.min, data.sec]
    if data.usec != 0
      value += "." + (data.usec/1000000.0).to_s.split(/\./)[1]
    end
    if data.utc_offset != 0
      value += data.utc_offset.to_s
    end
    represent_scalar(data.taguri, value)
  end

  # Generic object representation: a mapping of the object's
  # to_yaml_properties instance variables, keyed without the '@'.
  #
  # Bug fix: the loop used to assign into an undefined local `map`
  # instead of the `mapping` hash, so the call blew up (bare `map`
  # resolves to Enumerable#map) and the built mapping stayed empty.
  # Properties are now stored in `mapping`.
  def represent_yaml_object(tag, data, flow_style=nil)
    mapping = {}
    data.to_yaml_properties.each do |m|
      mapping[m[1..-1]] = data.instance_variable_get(m)
    end
    represent_mapping(tag, mapping, flow_style)
  end

  # Fallback registered under the nil key: anything unhandled is an
  # error in the safe schema.
  def represent_undefined(data)
    raise RepresenterError.new("cannot represent an object: #{data}")
  end
end
|
166
|
+
|
167
|
+
# Wire the safe-schema representers into the shared registry.
# The nil entry installs the fallback for otherwise unhandled objects.
BaseRepresenter.add_representer(NilClass,:represent_none)
BaseRepresenter.add_representer(String,:represent_str)
BaseRepresenter.add_representer(Symbol,:represent_symbol)
BaseRepresenter.add_representer(TrueClass,:represent_bool)
BaseRepresenter.add_representer(FalseClass,:represent_bool)
BaseRepresenter.add_representer(Integer,:represent_int)
BaseRepresenter.add_representer(Float,:represent_float)
BaseRepresenter.add_representer(Array,:represent_list)
BaseRepresenter.add_representer(Hash,:represent_dict)
BaseRepresenter.add_representer(Set,:represent_set)
BaseRepresenter.add_representer(Time,:represent_datetime)
BaseRepresenter.add_representer(nil,:represent_undefined)
|
179
|
+
|
180
|
+
# Public representer mixin; currently identical to the safe schema.
module Representer
  include SafeRepresenter
end
|
183
|
+
end
|
@@ -0,0 +1,1258 @@
|
|
1
|
+
# Scanner produces tokens of the following types:
|
2
|
+
# STREAM-START
|
3
|
+
# STREAM-END
|
4
|
+
# DIRECTIVE(name, value)
|
5
|
+
# DOCUMENT-START
|
6
|
+
# DOCUMENT-END
|
7
|
+
# BLOCK-SEQUENCE-START
|
8
|
+
# BLOCK-MAPPING-START
|
9
|
+
# BLOCK-END
|
10
|
+
# FLOW-SEQUENCE-START
|
11
|
+
# FLOW-MAPPING-START
|
12
|
+
# FLOW-SEQUENCE-END
|
13
|
+
# FLOW-MAPPING-END
|
14
|
+
# BLOCK-ENTRY
|
15
|
+
# FLOW-ENTRY
|
16
|
+
# KEY
|
17
|
+
# VALUE
|
18
|
+
# ALIAS(value)
|
19
|
+
# ANCHOR(value)
|
20
|
+
# TAG(value)
|
21
|
+
# SCALAR(value, plain)
|
22
|
+
#
|
23
|
+
# Read comments in the Scanner code for more details.
|
24
|
+
#
|
25
|
+
|
26
|
+
require 'rbyaml/error'
|
27
|
+
require 'rbyaml/tokens'
|
28
|
+
|
29
|
+
module RbYAML
|
30
|
+
# Raised on malformed input at the token level; context/problem marks
# are carried by MarkedYAMLError.
class ScannerError < MarkedYAMLError
end
|
32
|
+
|
33
|
+
# Record describing a position where a simple key (a key not introduced
# by '?') may start, so that the matching ':' can later insert a KEY
# token at the right slot in the token queue.
class SimpleKey
  attr_reader :token_number, :required, :index, :line, :column, :mark

  def initialize(token_number, required, index, line, column, mark)
    @token_number, @required, @index, @line, @column, @mark =
      token_number, required, index, line, column, mark
  end
end
|
45
|
+
|
46
|
+
module Scanner
|
47
|
+
# Set up scanner state. Assumes the Reader mixin shares the same
# object: it handles the BOM, appends the NUL sentinel, and provides
# peek(i)/prefix(l)/forward(l).
def initialize_scanner
  @done = false                # end of stream reached?
  @flow_level = 0              # unclosed '{'/'[' count; 0 => block context
  @tokens = []                 # scanned tokens not yet handed out
  fetch_stream_start           # STREAM-START is always queued first
  @tokens_taken = 0            # tokens already emitted via get_token
  @indent = -1                 # current indentation level
  @indents = []                # enclosing indentation levels
  # Simple keys are keys not introduced by '?'. Because KEY must be
  # emitted *before* the key, each potential simple key (one per flow
  # level) is remembered until its ':' shows up; a simple key is
  # limited to a single line and 1024 characters. In block context
  # @allow_simple_key also signals that a block collection may start.
  @allow_simple_key = true     # may a simple key start here?
  @possible_simple_keys = {}   # flow_level -> SimpleKey
end
|
108
|
+
|
109
|
+
# True when the next token matches any of +choices+ (compared with
# ===); with no arguments, true when any token is available at all.
def check_token(*choices)
  fetch_more_tokens while need_more_tokens
  return false if @tokens.empty?
  return true if choices.empty?
  choices.any? { |choice| choice === @tokens.first }
end

# Return the next token without removing it from the queue
# (nil once the stream is exhausted).
def peek_token
  fetch_more_tokens while need_more_tokens
  @tokens.first
end

# Remove and return the next token (nil once the stream is exhausted).
def get_token
  fetch_more_tokens while need_more_tokens
  return if @tokens.empty?
  @tokens_taken += 1
  @tokens.shift
end

# Yield every remaining token in stream order.
def each_token
  fetch_more_tokens while need_more_tokens
  until @tokens.empty?
    @tokens_taken += 1
    yield @tokens.shift
    fetch_more_tokens while need_more_tokens
  end
end

# Must we scan further before the next token can be handed out?
# Yes when the queue is empty, or when its head might still turn into
# a KEY for a pending simple key.
def need_more_tokens
  return false if @done
  return true if @tokens.empty?
  # The current head may be a potential simple key, so look further.
  stale_possible_simple_keys
  return true if next_possible_simple_key == @tokens_taken
end
|
153
|
+
|
154
|
+
# Scan past whitespace/comments, retire stale simple keys, unwind the
# indentation to the current column, then dispatch on the next input
# character to queue the appropriate token(s).
# NOTE: written for Ruby 1.8 — uses the `when cond: expr` case syntax
# and Fixnum `?c` character literals.
def fetch_more_tokens
  # Eat whitespaces and comments until we reach the next token.
  scan_to_next_token
  # Remove obsolete possible simple keys.
  stale_possible_simple_keys
  # Compare the current indentation and column. It may add some tokens
  # and decrease the current indentation level.
  unwind_indent(@column)
  # Peek the next character.
  ch = peek
  return case
    # Is it the end of stream?
    when ch == ?\0: fetch_stream_end
    # Is it a directive?
    when ch == ?% && check_directive: fetch_directive
    # Is it the document start?
    when ch == ?- && check_document_start: fetch_document_start
    # Is it the document end?
    when ch == ?. && check_document_end: fetch_document_end
    # Is it the flow sequence start indicator?
    when ch == ?[: fetch_flow_sequence_start
    # Is it the flow mapping start indicator?
    when ch == ?{: fetch_flow_mapping_start
    # Is it the flow sequence end indicator?
    when ch == ?]: fetch_flow_sequence_end
    # Is it the flow mapping end indicator?
    when ch == ?}: fetch_flow_mapping_end
    # Is it the flow entry indicator?
    when ch == ?,: fetch_flow_entry
    # Is it the block entry indicator?
    when ch == ?- && check_block_entry: fetch_block_entry
    # Is it the key indicator?
    when ch == ?? && check_key: fetch_key
    # Is it the value indicator?
    when ch == ?: && check_value: fetch_value
    # Is it an alias?
    when ch == ?*: fetch_alias
    # Is it an anchor?
    when ch == ?&: fetch_anchor
    # Is it a tag?
    when ch == ?!: fetch_tag
    # Is it a literal scalar? Only in block context.
    when ch == ?| && @flow_level==0: fetch_literal
    # Is it a folded scalar? Only in block context.
    when ch == ?> && @flow_level==0: fetch_folded
    # Is it a single quoted scalar?
    when ch == ?': fetch_single
    # Is it a double quoted scalar?
    when ch == ?": fetch_double
    # It must be a plain scalar then.
    when check_plain: fetch_plain
    else raise ScannerError.new("while scanning for the next token", nil,"found character #{ch.chr}(#{ch}) that cannot start any token",get_mark)
  end
end
|
212
|
+
|
213
|
+
# Simple keys treatment.
|
214
|
+
|
215
|
+
# Token number of the earliest pending simple key, or nil when no
# simple key is pending on any flow level.
def next_possible_simple_key
  numbers = @possible_simple_keys.values.map { |key| key.token_number }
  numbers.min
end
|
227
|
+
|
228
|
+
# Drop pending simple keys that can no longer be completed: per the
# YAML spec a simple key must sit on the current line and span at most
# 1024 characters. A *required* key (block context, at the indentation
# column) that goes stale is a syntax error.
#
# Bug fix: the delete_if block used `return true` / `return false`,
# but `return` inside a block exits the enclosing *method*, so the
# sweep aborted on the first entry examined and nothing was ever
# deleted. The block now yields the boolean as its value. (Also fixes
# the "could not found" typo in the error message.)
def stale_possible_simple_keys
  @possible_simple_keys.delete_if { |level, key|
    if key.line != @line || @index - key.index > 1024
      raise ScannerError.new("while scanning a simple key", key.mark, "could not find expected ':'", get_mark) if key.required
      true
    else
      false
    end
  }
end
|
243
|
+
|
244
|
+
# Record that a simple key could start at the current position.
# Called for ALIAS, ANCHOR, TAG, flow SCALAR, '[' and '{'. In block
# context at the indentation column the key is mandatory (required).
def save_possible_simple_key
  required = @flow_level == 0 && @indent == @column
  return unless @allow_simple_key
  remove_possible_simple_key
  number = @tokens_taken + @tokens.length
  @possible_simple_keys[@flow_level] =
    SimpleKey.new(number, required, @index, @line, @column, get_mark)
end
|
261
|
+
|
262
|
+
# Discard the pending simple key at the current flow level, if any.
#
# Bug fix: the original only *read* the entry into an unused local
# (`key = @possible_simple_keys[@flow_level] if ...`) and never
# removed it, so stale keys at this level survived and could later
# produce spurious KEY tokens. The entry is now actually deleted.
def remove_possible_simple_key
  @possible_simple_keys.delete(@flow_level)
end
|
266
|
+
|
267
|
+
# Indentation functions.
|
268
|
+
|
269
|
+
# Pop indentation levels deeper than +column+, queueing a BLOCK-END
# token for each level closed. Indentation is deliberately ignored in
# flow context — more permissive than the spec requires, which keeps
# intuitively correct constructs like "key : {\n}" working.
def unwind_indent(column)
  return nil unless @flow_level == 0
  while @indent > column
    here = get_mark
    @indent = @indents.pop
    @tokens << BlockEndToken.new(here, here)
  end
end

# Open a new indentation level at +column+ when it is deeper than the
# current one; returns true when a level was pushed.
def add_indent(column)
  return false unless @indent < column
  @indents << @indent
  @indent = column
  true
end
|
301
|
+
|
302
|
+
# Fetchers.
|
303
|
+
|
304
|
+
# Queue STREAM-START; always the very first token emitted.
# NOTE(review): @encoding is never assigned anywhere in this module,
# so the token currently carries nil — confirm against the Reader
# mixin before relying on it.
def fetch_stream_start
  here = get_mark
  @tokens << StreamStartToken.new(here, here, @encoding)
end

# Queue the final STREAM-END token and mark the scanner as finished.
def fetch_stream_end
  # Close every open block level first.
  unwind_indent(-1)
  # Reset simple-key bookkeeping (not strictly needed at this point).
  @allow_simple_key = false
  @possible_simple_keys = {}
  here = get_mark
  @tokens << StreamEndToken.new(here, here)
  @done = true
end
|
327
|
+
|
328
|
+
# Queue a DIRECTIVE token ('%YAML', '%TAG', ...). Directives reset the
# indentation and forbid simple keys.
def fetch_directive
  unwind_indent(-1)
  remove_possible_simple_key
  @allow_simple_key = false
  @tokens << scan_directive
end

# Queue DOCUMENT-START for a '---' line.
def fetch_document_start
  fetch_document_indicator(DocumentStartToken)
end

# Queue DOCUMENT-END for a '...' line.
def fetch_document_end
  fetch_document_indicator(DocumentEndToken)
end

# Shared handler for the three-character document indicators: resets
# indentation, forbids simple keys (no block collection may follow
# '---'), consumes the indicator and queues +token+.
def fetch_document_indicator(token)
  unwind_indent(-1)
  remove_possible_simple_key
  @allow_simple_key = false
  before = get_mark
  forward(3)
  after = get_mark
  @tokens << token.new(before, after)
end
|
359
|
+
|
360
|
+
# '[' opens a flow sequence.
def fetch_flow_sequence_start
  fetch_flow_collection_start(FlowSequenceStartToken)
end

# '{' opens a flow mapping.
def fetch_flow_mapping_start
  fetch_flow_collection_start(FlowMappingStartToken)
end

# Shared handler for '[' and '{': either may itself begin a simple
# key, opens a new flow level, and allows a simple key right after it.
def fetch_flow_collection_start(token_class)
  save_possible_simple_key
  @flow_level += 1
  @allow_simple_key = true
  before = get_mark
  forward
  after = get_mark
  @tokens << token_class.new(before, after)
end

# ']' closes a flow sequence.
def fetch_flow_sequence_end
  fetch_flow_collection_end(FlowSequenceEndToken)
end

# '}' closes a flow mapping.
def fetch_flow_mapping_end
  fetch_flow_collection_end(FlowMappingEndToken)
end

# Shared handler for ']' and '}': drops the pending simple key, closes
# the flow level, and forbids a simple key immediately afterwards.
def fetch_flow_collection_end(token_class)
  remove_possible_simple_key
  @flow_level -= 1
  @allow_simple_key = false
  before = get_mark
  forward
  after = get_mark
  @tokens << token_class.new(before, after)
end
|
403
|
+
|
404
|
+
# Queue FLOW-ENTRY for ','; a simple key may follow it.
def fetch_flow_entry
  @allow_simple_key = true
  remove_possible_simple_key
  before = get_mark
  forward
  after = get_mark
  @tokens << FlowEntryToken.new(before, after)
end

# Queue BLOCK-ENTRY for '-'; in block context this may also open a new
# block sequence (BLOCK-SEQUENCE-START).
def fetch_block_entry
  if @flow_level == 0
    # '-' is only legal where a simple key could start.
    raise ScannerError.new(nil, nil, "sequence entries are not allowed here", get_mark) unless @allow_simple_key
    if add_indent(@column)
      here = get_mark
      @tokens << BlockSequenceStartToken.new(here, here)
    end
    # A block entry inside flow context is an error, but we leave it to
    # the parser to detect.
  end
  # Simple keys are allowed after '-'.
  @allow_simple_key = true
  remove_possible_simple_key
  before = get_mark
  forward
  after = get_mark
  @tokens << BlockEntryToken.new(before, after)
end
|
438
|
+
|
439
|
+
# Queue KEY for an explicit '?' key; in block context this may also
# open a new block mapping (BLOCK-MAPPING-START).
def fetch_key
  if @flow_level == 0
    # '?' is only legal where a (not necessarily simple) key may start.
    raise ScannerError.new(nil, nil, "mapping keys are not allowed here", get_mark) unless @allow_simple_key
    if add_indent(@column)
      here = get_mark
      @tokens << BlockMappingStartToken.new(here, here)
    end
  end
  # After '?' a simple key may follow only in block context.
  @allow_simple_key = @flow_level == 0
  remove_possible_simple_key
  before = get_mark
  forward
  after = get_mark
  @tokens << KeyToken.new(before, after)
end
|
460
|
+
|
461
|
+
# Queue VALUE for ':'. If a simple key is pending on this flow level,
# a KEY token (and, in block context, possibly BLOCK-MAPPING-START) is
# retroactively inserted at the position remembered for that key.
def fetch_value
  # Do we determine a simple key?
  if @possible_simple_keys.include?(@flow_level)
    # Add KEY at the remembered slot; token_number is absolute, so the
    # insertion index is relative to the tokens already taken.
    key = @possible_simple_keys[@flow_level]
    @possible_simple_keys.delete(@flow_level)
    @tokens.insert(key.token_number-@tokens_taken,KeyToken.new(key.mark, key.mark))
    # If this key starts a new block mapping, we need to add
    # BLOCK-MAPPING-START (inserted before the KEY at the same slot).
    @tokens.insert(key.token_number-@tokens_taken,BlockMappingStartToken.new(key.mark, key.mark)) if @flow_level==0 && add_indent(key.column)
    # There cannot be two simple keys one after another.
    @allow_simple_key = false
  # It must be a part of a complex key.
  else
    # Block context needs additional checks. (Do we really need them?
    # They will be caught by the parser anyway.)
    if @flow_level==0
      # We are allowed to start a complex value if and only if we can
      # start a simple key.
      raise ScannerError.new(nil,nil,"mapping values are not allowed here",get_mark) if !@allow_simple_key
      # Simple keys are allowed after ':' in the block context.
      @allow_simple_key = @flow_level==0
      # Reset possible simple key on the current level.
      remove_possible_simple_key
    end
  end
  # Add VALUE.
  start_mark = get_mark
  forward
  end_mark = get_mark
  @tokens << ValueToken.new(start_mark, end_mark)
end
|
494
|
+
|
495
|
+
# Queue ALIAS ('*name'). The alias may itself act as a simple key, but
# no simple key may start right after it.
def fetch_alias
  # ALIAS could be a simple key.
  save_possible_simple_key
  # No simple keys after ALIAS.
  @allow_simple_key = false
  # Scan and add ALIAS.
  @tokens << scan_anchor(AliasToken)
end

# Queue ANCHOR ('&name'); same simple-key rules as ALIAS.
def fetch_anchor
  # ANCHOR could start a simple key.
  save_possible_simple_key
  # No simple keys after ANCHOR.
  @allow_simple_key = false
  # Scan and add ANCHOR.
  @tokens << scan_anchor(AnchorToken)
end

# Queue TAG ('!...'); same simple-key rules as ALIAS.
def fetch_tag
  # TAG could start a simple key.
  save_possible_simple_key
  # No simple keys after TAG.
  @allow_simple_key = false
  # Scan and add TAG.
  @tokens << scan_tag
end
|
521
|
+
|
522
|
+
# '|' introduces a literal block scalar.
def fetch_literal
  fetch_block_scalar(?|)
end

# '>' introduces a folded block scalar.
def fetch_folded
  fetch_block_scalar(?>)
end

# Queue a SCALAR token for a block scalar of the given +style+
# (?| literal or ?> folded). A simple key may follow a block scalar,
# so any pending candidate is discarded first.
def fetch_block_scalar(style)
  # A simple key may follow a block scalar.
  @allow_simple_key = true
  # Reset possible simple key on the current level.
  remove_possible_simple_key
  # Scan and add SCALAR.
  @tokens << scan_block_scalar(style)
end
|
538
|
+
|
539
|
+
def fetch_single
|
540
|
+
fetch_flow_scalar(?')
|
541
|
+
end
|
542
|
+
|
543
|
+
def fetch_double
|
544
|
+
fetch_flow_scalar(?")
|
545
|
+
end
|
546
|
+
|
547
|
+
def fetch_flow_scalar(style)
  # A flow scalar may itself be a simple key; none may follow it.
  save_possible_simple_key
  @allow_simple_key = false
  # Scan and add the SCALAR token.
  @tokens << scan_flow_scalar(style)
end
|
555
|
+
|
556
|
+
def fetch_plain
  # A plain scalar may be a simple key.  No simple key may follow it,
  # although scan_plain itself re-enables the flag when the scan stops
  # at the beginning of a line.
  save_possible_simple_key
  @allow_simple_key = false
  # Scan and add the SCALAR token; may change `allow_simple_key`.
  @tokens << scan_plain
end
|
566
|
+
|
567
|
+
# Checkers.
|
568
|
+
|
569
|
+
def check_directive
  # A DIRECTIVE must begin at the start of a line ('%' already checked).
  @column == 0
end
|
574
|
+
|
575
|
+
def check_document_start
  # DOCUMENT-START: '---' at column zero, followed by space/tab/break.
  return false unless @column == 0
  prefix(3) == "---" && "\0 \t\r\n\x85".include?(peek(3))
end
|
579
|
+
|
580
|
+
def check_document_end
  # DOCUMENT-END: '...' at column zero, followed by space/tab/break.
  # BUGFIX: the original first captured peek(4) into a local named
  # `prefix` that was never used (the subsequent `prefix(3)` is a
  # method call); the dead assignment has been removed.
  if @column == 0
    return true if prefix(3) == "..." && "\0 \t\r\n\x85".include?(peek(3))
  end
end
|
587
|
+
|
588
|
+
def check_block_entry
  # BLOCK-ENTRY: '-' (already seen) followed by a space or line break.
  "\0 \t\r\n\x85".include?(peek(1))
end
|
592
|
+
|
593
|
+
def check_key
  # KEY is always valid in the flow context; in the block context the
  # '?' must be followed by a space or line break.
  return true if @flow_level != 0
  "\0 \t\r\n\x85".include?(peek(1))
end
|
598
|
+
|
599
|
+
def check_value
  # VALUE is always valid in the flow context; in the block context the
  # ':' must be followed by a space or line break.
  return true if @flow_level != 0
  "\0 \t\r\n\x85".include?(peek(1))
end
|
604
|
+
|
605
|
+
def check_plain
  # A plain scalar may start with any non-space character except the
  # indicators '-?:,[]{}#&*!|>'"%@`'.  It may also start with '-', '?'
  # or ':' when the next character is a non-space ('?'/':' only in the
  # block context, to keep the flow context space-independent).
  ch = peek
  return true if !"\0 \t\r\n\x85-?:,[]{}#&*!|>'\"%@`".include?(ch)
  # BUGFIX: the original negated the entire conjunction
  # !(peek(1)-is-space && indicator-case), which inverted the rule for
  # '-'/'?'/':'.  Per the spec (and PyYAML) the indicator may start a
  # plain scalar only when the FOLLOWING character is a non-space.
  !"\0 \t\r\n\x85".include?(peek(1)) && (ch == ?- || (@flow_level == 0 && "?:".include?(ch)))
end
|
621
|
+
|
622
|
+
|
623
|
+
|
624
|
+
|
625
|
+
|
626
|
+
|
627
|
+
# Scanners.
|
628
|
+
|
629
|
+
def scan_to_next_token
  # Skip spaces, comments and line breaks until the next token can
  # begin.  A line break seen in the block context re-enables simple
  # keys.  (TODO carried over from the original: tabs preceding
  # block-structure tokens still need saner handling.)
  token_possible = false
  until token_possible
    forward while peek == 32
    # A comment runs to the end of the line.
    forward until "\0\r\n\x85".include?(peek) if peek == ?#
    if scan_line_break.empty?
      token_possible = true
    else
      @allow_simple_key = true if @flow_level == 0
    end
  end
end
|
659
|
+
|
660
|
+
def scan_directive
  # Scan a %YAML or %TAG directive; unknown directives are skipped.
  start_mark = get_mark
  forward  # past the '%'
  name = scan_directive_name(start_mark)
  value = nil
  case name
  when "YAML"
    value = scan_yaml_directive_value(start_mark)
    end_mark = get_mark
  when "TAG"
    value = scan_tag_directive_value(start_mark)
    end_mark = get_mark
  else
    end_mark = get_mark
    forward until "\0\r\n\x85".include?(peek)
  end
  scan_directive_ignored_line(start_mark)
  DirectiveToken.new(name, value, start_mark, end_mark)
end
|
679
|
+
|
680
|
+
def scan_directive_name(start_mark)
  # A directive name is one or more of [-0-9A-Za-z_], terminated by a
  # space or line break.
  length = 0
  ch = peek(length)
  while /[-0-9A-Za-z_]/ =~ ch.chr
    length += 1
    ch = peek(length)
  end
  if length == 0
    raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark)
  end
  value = prefix(length)
  forward(length)
  ch = peek()
  unless "\0 \r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark,"expected alphabetic or numeric character, but found #{ch.to_s}",get_mark)
  end
  value
end
|
695
|
+
|
696
|
+
def scan_yaml_directive_value(start_mark)
  # %YAML <major> '.' <minor> -- returns the two version components.
  forward while peek == 32
  major = scan_yaml_directive_number(start_mark)
  unless peek == ?.
    raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or '.', but found #{peek.to_s}",get_mark)
  end
  forward
  minor = scan_yaml_directive_number(start_mark)
  unless "\0 \r\n\x85".include?(peek)
    raise ScannerError.new("while scanning a directive", start_mark,"expected a digit or ' ', but found #{peek.to_s}",get_mark)
  end
  [major, minor]
end
|
706
|
+
|
707
|
+
def scan_yaml_directive_number(start_mark)
  # One version-number component: a run of ASCII digits (returned as a
  # string, exactly as scanned).
  ch = peek
  unless (?0..?9) === ch
    raise ScannerError.new("while scanning a directive", start_mark,"expected a digit, but found #{ch.to_s}",get_mark)
  end
  length = 0
  length += 1 while (?0..?9) === peek(length)
  digits = prefix(length)
  forward(length)
  digits
end
|
717
|
+
|
718
|
+
def scan_tag_directive_value(start_mark)
  # %TAG <handle> <prefix> -- returns [handle, prefix].
  forward while peek == 32
  handle = scan_tag_directive_handle(start_mark)
  forward while peek == 32
  [handle, scan_tag_directive_prefix(start_mark)]
end
|
726
|
+
|
727
|
+
def scan_tag_directive_handle(start_mark)
  # The handle in a %TAG directive must be followed by a space.
  value = scan_tag_handle("directive", start_mark)
  ch = peek
  unless ch == 32
    raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark())
  end
  value
end
|
734
|
+
|
735
|
+
def scan_tag_directive_prefix(start_mark)
  # The prefix in a %TAG directive must end at a space or line break.
  value = scan_tag_uri("directive", start_mark)
  ch = peek
  unless "\0 \r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark,"expected ' ', but found #{ch}",get_mark())
  end
  value
end
|
742
|
+
|
743
|
+
def scan_directive_ignored_line(start_mark)
  # Skip trailing spaces and an optional comment, then eat the break.
  forward while peek == 32
  forward until "\0\r\n\x85".include?(peek) if peek == ?#
  ch = peek
  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a directive", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark())
  end
  scan_line_break
end
|
753
|
+
|
754
|
+
def scan_anchor(token)
  # Anchors and aliases are restricted to ASCII alphanumerics, '-' and
  # '_' so that a document such as "[ *alias, value ]" parses
  # unambiguously (the spec itself does not restrict them).
  start_mark = get_mark
  name = peek == ?* ? "alias" : "anchor"
  forward
  length = 0
  length += 1 while /[-0-9A-Za-z_]/ =~ peek(length).chr
  ch = peek(length)
  if length == 0
    raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
  end
  value = prefix(length)
  forward(length)
  ch = peek
  unless "\0 \t\r\n\x85?:,]}%@`".include?(ch)
    raise ScannerError.new("while scanning an #{name}", start_mark,"expected alphabetic or numeric character, but found #{ch}",get_mark)
  end
  token.new(value, start_mark, get_mark)
end
|
783
|
+
|
784
|
+
|
785
|
+
def scan_tag
  # Scan a tag: verbatim (!<uri>), the non-specific '!', or a
  # handle!suffix pair.
  start_mark = get_mark
  ch = peek(1)
  if ch == ?<
    # Verbatim tag.
    handle = nil
    forward(2)
    suffix = scan_tag_uri("tag", start_mark)
    unless peek == ?>
      raise ScannerError.new("while parsing a tag", start_mark,"expected '>', but found #{peek.to_s}",get_mark)
    end
    forward
  elsif "\0 \t\r\n\x85".include?(ch)
    # A lone '!' is the non-specific tag.
    handle = nil
    suffix = "!"
    forward
  else
    # Decide whether "!...!" names a handle or everything after the
    # first '!' belongs to the suffix.
    length = 1
    use_handle = false
    until "\0 \t\r\n\x85".include?(ch)
      if ch == ?!
        use_handle = true
        break
      end
      length += 1
      ch = peek(length)
    end
    if use_handle
      handle = scan_tag_handle("tag", start_mark)
    else
      handle = "!"
      forward
    end
    suffix = scan_tag_uri("tag", start_mark)
  end
  ch = peek
  unless "\0 \r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a tag",start_mark,"expected ' ', but found #{ch}",get_mark)
  end
  TagToken.new([handle, suffix], start_mark, get_mark)
end
|
825
|
+
|
826
|
+
def scan_block_scalar(style)
  # Scan a literal (|) or folded (>) block scalar.
  folded = style == ?>
  chunks = []
  start_mark = get_mark
  # Scan the header: chomping/indentation indicators plus a comment.
  forward
  chomping, increment = scan_block_scalar_indicators(start_mark)
  scan_block_scalar_ignored_line(start_mark)
  # Determine the indentation level and go to the first non-empty line.
  min_indent = @indent + 1
  min_indent = 1 if min_indent < 1
  if increment.nil?
    breaks, max_indent, end_mark = scan_block_scalar_indentation
    indent = [min_indent, max_indent].max
  else
    indent = min_indent + increment - 1
    breaks, end_mark = scan_block_scalar_breaks(indent)
  end
  line_break = ''
  # Scan the inner part of the block scalar.
  while @column == indent and peek != ?\0
    chunks += breaks
    leading_non_space = !" \t".include?(peek)
    length = 0
    length += 1 while !"\0\r\n\x85".include?(peek(length))
    chunks << prefix(length)
    forward(length)
    line_break = scan_line_break
    breaks, end_mark = scan_block_scalar_breaks(indent)
    if @column == indent && peek != ?\0
      # Folding per the specification.  BUGFIX: scan_line_break returns
      # the String "\n"; under Ruby 1.8 the original compared it with
      # the Integer ?\n, so the folded branch could never be taken.
      if folded && line_break == "\n" && leading_non_space && !" \t".include?(peek())
        chunks << ' ' if breaks.empty?
      else
        chunks << line_break
      end
    else
      break
    end
  end
  # Chomp the tail.  BUGFIX: default (clip) chomping must keep exactly
  # the final line break; only an explicit '-' strips it and only '+'
  # keeps the trailing empty lines (matches the spec and PyYAML).  The
  # original kept nothing unless chomping was true.
  chunks << line_break if chomping != false
  chunks += breaks if chomping == true
  ScalarToken.new(chunks.join(''), false, start_mark, end_mark, style)
end
|
890
|
+
|
891
|
+
def scan_block_scalar_indicators(start_mark)
  # Parse the optional chomping ('+'/'-') and indentation (1-9)
  # indicators of a block scalar header, in either order.  Returns
  # [chomping, increment]: chomping is true ('+'), false ('-') or nil
  # (absent); increment is an Integer 1-9 or nil.
  chomping = nil
  increment = nil
  ch = peek
  if /[+-]/ =~ ch.chr
    chomping = ch == ?+
    forward
    ch = peek
    if (?0..?9) === ch
      # BUGFIX: ch is a character; to_i on the Fixnum code point would
      # yield e.g. 51 for '3'.  Convert the character to its digit.
      increment = ch.chr.to_i
      raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
      forward
    end
  elsif (?0..?9) === ch
    # BUGFIX: the original stored the raw character here, so the zero
    # check below could never fire and the increment was wrong.
    increment = ch.chr.to_i
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected indentation indicator in the range 1-9, but found 0",get_mark) if increment == 0
    forward
    ch = peek
    if /[+-]/ =~ ch.chr
      chomping = ch == ?+
      forward
    end
  end
  ch = peek
  unless "\0 \r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected chomping or indentation indicators, but found #{ch.to_s}",get_mark)
  end
  [chomping, increment]
end
|
919
|
+
|
920
|
+
def scan_block_scalar_ignored_line(start_mark)
  # Skip trailing spaces and an optional comment after the block scalar
  # header, then consume the line break.
  forward while peek == 32
  forward until "\0\r\n\x85".include?(peek) if peek == ?#
  ch = peek
  unless "\0\r\n\x85".include?(ch)
    raise ScannerError.new("while scanning a block scalar", start_mark,"expected a comment or a line break, but found #{ch.to_s}",get_mark)
  end
  scan_line_break
end
|
931
|
+
|
932
|
+
def scan_block_scalar_indentation
  # Skip leading blank lines, recording the deepest column reached.
  chunks = []
  max_indent = 0
  end_mark = get_mark
  while " \r\n\x85".include?(peek)
    if peek == 32
      forward
      max_indent = @column if @column > max_indent
    else
      chunks << scan_line_break
      end_mark = get_mark
    end
  end
  [chunks, max_indent, end_mark]
end
|
948
|
+
|
949
|
+
def scan_block_scalar_breaks(indent)
  # Consume indentation (up to `indent` columns) and blank lines,
  # collecting the normalized line breaks.
  chunks = []
  end_mark = get_mark
  forward while @column < indent && peek == 32
  while "\r\n\x85".include?(peek)
    chunks << scan_line_break
    end_mark = get_mark
    forward while @column < indent && peek == 32
  end
  [chunks, end_mark]
end
|
961
|
+
|
962
|
+
def scan_flow_scalar(style)
  # Scan a quoted scalar.  Indentation rules are deliberately relaxed:
  # the quotes themselves delimit the scalar, so we only need to make
  # sure document separators do not appear inside it.
  double = style == ?"
  chunks = []
  start_mark = get_mark
  quote = peek
  forward
  chunks += scan_flow_scalar_non_spaces(double, start_mark)
  until peek == quote
    chunks += scan_flow_scalar_spaces(double, start_mark)
    chunks += scan_flow_scalar_non_spaces(double, start_mark)
  end
  forward
  ScalarToken.new(chunks.join(''), false, start_mark, get_mark, style)
end
|
983
|
+
|
984
|
+
# Single-character escapes valid after a backslash in a double-quoted
# scalar, mapped to the character each denotes.
ESCAPE_REPLACEMENTS = {
  "0"  => "\0",     # null
  "a"  => "\x07",   # bell
  "b"  => "\x08",   # backspace
  "t"  => "\x09",   # horizontal tab
  "\t" => "\x09",   # a literal tab may also be escaped
  "n"  => "\x0A",   # line feed
  "v"  => "\x0B",   # vertical tab
  "f"  => "\x0C",   # form feed
  "r"  => "\x0D",   # carriage return
  "e"  => "\x1B",   # escape
  " "  => "\x20",   # space
  '"'  => '"',      # double quote
  "\\" => "\\",     # backslash
  "N"  => "\x85",   # next line (NEL)
  "_"  => "\xA0"    # non-breaking space
}
|
1001
|
+
|
1002
|
+
# Escapes that introduce a fixed number of hexadecimal digits: \xXX.
ESCAPE_CODES = {
  'x' => 2
}
|
1005
|
+
|
1006
|
+
def scan_flow_scalar_non_spaces(double, start_mark)
  # Scan the non-blank portions of a quoted scalar, handling the ''
  # escape (single-quoted) and backslash escapes (double-quoted).
  chunks = []
  while true
    length = 0
    length += 1 while !"'\"\\\0 \t\r\n\x85".include?(peek(length))
    if length != 0
      chunks << prefix(length)
      forward(length)
    end
    ch = peek
    if !double && ch == ?' && peek(1) == ?'
      # '' inside a single-quoted scalar is an escaped quote.
      chunks << "'"
      forward(2)
    elsif (double && ch == ?') || (!double && "\"\\".include?(ch))
      # BUGFIX: append the character itself, not the raw value -- under
      # Ruby 1.8 peek yields a Fixnum and Array#join would render its
      # decimal code point instead of the character.
      chunks << ch.chr
      forward
    elsif double && ch == ?\\
      forward
      ch = peek
      if ESCAPE_REPLACEMENTS.member?(ch.chr)
        chunks << ESCAPE_REPLACEMENTS[ch.chr]
        forward
      elsif ESCAPE_CODES.member?(ch.chr)
        length = ESCAPE_CODES[ch.chr]
        forward
        length.times do |k|
          unless /[0-9A-Fa-f]/ =~ peek(k).chr
            raise ScannerError.new("while scanning a double-quoted scalar", start_mark,
              "expected escape sequence of #{length} hexdecimal numbers, but found #{peek(k)}",get_mark)
          end
        end
        # BUGFIX: \xNN digits are hexadecimal; the original parsed them
        # as decimal and appended a hex digit *string* instead of the
        # character they encode.
        chunks << prefix(length).to_i(16).chr
        forward(length)
      elsif "\r\n\x85".include?(ch)
        scan_line_break
        chunks += scan_flow_scalar_breaks(double, start_mark)
      else
        raise ScannerError.new("while scanning a double-quoted scalar", start_mark,"found unknown escape character #{ch}",get_mark)
      end
    else
      return chunks
    end
  end
end
|
1052
|
+
|
1053
|
+
def scan_flow_scalar_spaces(double, start_mark)
  # Collect a run of blanks inside a quoted scalar.  A single line
  # break folds into one space unless further blank lines follow.
  chunks = []
  length = 0
  length += 1 while /[ \t]/ =~ peek(length).chr
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if ch == ?\0
    raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected end of stream",get_mark)
  elsif "\r\n\x85".include?(ch)
    line_break = scan_line_break
    breaks = scan_flow_scalar_breaks(double, start_mark)
    # BUGFIX: scan_line_break returns the String "\n"; under Ruby 1.8
    # the original compared it with the Integer ?\n, so the
    # fold-to-space branch could never be reached.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  else
    chunks << whitespaces
  end
  chunks
end
|
1077
|
+
|
1078
|
+
def scan_flow_scalar_breaks(double, start_mark)
  # Collect line breaks inside a quoted scalar, refusing to run across
  # a document separator ('---' / '...') at the start of a line.
  chunks = []
  while true
    lookahead = prefix(3)
    if (lookahead == "---" || lookahead == "...") && "\0 \t\r\n\x85".include?(peek(3))
      raise ScannerError.new("while scanning a quoted scalar", start_mark,"found unexpected document separator", get_mark)
    end
    forward while /[ \t]/ =~ peek.chr
    return chunks unless "\r\n\x85".include?(peek)
    chunks << scan_line_break
  end
end
|
1096
|
+
|
1097
|
+
def scan_plain
  # Scan a plain (unquoted) scalar.  In the flow context it may not
  # contain ',', ':' or '?'; the `allow_simple_key` flag is also
  # tracked here.  Indentation rules are relaxed for the flow context.
  chunks = []
  start_mark = get_mark
  end_mark = start_mark
  indent = @indent + 1
  spaces = []
  while true
    length = 0
    break if peek == ?#
    while true
      ch = peek(length)
      # BUGFIX: the lookahead set after ':' read "\0 \t\r\n\x28" --
      # \x28 is '(' and is clearly a typo for the NEL byte \x85 used
      # by every other break set in this scanner.
      if "\0 \t\r\n\x85".include?(ch) || (@flow_level == 0 && ch == ?: && "\0 \t\r\n\x85".include?(peek(length+1))) || (@flow_level != 0 && ",:?[]{}".include?(ch))
        break
      end
      length += 1
    end
    break if length == 0
    @allow_simple_key = false
    chunks += spaces
    chunks << prefix(length)
    forward(length)
    end_mark = get_mark
    spaces = scan_plain_spaces(indent, start_mark)
    break if spaces.nil? || spaces.empty? || peek == ?# || (@flow_level == 0 && @column < indent)
  end
  ScalarToken.new(chunks.join(''), true, start_mark, end_mark)
end
|
1133
|
+
|
1134
|
+
def scan_plain_spaces(indent, start_mark)
  # Scan the blanks/breaks between fragments of a plain scalar.  Tabs
  # are deliberately forbidden here.  Returns nil when a document
  # separator terminates the scalar.
  chunks = []
  length = 0
  length += 1 while peek(length) == 32
  whitespaces = prefix(length)
  forward(length)
  ch = peek
  if "\r\n\x85".include?(ch)
    line_break = scan_line_break
    @allow_simple_key = true
    return if (prefix(3) == "---" || prefix(3) == "...") && "\0 \t\r\n\x85".include?(peek(3))
    breaks = []
    while " \r\n\x85".include?(peek)
      if peek == 32
        forward
      else
        breaks << scan_line_break
        return if (prefix(3) == "---" || prefix(3) == "...") && "\0 \t\r\n\x85".include?(peek(3))
      end
    end
    # BUGFIX: the original compared against the single-quoted literal
    # '\n' (backslash + 'n', two characters), which never equals the
    # real newline returned by scan_line_break -- so plain-scalar
    # folding never happened.
    if line_break != "\n"
      chunks << line_break
    elsif breaks.empty?
      chunks << ' '
    end
    chunks += breaks
  elsif !whitespaces.empty?
    chunks << whitespaces
  end
  chunks
end
|
1170
|
+
|
1171
|
+
def scan_tag_handle(name, start_mark)
  # A tag handle: '!' optionally followed by word characters and a
  # closing '!'.  ('_' is allowed here although the spec omits it.)
  ch = peek
  unless ch == ?!
    raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
  end
  length = 1
  ch = peek(length)
  if ch != 32
    while /[-_0-9A-Za-z]/ =~ ch.chr
      length += 1
      ch = peek(length)
    end
    if ch != ?!
      forward(length)
      raise ScannerError.new("while scanning a #{name}", start_mark,"expected '!', but found #{ch}",get_mark)
    end
    length += 1
  end
  value = prefix(length)
  forward(length)
  value
end
|
1194
|
+
|
1195
|
+
def scan_tag_uri(name, start_mark)
  # Scan a tag URI, decoding %XX escapes via scan_uri_escapes.
  # Well-formedness of the URI is not checked.
  chunks = []
  length = 0
  ch = peek(length)
  while /[\]\[\-';\/?:@&=+$,.!~*()%\w]/ =~ ch.chr
    if ch == ?%
      chunks << prefix(length)
      forward(length)
      length = 0
      chunks << scan_uri_escapes(name, start_mark)
    else
      length += 1
    end
    ch = peek(length)
  end
  if length != 0
    chunks << prefix(length)
    forward(length)
    length = 0
  end
  if chunks.empty?
    raise ScannerError.new("while parsing a #{name}", start_mark,"expected URI, but found #{ch}",get_mark)
  end
  chunks.join('')
end
|
1221
|
+
|
1222
|
+
def scan_uri_escapes(name, start_mark)
  # Decode a run of %XX percent-escapes into the bytes they denote.
  bytes = []
  while peek == ?%
    forward
    2.times do |k|
      unless /[0-9A-Fa-f]/ =~ peek(k).chr
        raise ScannerError.new("while scanning a #{name}", start_mark,"expected URI escape sequence of 2 hexdecimal numbers, but found #{peek(k)}",
          get_mark)
      end
    end
    # BUGFIX: the two digits are hexadecimal; the original parsed them
    # as decimal and appended a hex digit *string* instead of the byte
    # they encode.
    bytes << prefix(2).to_i(16).chr
    forward(2)
  end
  bytes.join('')
end
|
1237
|
+
|
1238
|
+
def scan_line_break
  # Normalize any of "\r\n", "\r", "\n" or "\x85" to "\n"; anything
  # else consumes nothing and yields ''.
  return "" unless "\r\n\x85".include?(peek)
  if prefix(2) == "\r\n"
    forward(2)
  else
    forward
  end
  "\n"
end
|
1256
|
+
end
|
1257
|
+
end
|
1258
|
+
|