rexml 3.2.6 → 3.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +370 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +16 -31
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +433 -265
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +134 -98
- data/lib/rexml/text.rb +39 -17
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +6 -50
@@ -47,6 +47,18 @@ module REXML
|
|
47
47
|
@listeners << listener
|
48
48
|
end
|
49
49
|
|
50
|
+
def entity_expansion_count
|
51
|
+
@parser.entity_expansion_count
|
52
|
+
end
|
53
|
+
|
54
|
+
def entity_expansion_limit=( limit )
|
55
|
+
@parser.entity_expansion_limit = limit
|
56
|
+
end
|
57
|
+
|
58
|
+
def entity_expansion_text_limit=( limit )
|
59
|
+
@parser.entity_expansion_text_limit = limit
|
60
|
+
end
|
61
|
+
|
50
62
|
def each
|
51
63
|
while has_next?
|
52
64
|
yield self.pull
|
@@ -22,6 +22,18 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
29
|
+
def entity_expansion_limit=( limit )
|
30
|
+
@parser.entity_expansion_limit = limit
|
31
|
+
end
|
32
|
+
|
33
|
+
def entity_expansion_text_limit=( limit )
|
34
|
+
@parser.entity_expansion_text_limit = limit
|
35
|
+
end
|
36
|
+
|
25
37
|
def add_listener( listener )
|
26
38
|
@parser.add_listener( listener )
|
27
39
|
end
|
@@ -157,25 +169,8 @@ module REXML
|
|
157
169
|
end
|
158
170
|
end
|
159
171
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
172
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
173
|
+
handle( :characters, unnormalized )
|
179
174
|
when :entitydecl
|
180
175
|
handle_entitydecl( event )
|
181
176
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -264,6 +259,8 @@ module REXML
|
|
264
259
|
end
|
265
260
|
|
266
261
|
def get_namespace( prefix )
|
262
|
+
return nil if @namespace_stack.empty?
|
263
|
+
|
267
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
268
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
269
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
@@ -7,37 +7,42 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
21
|
+
def entity_expansion_limit=( limit )
|
22
|
+
@parser.entity_expansion_limit = limit
|
23
|
+
end
|
24
|
+
|
25
|
+
def entity_expansion_text_limit=( limit )
|
26
|
+
@parser.entity_expansion_text_limit = limit
|
27
|
+
end
|
28
|
+
|
17
29
|
def parse
|
18
30
|
# entity string
|
19
31
|
while true
|
20
32
|
event = @parser.pull
|
21
33
|
case event[0]
|
22
34
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
35
|
return
|
29
36
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
37
|
attrs = event[2].each do |n, v|
|
32
38
|
event[2][n] = @parser.unnormalize( v )
|
33
39
|
end
|
34
40
|
@listener.tag_start( event[1], attrs )
|
35
41
|
when :end_element
|
36
42
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
43
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
44
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
45
|
+
@listener.text( unnormalized )
|
41
46
|
when :processing_instruction
|
42
47
|
@listener.instruction( *event[1,2] )
|
43
48
|
when :start_doctype
|
@@ -48,6 +53,7 @@ module REXML
|
|
48
53
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
54
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
55
|
when :entitydecl, :notationdecl
|
56
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
57
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
58
|
when :externalentity
|
53
59
|
entity_reference = event[1]
|
@@ -15,8 +15,6 @@ module REXML
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def parse
|
18
|
-
tag_stack = []
|
19
|
-
in_doctype = false
|
20
18
|
entities = nil
|
21
19
|
begin
|
22
20
|
while true
|
@@ -24,32 +22,24 @@ module REXML
|
|
24
22
|
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
|
25
23
|
case event[0]
|
26
24
|
when :end_document
|
27
|
-
unless tag_stack.empty?
|
28
|
-
raise ParseException.new("No close tag for #{@build_context.xpath}",
|
29
|
-
@parser.source, @parser)
|
30
|
-
end
|
31
25
|
return
|
32
26
|
when :start_element
|
33
|
-
tag_stack.push(event[1])
|
34
27
|
el = @build_context = @build_context.add_element( event[1] )
|
35
28
|
event[2].each do |key, value|
|
36
29
|
el.attributes[key]=Attribute.new(key,value,self)
|
37
30
|
end
|
38
31
|
when :end_element
|
39
|
-
tag_stack.pop
|
40
32
|
@build_context = @build_context.parent
|
41
33
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
34
|
+
if @build_context[-1].instance_of? Text
|
35
|
+
@build_context[-1] << event[1]
|
36
|
+
else
|
37
|
+
@build_context.add(
|
38
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
39
|
+
) unless (
|
40
|
+
@build_context.ignore_whitespace_nodes and
|
41
|
+
event[1].strip.size==0
|
42
|
+
)
|
53
43
|
end
|
54
44
|
when :comment
|
55
45
|
c = Comment.new( event[1] )
|
@@ -60,14 +50,12 @@ module REXML
|
|
60
50
|
when :processing_instruction
|
61
51
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
52
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
53
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
54
|
@build_context = @build_context.parent
|
66
55
|
when :start_doctype
|
67
56
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
57
|
@build_context = doctype
|
69
58
|
entities = {}
|
70
|
-
in_doctype = true
|
71
59
|
when :attlistdecl
|
72
60
|
n = AttlistDecl.new( event[1..-1] )
|
73
61
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -30,26 +50,50 @@ module REXML
|
|
30
50
|
# objects and provides consumption of text
|
31
51
|
class Source
|
32
52
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
53
|
# The line number of the last consumed text
|
36
54
|
attr_reader :line
|
37
55
|
attr_reader :encoding
|
38
56
|
|
57
|
+
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
59
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
60
|
+
pre_defined_terms = ["'", '"', "<"]
|
61
|
+
pre_defined_terms.each do |term|
|
62
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
63
|
+
end
|
64
|
+
end
|
65
|
+
private_constant :Private
|
66
|
+
|
39
67
|
# Constructor
|
40
68
|
# @param arg must be a String, and should be a valid XML document
|
41
69
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
70
|
# value, overriding all encoding detection
|
43
71
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig =
|
72
|
+
@orig = arg
|
73
|
+
@scanner = StringScanner.new(@orig)
|
45
74
|
if encoding
|
46
75
|
self.encoding = encoding
|
47
76
|
else
|
48
77
|
detect_encoding
|
49
78
|
end
|
50
79
|
@line = 0
|
80
|
+
@term_encord = {}
|
51
81
|
end
|
52
82
|
|
83
|
+
# The current buffer (what we're going to read next)
|
84
|
+
def buffer
|
85
|
+
@scanner.rest
|
86
|
+
end
|
87
|
+
|
88
|
+
def drop_parsed_content
|
89
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
90
|
+
@scanner.string = @scanner.rest
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
def buffer_encoding=(encoding)
|
95
|
+
@scanner.string.force_encoding(encoding)
|
96
|
+
end
|
53
97
|
|
54
98
|
# Inherited from Encoding
|
55
99
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +102,78 @@ module REXML
|
|
58
102
|
encoding_updated
|
59
103
|
end
|
60
104
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
105
|
+
def read(term = nil)
|
82
106
|
end
|
83
107
|
|
84
|
-
def
|
108
|
+
def read_until(term)
|
109
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
110
|
+
data = @scanner.scan_until(pattern)
|
111
|
+
unless data
|
112
|
+
data = @scanner.rest
|
113
|
+
@scanner.pos = @scanner.string.bytesize
|
114
|
+
end
|
115
|
+
data
|
85
116
|
end
|
86
117
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
118
|
+
def ensure_buffer
|
89
119
|
end
|
90
120
|
|
91
|
-
def
|
92
|
-
|
121
|
+
def match(pattern, cons=false)
|
122
|
+
if cons
|
123
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
124
|
+
else
|
125
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
126
|
+
end
|
93
127
|
end
|
94
128
|
|
95
|
-
def
|
96
|
-
|
97
|
-
@buffer = $'
|
98
|
-
return md
|
129
|
+
def position
|
130
|
+
@scanner.pos
|
99
131
|
end
|
100
132
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
133
|
+
def position=(pos)
|
134
|
+
@scanner.pos = pos
|
105
135
|
end
|
106
136
|
|
107
137
|
# @return true if the Source is exhausted
|
108
138
|
def empty?
|
109
|
-
@
|
110
|
-
end
|
111
|
-
|
112
|
-
def position
|
113
|
-
@orig.index( @buffer )
|
139
|
+
@scanner.eos?
|
114
140
|
end
|
115
141
|
|
116
142
|
# @return the current line in the source
|
117
143
|
def current_line
|
118
144
|
lines = @orig.split
|
119
|
-
res = lines.grep @
|
145
|
+
res = lines.grep @scanner.rest[0..30]
|
120
146
|
res = res[-1] if res.kind_of? Array
|
121
147
|
lines.index( res ) if res
|
122
148
|
end
|
123
149
|
|
124
150
|
private
|
151
|
+
|
125
152
|
def detect_encoding
|
126
|
-
|
153
|
+
scanner_encoding = @scanner.rest.encoding
|
127
154
|
detected_encoding = "UTF-8"
|
128
155
|
begin
|
129
|
-
@
|
130
|
-
if @
|
131
|
-
@buffer[0, 2] = ""
|
156
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
157
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
158
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @
|
134
|
-
@buffer[0, 2] = ""
|
159
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
160
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @
|
137
|
-
@buffer[0, 3] = ""
|
161
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
162
|
detected_encoding = "UTF-8"
|
139
163
|
end
|
140
164
|
ensure
|
141
|
-
@
|
165
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
166
|
end
|
143
167
|
self.encoding = detected_encoding
|
144
168
|
end
|
145
169
|
|
146
170
|
def encoding_updated
|
147
171
|
if @encoding != 'UTF-8'
|
148
|
-
@
|
172
|
+
@scanner.string = decode(@scanner.rest)
|
149
173
|
@to_utf = true
|
150
174
|
else
|
151
175
|
@to_utf = false
|
152
|
-
@
|
176
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
177
|
end
|
154
178
|
end
|
155
179
|
end
|
@@ -172,7 +196,7 @@ module REXML
|
|
172
196
|
end
|
173
197
|
|
174
198
|
if !@to_utf and
|
175
|
-
@
|
199
|
+
@orig.respond_to?(:force_encoding) and
|
176
200
|
@source.respond_to?(:external_encoding) and
|
177
201
|
@source.external_encoding != ::Encoding::UTF_8
|
178
202
|
@force_utf8 = true
|
@@ -181,65 +205,72 @@ module REXML
|
|
181
205
|
end
|
182
206
|
end
|
183
207
|
|
184
|
-
def
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
208
|
+
def read(term = nil, min_bytes = 1)
|
209
|
+
term = encode(term) if term
|
210
|
+
begin
|
211
|
+
str = readline(term)
|
212
|
+
@scanner << str
|
213
|
+
read_bytes = str.bytesize
|
214
|
+
begin
|
215
|
+
while read_bytes < min_bytes
|
216
|
+
str = readline(term)
|
217
|
+
@scanner << str
|
218
|
+
read_bytes += str.bytesize
|
199
219
|
end
|
220
|
+
rescue IOError
|
200
221
|
end
|
201
|
-
|
222
|
+
true
|
223
|
+
rescue Exception, NameError
|
224
|
+
@source = nil
|
225
|
+
false
|
202
226
|
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
227
|
end
|
206
228
|
|
207
|
-
def
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
@source
|
229
|
+
def read_until(term)
|
230
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
231
|
+
term = @term_encord[term] ||= encode(term)
|
232
|
+
until str = @scanner.scan_until(pattern)
|
233
|
+
break if @source.nil?
|
234
|
+
break if @source.eof?
|
235
|
+
@scanner << readline(term)
|
236
|
+
end
|
237
|
+
if str
|
238
|
+
read if @scanner.eos? and !@source.eof?
|
239
|
+
str
|
240
|
+
else
|
241
|
+
rest = @scanner.rest
|
242
|
+
@scanner.pos = @scanner.string.bytesize
|
243
|
+
rest
|
212
244
|
end
|
213
245
|
end
|
214
246
|
|
215
|
-
def
|
216
|
-
|
247
|
+
def ensure_buffer
|
248
|
+
read if @scanner.eos? && @source
|
217
249
|
end
|
218
250
|
|
219
251
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
while
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
rescue
|
228
|
-
@source = nil
|
252
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
253
|
+
min_bytes = 1
|
254
|
+
while true
|
255
|
+
if cons
|
256
|
+
md = @scanner.scan(pattern)
|
257
|
+
else
|
258
|
+
md = @scanner.check(pattern)
|
229
259
|
end
|
260
|
+
break if md
|
261
|
+
return nil if pattern.is_a?(String)
|
262
|
+
return nil if @source.nil?
|
263
|
+
return nil unless read(nil, min_bytes)
|
264
|
+
min_bytes *= 2
|
230
265
|
end
|
231
|
-
|
232
|
-
|
266
|
+
|
267
|
+
md.nil? ? nil : @scanner
|
233
268
|
end
|
234
269
|
|
235
270
|
def empty?
|
236
271
|
super and ( @source.nil? || @source.eof? )
|
237
272
|
end
|
238
273
|
|
239
|
-
def position
|
240
|
-
@er_source.pos rescue 0
|
241
|
-
end
|
242
|
-
|
243
274
|
# @return the current line in the source
|
244
275
|
def current_line
|
245
276
|
begin
|
@@ -263,15 +294,20 @@ module REXML
|
|
263
294
|
end
|
264
295
|
|
265
296
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
297
|
+
def readline(term = nil)
|
268
298
|
if @pending_buffer
|
299
|
+
begin
|
300
|
+
str = @source.readline(term || @line_break)
|
301
|
+
rescue IOError
|
302
|
+
end
|
269
303
|
if str.nil?
|
270
304
|
str = @pending_buffer
|
271
305
|
else
|
272
306
|
str = @pending_buffer + str
|
273
307
|
end
|
274
308
|
@pending_buffer = nil
|
309
|
+
else
|
310
|
+
str = @source.readline(term || @line_break)
|
275
311
|
end
|
276
312
|
return nil if str.nil?
|
277
313
|
|
@@ -290,7 +326,7 @@ module REXML
|
|
290
326
|
@source.set_encoding(@encoding, @encoding)
|
291
327
|
end
|
292
328
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @
|
329
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
330
|
@pending_buffer.force_encoding(@encoding)
|
295
331
|
super
|
296
332
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -151,25 +151,45 @@ module REXML
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
154
|
+
pos = 0
|
155
|
+
while (index = string.index(/<|&/, pos))
|
156
|
+
if string[index] == "<"
|
157
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
158
|
+
end
|
159
|
+
|
160
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
|
+
end
|
163
|
+
|
164
|
+
value = string[(index + 1)..end_index]
|
165
|
+
if /\s/.match?(value)
|
166
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
167
|
+
end
|
168
|
+
|
169
|
+
if value[0] == "#"
|
170
|
+
character_reference = value[1..-1]
|
171
|
+
|
172
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
173
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
174
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
175
|
else
|
163
|
-
raise "Illegal character #{
|
176
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
177
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
178
|
end
|
179
|
+
|
180
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
181
|
+
when *VALID_CHAR
|
182
|
+
else
|
183
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
184
|
+
end
|
185
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
186
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
187
|
end
|
188
|
+
|
189
|
+
pos = end_index + 1
|
172
190
|
end
|
191
|
+
|
192
|
+
string
|
173
193
|
end
|
174
194
|
|
175
195
|
def node_type
|
@@ -248,7 +268,8 @@ module REXML
|
|
248
268
|
# u = Text.new( "sean russell", false, nil, true )
|
249
269
|
# u.value #-> "sean russell"
|
250
270
|
def value
|
251
|
-
@unnormalized ||= Text::unnormalize(
|
271
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
272
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
252
273
|
end
|
253
274
|
|
254
275
|
# Sets the contents of this text node. This expects the text to be
|
@@ -391,11 +412,12 @@ module REXML
|
|
391
412
|
end
|
392
413
|
|
393
414
|
# Unescapes all possible entities
|
394
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
415
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
416
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
395
417
|
sum = 0
|
396
418
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
397
419
|
s = Text.expand($&, doctype, filter)
|
398
|
-
if sum + s.bytesize >
|
420
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
399
421
|
raise "entity expansion has grown too large"
|
400
422
|
else
|
401
423
|
sum += s.bytesize
|