rexml 3.2.5 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +499 -2
- data/README.md +10 -1
- data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +17 -11
- data/lib/rexml/document.rb +6 -2
- data/lib/rexml/element.rb +19 -34
- data/lib/rexml/entity.rb +9 -38
- data/lib/rexml/formatters/pretty.rb +3 -3
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +446 -274
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/parsers/xpathparser.rb +136 -86
- data/lib/rexml/rexml.rb +3 -1
- data/lib/rexml/source.rb +171 -100
- data/lib/rexml/text.rb +60 -61
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +13 -52
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,39 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "stringio"
|
5
|
+
require "strscan"
|
6
|
+
|
3
7
|
require_relative 'encoding'
|
4
8
|
|
5
9
|
module REXML
|
10
|
+
if StringScanner::Version < "1.0.0"
|
11
|
+
module StringScannerCheckScanString
|
12
|
+
refine StringScanner do
|
13
|
+
def check(pattern)
|
14
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
15
|
+
super(pattern)
|
16
|
+
end
|
17
|
+
|
18
|
+
def scan(pattern)
|
19
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
20
|
+
super(pattern)
|
21
|
+
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
using StringScannerCheckScanString
|
35
|
+
end
|
36
|
+
|
6
37
|
# Generates Source-s. USE THIS CLASS.
|
7
38
|
class SourceFactory
|
8
39
|
# Generates a Source object
|
@@ -15,7 +46,6 @@ module REXML
|
|
15
46
|
arg.respond_to? :eof?
|
16
47
|
IOSource.new(arg)
|
17
48
|
elsif arg.respond_to? :to_str
|
18
|
-
require 'stringio'
|
19
49
|
IOSource.new(StringIO.new(arg))
|
20
50
|
elsif arg.kind_of? Source
|
21
51
|
arg
|
@@ -30,26 +60,50 @@ module REXML
|
|
30
60
|
# objects and provides consumption of text
|
31
61
|
class Source
|
32
62
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
63
|
# The line number of the last consumed text
|
36
64
|
attr_reader :line
|
37
65
|
attr_reader :encoding
|
38
66
|
|
67
|
+
module Private
|
68
|
+
SCANNER_RESET_SIZE = 100000
|
69
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
70
|
+
pre_defined_terms = ["'", '"', "<"]
|
71
|
+
pre_defined_terms.each do |term|
|
72
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
73
|
+
end
|
74
|
+
end
|
75
|
+
private_constant :Private
|
76
|
+
|
39
77
|
# Constructor
|
40
78
|
# @param arg must be a String, and should be a valid XML document
|
41
79
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
80
|
# value, overriding all encoding detection
|
43
81
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig =
|
82
|
+
@orig = arg
|
83
|
+
@scanner = StringScanner.new(@orig)
|
45
84
|
if encoding
|
46
85
|
self.encoding = encoding
|
47
86
|
else
|
48
87
|
detect_encoding
|
49
88
|
end
|
50
89
|
@line = 0
|
90
|
+
@encoded_terms = {}
|
91
|
+
end
|
92
|
+
|
93
|
+
# The current buffer (what we're going to read next)
|
94
|
+
def buffer
|
95
|
+
@scanner.rest
|
51
96
|
end
|
52
97
|
|
98
|
+
def drop_parsed_content
|
99
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
100
|
+
@scanner.string = @scanner.rest
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def buffer_encoding=(encoding)
|
105
|
+
@scanner.string.force_encoding(encoding)
|
106
|
+
end
|
53
107
|
|
54
108
|
# Inherited from Encoding
|
55
109
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +112,86 @@ module REXML
|
|
58
112
|
encoding_updated
|
59
113
|
end
|
60
114
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
115
|
+
def read(term = nil)
|
82
116
|
end
|
83
117
|
|
84
|
-
def
|
118
|
+
def read_until(term)
|
119
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
120
|
+
data = @scanner.scan_until(pattern)
|
121
|
+
unless data
|
122
|
+
data = @scanner.rest
|
123
|
+
@scanner.pos = @scanner.string.bytesize
|
124
|
+
end
|
125
|
+
data
|
85
126
|
end
|
86
127
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
128
|
+
def ensure_buffer
|
89
129
|
end
|
90
130
|
|
91
|
-
def
|
92
|
-
|
131
|
+
def match(pattern, cons=false)
|
132
|
+
if cons
|
133
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
134
|
+
else
|
135
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
136
|
+
end
|
93
137
|
end
|
94
138
|
|
95
|
-
def
|
96
|
-
|
97
|
-
|
98
|
-
|
139
|
+
def match?(pattern, cons=false)
|
140
|
+
if cons
|
141
|
+
!@scanner.skip(pattern).nil?
|
142
|
+
else
|
143
|
+
!@scanner.match?(pattern).nil?
|
144
|
+
end
|
99
145
|
end
|
100
146
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
147
|
+
def position
|
148
|
+
@scanner.pos
|
105
149
|
end
|
106
150
|
|
107
|
-
|
108
|
-
|
109
|
-
@buffer == ""
|
151
|
+
def position=(pos)
|
152
|
+
@scanner.pos = pos
|
110
153
|
end
|
111
154
|
|
112
|
-
|
113
|
-
|
155
|
+
# @return true if the Source is exhausted
|
156
|
+
def empty?
|
157
|
+
@scanner.eos?
|
114
158
|
end
|
115
159
|
|
116
160
|
# @return the current line in the source
|
117
161
|
def current_line
|
118
162
|
lines = @orig.split
|
119
|
-
res = lines.grep @
|
163
|
+
res = lines.grep @scanner.rest[0..30]
|
120
164
|
res = res[-1] if res.kind_of? Array
|
121
165
|
lines.index( res ) if res
|
122
166
|
end
|
123
167
|
|
124
168
|
private
|
169
|
+
|
125
170
|
def detect_encoding
|
126
|
-
|
171
|
+
scanner_encoding = @scanner.rest.encoding
|
127
172
|
detected_encoding = "UTF-8"
|
128
173
|
begin
|
129
|
-
@
|
130
|
-
if @
|
131
|
-
@buffer[0, 2] = ""
|
174
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
175
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
176
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @
|
134
|
-
@buffer[0, 2] = ""
|
177
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
178
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @
|
137
|
-
@buffer[0, 3] = ""
|
179
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
180
|
detected_encoding = "UTF-8"
|
139
181
|
end
|
140
182
|
ensure
|
141
|
-
@
|
183
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
184
|
end
|
143
185
|
self.encoding = detected_encoding
|
144
186
|
end
|
145
187
|
|
146
188
|
def encoding_updated
|
147
189
|
if @encoding != 'UTF-8'
|
148
|
-
@
|
190
|
+
@scanner.string = decode(@scanner.rest)
|
149
191
|
@to_utf = true
|
150
192
|
else
|
151
193
|
@to_utf = false
|
152
|
-
@
|
194
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
195
|
end
|
154
196
|
end
|
155
197
|
end
|
@@ -172,7 +214,7 @@ module REXML
|
|
172
214
|
end
|
173
215
|
|
174
216
|
if !@to_utf and
|
175
|
-
@
|
217
|
+
@orig.respond_to?(:force_encoding) and
|
176
218
|
@source.respond_to?(:external_encoding) and
|
177
219
|
@source.external_encoding != ::Encoding::UTF_8
|
178
220
|
@force_utf8 = true
|
@@ -181,63 +223,87 @@ module REXML
|
|
181
223
|
end
|
182
224
|
end
|
183
225
|
|
184
|
-
def
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
226
|
+
def read(term = nil, min_bytes = 1)
|
227
|
+
term = encode(term) if term
|
228
|
+
begin
|
229
|
+
str = readline(term)
|
230
|
+
@scanner << str
|
231
|
+
read_bytes = str.bytesize
|
232
|
+
begin
|
233
|
+
while read_bytes < min_bytes
|
234
|
+
str = readline(term)
|
235
|
+
@scanner << str
|
236
|
+
read_bytes += str.bytesize
|
199
237
|
end
|
238
|
+
rescue IOError
|
200
239
|
end
|
201
|
-
|
240
|
+
true
|
241
|
+
rescue Exception, NameError
|
242
|
+
@source = nil
|
243
|
+
false
|
202
244
|
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
245
|
end
|
206
246
|
|
207
|
-
def
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
@source
|
247
|
+
def read_until(term)
|
248
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
249
|
+
term = @encoded_terms[term] ||= encode(term)
|
250
|
+
until str = @scanner.scan_until(pattern)
|
251
|
+
break if @source.nil?
|
252
|
+
break if @source.eof?
|
253
|
+
@scanner << readline(term)
|
254
|
+
end
|
255
|
+
if str
|
256
|
+
read if @scanner.eos? and !@source.eof?
|
257
|
+
str
|
258
|
+
else
|
259
|
+
rest = @scanner.rest
|
260
|
+
@scanner.pos = @scanner.string.bytesize
|
261
|
+
rest
|
212
262
|
end
|
213
263
|
end
|
214
264
|
|
215
|
-
def
|
216
|
-
|
265
|
+
def ensure_buffer
|
266
|
+
read if @scanner.eos? && @source
|
217
267
|
end
|
218
268
|
|
219
269
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
while
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
rescue
|
228
|
-
@source = nil
|
270
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
271
|
+
min_bytes = 1
|
272
|
+
while true
|
273
|
+
if cons
|
274
|
+
md = @scanner.scan(pattern)
|
275
|
+
else
|
276
|
+
md = @scanner.check(pattern)
|
229
277
|
end
|
278
|
+
break if md
|
279
|
+
return nil if pattern.is_a?(String)
|
280
|
+
return nil if @source.nil?
|
281
|
+
return nil unless read(nil, min_bytes)
|
282
|
+
min_bytes *= 2
|
230
283
|
end
|
231
|
-
|
232
|
-
|
284
|
+
|
285
|
+
md.nil? ? nil : @scanner
|
233
286
|
end
|
234
287
|
|
235
|
-
def
|
236
|
-
|
288
|
+
def match?( pattern, cons=false )
|
289
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
290
|
+
min_bytes = 1
|
291
|
+
while true
|
292
|
+
if cons
|
293
|
+
n_matched_bytes = @scanner.skip(pattern)
|
294
|
+
else
|
295
|
+
n_matched_bytes = @scanner.match?(pattern)
|
296
|
+
end
|
297
|
+
return true if n_matched_bytes
|
298
|
+
return false if pattern.is_a?(String)
|
299
|
+
return false if @source.nil?
|
300
|
+
return false unless read(nil, min_bytes)
|
301
|
+
min_bytes *= 2
|
302
|
+
end
|
237
303
|
end
|
238
304
|
|
239
|
-
def
|
240
|
-
@
|
305
|
+
def empty?
|
306
|
+
super and ( @source.nil? || @source.eof? )
|
241
307
|
end
|
242
308
|
|
243
309
|
# @return the current line in the source
|
@@ -255,7 +321,7 @@ module REXML
|
|
255
321
|
rescue
|
256
322
|
end
|
257
323
|
@er_source.seek(pos)
|
258
|
-
rescue IOError
|
324
|
+
rescue IOError, SystemCallError
|
259
325
|
pos = -1
|
260
326
|
line = -1
|
261
327
|
end
|
@@ -263,15 +329,20 @@ module REXML
|
|
263
329
|
end
|
264
330
|
|
265
331
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
332
|
+
def readline(term = nil)
|
268
333
|
if @pending_buffer
|
334
|
+
begin
|
335
|
+
str = @source.readline(term || @line_break)
|
336
|
+
rescue IOError
|
337
|
+
end
|
269
338
|
if str.nil?
|
270
339
|
str = @pending_buffer
|
271
340
|
else
|
272
341
|
str = @pending_buffer + str
|
273
342
|
end
|
274
343
|
@pending_buffer = nil
|
344
|
+
else
|
345
|
+
str = @source.readline(term || @line_break)
|
275
346
|
end
|
276
347
|
return nil if str.nil?
|
277
348
|
|
@@ -290,7 +361,7 @@ module REXML
|
|
290
361
|
@source.set_encoding(@encoding, @encoding)
|
291
362
|
end
|
292
363
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @
|
364
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
365
|
@pending_buffer.force_encoding(@encoding)
|
295
366
|
super
|
296
367
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative 'security'
|
3
3
|
require_relative 'entity'
|
4
4
|
require_relative 'doctype'
|
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -131,45 +116,55 @@ module REXML
|
|
131
116
|
def Text.check string, pattern, doctype
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
|
-
if string
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
119
|
+
if !string.match?(VALID_XML_CHARS)
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
153
128
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
129
|
+
pos = 0
|
130
|
+
while (index = string.index(/<|&/, pos))
|
131
|
+
if string[index] == "<"
|
132
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
133
|
+
end
|
134
|
+
|
135
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
136
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
137
|
+
end
|
138
|
+
|
139
|
+
value = string[(index + 1)..end_index]
|
140
|
+
if /\s/.match?(value)
|
141
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
142
|
+
end
|
143
|
+
|
144
|
+
if value[0] == "#"
|
145
|
+
character_reference = value[1..-1]
|
146
|
+
|
147
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
148
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
149
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
150
|
else
|
163
|
-
raise "Illegal character #{
|
151
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
152
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
153
|
end
|
154
|
+
|
155
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
156
|
+
when *VALID_CHAR
|
157
|
+
else
|
158
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
159
|
+
end
|
160
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
162
|
end
|
163
|
+
|
164
|
+
pos = end_index + 1
|
172
165
|
end
|
166
|
+
|
167
|
+
string
|
173
168
|
end
|
174
169
|
|
175
170
|
def node_type
|
@@ -248,7 +243,8 @@ module REXML
|
|
248
243
|
# u = Text.new( "sean russell", false, nil, true )
|
249
244
|
# u.value #-> "sean russell"
|
250
245
|
def value
|
251
|
-
@unnormalized ||= Text::unnormalize(
|
246
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
247
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
252
248
|
end
|
253
249
|
|
254
250
|
# Sets the contents of this text node. This expects the text to be
|
@@ -371,7 +367,7 @@ module REXML
|
|
371
367
|
copy = input.to_s
|
372
368
|
# Doing it like this rather than in a loop improves the speed
|
373
369
|
#copy = copy.gsub( EREFERENCE, '&amp;' )
|
374
|
-
copy = copy.gsub( "&", "&amp;" )
|
370
|
+
copy = copy.gsub( "&", "&amp;" ) if copy.include?("&")
|
375
371
|
if doctype
|
376
372
|
# Replace all ampersands that aren't part of an entity
|
377
373
|
doctype.entities.each_value do |entity|
|
@@ -382,18 +378,21 @@ module REXML
|
|
382
378
|
else
|
383
379
|
# Replace all ampersands that aren't part of an entity
|
384
380
|
DocType::DEFAULT_ENTITIES.each_value do |entity|
|
385
|
-
|
381
|
+
if copy.include?(entity.value)
|
382
|
+
copy = copy.gsub(entity.value, "&#{entity.name};" )
|
383
|
+
end
|
386
384
|
end
|
387
385
|
end
|
388
386
|
copy
|
389
387
|
end
|
390
388
|
|
391
389
|
# Unescapes all possible entities
|
392
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
390
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
391
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
393
392
|
sum = 0
|
394
393
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
395
394
|
s = Text.expand($&, doctype, filter)
|
396
|
-
if sum + s.bytesize >
|
395
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
397
396
|
raise "entity expansion has grown too large"
|
398
397
|
else
|
399
398
|
sum += s.bytesize
|
data/lib/rexml/xpath_parser.rb
CHANGED
@@ -590,6 +590,7 @@ module REXML
|
|
590
590
|
|
591
591
|
def evaluate_predicate(expression, nodesets)
|
592
592
|
enter(:predicate, expression, nodesets) if @debug
|
593
|
+
new_nodeset_count = 0
|
593
594
|
new_nodesets = nodesets.collect do |nodeset|
|
594
595
|
new_nodeset = []
|
595
596
|
subcontext = { :size => nodeset.size }
|
@@ -606,17 +607,20 @@ module REXML
|
|
606
607
|
result = result[0] if result.kind_of? Array and result.length == 1
|
607
608
|
if result.kind_of? Numeric
|
608
609
|
if result == node.position
|
609
|
-
|
610
|
+
new_nodeset_count += 1
|
611
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
610
612
|
end
|
611
613
|
elsif result.instance_of? Array
|
612
614
|
if result.size > 0 and result.inject(false) {|k,s| s or k}
|
613
615
|
if result.size > 0
|
614
|
-
|
616
|
+
new_nodeset_count += 1
|
617
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
615
618
|
end
|
616
619
|
end
|
617
620
|
else
|
618
621
|
if result
|
619
|
-
|
622
|
+
new_nodeset_count += 1
|
623
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
620
624
|
end
|
621
625
|
end
|
622
626
|
end
|