rexml 3.2.5 → 3.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +523 -2
- data/README.md +10 -1
- data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +17 -11
- data/lib/rexml/document.rb +6 -2
- data/lib/rexml/element.rb +19 -34
- data/lib/rexml/entity.rb +9 -38
- data/lib/rexml/formatters/pretty.rb +3 -3
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +466 -273
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/parsers/xpathparser.rb +136 -86
- data/lib/rexml/rexml.rb +3 -1
- data/lib/rexml/source.rb +185 -100
- data/lib/rexml/text.rb +60 -61
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +10 -52
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,39 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "stringio"
|
5
|
+
require "strscan"
|
6
|
+
|
3
7
|
require_relative 'encoding'
|
4
8
|
|
5
9
|
module REXML
|
10
|
+
if StringScanner::Version < "1.0.0"
|
11
|
+
module StringScannerCheckScanString
|
12
|
+
refine StringScanner do
|
13
|
+
def check(pattern)
|
14
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
15
|
+
super(pattern)
|
16
|
+
end
|
17
|
+
|
18
|
+
def scan(pattern)
|
19
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
20
|
+
super(pattern)
|
21
|
+
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
using StringScannerCheckScanString
|
35
|
+
end
|
36
|
+
|
6
37
|
# Generates Source-s. USE THIS CLASS.
|
7
38
|
class SourceFactory
|
8
39
|
# Generates a Source object
|
@@ -15,7 +46,6 @@ module REXML
|
|
15
46
|
arg.respond_to? :eof?
|
16
47
|
IOSource.new(arg)
|
17
48
|
elsif arg.respond_to? :to_str
|
18
|
-
require 'stringio'
|
19
49
|
IOSource.new(StringIO.new(arg))
|
20
50
|
elsif arg.kind_of? Source
|
21
51
|
arg
|
@@ -30,26 +60,56 @@ module REXML
|
|
30
60
|
# objects and provides consumption of text
|
31
61
|
class Source
|
32
62
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
63
|
# The line number of the last consumed text
|
36
64
|
attr_reader :line
|
37
65
|
attr_reader :encoding
|
38
66
|
|
67
|
+
module Private
|
68
|
+
SCANNER_RESET_SIZE = 100000
|
69
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
70
|
+
pre_defined_terms = ["'", '"', "<"]
|
71
|
+
if StringScanner::Version < "3.1.1"
|
72
|
+
pre_defined_terms.each do |term|
|
73
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
74
|
+
end
|
75
|
+
else
|
76
|
+
pre_defined_terms.each do |term|
|
77
|
+
PRE_DEFINED_TERM_PATTERNS[term] = term
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
private_constant :Private
|
82
|
+
|
39
83
|
# Constructor
|
40
84
|
# @param arg must be a String, and should be a valid XML document
|
41
85
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
86
|
# value, overriding all encoding detection
|
43
87
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig =
|
88
|
+
@orig = arg
|
89
|
+
@scanner = StringScanner.new(@orig)
|
45
90
|
if encoding
|
46
91
|
self.encoding = encoding
|
47
92
|
else
|
48
93
|
detect_encoding
|
49
94
|
end
|
50
95
|
@line = 0
|
96
|
+
@encoded_terms = {}
|
97
|
+
end
|
98
|
+
|
99
|
+
# The current buffer (what we're going to read next)
|
100
|
+
def buffer
|
101
|
+
@scanner.rest
|
51
102
|
end
|
52
103
|
|
104
|
+
def drop_parsed_content
|
105
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
106
|
+
@scanner.string = @scanner.rest
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
def buffer_encoding=(encoding)
|
111
|
+
@scanner.string.force_encoding(encoding)
|
112
|
+
end
|
53
113
|
|
54
114
|
# Inherited from Encoding
|
55
115
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +118,94 @@ module REXML
|
|
58
118
|
encoding_updated
|
59
119
|
end
|
60
120
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
121
|
+
def read(term = nil)
|
82
122
|
end
|
83
123
|
|
84
|
-
def
|
124
|
+
def read_until(term)
|
125
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
126
|
+
data = @scanner.scan_until(pattern)
|
127
|
+
unless data
|
128
|
+
data = @scanner.rest
|
129
|
+
@scanner.pos = @scanner.string.bytesize
|
130
|
+
end
|
131
|
+
data
|
85
132
|
end
|
86
133
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
134
|
+
def ensure_buffer
|
89
135
|
end
|
90
136
|
|
91
|
-
def
|
92
|
-
|
137
|
+
def match(pattern, cons=false)
|
138
|
+
if cons
|
139
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
140
|
+
else
|
141
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
142
|
+
end
|
93
143
|
end
|
94
144
|
|
95
|
-
def
|
96
|
-
|
97
|
-
|
98
|
-
|
145
|
+
def match?(pattern, cons=false)
|
146
|
+
if cons
|
147
|
+
!@scanner.skip(pattern).nil?
|
148
|
+
else
|
149
|
+
!@scanner.match?(pattern).nil?
|
150
|
+
end
|
99
151
|
end
|
100
152
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
153
|
+
def position
|
154
|
+
@scanner.pos
|
105
155
|
end
|
106
156
|
|
107
|
-
|
108
|
-
|
109
|
-
@buffer == ""
|
157
|
+
def position=(pos)
|
158
|
+
@scanner.pos = pos
|
110
159
|
end
|
111
160
|
|
112
|
-
def
|
113
|
-
@
|
161
|
+
def peek_byte
|
162
|
+
@scanner.peek_byte
|
163
|
+
end
|
164
|
+
|
165
|
+
def scan_byte
|
166
|
+
@scanner.scan_byte
|
167
|
+
end
|
168
|
+
|
169
|
+
# @return true if the Source is exhausted
|
170
|
+
def empty?
|
171
|
+
@scanner.eos?
|
114
172
|
end
|
115
173
|
|
116
174
|
# @return the current line in the source
|
117
175
|
def current_line
|
118
176
|
lines = @orig.split
|
119
|
-
res = lines.grep @
|
177
|
+
res = lines.grep @scanner.rest[0..30]
|
120
178
|
res = res[-1] if res.kind_of? Array
|
121
179
|
lines.index( res ) if res
|
122
180
|
end
|
123
181
|
|
124
182
|
private
|
183
|
+
|
125
184
|
def detect_encoding
|
126
|
-
|
185
|
+
scanner_encoding = @scanner.rest.encoding
|
127
186
|
detected_encoding = "UTF-8"
|
128
187
|
begin
|
129
|
-
@
|
130
|
-
if @
|
131
|
-
@buffer[0, 2] = ""
|
188
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
189
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
190
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @
|
134
|
-
@buffer[0, 2] = ""
|
191
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
192
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @
|
137
|
-
@buffer[0, 3] = ""
|
193
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
194
|
detected_encoding = "UTF-8"
|
139
195
|
end
|
140
196
|
ensure
|
141
|
-
@
|
197
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
198
|
end
|
143
199
|
self.encoding = detected_encoding
|
144
200
|
end
|
145
201
|
|
146
202
|
def encoding_updated
|
147
203
|
if @encoding != 'UTF-8'
|
148
|
-
@
|
204
|
+
@scanner.string = decode(@scanner.rest)
|
149
205
|
@to_utf = true
|
150
206
|
else
|
151
207
|
@to_utf = false
|
152
|
-
@
|
208
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
209
|
end
|
154
210
|
end
|
155
211
|
end
|
@@ -172,7 +228,7 @@ module REXML
|
|
172
228
|
end
|
173
229
|
|
174
230
|
if !@to_utf and
|
175
|
-
@
|
231
|
+
@orig.respond_to?(:force_encoding) and
|
176
232
|
@source.respond_to?(:external_encoding) and
|
177
233
|
@source.external_encoding != ::Encoding::UTF_8
|
178
234
|
@force_utf8 = true
|
@@ -181,63 +237,87 @@ module REXML
|
|
181
237
|
end
|
182
238
|
end
|
183
239
|
|
184
|
-
def
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
240
|
+
def read(term = nil, min_bytes = 1)
|
241
|
+
term = encode(term) if term
|
242
|
+
begin
|
243
|
+
str = readline(term)
|
244
|
+
@scanner << str
|
245
|
+
read_bytes = str.bytesize
|
246
|
+
begin
|
247
|
+
while read_bytes < min_bytes
|
248
|
+
str = readline(term)
|
249
|
+
@scanner << str
|
250
|
+
read_bytes += str.bytesize
|
199
251
|
end
|
252
|
+
rescue IOError
|
200
253
|
end
|
201
|
-
|
254
|
+
true
|
255
|
+
rescue Exception, NameError
|
256
|
+
@source = nil
|
257
|
+
false
|
202
258
|
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
259
|
end
|
206
260
|
|
207
|
-
def
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
@source
|
261
|
+
def read_until(term)
|
262
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
263
|
+
term = @encoded_terms[term] ||= encode(term)
|
264
|
+
until str = @scanner.scan_until(pattern)
|
265
|
+
break if @source.nil?
|
266
|
+
break if @source.eof?
|
267
|
+
@scanner << readline(term)
|
268
|
+
end
|
269
|
+
if str
|
270
|
+
read if @scanner.eos? and !@source.eof?
|
271
|
+
str
|
272
|
+
else
|
273
|
+
rest = @scanner.rest
|
274
|
+
@scanner.pos = @scanner.string.bytesize
|
275
|
+
rest
|
212
276
|
end
|
213
277
|
end
|
214
278
|
|
215
|
-
def
|
216
|
-
|
279
|
+
def ensure_buffer
|
280
|
+
read if @scanner.eos? && @source
|
217
281
|
end
|
218
282
|
|
219
283
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
while
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
rescue
|
228
|
-
@source = nil
|
284
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
285
|
+
min_bytes = 1
|
286
|
+
while true
|
287
|
+
if cons
|
288
|
+
md = @scanner.scan(pattern)
|
289
|
+
else
|
290
|
+
md = @scanner.check(pattern)
|
229
291
|
end
|
292
|
+
break if md
|
293
|
+
return nil if pattern.is_a?(String)
|
294
|
+
return nil if @source.nil?
|
295
|
+
return nil unless read(nil, min_bytes)
|
296
|
+
min_bytes *= 2
|
230
297
|
end
|
231
|
-
|
232
|
-
|
298
|
+
|
299
|
+
md.nil? ? nil : @scanner
|
233
300
|
end
|
234
301
|
|
235
|
-
def
|
236
|
-
|
302
|
+
def match?( pattern, cons=false )
|
303
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
304
|
+
min_bytes = 1
|
305
|
+
while true
|
306
|
+
if cons
|
307
|
+
n_matched_bytes = @scanner.skip(pattern)
|
308
|
+
else
|
309
|
+
n_matched_bytes = @scanner.match?(pattern)
|
310
|
+
end
|
311
|
+
return true if n_matched_bytes
|
312
|
+
return false if pattern.is_a?(String)
|
313
|
+
return false if @source.nil?
|
314
|
+
return false unless read(nil, min_bytes)
|
315
|
+
min_bytes *= 2
|
316
|
+
end
|
237
317
|
end
|
238
318
|
|
239
|
-
def
|
240
|
-
@
|
319
|
+
def empty?
|
320
|
+
super and ( @source.nil? || @source.eof? )
|
241
321
|
end
|
242
322
|
|
243
323
|
# @return the current line in the source
|
@@ -255,7 +335,7 @@ module REXML
|
|
255
335
|
rescue
|
256
336
|
end
|
257
337
|
@er_source.seek(pos)
|
258
|
-
rescue IOError
|
338
|
+
rescue IOError, SystemCallError
|
259
339
|
pos = -1
|
260
340
|
line = -1
|
261
341
|
end
|
@@ -263,15 +343,20 @@ module REXML
|
|
263
343
|
end
|
264
344
|
|
265
345
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
346
|
+
def readline(term = nil)
|
268
347
|
if @pending_buffer
|
348
|
+
begin
|
349
|
+
str = @source.readline(term || @line_break)
|
350
|
+
rescue IOError
|
351
|
+
end
|
269
352
|
if str.nil?
|
270
353
|
str = @pending_buffer
|
271
354
|
else
|
272
355
|
str = @pending_buffer + str
|
273
356
|
end
|
274
357
|
@pending_buffer = nil
|
358
|
+
else
|
359
|
+
str = @source.readline(term || @line_break)
|
275
360
|
end
|
276
361
|
return nil if str.nil?
|
277
362
|
|
@@ -290,7 +375,7 @@ module REXML
|
|
290
375
|
@source.set_encoding(@encoding, @encoding)
|
291
376
|
end
|
292
377
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @
|
378
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
379
|
@pending_buffer.force_encoding(@encoding)
|
295
380
|
super
|
296
381
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative 'security'
|
3
3
|
require_relative 'entity'
|
4
4
|
require_relative 'doctype'
|
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -131,45 +116,55 @@ module REXML
|
|
131
116
|
def Text.check string, pattern, doctype
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
|
-
if string
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
119
|
+
if !string.match?(VALID_XML_CHARS)
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
153
128
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
129
|
+
pos = 0
|
130
|
+
while (index = string.index(/<|&/, pos))
|
131
|
+
if string[index] == "<"
|
132
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
133
|
+
end
|
134
|
+
|
135
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
136
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
137
|
+
end
|
138
|
+
|
139
|
+
value = string[(index + 1)..end_index]
|
140
|
+
if /\s/.match?(value)
|
141
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
142
|
+
end
|
143
|
+
|
144
|
+
if value[0] == "#"
|
145
|
+
character_reference = value[1..-1]
|
146
|
+
|
147
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
148
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
149
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
150
|
else
|
163
|
-
raise "Illegal character #{
|
151
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
152
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
153
|
end
|
154
|
+
|
155
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
156
|
+
when *VALID_CHAR
|
157
|
+
else
|
158
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
159
|
+
end
|
160
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
162
|
end
|
163
|
+
|
164
|
+
pos = end_index + 1
|
172
165
|
end
|
166
|
+
|
167
|
+
string
|
173
168
|
end
|
174
169
|
|
175
170
|
def node_type
|
@@ -248,7 +243,8 @@ module REXML
|
|
248
243
|
# u = Text.new( "sean russell", false, nil, true )
|
249
244
|
# u.value #-> "sean russell"
|
250
245
|
def value
|
251
|
-
@unnormalized ||= Text::unnormalize(
|
246
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
247
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
252
248
|
end
|
253
249
|
|
254
250
|
# Sets the contents of this text node. This expects the text to be
|
@@ -371,7 +367,7 @@ module REXML
|
|
371
367
|
copy = input.to_s
|
372
368
|
# Doing it like this rather than in a loop improves the speed
|
373
369
|
#copy = copy.gsub( EREFERENCE, '&' )
|
374
|
-
copy = copy.gsub( "&", "&" )
|
370
|
+
copy = copy.gsub( "&", "&" ) if copy.include?("&")
|
375
371
|
if doctype
|
376
372
|
# Replace all ampersands that aren't part of an entity
|
377
373
|
doctype.entities.each_value do |entity|
|
@@ -382,18 +378,21 @@ module REXML
|
|
382
378
|
else
|
383
379
|
# Replace all ampersands that aren't part of an entity
|
384
380
|
DocType::DEFAULT_ENTITIES.each_value do |entity|
|
385
|
-
|
381
|
+
if copy.include?(entity.value)
|
382
|
+
copy = copy.gsub(entity.value, "&#{entity.name};" )
|
383
|
+
end
|
386
384
|
end
|
387
385
|
end
|
388
386
|
copy
|
389
387
|
end
|
390
388
|
|
391
389
|
# Unescapes all possible entities
|
392
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
390
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
391
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
393
392
|
sum = 0
|
394
393
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
395
394
|
s = Text.expand($&, doctype, filter)
|
396
|
-
if sum + s.bytesize >
|
395
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
397
396
|
raise "entity expansion has grown too large"
|
398
397
|
else
|
399
398
|
sum += s.bytesize
|
data/lib/rexml/xpath_parser.rb
CHANGED
@@ -590,6 +590,7 @@ module REXML
|
|
590
590
|
|
591
591
|
def evaluate_predicate(expression, nodesets)
|
592
592
|
enter(:predicate, expression, nodesets) if @debug
|
593
|
+
new_nodeset_count = 0
|
593
594
|
new_nodesets = nodesets.collect do |nodeset|
|
594
595
|
new_nodeset = []
|
595
596
|
subcontext = { :size => nodeset.size }
|
@@ -606,17 +607,20 @@ module REXML
|
|
606
607
|
result = result[0] if result.kind_of? Array and result.length == 1
|
607
608
|
if result.kind_of? Numeric
|
608
609
|
if result == node.position
|
609
|
-
|
610
|
+
new_nodeset_count += 1
|
611
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
610
612
|
end
|
611
613
|
elsif result.instance_of? Array
|
612
614
|
if result.size > 0 and result.inject(false) {|k,s| s or k}
|
613
615
|
if result.size > 0
|
614
|
-
|
616
|
+
new_nodeset_count += 1
|
617
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
615
618
|
end
|
616
619
|
end
|
617
620
|
else
|
618
621
|
if result
|
619
|
-
|
622
|
+
new_nodeset_count += 1
|
623
|
+
new_nodeset << XPathNode.new(node, position: new_nodeset_count)
|
620
624
|
end
|
621
625
|
end
|
622
626
|
end
|