rexml 3.2.6 → 3.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +525 -0
- data/lib/rexml/attribute.rb +10 -10
- data/lib/rexml/cdata.rb +1 -1
- data/lib/rexml/child.rb +2 -3
- data/lib/rexml/comment.rb +1 -1
- data/lib/rexml/doctype.rb +3 -8
- data/lib/rexml/document.rb +23 -5
- data/lib/rexml/element.rb +63 -84
- data/lib/rexml/encoding.rb +3 -6
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/functions.rb +4 -5
- data/lib/rexml/instruction.rb +1 -1
- data/lib/rexml/namespace.rb +4 -4
- data/lib/rexml/node.rb +10 -6
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +538 -288
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/parsers/xpathparser.rb +4 -4
- data/lib/rexml/quickpath.rb +19 -18
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/security.rb +2 -2
- data/lib/rexml/source.rb +190 -100
- data/lib/rexml/text.rb +68 -74
- data/lib/rexml/validation/relaxng.rb +27 -26
- data/lib/rexml/validation/validation.rb +8 -8
- data/lib/rexml/xpath.rb +2 -13
- data/lib/rexml/xpath_parser.rb +51 -45
- metadata +6 -50
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,39 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "stringio"
|
5
|
+
require "strscan"
|
6
|
+
|
3
7
|
require_relative 'encoding'
|
4
8
|
|
5
9
|
module REXML
|
10
|
+
if StringScanner::Version < "1.0.0"
|
11
|
+
module StringScannerCheckScanString
|
12
|
+
refine StringScanner do
|
13
|
+
def check(pattern)
|
14
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
15
|
+
super(pattern)
|
16
|
+
end
|
17
|
+
|
18
|
+
def scan(pattern)
|
19
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
20
|
+
super(pattern)
|
21
|
+
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
using StringScannerCheckScanString
|
35
|
+
end
|
36
|
+
|
6
37
|
# Generates Source-s. USE THIS CLASS.
|
7
38
|
class SourceFactory
|
8
39
|
# Generates a Source object
|
@@ -15,7 +46,6 @@ module REXML
|
|
15
46
|
arg.respond_to? :eof?
|
16
47
|
IOSource.new(arg)
|
17
48
|
elsif arg.respond_to? :to_str
|
18
|
-
require 'stringio'
|
19
49
|
IOSource.new(StringIO.new(arg))
|
20
50
|
elsif arg.kind_of? Source
|
21
51
|
arg
|
@@ -30,26 +60,57 @@ module REXML
|
|
30
60
|
# objects and provides consumption of text
|
31
61
|
class Source
|
32
62
|
include Encoding
|
33
|
-
# The current buffer (what we're going to read next)
|
34
|
-
attr_reader :buffer
|
35
63
|
# The line number of the last consumed text
|
36
64
|
attr_reader :line
|
37
65
|
attr_reader :encoding
|
38
66
|
|
67
|
+
module Private
|
68
|
+
SPACES_PATTERN = /\s+/um
|
69
|
+
SCANNER_RESET_SIZE = 100000
|
70
|
+
PRE_DEFINED_TERM_PATTERNS = {}
|
71
|
+
pre_defined_terms = ["'", '"', "<", "]]>", "?>"]
|
72
|
+
if StringScanner::Version < "3.1.1"
|
73
|
+
pre_defined_terms.each do |term|
|
74
|
+
PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/
|
75
|
+
end
|
76
|
+
else
|
77
|
+
pre_defined_terms.each do |term|
|
78
|
+
PRE_DEFINED_TERM_PATTERNS[term] = term
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
private_constant :Private
|
83
|
+
|
39
84
|
# Constructor
|
40
85
|
# @param arg must be a String, and should be a valid XML document
|
41
86
|
# @param encoding if non-null, sets the encoding of the source to this
|
42
87
|
# value, overriding all encoding detection
|
43
88
|
def initialize(arg, encoding=nil)
|
44
|
-
@orig =
|
89
|
+
@orig = arg
|
90
|
+
@scanner = StringScanner.new(@orig)
|
45
91
|
if encoding
|
46
92
|
self.encoding = encoding
|
47
93
|
else
|
48
94
|
detect_encoding
|
49
95
|
end
|
50
96
|
@line = 0
|
97
|
+
@encoded_terms = {}
|
98
|
+
end
|
99
|
+
|
100
|
+
# The current buffer (what we're going to read next)
|
101
|
+
def buffer
|
102
|
+
@scanner.rest
|
51
103
|
end
|
52
104
|
|
105
|
+
def drop_parsed_content
|
106
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
107
|
+
@scanner.string = @scanner.rest
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def buffer_encoding=(encoding)
|
112
|
+
@scanner.string.force_encoding(encoding)
|
113
|
+
end
|
53
114
|
|
54
115
|
# Inherited from Encoding
|
55
116
|
# Overridden to support optimized en/decoding
|
@@ -58,98 +119,98 @@ module REXML
|
|
58
119
|
encoding_updated
|
59
120
|
end
|
60
121
|
|
61
|
-
|
62
|
-
# usual scan() method. For one thing, the pattern argument has some
|
63
|
-
# requirements; for another, the source can be consumed. You can easily
|
64
|
-
# confuse this method. Originally, the patterns were easier
|
65
|
-
# to construct and this method more robust, because this method
|
66
|
-
# generated search regexps on the fly; however, this was
|
67
|
-
# computationally expensive and slowed down the entire REXML package
|
68
|
-
# considerably, since this is by far the most commonly called method.
|
69
|
-
# @param pattern must be a Regexp, and must be in the form of
|
70
|
-
# /^\s*(#{your pattern, with no groups})(.*)/. The first group
|
71
|
-
# will be returned; the second group is used if the consume flag is
|
72
|
-
# set.
|
73
|
-
# @param consume if true, the pattern returned will be consumed, leaving
|
74
|
-
# everything after it in the Source.
|
75
|
-
# @return the pattern, if found, or nil if the Source is empty or the
|
76
|
-
# pattern is not found.
|
77
|
-
def scan(pattern, cons=false)
|
78
|
-
return nil if @buffer.nil?
|
79
|
-
rv = @buffer.scan(pattern)
|
80
|
-
@buffer = $' if cons and rv.size>0
|
81
|
-
rv
|
122
|
+
def read(term = nil)
|
82
123
|
end
|
83
124
|
|
84
|
-
def
|
125
|
+
def read_until(term)
|
126
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
127
|
+
data = @scanner.scan_until(pattern)
|
128
|
+
unless data
|
129
|
+
data = @scanner.rest
|
130
|
+
@scanner.pos = @scanner.string.bytesize
|
131
|
+
end
|
132
|
+
data
|
85
133
|
end
|
86
134
|
|
87
|
-
def
|
88
|
-
@buffer = $' if pattern.match( @buffer )
|
135
|
+
def ensure_buffer
|
89
136
|
end
|
90
137
|
|
91
|
-
def
|
92
|
-
|
138
|
+
def match(pattern, cons=false)
|
139
|
+
if cons
|
140
|
+
@scanner.scan(pattern).nil? ? nil : @scanner
|
141
|
+
else
|
142
|
+
@scanner.check(pattern).nil? ? nil : @scanner
|
143
|
+
end
|
93
144
|
end
|
94
145
|
|
95
|
-
def
|
96
|
-
|
97
|
-
|
98
|
-
|
146
|
+
def match?(pattern, cons=false)
|
147
|
+
if cons
|
148
|
+
!@scanner.skip(pattern).nil?
|
149
|
+
else
|
150
|
+
!@scanner.match?(pattern).nil?
|
151
|
+
end
|
99
152
|
end
|
100
153
|
|
101
|
-
def
|
102
|
-
|
103
|
-
@buffer = $' if cons and md
|
104
|
-
return md
|
154
|
+
def skip_spaces
|
155
|
+
@scanner.skip(Private::SPACES_PATTERN) ? true : false
|
105
156
|
end
|
106
157
|
|
107
|
-
|
108
|
-
|
109
|
-
@buffer == ""
|
158
|
+
def position
|
159
|
+
@scanner.pos
|
110
160
|
end
|
111
161
|
|
112
|
-
def position
|
113
|
-
@
|
162
|
+
def position=(pos)
|
163
|
+
@scanner.pos = pos
|
164
|
+
end
|
165
|
+
|
166
|
+
def peek_byte
|
167
|
+
@scanner.peek_byte
|
168
|
+
end
|
169
|
+
|
170
|
+
def scan_byte
|
171
|
+
@scanner.scan_byte
|
172
|
+
end
|
173
|
+
|
174
|
+
# @return true if the Source is exhausted
|
175
|
+
def empty?
|
176
|
+
@scanner.eos?
|
114
177
|
end
|
115
178
|
|
116
179
|
# @return the current line in the source
|
117
180
|
def current_line
|
118
181
|
lines = @orig.split
|
119
|
-
res = lines.grep @
|
182
|
+
res = lines.grep @scanner.rest[0..30]
|
120
183
|
res = res[-1] if res.kind_of? Array
|
121
184
|
lines.index( res ) if res
|
122
185
|
end
|
123
186
|
|
124
187
|
private
|
188
|
+
|
125
189
|
def detect_encoding
|
126
|
-
|
190
|
+
scanner_encoding = @scanner.rest.encoding
|
127
191
|
detected_encoding = "UTF-8"
|
128
192
|
begin
|
129
|
-
@
|
130
|
-
if @
|
131
|
-
@buffer[0, 2] = ""
|
193
|
+
@scanner.string.force_encoding("ASCII-8BIT")
|
194
|
+
if @scanner.scan(/\xfe\xff/n)
|
132
195
|
detected_encoding = "UTF-16BE"
|
133
|
-
elsif @
|
134
|
-
@buffer[0, 2] = ""
|
196
|
+
elsif @scanner.scan(/\xff\xfe/n)
|
135
197
|
detected_encoding = "UTF-16LE"
|
136
|
-
elsif @
|
137
|
-
@buffer[0, 3] = ""
|
198
|
+
elsif @scanner.scan(/\xef\xbb\xbf/n)
|
138
199
|
detected_encoding = "UTF-8"
|
139
200
|
end
|
140
201
|
ensure
|
141
|
-
@
|
202
|
+
@scanner.string.force_encoding(scanner_encoding)
|
142
203
|
end
|
143
204
|
self.encoding = detected_encoding
|
144
205
|
end
|
145
206
|
|
146
207
|
def encoding_updated
|
147
208
|
if @encoding != 'UTF-8'
|
148
|
-
@
|
209
|
+
@scanner.string = decode(@scanner.rest)
|
149
210
|
@to_utf = true
|
150
211
|
else
|
151
212
|
@to_utf = false
|
152
|
-
@
|
213
|
+
@scanner.string.force_encoding(::Encoding::UTF_8)
|
153
214
|
end
|
154
215
|
end
|
155
216
|
end
|
@@ -172,7 +233,7 @@ module REXML
|
|
172
233
|
end
|
173
234
|
|
174
235
|
if !@to_utf and
|
175
|
-
@
|
236
|
+
@orig.respond_to?(:force_encoding) and
|
176
237
|
@source.respond_to?(:external_encoding) and
|
177
238
|
@source.external_encoding != ::Encoding::UTF_8
|
178
239
|
@force_utf8 = true
|
@@ -181,63 +242,87 @@ module REXML
|
|
181
242
|
end
|
182
243
|
end
|
183
244
|
|
184
|
-
def
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
rescue Iconv::IllegalSequence
|
196
|
-
raise
|
197
|
-
rescue
|
198
|
-
@source = nil
|
245
|
+
def read(term = nil, min_bytes = 1)
|
246
|
+
term = encode(term) if term
|
247
|
+
begin
|
248
|
+
str = readline(term)
|
249
|
+
@scanner << str
|
250
|
+
read_bytes = str.bytesize
|
251
|
+
begin
|
252
|
+
while read_bytes < min_bytes
|
253
|
+
str = readline(term)
|
254
|
+
@scanner << str
|
255
|
+
read_bytes += str.bytesize
|
199
256
|
end
|
257
|
+
rescue IOError
|
200
258
|
end
|
201
|
-
|
259
|
+
true
|
260
|
+
rescue Exception, NameError
|
261
|
+
@source = nil
|
262
|
+
false
|
202
263
|
end
|
203
|
-
rv.taint if RUBY_VERSION < '2.7'
|
204
|
-
rv
|
205
264
|
end
|
206
265
|
|
207
|
-
def
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
@source
|
266
|
+
def read_until(term)
|
267
|
+
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
268
|
+
term = @encoded_terms[term] ||= encode(term)
|
269
|
+
until str = @scanner.scan_until(pattern)
|
270
|
+
break if @source.nil?
|
271
|
+
break if @source.eof?
|
272
|
+
@scanner << readline(term)
|
273
|
+
end
|
274
|
+
if str
|
275
|
+
read if @scanner.eos? and !@source.eof?
|
276
|
+
str
|
277
|
+
else
|
278
|
+
rest = @scanner.rest
|
279
|
+
@scanner.pos = @scanner.string.bytesize
|
280
|
+
rest
|
212
281
|
end
|
213
282
|
end
|
214
283
|
|
215
|
-
def
|
216
|
-
|
284
|
+
def ensure_buffer
|
285
|
+
read if @scanner.eos? && @source
|
217
286
|
end
|
218
287
|
|
219
288
|
def match( pattern, cons=false )
|
220
|
-
|
221
|
-
|
222
|
-
while
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
rescue
|
228
|
-
@source = nil
|
289
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
290
|
+
min_bytes = 1
|
291
|
+
while true
|
292
|
+
if cons
|
293
|
+
md = @scanner.scan(pattern)
|
294
|
+
else
|
295
|
+
md = @scanner.check(pattern)
|
229
296
|
end
|
297
|
+
break if md
|
298
|
+
return nil if pattern.is_a?(String)
|
299
|
+
return nil if @source.nil?
|
300
|
+
return nil unless read(nil, min_bytes)
|
301
|
+
min_bytes *= 2
|
230
302
|
end
|
231
|
-
|
232
|
-
|
303
|
+
|
304
|
+
md.nil? ? nil : @scanner
|
233
305
|
end
|
234
306
|
|
235
|
-
def
|
236
|
-
|
307
|
+
def match?( pattern, cons=false )
|
308
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
309
|
+
min_bytes = 1
|
310
|
+
while true
|
311
|
+
if cons
|
312
|
+
n_matched_bytes = @scanner.skip(pattern)
|
313
|
+
else
|
314
|
+
n_matched_bytes = @scanner.match?(pattern)
|
315
|
+
end
|
316
|
+
return true if n_matched_bytes
|
317
|
+
return false if pattern.is_a?(String)
|
318
|
+
return false if @source.nil?
|
319
|
+
return false unless read(nil, min_bytes)
|
320
|
+
min_bytes *= 2
|
321
|
+
end
|
237
322
|
end
|
238
323
|
|
239
|
-
def
|
240
|
-
@
|
324
|
+
def empty?
|
325
|
+
super and ( @source.nil? || @source.eof? )
|
241
326
|
end
|
242
327
|
|
243
328
|
# @return the current line in the source
|
@@ -255,7 +340,7 @@ module REXML
|
|
255
340
|
rescue
|
256
341
|
end
|
257
342
|
@er_source.seek(pos)
|
258
|
-
rescue IOError
|
343
|
+
rescue IOError, SystemCallError
|
259
344
|
pos = -1
|
260
345
|
line = -1
|
261
346
|
end
|
@@ -263,15 +348,20 @@ module REXML
|
|
263
348
|
end
|
264
349
|
|
265
350
|
private
|
266
|
-
def readline
|
267
|
-
str = @source.readline(@line_break)
|
351
|
+
def readline(term = nil)
|
268
352
|
if @pending_buffer
|
353
|
+
begin
|
354
|
+
str = @source.readline(term || @line_break)
|
355
|
+
rescue IOError
|
356
|
+
end
|
269
357
|
if str.nil?
|
270
358
|
str = @pending_buffer
|
271
359
|
else
|
272
360
|
str = @pending_buffer + str
|
273
361
|
end
|
274
362
|
@pending_buffer = nil
|
363
|
+
else
|
364
|
+
str = @source.readline(term || @line_break)
|
275
365
|
end
|
276
366
|
return nil if str.nil?
|
277
367
|
|
@@ -290,7 +380,7 @@ module REXML
|
|
290
380
|
@source.set_encoding(@encoding, @encoding)
|
291
381
|
end
|
292
382
|
@line_break = encode(">")
|
293
|
-
@pending_buffer, @
|
383
|
+
@pending_buffer, @scanner.string = @scanner.rest, ""
|
294
384
|
@pending_buffer.force_encoding(@encoding)
|
295
385
|
super
|
296
386
|
end
|
data/lib/rexml/text.rb
CHANGED
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -119,57 +104,67 @@ module REXML
|
|
119
104
|
@entity_filter = entity_filter if entity_filter
|
120
105
|
clear_cache
|
121
106
|
|
122
|
-
Text.check(@string, illegal
|
107
|
+
Text.check(@string, illegal) if @raw
|
123
108
|
end
|
124
109
|
|
125
110
|
def parent= parent
|
126
111
|
super(parent)
|
127
|
-
Text.check(@string, NEEDS_A_SECOND_CHECK
|
112
|
+
Text.check(@string, NEEDS_A_SECOND_CHECK) if @raw and @parent
|
128
113
|
end
|
129
114
|
|
130
115
|
# check for illegal characters
|
131
|
-
def Text.check string, pattern, doctype
|
116
|
+
def Text.check string, pattern, doctype = nil
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
119
|
if !string.match?(VALID_XML_CHARS)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
153
128
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
129
|
+
pos = 0
|
130
|
+
while (index = string.index(/<|&/, pos))
|
131
|
+
if string[index] == "<"
|
132
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
133
|
+
end
|
134
|
+
|
135
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
136
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
137
|
+
end
|
138
|
+
|
139
|
+
value = string[(index + 1)..end_index]
|
140
|
+
if /\s/.match?(value)
|
141
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
142
|
+
end
|
143
|
+
|
144
|
+
if value[0] == "#"
|
145
|
+
character_reference = value[1..-1]
|
146
|
+
|
147
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
148
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
149
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
150
|
else
|
163
|
-
raise "Illegal character #{
|
151
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
152
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
153
|
end
|
154
|
+
|
155
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
156
|
+
when *VALID_CHAR
|
157
|
+
else
|
158
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
159
|
+
end
|
160
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
162
|
end
|
163
|
+
|
164
|
+
pos = end_index + 1
|
172
165
|
end
|
166
|
+
|
167
|
+
string
|
173
168
|
end
|
174
169
|
|
175
170
|
def node_type
|
@@ -182,7 +177,7 @@ module REXML
|
|
182
177
|
|
183
178
|
|
184
179
|
def clone
|
185
|
-
|
180
|
+
Text.new(self, true)
|
186
181
|
end
|
187
182
|
|
188
183
|
|
@@ -205,10 +200,7 @@ module REXML
|
|
205
200
|
end
|
206
201
|
|
207
202
|
def doctype
|
208
|
-
|
209
|
-
doc = @parent.document
|
210
|
-
doc.doctype if doc
|
211
|
-
end
|
203
|
+
@parent&.document&.doctype
|
212
204
|
end
|
213
205
|
|
214
206
|
REFERENCE = /#{Entity::REFERENCE}/
|
@@ -248,7 +240,8 @@ module REXML
|
|
248
240
|
# u = Text.new( "sean russell", false, nil, true )
|
249
241
|
# u.value #-> "sean russell"
|
250
242
|
def value
|
251
|
-
@unnormalized ||= Text::unnormalize(
|
243
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
244
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
252
245
|
end
|
253
246
|
|
254
247
|
# Sets the contents of this text node. This expects the text to be
|
@@ -268,30 +261,32 @@ module REXML
|
|
268
261
|
# Recursively wrap string at width.
|
269
262
|
return string if string.length <= width
|
270
263
|
place = string.rindex(' ', width) # Position in string with last ' ' before cutoff
|
271
|
-
if addnewline
|
272
|
-
|
264
|
+
if addnewline
|
265
|
+
"\n" + string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
273
266
|
else
|
274
|
-
|
267
|
+
string[0,place] + "\n" + wrap(string[place+1..-1], width)
|
275
268
|
end
|
276
269
|
end
|
277
270
|
|
278
271
|
def indent_text(string, level=1, style="\t", indentfirstline=true)
|
272
|
+
Kernel.warn("#{self.class.name}#indent_text is deprecated. See REXML::Formatters", uplevel: 1)
|
279
273
|
return string if level < 0
|
280
|
-
|
274
|
+
|
275
|
+
new_string = +''
|
281
276
|
string.each_line { |line|
|
282
277
|
indent_string = style * level
|
283
278
|
new_line = (indent_string + line).sub(/[\s]+$/,'')
|
284
279
|
new_string << new_line
|
285
280
|
}
|
286
281
|
new_string.strip! unless indentfirstline
|
287
|
-
|
282
|
+
new_string
|
288
283
|
end
|
289
284
|
|
290
285
|
# == DEPRECATED
|
291
286
|
# See REXML::Formatters
|
292
287
|
#
|
293
288
|
def write( writer, indent=-1, transitive=false, ie_hack=false )
|
294
|
-
Kernel.warn("#{self.class.name}
|
289
|
+
Kernel.warn("#{self.class.name}#write is deprecated. See REXML::Formatters", uplevel: 1)
|
295
290
|
formatter = if indent > -1
|
296
291
|
REXML::Formatters::Pretty.new( indent )
|
297
292
|
else
|
@@ -303,9 +298,7 @@ module REXML
|
|
303
298
|
# FIXME
|
304
299
|
# This probably won't work properly
|
305
300
|
def xpath
|
306
|
-
|
307
|
-
path += "/text()"
|
308
|
-
return path
|
301
|
+
@parent.xpath + "/text()"
|
309
302
|
end
|
310
303
|
|
311
304
|
# Writes out text, substituting special characters beforehand.
|
@@ -391,11 +384,12 @@ module REXML
|
|
391
384
|
end
|
392
385
|
|
393
386
|
# Unescapes all possible entities
|
394
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
387
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
388
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
395
389
|
sum = 0
|
396
390
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
397
391
|
s = Text.expand($&, doctype, filter)
|
398
|
-
if sum + s.bytesize >
|
392
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
399
393
|
raise "entity expansion has grown too large"
|
400
394
|
else
|
401
395
|
sum += s.bytesize
|