rexml 3.3.2 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +200 -0
- data/lib/rexml/attribute.rb +3 -2
- data/lib/rexml/document.rb +5 -1
- data/lib/rexml/element.rb +14 -16
- data/lib/rexml/entity.rb +9 -48
- data/lib/rexml/parsers/baseparser.rb +206 -101
- data/lib/rexml/parsers/pullparser.rb +16 -0
- data/lib/rexml/parsers/sax2parser.rb +14 -0
- data/lib/rexml/parsers/streamparser.rb +15 -9
- data/lib/rexml/parsers/treeparser.rb +0 -7
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +63 -12
- data/lib/rexml/text.rb +20 -43
- metadata +8 -19
@@ -22,6 +22,18 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
29
|
+
def entity_expansion_limit=( limit )
|
30
|
+
@parser.entity_expansion_limit = limit
|
31
|
+
end
|
32
|
+
|
33
|
+
def entity_expansion_text_limit=( limit )
|
34
|
+
@parser.entity_expansion_text_limit = limit
|
35
|
+
end
|
36
|
+
|
25
37
|
def add_listener( listener )
|
26
38
|
@parser.add_listener( listener )
|
27
39
|
end
|
@@ -247,6 +259,8 @@ module REXML
|
|
247
259
|
end
|
248
260
|
|
249
261
|
def get_namespace( prefix )
|
262
|
+
return nil if @namespace_stack.empty?
|
263
|
+
|
250
264
|
uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
|
251
265
|
(@namespace_stack.find { |ns| not ns[nil].nil? })
|
252
266
|
uris[-1][prefix] unless uris.nil? or 0 == uris.size
|
@@ -7,36 +7,41 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@tag_stack = []
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
21
|
+
def entity_expansion_limit=( limit )
|
22
|
+
@parser.entity_expansion_limit = limit
|
23
|
+
end
|
24
|
+
|
25
|
+
def entity_expansion_text_limit=( limit )
|
26
|
+
@parser.entity_expansion_text_limit = limit
|
27
|
+
end
|
28
|
+
|
17
29
|
def parse
|
18
30
|
# entity string
|
19
31
|
while true
|
20
32
|
event = @parser.pull
|
21
33
|
case event[0]
|
22
34
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
35
|
return
|
29
36
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
37
|
attrs = event[2].each do |n, v|
|
32
38
|
event[2][n] = @parser.unnormalize( v )
|
33
39
|
end
|
34
40
|
@listener.tag_start( event[1], attrs )
|
35
41
|
when :end_element
|
36
42
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
43
|
when :text
|
39
|
-
unnormalized = @parser.unnormalize( event[1] )
|
44
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
40
45
|
@listener.text( unnormalized )
|
41
46
|
when :processing_instruction
|
42
47
|
@listener.instruction( *event[1,2] )
|
@@ -48,6 +53,7 @@ module REXML
|
|
48
53
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
54
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
55
|
when :entitydecl, :notationdecl
|
56
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
57
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
58
|
when :externalentity
|
53
59
|
entity_reference = event[1]
|
@@ -15,7 +15,6 @@ module REXML
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def parse
|
18
|
-
tag_stack = []
|
19
18
|
entities = nil
|
20
19
|
begin
|
21
20
|
while true
|
@@ -23,19 +22,13 @@ module REXML
|
|
23
22
|
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
|
24
23
|
case event[0]
|
25
24
|
when :end_document
|
26
|
-
unless tag_stack.empty?
|
27
|
-
raise ParseException.new("No close tag for #{@build_context.xpath}",
|
28
|
-
@parser.source, @parser)
|
29
|
-
end
|
30
25
|
return
|
31
26
|
when :start_element
|
32
|
-
tag_stack.push(event[1])
|
33
27
|
el = @build_context = @build_context.add_element( event[1] )
|
34
28
|
event[2].each do |key, value|
|
35
29
|
el.attributes[key]=Attribute.new(key,value,self)
|
36
30
|
end
|
37
31
|
when :end_element
|
38
|
-
tag_stack.pop
|
39
32
|
@build_context = @build_context.parent
|
40
33
|
when :text
|
41
34
|
if @build_context[-1].instance_of? Text
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
3
|
|
4
|
+
require "stringio"
|
4
5
|
require "strscan"
|
5
6
|
|
6
7
|
require_relative 'encoding'
|
@@ -18,6 +19,16 @@ module REXML
|
|
18
19
|
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
20
|
super(pattern)
|
20
21
|
end
|
22
|
+
|
23
|
+
def match?(pattern)
|
24
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
25
|
+
super(pattern)
|
26
|
+
end
|
27
|
+
|
28
|
+
def skip(pattern)
|
29
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
30
|
+
super(pattern)
|
31
|
+
end
|
21
32
|
end
|
22
33
|
end
|
23
34
|
using StringScannerCheckScanString
|
@@ -35,7 +46,6 @@ module REXML
|
|
35
46
|
arg.respond_to? :eof?
|
36
47
|
IOSource.new(arg)
|
37
48
|
elsif arg.respond_to? :to_str
|
38
|
-
require 'stringio'
|
39
49
|
IOSource.new(StringIO.new(arg))
|
40
50
|
elsif arg.kind_of? Source
|
41
51
|
arg
|
@@ -77,6 +87,7 @@ module REXML
|
|
77
87
|
detect_encoding
|
78
88
|
end
|
79
89
|
@line = 0
|
90
|
+
@encoded_terms = {}
|
80
91
|
end
|
81
92
|
|
82
93
|
# The current buffer (what we're going to read next)
|
@@ -117,7 +128,7 @@ module REXML
|
|
117
128
|
def ensure_buffer
|
118
129
|
end
|
119
130
|
|
120
|
-
def match(pattern, cons=false, term: nil)
|
131
|
+
def match(pattern, cons=false)
|
121
132
|
if cons
|
122
133
|
@scanner.scan(pattern).nil? ? nil : @scanner
|
123
134
|
else
|
@@ -125,6 +136,14 @@ module REXML
|
|
125
136
|
end
|
126
137
|
end
|
127
138
|
|
139
|
+
def match?(pattern, cons=false)
|
140
|
+
if cons
|
141
|
+
!@scanner.skip(pattern).nil?
|
142
|
+
else
|
143
|
+
!@scanner.match?(pattern).nil?
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
128
147
|
def position
|
129
148
|
@scanner.pos
|
130
149
|
end
|
@@ -204,10 +223,20 @@ module REXML
|
|
204
223
|
end
|
205
224
|
end
|
206
225
|
|
207
|
-
def read(term = nil)
|
226
|
+
def read(term = nil, min_bytes = 1)
|
208
227
|
term = encode(term) if term
|
209
228
|
begin
|
210
|
-
|
229
|
+
str = readline(term)
|
230
|
+
@scanner << str
|
231
|
+
read_bytes = str.bytesize
|
232
|
+
begin
|
233
|
+
while read_bytes < min_bytes
|
234
|
+
str = readline(term)
|
235
|
+
@scanner << str
|
236
|
+
read_bytes += str.bytesize
|
237
|
+
end
|
238
|
+
rescue IOError
|
239
|
+
end
|
211
240
|
true
|
212
241
|
rescue Exception, NameError
|
213
242
|
@source = nil
|
@@ -217,7 +246,7 @@ module REXML
|
|
217
246
|
|
218
247
|
def read_until(term)
|
219
248
|
pattern = Private::PRE_DEFINED_TERM_PATTERNS[term] || /#{Regexp.escape(term)}/
|
220
|
-
term = encode(term)
|
249
|
+
term = @encoded_terms[term] ||= encode(term)
|
221
250
|
until str = @scanner.scan_until(pattern)
|
222
251
|
break if @source.nil?
|
223
252
|
break if @source.eof?
|
@@ -237,10 +266,9 @@ module REXML
|
|
237
266
|
read if @scanner.eos? && @source
|
238
267
|
end
|
239
268
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
def match( pattern, cons=false, term: nil )
|
269
|
+
def match( pattern, cons=false )
|
270
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
271
|
+
min_bytes = 1
|
244
272
|
while true
|
245
273
|
if cons
|
246
274
|
md = @scanner.scan(pattern)
|
@@ -250,12 +278,30 @@ module REXML
|
|
250
278
|
break if md
|
251
279
|
return nil if pattern.is_a?(String)
|
252
280
|
return nil if @source.nil?
|
253
|
-
return nil unless read(term)
|
281
|
+
return nil unless read(nil, min_bytes)
|
282
|
+
min_bytes *= 2
|
254
283
|
end
|
255
284
|
|
256
285
|
md.nil? ? nil : @scanner
|
257
286
|
end
|
258
287
|
|
288
|
+
def match?( pattern, cons=false )
|
289
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
290
|
+
min_bytes = 1
|
291
|
+
while true
|
292
|
+
if cons
|
293
|
+
n_matched_bytes = @scanner.skip(pattern)
|
294
|
+
else
|
295
|
+
n_matched_bytes = @scanner.match?(pattern)
|
296
|
+
end
|
297
|
+
return true if n_matched_bytes
|
298
|
+
return false if pattern.is_a?(String)
|
299
|
+
return false if @source.nil?
|
300
|
+
return false unless read(nil, min_bytes)
|
301
|
+
min_bytes *= 2
|
302
|
+
end
|
303
|
+
end
|
304
|
+
|
259
305
|
def empty?
|
260
306
|
super and ( @source.nil? || @source.eof? )
|
261
307
|
end
|
@@ -275,7 +321,7 @@ module REXML
|
|
275
321
|
rescue
|
276
322
|
end
|
277
323
|
@er_source.seek(pos)
|
278
|
-
rescue IOError
|
324
|
+
rescue IOError, SystemCallError
|
279
325
|
pos = -1
|
280
326
|
line = -1
|
281
327
|
end
|
@@ -284,14 +330,19 @@ module REXML
|
|
284
330
|
|
285
331
|
private
|
286
332
|
def readline(term = nil)
|
287
|
-
str = @source.readline(term || @line_break)
|
288
333
|
if @pending_buffer
|
334
|
+
begin
|
335
|
+
str = @source.readline(term || @line_break)
|
336
|
+
rescue IOError
|
337
|
+
end
|
289
338
|
if str.nil?
|
290
339
|
str = @pending_buffer
|
291
340
|
else
|
292
341
|
str = @pending_buffer + str
|
293
342
|
end
|
294
343
|
@pending_buffer = nil
|
344
|
+
else
|
345
|
+
str = @source.readline(term || @line_break)
|
295
346
|
end
|
296
347
|
return nil if str.nil?
|
297
348
|
|
data/lib/rexml/text.rb
CHANGED
@@ -29,31 +29,16 @@ module REXML
|
|
29
29
|
(0x10000..0x10FFFF)
|
30
30
|
]
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
']*$')
|
43
|
-
else
|
44
|
-
VALID_XML_CHARS = /^(
|
45
|
-
[\x09\x0A\x0D\x20-\x7E] # ASCII
|
46
|
-
| [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
|
47
|
-
| \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
|
48
|
-
| [\xE1-\xEC\xEE][\x80-\xBF]{2} # straight 3-byte
|
49
|
-
| \xEF[\x80-\xBE]{2} #
|
50
|
-
| \xEF\xBF[\x80-\xBD] # excluding U+fffe and U+ffff
|
51
|
-
| \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
|
52
|
-
| \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
|
53
|
-
| [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
|
54
|
-
| \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
|
55
|
-
)*$/nx;
|
56
|
-
end
|
32
|
+
VALID_XML_CHARS = Regexp.new('^['+
|
33
|
+
VALID_CHAR.map { |item|
|
34
|
+
case item
|
35
|
+
when Integer
|
36
|
+
[item].pack('U').force_encoding('utf-8')
|
37
|
+
when Range
|
38
|
+
[item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
|
39
|
+
end
|
40
|
+
}.join +
|
41
|
+
']*$')
|
57
42
|
|
58
43
|
# Constructor
|
59
44
|
# +arg+ if a String, the content is set to the String. If a Text,
|
@@ -132,21 +117,11 @@ module REXML
|
|
132
117
|
|
133
118
|
# illegal anywhere
|
134
119
|
if !string.match?(VALID_XML_CHARS)
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
141
|
-
end
|
142
|
-
end
|
143
|
-
else
|
144
|
-
string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
|
145
|
-
case c.unpack('U')
|
146
|
-
when *VALID_CHAR
|
147
|
-
else
|
148
|
-
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
149
|
-
end
|
120
|
+
string.chars.each do |c|
|
121
|
+
case c.ord
|
122
|
+
when *VALID_CHAR
|
123
|
+
else
|
124
|
+
raise "Illegal character #{c.inspect} in raw string #{string.inspect}"
|
150
125
|
end
|
151
126
|
end
|
152
127
|
end
|
@@ -268,7 +243,8 @@ module REXML
|
|
268
243
|
# u = Text.new( "sean russell", false, nil, true )
|
269
244
|
# u.value #-> "sean russell"
|
270
245
|
def value
|
271
|
-
@unnormalized ||= Text::unnormalize(
|
246
|
+
@unnormalized ||= Text::unnormalize(@string, doctype,
|
247
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
272
248
|
end
|
273
249
|
|
274
250
|
# Sets the contents of this text node. This expects the text to be
|
@@ -411,11 +387,12 @@ module REXML
|
|
411
387
|
end
|
412
388
|
|
413
389
|
# Unescapes all possible entities
|
414
|
-
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
|
390
|
+
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil, entity_expansion_text_limit: nil )
|
391
|
+
entity_expansion_text_limit ||= Security.entity_expansion_text_limit
|
415
392
|
sum = 0
|
416
393
|
string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) {
|
417
394
|
s = Text.expand($&, doctype, filter)
|
418
|
-
if sum + s.bytesize > Security.entity_expansion_text_limit
|
395
|
+
if sum + s.bytesize > entity_expansion_text_limit
|
419
396
|
raise "entity expansion has grown too large"
|
420
397
|
else
|
421
398
|
sum += s.bytesize
|
metadata
CHANGED
@@ -1,28 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.2
|
4
|
+
version: 3.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
|
+
autorequire:
|
8
9
|
bindir: bin
|
9
10
|
cert_chain: []
|
10
|
-
date: 2024-
|
11
|
-
dependencies:
|
12
|
-
- !ruby/object:Gem::Dependency
|
13
|
-
name: strscan
|
14
|
-
requirement: !ruby/object:Gem::Requirement
|
15
|
-
requirements:
|
16
|
-
- - ">="
|
17
|
-
- !ruby/object:Gem::Version
|
18
|
-
version: '0'
|
19
|
-
type: :runtime
|
20
|
-
prerelease: false
|
21
|
-
version_requirements: !ruby/object:Gem::Requirement
|
22
|
-
requirements:
|
23
|
-
- - ">="
|
24
|
-
- !ruby/object:Gem::Version
|
25
|
-
version: '0'
|
11
|
+
date: 2024-12-15 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
26
13
|
description: An XML toolkit for Ruby
|
27
14
|
email:
|
28
15
|
- kou@cozmixng.org
|
@@ -116,7 +103,8 @@ homepage: https://github.com/ruby/rexml
|
|
116
103
|
licenses:
|
117
104
|
- BSD-2-Clause
|
118
105
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
|
106
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.4.0
|
107
|
+
post_install_message:
|
120
108
|
rdoc_options:
|
121
109
|
- "--main"
|
122
110
|
- README.md
|
@@ -133,7 +121,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
121
|
- !ruby/object:Gem::Version
|
134
122
|
version: '0'
|
135
123
|
requirements: []
|
136
|
-
rubygems_version: 3.
|
124
|
+
rubygems_version: 3.5.22
|
125
|
+
signing_key:
|
137
126
|
specification_version: 4
|
138
127
|
summary: An XML toolkit for Ruby
|
139
128
|
test_files: []
|