rexml 3.3.2 → 3.3.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +37 -0
- data/lib/rexml/parsers/baseparser.rb +55 -33
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +4 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +18 -8
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e5e2317fb4a12cc855de221be85a9d62c2966c4997ead5a4ede3600561d5ede
|
4
|
+
data.tar.gz: a2b8f326e706211d00a9a8446b84ebd658c9cb82a4f7c98e5760ed2b10d8866c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d26167dc282f9ff928b263927a9f003bddb6591a938b43dfddcd8a2fe2c1ddb4f931f09ec52dd3bf1912953365dcaafafb359bdd6dba1f9ca33a55bbc62ec5b
|
7
|
+
data.tar.gz: b3216114c5978079b102a6492cd0d8afde5eaf0af5ebc803873dc7a9ad4dc9afa785000c923f296b88c3b5c663a543348f65a3734801149f792518a1bcb5844c
|
data/NEWS.md
CHANGED
@@ -1,5 +1,39 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.3 - 2024-08-01 {#version-3-3-3}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for detecting invalid XML that has unsupported
|
8
|
+
content before root element
|
9
|
+
* GH-184
|
10
|
+
* Patch by NAITOH Jun.
|
11
|
+
|
12
|
+
* Added support for `REXML::Security.entity_expansion_limit=` and
|
13
|
+
`REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
|
14
|
+
parsers
|
15
|
+
* GH-187
|
16
|
+
* Patch by NAITOH Jun.
|
17
|
+
|
18
|
+
* Added more tests for invalid XMLs.
|
19
|
+
* GH-183
|
20
|
+
* Patch by Watson.
|
21
|
+
|
22
|
+
* Added more performance tests.
|
23
|
+
* Patch by Watson.
|
24
|
+
|
25
|
+
* Improved parse performance.
|
26
|
+
* GH-186
|
27
|
+
* Patch by tomoya ishida.
|
28
|
+
|
29
|
+
### Thanks
|
30
|
+
|
31
|
+
* NAITOH Jun
|
32
|
+
|
33
|
+
* Watson
|
34
|
+
|
35
|
+
* tomoya ishida
|
36
|
+
|
3
37
|
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
4
38
|
|
5
39
|
### Improvements
|
@@ -15,6 +49,9 @@
|
|
15
49
|
* GH-172
|
16
50
|
* GH-173
|
17
51
|
* GH-174
|
52
|
+
* GH-175
|
53
|
+
* GH-176
|
54
|
+
* GH-177
|
18
55
|
* Patch by Watson.
|
19
56
|
|
20
57
|
* Added support for raising a parse exception when an XML has extra
|
@@ -124,19 +124,10 @@ module REXML
|
|
124
124
|
}
|
125
125
|
|
126
126
|
module Private
|
127
|
-
# Terminal requires two or more letters.
|
128
|
-
INSTRUCTION_TERM = "?>"
|
129
|
-
COMMENT_TERM = "-->"
|
130
|
-
CDATA_TERM = "]]>"
|
131
|
-
DOCTYPE_TERM = "]>"
|
132
|
-
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
-
ENTITY_TERM = DOCTYPE_TERM
|
134
|
-
|
135
|
-
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
136
127
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
137
128
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
138
129
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
139
|
-
NAME_PATTERN =
|
130
|
+
NAME_PATTERN = /#{NAME}/um
|
140
131
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
141
132
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
142
133
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
@@ -154,6 +145,7 @@ module REXML
|
|
154
145
|
self.stream = source
|
155
146
|
@listeners = []
|
156
147
|
@prefixes = Set.new
|
148
|
+
@entity_expansion_count = 0
|
157
149
|
end
|
158
150
|
|
159
151
|
def add_listener( listener )
|
@@ -161,6 +153,7 @@ module REXML
|
|
161
153
|
end
|
162
154
|
|
163
155
|
attr_reader :source
|
156
|
+
attr_reader :entity_expansion_count
|
164
157
|
|
165
158
|
def stream=( source )
|
166
159
|
@source = SourceFactory.create_from( source )
|
@@ -248,10 +241,10 @@ module REXML
|
|
248
241
|
if @document_status == nil
|
249
242
|
start_position = @source.position
|
250
243
|
if @source.match("<?", true)
|
251
|
-
return process_instruction
|
244
|
+
return process_instruction
|
252
245
|
elsif @source.match("<!", true)
|
253
246
|
if @source.match("--", true)
|
254
|
-
md = @source.match(/(.*?)-->/um, true
|
247
|
+
md = @source.match(/(.*?)-->/um, true)
|
255
248
|
if md.nil?
|
256
249
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
257
250
|
end
|
@@ -318,7 +311,11 @@ module REXML
|
|
318
311
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
319
312
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
320
313
|
elsif @source.match("ENTITY", true)
|
321
|
-
|
314
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
315
|
+
unless match_data
|
316
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
317
|
+
end
|
318
|
+
match = [:entitydecl, *match_data.captures.compact]
|
322
319
|
ref = false
|
323
320
|
if match[1] == '%'
|
324
321
|
ref = true
|
@@ -383,14 +380,14 @@ module REXML
|
|
383
380
|
raise REXML::ParseException.new(message, @source)
|
384
381
|
end
|
385
382
|
return [:notationdecl, name, *id]
|
386
|
-
elsif md = @source.match(/--(.*?)-->/um, true
|
383
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
387
384
|
case md[1]
|
388
385
|
when /--/, /-\z/
|
389
386
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
387
|
end
|
391
388
|
return [ :comment, md[1] ] if md
|
392
389
|
end
|
393
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true
|
390
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
394
391
|
return [ :externalentity, match[1] ]
|
395
392
|
elsif @source.match(/\]\s*>/um, true)
|
396
393
|
@document_status = :after_doctype
|
@@ -430,7 +427,7 @@ module REXML
|
|
430
427
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
431
428
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
432
429
|
if md[0][0] == ?-
|
433
|
-
md = @source.match(/--(.*?)-->/um, true
|
430
|
+
md = @source.match(/--(.*?)-->/um, true)
|
434
431
|
|
435
432
|
if md.nil? || /--|-\z/.match?(md[1])
|
436
433
|
raise REXML::ParseException.new("Malformed comment", @source)
|
@@ -438,13 +435,13 @@ module REXML
|
|
438
435
|
|
439
436
|
return [ :comment, md[1] ]
|
440
437
|
else
|
441
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true
|
438
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
442
439
|
return [ :cdata, md[1] ] if md
|
443
440
|
end
|
444
441
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
445
442
|
"in the doctype declaration.", @source)
|
446
443
|
elsif @source.match("?", true)
|
447
|
-
return process_instruction
|
444
|
+
return process_instruction
|
448
445
|
else
|
449
446
|
# Get the next tag
|
450
447
|
md = @source.match(Private::TAG_PATTERN, true)
|
@@ -482,11 +479,15 @@ module REXML
|
|
482
479
|
if text.chomp!("<")
|
483
480
|
@source.position -= "<".bytesize
|
484
481
|
end
|
485
|
-
if @tags.empty?
|
482
|
+
if @tags.empty?
|
486
483
|
unless /\A\s*\z/.match?(text)
|
487
|
-
|
484
|
+
if @have_root
|
485
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
486
|
+
else
|
487
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
488
|
+
end
|
488
489
|
end
|
489
|
-
return pull_event
|
490
|
+
return pull_event if @have_root
|
490
491
|
end
|
491
492
|
return [ :text, text ]
|
492
493
|
end
|
@@ -505,7 +506,9 @@ module REXML
|
|
505
506
|
def entity( reference, entities )
|
506
507
|
value = nil
|
507
508
|
value = entities[ reference ] if entities
|
508
|
-
if
|
509
|
+
if value
|
510
|
+
record_entity_expansion
|
511
|
+
else
|
509
512
|
value = DEFAULT_ENTITIES[ reference ]
|
510
513
|
value = value[2] if value
|
511
514
|
end
|
@@ -544,12 +547,17 @@ module REXML
|
|
544
547
|
}
|
545
548
|
matches.collect!{|x|x[0]}.compact!
|
546
549
|
if matches.size > 0
|
550
|
+
sum = 0
|
547
551
|
matches.each do |entity_reference|
|
548
552
|
unless filter and filter.include?(entity_reference)
|
549
553
|
entity_value = entity( entity_reference, entities )
|
550
554
|
if entity_value
|
551
555
|
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
552
556
|
rv.gsub!( re, entity_value )
|
557
|
+
sum += rv.bytesize
|
558
|
+
if sum > Security.entity_expansion_text_limit
|
559
|
+
raise "entity expansion has grown too large"
|
560
|
+
end
|
553
561
|
else
|
554
562
|
er = DEFAULT_ENTITIES[entity_reference]
|
555
563
|
rv.gsub!( er[0], er[2] ) if er
|
@@ -562,6 +570,14 @@ module REXML
|
|
562
570
|
end
|
563
571
|
|
564
572
|
private
|
573
|
+
|
574
|
+
def record_entity_expansion
|
575
|
+
@entity_expansion_count += 1
|
576
|
+
if @entity_expansion_count > Security.entity_expansion_limit
|
577
|
+
raise "number of entity expansions exceeded, processing aborted."
|
578
|
+
end
|
579
|
+
end
|
580
|
+
|
565
581
|
def need_source_encoding_update?(xml_declaration_encoding)
|
566
582
|
return false if xml_declaration_encoding.nil?
|
567
583
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -571,14 +587,14 @@ module REXML
|
|
571
587
|
def parse_name(base_error_message)
|
572
588
|
md = @source.match(Private::NAME_PATTERN, true)
|
573
589
|
unless md
|
574
|
-
if @source.match(/\
|
590
|
+
if @source.match(/\S/um)
|
575
591
|
message = "#{base_error_message}: invalid name"
|
576
592
|
else
|
577
593
|
message = "#{base_error_message}: name is missing"
|
578
594
|
end
|
579
595
|
raise REXML::ParseException.new(message, @source)
|
580
596
|
end
|
581
|
-
md[
|
597
|
+
md[0]
|
582
598
|
end
|
583
599
|
|
584
600
|
def parse_id(base_error_message,
|
@@ -647,18 +663,24 @@ module REXML
|
|
647
663
|
end
|
648
664
|
end
|
649
665
|
|
650
|
-
def process_instruction
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
666
|
+
def process_instruction
|
667
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
668
|
+
if @source.match(/\s+/um, true)
|
669
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
670
|
+
unless match_data
|
671
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
672
|
+
end
|
673
|
+
content = match_data[1]
|
674
|
+
else
|
675
|
+
content = nil
|
676
|
+
unless @source.match("?>", true)
|
677
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
678
|
+
end
|
656
679
|
end
|
657
|
-
if
|
680
|
+
if name == "xml"
|
658
681
|
if @document_status
|
659
682
|
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
683
|
end
|
661
|
-
content = match_data[2]
|
662
684
|
version = VERSION.match(content)
|
663
685
|
version = version[1] unless version.nil?
|
664
686
|
encoding = ENCODING.match(content)
|
@@ -673,7 +695,7 @@ module REXML
|
|
673
695
|
standalone = standalone[1] unless standalone.nil?
|
674
696
|
return [ :xmldecl, version, encoding, standalone ]
|
675
697
|
end
|
676
|
-
[:processing_instruction,
|
698
|
+
[:processing_instruction, name, content]
|
677
699
|
end
|
678
700
|
|
679
701
|
def parse_attributes(prefixes, curr_ns)
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -117,7 +117,7 @@ module REXML
|
|
117
117
|
def ensure_buffer
|
118
118
|
end
|
119
119
|
|
120
|
-
def match(pattern, cons=false
|
120
|
+
def match(pattern, cons=false)
|
121
121
|
if cons
|
122
122
|
@scanner.scan(pattern).nil? ? nil : @scanner
|
123
123
|
else
|
@@ -204,10 +204,20 @@ module REXML
|
|
204
204
|
end
|
205
205
|
end
|
206
206
|
|
207
|
-
def read(term = nil)
|
207
|
+
def read(term = nil, min_bytes = 1)
|
208
208
|
term = encode(term) if term
|
209
209
|
begin
|
210
|
-
|
210
|
+
str = readline(term)
|
211
|
+
@scanner << str
|
212
|
+
read_bytes = str.bytesize
|
213
|
+
begin
|
214
|
+
while read_bytes < min_bytes
|
215
|
+
str = readline(term)
|
216
|
+
@scanner << str
|
217
|
+
read_bytes += str.bytesize
|
218
|
+
end
|
219
|
+
rescue IOError
|
220
|
+
end
|
211
221
|
true
|
212
222
|
rescue Exception, NameError
|
213
223
|
@source = nil
|
@@ -237,10 +247,9 @@ module REXML
|
|
237
247
|
read if @scanner.eos? && @source
|
238
248
|
end
|
239
249
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
def match( pattern, cons=false, term: nil )
|
250
|
+
def match( pattern, cons=false )
|
251
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
252
|
+
min_bytes = 1
|
244
253
|
while true
|
245
254
|
if cons
|
246
255
|
md = @scanner.scan(pattern)
|
@@ -250,7 +259,8 @@ module REXML
|
|
250
259
|
break if md
|
251
260
|
return nil if pattern.is_a?(String)
|
252
261
|
return nil if @source.nil?
|
253
|
-
return nil unless read(
|
262
|
+
return nil unless read(nil, min_bytes)
|
263
|
+
min_bytes *= 2
|
254
264
|
end
|
255
265
|
|
256
266
|
md.nil? ? nil : @scanner
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.2
|
4
|
+
version: 3.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-07-16 00:00:00.000000000 Z
|
10
|
+
date: 2024-08-01 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
|
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
118
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.3
|
120
120
|
rdoc_options:
|
121
121
|
- "--main"
|
122
122
|
- README.md
|