rexml 3.3.2 → 3.3.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +51 -0
- data/lib/rexml/parsers/baseparser.rb +56 -33
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +4 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +18 -8
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e47ba1209ca1ca2ae0584348378fcefe05de5dc277273d434a37d62e04c676b3
|
4
|
+
data.tar.gz: 867f9e01423f83063aac7c59e07670c88c20f527f676e28cdf9d098248293c56
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d87d9cd9384218f3a9bd65870cef99e057022c83bae434318daeab781444378ea830ce46ae20879954f2ae54a7a00cc54eac2839784b989612315ddef909c809
|
7
|
+
data.tar.gz: 1e61927c65b9a058626d0ab19c7f5af0d49169d896e76402e0152476cc772dabf41b8f7a135040b12f5c46eac933de8e60d21fdea8388ed7342be8cc6f9114e9
|
data/NEWS.md
CHANGED
@@ -1,5 +1,53 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.4 - 2024-08-01 {#version-3-3-4}
|
4
|
+
|
5
|
+
### Fixes
|
6
|
+
|
7
|
+
* Fixed a bug that `REXML::Security` isn't defined when
|
8
|
+
`REXML::Parsers::StreamParser` is used and
|
9
|
+
`rexml/parsers/streamparser` is only required.
|
10
|
+
* GH-189
|
11
|
+
* Patch by takuya kodama.
|
12
|
+
|
13
|
+
### Thanks
|
14
|
+
|
15
|
+
* takuya kodama
|
16
|
+
|
17
|
+
## 3.3.3 - 2024-08-01 {#version-3-3-3}
|
18
|
+
|
19
|
+
### Improvements
|
20
|
+
|
21
|
+
* Added support for detecting invalid XML that has unsupported
|
22
|
+
content before root element
|
23
|
+
* GH-184
|
24
|
+
* Patch by NAITOH Jun.
|
25
|
+
|
26
|
+
* Added support for `REXML::Security.entity_expansion_limit=` and
|
27
|
+
`REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
|
28
|
+
parsers
|
29
|
+
* GH-187
|
30
|
+
* Patch by NAITOH Jun.
|
31
|
+
|
32
|
+
* Added more tests for invalid XMLs.
|
33
|
+
* GH-183
|
34
|
+
* Patch by Watson.
|
35
|
+
|
36
|
+
* Added more performance tests.
|
37
|
+
* Patch by Watson.
|
38
|
+
|
39
|
+
* Improved parse performance.
|
40
|
+
* GH-186
|
41
|
+
* Patch by tomoya ishida.
|
42
|
+
|
43
|
+
### Thanks
|
44
|
+
|
45
|
+
* NAITOH Jun
|
46
|
+
|
47
|
+
* Watson
|
48
|
+
|
49
|
+
* tomoya ishida
|
50
|
+
|
3
51
|
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
4
52
|
|
5
53
|
### Improvements
|
@@ -15,6 +63,9 @@
|
|
15
63
|
* GH-172
|
16
64
|
* GH-173
|
17
65
|
* GH-174
|
66
|
+
* GH-175
|
67
|
+
* GH-176
|
68
|
+
* GH-177
|
18
69
|
* Patch by Watson.
|
19
70
|
|
20
71
|
* Added support for raising a parse exception when an XML has extra
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
@@ -124,19 +125,10 @@ module REXML
|
|
124
125
|
}
|
125
126
|
|
126
127
|
module Private
|
127
|
-
# Terminal requires two or more letters.
|
128
|
-
INSTRUCTION_TERM = "?>"
|
129
|
-
COMMENT_TERM = "-->"
|
130
|
-
CDATA_TERM = "]]>"
|
131
|
-
DOCTYPE_TERM = "]>"
|
132
|
-
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
-
ENTITY_TERM = DOCTYPE_TERM
|
134
|
-
|
135
|
-
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
136
128
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
137
129
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
138
130
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
139
|
-
NAME_PATTERN =
|
131
|
+
NAME_PATTERN = /#{NAME}/um
|
140
132
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
141
133
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
142
134
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
@@ -154,6 +146,7 @@ module REXML
|
|
154
146
|
self.stream = source
|
155
147
|
@listeners = []
|
156
148
|
@prefixes = Set.new
|
149
|
+
@entity_expansion_count = 0
|
157
150
|
end
|
158
151
|
|
159
152
|
def add_listener( listener )
|
@@ -161,6 +154,7 @@ module REXML
|
|
161
154
|
end
|
162
155
|
|
163
156
|
attr_reader :source
|
157
|
+
attr_reader :entity_expansion_count
|
164
158
|
|
165
159
|
def stream=( source )
|
166
160
|
@source = SourceFactory.create_from( source )
|
@@ -248,10 +242,10 @@ module REXML
|
|
248
242
|
if @document_status == nil
|
249
243
|
start_position = @source.position
|
250
244
|
if @source.match("<?", true)
|
251
|
-
return process_instruction
|
245
|
+
return process_instruction
|
252
246
|
elsif @source.match("<!", true)
|
253
247
|
if @source.match("--", true)
|
254
|
-
md = @source.match(/(.*?)-->/um, true
|
248
|
+
md = @source.match(/(.*?)-->/um, true)
|
255
249
|
if md.nil?
|
256
250
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
257
251
|
end
|
@@ -318,7 +312,11 @@ module REXML
|
|
318
312
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
319
313
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
320
314
|
elsif @source.match("ENTITY", true)
|
321
|
-
|
315
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
316
|
+
unless match_data
|
317
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
318
|
+
end
|
319
|
+
match = [:entitydecl, *match_data.captures.compact]
|
322
320
|
ref = false
|
323
321
|
if match[1] == '%'
|
324
322
|
ref = true
|
@@ -383,14 +381,14 @@ module REXML
|
|
383
381
|
raise REXML::ParseException.new(message, @source)
|
384
382
|
end
|
385
383
|
return [:notationdecl, name, *id]
|
386
|
-
elsif md = @source.match(/--(.*?)-->/um, true
|
384
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
387
385
|
case md[1]
|
388
386
|
when /--/, /-\z/
|
389
387
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
388
|
end
|
391
389
|
return [ :comment, md[1] ] if md
|
392
390
|
end
|
393
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true
|
391
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
394
392
|
return [ :externalentity, match[1] ]
|
395
393
|
elsif @source.match(/\]\s*>/um, true)
|
396
394
|
@document_status = :after_doctype
|
@@ -430,7 +428,7 @@ module REXML
|
|
430
428
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
431
429
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
432
430
|
if md[0][0] == ?-
|
433
|
-
md = @source.match(/--(.*?)-->/um, true
|
431
|
+
md = @source.match(/--(.*?)-->/um, true)
|
434
432
|
|
435
433
|
if md.nil? || /--|-\z/.match?(md[1])
|
436
434
|
raise REXML::ParseException.new("Malformed comment", @source)
|
@@ -438,13 +436,13 @@ module REXML
|
|
438
436
|
|
439
437
|
return [ :comment, md[1] ]
|
440
438
|
else
|
441
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true
|
439
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
442
440
|
return [ :cdata, md[1] ] if md
|
443
441
|
end
|
444
442
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
445
443
|
"in the doctype declaration.", @source)
|
446
444
|
elsif @source.match("?", true)
|
447
|
-
return process_instruction
|
445
|
+
return process_instruction
|
448
446
|
else
|
449
447
|
# Get the next tag
|
450
448
|
md = @source.match(Private::TAG_PATTERN, true)
|
@@ -482,11 +480,15 @@ module REXML
|
|
482
480
|
if text.chomp!("<")
|
483
481
|
@source.position -= "<".bytesize
|
484
482
|
end
|
485
|
-
if @tags.empty?
|
483
|
+
if @tags.empty?
|
486
484
|
unless /\A\s*\z/.match?(text)
|
487
|
-
|
485
|
+
if @have_root
|
486
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
487
|
+
else
|
488
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
489
|
+
end
|
488
490
|
end
|
489
|
-
return pull_event
|
491
|
+
return pull_event if @have_root
|
490
492
|
end
|
491
493
|
return [ :text, text ]
|
492
494
|
end
|
@@ -505,7 +507,9 @@ module REXML
|
|
505
507
|
def entity( reference, entities )
|
506
508
|
value = nil
|
507
509
|
value = entities[ reference ] if entities
|
508
|
-
if
|
510
|
+
if value
|
511
|
+
record_entity_expansion
|
512
|
+
else
|
509
513
|
value = DEFAULT_ENTITIES[ reference ]
|
510
514
|
value = value[2] if value
|
511
515
|
end
|
@@ -544,12 +548,17 @@ module REXML
|
|
544
548
|
}
|
545
549
|
matches.collect!{|x|x[0]}.compact!
|
546
550
|
if matches.size > 0
|
551
|
+
sum = 0
|
547
552
|
matches.each do |entity_reference|
|
548
553
|
unless filter and filter.include?(entity_reference)
|
549
554
|
entity_value = entity( entity_reference, entities )
|
550
555
|
if entity_value
|
551
556
|
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
552
557
|
rv.gsub!( re, entity_value )
|
558
|
+
sum += rv.bytesize
|
559
|
+
if sum > Security.entity_expansion_text_limit
|
560
|
+
raise "entity expansion has grown too large"
|
561
|
+
end
|
553
562
|
else
|
554
563
|
er = DEFAULT_ENTITIES[entity_reference]
|
555
564
|
rv.gsub!( er[0], er[2] ) if er
|
@@ -562,6 +571,14 @@ module REXML
|
|
562
571
|
end
|
563
572
|
|
564
573
|
private
|
574
|
+
|
575
|
+
def record_entity_expansion
|
576
|
+
@entity_expansion_count += 1
|
577
|
+
if @entity_expansion_count > Security.entity_expansion_limit
|
578
|
+
raise "number of entity expansions exceeded, processing aborted."
|
579
|
+
end
|
580
|
+
end
|
581
|
+
|
565
582
|
def need_source_encoding_update?(xml_declaration_encoding)
|
566
583
|
return false if xml_declaration_encoding.nil?
|
567
584
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -571,14 +588,14 @@ module REXML
|
|
571
588
|
def parse_name(base_error_message)
|
572
589
|
md = @source.match(Private::NAME_PATTERN, true)
|
573
590
|
unless md
|
574
|
-
if @source.match(/\
|
591
|
+
if @source.match(/\S/um)
|
575
592
|
message = "#{base_error_message}: invalid name"
|
576
593
|
else
|
577
594
|
message = "#{base_error_message}: name is missing"
|
578
595
|
end
|
579
596
|
raise REXML::ParseException.new(message, @source)
|
580
597
|
end
|
581
|
-
md[
|
598
|
+
md[0]
|
582
599
|
end
|
583
600
|
|
584
601
|
def parse_id(base_error_message,
|
@@ -647,18 +664,24 @@ module REXML
|
|
647
664
|
end
|
648
665
|
end
|
649
666
|
|
650
|
-
def process_instruction
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
667
|
+
def process_instruction
|
668
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
669
|
+
if @source.match(/\s+/um, true)
|
670
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
671
|
+
unless match_data
|
672
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
673
|
+
end
|
674
|
+
content = match_data[1]
|
675
|
+
else
|
676
|
+
content = nil
|
677
|
+
unless @source.match("?>", true)
|
678
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
679
|
+
end
|
656
680
|
end
|
657
|
-
if
|
681
|
+
if name == "xml"
|
658
682
|
if @document_status
|
659
683
|
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
684
|
end
|
661
|
-
content = match_data[2]
|
662
685
|
version = VERSION.match(content)
|
663
686
|
version = version[1] unless version.nil?
|
664
687
|
encoding = ENCODING.match(content)
|
@@ -673,7 +696,7 @@ module REXML
|
|
673
696
|
standalone = standalone[1] unless standalone.nil?
|
674
697
|
return [ :xmldecl, version, encoding, standalone ]
|
675
698
|
end
|
676
|
-
[:processing_instruction,
|
699
|
+
[:processing_instruction, name, content]
|
677
700
|
end
|
678
701
|
|
679
702
|
def parse_attributes(prefixes, curr_ns)
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -117,7 +117,7 @@ module REXML
|
|
117
117
|
def ensure_buffer
|
118
118
|
end
|
119
119
|
|
120
|
-
def match(pattern, cons=false
|
120
|
+
def match(pattern, cons=false)
|
121
121
|
if cons
|
122
122
|
@scanner.scan(pattern).nil? ? nil : @scanner
|
123
123
|
else
|
@@ -204,10 +204,20 @@ module REXML
|
|
204
204
|
end
|
205
205
|
end
|
206
206
|
|
207
|
-
def read(term = nil)
|
207
|
+
def read(term = nil, min_bytes = 1)
|
208
208
|
term = encode(term) if term
|
209
209
|
begin
|
210
|
-
|
210
|
+
str = readline(term)
|
211
|
+
@scanner << str
|
212
|
+
read_bytes = str.bytesize
|
213
|
+
begin
|
214
|
+
while read_bytes < min_bytes
|
215
|
+
str = readline(term)
|
216
|
+
@scanner << str
|
217
|
+
read_bytes += str.bytesize
|
218
|
+
end
|
219
|
+
rescue IOError
|
220
|
+
end
|
211
221
|
true
|
212
222
|
rescue Exception, NameError
|
213
223
|
@source = nil
|
@@ -237,10 +247,9 @@ module REXML
|
|
237
247
|
read if @scanner.eos? && @source
|
238
248
|
end
|
239
249
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
def match( pattern, cons=false, term: nil )
|
250
|
+
def match( pattern, cons=false )
|
251
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
252
|
+
min_bytes = 1
|
244
253
|
while true
|
245
254
|
if cons
|
246
255
|
md = @scanner.scan(pattern)
|
@@ -250,7 +259,8 @@ module REXML
|
|
250
259
|
break if md
|
251
260
|
return nil if pattern.is_a?(String)
|
252
261
|
return nil if @source.nil?
|
253
|
-
return nil unless read(
|
262
|
+
return nil unless read(nil, min_bytes)
|
263
|
+
min_bytes *= 2
|
254
264
|
end
|
255
265
|
|
256
266
|
md.nil? ? nil : @scanner
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.
|
4
|
+
version: 3.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-
|
10
|
+
date: 2024-08-01 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
|
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
118
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.4
|
120
120
|
rdoc_options:
|
121
121
|
- "--main"
|
122
122
|
- README.md
|