rexml 3.3.2 → 3.3.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +68 -0
- data/lib/rexml/parsers/baseparser.rb +54 -33
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +4 -0
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +18 -8
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e2ee370ff6c1ab70149f6743a12ddf1eeae2c2af3c20f8cb7c6e56ff9699eec
|
4
|
+
data.tar.gz: 158254197a12b1038b9b5e116c9abc89a329ef97acda8031399a56d3aee45fe9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b805e28e50ef71bbc5d0349fdd4ec57ec4811bba94fe4c3f8aa17bedb81971da48e98205c53a8eadd18f07b69a2f68c8200529d546aef4187f9f3e903670857
|
7
|
+
data.tar.gz: df3e369135f9b156475772a77702a91d45b8ee64ad49f608b2b33dc63d7b07dd271d7ac458d0b5e944e613798a0940231282997a747c4838e3e5c3afaf60253b
|
data/NEWS.md
CHANGED
@@ -1,5 +1,70 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.5 - 2024-08-12 {#version-3-3-5}
|
4
|
+
|
5
|
+
### Fixes
|
6
|
+
|
7
|
+
* Fixed a bug that `REXML::Security.entity_expansion_text_limit`
|
8
|
+
check has wrong text size calculation in SAX and pull parsers.
|
9
|
+
* GH-193
|
10
|
+
* GH-195
|
11
|
+
* Reported by Viktor Ivarsson.
|
12
|
+
* Patch by NAITOH Jun.
|
13
|
+
|
14
|
+
### Thanks
|
15
|
+
|
16
|
+
* Viktor Ivarsson
|
17
|
+
|
18
|
+
* NAITOH Jun
|
19
|
+
|
20
|
+
## 3.3.4 - 2024-08-01 {#version-3-3-4}
|
21
|
+
|
22
|
+
### Fixes
|
23
|
+
|
24
|
+
* Fixed a bug that `REXML::Security` isn't defined when
|
25
|
+
`REXML::Parsers::StreamParser` is used and
|
26
|
+
`rexml/parsers/streamparser` is only required.
|
27
|
+
* GH-189
|
28
|
+
* Patch by takuya kodama.
|
29
|
+
|
30
|
+
### Thanks
|
31
|
+
|
32
|
+
* takuya kodama
|
33
|
+
|
34
|
+
## 3.3.3 - 2024-08-01 {#version-3-3-3}
|
35
|
+
|
36
|
+
### Improvements
|
37
|
+
|
38
|
+
* Added support for detecting invalid XML that has unsupported
|
39
|
+
content before root element
|
40
|
+
* GH-184
|
41
|
+
* Patch by NAITOH Jun.
|
42
|
+
|
43
|
+
* Added support for `REXML::Security.entity_expansion_limit=` and
|
44
|
+
`REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
|
45
|
+
parsers
|
46
|
+
* GH-187
|
47
|
+
* Patch by NAITOH Jun.
|
48
|
+
|
49
|
+
* Added more tests for invalid XMLs.
|
50
|
+
* GH-183
|
51
|
+
* Patch by Watson.
|
52
|
+
|
53
|
+
* Added more performance tests.
|
54
|
+
* Patch by Watson.
|
55
|
+
|
56
|
+
* Improved parse performance.
|
57
|
+
* GH-186
|
58
|
+
* Patch by tomoya ishida.
|
59
|
+
|
60
|
+
### Thanks
|
61
|
+
|
62
|
+
* NAITOH Jun
|
63
|
+
|
64
|
+
* Watson
|
65
|
+
|
66
|
+
* tomoya ishida
|
67
|
+
|
3
68
|
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
4
69
|
|
5
70
|
### Improvements
|
@@ -15,6 +80,9 @@
|
|
15
80
|
* GH-172
|
16
81
|
* GH-173
|
17
82
|
* GH-174
|
83
|
+
* GH-175
|
84
|
+
* GH-176
|
85
|
+
* GH-177
|
18
86
|
* Patch by Watson.
|
19
87
|
|
20
88
|
* Added support for raising a parse exception when an XML has extra
|
data/lib/rexml/parsers/baseparser.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
@@ -124,19 +125,10 @@ module REXML
|
|
124
125
|
}
|
125
126
|
|
126
127
|
module Private
|
127
|
-
# Terminal requires two or more letters.
|
128
|
-
INSTRUCTION_TERM = "?>"
|
129
|
-
COMMENT_TERM = "-->"
|
130
|
-
CDATA_TERM = "]]>"
|
131
|
-
DOCTYPE_TERM = "]>"
|
132
|
-
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
-
ENTITY_TERM = DOCTYPE_TERM
|
134
|
-
|
135
|
-
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
136
128
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
137
129
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
138
130
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
139
|
-
NAME_PATTERN =
|
131
|
+
NAME_PATTERN = /#{NAME}/um
|
140
132
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
141
133
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
142
134
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
@@ -154,6 +146,7 @@ module REXML
|
|
154
146
|
self.stream = source
|
155
147
|
@listeners = []
|
156
148
|
@prefixes = Set.new
|
149
|
+
@entity_expansion_count = 0
|
157
150
|
end
|
158
151
|
|
159
152
|
def add_listener( listener )
|
@@ -161,6 +154,7 @@ module REXML
|
|
161
154
|
end
|
162
155
|
|
163
156
|
attr_reader :source
|
157
|
+
attr_reader :entity_expansion_count
|
164
158
|
|
165
159
|
def stream=( source )
|
166
160
|
@source = SourceFactory.create_from( source )
|
@@ -248,10 +242,10 @@ module REXML
|
|
248
242
|
if @document_status == nil
|
249
243
|
start_position = @source.position
|
250
244
|
if @source.match("<?", true)
|
251
|
-
return process_instruction
|
245
|
+
return process_instruction
|
252
246
|
elsif @source.match("<!", true)
|
253
247
|
if @source.match("--", true)
|
254
|
-
md = @source.match(/(.*?)-->/um, true
|
248
|
+
md = @source.match(/(.*?)-->/um, true)
|
255
249
|
if md.nil?
|
256
250
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
257
251
|
end
|
@@ -318,7 +312,11 @@ module REXML
|
|
318
312
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
319
313
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
320
314
|
elsif @source.match("ENTITY", true)
|
321
|
-
|
315
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
316
|
+
unless match_data
|
317
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
318
|
+
end
|
319
|
+
match = [:entitydecl, *match_data.captures.compact]
|
322
320
|
ref = false
|
323
321
|
if match[1] == '%'
|
324
322
|
ref = true
|
@@ -383,14 +381,14 @@ module REXML
|
|
383
381
|
raise REXML::ParseException.new(message, @source)
|
384
382
|
end
|
385
383
|
return [:notationdecl, name, *id]
|
386
|
-
elsif md = @source.match(/--(.*?)-->/um, true
|
384
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
387
385
|
case md[1]
|
388
386
|
when /--/, /-\z/
|
389
387
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
388
|
end
|
391
389
|
return [ :comment, md[1] ] if md
|
392
390
|
end
|
393
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true
|
391
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
394
392
|
return [ :externalentity, match[1] ]
|
395
393
|
elsif @source.match(/\]\s*>/um, true)
|
396
394
|
@document_status = :after_doctype
|
@@ -430,7 +428,7 @@ module REXML
|
|
430
428
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
431
429
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
432
430
|
if md[0][0] == ?-
|
433
|
-
md = @source.match(/--(.*?)-->/um, true
|
431
|
+
md = @source.match(/--(.*?)-->/um, true)
|
434
432
|
|
435
433
|
if md.nil? || /--|-\z/.match?(md[1])
|
436
434
|
raise REXML::ParseException.new("Malformed comment", @source)
|
@@ -438,13 +436,13 @@ module REXML
|
|
438
436
|
|
439
437
|
return [ :comment, md[1] ]
|
440
438
|
else
|
441
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true
|
439
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
442
440
|
return [ :cdata, md[1] ] if md
|
443
441
|
end
|
444
442
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
445
443
|
"in the doctype declaration.", @source)
|
446
444
|
elsif @source.match("?", true)
|
447
|
-
return process_instruction
|
445
|
+
return process_instruction
|
448
446
|
else
|
449
447
|
# Get the next tag
|
450
448
|
md = @source.match(Private::TAG_PATTERN, true)
|
@@ -482,11 +480,15 @@ module REXML
|
|
482
480
|
if text.chomp!("<")
|
483
481
|
@source.position -= "<".bytesize
|
484
482
|
end
|
485
|
-
if @tags.empty?
|
483
|
+
if @tags.empty?
|
486
484
|
unless /\A\s*\z/.match?(text)
|
487
|
-
|
485
|
+
if @have_root
|
486
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
487
|
+
else
|
488
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
489
|
+
end
|
488
490
|
end
|
489
|
-
return pull_event
|
491
|
+
return pull_event if @have_root
|
490
492
|
end
|
491
493
|
return [ :text, text ]
|
492
494
|
end
|
@@ -505,7 +507,9 @@ module REXML
|
|
505
507
|
def entity( reference, entities )
|
506
508
|
value = nil
|
507
509
|
value = entities[ reference ] if entities
|
508
|
-
if
|
510
|
+
if value
|
511
|
+
record_entity_expansion
|
512
|
+
else
|
509
513
|
value = DEFAULT_ENTITIES[ reference ]
|
510
514
|
value = value[2] if value
|
511
515
|
end
|
@@ -550,6 +554,9 @@ module REXML
|
|
550
554
|
if entity_value
|
551
555
|
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
552
556
|
rv.gsub!( re, entity_value )
|
557
|
+
if rv.bytesize > Security.entity_expansion_text_limit
|
558
|
+
raise "entity expansion has grown too large"
|
559
|
+
end
|
553
560
|
else
|
554
561
|
er = DEFAULT_ENTITIES[entity_reference]
|
555
562
|
rv.gsub!( er[0], er[2] ) if er
|
@@ -562,6 +569,14 @@ module REXML
|
|
562
569
|
end
|
563
570
|
|
564
571
|
private
|
572
|
+
|
573
|
+
def record_entity_expansion
|
574
|
+
@entity_expansion_count += 1
|
575
|
+
if @entity_expansion_count > Security.entity_expansion_limit
|
576
|
+
raise "number of entity expansions exceeded, processing aborted."
|
577
|
+
end
|
578
|
+
end
|
579
|
+
|
565
580
|
def need_source_encoding_update?(xml_declaration_encoding)
|
566
581
|
return false if xml_declaration_encoding.nil?
|
567
582
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -571,14 +586,14 @@ module REXML
|
|
571
586
|
def parse_name(base_error_message)
|
572
587
|
md = @source.match(Private::NAME_PATTERN, true)
|
573
588
|
unless md
|
574
|
-
if @source.match(/\
|
589
|
+
if @source.match(/\S/um)
|
575
590
|
message = "#{base_error_message}: invalid name"
|
576
591
|
else
|
577
592
|
message = "#{base_error_message}: name is missing"
|
578
593
|
end
|
579
594
|
raise REXML::ParseException.new(message, @source)
|
580
595
|
end
|
581
|
-
md[
|
596
|
+
md[0]
|
582
597
|
end
|
583
598
|
|
584
599
|
def parse_id(base_error_message,
|
@@ -647,18 +662,24 @@ module REXML
|
|
647
662
|
end
|
648
663
|
end
|
649
664
|
|
650
|
-
def process_instruction
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
665
|
+
def process_instruction
|
666
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
667
|
+
if @source.match(/\s+/um, true)
|
668
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
669
|
+
unless match_data
|
670
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
671
|
+
end
|
672
|
+
content = match_data[1]
|
673
|
+
else
|
674
|
+
content = nil
|
675
|
+
unless @source.match("?>", true)
|
676
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
677
|
+
end
|
656
678
|
end
|
657
|
-
if
|
679
|
+
if name == "xml"
|
658
680
|
if @document_status
|
659
681
|
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
682
|
end
|
661
|
-
content = match_data[2]
|
662
683
|
version = VERSION.match(content)
|
663
684
|
version = version[1] unless version.nil?
|
664
685
|
encoding = ENCODING.match(content)
|
@@ -673,7 +694,7 @@ module REXML
|
|
673
694
|
standalone = standalone[1] unless standalone.nil?
|
674
695
|
return [ :xmldecl, version, encoding, standalone ]
|
675
696
|
end
|
676
|
-
[:processing_instruction,
|
697
|
+
[:processing_instruction, name, content]
|
677
698
|
end
|
678
699
|
|
679
700
|
def parse_attributes(prefixes, curr_ns)
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -117,7 +117,7 @@ module REXML
|
|
117
117
|
def ensure_buffer
|
118
118
|
end
|
119
119
|
|
120
|
-
def match(pattern, cons=false, term: nil)
|
120
|
+
def match(pattern, cons=false)
|
121
121
|
if cons
|
122
122
|
@scanner.scan(pattern).nil? ? nil : @scanner
|
123
123
|
else
|
@@ -204,10 +204,20 @@ module REXML
|
|
204
204
|
end
|
205
205
|
end
|
206
206
|
|
207
|
-
def read(term = nil)
|
207
|
+
def read(term = nil, min_bytes = 1)
|
208
208
|
term = encode(term) if term
|
209
209
|
begin
|
210
|
-
|
210
|
+
str = readline(term)
|
211
|
+
@scanner << str
|
212
|
+
read_bytes = str.bytesize
|
213
|
+
begin
|
214
|
+
while read_bytes < min_bytes
|
215
|
+
str = readline(term)
|
216
|
+
@scanner << str
|
217
|
+
read_bytes += str.bytesize
|
218
|
+
end
|
219
|
+
rescue IOError
|
220
|
+
end
|
211
221
|
true
|
212
222
|
rescue Exception, NameError
|
213
223
|
@source = nil
|
@@ -237,10 +247,9 @@ module REXML
|
|
237
247
|
read if @scanner.eos? && @source
|
238
248
|
end
|
239
249
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
def match( pattern, cons=false, term: nil )
|
250
|
+
def match( pattern, cons=false )
|
251
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
252
|
+
min_bytes = 1
|
244
253
|
while true
|
245
254
|
if cons
|
246
255
|
md = @scanner.scan(pattern)
|
@@ -250,7 +259,8 @@ module REXML
|
|
250
259
|
break if md
|
251
260
|
return nil if pattern.is_a?(String)
|
252
261
|
return nil if @source.nil?
|
253
|
-
return nil unless read(term)
|
262
|
+
return nil unless read(nil, min_bytes)
|
263
|
+
min_bytes *= 2
|
254
264
|
end
|
255
265
|
|
256
266
|
md.nil? ? nil : @scanner
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.2
|
4
|
+
version: 3.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-07-16 00:00:00.000000000 Z
|
10
|
+
date: 2024-08-12 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
|
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
118
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.5
|
120
120
|
rdoc_options:
|
121
121
|
- "--main"
|
122
122
|
- README.md
|