rexml 3.3.1 → 3.3.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +48 -0
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/parsers/baseparser.rb +36 -10
- data/lib/rexml/parsers/sax2parser.rb +2 -19
- data/lib/rexml/parsers/streamparser.rb +2 -2
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +3 -3
- data/lib/rexml/text.rb +34 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70ccd1465a05dba3d53dcfc4a98e76dec865a4f6ac833b954aff4234bce6c255
|
4
|
+
data.tar.gz: 53f43fab8f531e0ba7461ce091e5eae6bec27b12e9139450c7b3e748b4eeacdc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b46818d79ae57075c4e0bd620802e82c6958dddc7da1b182504c3fdc16685c887ac0ddd6a4838a080483abba330839e9ef4b2db22cc81b9eae3eac71ac14c965
|
7
|
+
data.tar.gz: 1e5205905eb435c02038dd0539de22472f5364ffc47635f13a1752cb79a423dcca558fb47394ac5d624b358e779b07cbcafedfd06b99742026856f9988109976
|
data/NEWS.md
CHANGED
@@ -1,5 +1,53 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Improved parse performance.
|
8
|
+
* GH-160
|
9
|
+
* Patch by NAITOH Jun.
|
10
|
+
|
11
|
+
* Improved parse performance.
|
12
|
+
* GH-169
|
13
|
+
* GH-170
|
14
|
+
* GH-171
|
15
|
+
* GH-172
|
16
|
+
* GH-173
|
17
|
+
* GH-174
|
18
|
+
* Patch by Watson.
|
19
|
+
|
20
|
+
* Added support for raising a parse exception when an XML has extra
|
21
|
+
content after the root element.
|
22
|
+
* GH-161
|
23
|
+
* Patch by NAITOH Jun.
|
24
|
+
|
25
|
+
* Added support for raising a parse exception when an XML
|
26
|
+
declaration exists in wrong position.
|
27
|
+
* GH-162
|
28
|
+
* Patch by NAITOH Jun.
|
29
|
+
|
30
|
+
* Removed needless a space after XML declaration in pretty print mode.
|
31
|
+
* GH-164
|
32
|
+
* Patch by NAITOH Jun.
|
33
|
+
|
34
|
+
* Stopped to emit `:text` event after the root element.
|
35
|
+
* GH-167
|
36
|
+
* Patch by NAITOH Jun.
|
37
|
+
|
38
|
+
### Fixes
|
39
|
+
|
40
|
+
* Fixed a bug that SAX2 parser doesn't expand predefined entities for
|
41
|
+
`characters` callback.
|
42
|
+
* GH-168
|
43
|
+
* Patch by NAITOH Jun.
|
44
|
+
|
45
|
+
### Thanks
|
46
|
+
|
47
|
+
* NAITOH Jun
|
48
|
+
|
49
|
+
* Watson
|
50
|
+
|
3
51
|
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
4
52
|
|
5
53
|
### Improvements
|
@@ -111,7 +111,7 @@ module REXML
|
|
111
111
|
# itself, then we don't need a carriage return... which makes this
|
112
112
|
# logic more complex.
|
113
113
|
node.children.each { |child|
|
114
|
-
next if child == node.children[-1] and child.instance_of?(Text)
|
114
|
+
next if child.instance_of?(Text)
|
115
115
|
unless child == node.children[0] or child.instance_of?(Text) or
|
116
116
|
(child == node.children[1] and !node.children[0].writethis)
|
117
117
|
output << "\n"
|
@@ -124,6 +124,14 @@ module REXML
|
|
124
124
|
}
|
125
125
|
|
126
126
|
module Private
|
127
|
+
# Terminal requires two or more letters.
|
128
|
+
INSTRUCTION_TERM = "?>"
|
129
|
+
COMMENT_TERM = "-->"
|
130
|
+
CDATA_TERM = "]]>"
|
131
|
+
DOCTYPE_TERM = "]>"
|
132
|
+
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
+
ENTITY_TERM = DOCTYPE_TERM
|
134
|
+
|
127
135
|
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
128
136
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
129
137
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
@@ -157,6 +165,7 @@ module REXML
|
|
157
165
|
def stream=( source )
|
158
166
|
@source = SourceFactory.create_from( source )
|
159
167
|
@closed = nil
|
168
|
+
@have_root = false
|
160
169
|
@document_status = nil
|
161
170
|
@tags = []
|
162
171
|
@stack = []
|
@@ -242,7 +251,7 @@ module REXML
|
|
242
251
|
return process_instruction(start_position)
|
243
252
|
elsif @source.match("<!", true)
|
244
253
|
if @source.match("--", true)
|
245
|
-
md = @source.match(/(.*?)-->/um, true)
|
254
|
+
md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
246
255
|
if md.nil?
|
247
256
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
248
257
|
end
|
@@ -309,7 +318,7 @@ module REXML
|
|
309
318
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
310
319
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
311
320
|
elsif @source.match("ENTITY", true)
|
312
|
-
match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
|
321
|
+
match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
|
313
322
|
ref = false
|
314
323
|
if match[1] == '%'
|
315
324
|
ref = true
|
@@ -341,7 +350,7 @@ module REXML
|
|
341
350
|
contents = md[0]
|
342
351
|
|
343
352
|
pairs = {}
|
344
|
-
values = md[0].scan( ATTDEF_RE )
|
353
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
345
354
|
values.each do |attdef|
|
346
355
|
unless attdef[3] == "#IMPLIED"
|
347
356
|
attdef.compact!
|
@@ -374,14 +383,14 @@ module REXML
|
|
374
383
|
raise REXML::ParseException.new(message, @source)
|
375
384
|
end
|
376
385
|
return [:notationdecl, name, *id]
|
377
|
-
elsif md = @source.match(/--(.*?)-->/um, true)
|
386
|
+
elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
378
387
|
case md[1]
|
379
388
|
when /--/, /-\z/
|
380
389
|
raise REXML::ParseException.new("Malformed comment", @source)
|
381
390
|
end
|
382
391
|
return [ :comment, md[1] ] if md
|
383
392
|
end
|
384
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
393
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
|
385
394
|
return [ :externalentity, match[1] ]
|
386
395
|
elsif @source.match(/\]\s*>/um, true)
|
387
396
|
@document_status = :after_doctype
|
@@ -421,7 +430,7 @@ module REXML
|
|
421
430
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
422
431
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
423
432
|
if md[0][0] == ?-
|
424
|
-
md = @source.match(/--(.*?)-->/um, true)
|
433
|
+
md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
|
425
434
|
|
426
435
|
if md.nil? || /--|-\z/.match?(md[1])
|
427
436
|
raise REXML::ParseException.new("Malformed comment", @source)
|
@@ -429,7 +438,7 @@ module REXML
|
|
429
438
|
|
430
439
|
return [ :comment, md[1] ]
|
431
440
|
else
|
432
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
441
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
|
433
442
|
return [ :cdata, md[1] ] if md
|
434
443
|
end
|
435
444
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
@@ -460,8 +469,12 @@ module REXML
|
|
460
469
|
@closed = tag
|
461
470
|
@nsstack.shift
|
462
471
|
else
|
472
|
+
if @tags.empty? and @have_root
|
473
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
474
|
+
end
|
463
475
|
@tags.push( tag )
|
464
476
|
end
|
477
|
+
@have_root = true
|
465
478
|
return [ :start_element, tag, attributes ]
|
466
479
|
end
|
467
480
|
else
|
@@ -469,6 +482,12 @@ module REXML
|
|
469
482
|
if text.chomp!("<")
|
470
483
|
@source.position -= "<".bytesize
|
471
484
|
end
|
485
|
+
if @tags.empty? and @have_root
|
486
|
+
unless /\A\s*\z/.match?(text)
|
487
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
488
|
+
end
|
489
|
+
return pull_event
|
490
|
+
end
|
472
491
|
return [ :text, text ]
|
473
492
|
end
|
474
493
|
rescue REXML::UndefinedNamespaceException
|
@@ -511,7 +530,11 @@ module REXML
|
|
511
530
|
|
512
531
|
# Unescapes all possible entities
|
513
532
|
def unnormalize( string, entities=nil, filter=nil )
|
514
|
-
|
533
|
+
if string.include?("\r")
|
534
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
535
|
+
else
|
536
|
+
rv = string.dup
|
537
|
+
end
|
515
538
|
matches = rv.scan( REFERENCE_RE )
|
516
539
|
return rv if matches.size == 0
|
517
540
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
@@ -625,13 +648,16 @@ module REXML
|
|
625
648
|
end
|
626
649
|
|
627
650
|
def process_instruction(start_position)
|
628
|
-
match_data = @source.match(Private::INSTRUCTION_END, true)
|
651
|
+
match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
|
629
652
|
unless match_data
|
630
653
|
message = "Invalid processing instruction node"
|
631
654
|
@source.position = start_position
|
632
655
|
raise REXML::ParseException.new(message, @source)
|
633
656
|
end
|
634
|
-
if @document_status.nil? and match_data[1] == "xml"
|
657
|
+
if match_data[1] == "xml"
|
658
|
+
if @document_status
|
659
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
|
+
end
|
635
661
|
content = match_data[2]
|
636
662
|
version = VERSION.match(content)
|
637
663
|
version = version[1] unless version.nil?
|
@@ -157,25 +157,8 @@ module REXML
|
|
157
157
|
end
|
158
158
|
end
|
159
159
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
160
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
161
|
+
handle( :characters, unnormalized )
|
179
162
|
when :entitydecl
|
180
163
|
handle_entitydecl( event )
|
181
164
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -36,8 +36,8 @@ module REXML
|
|
36
36
|
@listener.tag_end( event[1] )
|
37
37
|
@tag_stack.pop
|
38
38
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
39
|
+
unnormalized = @parser.unnormalize( event[1] )
|
40
|
+
@listener.text( unnormalized )
|
41
41
|
when :processing_instruction
|
42
42
|
@listener.instruction( *event[1,2] )
|
43
43
|
when :start_doctype
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -117,7 +117,7 @@ module REXML
|
|
117
117
|
def ensure_buffer
|
118
118
|
end
|
119
119
|
|
120
|
-
def match(pattern, cons=false)
|
120
|
+
def match(pattern, cons=false, term: nil)
|
121
121
|
if cons
|
122
122
|
@scanner.scan(pattern).nil? ? nil : @scanner
|
123
123
|
else
|
@@ -240,7 +240,7 @@ module REXML
|
|
240
240
|
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
241
241
|
# - ">"
|
242
242
|
# - "XXX>" (X is any string excluding '>')
|
243
|
-
def match( pattern, cons=false )
|
243
|
+
def match( pattern, cons=false, term: nil )
|
244
244
|
while true
|
245
245
|
if cons
|
246
246
|
md = @scanner.scan(pattern)
|
@@ -250,7 +250,7 @@ module REXML
|
|
250
250
|
break if md
|
251
251
|
return nil if pattern.is_a?(String)
|
252
252
|
return nil if @source.nil?
|
253
|
-
return nil unless read
|
253
|
+
return nil unless read(term)
|
254
254
|
end
|
255
255
|
|
256
256
|
md.nil? ? nil : @scanner
|
data/lib/rexml/text.rb
CHANGED
@@ -151,25 +151,45 @@ module REXML
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
154
|
+
pos = 0
|
155
|
+
while (index = string.index(/<|&/, pos))
|
156
|
+
if string[index] == "<"
|
157
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
158
|
+
end
|
159
|
+
|
160
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
|
+
end
|
163
|
+
|
164
|
+
value = string[(index + 1)..end_index]
|
165
|
+
if /\s/.match?(value)
|
166
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
167
|
+
end
|
168
|
+
|
169
|
+
if value[0] == "#"
|
170
|
+
character_reference = value[1..-1]
|
171
|
+
|
172
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
173
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
174
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
175
|
else
|
163
|
-
raise "Illegal character #{
|
176
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
177
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
178
|
end
|
179
|
+
|
180
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
181
|
+
when *VALID_CHAR
|
182
|
+
else
|
183
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
184
|
+
end
|
185
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
186
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
187
|
end
|
188
|
+
|
189
|
+
pos = end_index + 1
|
172
190
|
end
|
191
|
+
|
192
|
+
string
|
173
193
|
end
|
174
194
|
|
175
195
|
def node_type
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.1
|
4
|
+
version: 3.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-06-25 00:00:00.000000000 Z
|
10
|
+
date: 2024-07-16 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
|
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
118
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
|
120
120
|
rdoc_options:
|
121
121
|
- "--main"
|
122
122
|
- README.md
|