rexml 3.2.9 → 3.3.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +61 -2
- data/lib/rexml/element.rb +2 -15
- data/lib/rexml/parsers/baseparser.rb +44 -21
- data/lib/rexml/parsers/treeparser.rb +9 -14
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +27 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: afaa8e7d5241253a1c36a218f94eeff525cc19378d2ed104f738abfc01693889
|
4
|
+
data.tar.gz: 665e18c0db75cce5e3db16c674c02e986ff9141df54fd7ff3da704b4403a928d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86ea7a0ce4847b320f297b1eb03158003c2931847c07ea118f0a7413f476660dcf40baec8b59a92a2e7096eb665ace359b04c5d8e82617b7162305465472c88d
|
7
|
+
data.tar.gz: ae248f28516ab6c76170623bcc5e5a30389596823133fd0a13cb74235d6101dd469235bab8b1e15bcbd7a7795f04b44e4674dfdcb1712109dce58001cea01648
|
data/NEWS.md
CHANGED
@@ -1,12 +1,71 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
-
## 3.2.9 - 2024-06-09 {#version-3-2-9}
|
3
|
+
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for detecting malformed top-level comments.
|
8
|
+
* GH-145
|
9
|
+
* Patch by Hiroya Fujinami.
|
10
|
+
|
11
|
+
* Improved `REXML::Element#attribute` performance.
|
12
|
+
* GH-146
|
13
|
+
* Patch by Hiroya Fujinami.
|
14
|
+
|
15
|
+
* Added support for detecting malformed `<!-->` comments.
|
16
|
+
* GH-147
|
17
|
+
* Patch by Hiroya Fujinami.
|
18
|
+
|
19
|
+
* Added support for detecting unclosed `DOCTYPE`.
|
20
|
+
* GH-152
|
21
|
+
* Patch by Hiroya Fujinami.
|
22
|
+
|
23
|
+
* Added `changelog_uri` metadata to gemspec.
|
24
|
+
* GH-156
|
25
|
+
* Patch by fynsta.
|
26
|
+
|
27
|
+
* Improved parse performance.
|
28
|
+
* GH-157
|
29
|
+
* GH-158
|
30
|
+
* Patch by NAITOH Jun.
|
31
|
+
|
32
|
+
### Fixes
|
33
|
+
|
34
|
+
* Fixed a bug that large XML can't be parsed.
|
35
|
+
* GH-154
|
36
|
+
* Patch by NAITOH Jun.
|
37
|
+
|
38
|
+
* Fixed a bug that private constants are visible.
|
39
|
+
* GH-155
|
40
|
+
* Patch by NAITOH Jun.
|
41
|
+
|
42
|
+
### Thanks
|
43
|
+
|
44
|
+
* Hiroya Fujinami
|
45
|
+
|
46
|
+
* NAITOH Jun
|
47
|
+
|
48
|
+
* fynsta
|
49
|
+
|
50
|
+
## 3.3.0 - 2024-06-11 {#version-3-3-0}
|
51
|
+
|
52
|
+
### Improvements
|
53
|
+
|
54
|
+
* Added support for strscan 0.7.0 installed with Ruby 2.6.
|
55
|
+
* GH-142
|
56
|
+
* Reported by Fernando Trigoso.
|
57
|
+
|
58
|
+
### Thanks
|
59
|
+
|
60
|
+
* Fernando Trigoso
|
61
|
+
|
62
|
+
## 3.2.9 - 2024-06-09 {#version-3-2-9}
|
4
63
|
|
5
64
|
### Improvements
|
6
65
|
|
7
66
|
* Added support for old strscan.
|
8
67
|
* GH-132
|
9
|
-
* Reported by Adam
|
68
|
+
* Reported by Adam.
|
10
69
|
|
11
70
|
* Improved attribute value parse performance.
|
12
71
|
* GH-135
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -1284,16 +1276,11 @@ module REXML
|
|
1284
1276
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1277
|
#
|
1286
1278
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1279
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1280
|
prefix = nil if prefix == 'xmlns'
|
1294
1281
|
|
1295
1282
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1283
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1284
|
|
1298
1285
|
return ret_val unless ret_val.nil?
|
1299
1286
|
return nil if prefix.nil?
|
@@ -132,13 +132,20 @@ module REXML
|
|
132
132
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
133
133
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
134
134
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
135
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
136
|
+
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
137
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
138
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
139
|
+
default_entities.each do |term|
|
140
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
141
|
+
end
|
135
142
|
end
|
136
143
|
private_constant :Private
|
137
|
-
include Private
|
138
144
|
|
139
145
|
def initialize( source )
|
140
146
|
self.stream = source
|
141
147
|
@listeners = []
|
148
|
+
@prefixes = Set.new
|
142
149
|
end
|
143
150
|
|
144
151
|
def add_listener( listener )
|
@@ -204,6 +211,8 @@ module REXML
|
|
204
211
|
|
205
212
|
# Returns the next event. This is a +PullEvent+ object.
|
206
213
|
def pull
|
214
|
+
@source.drop_parsed_content
|
215
|
+
|
207
216
|
pull_event.tap do |event|
|
208
217
|
@listeners.each do |listener|
|
209
218
|
listener.receive event
|
@@ -216,7 +225,12 @@ module REXML
|
|
216
225
|
x, @closed = @closed, nil
|
217
226
|
return [ :end_element, x ]
|
218
227
|
end
|
219
|
-
|
228
|
+
if empty?
|
229
|
+
if @document_status == :in_doctype
|
230
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
231
|
+
end
|
232
|
+
return [ :end_document ]
|
233
|
+
end
|
220
234
|
return @stack.shift if @stack.size > 0
|
221
235
|
#STDERR.puts @source.encoding
|
222
236
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -228,7 +242,14 @@ module REXML
|
|
228
242
|
return process_instruction(start_position)
|
229
243
|
elsif @source.match("<!", true)
|
230
244
|
if @source.match("--", true)
|
231
|
-
|
245
|
+
md = @source.match(/(.*?)-->/um, true)
|
246
|
+
if md.nil?
|
247
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
248
|
+
end
|
249
|
+
if /--|-\z/.match?(md[1])
|
250
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
251
|
+
end
|
252
|
+
return [ :comment, md[1] ]
|
232
253
|
elsif @source.match("DOCTYPE", true)
|
233
254
|
base_error_message = "Malformed DOCTYPE"
|
234
255
|
unless @source.match(/\s+/um, true)
|
@@ -240,7 +261,7 @@ module REXML
|
|
240
261
|
@source.position = start_position
|
241
262
|
raise REXML::ParseException.new(message, @source)
|
242
263
|
end
|
243
|
-
@nsstack.unshift(
|
264
|
+
@nsstack.unshift(Set.new)
|
244
265
|
name = parse_name(base_error_message)
|
245
266
|
if @source.match(/\s*\[/um, true)
|
246
267
|
id = [nil, nil, nil]
|
@@ -288,7 +309,7 @@ module REXML
|
|
288
309
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
289
310
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
290
311
|
elsif @source.match("ENTITY", true)
|
291
|
-
match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
|
312
|
+
match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
|
292
313
|
ref = false
|
293
314
|
if match[1] == '%'
|
294
315
|
ref = true
|
@@ -314,7 +335,7 @@ module REXML
|
|
314
335
|
match << '%' if ref
|
315
336
|
return match
|
316
337
|
elsif @source.match("ATTLIST", true)
|
317
|
-
md = @source.match(ATTLISTDECL_END, true)
|
338
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
318
339
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
319
340
|
element = md[1]
|
320
341
|
contents = md[0]
|
@@ -366,6 +387,9 @@ module REXML
|
|
366
387
|
@document_status = :after_doctype
|
367
388
|
return [ :end_doctype ]
|
368
389
|
end
|
390
|
+
if @document_status == :in_doctype
|
391
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
392
|
+
end
|
369
393
|
end
|
370
394
|
if @document_status == :after_doctype
|
371
395
|
@source.match(/\s*/um, true)
|
@@ -380,7 +404,7 @@ module REXML
|
|
380
404
|
if @source.match("/", true)
|
381
405
|
@nsstack.shift
|
382
406
|
last_tag = @tags.pop
|
383
|
-
md = @source.match(CLOSE_PATTERN, true)
|
407
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
384
408
|
if md and !last_tag
|
385
409
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
386
410
|
raise REXML::ParseException.new(message, @source)
|
@@ -399,12 +423,11 @@ module REXML
|
|
399
423
|
if md[0][0] == ?-
|
400
424
|
md = @source.match(/--(.*?)-->/um, true)
|
401
425
|
|
402
|
-
|
403
|
-
when /--/, /-\z/
|
426
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
404
427
|
raise REXML::ParseException.new("Malformed comment", @source)
|
405
428
|
end
|
406
429
|
|
407
|
-
return [ :comment, md[1] ]
|
430
|
+
return [ :comment, md[1] ]
|
408
431
|
else
|
409
432
|
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
410
433
|
return [ :cdata, md[1] ] if md
|
@@ -415,19 +438,19 @@ module REXML
|
|
415
438
|
return process_instruction(start_position)
|
416
439
|
else
|
417
440
|
# Get the next tag
|
418
|
-
md = @source.match(TAG_PATTERN, true)
|
441
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
419
442
|
unless md
|
420
443
|
@source.position = start_position
|
421
444
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
422
445
|
end
|
423
446
|
tag = md[1]
|
424
447
|
@document_status = :in_element
|
425
|
-
prefixes
|
426
|
-
prefixes << md[2] if md[2]
|
448
|
+
@prefixes.clear
|
449
|
+
@prefixes << md[2] if md[2]
|
427
450
|
@nsstack.unshift(curr_ns=Set.new)
|
428
|
-
attributes, closed = parse_attributes(prefixes, curr_ns)
|
451
|
+
attributes, closed = parse_attributes(@prefixes, curr_ns)
|
429
452
|
# Verify that all of the prefixes have been defined
|
430
|
-
for prefix in prefixes
|
453
|
+
for prefix in @prefixes
|
431
454
|
unless @nsstack.find{|k| k.member?(prefix)}
|
432
455
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
433
456
|
end
|
@@ -488,10 +511,10 @@ module REXML
|
|
488
511
|
|
489
512
|
# Unescapes all possible entities
|
490
513
|
def unnormalize( string, entities=nil, filter=nil )
|
491
|
-
rv = string.gsub( /\r\n?/, "\n" )
|
514
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
492
515
|
matches = rv.scan( REFERENCE_RE )
|
493
516
|
return rv if matches.size == 0
|
494
|
-
rv.gsub!(
|
517
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
495
518
|
m=$1
|
496
519
|
m = "0#{m}" if m[0] == ?x
|
497
520
|
[Integer(m)].pack('U*')
|
@@ -502,7 +525,7 @@ module REXML
|
|
502
525
|
unless filter and filter.include?(entity_reference)
|
503
526
|
entity_value = entity( entity_reference, entities )
|
504
527
|
if entity_value
|
505
|
-
re = /&#{entity_reference};/
|
528
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
506
529
|
rv.gsub!( re, entity_value )
|
507
530
|
else
|
508
531
|
er = DEFAULT_ENTITIES[entity_reference]
|
@@ -510,7 +533,7 @@ module REXML
|
|
510
533
|
end
|
511
534
|
end
|
512
535
|
end
|
513
|
-
rv.gsub!(
|
536
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
514
537
|
end
|
515
538
|
rv
|
516
539
|
end
|
@@ -523,7 +546,7 @@ module REXML
|
|
523
546
|
end
|
524
547
|
|
525
548
|
def parse_name(base_error_message)
|
526
|
-
md = @source.match(NAME_PATTERN, true)
|
549
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
527
550
|
unless md
|
528
551
|
if @source.match(/\s*\S/um)
|
529
552
|
message = "#{base_error_message}: invalid name"
|
@@ -602,7 +625,7 @@ module REXML
|
|
602
625
|
end
|
603
626
|
|
604
627
|
def process_instruction(start_position)
|
605
|
-
match_data = @source.match(INSTRUCTION_END, true)
|
628
|
+
match_data = @source.match(Private::INSTRUCTION_END, true)
|
606
629
|
unless match_data
|
607
630
|
message = "Invalid processing instruction node"
|
608
631
|
@source.position = start_position
|
@@ -16,7 +16,6 @@ module REXML
|
|
16
16
|
|
17
17
|
def parse
|
18
18
|
tag_stack = []
|
19
|
-
in_doctype = false
|
20
19
|
entities = nil
|
21
20
|
begin
|
22
21
|
while true
|
@@ -39,17 +38,15 @@ module REXML
|
|
39
38
|
tag_stack.pop
|
40
39
|
@build_context = @build_context.parent
|
41
40
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
41
|
+
if @build_context[-1].instance_of? Text
|
42
|
+
@build_context[-1] << event[1]
|
43
|
+
else
|
44
|
+
@build_context.add(
|
45
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
46
|
+
) unless (
|
47
|
+
@build_context.ignore_whitespace_nodes and
|
48
|
+
event[1].strip.size==0
|
49
|
+
)
|
53
50
|
end
|
54
51
|
when :comment
|
55
52
|
c = Comment.new( event[1] )
|
@@ -60,14 +57,12 @@ module REXML
|
|
60
57
|
when :processing_instruction
|
61
58
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
59
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
60
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
61
|
@build_context = @build_context.parent
|
66
62
|
when :start_doctype
|
67
63
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
64
|
@build_context = doctype
|
69
65
|
entities = {}
|
70
|
-
in_doctype = true
|
71
66
|
when :attlistdecl
|
72
67
|
n = AttlistDecl.new( event[1..-1] )
|
73
68
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -1,8 +1,28 @@
|
|
1
1
|
# coding: US-ASCII
|
2
2
|
# frozen_string_literal: false
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
|
3
6
|
require_relative 'encoding'
|
4
7
|
|
5
8
|
module REXML
|
9
|
+
if StringScanner::Version < "1.0.0"
|
10
|
+
module StringScannerCheckScanString
|
11
|
+
refine StringScanner do
|
12
|
+
def check(pattern)
|
13
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
14
|
+
super(pattern)
|
15
|
+
end
|
16
|
+
|
17
|
+
def scan(pattern)
|
18
|
+
pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String)
|
19
|
+
super(pattern)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
using StringScannerCheckScanString
|
24
|
+
end
|
25
|
+
|
6
26
|
# Generates Source-s. USE THIS CLASS.
|
7
27
|
class SourceFactory
|
8
28
|
# Generates a Source object
|
@@ -35,6 +55,7 @@ module REXML
|
|
35
55
|
attr_reader :encoding
|
36
56
|
|
37
57
|
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
38
59
|
PRE_DEFINED_TERM_PATTERNS = {}
|
39
60
|
pre_defined_terms = ["'", '"', "<"]
|
40
61
|
pre_defined_terms.each do |term|
|
@@ -42,7 +63,6 @@ module REXML
|
|
42
63
|
end
|
43
64
|
end
|
44
65
|
private_constant :Private
|
45
|
-
include Private
|
46
66
|
|
47
67
|
# Constructor
|
48
68
|
# @param arg must be a String, and should be a valid XML document
|
@@ -64,6 +84,12 @@ module REXML
|
|
64
84
|
@scanner.rest
|
65
85
|
end
|
66
86
|
|
87
|
+
def drop_parsed_content
|
88
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
89
|
+
@scanner.string = @scanner.rest
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
67
93
|
def buffer_encoding=(encoding)
|
68
94
|
@scanner.string.force_encoding(encoding)
|
69
95
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.9
|
4
|
+
version: 3.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-06-
|
10
|
+
date: 2024-06-25 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -115,7 +115,8 @@ files:
|
|
115
115
|
homepage: https://github.com/ruby/rexml
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
|
-
metadata:
|
118
|
+
metadata:
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
|
119
120
|
rdoc_options:
|
120
121
|
- "--main"
|
121
122
|
- README.md
|