rexml 3.3.0 → 3.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +47 -0
- data/lib/rexml/element.rb +2 -15
- data/lib/rexml/parsers/baseparser.rb +44 -21
- data/lib/rexml/parsers/treeparser.rb +9 -14
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +7 -1
- metadata +5 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: afaa8e7d5241253a1c36a218f94eeff525cc19378d2ed104f738abfc01693889
|
4
|
+
data.tar.gz: 665e18c0db75cce5e3db16c674c02e986ff9141df54fd7ff3da704b4403a928d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86ea7a0ce4847b320f297b1eb03158003c2931847c07ea118f0a7413f476660dcf40baec8b59a92a2e7096eb665ace359b04c5d8e82617b7162305465472c88d
|
7
|
+
data.tar.gz: ae248f28516ab6c76170623bcc5e5a30389596823133fd0a13cb74235d6101dd469235bab8b1e15bcbd7a7795f04b44e4674dfdcb1712109dce58001cea01648
|
data/NEWS.md
CHANGED
@@ -1,5 +1,52 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added support for detecting malformed top-level comments.
|
8
|
+
* GH-145
|
9
|
+
* Patch by Hiroya Fujinami.
|
10
|
+
|
11
|
+
* Improved `REXML::Element#attribute` performance.
|
12
|
+
* GH-146
|
13
|
+
* Patch by Hiroya Fujinami.
|
14
|
+
|
15
|
+
* Added support for detecting malformed `<!-->` comments.
|
16
|
+
* GH-147
|
17
|
+
* Patch by Hiroya Fujinami.
|
18
|
+
|
19
|
+
* Added support for detecting unclosed `DOCTYPE`.
|
20
|
+
* GH-152
|
21
|
+
* Patch by Hiroya Fujinami.
|
22
|
+
|
23
|
+
* Added `changelog_uri` metadata to gemspec.
|
24
|
+
* GH-156
|
25
|
+
* Patch by fynsta.
|
26
|
+
|
27
|
+
* Improved parse performance.
|
28
|
+
* GH-157
|
29
|
+
* GH-158
|
30
|
+
* Patch by NAITOH Jun.
|
31
|
+
|
32
|
+
### Fixes
|
33
|
+
|
34
|
+
* Fixed a bug where large XML documents couldn't be parsed.
|
35
|
+
* GH-154
|
36
|
+
* Patch by NAITOH Jun.
|
37
|
+
|
38
|
+
* Fixed a bug where private constants were visible.
|
39
|
+
* GH-155
|
40
|
+
* Patch by NAITOH Jun.
|
41
|
+
|
42
|
+
### Thanks
|
43
|
+
|
44
|
+
* Hiroya Fujinami
|
45
|
+
|
46
|
+
* NAITOH Jun
|
47
|
+
|
48
|
+
* fynsta
|
49
|
+
|
3
50
|
## 3.3.0 - 2024-06-11 {#version-3-3-0}
|
4
51
|
|
5
52
|
### Improvements
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -1284,16 +1276,11 @@ module REXML
|
|
1284
1276
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1277
|
#
|
1286
1278
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1279
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1280
|
prefix = nil if prefix == 'xmlns'
|
1294
1281
|
|
1295
1282
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1283
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1284
|
|
1298
1285
|
return ret_val unless ret_val.nil?
|
1299
1286
|
return nil if prefix.nil?
|
@@ -132,13 +132,20 @@ module REXML
|
|
132
132
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
133
133
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
134
134
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
135
|
+
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
|
136
|
+
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
|
137
|
+
DEFAULT_ENTITIES_PATTERNS = {}
|
138
|
+
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
|
139
|
+
default_entities.each do |term|
|
140
|
+
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
|
141
|
+
end
|
135
142
|
end
|
136
143
|
private_constant :Private
|
137
|
-
include Private
|
138
144
|
|
139
145
|
def initialize( source )
|
140
146
|
self.stream = source
|
141
147
|
@listeners = []
|
148
|
+
@prefixes = Set.new
|
142
149
|
end
|
143
150
|
|
144
151
|
def add_listener( listener )
|
@@ -204,6 +211,8 @@ module REXML
|
|
204
211
|
|
205
212
|
# Returns the next event. This is a +PullEvent+ object.
|
206
213
|
def pull
|
214
|
+
@source.drop_parsed_content
|
215
|
+
|
207
216
|
pull_event.tap do |event|
|
208
217
|
@listeners.each do |listener|
|
209
218
|
listener.receive event
|
@@ -216,7 +225,12 @@ module REXML
|
|
216
225
|
x, @closed = @closed, nil
|
217
226
|
return [ :end_element, x ]
|
218
227
|
end
|
219
|
-
|
228
|
+
if empty?
|
229
|
+
if @document_status == :in_doctype
|
230
|
+
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
231
|
+
end
|
232
|
+
return [ :end_document ]
|
233
|
+
end
|
220
234
|
return @stack.shift if @stack.size > 0
|
221
235
|
#STDERR.puts @source.encoding
|
222
236
|
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
|
@@ -228,7 +242,14 @@ module REXML
|
|
228
242
|
return process_instruction(start_position)
|
229
243
|
elsif @source.match("<!", true)
|
230
244
|
if @source.match("--", true)
|
231
|
-
|
245
|
+
md = @source.match(/(.*?)-->/um, true)
|
246
|
+
if md.nil?
|
247
|
+
raise REXML::ParseException.new("Unclosed comment", @source)
|
248
|
+
end
|
249
|
+
if /--|-\z/.match?(md[1])
|
250
|
+
raise REXML::ParseException.new("Malformed comment", @source)
|
251
|
+
end
|
252
|
+
return [ :comment, md[1] ]
|
232
253
|
elsif @source.match("DOCTYPE", true)
|
233
254
|
base_error_message = "Malformed DOCTYPE"
|
234
255
|
unless @source.match(/\s+/um, true)
|
@@ -240,7 +261,7 @@ module REXML
|
|
240
261
|
@source.position = start_position
|
241
262
|
raise REXML::ParseException.new(message, @source)
|
242
263
|
end
|
243
|
-
@nsstack.unshift(
|
264
|
+
@nsstack.unshift(Set.new)
|
244
265
|
name = parse_name(base_error_message)
|
245
266
|
if @source.match(/\s*\[/um, true)
|
246
267
|
id = [nil, nil, nil]
|
@@ -288,7 +309,7 @@ module REXML
|
|
288
309
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
289
310
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
290
311
|
elsif @source.match("ENTITY", true)
|
291
|
-
match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
|
312
|
+
match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
|
292
313
|
ref = false
|
293
314
|
if match[1] == '%'
|
294
315
|
ref = true
|
@@ -314,7 +335,7 @@ module REXML
|
|
314
335
|
match << '%' if ref
|
315
336
|
return match
|
316
337
|
elsif @source.match("ATTLIST", true)
|
317
|
-
md = @source.match(ATTLISTDECL_END, true)
|
338
|
+
md = @source.match(Private::ATTLISTDECL_END, true)
|
318
339
|
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
|
319
340
|
element = md[1]
|
320
341
|
contents = md[0]
|
@@ -366,6 +387,9 @@ module REXML
|
|
366
387
|
@document_status = :after_doctype
|
367
388
|
return [ :end_doctype ]
|
368
389
|
end
|
390
|
+
if @document_status == :in_doctype
|
391
|
+
raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
|
392
|
+
end
|
369
393
|
end
|
370
394
|
if @document_status == :after_doctype
|
371
395
|
@source.match(/\s*/um, true)
|
@@ -380,7 +404,7 @@ module REXML
|
|
380
404
|
if @source.match("/", true)
|
381
405
|
@nsstack.shift
|
382
406
|
last_tag = @tags.pop
|
383
|
-
md = @source.match(CLOSE_PATTERN, true)
|
407
|
+
md = @source.match(Private::CLOSE_PATTERN, true)
|
384
408
|
if md and !last_tag
|
385
409
|
message = "Unexpected top-level end tag (got '#{md[1]}')"
|
386
410
|
raise REXML::ParseException.new(message, @source)
|
@@ -399,12 +423,11 @@ module REXML
|
|
399
423
|
if md[0][0] == ?-
|
400
424
|
md = @source.match(/--(.*?)-->/um, true)
|
401
425
|
|
402
|
-
|
403
|
-
when /--/, /-\z/
|
426
|
+
if md.nil? || /--|-\z/.match?(md[1])
|
404
427
|
raise REXML::ParseException.new("Malformed comment", @source)
|
405
428
|
end
|
406
429
|
|
407
|
-
return [ :comment, md[1] ]
|
430
|
+
return [ :comment, md[1] ]
|
408
431
|
else
|
409
432
|
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
410
433
|
return [ :cdata, md[1] ] if md
|
@@ -415,19 +438,19 @@ module REXML
|
|
415
438
|
return process_instruction(start_position)
|
416
439
|
else
|
417
440
|
# Get the next tag
|
418
|
-
md = @source.match(TAG_PATTERN, true)
|
441
|
+
md = @source.match(Private::TAG_PATTERN, true)
|
419
442
|
unless md
|
420
443
|
@source.position = start_position
|
421
444
|
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
|
422
445
|
end
|
423
446
|
tag = md[1]
|
424
447
|
@document_status = :in_element
|
425
|
-
prefixes
|
426
|
-
prefixes << md[2] if md[2]
|
448
|
+
@prefixes.clear
|
449
|
+
@prefixes << md[2] if md[2]
|
427
450
|
@nsstack.unshift(curr_ns=Set.new)
|
428
|
-
attributes, closed = parse_attributes(prefixes, curr_ns)
|
451
|
+
attributes, closed = parse_attributes(@prefixes, curr_ns)
|
429
452
|
# Verify that all of the prefixes have been defined
|
430
|
-
for prefix in prefixes
|
453
|
+
for prefix in @prefixes
|
431
454
|
unless @nsstack.find{|k| k.member?(prefix)}
|
432
455
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
433
456
|
end
|
@@ -488,10 +511,10 @@ module REXML
|
|
488
511
|
|
489
512
|
# Unescapes all possible entities
|
490
513
|
def unnormalize( string, entities=nil, filter=nil )
|
491
|
-
rv = string.gsub(
|
514
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
492
515
|
matches = rv.scan( REFERENCE_RE )
|
493
516
|
return rv if matches.size == 0
|
494
|
-
rv.gsub!(
|
517
|
+
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
495
518
|
m=$1
|
496
519
|
m = "0#{m}" if m[0] == ?x
|
497
520
|
[Integer(m)].pack('U*')
|
@@ -502,7 +525,7 @@ module REXML
|
|
502
525
|
unless filter and filter.include?(entity_reference)
|
503
526
|
entity_value = entity( entity_reference, entities )
|
504
527
|
if entity_value
|
505
|
-
re = /&#{entity_reference};/
|
528
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
506
529
|
rv.gsub!( re, entity_value )
|
507
530
|
else
|
508
531
|
er = DEFAULT_ENTITIES[entity_reference]
|
@@ -510,7 +533,7 @@ module REXML
|
|
510
533
|
end
|
511
534
|
end
|
512
535
|
end
|
513
|
-
rv.gsub!(
|
536
|
+
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
514
537
|
end
|
515
538
|
rv
|
516
539
|
end
|
@@ -523,7 +546,7 @@ module REXML
|
|
523
546
|
end
|
524
547
|
|
525
548
|
def parse_name(base_error_message)
|
526
|
-
md = @source.match(NAME_PATTERN, true)
|
549
|
+
md = @source.match(Private::NAME_PATTERN, true)
|
527
550
|
unless md
|
528
551
|
if @source.match(/\s*\S/um)
|
529
552
|
message = "#{base_error_message}: invalid name"
|
@@ -602,7 +625,7 @@ module REXML
|
|
602
625
|
end
|
603
626
|
|
604
627
|
def process_instruction(start_position)
|
605
|
-
match_data = @source.match(INSTRUCTION_END, true)
|
628
|
+
match_data = @source.match(Private::INSTRUCTION_END, true)
|
606
629
|
unless match_data
|
607
630
|
message = "Invalid processing instruction node"
|
608
631
|
@source.position = start_position
|
@@ -16,7 +16,6 @@ module REXML
|
|
16
16
|
|
17
17
|
def parse
|
18
18
|
tag_stack = []
|
19
|
-
in_doctype = false
|
20
19
|
entities = nil
|
21
20
|
begin
|
22
21
|
while true
|
@@ -39,17 +38,15 @@ module REXML
|
|
39
38
|
tag_stack.pop
|
40
39
|
@build_context = @build_context.parent
|
41
40
|
when :text
|
42
|
-
if
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
@build_context.
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
)
|
52
|
-
end
|
41
|
+
if @build_context[-1].instance_of? Text
|
42
|
+
@build_context[-1] << event[1]
|
43
|
+
else
|
44
|
+
@build_context.add(
|
45
|
+
Text.new(event[1], @build_context.whitespace, nil, true)
|
46
|
+
) unless (
|
47
|
+
@build_context.ignore_whitespace_nodes and
|
48
|
+
event[1].strip.size==0
|
49
|
+
)
|
53
50
|
end
|
54
51
|
when :comment
|
55
52
|
c = Comment.new( event[1] )
|
@@ -60,14 +57,12 @@ module REXML
|
|
60
57
|
when :processing_instruction
|
61
58
|
@build_context.add( Instruction.new( event[1], event[2] ) )
|
62
59
|
when :end_doctype
|
63
|
-
in_doctype = false
|
64
60
|
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
|
65
61
|
@build_context = @build_context.parent
|
66
62
|
when :start_doctype
|
67
63
|
doctype = DocType.new( event[1..-1], @build_context )
|
68
64
|
@build_context = doctype
|
69
65
|
entities = {}
|
70
|
-
in_doctype = true
|
71
66
|
when :attlistdecl
|
72
67
|
n = AttlistDecl.new( event[1..-1] )
|
73
68
|
@build_context.add( n )
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -55,6 +55,7 @@ module REXML
|
|
55
55
|
attr_reader :encoding
|
56
56
|
|
57
57
|
module Private
|
58
|
+
SCANNER_RESET_SIZE = 100000
|
58
59
|
PRE_DEFINED_TERM_PATTERNS = {}
|
59
60
|
pre_defined_terms = ["'", '"', "<"]
|
60
61
|
pre_defined_terms.each do |term|
|
@@ -62,7 +63,6 @@ module REXML
|
|
62
63
|
end
|
63
64
|
end
|
64
65
|
private_constant :Private
|
65
|
-
include Private
|
66
66
|
|
67
67
|
# Constructor
|
68
68
|
# @param arg must be a String, and should be a valid XML document
|
@@ -84,6 +84,12 @@ module REXML
|
|
84
84
|
@scanner.rest
|
85
85
|
end
|
86
86
|
|
87
|
+
def drop_parsed_content
|
88
|
+
if @scanner.pos > Private::SCANNER_RESET_SIZE
|
89
|
+
@scanner.string = @scanner.rest
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
87
93
|
def buffer_encoding=(encoding)
|
88
94
|
@scanner.string.force_encoding(encoding)
|
89
95
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.
|
4
|
+
version: 3.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date: 2024-06-
|
10
|
+
date: 2024-06-25 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: strscan
|
@@ -116,8 +115,8 @@ files:
|
|
116
115
|
homepage: https://github.com/ruby/rexml
|
117
116
|
licenses:
|
118
117
|
- BSD-2-Clause
|
119
|
-
metadata:
|
120
|
-
|
118
|
+
metadata:
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
|
121
120
|
rdoc_options:
|
122
121
|
- "--main"
|
123
122
|
- README.md
|
@@ -134,8 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
134
133
|
- !ruby/object:Gem::Version
|
135
134
|
version: '0'
|
136
135
|
requirements: []
|
137
|
-
rubygems_version: 3.
|
138
|
-
signing_key:
|
136
|
+
rubygems_version: 3.6.0.dev
|
139
137
|
specification_version: 4
|
140
138
|
summary: An XML toolkit for Ruby
|
141
139
|
test_files: []
|