rexml 3.3.2 → 3.3.6
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +107 -0
- data/lib/rexml/element.rb +14 -16
- data/lib/rexml/entity.rb +5 -47
- data/lib/rexml/parsers/baseparser.rb +149 -58
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +4 -0
- data/lib/rexml/parsers/streamparser.rb +7 -9
- data/lib/rexml/parsers/treeparser.rb +0 -7
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +18 -8
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b79c22060286dad847e18d30b4b336bda21d2772ccb35413fb9ba51a0012ed2
|
4
|
+
data.tar.gz: feb56a4a3071541e983acd33b8baa6b9052f8d67d871102cfe6e69773a0cfcfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b615c95f8624212e151443ad03ba9b64f39aee8a200ea212150a10116340157cfda1bf974ab3d03161c0fb37d866e8c1c69ccc6a9549a13398452b32166af2d8
|
7
|
+
data.tar.gz: db7dcac658e1f51f30575c24d6f36dc256349331fa1951c8fdfaf214baf97a5a446a1fcc411358a76d2c6fc36388ec8b1178adeacc3225d16d5d95ac53a8c4b3
|
data/NEWS.md
CHANGED
@@ -1,5 +1,109 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.6 - 2024-08-22 {#version-3-3-6}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Removed duplicated entity expansions for performance.
|
8
|
+
* GH-194
|
9
|
+
* Patch by Viktor Ivarsson.
|
10
|
+
|
11
|
+
* Improved namespace conflicted attribute check performance. It was
|
12
|
+
too slow for deep elements.
|
13
|
+
* Reported by l33thaxor.
|
14
|
+
|
15
|
+
### Fixes
|
16
|
+
|
17
|
+
* Fixed a bug that default entity expansions are counted for
|
18
|
+
security check. Default entity expansions should not be counted
|
19
|
+
because they don't have a security risk.
|
20
|
+
* GH-198
|
21
|
+
* GH-199
|
22
|
+
* Patch by Viktor Ivarsson.
|
23
|
+
|
24
|
+
* Fixed a parser bug that parameter entity references in internal
|
25
|
+
subsets are expanded. It's not allowed in the XML specification.
|
26
|
+
* GH-191
|
27
|
+
* Patch by NAITOH Jun.
|
28
|
+
|
29
|
+
* Fixed a stream parser bug that user-defined entity references in
|
30
|
+
text aren't expanded.
|
31
|
+
* GH-200
|
32
|
+
* Patch by NAITOH Jun.
|
33
|
+
|
34
|
+
### Thanks
|
35
|
+
|
36
|
+
* Viktor Ivarsson
|
37
|
+
|
38
|
+
* NAITOH Jun
|
39
|
+
|
40
|
+
* l33thaxor
|
41
|
+
|
42
|
+
## 3.3.5 - 2024-08-12 {#version-3-3-5}
|
43
|
+
|
44
|
+
### Fixes
|
45
|
+
|
46
|
+
* Fixed a bug that `REXML::Security.entity_expansion_text_limit`
|
47
|
+
check has wrong text size calculation in SAX and pull parsers.
|
48
|
+
* GH-193
|
49
|
+
* GH-195
|
50
|
+
* Reported by Viktor Ivarsson.
|
51
|
+
* Patch by NAITOH Jun.
|
52
|
+
|
53
|
+
### Thanks
|
54
|
+
|
55
|
+
* Viktor Ivarsson
|
56
|
+
|
57
|
+
* NAITOH Jun
|
58
|
+
|
59
|
+
## 3.3.4 - 2024-08-01 {#version-3-3-4}
|
60
|
+
|
61
|
+
### Fixes
|
62
|
+
|
63
|
+
* Fixed a bug that `REXML::Security` isn't defined when
|
64
|
+
`REXML::Parsers::StreamParser` is used and
|
65
|
+
`rexml/parsers/streamparser` is only required.
|
66
|
+
* GH-189
|
67
|
+
* Patch by takuya kodama.
|
68
|
+
|
69
|
+
### Thanks
|
70
|
+
|
71
|
+
* takuya kodama
|
72
|
+
|
73
|
+
## 3.3.3 - 2024-08-01 {#version-3-3-3}
|
74
|
+
|
75
|
+
### Improvements
|
76
|
+
|
77
|
+
* Added support for detecting invalid XML that has unsupported
|
78
|
+
content before the root element.
|
79
|
+
* GH-184
|
80
|
+
* Patch by NAITOH Jun.
|
81
|
+
|
82
|
+
* Added support for `REXML::Security.entity_expansion_limit=` and
|
83
|
+
`REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
|
84
|
+
parsers.
|
85
|
+
* GH-187
|
86
|
+
* Patch by NAITOH Jun.
|
87
|
+
|
88
|
+
* Added more tests for invalid XMLs.
|
89
|
+
* GH-183
|
90
|
+
* Patch by Watson.
|
91
|
+
|
92
|
+
* Added more performance tests.
|
93
|
+
* Patch by Watson.
|
94
|
+
|
95
|
+
* Improved parse performance.
|
96
|
+
* GH-186
|
97
|
+
* Patch by tomoya ishida.
|
98
|
+
|
99
|
+
### Thanks
|
100
|
+
|
101
|
+
* NAITOH Jun
|
102
|
+
|
103
|
+
* Watson
|
104
|
+
|
105
|
+
* tomoya ishida
|
106
|
+
|
3
107
|
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
4
108
|
|
5
109
|
### Improvements
|
@@ -15,6 +119,9 @@
|
|
15
119
|
* GH-172
|
16
120
|
* GH-173
|
17
121
|
* GH-174
|
122
|
+
* GH-175
|
123
|
+
* GH-176
|
124
|
+
* GH-177
|
18
125
|
* Patch by Watson.
|
19
126
|
|
20
127
|
* Added support for raising a parse exception when an XML has extra
|
data/lib/rexml/element.rb
CHANGED
@@ -441,9 +441,14 @@ module REXML
|
|
441
441
|
# Related: #root_node, #document.
|
442
442
|
#
|
443
443
|
def root
|
444
|
-
|
445
|
-
|
446
|
-
|
444
|
+
target = self
|
445
|
+
while target
|
446
|
+
return target.elements[1] if target.kind_of? Document
|
447
|
+
parent = target.parent
|
448
|
+
return target if parent.kind_of? Document or parent.nil?
|
449
|
+
target = parent
|
450
|
+
end
|
451
|
+
nil
|
447
452
|
end
|
448
453
|
|
449
454
|
# :call-seq:
|
@@ -619,8 +624,12 @@ module REXML
|
|
619
624
|
else
|
620
625
|
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
|
621
626
|
end
|
622
|
-
ns =
|
623
|
-
|
627
|
+
ns = nil
|
628
|
+
target = self
|
629
|
+
while ns.nil? and target
|
630
|
+
ns = target.attributes[prefix]
|
631
|
+
target = target.parent
|
632
|
+
end
|
624
633
|
ns = '' if ns.nil? and prefix == 'xmlns'
|
625
634
|
return ns
|
626
635
|
end
|
@@ -2375,17 +2384,6 @@ module REXML
|
|
2375
2384
|
elsif old_attr.kind_of? Hash
|
2376
2385
|
old_attr[value.prefix] = value
|
2377
2386
|
elsif old_attr.prefix != value.prefix
|
2378
|
-
# Check for conflicting namespaces
|
2379
|
-
if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
|
2380
|
-
old_namespace = old_attr.namespace
|
2381
|
-
new_namespace = value.namespace
|
2382
|
-
if old_namespace == new_namespace
|
2383
|
-
raise ParseException.new(
|
2384
|
-
"Namespace conflict in adding attribute \"#{value.name}\": "+
|
2385
|
-
"Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
|
2386
|
-
"prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
|
2387
|
-
end
|
2388
|
-
end
|
2389
2387
|
store value.name, {old_attr.prefix => old_attr,
|
2390
2388
|
value.prefix => value}
|
2391
2389
|
else
|
data/lib/rexml/entity.rb
CHANGED
@@ -12,6 +12,7 @@ module REXML
|
|
12
12
|
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
13
13
|
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
14
14
|
PEREFERENCE = "%#{NAME};"
|
15
|
+
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
15
16
|
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
16
17
|
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
17
18
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
@@ -19,7 +20,7 @@ module REXML
|
|
19
20
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
20
21
|
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
21
22
|
|
22
|
-
attr_reader :name, :external, :ref, :ndata, :pubid
|
23
|
+
attr_reader :name, :external, :ref, :ndata, :pubid, :value
|
23
24
|
|
24
25
|
# Create a new entity. Simple entities can be constructed by passing a
|
25
26
|
# name, value to the constructor; this creates a generic, plain entity
|
@@ -68,14 +69,11 @@ module REXML
|
|
68
69
|
end
|
69
70
|
|
70
71
|
# Evaluates to the unnormalized value of this entity; that is, replacing
|
71
|
-
#
|
72
|
-
# +value()+ in that +value+ only replaces %ent; entities.
|
72
|
+
# &ent; entities.
|
73
73
|
def unnormalized
|
74
74
|
document.record_entity_expansion unless document.nil?
|
75
|
-
|
76
|
-
|
77
|
-
@unnormalized = Text::unnormalize(v, parent)
|
78
|
-
@unnormalized
|
75
|
+
return nil if @value.nil?
|
76
|
+
@unnormalized = Text::unnormalize(@value, parent)
|
79
77
|
end
|
80
78
|
|
81
79
|
#once :unnormalized
|
@@ -121,46 +119,6 @@ module REXML
|
|
121
119
|
write rv
|
122
120
|
rv
|
123
121
|
end
|
124
|
-
|
125
|
-
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
126
|
-
# Returns the value of this entity. At the moment, only internal entities
|
127
|
-
# are processed. If the value contains internal references (IE,
|
128
|
-
# %blah;), those are replaced with their values. IE, if the doctype
|
129
|
-
# contains:
|
130
|
-
# <!ENTITY % foo "bar">
|
131
|
-
# <!ENTITY yada "nanoo %foo; nanoo>
|
132
|
-
# then:
|
133
|
-
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
134
|
-
def value
|
135
|
-
@resolved_value ||= resolve_value
|
136
|
-
end
|
137
|
-
|
138
|
-
def parent=(other)
|
139
|
-
@resolved_value = nil
|
140
|
-
super
|
141
|
-
end
|
142
|
-
|
143
|
-
private
|
144
|
-
def resolve_value
|
145
|
-
return nil if @value.nil?
|
146
|
-
return @value unless @value.match?(PEREFERENCE_RE)
|
147
|
-
|
148
|
-
matches = @value.scan(PEREFERENCE_RE)
|
149
|
-
rv = @value.clone
|
150
|
-
if @parent
|
151
|
-
sum = 0
|
152
|
-
matches.each do |entity_reference|
|
153
|
-
entity_value = @parent.entity( entity_reference[0] )
|
154
|
-
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
155
|
-
raise "entity expansion has grown too large"
|
156
|
-
else
|
157
|
-
sum += entity_value.bytesize
|
158
|
-
end
|
159
|
-
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
160
|
-
end
|
161
|
-
end
|
162
|
-
rv
|
163
|
-
end
|
164
122
|
end
|
165
123
|
|
166
124
|
# This is a set of entity constants -- the ones defined in the XML
|
@@ -1,12 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
10
27
|
if StringScanner::Version < "3.0.8"
|
11
28
|
module StringScannerCaptures
|
12
29
|
refine StringScanner do
|
@@ -124,19 +141,11 @@ module REXML
|
|
124
141
|
}
|
125
142
|
|
126
143
|
module Private
|
127
|
-
|
128
|
-
INSTRUCTION_TERM = "?>"
|
129
|
-
COMMENT_TERM = "-->"
|
130
|
-
CDATA_TERM = "]]>"
|
131
|
-
DOCTYPE_TERM = "]>"
|
132
|
-
# Read to the end of DOCTYPE because there is no proper ENTITY termination
|
133
|
-
ENTITY_TERM = DOCTYPE_TERM
|
134
|
-
|
135
|
-
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
136
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
137
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
138
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
139
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
140
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
141
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
142
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
@@ -154,6 +163,7 @@ module REXML
|
|
154
163
|
self.stream = source
|
155
164
|
@listeners = []
|
156
165
|
@prefixes = Set.new
|
166
|
+
@entity_expansion_count = 0
|
157
167
|
end
|
158
168
|
|
159
169
|
def add_listener( listener )
|
@@ -161,6 +171,7 @@ module REXML
|
|
161
171
|
end
|
162
172
|
|
163
173
|
attr_reader :source
|
174
|
+
attr_reader :entity_expansion_count
|
164
175
|
|
165
176
|
def stream=( source )
|
166
177
|
@source = SourceFactory.create_from( source )
|
@@ -170,7 +181,8 @@ module REXML
|
|
170
181
|
@tags = []
|
171
182
|
@stack = []
|
172
183
|
@entities = []
|
173
|
-
@
|
184
|
+
@namespaces = {}
|
185
|
+
@namespaces_restore_stack = []
|
174
186
|
end
|
175
187
|
|
176
188
|
def position
|
@@ -238,6 +250,10 @@ module REXML
|
|
238
250
|
if @document_status == :in_doctype
|
239
251
|
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
240
252
|
end
|
253
|
+
unless @tags.empty?
|
254
|
+
path = "/" + @tags.join("/")
|
255
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
256
|
+
end
|
241
257
|
return [ :end_document ]
|
242
258
|
end
|
243
259
|
return @stack.shift if @stack.size > 0
|
@@ -248,10 +264,10 @@ module REXML
|
|
248
264
|
if @document_status == nil
|
249
265
|
start_position = @source.position
|
250
266
|
if @source.match("<?", true)
|
251
|
-
return process_instruction
|
267
|
+
return process_instruction
|
252
268
|
elsif @source.match("<!", true)
|
253
269
|
if @source.match("--", true)
|
254
|
-
md = @source.match(/(.*?)-->/um, true
|
270
|
+
md = @source.match(/(.*?)-->/um, true)
|
255
271
|
if md.nil?
|
256
272
|
raise REXML::ParseException.new("Unclosed comment", @source)
|
257
273
|
end
|
@@ -270,7 +286,6 @@ module REXML
|
|
270
286
|
@source.position = start_position
|
271
287
|
raise REXML::ParseException.new(message, @source)
|
272
288
|
end
|
273
|
-
@nsstack.unshift(Set.new)
|
274
289
|
name = parse_name(base_error_message)
|
275
290
|
if @source.match(/\s*\[/um, true)
|
276
291
|
id = [nil, nil, nil]
|
@@ -318,7 +333,11 @@ module REXML
|
|
318
333
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
319
334
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
320
335
|
elsif @source.match("ENTITY", true)
|
321
|
-
|
336
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
337
|
+
unless match_data
|
338
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
339
|
+
end
|
340
|
+
match = [:entitydecl, *match_data.captures.compact]
|
322
341
|
ref = false
|
323
342
|
if match[1] == '%'
|
324
343
|
ref = true
|
@@ -336,6 +355,8 @@ module REXML
|
|
336
355
|
match[4] = match[4][1..-2] # HREF
|
337
356
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
338
357
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
358
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
359
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
339
360
|
else
|
340
361
|
match[2] = match[2][1..-2]
|
341
362
|
match.pop if match.size == 4
|
@@ -358,7 +379,7 @@ module REXML
|
|
358
379
|
val = attdef[4] if val == "#FIXED "
|
359
380
|
pairs[attdef[0]] = val
|
360
381
|
if attdef[0] =~ /^xmlns:(.*)/
|
361
|
-
@
|
382
|
+
@namespaces[$1] = val
|
362
383
|
end
|
363
384
|
end
|
364
385
|
end
|
@@ -383,14 +404,14 @@ module REXML
|
|
383
404
|
raise REXML::ParseException.new(message, @source)
|
384
405
|
end
|
385
406
|
return [:notationdecl, name, *id]
|
386
|
-
elsif md = @source.match(/--(.*?)-->/um, true
|
407
|
+
elsif md = @source.match(/--(.*?)-->/um, true)
|
387
408
|
case md[1]
|
388
409
|
when /--/, /-\z/
|
389
410
|
raise REXML::ParseException.new("Malformed comment", @source)
|
390
411
|
end
|
391
412
|
return [ :comment, md[1] ] if md
|
392
413
|
end
|
393
|
-
elsif match = @source.match(/(%.*?;)\s*/um, true
|
414
|
+
elsif match = @source.match(/(%.*?;)\s*/um, true)
|
394
415
|
return [ :externalentity, match[1] ]
|
395
416
|
elsif @source.match(/\]\s*>/um, true)
|
396
417
|
@document_status = :after_doctype
|
@@ -411,7 +432,7 @@ module REXML
|
|
411
432
|
# here explicitly.
|
412
433
|
@source.ensure_buffer
|
413
434
|
if @source.match("/", true)
|
414
|
-
@
|
435
|
+
@namespaces_restore_stack.pop
|
415
436
|
last_tag = @tags.pop
|
416
437
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
417
438
|
if md and !last_tag
|
@@ -430,7 +451,7 @@ module REXML
|
|
430
451
|
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
|
431
452
|
raise REXML::ParseException.new("Malformed node", @source) unless md
|
432
453
|
if md[0][0] == ?-
|
433
|
-
md = @source.match(/--(.*?)-->/um, true
|
454
|
+
md = @source.match(/--(.*?)-->/um, true)
|
434
455
|
|
435
456
|
if md.nil? || /--|-\z/.match?(md[1])
|
436
457
|
raise REXML::ParseException.new("Malformed comment", @source)
|
@@ -438,13 +459,13 @@ module REXML
|
|
438
459
|
|
439
460
|
return [ :comment, md[1] ]
|
440
461
|
else
|
441
|
-
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true
|
462
|
+
md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
|
442
463
|
return [ :cdata, md[1] ] if md
|
443
464
|
end
|
444
465
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
445
466
|
"in the doctype declaration.", @source)
|
446
467
|
elsif @source.match("?", true)
|
447
|
-
return process_instruction
|
468
|
+
return process_instruction
|
448
469
|
else
|
449
470
|
# Get the next tag
|
450
471
|
md = @source.match(Private::TAG_PATTERN, true)
|
@@ -456,18 +477,18 @@ module REXML
|
|
456
477
|
@document_status = :in_element
|
457
478
|
@prefixes.clear
|
458
479
|
@prefixes << md[2] if md[2]
|
459
|
-
|
460
|
-
attributes, closed = parse_attributes(@prefixes
|
480
|
+
push_namespaces_restore
|
481
|
+
attributes, closed = parse_attributes(@prefixes)
|
461
482
|
# Verify that all of the prefixes have been defined
|
462
483
|
for prefix in @prefixes
|
463
|
-
unless @
|
484
|
+
unless @namespaces.key?(prefix)
|
464
485
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
465
486
|
end
|
466
487
|
end
|
467
488
|
|
468
489
|
if closed
|
469
490
|
@closed = tag
|
470
|
-
|
491
|
+
pop_namespaces_restore
|
471
492
|
else
|
472
493
|
if @tags.empty? and @have_root
|
473
494
|
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
@@ -482,11 +503,15 @@ module REXML
|
|
482
503
|
if text.chomp!("<")
|
483
504
|
@source.position -= "<".bytesize
|
484
505
|
end
|
485
|
-
if @tags.empty?
|
506
|
+
if @tags.empty?
|
486
507
|
unless /\A\s*\z/.match?(text)
|
487
|
-
|
508
|
+
if @have_root
|
509
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
510
|
+
else
|
511
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
512
|
+
end
|
488
513
|
end
|
489
|
-
return pull_event
|
514
|
+
return pull_event if @have_root
|
490
515
|
end
|
491
516
|
return [ :text, text ]
|
492
517
|
end
|
@@ -503,13 +528,13 @@ module REXML
|
|
503
528
|
private :pull_event
|
504
529
|
|
505
530
|
def entity( reference, entities )
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
unnormalize( value, entities )
|
531
|
+
return unless entities
|
532
|
+
|
533
|
+
value = entities[ reference ]
|
534
|
+
return if value.nil?
|
535
|
+
|
536
|
+
record_entity_expansion
|
537
|
+
unnormalize( value, entities )
|
513
538
|
end
|
514
539
|
|
515
540
|
# Escapes all possible entities
|
@@ -543,17 +568,29 @@ module REXML
|
|
543
568
|
[Integer(m)].pack('U*')
|
544
569
|
}
|
545
570
|
matches.collect!{|x|x[0]}.compact!
|
571
|
+
if filter
|
572
|
+
matches.reject! do |entity_reference|
|
573
|
+
filter.include?(entity_reference)
|
574
|
+
end
|
575
|
+
end
|
546
576
|
if matches.size > 0
|
547
|
-
matches.each do |entity_reference|
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
577
|
+
matches.tally.each do |entity_reference, n|
|
578
|
+
entity_expansion_count_before = @entity_expansion_count
|
579
|
+
entity_value = entity( entity_reference, entities )
|
580
|
+
if entity_value
|
581
|
+
if n > 1
|
582
|
+
entity_expansion_count_delta =
|
583
|
+
@entity_expansion_count - entity_expansion_count_before
|
584
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
585
|
+
end
|
586
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
587
|
+
rv.gsub!( re, entity_value )
|
588
|
+
if rv.bytesize > Security.entity_expansion_text_limit
|
589
|
+
raise "entity expansion has grown too large"
|
556
590
|
end
|
591
|
+
else
|
592
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
593
|
+
rv.gsub!( er[0], er[2] ) if er
|
557
594
|
end
|
558
595
|
end
|
559
596
|
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
@@ -562,6 +599,39 @@ module REXML
|
|
562
599
|
end
|
563
600
|
|
564
601
|
private
|
602
|
+
def add_namespace(prefix, uri)
|
603
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
604
|
+
if uri.nil?
|
605
|
+
@namespaces.delete(prefix)
|
606
|
+
else
|
607
|
+
@namespaces[prefix] = uri
|
608
|
+
end
|
609
|
+
end
|
610
|
+
|
611
|
+
def push_namespaces_restore
|
612
|
+
namespaces_restore = {}
|
613
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
614
|
+
namespaces_restore
|
615
|
+
end
|
616
|
+
|
617
|
+
def pop_namespaces_restore
|
618
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
619
|
+
namespaces_restore.each do |prefix, uri|
|
620
|
+
if uri.nil?
|
621
|
+
@namespaces.delete(prefix)
|
622
|
+
else
|
623
|
+
@namespaces[prefix] = uri
|
624
|
+
end
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
def record_entity_expansion(delta=1)
|
629
|
+
@entity_expansion_count += delta
|
630
|
+
if @entity_expansion_count > Security.entity_expansion_limit
|
631
|
+
raise "number of entity expansions exceeded, processing aborted."
|
632
|
+
end
|
633
|
+
end
|
634
|
+
|
565
635
|
def need_source_encoding_update?(xml_declaration_encoding)
|
566
636
|
return false if xml_declaration_encoding.nil?
|
567
637
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -571,14 +641,14 @@ module REXML
|
|
571
641
|
def parse_name(base_error_message)
|
572
642
|
md = @source.match(Private::NAME_PATTERN, true)
|
573
643
|
unless md
|
574
|
-
if @source.match(/\
|
644
|
+
if @source.match(/\S/um)
|
575
645
|
message = "#{base_error_message}: invalid name"
|
576
646
|
else
|
577
647
|
message = "#{base_error_message}: name is missing"
|
578
648
|
end
|
579
649
|
raise REXML::ParseException.new(message, @source)
|
580
650
|
end
|
581
|
-
md[
|
651
|
+
md[0]
|
582
652
|
end
|
583
653
|
|
584
654
|
def parse_id(base_error_message,
|
@@ -647,18 +717,24 @@ module REXML
|
|
647
717
|
end
|
648
718
|
end
|
649
719
|
|
650
|
-
def process_instruction
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
720
|
+
def process_instruction
|
721
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
722
|
+
if @source.match(/\s+/um, true)
|
723
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
724
|
+
unless match_data
|
725
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
726
|
+
end
|
727
|
+
content = match_data[1]
|
728
|
+
else
|
729
|
+
content = nil
|
730
|
+
unless @source.match("?>", true)
|
731
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
732
|
+
end
|
656
733
|
end
|
657
|
-
if
|
734
|
+
if name == "xml"
|
658
735
|
if @document_status
|
659
736
|
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
660
737
|
end
|
661
|
-
content = match_data[2]
|
662
738
|
version = VERSION.match(content)
|
663
739
|
version = version[1] unless version.nil?
|
664
740
|
encoding = ENCODING.match(content)
|
@@ -673,11 +749,12 @@ module REXML
|
|
673
749
|
standalone = standalone[1] unless standalone.nil?
|
674
750
|
return [ :xmldecl, version, encoding, standalone ]
|
675
751
|
end
|
676
|
-
[:processing_instruction,
|
752
|
+
[:processing_instruction, name, content]
|
677
753
|
end
|
678
754
|
|
679
|
-
def parse_attributes(prefixes
|
755
|
+
def parse_attributes(prefixes)
|
680
756
|
attributes = {}
|
757
|
+
expanded_names = {}
|
681
758
|
closed = false
|
682
759
|
while true
|
683
760
|
if @source.match(">", true)
|
@@ -719,7 +796,7 @@ module REXML
|
|
719
796
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
720
797
|
raise REXML::ParseException.new( msg, @source, self)
|
721
798
|
end
|
722
|
-
|
799
|
+
add_namespace(local_part, value)
|
723
800
|
elsif prefix
|
724
801
|
prefixes << prefix unless prefix == "xml"
|
725
802
|
end
|
@@ -729,6 +806,20 @@ module REXML
|
|
729
806
|
raise REXML::ParseException.new(msg, @source, self)
|
730
807
|
end
|
731
808
|
|
809
|
+
unless prefix == "xmlns"
|
810
|
+
uri = @namespaces[prefix]
|
811
|
+
expanded_name = [uri, local_part]
|
812
|
+
existing_prefix = expanded_names[expanded_name]
|
813
|
+
if existing_prefix
|
814
|
+
message = "Namespace conflict in adding attribute " +
|
815
|
+
"\"#{local_part}\": " +
|
816
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
817
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
818
|
+
raise REXML::ParseException.new(message, @source, self)
|
819
|
+
end
|
820
|
+
expanded_names[expanded_name] = prefix
|
821
|
+
end
|
822
|
+
|
732
823
|
attributes[name] = value
|
733
824
|
else
|
734
825
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -7,36 +7,33 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
17
21
|
def parse
|
18
22
|
# entity string
|
19
23
|
while true
|
20
24
|
event = @parser.pull
|
21
25
|
case event[0]
|
22
26
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
27
|
return
|
29
28
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
29
|
attrs = event[2].each do |n, v|
|
32
30
|
event[2][n] = @parser.unnormalize( v )
|
33
31
|
end
|
34
32
|
@listener.tag_start( event[1], attrs )
|
35
33
|
when :end_element
|
36
34
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
35
|
when :text
|
39
|
-
unnormalized = @parser.unnormalize( event[1] )
|
36
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
40
37
|
@listener.text( unnormalized )
|
41
38
|
when :processing_instruction
|
42
39
|
@listener.instruction( *event[1,2] )
|
@@ -48,6 +45,7 @@ module REXML
|
|
48
45
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
46
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
47
|
when :entitydecl, :notationdecl
|
48
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
49
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
50
|
when :externalentity
|
53
51
|
entity_reference = event[1]
|
@@ -15,7 +15,6 @@ module REXML
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def parse
|
18
|
-
tag_stack = []
|
19
18
|
entities = nil
|
20
19
|
begin
|
21
20
|
while true
|
@@ -23,19 +22,13 @@ module REXML
|
|
23
22
|
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
|
24
23
|
case event[0]
|
25
24
|
when :end_document
|
26
|
-
unless tag_stack.empty?
|
27
|
-
raise ParseException.new("No close tag for #{@build_context.xpath}",
|
28
|
-
@parser.source, @parser)
|
29
|
-
end
|
30
25
|
return
|
31
26
|
when :start_element
|
32
|
-
tag_stack.push(event[1])
|
33
27
|
el = @build_context = @build_context.add_element( event[1] )
|
34
28
|
event[2].each do |key, value|
|
35
29
|
el.attributes[key]=Attribute.new(key,value,self)
|
36
30
|
end
|
37
31
|
when :end_element
|
38
|
-
tag_stack.pop
|
39
32
|
@build_context = @build_context.parent
|
40
33
|
when :text
|
41
34
|
if @build_context[-1].instance_of? Text
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -117,7 +117,7 @@ module REXML
|
|
117
117
|
def ensure_buffer
|
118
118
|
end
|
119
119
|
|
120
|
-
def match(pattern, cons=false
|
120
|
+
def match(pattern, cons=false)
|
121
121
|
if cons
|
122
122
|
@scanner.scan(pattern).nil? ? nil : @scanner
|
123
123
|
else
|
@@ -204,10 +204,20 @@ module REXML
|
|
204
204
|
end
|
205
205
|
end
|
206
206
|
|
207
|
-
def read(term = nil)
|
207
|
+
def read(term = nil, min_bytes = 1)
|
208
208
|
term = encode(term) if term
|
209
209
|
begin
|
210
|
-
|
210
|
+
str = readline(term)
|
211
|
+
@scanner << str
|
212
|
+
read_bytes = str.bytesize
|
213
|
+
begin
|
214
|
+
while read_bytes < min_bytes
|
215
|
+
str = readline(term)
|
216
|
+
@scanner << str
|
217
|
+
read_bytes += str.bytesize
|
218
|
+
end
|
219
|
+
rescue IOError
|
220
|
+
end
|
211
221
|
true
|
212
222
|
rescue Exception, NameError
|
213
223
|
@source = nil
|
@@ -237,10 +247,9 @@ module REXML
|
|
237
247
|
read if @scanner.eos? && @source
|
238
248
|
end
|
239
249
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
def match( pattern, cons=false, term: nil )
|
250
|
+
def match( pattern, cons=false )
|
251
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
252
|
+
min_bytes = 1
|
244
253
|
while true
|
245
254
|
if cons
|
246
255
|
md = @scanner.scan(pattern)
|
@@ -250,7 +259,8 @@ module REXML
|
|
250
259
|
break if md
|
251
260
|
return nil if pattern.is_a?(String)
|
252
261
|
return nil if @source.nil?
|
253
|
-
return nil unless read(
|
262
|
+
return nil unless read(nil, min_bytes)
|
263
|
+
min_bytes *= 2
|
254
264
|
end
|
255
265
|
|
256
266
|
md.nil? ? nil : @scanner
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.2
|
4
|
+
version: 3.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-07-16 00:00:00.000000000 Z
|
10
|
+
date: 2024-08-22 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
|
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
118
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.2
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.6
|
120
120
|
rdoc_options:
|
121
121
|
- "--main"
|
122
122
|
- README.md
|