rexml 3.3.1 → 3.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +155 -0
- data/lib/rexml/element.rb +14 -16
- data/lib/rexml/entity.rb +5 -47
- data/lib/rexml/formatters/pretty.rb +1 -1
- data/lib/rexml/parsers/baseparser.rb +161 -44
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +6 -19
- data/lib/rexml/parsers/streamparser.rb +8 -10
- data/lib/rexml/parsers/treeparser.rb +0 -7
- data/lib/rexml/rexml.rb +1 -1
- data/lib/rexml/source.rb +16 -6
- data/lib/rexml/text.rb +34 -14
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b79c22060286dad847e18d30b4b336bda21d2772ccb35413fb9ba51a0012ed2
|
4
|
+
data.tar.gz: feb56a4a3071541e983acd33b8baa6b9052f8d67d871102cfe6e69773a0cfcfe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b615c95f8624212e151443ad03ba9b64f39aee8a200ea212150a10116340157cfda1bf974ab3d03161c0fb37d866e8c1c69ccc6a9549a13398452b32166af2d8
|
7
|
+
data.tar.gz: db7dcac658e1f51f30575c24d6f36dc256349331fa1951c8fdfaf214baf97a5a446a1fcc411358a76d2c6fc36388ec8b1178adeacc3225d16d5d95ac53a8c4b3
|
data/NEWS.md
CHANGED
@@ -1,5 +1,160 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.3.6 - 2024-08-22 {#version-3-3-6}
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Removed duplicated entity expansions for performance.
|
8
|
+
* GH-194
|
9
|
+
* Patch by Viktor Ivarsson.
|
10
|
+
|
11
|
+
* Improved namespace conflicted attribute check performance. It was
|
12
|
+
too slow for deep elements.
|
13
|
+
* Reported by l33thaxor.
|
14
|
+
|
15
|
+
### Fixes
|
16
|
+
|
17
|
+
* Fixed a bug that default entity expansions are counted for
|
18
|
+
security check. Default entity expansions should not be counted
|
19
|
+
because they don't have a security risk.
|
20
|
+
* GH-198
|
21
|
+
* GH-199
|
22
|
+
* Patch by Viktor Ivarsson.
|
23
|
+
|
24
|
+
* Fixed a parser bug that parameter entity references in internal
|
25
|
+
subsets are expanded. It's not allowed in the XML specification.
|
26
|
+
* GH-191
|
27
|
+
* Patch by NAITOH Jun.
|
28
|
+
|
29
|
+
* Fixed a stream parser bug that user-defined entity references in
|
30
|
+
text aren't expanded.
|
31
|
+
* GH-200
|
32
|
+
* Patch by NAITOH Jun.
|
33
|
+
|
34
|
+
### Thanks
|
35
|
+
|
36
|
+
* Viktor Ivarsson
|
37
|
+
|
38
|
+
* NAITOH Jun
|
39
|
+
|
40
|
+
* l33thaxor
|
41
|
+
|
42
|
+
## 3.3.5 - 2024-08-12 {#version-3-3-5}
|
43
|
+
|
44
|
+
### Fixes
|
45
|
+
|
46
|
+
* Fixed a bug that `REXML::Security.entity_expansion_text_limit`
|
47
|
+
check has wrong text size calculation in SAX and pull parsers.
|
48
|
+
* GH-193
|
49
|
+
* GH-195
|
50
|
+
* Reported by Viktor Ivarsson.
|
51
|
+
* Patch by NAITOH Jun.
|
52
|
+
|
53
|
+
### Thanks
|
54
|
+
|
55
|
+
* Viktor Ivarsson
|
56
|
+
|
57
|
+
* NAITOH Jun
|
58
|
+
|
59
|
+
## 3.3.4 - 2024-08-01 {#version-3-3-4}
|
60
|
+
|
61
|
+
### Fixes
|
62
|
+
|
63
|
+
* Fixed a bug that `REXML::Security` isn't defined when
|
64
|
+
`REXML::Parsers::StreamParser` is used and
|
65
|
+
`rexml/parsers/streamparser` is only required.
|
66
|
+
* GH-189
|
67
|
+
* Patch by takuya kodama.
|
68
|
+
|
69
|
+
### Thanks
|
70
|
+
|
71
|
+
* takuya kodama
|
72
|
+
|
73
|
+
## 3.3.3 - 2024-08-01 {#version-3-3-3}
|
74
|
+
|
75
|
+
### Improvements
|
76
|
+
|
77
|
+
* Added support for detecting invalid XML that has unsupported
|
78
|
+
content before the root element.
|
79
|
+
* GH-184
|
80
|
+
* Patch by NAITOH Jun.
|
81
|
+
|
82
|
+
* Added support for `REXML::Security.entity_expansion_limit=` and
|
83
|
+
`REXML::Security.entity_expansion_text_limit=` in SAX2 and pull
|
84
|
+
parsers.
|
85
|
+
* GH-187
|
86
|
+
* Patch by NAITOH Jun.
|
87
|
+
|
88
|
+
* Added more tests for invalid XMLs.
|
89
|
+
* GH-183
|
90
|
+
* Patch by Watson.
|
91
|
+
|
92
|
+
* Added more performance tests.
|
93
|
+
* Patch by Watson.
|
94
|
+
|
95
|
+
* Improved parse performance.
|
96
|
+
* GH-186
|
97
|
+
* Patch by tomoya ishida.
|
98
|
+
|
99
|
+
### Thanks
|
100
|
+
|
101
|
+
* NAITOH Jun
|
102
|
+
|
103
|
+
* Watson
|
104
|
+
|
105
|
+
* tomoya ishida
|
106
|
+
|
107
|
+
## 3.3.2 - 2024-07-16 {#version-3-3-2}
|
108
|
+
|
109
|
+
### Improvements
|
110
|
+
|
111
|
+
* Improved parse performance.
|
112
|
+
* GH-160
|
113
|
+
* Patch by NAITOH Jun.
|
114
|
+
|
115
|
+
* Improved parse performance.
|
116
|
+
* GH-169
|
117
|
+
* GH-170
|
118
|
+
* GH-171
|
119
|
+
* GH-172
|
120
|
+
* GH-173
|
121
|
+
* GH-174
|
122
|
+
* GH-175
|
123
|
+
* GH-176
|
124
|
+
* GH-177
|
125
|
+
* Patch by Watson.
|
126
|
+
|
127
|
+
* Added support for raising a parse exception when an XML has extra
|
128
|
+
content after the root element.
|
129
|
+
* GH-161
|
130
|
+
* Patch by NAITOH Jun.
|
131
|
+
|
132
|
+
* Added support for raising a parse exception when an XML
|
133
|
+
declaration exists in wrong position.
|
134
|
+
* GH-162
|
135
|
+
* Patch by NAITOH Jun.
|
136
|
+
|
137
|
+
* Removed a needless space after the XML declaration in pretty print mode.
|
138
|
+
* GH-164
|
139
|
+
* Patch by NAITOH Jun.
|
140
|
+
|
141
|
+
* Stopped emitting `:text` events after the root element.
|
142
|
+
* GH-167
|
143
|
+
* Patch by NAITOH Jun.
|
144
|
+
|
145
|
+
### Fixes
|
146
|
+
|
147
|
+
* Fixed a bug that SAX2 parser doesn't expand predefined entities for
|
148
|
+
`characters` callback.
|
149
|
+
* GH-168
|
150
|
+
* Patch by NAITOH Jun.
|
151
|
+
|
152
|
+
### Thanks
|
153
|
+
|
154
|
+
* NAITOH Jun
|
155
|
+
|
156
|
+
* Watson
|
157
|
+
|
3
158
|
## 3.3.1 - 2024-06-25 {#version-3-3-1}
|
4
159
|
|
5
160
|
### Improvements
|
data/lib/rexml/element.rb
CHANGED
@@ -441,9 +441,14 @@ module REXML
|
|
441
441
|
# Related: #root_node, #document.
|
442
442
|
#
|
443
443
|
def root
|
444
|
-
|
445
|
-
|
446
|
-
|
444
|
+
target = self
|
445
|
+
while target
|
446
|
+
return target.elements[1] if target.kind_of? Document
|
447
|
+
parent = target.parent
|
448
|
+
return target if parent.kind_of? Document or parent.nil?
|
449
|
+
target = parent
|
450
|
+
end
|
451
|
+
nil
|
447
452
|
end
|
448
453
|
|
449
454
|
# :call-seq:
|
@@ -619,8 +624,12 @@ module REXML
|
|
619
624
|
else
|
620
625
|
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
|
621
626
|
end
|
622
|
-
ns =
|
623
|
-
|
627
|
+
ns = nil
|
628
|
+
target = self
|
629
|
+
while ns.nil? and target
|
630
|
+
ns = target.attributes[prefix]
|
631
|
+
target = target.parent
|
632
|
+
end
|
624
633
|
ns = '' if ns.nil? and prefix == 'xmlns'
|
625
634
|
return ns
|
626
635
|
end
|
@@ -2375,17 +2384,6 @@ module REXML
|
|
2375
2384
|
elsif old_attr.kind_of? Hash
|
2376
2385
|
old_attr[value.prefix] = value
|
2377
2386
|
elsif old_attr.prefix != value.prefix
|
2378
|
-
# Check for conflicting namespaces
|
2379
|
-
if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
|
2380
|
-
old_namespace = old_attr.namespace
|
2381
|
-
new_namespace = value.namespace
|
2382
|
-
if old_namespace == new_namespace
|
2383
|
-
raise ParseException.new(
|
2384
|
-
"Namespace conflict in adding attribute \"#{value.name}\": "+
|
2385
|
-
"Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
|
2386
|
-
"prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
|
2387
|
-
end
|
2388
|
-
end
|
2389
2387
|
store value.name, {old_attr.prefix => old_attr,
|
2390
2388
|
value.prefix => value}
|
2391
2389
|
else
|
data/lib/rexml/entity.rb
CHANGED
@@ -12,6 +12,7 @@ module REXML
|
|
12
12
|
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
13
13
|
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
14
14
|
PEREFERENCE = "%#{NAME};"
|
15
|
+
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
15
16
|
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
16
17
|
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
17
18
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
@@ -19,7 +20,7 @@ module REXML
|
|
19
20
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
20
21
|
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
21
22
|
|
22
|
-
attr_reader :name, :external, :ref, :ndata, :pubid
|
23
|
+
attr_reader :name, :external, :ref, :ndata, :pubid, :value
|
23
24
|
|
24
25
|
# Create a new entity. Simple entities can be constructed by passing a
|
25
26
|
# name, value to the constructor; this creates a generic, plain entity
|
@@ -68,14 +69,11 @@ module REXML
|
|
68
69
|
end
|
69
70
|
|
70
71
|
# Evaluates to the unnormalized value of this entity; that is, replacing
|
71
|
-
#
|
72
|
-
# +value()+ in that +value+ only replaces %ent; entities.
|
72
|
+
# &ent; entities.
|
73
73
|
def unnormalized
|
74
74
|
document.record_entity_expansion unless document.nil?
|
75
|
-
|
76
|
-
|
77
|
-
@unnormalized = Text::unnormalize(v, parent)
|
78
|
-
@unnormalized
|
75
|
+
return nil if @value.nil?
|
76
|
+
@unnormalized = Text::unnormalize(@value, parent)
|
79
77
|
end
|
80
78
|
|
81
79
|
#once :unnormalized
|
@@ -121,46 +119,6 @@ module REXML
|
|
121
119
|
write rv
|
122
120
|
rv
|
123
121
|
end
|
124
|
-
|
125
|
-
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
126
|
-
# Returns the value of this entity. At the moment, only internal entities
|
127
|
-
# are processed. If the value contains internal references (IE,
|
128
|
-
# %blah;), those are replaced with their values. IE, if the doctype
|
129
|
-
# contains:
|
130
|
-
# <!ENTITY % foo "bar">
|
131
|
-
# <!ENTITY yada "nanoo %foo; nanoo>
|
132
|
-
# then:
|
133
|
-
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
134
|
-
def value
|
135
|
-
@resolved_value ||= resolve_value
|
136
|
-
end
|
137
|
-
|
138
|
-
def parent=(other)
|
139
|
-
@resolved_value = nil
|
140
|
-
super
|
141
|
-
end
|
142
|
-
|
143
|
-
private
|
144
|
-
def resolve_value
|
145
|
-
return nil if @value.nil?
|
146
|
-
return @value unless @value.match?(PEREFERENCE_RE)
|
147
|
-
|
148
|
-
matches = @value.scan(PEREFERENCE_RE)
|
149
|
-
rv = @value.clone
|
150
|
-
if @parent
|
151
|
-
sum = 0
|
152
|
-
matches.each do |entity_reference|
|
153
|
-
entity_value = @parent.entity( entity_reference[0] )
|
154
|
-
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
155
|
-
raise "entity expansion has grown too large"
|
156
|
-
else
|
157
|
-
sum += entity_value.bytesize
|
158
|
-
end
|
159
|
-
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
160
|
-
end
|
161
|
-
end
|
162
|
-
rv
|
163
|
-
end
|
164
122
|
end
|
165
123
|
|
166
124
|
# This is a set of entity constants -- the ones defined in the XML
|
@@ -111,7 +111,7 @@ module REXML
|
|
111
111
|
# itself, then we don't need a carriage return... which makes this
|
112
112
|
# logic more complex.
|
113
113
|
node.children.each { |child|
|
114
|
-
next if child
|
114
|
+
next if child.instance_of?(Text)
|
115
115
|
unless child == node.children[0] or child.instance_of?(Text) or
|
116
116
|
(child == node.children[1] and !node.children[0].writethis)
|
117
117
|
output << "\n"
|
@@ -1,12 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require_relative '../parseexception'
|
3
3
|
require_relative '../undefinednamespaceexception'
|
4
|
+
require_relative '../security'
|
4
5
|
require_relative '../source'
|
5
6
|
require 'set'
|
6
7
|
require "strscan"
|
7
8
|
|
8
9
|
module REXML
|
9
10
|
module Parsers
|
11
|
+
unless [].respond_to?(:tally)
|
12
|
+
module EnumerableTally
|
13
|
+
refine Enumerable do
|
14
|
+
def tally
|
15
|
+
counts = {}
|
16
|
+
each do |item|
|
17
|
+
counts[item] ||= 0
|
18
|
+
counts[item] += 1
|
19
|
+
end
|
20
|
+
counts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
using EnumerableTally
|
25
|
+
end
|
26
|
+
|
10
27
|
if StringScanner::Version < "3.0.8"
|
11
28
|
module StringScannerCaptures
|
12
29
|
refine StringScanner do
|
@@ -124,11 +141,11 @@ module REXML
|
|
124
141
|
}
|
125
142
|
|
126
143
|
module Private
|
127
|
-
|
144
|
+
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
|
128
145
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
129
146
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
130
147
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
131
|
-
NAME_PATTERN =
|
148
|
+
NAME_PATTERN = /#{NAME}/um
|
132
149
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
133
150
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
134
151
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
@@ -146,6 +163,7 @@ module REXML
|
|
146
163
|
self.stream = source
|
147
164
|
@listeners = []
|
148
165
|
@prefixes = Set.new
|
166
|
+
@entity_expansion_count = 0
|
149
167
|
end
|
150
168
|
|
151
169
|
def add_listener( listener )
|
@@ -153,15 +171,18 @@ module REXML
|
|
153
171
|
end
|
154
172
|
|
155
173
|
attr_reader :source
|
174
|
+
attr_reader :entity_expansion_count
|
156
175
|
|
157
176
|
def stream=( source )
|
158
177
|
@source = SourceFactory.create_from( source )
|
159
178
|
@closed = nil
|
179
|
+
@have_root = false
|
160
180
|
@document_status = nil
|
161
181
|
@tags = []
|
162
182
|
@stack = []
|
163
183
|
@entities = []
|
164
|
-
@
|
184
|
+
@namespaces = {}
|
185
|
+
@namespaces_restore_stack = []
|
165
186
|
end
|
166
187
|
|
167
188
|
def position
|
@@ -229,6 +250,10 @@ module REXML
|
|
229
250
|
if @document_status == :in_doctype
|
230
251
|
raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
|
231
252
|
end
|
253
|
+
unless @tags.empty?
|
254
|
+
path = "/" + @tags.join("/")
|
255
|
+
raise ParseException.new("Missing end tag for '#{path}'", @source)
|
256
|
+
end
|
232
257
|
return [ :end_document ]
|
233
258
|
end
|
234
259
|
return @stack.shift if @stack.size > 0
|
@@ -239,7 +264,7 @@ module REXML
|
|
239
264
|
if @document_status == nil
|
240
265
|
start_position = @source.position
|
241
266
|
if @source.match("<?", true)
|
242
|
-
return process_instruction
|
267
|
+
return process_instruction
|
243
268
|
elsif @source.match("<!", true)
|
244
269
|
if @source.match("--", true)
|
245
270
|
md = @source.match(/(.*?)-->/um, true)
|
@@ -261,7 +286,6 @@ module REXML
|
|
261
286
|
@source.position = start_position
|
262
287
|
raise REXML::ParseException.new(message, @source)
|
263
288
|
end
|
264
|
-
@nsstack.unshift(Set.new)
|
265
289
|
name = parse_name(base_error_message)
|
266
290
|
if @source.match(/\s*\[/um, true)
|
267
291
|
id = [nil, nil, nil]
|
@@ -309,7 +333,11 @@ module REXML
|
|
309
333
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
310
334
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
311
335
|
elsif @source.match("ENTITY", true)
|
312
|
-
|
336
|
+
match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
337
|
+
unless match_data
|
338
|
+
raise REXML::ParseException.new("Malformed entity declaration", @source)
|
339
|
+
end
|
340
|
+
match = [:entitydecl, *match_data.captures.compact]
|
313
341
|
ref = false
|
314
342
|
if match[1] == '%'
|
315
343
|
ref = true
|
@@ -327,6 +355,8 @@ module REXML
|
|
327
355
|
match[4] = match[4][1..-2] # HREF
|
328
356
|
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
|
329
357
|
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
|
358
|
+
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
|
359
|
+
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
|
330
360
|
else
|
331
361
|
match[2] = match[2][1..-2]
|
332
362
|
match.pop if match.size == 4
|
@@ -341,7 +371,7 @@ module REXML
|
|
341
371
|
contents = md[0]
|
342
372
|
|
343
373
|
pairs = {}
|
344
|
-
values = md[0].scan( ATTDEF_RE )
|
374
|
+
values = md[0].strip.scan( ATTDEF_RE )
|
345
375
|
values.each do |attdef|
|
346
376
|
unless attdef[3] == "#IMPLIED"
|
347
377
|
attdef.compact!
|
@@ -349,7 +379,7 @@ module REXML
|
|
349
379
|
val = attdef[4] if val == "#FIXED "
|
350
380
|
pairs[attdef[0]] = val
|
351
381
|
if attdef[0] =~ /^xmlns:(.*)/
|
352
|
-
@
|
382
|
+
@namespaces[$1] = val
|
353
383
|
end
|
354
384
|
end
|
355
385
|
end
|
@@ -402,7 +432,7 @@ module REXML
|
|
402
432
|
# here explicitly.
|
403
433
|
@source.ensure_buffer
|
404
434
|
if @source.match("/", true)
|
405
|
-
@
|
435
|
+
@namespaces_restore_stack.pop
|
406
436
|
last_tag = @tags.pop
|
407
437
|
md = @source.match(Private::CLOSE_PATTERN, true)
|
408
438
|
if md and !last_tag
|
@@ -435,7 +465,7 @@ module REXML
|
|
435
465
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
436
466
|
"in the doctype declaration.", @source)
|
437
467
|
elsif @source.match("?", true)
|
438
|
-
return process_instruction
|
468
|
+
return process_instruction
|
439
469
|
else
|
440
470
|
# Get the next tag
|
441
471
|
md = @source.match(Private::TAG_PATTERN, true)
|
@@ -447,21 +477,25 @@ module REXML
|
|
447
477
|
@document_status = :in_element
|
448
478
|
@prefixes.clear
|
449
479
|
@prefixes << md[2] if md[2]
|
450
|
-
|
451
|
-
attributes, closed = parse_attributes(@prefixes
|
480
|
+
push_namespaces_restore
|
481
|
+
attributes, closed = parse_attributes(@prefixes)
|
452
482
|
# Verify that all of the prefixes have been defined
|
453
483
|
for prefix in @prefixes
|
454
|
-
unless @
|
484
|
+
unless @namespaces.key?(prefix)
|
455
485
|
raise UndefinedNamespaceException.new(prefix,@source,self)
|
456
486
|
end
|
457
487
|
end
|
458
488
|
|
459
489
|
if closed
|
460
490
|
@closed = tag
|
461
|
-
|
491
|
+
pop_namespaces_restore
|
462
492
|
else
|
493
|
+
if @tags.empty? and @have_root
|
494
|
+
raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
495
|
+
end
|
463
496
|
@tags.push( tag )
|
464
497
|
end
|
498
|
+
@have_root = true
|
465
499
|
return [ :start_element, tag, attributes ]
|
466
500
|
end
|
467
501
|
else
|
@@ -469,6 +503,16 @@ module REXML
|
|
469
503
|
if text.chomp!("<")
|
470
504
|
@source.position -= "<".bytesize
|
471
505
|
end
|
506
|
+
if @tags.empty?
|
507
|
+
unless /\A\s*\z/.match?(text)
|
508
|
+
if @have_root
|
509
|
+
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
510
|
+
else
|
511
|
+
raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
512
|
+
end
|
513
|
+
end
|
514
|
+
return pull_event if @have_root
|
515
|
+
end
|
472
516
|
return [ :text, text ]
|
473
517
|
end
|
474
518
|
rescue REXML::UndefinedNamespaceException
|
@@ -484,13 +528,13 @@ module REXML
|
|
484
528
|
private :pull_event
|
485
529
|
|
486
530
|
def entity( reference, entities )
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
unnormalize( value, entities )
|
531
|
+
return unless entities
|
532
|
+
|
533
|
+
value = entities[ reference ]
|
534
|
+
return if value.nil?
|
535
|
+
|
536
|
+
record_entity_expansion
|
537
|
+
unnormalize( value, entities )
|
494
538
|
end
|
495
539
|
|
496
540
|
# Escapes all possible entities
|
@@ -511,7 +555,11 @@ module REXML
|
|
511
555
|
|
512
556
|
# Unescapes all possible entities
|
513
557
|
def unnormalize( string, entities=nil, filter=nil )
|
514
|
-
|
558
|
+
if string.include?("\r")
|
559
|
+
rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
560
|
+
else
|
561
|
+
rv = string.dup
|
562
|
+
end
|
515
563
|
matches = rv.scan( REFERENCE_RE )
|
516
564
|
return rv if matches.size == 0
|
517
565
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
@@ -520,17 +568,29 @@ module REXML
|
|
520
568
|
[Integer(m)].pack('U*')
|
521
569
|
}
|
522
570
|
matches.collect!{|x|x[0]}.compact!
|
571
|
+
if filter
|
572
|
+
matches.reject! do |entity_reference|
|
573
|
+
filter.include?(entity_reference)
|
574
|
+
end
|
575
|
+
end
|
523
576
|
if matches.size > 0
|
524
|
-
matches.each do |entity_reference|
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
rv.gsub!( er[0], er[2] ) if er
|
577
|
+
matches.tally.each do |entity_reference, n|
|
578
|
+
entity_expansion_count_before = @entity_expansion_count
|
579
|
+
entity_value = entity( entity_reference, entities )
|
580
|
+
if entity_value
|
581
|
+
if n > 1
|
582
|
+
entity_expansion_count_delta =
|
583
|
+
@entity_expansion_count - entity_expansion_count_before
|
584
|
+
record_entity_expansion(entity_expansion_count_delta * (n - 1))
|
533
585
|
end
|
586
|
+
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
|
587
|
+
rv.gsub!( re, entity_value )
|
588
|
+
if rv.bytesize > Security.entity_expansion_text_limit
|
589
|
+
raise "entity expansion has grown too large"
|
590
|
+
end
|
591
|
+
else
|
592
|
+
er = DEFAULT_ENTITIES[entity_reference]
|
593
|
+
rv.gsub!( er[0], er[2] ) if er
|
534
594
|
end
|
535
595
|
end
|
536
596
|
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
|
@@ -539,6 +599,39 @@ module REXML
|
|
539
599
|
end
|
540
600
|
|
541
601
|
private
|
602
|
+
def add_namespace(prefix, uri)
|
603
|
+
@namespaces_restore_stack.last[prefix] = @namespaces[prefix]
|
604
|
+
if uri.nil?
|
605
|
+
@namespaces.delete(prefix)
|
606
|
+
else
|
607
|
+
@namespaces[prefix] = uri
|
608
|
+
end
|
609
|
+
end
|
610
|
+
|
611
|
+
def push_namespaces_restore
|
612
|
+
namespaces_restore = {}
|
613
|
+
@namespaces_restore_stack.push(namespaces_restore)
|
614
|
+
namespaces_restore
|
615
|
+
end
|
616
|
+
|
617
|
+
def pop_namespaces_restore
|
618
|
+
namespaces_restore = @namespaces_restore_stack.pop
|
619
|
+
namespaces_restore.each do |prefix, uri|
|
620
|
+
if uri.nil?
|
621
|
+
@namespaces.delete(prefix)
|
622
|
+
else
|
623
|
+
@namespaces[prefix] = uri
|
624
|
+
end
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
def record_entity_expansion(delta=1)
|
629
|
+
@entity_expansion_count += delta
|
630
|
+
if @entity_expansion_count > Security.entity_expansion_limit
|
631
|
+
raise "number of entity expansions exceeded, processing aborted."
|
632
|
+
end
|
633
|
+
end
|
634
|
+
|
542
635
|
def need_source_encoding_update?(xml_declaration_encoding)
|
543
636
|
return false if xml_declaration_encoding.nil?
|
544
637
|
return false if /\AUTF-16\z/i =~ xml_declaration_encoding
|
@@ -548,14 +641,14 @@ module REXML
|
|
548
641
|
def parse_name(base_error_message)
|
549
642
|
md = @source.match(Private::NAME_PATTERN, true)
|
550
643
|
unless md
|
551
|
-
if @source.match(/\
|
644
|
+
if @source.match(/\S/um)
|
552
645
|
message = "#{base_error_message}: invalid name"
|
553
646
|
else
|
554
647
|
message = "#{base_error_message}: name is missing"
|
555
648
|
end
|
556
649
|
raise REXML::ParseException.new(message, @source)
|
557
650
|
end
|
558
|
-
md[
|
651
|
+
md[0]
|
559
652
|
end
|
560
653
|
|
561
654
|
def parse_id(base_error_message,
|
@@ -624,15 +717,24 @@ module REXML
|
|
624
717
|
end
|
625
718
|
end
|
626
719
|
|
627
|
-
def process_instruction
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
720
|
+
def process_instruction
|
721
|
+
name = parse_name("Malformed XML: Invalid processing instruction node")
|
722
|
+
if @source.match(/\s+/um, true)
|
723
|
+
match_data = @source.match(/(.*?)\?>/um, true)
|
724
|
+
unless match_data
|
725
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
726
|
+
end
|
727
|
+
content = match_data[1]
|
728
|
+
else
|
729
|
+
content = nil
|
730
|
+
unless @source.match("?>", true)
|
731
|
+
raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
732
|
+
end
|
633
733
|
end
|
634
|
-
if
|
635
|
-
|
734
|
+
if name == "xml"
|
735
|
+
if @document_status
|
736
|
+
raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
737
|
+
end
|
636
738
|
version = VERSION.match(content)
|
637
739
|
version = version[1] unless version.nil?
|
638
740
|
encoding = ENCODING.match(content)
|
@@ -647,11 +749,12 @@ module REXML
|
|
647
749
|
standalone = standalone[1] unless standalone.nil?
|
648
750
|
return [ :xmldecl, version, encoding, standalone ]
|
649
751
|
end
|
650
|
-
[:processing_instruction,
|
752
|
+
[:processing_instruction, name, content]
|
651
753
|
end
|
652
754
|
|
653
|
-
def parse_attributes(prefixes
|
755
|
+
def parse_attributes(prefixes)
|
654
756
|
attributes = {}
|
757
|
+
expanded_names = {}
|
655
758
|
closed = false
|
656
759
|
while true
|
657
760
|
if @source.match(">", true)
|
@@ -693,7 +796,7 @@ module REXML
|
|
693
796
|
"(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
|
694
797
|
raise REXML::ParseException.new( msg, @source, self)
|
695
798
|
end
|
696
|
-
|
799
|
+
add_namespace(local_part, value)
|
697
800
|
elsif prefix
|
698
801
|
prefixes << prefix unless prefix == "xml"
|
699
802
|
end
|
@@ -703,6 +806,20 @@ module REXML
|
|
703
806
|
raise REXML::ParseException.new(msg, @source, self)
|
704
807
|
end
|
705
808
|
|
809
|
+
unless prefix == "xmlns"
|
810
|
+
uri = @namespaces[prefix]
|
811
|
+
expanded_name = [uri, local_part]
|
812
|
+
existing_prefix = expanded_names[expanded_name]
|
813
|
+
if existing_prefix
|
814
|
+
message = "Namespace conflict in adding attribute " +
|
815
|
+
"\"#{local_part}\": " +
|
816
|
+
"Prefix \"#{existing_prefix}\" = \"#{uri}\" and " +
|
817
|
+
"prefix \"#{prefix}\" = \"#{uri}\""
|
818
|
+
raise REXML::ParseException.new(message, @source, self)
|
819
|
+
end
|
820
|
+
expanded_names[expanded_name] = prefix
|
821
|
+
end
|
822
|
+
|
706
823
|
attributes[name] = value
|
707
824
|
else
|
708
825
|
message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>"
|
@@ -22,6 +22,10 @@ module REXML
|
|
22
22
|
@parser.source
|
23
23
|
end
|
24
24
|
|
25
|
+
def entity_expansion_count
|
26
|
+
@parser.entity_expansion_count
|
27
|
+
end
|
28
|
+
|
25
29
|
def add_listener( listener )
|
26
30
|
@parser.add_listener( listener )
|
27
31
|
end
|
@@ -157,25 +161,8 @@ module REXML
|
|
157
161
|
end
|
158
162
|
end
|
159
163
|
when :text
|
160
|
-
|
161
|
-
|
162
|
-
copy = event[1].clone
|
163
|
-
|
164
|
-
esub = proc { |match|
|
165
|
-
if @entities.has_key?($1)
|
166
|
-
@entities[$1].gsub(Text::REFERENCE, &esub)
|
167
|
-
else
|
168
|
-
match
|
169
|
-
end
|
170
|
-
}
|
171
|
-
|
172
|
-
copy.gsub!( Text::REFERENCE, &esub )
|
173
|
-
copy.gsub!( Text::NUMERICENTITY ) {|m|
|
174
|
-
m=$1
|
175
|
-
m = "0#{m}" if m[0] == ?x
|
176
|
-
[Integer(m)].pack('U*')
|
177
|
-
}
|
178
|
-
handle( :characters, copy )
|
164
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
165
|
+
handle( :characters, unnormalized )
|
179
166
|
when :entitydecl
|
180
167
|
handle_entitydecl( event )
|
181
168
|
when :processing_instruction, :comment, :attlistdecl,
|
@@ -7,37 +7,34 @@ module REXML
|
|
7
7
|
def initialize source, listener
|
8
8
|
@listener = listener
|
9
9
|
@parser = BaseParser.new( source )
|
10
|
-
@
|
10
|
+
@entities = {}
|
11
11
|
end
|
12
12
|
|
13
13
|
def add_listener( listener )
|
14
14
|
@parser.add_listener( listener )
|
15
15
|
end
|
16
16
|
|
17
|
+
def entity_expansion_count
|
18
|
+
@parser.entity_expansion_count
|
19
|
+
end
|
20
|
+
|
17
21
|
def parse
|
18
22
|
# entity string
|
19
23
|
while true
|
20
24
|
event = @parser.pull
|
21
25
|
case event[0]
|
22
26
|
when :end_document
|
23
|
-
unless @tag_stack.empty?
|
24
|
-
tag_path = "/" + @tag_stack.join("/")
|
25
|
-
raise ParseException.new("Missing end tag for '#{tag_path}'",
|
26
|
-
@parser.source)
|
27
|
-
end
|
28
27
|
return
|
29
28
|
when :start_element
|
30
|
-
@tag_stack << event[1]
|
31
29
|
attrs = event[2].each do |n, v|
|
32
30
|
event[2][n] = @parser.unnormalize( v )
|
33
31
|
end
|
34
32
|
@listener.tag_start( event[1], attrs )
|
35
33
|
when :end_element
|
36
34
|
@listener.tag_end( event[1] )
|
37
|
-
@tag_stack.pop
|
38
35
|
when :text
|
39
|
-
|
40
|
-
@listener.text(
|
36
|
+
unnormalized = @parser.unnormalize( event[1], @entities )
|
37
|
+
@listener.text( unnormalized )
|
41
38
|
when :processing_instruction
|
42
39
|
@listener.instruction( *event[1,2] )
|
43
40
|
when :start_doctype
|
@@ -48,6 +45,7 @@ module REXML
|
|
48
45
|
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
|
49
46
|
@listener.send( event[0].to_s, *event[1..-1] )
|
50
47
|
when :entitydecl, :notationdecl
|
48
|
+
@entities[ event[1] ] = event[2] if event.size == 3
|
51
49
|
@listener.send( event[0].to_s, event[1..-1] )
|
52
50
|
when :externalentity
|
53
51
|
entity_reference = event[1]
|
@@ -15,7 +15,6 @@ module REXML
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def parse
|
18
|
-
tag_stack = []
|
19
18
|
entities = nil
|
20
19
|
begin
|
21
20
|
while true
|
@@ -23,19 +22,13 @@ module REXML
|
|
23
22
|
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
|
24
23
|
case event[0]
|
25
24
|
when :end_document
|
26
|
-
unless tag_stack.empty?
|
27
|
-
raise ParseException.new("No close tag for #{@build_context.xpath}",
|
28
|
-
@parser.source, @parser)
|
29
|
-
end
|
30
25
|
return
|
31
26
|
when :start_element
|
32
|
-
tag_stack.push(event[1])
|
33
27
|
el = @build_context = @build_context.add_element( event[1] )
|
34
28
|
event[2].each do |key, value|
|
35
29
|
el.attributes[key]=Attribute.new(key,value,self)
|
36
30
|
end
|
37
31
|
when :end_element
|
38
|
-
tag_stack.pop
|
39
32
|
@build_context = @build_context.parent
|
40
33
|
when :text
|
41
34
|
if @build_context[-1].instance_of? Text
|
data/lib/rexml/rexml.rb
CHANGED
data/lib/rexml/source.rb
CHANGED
@@ -204,10 +204,20 @@ module REXML
|
|
204
204
|
end
|
205
205
|
end
|
206
206
|
|
207
|
-
def read(term = nil)
|
207
|
+
def read(term = nil, min_bytes = 1)
|
208
208
|
term = encode(term) if term
|
209
209
|
begin
|
210
|
-
|
210
|
+
str = readline(term)
|
211
|
+
@scanner << str
|
212
|
+
read_bytes = str.bytesize
|
213
|
+
begin
|
214
|
+
while read_bytes < min_bytes
|
215
|
+
str = readline(term)
|
216
|
+
@scanner << str
|
217
|
+
read_bytes += str.bytesize
|
218
|
+
end
|
219
|
+
rescue IOError
|
220
|
+
end
|
211
221
|
true
|
212
222
|
rescue Exception, NameError
|
213
223
|
@source = nil
|
@@ -237,10 +247,9 @@ module REXML
|
|
237
247
|
read if @scanner.eos? && @source
|
238
248
|
end
|
239
249
|
|
240
|
-
# Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
241
|
-
# - ">"
|
242
|
-
# - "XXX>" (X is any string excluding '>')
|
243
250
|
def match( pattern, cons=false )
|
251
|
+
# To avoid performance issue, we need to increase bytes to read per scan
|
252
|
+
min_bytes = 1
|
244
253
|
while true
|
245
254
|
if cons
|
246
255
|
md = @scanner.scan(pattern)
|
@@ -250,7 +259,8 @@ module REXML
|
|
250
259
|
break if md
|
251
260
|
return nil if pattern.is_a?(String)
|
252
261
|
return nil if @source.nil?
|
253
|
-
return nil unless read
|
262
|
+
return nil unless read(nil, min_bytes)
|
263
|
+
min_bytes *= 2
|
254
264
|
end
|
255
265
|
|
256
266
|
md.nil? ? nil : @scanner
|
data/lib/rexml/text.rb
CHANGED
@@ -151,25 +151,45 @@ module REXML
|
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
154
|
-
|
155
|
-
string.
|
156
|
-
if
|
157
|
-
raise "Illegal character #{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
154
|
+
pos = 0
|
155
|
+
while (index = string.index(/<|&/, pos))
|
156
|
+
if string[index] == "<"
|
157
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
158
|
+
end
|
159
|
+
|
160
|
+
unless (end_index = string.index(/[^\s];/, index + 1))
|
161
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
|
+
end
|
163
|
+
|
164
|
+
value = string[(index + 1)..end_index]
|
165
|
+
if /\s/.match?(value)
|
166
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
167
|
+
end
|
168
|
+
|
169
|
+
if value[0] == "#"
|
170
|
+
character_reference = value[1..-1]
|
171
|
+
|
172
|
+
unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
173
|
+
if character_reference[0] == "x" || character_reference[-1] == "x"
|
174
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
162
175
|
else
|
163
|
-
raise "Illegal character #{
|
176
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
164
177
|
end
|
165
|
-
# FIXME: below can't work but this needs API change.
|
166
|
-
# elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
167
|
-
# if !doctype or !doctype.entities.has_key?($3)
|
168
|
-
# raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
169
|
-
# end
|
170
178
|
end
|
179
|
+
|
180
|
+
case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
181
|
+
when *VALID_CHAR
|
182
|
+
else
|
183
|
+
raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
184
|
+
end
|
185
|
+
elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
186
|
+
raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
171
187
|
end
|
188
|
+
|
189
|
+
pos = end_index + 1
|
172
190
|
end
|
191
|
+
|
192
|
+
string
|
173
193
|
end
|
174
194
|
|
175
195
|
def node_type
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.3.1
|
4
|
+
version: 3.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date: 2024-06-25 00:00:00.000000000 Z
|
10
|
+
date: 2024-08-22 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: strscan
|
@@ -116,7 +116,7 @@ homepage: https://github.com/ruby/rexml
|
|
116
116
|
licenses:
|
117
117
|
- BSD-2-Clause
|
118
118
|
metadata:
|
119
|
-
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.1
|
119
|
+
changelog_uri: https://github.com/ruby/rexml/releases/tag/v3.3.6
|
120
120
|
rdoc_options:
|
121
121
|
- "--main"
|
122
122
|
- README.md
|