rexml 3.2.5 → 3.3.8
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/NEWS.md +449 -2
- data/README.md +10 -1
- data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +17 -11
- data/lib/rexml/document.rb +6 -2
- data/lib/rexml/element.rb +19 -34
- data/lib/rexml/entity.rb +9 -38
- data/lib/rexml/formatters/pretty.rb +3 -3
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +426 -263
- data/lib/rexml/parsers/pullparser.rb +12 -0
- data/lib/rexml/parsers/sax2parser.rb +16 -19
- data/lib/rexml/parsers/streamparser.rb +16 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/parsers/xpathparser.rb +136 -86
- data/lib/rexml/rexml.rb +3 -1
- data/lib/rexml/source.rb +128 -98
- data/lib/rexml/text.rb +45 -21
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +10 -52
data/lib/rexml/attribute.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative "namespace"
|
3
3
|
require_relative 'text'
|
4
4
|
|
@@ -13,9 +13,6 @@ module REXML
|
|
13
13
|
|
14
14
|
# The element to which this attribute belongs
|
15
15
|
attr_reader :element
|
16
|
-
# The normalized value of this attribute. That is, the attribute with
|
17
|
-
# entities intact.
|
18
|
-
attr_writer :normalized
|
19
16
|
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
20
17
|
|
21
18
|
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
|
@@ -122,10 +119,13 @@ module REXML
|
|
122
119
|
# b = Attribute.new( "ns:x", "y" )
|
123
120
|
# b.to_string # -> "ns:x='y'"
|
124
121
|
def to_string
|
122
|
+
value = to_s
|
125
123
|
if @element and @element.context and @element.context[:attribute_quote] == :quote
|
126
|
-
|
124
|
+
value = value.gsub('"', '"') if value.include?('"')
|
125
|
+
%Q^#@expanded_name="#{value}"^
|
127
126
|
else
|
128
|
-
|
127
|
+
value = value.gsub("'", ''') if value.include?("'")
|
128
|
+
"#@expanded_name='#{value}'"
|
129
129
|
end
|
130
130
|
end
|
131
131
|
|
@@ -141,7 +141,6 @@ module REXML
|
|
141
141
|
return @normalized if @normalized
|
142
142
|
|
143
143
|
@normalized = Text::normalize( @unnormalized, doctype )
|
144
|
-
@unnormalized = nil
|
145
144
|
@normalized
|
146
145
|
end
|
147
146
|
|
@@ -149,9 +148,16 @@ module REXML
|
|
149
148
|
# have been expanded to their values
|
150
149
|
def value
|
151
150
|
return @unnormalized if @unnormalized
|
152
|
-
|
153
|
-
@
|
154
|
-
|
151
|
+
|
152
|
+
@unnormalized = Text::unnormalize(@normalized, doctype,
|
153
|
+
entity_expansion_text_limit: @element&.document&.entity_expansion_text_limit)
|
154
|
+
end
|
155
|
+
|
156
|
+
# The normalized value of this attribute. That is, the attribute with
|
157
|
+
# entities intact.
|
158
|
+
def normalized=(new_normalized)
|
159
|
+
@normalized = new_normalized
|
160
|
+
@unnormalized = nil
|
155
161
|
end
|
156
162
|
|
157
163
|
# Returns a copy of this attribute
|
@@ -190,7 +196,7 @@ module REXML
|
|
190
196
|
end
|
191
197
|
|
192
198
|
def inspect
|
193
|
-
rv = ""
|
199
|
+
rv = +""
|
194
200
|
write( rv )
|
195
201
|
rv
|
196
202
|
end
|
data/lib/rexml/document.rb
CHANGED
@@ -69,7 +69,7 @@ module REXML
|
|
69
69
|
# d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>"
|
70
70
|
#
|
71
71
|
# When argument +document+ is given, it must be an existing
|
72
|
-
# document object, whose context and attributes (but not
|
72
|
+
# document object, whose context and attributes (but not children)
|
73
73
|
# are cloned into the new document:
|
74
74
|
#
|
75
75
|
# d = REXML::Document.new(xml_string)
|
@@ -91,6 +91,8 @@ module REXML
|
|
91
91
|
#
|
92
92
|
def initialize( source = nil, context = {} )
|
93
93
|
@entity_expansion_count = 0
|
94
|
+
@entity_expansion_limit = Security.entity_expansion_limit
|
95
|
+
@entity_expansion_text_limit = Security.entity_expansion_text_limit
|
94
96
|
super()
|
95
97
|
@context = context
|
96
98
|
return if source.nil?
|
@@ -431,10 +433,12 @@ module REXML
|
|
431
433
|
end
|
432
434
|
|
433
435
|
attr_reader :entity_expansion_count
|
436
|
+
attr_writer :entity_expansion_limit
|
437
|
+
attr_accessor :entity_expansion_text_limit
|
434
438
|
|
435
439
|
def record_entity_expansion
|
436
440
|
@entity_expansion_count += 1
|
437
|
-
if @entity_expansion_count >
|
441
|
+
if @entity_expansion_count > @entity_expansion_limit
|
438
442
|
raise "number of entity expansions exceeded, processing aborted."
|
439
443
|
end
|
440
444
|
end
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -449,9 +441,14 @@ module REXML
|
|
449
441
|
# Related: #root_node, #document.
|
450
442
|
#
|
451
443
|
def root
|
452
|
-
|
453
|
-
|
454
|
-
|
444
|
+
target = self
|
445
|
+
while target
|
446
|
+
return target.elements[1] if target.kind_of? Document
|
447
|
+
parent = target.parent
|
448
|
+
return target if parent.kind_of? Document or parent.nil?
|
449
|
+
target = parent
|
450
|
+
end
|
451
|
+
nil
|
455
452
|
end
|
456
453
|
|
457
454
|
# :call-seq:
|
@@ -627,8 +624,12 @@ module REXML
|
|
627
624
|
else
|
628
625
|
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
|
629
626
|
end
|
630
|
-
ns =
|
631
|
-
|
627
|
+
ns = nil
|
628
|
+
target = self
|
629
|
+
while ns.nil? and target
|
630
|
+
ns = target.attributes[prefix]
|
631
|
+
target = target.parent
|
632
|
+
end
|
632
633
|
ns = '' if ns.nil? and prefix == 'xmlns'
|
633
634
|
return ns
|
634
635
|
end
|
@@ -989,7 +990,7 @@ module REXML
|
|
989
990
|
# :call-seq:
|
990
991
|
# has_text? -> true or false
|
991
992
|
#
|
992
|
-
# Returns +true if the element has one or more text noded,
|
993
|
+
# Returns +true+ if the element has one or more text noded,
|
993
994
|
# +false+ otherwise:
|
994
995
|
#
|
995
996
|
# d = REXML::Document.new '<a><b/>text<c/></a>'
|
@@ -1006,7 +1007,7 @@ module REXML
|
|
1006
1007
|
# text(xpath = nil) -> text_string or nil
|
1007
1008
|
#
|
1008
1009
|
# Returns the text string from the first text node child
|
1009
|
-
# in a specified element, if it exists,
|
1010
|
+
# in a specified element, if it exists, +nil+ otherwise.
|
1010
1011
|
#
|
1011
1012
|
# With no argument, returns the text from the first text node in +self+:
|
1012
1013
|
#
|
@@ -1014,7 +1015,7 @@ module REXML
|
|
1014
1015
|
# d.root.text.class # => String
|
1015
1016
|
# d.root.text # => "some text "
|
1016
1017
|
#
|
1017
|
-
# With argument +xpath+, returns text from the
|
1018
|
+
# With argument +xpath+, returns text from the first text node
|
1018
1019
|
# in the element that matches +xpath+:
|
1019
1020
|
#
|
1020
1021
|
# d.root.text(1) # => "this is bold!"
|
@@ -1284,16 +1285,11 @@ module REXML
|
|
1284
1285
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1286
|
#
|
1286
1287
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1288
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1289
|
prefix = nil if prefix == 'xmlns'
|
1294
1290
|
|
1295
1291
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1292
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1293
|
|
1298
1294
|
return ret_val unless ret_val.nil?
|
1299
1295
|
return nil if prefix.nil?
|
@@ -2388,17 +2384,6 @@ module REXML
|
|
2388
2384
|
elsif old_attr.kind_of? Hash
|
2389
2385
|
old_attr[value.prefix] = value
|
2390
2386
|
elsif old_attr.prefix != value.prefix
|
2391
|
-
# Check for conflicting namespaces
|
2392
|
-
if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
|
2393
|
-
old_namespace = old_attr.namespace
|
2394
|
-
new_namespace = value.namespace
|
2395
|
-
if old_namespace == new_namespace
|
2396
|
-
raise ParseException.new(
|
2397
|
-
"Namespace conflict in adding attribute \"#{value.name}\": "+
|
2398
|
-
"Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
|
2399
|
-
"prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
|
2400
|
-
end
|
2401
|
-
end
|
2402
2387
|
store value.name, {old_attr.prefix => old_attr,
|
2403
2388
|
value.prefix => value}
|
2404
2389
|
else
|
data/lib/rexml/entity.rb
CHANGED
@@ -12,6 +12,7 @@ module REXML
|
|
12
12
|
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
13
13
|
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
14
14
|
PEREFERENCE = "%#{NAME};"
|
15
|
+
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
15
16
|
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
16
17
|
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
17
18
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
@@ -19,7 +20,7 @@ module REXML
|
|
19
20
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
20
21
|
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
21
22
|
|
22
|
-
attr_reader :name, :external, :ref, :ndata, :pubid
|
23
|
+
attr_reader :name, :external, :ref, :ndata, :pubid, :value
|
23
24
|
|
24
25
|
# Create a new entity. Simple entities can be constructed by passing a
|
25
26
|
# name, value to the constructor; this creates a generic, plain entity
|
@@ -68,14 +69,14 @@ module REXML
|
|
68
69
|
end
|
69
70
|
|
70
71
|
# Evaluates to the unnormalized value of this entity; that is, replacing
|
71
|
-
#
|
72
|
-
# +value()+ in that +value+ only replaces %ent; entities.
|
72
|
+
# &ent; entities.
|
73
73
|
def unnormalized
|
74
|
-
document
|
75
|
-
|
76
|
-
return nil if
|
77
|
-
|
78
|
-
@unnormalized
|
74
|
+
document&.record_entity_expansion
|
75
|
+
|
76
|
+
return nil if @value.nil?
|
77
|
+
|
78
|
+
@unnormalized = Text::unnormalize(@value, parent,
|
79
|
+
entity_expansion_text_limit: document&.entity_expansion_text_limit)
|
79
80
|
end
|
80
81
|
|
81
82
|
#once :unnormalized
|
@@ -121,36 +122,6 @@ module REXML
|
|
121
122
|
write rv
|
122
123
|
rv
|
123
124
|
end
|
124
|
-
|
125
|
-
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
126
|
-
# Returns the value of this entity. At the moment, only internal entities
|
127
|
-
# are processed. If the value contains internal references (IE,
|
128
|
-
# %blah;), those are replaced with their values. IE, if the doctype
|
129
|
-
# contains:
|
130
|
-
# <!ENTITY % foo "bar">
|
131
|
-
# <!ENTITY yada "nanoo %foo; nanoo>
|
132
|
-
# then:
|
133
|
-
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
134
|
-
def value
|
135
|
-
if @value
|
136
|
-
matches = @value.scan(PEREFERENCE_RE)
|
137
|
-
rv = @value.clone
|
138
|
-
if @parent
|
139
|
-
sum = 0
|
140
|
-
matches.each do |entity_reference|
|
141
|
-
entity_value = @parent.entity( entity_reference[0] )
|
142
|
-
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
143
|
-
raise "entity expansion has grown too large"
|
144
|
-
else
|
145
|
-
sum += entity_value.bytesize
|
146
|
-
end
|
147
|
-
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
148
|
-
end
|
149
|
-
end
|
150
|
-
return rv
|
151
|
-
end
|
152
|
-
nil
|
153
|
-
end
|
154
125
|
end
|
155
126
|
|
156
127
|
# This is a set of entity constants -- the ones defined in the XML
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative 'default'
|
3
3
|
|
4
4
|
module REXML
|
@@ -58,7 +58,7 @@ module REXML
|
|
58
58
|
skip = false
|
59
59
|
if compact
|
60
60
|
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
|
61
|
-
string = ""
|
61
|
+
string = +""
|
62
62
|
old_level = @level
|
63
63
|
@level = 0
|
64
64
|
node.children.each { |child| write( child, string ) }
|
@@ -111,7 +111,7 @@ module REXML
|
|
111
111
|
# itself, then we don't need a carriage return... which makes this
|
112
112
|
# logic more complex.
|
113
113
|
node.children.each { |child|
|
114
|
-
next if child
|
114
|
+
next if child.instance_of?(Text)
|
115
115
|
unless child == node.children[0] or child.instance_of?(Text) or
|
116
116
|
(child == node.children[1] and !node.children[0].writethis)
|
117
117
|
output << "\n"
|
data/lib/rexml/functions.rb
CHANGED
@@ -262,11 +262,10 @@ module REXML
|
|
262
262
|
string(string).length
|
263
263
|
end
|
264
264
|
|
265
|
-
# UNTESTED
|
266
265
|
def Functions::normalize_space( string=nil )
|
267
266
|
string = string(@@context[:node]) if string.nil?
|
268
267
|
if string.kind_of? Array
|
269
|
-
string.collect{|x|
|
268
|
+
string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
|
270
269
|
else
|
271
270
|
string.to_s.strip.gsub(/\s+/um, ' ')
|
272
271
|
end
|
data/lib/rexml/namespace.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal:
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'xmltokens'
|
4
4
|
|
@@ -10,13 +10,17 @@ module REXML
|
|
10
10
|
# The expanded name of the object, valid if name is set
|
11
11
|
attr_accessor :prefix
|
12
12
|
include XMLTokens
|
13
|
+
NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
|
13
14
|
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
|
14
15
|
|
15
16
|
# Sets the name and the expanded name
|
16
17
|
def name=( name )
|
17
18
|
@expanded_name = name
|
18
|
-
|
19
|
-
|
19
|
+
if name.match?(NAME_WITHOUT_NAMESPACE)
|
20
|
+
@prefix = ""
|
21
|
+
@namespace = ""
|
22
|
+
@name = name
|
23
|
+
elsif name =~ NAMESPLIT
|
20
24
|
if $1
|
21
25
|
@prefix = $1
|
22
26
|
else
|
@@ -24,7 +28,7 @@ module REXML
|
|
24
28
|
@namespace = ""
|
25
29
|
end
|
26
30
|
@name = $2
|
27
|
-
|
31
|
+
elsif name == ""
|
28
32
|
@prefix = nil
|
29
33
|
@namespace = nil
|
30
34
|
@name = nil
|
data/lib/rexml/node.rb
CHANGED
@@ -52,10 +52,14 @@ module REXML
|
|
52
52
|
|
53
53
|
# Visit all subnodes of +self+ recursively
|
54
54
|
def each_recursive(&block) # :yields: node
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
stack = []
|
56
|
+
each { |child| stack.unshift child if child.node_type == :element }
|
57
|
+
until stack.empty?
|
58
|
+
child = stack.pop
|
59
|
+
yield child
|
60
|
+
n = stack.size
|
61
|
+
child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
|
62
|
+
end
|
59
63
|
end
|
60
64
|
|
61
65
|
# Find (and return) first subnode (recursively) for which the block
|
data/lib/rexml/parseexception.rb
CHANGED