rexml 3.2.5 → 3.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +406 -2
- data/README.md +10 -1
- data/doc/rexml/tasks/rdoc/element.rdoc +2 -2
- data/doc/rexml/tutorial.rdoc +1358 -0
- data/lib/rexml/attribute.rb +14 -9
- data/lib/rexml/document.rb +1 -1
- data/lib/rexml/element.rb +19 -34
- data/lib/rexml/entity.rb +5 -37
- data/lib/rexml/formatters/pretty.rb +3 -3
- data/lib/rexml/functions.rb +1 -2
- data/lib/rexml/namespace.rb +8 -4
- data/lib/rexml/node.rb +8 -4
- data/lib/rexml/parseexception.rb +1 -0
- data/lib/rexml/parsers/baseparser.rb +421 -263
- data/lib/rexml/parsers/pullparser.rb +4 -0
- data/lib/rexml/parsers/sax2parser.rb +6 -19
- data/lib/rexml/parsers/streamparser.rb +8 -10
- data/lib/rexml/parsers/treeparser.rb +9 -21
- data/lib/rexml/parsers/xpathparser.rb +136 -86
- data/lib/rexml/rexml.rb +3 -1
- data/lib/rexml/source.rb +128 -98
- data/lib/rexml/text.rb +40 -18
- data/lib/rexml/xpath_parser.rb +7 -3
- metadata +11 -39
data/lib/rexml/attribute.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal: false
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative "namespace"
|
3
3
|
require_relative 'text'
|
4
4
|
|
@@ -13,9 +13,6 @@ module REXML
|
|
13
13
|
|
14
14
|
# The element to which this attribute belongs
|
15
15
|
attr_reader :element
|
16
|
-
# The normalized value of this attribute. That is, the attribute with
|
17
|
-
# entities intact.
|
18
|
-
attr_writer :normalized
|
19
16
|
PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um
|
20
17
|
|
21
18
|
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
|
@@ -122,10 +119,13 @@ module REXML
|
|
122
119
|
# b = Attribute.new( "ns:x", "y" )
|
123
120
|
# b.to_string # -> "ns:x='y'"
|
124
121
|
def to_string
|
122
|
+
value = to_s
|
125
123
|
if @element and @element.context and @element.context[:attribute_quote] == :quote
|
126
|
-
|
124
|
+
value = value.gsub('"', '&quot;') if value.include?('"')
|
125
|
+
%Q^#@expanded_name="#{value}"^
|
127
126
|
else
|
128
|
-
|
127
|
+
value = value.gsub("'", '&apos;') if value.include?("'")
|
128
|
+
"#@expanded_name='#{value}'"
|
129
129
|
end
|
130
130
|
end
|
131
131
|
|
@@ -141,7 +141,6 @@ module REXML
|
|
141
141
|
return @normalized if @normalized
|
142
142
|
|
143
143
|
@normalized = Text::normalize( @unnormalized, doctype )
|
144
|
-
@unnormalized = nil
|
145
144
|
@normalized
|
146
145
|
end
|
147
146
|
|
@@ -150,10 +149,16 @@ module REXML
|
|
150
149
|
def value
|
151
150
|
return @unnormalized if @unnormalized
|
152
151
|
@unnormalized = Text::unnormalize( @normalized, doctype )
|
153
|
-
@normalized = nil
|
154
152
|
@unnormalized
|
155
153
|
end
|
156
154
|
|
155
|
+
# The normalized value of this attribute. That is, the attribute with
|
156
|
+
# entities intact.
|
157
|
+
def normalized=(new_normalized)
|
158
|
+
@normalized = new_normalized
|
159
|
+
@unnormalized = nil
|
160
|
+
end
|
161
|
+
|
157
162
|
# Returns a copy of this attribute
|
158
163
|
def clone
|
159
164
|
Attribute.new self
|
@@ -190,7 +195,7 @@ module REXML
|
|
190
195
|
end
|
191
196
|
|
192
197
|
def inspect
|
193
|
-
rv = ""
|
198
|
+
rv = +""
|
194
199
|
write( rv )
|
195
200
|
rv
|
196
201
|
end
|
data/lib/rexml/document.rb
CHANGED
@@ -69,7 +69,7 @@ module REXML
|
|
69
69
|
# d.to_s # => "<root><foo>Foo</foo><bar>Bar</bar></root>"
|
70
70
|
#
|
71
71
|
# When argument +document+ is given, it must be an existing
|
72
|
-
# document object, whose context and attributes (but not
|
72
|
+
# document object, whose context and attributes (but not children)
|
73
73
|
# are cloned into the new document:
|
74
74
|
#
|
75
75
|
# d = REXML::Document.new(xml_string)
|
data/lib/rexml/element.rb
CHANGED
@@ -7,14 +7,6 @@ require_relative "xpath"
|
|
7
7
|
require_relative "parseexception"
|
8
8
|
|
9
9
|
module REXML
|
10
|
-
# An implementation note about namespaces:
|
11
|
-
# As we parse, when we find namespaces we put them in a hash and assign
|
12
|
-
# them a unique ID. We then convert the namespace prefix for the node
|
13
|
-
# to the unique ID. This makes namespace lookup much faster for the
|
14
|
-
# cost of extra memory use. We save the namespace prefix for the
|
15
|
-
# context node and convert it back when we write it.
|
16
|
-
@@namespaces = {}
|
17
|
-
|
18
10
|
# An \REXML::Element object represents an XML element.
|
19
11
|
#
|
20
12
|
# An element:
|
@@ -449,9 +441,14 @@ module REXML
|
|
449
441
|
# Related: #root_node, #document.
|
450
442
|
#
|
451
443
|
def root
|
452
|
-
|
453
|
-
|
454
|
-
|
444
|
+
target = self
|
445
|
+
while target
|
446
|
+
return target.elements[1] if target.kind_of? Document
|
447
|
+
parent = target.parent
|
448
|
+
return target if parent.kind_of? Document or parent.nil?
|
449
|
+
target = parent
|
450
|
+
end
|
451
|
+
nil
|
455
452
|
end
|
456
453
|
|
457
454
|
# :call-seq:
|
@@ -627,8 +624,12 @@ module REXML
|
|
627
624
|
else
|
628
625
|
prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
|
629
626
|
end
|
630
|
-
ns =
|
631
|
-
|
627
|
+
ns = nil
|
628
|
+
target = self
|
629
|
+
while ns.nil? and target
|
630
|
+
ns = target.attributes[prefix]
|
631
|
+
target = target.parent
|
632
|
+
end
|
632
633
|
ns = '' if ns.nil? and prefix == 'xmlns'
|
633
634
|
return ns
|
634
635
|
end
|
@@ -989,7 +990,7 @@ module REXML
|
|
989
990
|
# :call-seq:
|
990
991
|
# has_text? -> true or false
|
991
992
|
#
|
992
|
-
# Returns +true if the element has one or more text noded,
|
993
|
+
# Returns +true+ if the element has one or more text noded,
|
993
994
|
# +false+ otherwise:
|
994
995
|
#
|
995
996
|
# d = REXML::Document.new '<a><b/>text<c/></a>'
|
@@ -1006,7 +1007,7 @@ module REXML
|
|
1006
1007
|
# text(xpath = nil) -> text_string or nil
|
1007
1008
|
#
|
1008
1009
|
# Returns the text string from the first text node child
|
1009
|
-
# in a specified element, if it exists,
|
1010
|
+
# in a specified element, if it exists, +nil+ otherwise.
|
1010
1011
|
#
|
1011
1012
|
# With no argument, returns the text from the first text node in +self+:
|
1012
1013
|
#
|
@@ -1014,7 +1015,7 @@ module REXML
|
|
1014
1015
|
# d.root.text.class # => String
|
1015
1016
|
# d.root.text # => "some text "
|
1016
1017
|
#
|
1017
|
-
# With argument +xpath+, returns text from the
|
1018
|
+
# With argument +xpath+, returns text from the first text node
|
1018
1019
|
# in the element that matches +xpath+:
|
1019
1020
|
#
|
1020
1021
|
# d.root.text(1) # => "this is bold!"
|
@@ -1284,16 +1285,11 @@ module REXML
|
|
1284
1285
|
# document.root.attribute("x", "a") # => a:x='a:x'
|
1285
1286
|
#
|
1286
1287
|
def attribute( name, namespace=nil )
|
1287
|
-
prefix =
|
1288
|
-
if namespaces.respond_to? :key
|
1289
|
-
prefix = namespaces.key(namespace) if namespace
|
1290
|
-
else
|
1291
|
-
prefix = namespaces.index(namespace) if namespace
|
1292
|
-
end
|
1288
|
+
prefix = namespaces.key(namespace) if namespace
|
1293
1289
|
prefix = nil if prefix == 'xmlns'
|
1294
1290
|
|
1295
1291
|
ret_val =
|
1296
|
-
attributes.get_attribute(
|
1292
|
+
attributes.get_attribute( prefix ? "#{prefix}:#{name}" : name )
|
1297
1293
|
|
1298
1294
|
return ret_val unless ret_val.nil?
|
1299
1295
|
return nil if prefix.nil?
|
@@ -2388,17 +2384,6 @@ module REXML
|
|
2388
2384
|
elsif old_attr.kind_of? Hash
|
2389
2385
|
old_attr[value.prefix] = value
|
2390
2386
|
elsif old_attr.prefix != value.prefix
|
2391
|
-
# Check for conflicting namespaces
|
2392
|
-
if value.prefix != "xmlns" and old_attr.prefix != "xmlns"
|
2393
|
-
old_namespace = old_attr.namespace
|
2394
|
-
new_namespace = value.namespace
|
2395
|
-
if old_namespace == new_namespace
|
2396
|
-
raise ParseException.new(
|
2397
|
-
"Namespace conflict in adding attribute \"#{value.name}\": "+
|
2398
|
-
"Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+
|
2399
|
-
"prefix \"#{value.prefix}\" = \"#{new_namespace}\"")
|
2400
|
-
end
|
2401
|
-
end
|
2402
2387
|
store value.name, {old_attr.prefix => old_attr,
|
2403
2388
|
value.prefix => value}
|
2404
2389
|
else
|
data/lib/rexml/entity.rb
CHANGED
@@ -12,6 +12,7 @@ module REXML
|
|
12
12
|
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
|
13
13
|
NDATADECL = "\\s+NDATA\\s+#{NAME}"
|
14
14
|
PEREFERENCE = "%#{NAME};"
|
15
|
+
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
15
16
|
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
|
16
17
|
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
|
17
18
|
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
|
@@ -19,7 +20,7 @@ module REXML
|
|
19
20
|
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
20
21
|
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
|
21
22
|
|
22
|
-
attr_reader :name, :external, :ref, :ndata, :pubid
|
23
|
+
attr_reader :name, :external, :ref, :ndata, :pubid, :value
|
23
24
|
|
24
25
|
# Create a new entity. Simple entities can be constructed by passing a
|
25
26
|
# name, value to the constructor; this creates a generic, plain entity
|
@@ -68,14 +69,11 @@ module REXML
|
|
68
69
|
end
|
69
70
|
|
70
71
|
# Evaluates to the unnormalized value of this entity; that is, replacing
|
71
|
-
#
|
72
|
-
# +value()+ in that +value+ only replaces %ent; entities.
|
72
|
+
# &ent; entities.
|
73
73
|
def unnormalized
|
74
74
|
document.record_entity_expansion unless document.nil?
|
75
|
-
|
76
|
-
|
77
|
-
@unnormalized = Text::unnormalize(v, parent)
|
78
|
-
@unnormalized
|
75
|
+
return nil if @value.nil?
|
76
|
+
@unnormalized = Text::unnormalize(@value, parent)
|
79
77
|
end
|
80
78
|
|
81
79
|
#once :unnormalized
|
@@ -121,36 +119,6 @@ module REXML
|
|
121
119
|
write rv
|
122
120
|
rv
|
123
121
|
end
|
124
|
-
|
125
|
-
PEREFERENCE_RE = /#{PEREFERENCE}/um
|
126
|
-
# Returns the value of this entity. At the moment, only internal entities
|
127
|
-
# are processed. If the value contains internal references (IE,
|
128
|
-
# %blah;), those are replaced with their values. IE, if the doctype
|
129
|
-
# contains:
|
130
|
-
# <!ENTITY % foo "bar">
|
131
|
-
# <!ENTITY yada "nanoo %foo; nanoo>
|
132
|
-
# then:
|
133
|
-
# doctype.entity('yada').value #-> "nanoo bar nanoo"
|
134
|
-
def value
|
135
|
-
if @value
|
136
|
-
matches = @value.scan(PEREFERENCE_RE)
|
137
|
-
rv = @value.clone
|
138
|
-
if @parent
|
139
|
-
sum = 0
|
140
|
-
matches.each do |entity_reference|
|
141
|
-
entity_value = @parent.entity( entity_reference[0] )
|
142
|
-
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
|
143
|
-
raise "entity expansion has grown too large"
|
144
|
-
else
|
145
|
-
sum += entity_value.bytesize
|
146
|
-
end
|
147
|
-
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
|
148
|
-
end
|
149
|
-
end
|
150
|
-
return rv
|
151
|
-
end
|
152
|
-
nil
|
153
|
-
end
|
154
122
|
end
|
155
123
|
|
156
124
|
# This is a set of entity constants -- the ones defined in the XML
|
data/lib/rexml/formatters/pretty.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal: false
|
1
|
+
# frozen_string_literal: true
|
2
2
|
require_relative 'default'
|
3
3
|
|
4
4
|
module REXML
|
@@ -58,7 +58,7 @@ module REXML
|
|
58
58
|
skip = false
|
59
59
|
if compact
|
60
60
|
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
|
61
|
-
string = ""
|
61
|
+
string = +""
|
62
62
|
old_level = @level
|
63
63
|
@level = 0
|
64
64
|
node.children.each { |child| write( child, string ) }
|
@@ -111,7 +111,7 @@ module REXML
|
|
111
111
|
# itself, then we don't need a carriage return... which makes this
|
112
112
|
# logic more complex.
|
113
113
|
node.children.each { |child|
|
114
|
-
next if child
|
114
|
+
next if child.instance_of?(Text)
|
115
115
|
unless child == node.children[0] or child.instance_of?(Text) or
|
116
116
|
(child == node.children[1] and !node.children[0].writethis)
|
117
117
|
output << "\n"
|
data/lib/rexml/functions.rb
CHANGED
@@ -262,11 +262,10 @@ module REXML
|
|
262
262
|
string(string).length
|
263
263
|
end
|
264
264
|
|
265
|
-
# UNTESTED
|
266
265
|
def Functions::normalize_space( string=nil )
|
267
266
|
string = string(@@context[:node]) if string.nil?
|
268
267
|
if string.kind_of? Array
|
269
|
-
string.collect{|x|
|
268
|
+
string.collect{|x| x.to_s.strip.gsub(/\s+/um, ' ') if x}
|
270
269
|
else
|
271
270
|
string.to_s.strip.gsub(/\s+/um, ' ')
|
272
271
|
end
|
data/lib/rexml/namespace.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# frozen_string_literal: false
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'xmltokens'
|
4
4
|
|
@@ -10,13 +10,17 @@ module REXML
|
|
10
10
|
# The expanded name of the object, valid if name is set
|
11
11
|
attr_accessor :prefix
|
12
12
|
include XMLTokens
|
13
|
+
NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
|
13
14
|
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u
|
14
15
|
|
15
16
|
# Sets the name and the expanded name
|
16
17
|
def name=( name )
|
17
18
|
@expanded_name = name
|
18
|
-
|
19
|
-
|
19
|
+
if name.match?(NAME_WITHOUT_NAMESPACE)
|
20
|
+
@prefix = ""
|
21
|
+
@namespace = ""
|
22
|
+
@name = name
|
23
|
+
elsif name =~ NAMESPLIT
|
20
24
|
if $1
|
21
25
|
@prefix = $1
|
22
26
|
else
|
@@ -24,7 +28,7 @@ module REXML
|
|
24
28
|
@namespace = ""
|
25
29
|
end
|
26
30
|
@name = $2
|
27
|
-
|
31
|
+
elsif name == ""
|
28
32
|
@prefix = nil
|
29
33
|
@namespace = nil
|
30
34
|
@name = nil
|
data/lib/rexml/node.rb
CHANGED
@@ -52,10 +52,14 @@ module REXML
|
|
52
52
|
|
53
53
|
# Visit all subnodes of +self+ recursively
|
54
54
|
def each_recursive(&block) # :yields: node
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
55
|
+
stack = []
|
56
|
+
each { |child| stack.unshift child if child.node_type == :element }
|
57
|
+
until stack.empty?
|
58
|
+
child = stack.pop
|
59
|
+
yield child
|
60
|
+
n = stack.size
|
61
|
+
child.each { |grandchild| stack.insert n, grandchild if grandchild.node_type == :element }
|
62
|
+
end
|
59
63
|
end
|
60
64
|
|
61
65
|
# Find (and return) first subnode (recursively) for which the block
|
data/lib/rexml/parseexception.rb
CHANGED