rexml 3.1.7.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of rexml might be problematic. Click here for more details.

Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.travis.yml +10 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +60 -0
  7. data/Rakefile +10 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +8 -0
  10. data/lib/rexml/attlistdecl.rb +63 -0
  11. data/lib/rexml/attribute.rb +192 -0
  12. data/lib/rexml/cdata.rb +68 -0
  13. data/lib/rexml/child.rb +97 -0
  14. data/lib/rexml/comment.rb +80 -0
  15. data/lib/rexml/doctype.rb +270 -0
  16. data/lib/rexml/document.rb +291 -0
  17. data/lib/rexml/dtd/attlistdecl.rb +11 -0
  18. data/lib/rexml/dtd/dtd.rb +47 -0
  19. data/lib/rexml/dtd/elementdecl.rb +18 -0
  20. data/lib/rexml/dtd/entitydecl.rb +57 -0
  21. data/lib/rexml/dtd/notationdecl.rb +40 -0
  22. data/lib/rexml/element.rb +1267 -0
  23. data/lib/rexml/encoding.rb +51 -0
  24. data/lib/rexml/entity.rb +171 -0
  25. data/lib/rexml/formatters/default.rb +112 -0
  26. data/lib/rexml/formatters/pretty.rb +142 -0
  27. data/lib/rexml/formatters/transitive.rb +58 -0
  28. data/lib/rexml/functions.rb +447 -0
  29. data/lib/rexml/instruction.rb +71 -0
  30. data/lib/rexml/light/node.rb +196 -0
  31. data/lib/rexml/namespace.rb +48 -0
  32. data/lib/rexml/node.rb +76 -0
  33. data/lib/rexml/output.rb +30 -0
  34. data/lib/rexml/parent.rb +166 -0
  35. data/lib/rexml/parseexception.rb +52 -0
  36. data/lib/rexml/parsers/baseparser.rb +586 -0
  37. data/lib/rexml/parsers/lightparser.rb +59 -0
  38. data/lib/rexml/parsers/pullparser.rb +197 -0
  39. data/lib/rexml/parsers/sax2parser.rb +273 -0
  40. data/lib/rexml/parsers/streamparser.rb +61 -0
  41. data/lib/rexml/parsers/treeparser.rb +101 -0
  42. data/lib/rexml/parsers/ultralightparser.rb +57 -0
  43. data/lib/rexml/parsers/xpathparser.rb +675 -0
  44. data/lib/rexml/quickpath.rb +266 -0
  45. data/lib/rexml/rexml.rb +32 -0
  46. data/lib/rexml/sax2listener.rb +98 -0
  47. data/lib/rexml/security.rb +28 -0
  48. data/lib/rexml/source.rb +298 -0
  49. data/lib/rexml/streamlistener.rb +93 -0
  50. data/lib/rexml/syncenumerator.rb +33 -0
  51. data/lib/rexml/text.rb +424 -0
  52. data/lib/rexml/undefinednamespaceexception.rb +9 -0
  53. data/lib/rexml/validation/relaxng.rb +539 -0
  54. data/lib/rexml/validation/validation.rb +144 -0
  55. data/lib/rexml/validation/validationexception.rb +10 -0
  56. data/lib/rexml/xmldecl.rb +116 -0
  57. data/lib/rexml/xmltokens.rb +85 -0
  58. data/lib/rexml/xpath.rb +81 -0
  59. data/lib/rexml/xpath_parser.rb +934 -0
  60. data/rexml.gemspec +42 -0
  61. metadata +131 -0
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: false
2
+ require_relative "../child"
3
module REXML
  module DTD
    # Pattern constants for recognising an <!ATTLIST ...> declaration in DTD
    # text. The class body defines nothing besides these constants.
    class AttlistDecl < Child
      # Literal text that opens an attribute-list declaration.
      START = "<!ATTLIST"
      # Matches START at the beginning of a line, after optional whitespace.
      START_RE = /^\s*#{START}/um
      # Captures (group 1) the declaration from START through the first ">".
      # The match is non-greedy and, because of the /m flag, may span lines.
      PATTERN_RE = /\s*(#{START}.*?>)/um
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: false
2
+ require_relative "elementdecl"
3
+ require_relative "entitydecl"
4
+ require_relative "../comment"
5
+ require_relative "notationdecl"
6
+ require_relative "attlistdecl"
7
+ require_relative "../parent"
8
+
9
module REXML
  module DTD
    # Splits the text of a DTD into declaration objects.
    class Parser
      # Parses +input+ into a Parent holding one child per declaration.
      # input::
      #   a String of DTD text, or a File whose remaining content is read.
      # Returns::
      #   the Parent built by parse_helper, or +nil+ when +input+ is neither
      #   a String nor a File.
      def Parser.parse( input )
        case input
        when String
          parse_helper input
        when File
          parse_helper input.read
        end
      end

      # Takes a String and parses it out, one declaration at a time.
      #
      # Each pass matches the unconsumed text against the declaration
      # patterns, wraps the match in the corresponding class and then
      # continues after the end of the match, so the loop always advances.
      # Raises:: ParseException when the remaining text matches no pattern.
      #
      # NOTE(review): the EntityDecl and NotationDecl classes shipped next to
      # this file define no PATTERN_RE of their own, and Comment is not
      # visible from here; unless an ancestor supplies the constant those
      # branches raise NameError when reached. Confirm the intended patterns.
      # NOTE(review): AttlistDecl defines no initialize, so the MatchData is
      # handed to whatever Child#initialize expects - verify.
      def Parser.parse_helper( input )
        contents = Parent.new
        remaining = input
        # Trailing whitespace is not a declaration; stop once only that is left.
        until remaining.strip.empty?
          case remaining
          when ElementDecl::PATTERN_RE
            # ElementDecl#initialize reads capture groups 1 and 2, so it needs
            # the MatchData rather than the matched text.
            matchdata = $~
            contents << ElementDecl.new( matchdata )
          when AttlistDecl::PATTERN_RE
            matchdata = $~
            contents << AttlistDecl.new( matchdata )
          when EntityDecl::PATTERN_RE
            matchdata = $~
            contents << EntityDecl.new( matchdata )
          when Comment::PATTERN_RE
            matchdata = $~
            contents << Comment.new( matchdata )
          when NotationDecl::PATTERN_RE
            matchdata = $~
            contents << NotationDecl.new( matchdata )
          else
            raise ParseException.new( "unrecognised DTD content: #{remaining[0, 80]}" )
          end
          remaining = matchdata.post_match
        end
        contents
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: false
2
+ require_relative "../child"
3
module REXML
  module DTD
    # An <!ELEMENT ...> declaration from a DTD.
    class ElementDecl < Child
      # Literal text that opens an element declaration.
      START = "<!ELEMENT"
      # Matches START at the beginning of a line, after optional whitespace.
      START_RE = /^\s*#{START}/um
      # PATTERN_RE = /^\s*(#{START}.*?)>/um
      # Group 1 is the (optionally prefixed) element name, group 2 the rest
      # of the declaration up to, but not including, the first ">".
      PATTERN_RE = /^\s*#{START}\s+((?:[:\w][-\.\w]*:)?[-!\*\.\w]*)(.*?)>/
      #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)

      # match::
      #   an object indexable with 1 and 2, giving the declared name and the
      #   remainder of the declaration (the groups of PATTERN_RE).
      def initialize match
        # Run the Child initialisation like the sibling EntityDecl and
        # NotationDecl classes do, instead of leaving inherited state unset.
        super()
        @name = match[1]
        @rest = match[2]
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: false
2
+ require_relative "../child"
3
module REXML
  module DTD
    # An <!ENTITY ...> declaration from a DTD.
    class EntityDecl < Child
      # Literal text that opens an entity declaration.
      START = "<!ENTITY"
      # Matches START at the beginning of a line, after optional whitespace.
      START_RE = /^\s*#{START}/um
      # name PUBLIC "public id" "system id"; groups: 1 name, 2 and 4 the
      # quoted identifiers (quotes included).
      PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
      # name SYSTEM "system id" [NDATA notation]; groups: 1 name, 2 quoted id.
      SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
      # name "value"; groups: 1 name, 2 quoted value.
      PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
      # % name "value" (parameter entity); groups: 1 name, 2 quoted value.
      PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um

      # Reads one entity declaration from +src+.
      #   <!ENTITY name SYSTEM "...">
      #   <!ENTITY name "...">
      # src::
      #   must respond to match(pattern) and match(pattern, true); the
      #   two-argument form is presumably the consuming match of a REXML
      #   source object - TODO confirm.
      # Raises:: ParseException when none of the four patterns matches.
      def initialize src
        super()
        md = nil
        # Each pattern is probed first and only then matched with the second
        # argument, so a failed probe never consumes input.
        if src.match( PUBLIC )
          md = src.match( PUBLIC, true )
          @middle = "PUBLIC"
          @content = "#{md[2]} #{md[4]}"
        elsif src.match( SYSTEM )
          md = src.match( SYSTEM, true )
          @middle = "SYSTEM"
          @content = md[2]
        elsif src.match( PLAIN )
          md = src.match( PLAIN, true )
          @middle = ""
          @content = md[2]
        elsif src.match( PERCENT )
          md = src.match( PERCENT, true )
          @middle = ""
          @content = md[2]
        end
        raise ParseException.new("failed Entity match", src) if md.nil?
        @name = md[1]
      end

      # Renders the declaration as text, e.g. <!ENTITY name SYSTEM "uri">.
      #
      # NOTE(review): the "%" of a parameter entity is not recorded by
      # initialize, so it is not reproduced here.
      def to_s
        rv = "<!ENTITY #@name "
        rv << "#@middle " if @middle.size > 0
        rv << @content
        # Close the declaration, as NotationDecl#to_s does.
        rv << ">"
        rv
      end

      # Indents +output+ through the inherited indent helper, then appends
      # the text form of the declaration.
      def write( output, indent )
        indent( output, indent )
        output << to_s
      end

      # Consumes a declaration from +source+ and forwards its text, with
      # runs of a repeated whitespace character collapsed, to +listener+.
      #
      # NOTE(review): PATTERN_RE is not defined in this class, and the
      # message sent is the downcased inspect string of the class itself;
      # both look unintended - confirm before relying on this method.
      def EntityDecl.parse_source source, listener
        md = source.match( PATTERN_RE, true )
        thing = md[0].squeeze(" \t\n\r")
        listener.send inspect.downcase, thing
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: false
2
+ require_relative "../child"
3
module REXML
  module DTD
    # A <!NOTATION ...> declaration from a DTD.
    class NotationDecl < Child
      # Literal text that opens a notation declaration.
      START = "<!NOTATION"
      # Matches START at the beginning of a line, after optional whitespace.
      START_RE = /^\s*#{START}/um
      # Groups: 1 name, 2 the keyword, 3 the quoted identifier.
      PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
      SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um

      # Reads one notation declaration from +src+.
      # src::
      #   must respond to match(pattern) and match(pattern, true); presumably
      #   a REXML source object - TODO confirm.
      # Raises:: ParseException when neither PUBLIC nor SYSTEM matches.
      def initialize src
        super()
        # Probe PUBLIC first, then SYSTEM; only the winner is matched again
        # with the second argument set.
        pattern = [PUBLIC, SYSTEM].find { |candidate| src.match( candidate ) }
        unless pattern
          raise ParseException.new( "error parsing notation: no matching pattern", src )
        end
        md = src.match( pattern, true )
        @name = md[1]
        @middle = md[2]
        @rest = md[3]
      end

      # Renders the declaration as text.
      def to_s
        "<!NOTATION #{@name} #{@middle} #{@rest}>"
      end

      # Indents +output+ through the inherited indent helper, then appends
      # the text form of the declaration.
      def write( output, indent )
        indent( output, indent )
        output << to_s
      end

      # Consumes a declaration from +source+ and forwards its text, with
      # runs of a repeated whitespace character collapsed, to +listener+.
      #
      # NOTE(review): PATTERN_RE is not defined in this class, and the
      # message sent is the downcased inspect string of the class itself -
      # confirm the intent.
      def NotationDecl.parse_source source, listener
        matched = source.match( PATTERN_RE, true )
        collapsed = matched[0].squeeze(" \t\n\r")
        listener.send inspect.downcase, collapsed
      end
    end
  end
end
@@ -0,0 +1,1267 @@
1
+ # frozen_string_literal: false
2
+ require_relative "parent"
3
+ require_relative "namespace"
4
+ require_relative "attribute"
5
+ require_relative "cdata"
6
+ require_relative "xpath"
7
+ require_relative "parseexception"
8
+
9
+ module REXML
10
+ # An implementation note about namespaces:
11
+ # As we parse, when we find namespaces we put them in a hash and assign
12
+ # them a unique ID. We then convert the namespace prefix for the node
13
+ # to the unique ID. This makes namespace lookup much faster for the
14
+ # cost of extra memory use. We save the namespace prefix for the
15
+ # context node and convert it back when we write it.
16
+ @@namespaces = {}
17
+
18
+ # Represents a tagged XML element. Elements are characterized by
19
+ # having children, attributes, and names, and can themselves be
20
+ # children.
21
+ class Element < Parent
22
+ include Namespace
23
+
24
+ UNDEFINED = "UNDEFINED"; # The default name
25
+
26
+ # Mechanisms for accessing attributes and child elements of this
27
+ # element.
28
+ attr_reader :attributes, :elements
29
+ # The context holds information about the processing environment, such as
30
+ # whitespace handling.
31
+ attr_accessor :context
32
+
33
+ # Constructor
34
+ # arg::
35
+ # if not supplied, will be set to the default value.
36
+ # If a String, the name of this object will be set to the argument.
37
+ # If an Element, the object will be shallowly cloned; name,
38
+ # attributes, and namespaces will be copied. Children will +not+ be
39
+ # copied.
40
+ # parent::
41
+ # if supplied, must be a Parent, and will be used as
42
+ # the parent of this object.
43
+ # context::
44
+ # If supplied, must be a hash containing context items. Context items
45
+ # include:
46
+ # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
47
+ # strings being the names of the elements to respect
48
+ # whitespace for. Defaults to :+all+.
49
+ # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
50
+ # strings being the names of the elements to ignore whitespace on.
51
+ # Overrides :+respect_whitespace+.
52
+ # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
53
+ # of strings being the names of the elements in which to ignore
54
+ # whitespace-only nodes. If this is set, Text nodes which contain only
55
+ # whitespace will not be added to the document tree.
56
+ # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
57
+ # the elements to process in raw mode. In raw mode, special
58
+ # characters in text is not converted to or from entities.
59
+ def initialize( arg = UNDEFINED, parent=nil, context=nil )
60
+ super(parent)
61
+
62
+ @elements = Elements.new(self)
63
+ @attributes = Attributes.new(self)
64
+ @context = context
65
+
66
+ if arg.kind_of? String
67
+ self.name = arg
68
+ elsif arg.kind_of? Element
69
+ self.name = arg.expanded_name
70
+ arg.attributes.each_attribute{ |attribute|
71
+ @attributes << Attribute.new( attribute )
72
+ }
73
+ @context = arg.context
74
+ end
75
+ end
76
+
77
+ def inspect
78
+ rv = "<#@expanded_name"
79
+
80
+ @attributes.each_attribute do |attr|
81
+ rv << " "
82
+ attr.write( rv, 0 )
83
+ end
84
+
85
+ if children.size > 0
86
+ rv << "> ... </>"
87
+ else
88
+ rv << "/>"
89
+ end
90
+ end
91
+
92
+
93
+ # Creates a shallow copy of self.
94
+ # d = Document.new "<a><b/><b/><c><d/></c></a>"
95
+ # new_a = d.root.clone
96
+ # puts new_a # => "<a/>"
97
+ def clone
98
+ self.class.new self
99
+ end
100
+
101
+ # Evaluates to the root node of the document that this element
102
+ # belongs to. If this element doesn't belong to a document, but does
103
+ # belong to another Element, the parent's root will be returned, until the
104
+ # earliest ancestor is found.
105
+ #
106
+ # Note that this is not the same as the document element.
107
+ # In the following example, <a> is the document element, and the root
108
+ # node is the parent node of the document element. You may ask yourself
109
+ # why the root node is useful: consider the doctype and XML declaration,
110
+ # and any processing instructions before the document element... they
111
+ # are children of the root node, or siblings of the document element.
112
+ # The only time this isn't true is when an Element is created that is
113
+ # not part of any Document. In this case, the ancestor that has no
114
+ # parent acts as the root node.
115
+ # d = Document.new '<a><b><c/></b></a>'
116
+ # a = d[1] ; c = a[1][1]
117
+ # d.root_node == d # TRUE
118
+ # a.root_node # namely, d
119
+ # c.root_node # again, d
120
+ def root_node
121
+ parent.nil? ? self : parent.root_node
122
+ end
123
+
124
+ def root
125
+ return elements[1] if self.kind_of? Document
126
+ return self if parent.kind_of? Document or parent.nil?
127
+ return parent.root
128
+ end
129
+
130
+ # Evaluates to the document to which this element belongs, or nil if this
131
+ # element doesn't belong to a document.
132
+ def document
133
+ rt = root
134
+ rt.parent if rt
135
+ end
136
+
137
+ # Evaluates to +true+ if whitespace is respected for this element. This
138
+ # is the case if:
139
+ # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
140
+ # 2. The context has :+respect_whitespace+ set to :+all+ or
141
+ # an array containing the name of this element, and
142
+ # :+compress_whitespace+ isn't set to :+all+ or an array containing the
143
+ # name of this element.
144
+ # The evaluation is tested against +expanded_name+, and so is namespace
145
+ # sensitive.
146
+ def whitespace
147
+ @whitespace = nil
148
+ if @context
149
+ if @context[:respect_whitespace]
150
+ @whitespace = (@context[:respect_whitespace] == :all or
151
+ @context[:respect_whitespace].include? expanded_name)
152
+ end
153
+ @whitespace = false if (@context[:compress_whitespace] and
154
+ (@context[:compress_whitespace] == :all or
155
+ @context[:compress_whitespace].include? expanded_name)
156
+ )
157
+ end
158
+ @whitespace = true unless @whitespace == false
159
+ @whitespace
160
+ end
161
+
162
+ def ignore_whitespace_nodes
163
+ @ignore_whitespace_nodes = false
164
+ if @context
165
+ if @context[:ignore_whitespace_nodes]
166
+ @ignore_whitespace_nodes =
167
+ (@context[:ignore_whitespace_nodes] == :all or
168
+ @context[:ignore_whitespace_nodes].include? expanded_name)
169
+ end
170
+ end
171
+ end
172
+
173
+ # Evaluates to +true+ if raw mode is set for this element. This
174
+ # is the case if the context has :+raw+ set to :+all+ or
175
+ # an array containing the name of this element.
176
+ #
177
+ # The evaluation is tested against +expanded_name+, and so is namespace
178
+ # sensitive.
179
+ def raw
180
+ @raw = (@context and @context[:raw] and
181
+ (@context[:raw] == :all or
182
+ @context[:raw].include? expanded_name))
183
+ @raw
184
+ end
185
+
186
+ #once :whitespace, :raw, :ignore_whitespace_nodes
187
+
188
+ #################################################
189
+ # Namespaces #
190
+ #################################################
191
+
192
+ # Evaluates to an +Array+ containing the prefixes (names) of all defined
193
+ # namespaces at this context node.
194
+ # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
195
+ # doc.elements['//b'].prefixes # -> ['x', 'y']
196
+ def prefixes
197
+ prefixes = []
198
+ prefixes = parent.prefixes if parent
199
+ prefixes |= attributes.prefixes
200
+ return prefixes
201
+ end
202
+
203
+ def namespaces
204
+ namespaces = {}
205
+ namespaces = parent.namespaces if parent
206
+ namespaces = namespaces.merge( attributes.namespaces )
207
+ return namespaces
208
+ end
209
+
210
+ # Evaluates to the URI for a prefix, or the empty string if no such
211
+ # namespace is declared for this element. Evaluates recursively for
212
+ # ancestors. Returns the default namespace, if there is one.
213
+ # prefix::
214
+ # the prefix to search for. If not supplied, returns the default
215
+ # namespace if one exists
216
+ # Returns::
217
+ # the namespace URI as a String, or nil if no such namespace
218
+ # exists. If the namespace is undefined, returns an empty string
219
+ # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
220
+ # b = doc.elements['//b']
221
+ # b.namespace # -> '1'
222
+ # b.namespace("y") # -> '2'
223
+ def namespace(prefix=nil)
224
+ if prefix.nil?
225
+ prefix = prefix()
226
+ end
227
+ if prefix == ''
228
+ prefix = "xmlns"
229
+ else
230
+ prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
231
+ end
232
+ ns = attributes[ prefix ]
233
+ ns = parent.namespace(prefix) if ns.nil? and parent
234
+ ns = '' if ns.nil? and prefix == 'xmlns'
235
+ return ns
236
+ end
237
+
238
+ # Adds a namespace to this element.
239
+ # prefix::
240
+ # the prefix string, or the namespace URI if +uri+ is not
241
+ # supplied
242
+ # uri::
243
+ # the namespace URI. May be nil, in which +prefix+ is used as
244
+ # the URI
245
+ # Evaluates to: this Element
246
+ # a = Element.new("a")
247
+ # a.add_namespace("xmlns:foo", "bar" )
248
+ # a.add_namespace("foo", "bar") # shorthand for previous line
249
+ # a.add_namespace("twiddle")
250
+ # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
251
+ def add_namespace( prefix, uri=nil )
252
+ unless uri
253
+ @attributes["xmlns"] = prefix
254
+ else
255
+ prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
256
+ @attributes[ prefix ] = uri
257
+ end
258
+ self
259
+ end
260
+
261
+ # Removes a namespace from this node. This only works if the namespace is
262
+ # actually declared in this node. If no argument is passed, deletes the
263
+ # default namespace.
264
+ #
265
+ # Evaluates to: this element
266
+ # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
267
+ # doc.root.delete_namespace
268
+ # puts doc # -> <a xmlns:foo='bar'/>
269
+ # doc.root.delete_namespace 'foo'
270
+ # puts doc # -> <a/>
271
+ def delete_namespace namespace="xmlns"
272
+ namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
273
+ attribute = attributes.get_attribute(namespace)
274
+ attribute.remove unless attribute.nil?
275
+ self
276
+ end
277
+
278
+ #################################################
279
+ # Elements #
280
+ #################################################
281
+
282
+ # Adds a child to this element, optionally setting attributes in
283
+ # the element.
284
+ # element::
285
+ # optional. If Element, the element is added.
286
+ # Otherwise, a new Element is constructed with the argument (see
287
+ # Element.initialize).
288
+ # attrs::
289
+ # If supplied, must be a Hash containing String name,value
290
+ # pairs, which will be used to set the attributes of the new Element.
291
+ # Returns:: the Element that was added
292
+ # el = doc.add_element 'my-tag'
293
+ # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
294
+ # el = Element.new 'my-tag'
295
+ # doc.add_element el
296
+ def add_element element, attrs=nil
297
+ raise "First argument must be either an element name, or an Element object" if element.nil?
298
+ el = @elements.add(element)
299
+ attrs.each do |key, value|
300
+ el.attributes[key]=value
301
+ end if attrs.kind_of? Hash
302
+ el
303
+ end
304
+
305
+ # Deletes a child element.
306
+ # element::
307
+ # Must be an +Element+, +String+, or +Integer+. If Element,
308
+ # the element is removed. If String, the element is found (via XPath)
309
+ # and removed. <em>This means that any parent can remove any
310
+ # descendant.<em> If Integer, the Element indexed by that number will be
311
+ # removed.
312
+ # Returns:: the element that was removed.
313
+ # doc.delete_element "/a/b/c[@id='4']"
314
+ # doc.delete_element doc.elements["//k"]
315
+ # doc.delete_element 1
316
+ def delete_element element
317
+ @elements.delete element
318
+ end
319
+
320
+ # Evaluates to +true+ if this element has at least one child Element
321
+ # doc = Document.new "<a><b/><c>Text</c></a>"
322
+ # doc.root.has_elements # -> true
323
+ # doc.elements["/a/b"].has_elements # -> false
324
+ # doc.elements["/a/c"].has_elements # -> false
325
+ def has_elements?
326
+ !@elements.empty?
327
+ end
328
+
329
+ # Iterates through the child elements, yielding for each Element that
330
+ # has a particular attribute set.
331
+ # key::
332
+ # the name of the attribute to search for
333
+ # value::
334
+ # the value of the attribute
335
+ # max::
336
+ # (optional) causes this method to return after yielding
337
+ # for this number of matching children
338
+ # name::
339
+ # (optional) if supplied, this is an XPath that filters
340
+ # the children to check.
341
+ #
342
+ # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>"
343
+ # # Yields b, c, d
344
+ # doc.root.each_element_with_attribute( 'id' ) {|e| p e}
345
+ # # Yields b, d
346
+ # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
347
+ # # Yields b
348
+ # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
349
+ # # Yields d
350
+ # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
351
+ def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
352
+ each_with_something( proc {|child|
353
+ if value.nil?
354
+ child.attributes[key] != nil
355
+ else
356
+ child.attributes[key]==value
357
+ end
358
+ }, max, name, &block )
359
+ end
360
+
361
+ # Iterates through the children, yielding for each Element that
362
+ # has a particular text set.
363
+ # text::
364
+ # the text to search for. If nil, or not supplied, will iterate
365
+ # over all +Element+ children that contain at least one +Text+ node.
366
+ # max::
367
+ # (optional) causes this method to return after yielding
368
+ # for this number of matching children
369
+ # name::
370
+ # (optional) if supplied, this is an XPath that filters
371
+ # the children to check.
372
+ #
373
+ # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
374
+ # # Yields b, c, d
375
+ # doc.each_element_with_text {|e|p e}
376
+ # # Yields b, c
377
+ # doc.each_element_with_text('b'){|e|p e}
378
+ # # Yields b
379
+ # doc.each_element_with_text('b', 1){|e|p e}
380
+ # # Yields d
381
+ # doc.each_element_with_text(nil, 0, 'd'){|e|p e}
382
+ def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
383
+ each_with_something( proc {|child|
384
+ if text.nil?
385
+ child.has_text?
386
+ else
387
+ child.text == text
388
+ end
389
+ }, max, name, &block )
390
+ end
391
+
392
+ # Synonym for Element.elements.each
393
+ def each_element( xpath=nil, &block ) # :yields: Element
394
+ @elements.each( xpath, &block )
395
+ end
396
+
397
+ # Synonym for Element.to_a
398
+ # This is a little slower than calling elements.each directly.
399
+ # xpath:: any XPath by which to search for elements in the tree
400
+ # Returns:: an array of Elements that match the supplied path
401
+ def get_elements( xpath )
402
+ @elements.to_a( xpath )
403
+ end
404
+
405
+ # Returns the next sibling that is an element, or nil if there is
406
+ # no Element sibling after this one
407
+ # doc = Document.new '<a><b/>text<c/></a>'
408
+ # doc.root.elements['b'].next_element #-> <c/>
409
+ # doc.root.elements['c'].next_element #-> nil
410
+ def next_element
411
+ element = next_sibling
412
+ element = element.next_sibling until element.nil? or element.kind_of? Element
413
+ return element
414
+ end
415
+
416
+ # Returns the previous sibling that is an element, or nil if there is
417
+ # no Element sibling prior to this one
418
+ # doc = Document.new '<a><b/>text<c/></a>'
419
+ # doc.root.elements['c'].previous_element #-> <b/>
420
+ # doc.root.elements['b'].previous_element #-> nil
421
+ def previous_element
422
+ element = previous_sibling
423
+ element = element.previous_sibling until element.nil? or element.kind_of? Element
424
+ return element
425
+ end
426
+
427
+
428
+ #################################################
429
+ # Text #
430
+ #################################################
431
+
432
+ # Evaluates to +true+ if this element has at least one Text child
433
+ def has_text?
434
+ not text().nil?
435
+ end
436
+
437
+ # A convenience method which returns the String value of the _first_
438
+ # child text element, if one exists, and +nil+ otherwise.
439
+ #
440
+ # <em>Note that an element may have multiple Text elements, perhaps
441
+ # separated by other children</em>. Be aware that this method only returns
442
+ # the first Text node.
443
+ #
444
+ # This method returns the +value+ of the first text child node, which
445
+ # ignores the +raw+ setting, so always returns normalized text. See
446
+ # the Text::value documentation.
447
+ #
448
+ # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
449
+ # # The element 'p' has two text elements, "some text " and " more text".
450
+ # doc.root.text #-> "some text "
451
+ def text( path = nil )
452
+ rv = get_text(path)
453
+ return rv.value unless rv.nil?
454
+ nil
455
+ end
456
+
457
+ # Returns the first child Text node, if any, or +nil+ otherwise.
458
+ # This method returns the actual +Text+ node, rather than the String content.
459
+ # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
460
+ # # The element 'p' has two text elements, "some text " and " more text".
461
+ # doc.root.get_text.value #-> "some text "
462
+ def get_text path = nil
463
+ rv = nil
464
+ if path
465
+ element = @elements[ path ]
466
+ rv = element.get_text unless element.nil?
467
+ else
468
+ rv = @children.find { |node| node.kind_of? Text }
469
+ end
470
+ return rv
471
+ end
472
+
473
+ # Sets the first Text child of this object. See text() for a
474
+ # discussion about Text children.
475
+ #
476
+ # If a Text child already exists, the child is replaced by this
477
+ # content. This means that Text content can be deleted by calling
478
+ # this method with a nil argument. In this case, the next Text
479
+ # child becomes the first Text child. In no case is the order of
480
+ # any siblings disturbed.
481
+ # text::
482
+ # If a String, a new Text child is created and added to
483
+ # this Element as the first Text child. If Text, the text is set
484
+ # as the first Child element. If nil, then any existing first Text
485
+ # child is removed.
486
+ # Returns:: this Element.
487
+ # doc = Document.new '<a><b/></a>'
488
+ # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
489
+ # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
490
+ # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
491
+ # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
492
+ # doc.root.text = nil #-> '<a><b/><c/></a>'
493
+ def text=( text )
494
+ if text.kind_of? String
495
+ text = Text.new( text, whitespace(), nil, raw() )
496
+ elsif !text.nil? and !text.kind_of? Text
497
+ text = Text.new( text.to_s, whitespace(), nil, raw() )
498
+ end
499
+ old_text = get_text
500
+ if text.nil?
501
+ old_text.remove unless old_text.nil?
502
+ else
503
+ if old_text.nil?
504
+ self << text
505
+ else
506
+ old_text.replace_with( text )
507
+ end
508
+ end
509
+ return self
510
+ end
511
+
512
+ # A helper method to add a Text child. Actual Text instances can
513
+ # be added with regular Parent methods, such as add() and <<()
514
+ # text::
515
+ # if a String, a new Text instance is created and added
516
+ # to the parent. If Text, the object is added directly.
517
+ # Returns:: this Element
518
+ # e = Element.new('a') #-> <e/>
519
+ # e.add_text 'foo' #-> <e>foo</e>
520
+ # e.add_text Text.new(' bar') #-> <e>foo bar</e>
521
+ # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
522
+ # element and <b>2</b> Text node children.
523
+ def add_text( text )
524
+ if text.kind_of? String
525
+ if @children[-1].kind_of? Text
526
+ @children[-1] << text
527
+ return
528
+ end
529
+ text = Text.new( text, whitespace(), nil, raw() )
530
+ end
531
+ self << text unless text.nil?
532
+ return self
533
+ end
534
+
535
    # Identifies the kind of node this is: always the symbol :element.
    def node_type
      :element
    end
538
+
539
+ def xpath
540
+ path_elements = []
541
+ cur = self
542
+ path_elements << __to_xpath_helper( self )
543
+ while cur.parent
544
+ cur = cur.parent
545
+ path_elements << __to_xpath_helper( cur )
546
+ end
547
+ return path_elements.reverse.join( "/" )
548
+ end
549
+
550
+ #################################################
551
+ # Attributes #
552
+ #################################################
553
+
554
+ # Fetches an attribute value or a child.
555
+ #
556
+ # If String or Symbol is specified, it's treated as attribute
557
+ # name. Attribute value as String or +nil+ is returned. This case
558
+ # is shortcut of +attributes[name]+.
559
+ #
560
+ # If Integer is specified, it's treated as the index of
561
+ # child. It returns Nth child.
562
+ #
563
+ # doc = REXML::Document.new("<a attr='1'><b/><c/></a>")
564
+ # doc.root["attr"] # => "1"
565
+ # doc.root.attributes["attr"] # => "1"
566
+ # doc.root[1] # => <c/>
567
+ def [](name_or_index)
568
+ case name_or_index
569
+ when String
570
+ attributes[name_or_index]
571
+ when Symbol
572
+ attributes[name_or_index.to_s]
573
+ else
574
+ super
575
+ end
576
+ end
577
+
578
+ def attribute( name, namespace=nil )
579
+ prefix = nil
580
+ if namespaces.respond_to? :key
581
+ prefix = namespaces.key(namespace) if namespace
582
+ else
583
+ prefix = namespaces.index(namespace) if namespace
584
+ end
585
+ prefix = nil if prefix == 'xmlns'
586
+
587
+ ret_val =
588
+ attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
589
+
590
+ return ret_val unless ret_val.nil?
591
+ return nil if prefix.nil?
592
+
593
+ # now check that prefix'es namespace is not the same as the
594
+ # default namespace
595
+ return nil unless ( namespaces[ prefix ] == namespaces[ 'xmlns' ] )
596
+
597
+ attributes.get_attribute( name )
598
+
599
+ end
600
+
601
+ # Evaluates to +true+ if this element has any attributes set, false
602
+ # otherwise.
603
+ def has_attributes?
604
+ return !@attributes.empty?
605
+ end
606
+
607
+ # Adds an attribute to this element, overwriting any existing attribute
608
+ # by the same name.
609
+ # key::
610
+ # can be either an Attribute or a String. If an Attribute,
611
+ # the attribute is added to the list of Element attributes. If String,
612
+ # the argument is used as the name of the new attribute, and the value
613
+ # parameter must be supplied.
614
+ # value::
615
+ # Required if +key+ is a String, and ignored if the first argument is
616
+ # an Attribute. This is a String, and is used as the value
617
+ # of the new Attribute. This should be the unnormalized value of the
618
+ # attribute (without entities).
619
+ # Returns:: the Attribute added
620
+ # e = Element.new 'e'
621
+ # e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
622
+ # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
623
+ # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
624
+ def add_attribute( key, value=nil )
625
+ if key.kind_of? Attribute
626
+ @attributes << key
627
+ else
628
+ @attributes[key] = value
629
+ end
630
+ end
631
+
632
+ # Add multiple attributes to this element.
633
+ # hash:: is either a hash, or array of arrays
634
+ # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
635
+ # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] )
636
+ def add_attributes hash
637
+ if hash.kind_of? Hash
638
+ hash.each_pair {|key, value| @attributes[key] = value }
639
+ elsif hash.kind_of? Array
640
+ hash.each { |value| @attributes[ value[0] ] = value[1] }
641
+ end
642
+ end
643
+
644
+ # Removes an attribute
645
+ # key::
646
+ # either an Attribute or a String. In either case, the
647
+ # attribute is found by matching the attribute name to the argument,
648
+ # and then removed. If no attribute is found, no action is taken.
649
+ # Returns::
650
+ # the attribute removed, or nil if this Element did not contain
651
+ # a matching attribute
652
+ # e = Element.new('E')
653
+ # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
654
+ # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
655
+ # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
656
+ # e.delete_attribute( r ) #-> <E/>
657
+ def delete_attribute(key)
658
+ attr = @attributes.get_attribute(key)
659
+ attr.remove unless attr.nil?
660
+ end
661
+
662
+ #################################################
663
+ # Other Utilities #
664
+ #################################################
665
+
666
+ # Get an array of all CData children.
667
+ # IMMUTABLE
668
# Collects every child that is a CData into a new, frozen Array.
def cdatas
  matches = select { |node| node.is_a?(CData) }
  matches.freeze
end
671
+
672
+ # Get an array of all Comment children.
673
+ # IMMUTABLE
674
# Collects every child that is a Comment into a new, frozen Array.
def comments
  matches = select { |node| node.is_a?(Comment) }
  matches.freeze
end
677
+
678
+ # Get an array of all Instruction children.
679
+ # IMMUTABLE
680
# Collects every child that is an Instruction into a new, frozen Array.
def instructions
  matches = select { |node| node.is_a?(Instruction) }
  matches.freeze
end
683
+
684
+ # Get an array of all Text children.
685
+ # IMMUTABLE
686
# Collects every child that is a Text into a new, frozen Array.
def texts
  matches = select { |node| node.is_a?(Text) }
  matches.freeze
end
689
+
690
+ # == DEPRECATED
691
+ # See REXML::Formatters
692
+ #
693
+ # Writes out this element, and recursively, all children.
694
+ # output::
695
+ # an object which supports '<< string'; this is where the
696
+ # document will be written.
697
+ # indent::
698
+ # An integer. If -1, no indenting will be used; otherwise, the
699
+ # indentation will be this number of spaces, and children will be
700
+ # indented an additional amount. Defaults to -1
701
+ # transitive::
702
+ # If transitive is true and indent is >= 0, then the output will be
703
+ # pretty-printed in such a way that the added whitespace does not affect
704
+ # the parse tree of the document
705
+ # ie_hack::
706
+ # This hack inserts a space before the /> on empty tags to address
707
+ # a limitation of Internet Explorer. Defaults to false
708
+ #
709
+ # out = ''
710
+ # doc.write( out ) #-> doc is written to the string 'out'
711
+ # doc.write( $stdout ) #-> doc written to the console
712
# Deprecated serializer entry point: emits a deprecation warning, picks a
# formatter from the arguments and lets it write this node to +output+.
#
# Formatter choice:
# * indent > -1 and transitive -> Formatters::Transitive (loaded lazily)
# * indent > -1                -> Formatters::Pretty
# * otherwise                  -> Formatters::Default
def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters", uplevel: 1)
  formatter =
    if indent > -1 && transitive
      # Only this branch needs the transitive formatter, so it is
      # required on demand.
      require_relative "formatters/transitive"
      REXML::Formatters::Transitive.new( indent, ie_hack )
    elsif indent > -1
      REXML::Formatters::Pretty.new( indent, ie_hack )
    else
      REXML::Formatters::Default.new( ie_hack )
    end
  formatter.write( self, output )
end
726
+
727
+
728
+ private
729
# Builds the XPath step for +node+: a copy of its expanded name, followed
# by a 1-based positional predicate when the parent holds more than one
# REXML::Element with that same expanded name.
def __to_xpath_helper(node)
  step = node.expanded_name.clone
  return step unless node.parent

  same_named = node.parent.find_all do |sibling|
    sibling.kind_of?(REXML::Element) && sibling.expanded_name == node.expanded_name
  end
  # A lone element needs no index; otherwise XPath positions start at 1.
  step << "[#{same_named.index( node ) + 1}]" if same_named.length > 1
  step
end
742
+
743
+ # A private helper method
744
# Private helper: walks @elements (optionally filtered by +name+) and
# yields each child accepted by +test+.
# test:: a callable invoked with each child; truthy means "yield it"
# max::  when greater than zero, stop (returning nil) once this many
#        children have been accepted
# name:: passed straight through to @elements.each
def each_with_something( test, max=0, name=nil )
  accepted = 0
  @elements.each( name ) do |child|
    if test.call(child)
      accepted += 1
      yield child
    end
    return if max > 0 && accepted == max
  end
end
751
+ end
752
+
753
+ ########################################################################
754
+ # ELEMENTS #
755
+ ########################################################################
756
+
757
+ # A class which provides filtering of children for Elements, and
758
+ # XPath search support. You are expected to only encounter this class as
759
+ # the <tt>element.elements</tt> object. Therefore, you are
760
+ # _not_ expected to instantiate this yourself.
761
class Elements
  include Enumerable

  # Constructor
  # parent:: the parent Element
  def initialize parent
    @element = parent
  end

  # Fetches a child element.  Filters only Element children, regardless of
  # the XPath match.
  # index::
  #   the search parameter.  This is either an Integer, which
  #   will be used to find the index'th child Element, or an XPath,
  #   which will be used to search for the Element.  <em>Because
  #   of the nature of XPath searches, any element in the connected XML
  #   document can be fetched through any other element.</em>  <b>The
  #   Integer index is 1-based, not 0-based.</b>  This means that the first
  #   child element is at index 1, not 0, and the +n+th element is at index
  #   +n+, not <tt>n-1</tt>.  This is because XPath indexes element children
  #   starting from 1, not 0, and the indexes should be the same.
  # name::
  #   optional, and only used if the first argument is an
  #   Integer.  In that case, the index'th child Element that has the
  #   supplied name will be returned.  Note again that the indexes start at 1.
  # Returns:: the first matching Element, or nil if no child matched
  # Raises:: RuntimeError if an Integer index smaller than 1 is given
  #  doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
  #  doc.root.elements[1]       #-> <b/>
  #  doc.root.elements['c']     #-> <c id="1"/>
  #  doc.root.elements[2,'c']   #-> <c id="2"/>
  def []( index, name=nil)
    if index.kind_of? Integer
      raise "index (#{index}) must be >= 1" if index < 1
      name = literalize(name) if name
      num = 0
      # num counts only the children that pass the Element/name filter, so
      # the search stops at the index'th *matching* child.
      @element.find { |child|
        child.kind_of? Element and
        (name.nil? ? true : child.has_name?( name )) and
        (num += 1) == index
      }
    else
      return XPath::first( @element, index )
    end
  end

  # Sets an element, replacing any previous matching element.  If no
  # existing element is found, the element is added.
  # index:: Used to find a matching element to replace.  See []().
  # element::
  #   The element to replace the existing element with
  # Returns:: the previous element, or nil if no previous element was found.
  #
  #  doc = Document.new '<a/>'
  #  doc.root.elements[10] = Element.new('b')    #-> <a><b/></a>
  #  doc.root.elements[1]                        #-> <b/>
  #  doc.root.elements[1] = Element.new('c')     #-> <a><c/></a>
  #  doc.root.elements['c'] = Element.new('d')   #-> <a><d/></a>
  def []=( index, element )
    previous = self[index]
    if previous.nil?
      @element.add element
    else
      previous.replace_with element
    end
    return previous
  end

  # Returns +true+ if there are no +Element+ children, +false+ otherwise
  def empty?
    @element.find{ |child| child.kind_of? Element}.nil?
  end

  # Returns the index of the supplied child (starting at 1), or -1 if
  # the element is not a child
  # element:: an +Element+ child
  def index element
    rv = 0
    # rv is bumped once per Element child visited, so when the search
    # stops on a match it holds that child's 1-based position.
    found = @element.find do |child|
      child.kind_of? Element and
      (rv += 1) and
      child == element
    end
    return rv if found == element
    return -1
  end

  # Deletes a child Element
  # element::
  #   Either an Element, which is removed directly; an
  #   xpath, where the first matching child is removed; or an Integer,
  #   where the n'th Element is removed.
  # Returns:: the removed child
  #  doc = Document.new '<a><b/><c/><c id="1"/></a>'
  #  b = doc.root.elements[1]
  #  doc.root.elements.delete b           #-> <a><c/><c id="1"/></a>
  #  doc.elements.delete("a/c[@id='1']")  #-> <a><c/></a>
  #  doc.root.elements.delete 1           #-> <a/>
  def delete element
    if element.kind_of? Element
      @element.delete element
    else
      el = self[element]
      el.remove if el
    end
  end

  # Removes multiple elements.  Filters for Element children, regardless of
  # XPath matching.
  # xpath:: all elements matching this String path are removed.
  # Returns:: an Array of Elements that have been removed
  #  doc = Document.new '<a><c/><c/><c/><c/></a>'
  #  deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
  def delete_all( xpath )
    rv = []
    # Collect first, delete afterwards, so the tree is not modified while
    # the XPath traversal is still running.
    XPath::each( @element, xpath) {|element|
      rv << element if element.kind_of? Element
    }
    rv.each do |element|
      @element.delete element
      element.remove
    end
    return rv
  end

  # Adds an element
  # element::
  #   if supplied, is either an Element, String, or
  #   Source (see Element.initialize).  If not supplied or nil, a
  #   new, default Element will be constructed
  # Returns:: the added Element
  #  a = Element.new('a')
  #  a.elements.add(Element.new('b'))  #-> <a><b/></a>
  #  a.elements.add('c')               #-> <a><b/><c/></a>
  def add element=nil
    if element.nil?
      Element.new("", self, @element.context)
    elsif not element.kind_of?(Element)
      Element.new(element, self, @element.context)
    else
      @element << element
      element.context = @element.context
      element
    end
  end

  alias :<< :add

  # Iterates through all of the child Elements, optionally filtering
  # them by a given XPath
  # xpath::
  #   optional.  If supplied, this is a String XPath, and is used to
  #   filter the children, so that only matching children are yielded.  Note
  #   that XPaths are automatically filtered for Elements, so that
  #   non-Element children will not be yielded
  # Returns:: an Enumerator over the same elements when no block is given
  #  doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
  #  doc.root.elements.each {|e|p e}       #-> Yields b, c, d, b, c, d elements
  #  doc.root.elements.each('b') {|e|p e}  #-> Yields b, b elements
  #  doc.root.elements.each('child::node()')  {|e|p e}
  #  #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
  #  XPath.each(doc.root, 'child::node()', &block)
  #  #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
  def each( xpath=nil )
    # Without a block there is nothing to yield to; hand back an
    # Enumerator that remembers the xpath filter instead.
    return to_enum(:each, xpath) unless block_given?
    XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
  end

  # Like Enumerable#collect, but over the Element results of an optional
  # XPath.
  # xpath:: optional XPath used to select the elements
  # Returns:: an Array of the block's results, one per Element
  def collect( xpath=nil )
    collection = []
    XPath::each( @element, xpath ) {|e|
      collection << yield(e)  if e.kind_of?(Element)
    }
    collection
  end

  # Like Enumerable#inject, but over the Element results of an optional
  # XPath.
  # xpath:: optional XPath used to select the elements
  # initial::
  #   optional starting accumulator.  When nil, the first Element is used
  #   as the starting accumulator and is not yielded.
  # Returns:: the final accumulator
  def inject( xpath=nil, initial=nil )
    first = true
    XPath::each( @element, xpath ) {|e|
      next unless e.kind_of? Element
      if first and initial.nil?
        initial = e
      else
        initial = yield( initial, e )
      end
      # Seeding may only ever happen on the first Element.  Clearing the
      # flag unconditionally keeps a block that returns nil later on from
      # causing the next element to be silently swallowed as a new seed.
      first = false
    }
    initial
  end

  # Returns the number of +Element+ children of the parent object.
  #  doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
  #  doc.root.size            #-> 6, 3 element and 3 text nodes
  #  doc.root.elements.size   #-> 3
  def size
    count = 0
    @element.each {|child| count+=1 if child.kind_of? Element }
    count
  end

  # Returns an Array of Element children.  An XPath may be supplied to
  # filter the children.  Only Element children are returned, even if the
  # supplied XPath matches non-Element children.
  #  doc = Document.new '<a>sean<b/>elliott<c/></a>'
  #  doc.root.elements.to_a                  #-> [ <b/>, <c/> ]
  #  doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
  #  XPath.match(doc.root, "child::node()")  #-> [ sean, <b/>, elliott, <c/> ]
  def to_a( xpath=nil )
    rv = XPath.match( @element, xpath )
    return rv.find_all{|e| e.kind_of? Element} if xpath
    rv
  end

  private
  # Private helper method.  Removes quotes from quoted strings
  def literalize name
    name = name[1..-2] if name[0] == ?' or name[0] == ?"               #'
    name
  end
end
983
+
984
+ ########################################################################
985
+ # ATTRIBUTES #
986
+ ########################################################################
987
+
988
+ # A class that defines the set of Attributes of an Element and provides
989
+ # operations for accessing elements in that set.
990
class Attributes < Hash
  # Storage layout: the underlying Hash is keyed by the attribute's local
  # name.  The stored value is either a single Attribute, or -- when
  # several attributes share a local name under different prefixes -- a
  # nested Hash mapping prefix => Attribute (see []= and delete).

  # Constructor
  # element:: the Element of which this is an Attribute
  def initialize element
    @element = element
  end

  # Fetches an attribute value.  If you want to get the Attribute itself,
  # use get_attribute()
  # name:: an XPath attribute name.  Namespaces are relevant here.
  # Returns::
  #   the String value of the matching attribute, or +nil+ if no
  #   matching attribute was found.  This is the unnormalized value
  #   (with entities expanded).
  #
  #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
  #  doc.root.attributes['att']         #-> '<'
  #  doc.root.attributes['bar:att']     #-> '2'
  def [](name)
    attr = get_attribute(name)
    return attr.value unless attr.nil?
    return nil
  end

  # Returns an Array of the Attribute nodes yielded by each_attribute.
  def to_a
    enum_for(:each_attribute).to_a
  end

  # Returns the number of attributes the owning Element contains.
  #  doc = Document.new "<a x='1' y='2' foo:x='3'/>"
  #  doc.root.attributes.length        #-> 3
  def length
    c = 0
    each_attribute { c+=1 }
    c
  end
  alias :size :length

  # Iterates over the attributes of an Element.  Yields actual Attribute
  # nodes, not String values.
  #
  #  doc = Document.new '<a x="1" y="2"/>'
  #  doc.root.attributes.each_attribute {|attr|
  #    p attr.expanded_name+" => "+attr.value
  #  }
  def each_attribute # :yields: attribute
    return to_enum(__method__) unless block_given?
    each_value do |val|
      if val.kind_of? Attribute
        yield val
      else
        # A nested prefix => Attribute hash; yield each of its members.
        val.each_value { |atr| yield atr }
      end
    end
  end

  # Iterates over each attribute of an Element, yielding the expanded name
  # and value as a pair of Strings.
  #
  #  doc = Document.new '<a x="1" y="2"/>'
  #  doc.root.attributes.each {|name, value| p name+" => "+value }
  def each
    return to_enum(__method__) unless block_given?
    each_attribute do |attr|
      yield [attr.expanded_name, attr.value]
    end
  end

  # Fetches an attribute
  # name::
  #   the name by which to search for the attribute.  Can be a
  #   <tt>prefix:name</tt> namespace name.
  # Returns:: The first matching attribute, or nil if there was none.  This
  # value is an Attribute node, not the String value of the attribute.
  #  doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
  #  doc.root.attributes.get_attribute("foo").value    #-> "2"
  #  doc.root.attributes.get_attribute("x:foo").value  #-> "1"
  def get_attribute( name )
    attr = fetch( name, nil )
    if attr.nil?
      return nil if name.nil?
      # Look for prefix
      name =~ Namespace::NAMESPLIT
      prefix, n = $1, $2
      if prefix
        attr = fetch( n, nil )
        # check prefix
        if attr == nil
          # nothing stored under the local name; fall through to the
          # doctype lookup below
        elsif attr.kind_of? Attribute
          return attr if prefix == attr.prefix
        else
          # nested prefix => Attribute hash
          attr = attr[ prefix ]
          return attr
        end
      end
      # Fall back to a value supplied by the document's doctype, if any.
      element_document = @element.document
      if element_document and element_document.doctype
        expn = @element.expanded_name
        expn = element_document.doctype.name if expn.size == 0
        attr_val = element_document.doctype.attribute_of(expn, name)
        return Attribute.new( name, attr_val ) if attr_val
      end
      return nil
    end
    if attr.kind_of? Hash
      # Unprefixed lookup of a name stored under several prefixes: pick
      # the entry filed under the owning element's prefix.
      attr = attr[ @element.prefix ]
    end
    return attr
  end

  # Sets an attribute, overwriting any existing attribute value by the
  # same name.  Namespace is significant.
  # name:: the name of the attribute
  # value::
  #   (optional) If supplied, the value of the attribute.  If
  #   nil, any existing matching attribute is deleted.
  # Returns::
  #   Owning element (on every path, including deletion)
  # Raises::
  #   ParseException when the new attribute and an existing one share a
  #   local name, have different prefixes (neither being "xmlns"), and
  #   both prefixes resolve to the same namespace
  #  doc = Document.new "<a x:foo='1' foo='3'/>"
  #  doc.root.attributes['y:foo'] = '2'
  #  doc.root.attributes['foo'] = '4'
  #  doc.root.attributes['x:foo'] = nil
  def []=( name, value )
    if value.nil?             # Delete the named attribute
      attr = get_attribute(name)
      # delete always evaluates to the owning element, which keeps the
      # return value consistent with the assignment path below.
      return delete( attr )
    end

    unless value.kind_of? Attribute
      if @element.document and @element.document.doctype
        value = Text::normalize( value, @element.document.doctype )
      else
        value = Text::normalize( value, nil )
      end
      value = Attribute.new(name, value)
    end
    value.element = @element
    old_attr = fetch(value.name, nil)
    if old_attr.nil?
      store(value.name, value)
    elsif old_attr.kind_of? Hash
      old_attr[value.prefix] = value
    elsif old_attr.prefix != value.prefix
      # Check for conflicting namespaces
      raise ParseException.new(
        "Namespace conflict in adding attribute \"#{value.name}\": "+
        "Prefix \"#{old_attr.prefix}\" = "+
        "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
        "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
        value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
        @element.namespace( old_attr.prefix ) ==
          @element.namespace( value.prefix )
      # Same local name under two prefixes: switch to the nested form.
      store value.name, { old_attr.prefix => old_attr,
        value.prefix    => value }
    else
      store value.name, value
    end
    return @element
  end

  # Returns an array of Strings containing all of the prefixes declared
  # by this set of attributes.  The array does not include the default
  # namespace declaration, if one exists.
  #  doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
  #        "z='glorp' p:k='gru'/>")
  #  prefixes = doc.root.attributes.prefixes    #-> ['x', 'y']
  def prefixes
    ns = []
    each_attribute do |attribute|
      ns << attribute.name if attribute.prefix == 'xmlns'
    end
    if @element.document and @element.document.doctype
      expn = @element.expanded_name
      expn = @element.document.doctype.name if expn.size == 0
      @element.document.doctype.attributes_of(expn).each {
        |attribute|
        ns << attribute.name if attribute.prefix == 'xmlns'
      }
    end
    ns
  end

  # Returns a Hash of the namespace declarations found among these
  # attributes and among the doctype's attributes for this element.
  # Keys are attribute names (the declared prefix, or 'xmlns' for an
  # attribute named 'xmlns'); values are the attribute values.
  def namespaces
    namespaces = {}
    each_attribute do |attribute|
      namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
    end
    if @element.document and @element.document.doctype
      expn = @element.expanded_name
      expn = @element.document.doctype.name if expn.size == 0
      @element.document.doctype.attributes_of(expn).each {
        |attribute|
        namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
      }
    end
    namespaces
  end

  # Removes an attribute
  # attribute::
  #   either a String, which is the name of the attribute to remove --
  #   namespaces are significant here -- or the attribute to remove.
  # Returns:: the owning element
  #  doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
  #  doc.root.attributes.delete 'foo'   #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
  #  doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
  #  attr = doc.root.attributes.get_attribute('y:foo')
  #  doc.root.attributes.delete attr    #-> <a z:foo='4'/>"
  def delete( attribute )
    name = nil
    prefix = nil
    if attribute.kind_of? Attribute
      name = attribute.name
      prefix = attribute.prefix
    else
      attribute =~ Namespace::NAMESPLIT
      prefix, name = $1, $2
      prefix = '' unless prefix
    end
    old = fetch(name, nil)
    if old.kind_of? Hash # the supplied attribute is one of many
      old.delete(prefix)
      if old.size == 1
        # Only one prefix left: collapse the nested hash back to a single
        # Attribute entry.
        repl = nil
        old.each_value{|v| repl = v}
        store name, repl
      end
    elsif old.nil?
      return @element
    else # the supplied attribute is a top-level one
      super(name)
    end
    @element
  end

  # Adds an attribute, overriding any existing attribute by the
  # same name.  Namespaces are significant.
  # attribute:: An Attribute
  def add( attribute )
    self[attribute.name] = attribute
  end

  alias :<< :add

  # Deletes all attributes matching a name.  Namespaces are significant.
  # name::
  #   A String; all attributes whose expanded name equals it are removed
  # Returns:: an Array of the Attributes that were removed
  def delete_all( name )
    rv = []
    each_attribute { |attribute|
      rv << attribute if attribute.expanded_name == name
    }
    rv.each{ |attr| attr.remove }
    return rv
  end

  # The +get_attribute_ns+ method retrieves an attribute by its namespace
  # and name.  Thus it is possible to reliably identify an attribute
  # even if an XML processor has changed the prefix.
  #
  # Method contributed by Henrik Martensson
  def get_attribute_ns(namespace, name)
    result = nil
    each_attribute() { |attribute|
      if name == attribute.name &&
        namespace == attribute.namespace() &&
        ( !namespace.empty? || !attribute.fully_expanded_name.index(':') )
        # foo will match xmlns:foo, but only if foo isn't also an attribute
        result = attribute if !result or !namespace.empty? or
                              !attribute.fully_expanded_name.index(':')
      end
    }
    result
  end
end
1267
+ end