rubysl-rexml 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (179) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.travis.yml +8 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE +25 -0
  6. data/README.md +29 -0
  7. data/Rakefile +1 -0
  8. data/lib/rexml/attlistdecl.rb +62 -0
  9. data/lib/rexml/attribute.rb +185 -0
  10. data/lib/rexml/cdata.rb +67 -0
  11. data/lib/rexml/child.rb +96 -0
  12. data/lib/rexml/comment.rb +80 -0
  13. data/lib/rexml/doctype.rb +271 -0
  14. data/lib/rexml/document.rb +230 -0
  15. data/lib/rexml/dtd/attlistdecl.rb +10 -0
  16. data/lib/rexml/dtd/dtd.rb +51 -0
  17. data/lib/rexml/dtd/elementdecl.rb +17 -0
  18. data/lib/rexml/dtd/entitydecl.rb +56 -0
  19. data/lib/rexml/dtd/notationdecl.rb +39 -0
  20. data/lib/rexml/element.rb +1227 -0
  21. data/lib/rexml/encoding.rb +71 -0
  22. data/lib/rexml/encodings/CP-1252.rb +103 -0
  23. data/lib/rexml/encodings/EUC-JP.rb +35 -0
  24. data/lib/rexml/encodings/ICONV.rb +22 -0
  25. data/lib/rexml/encodings/ISO-8859-1.rb +7 -0
  26. data/lib/rexml/encodings/ISO-8859-15.rb +72 -0
  27. data/lib/rexml/encodings/SHIFT-JIS.rb +37 -0
  28. data/lib/rexml/encodings/SHIFT_JIS.rb +1 -0
  29. data/lib/rexml/encodings/UNILE.rb +34 -0
  30. data/lib/rexml/encodings/US-ASCII.rb +30 -0
  31. data/lib/rexml/encodings/UTF-16.rb +35 -0
  32. data/lib/rexml/encodings/UTF-8.rb +18 -0
  33. data/lib/rexml/entity.rb +166 -0
  34. data/lib/rexml/formatters/default.rb +109 -0
  35. data/lib/rexml/formatters/pretty.rb +138 -0
  36. data/lib/rexml/formatters/transitive.rb +56 -0
  37. data/lib/rexml/functions.rb +382 -0
  38. data/lib/rexml/instruction.rb +70 -0
  39. data/lib/rexml/light/node.rb +196 -0
  40. data/lib/rexml/namespace.rb +47 -0
  41. data/lib/rexml/node.rb +75 -0
  42. data/lib/rexml/output.rb +24 -0
  43. data/lib/rexml/parent.rb +166 -0
  44. data/lib/rexml/parseexception.rb +51 -0
  45. data/lib/rexml/parsers/baseparser.rb +503 -0
  46. data/lib/rexml/parsers/lightparser.rb +60 -0
  47. data/lib/rexml/parsers/pullparser.rb +196 -0
  48. data/lib/rexml/parsers/sax2parser.rb +238 -0
  49. data/lib/rexml/parsers/streamparser.rb +46 -0
  50. data/lib/rexml/parsers/treeparser.rb +97 -0
  51. data/lib/rexml/parsers/ultralightparser.rb +56 -0
  52. data/lib/rexml/parsers/xpathparser.rb +698 -0
  53. data/lib/rexml/quickpath.rb +266 -0
  54. data/lib/rexml/rexml.rb +32 -0
  55. data/lib/rexml/sax2listener.rb +97 -0
  56. data/lib/rexml/source.rb +251 -0
  57. data/lib/rexml/streamlistener.rb +92 -0
  58. data/lib/rexml/syncenumerator.rb +33 -0
  59. data/lib/rexml/text.rb +344 -0
  60. data/lib/rexml/undefinednamespaceexception.rb +8 -0
  61. data/lib/rexml/validation/relaxng.rb +559 -0
  62. data/lib/rexml/validation/validation.rb +155 -0
  63. data/lib/rexml/validation/validationexception.rb +9 -0
  64. data/lib/rexml/xmldecl.rb +119 -0
  65. data/lib/rexml/xmltokens.rb +18 -0
  66. data/lib/rexml/xpath.rb +66 -0
  67. data/lib/rexml/xpath_parser.rb +792 -0
  68. data/lib/rubysl/rexml.rb +1 -0
  69. data/lib/rubysl/rexml/version.rb +5 -0
  70. data/rubysl-rexml.gemspec +23 -0
  71. data/spec/attribute/clone_spec.rb +10 -0
  72. data/spec/attribute/element_spec.rb +22 -0
  73. data/spec/attribute/equal_value_spec.rb +17 -0
  74. data/spec/attribute/hash_spec.rb +12 -0
  75. data/spec/attribute/initialize_spec.rb +28 -0
  76. data/spec/attribute/inspect_spec.rb +19 -0
  77. data/spec/attribute/namespace_spec.rb +23 -0
  78. data/spec/attribute/node_type_spec.rb +9 -0
  79. data/spec/attribute/prefix_spec.rb +17 -0
  80. data/spec/attribute/remove_spec.rb +19 -0
  81. data/spec/attribute/to_s_spec.rb +13 -0
  82. data/spec/attribute/to_string_spec.rb +14 -0
  83. data/spec/attribute/value_spec.rb +14 -0
  84. data/spec/attribute/write_spec.rb +22 -0
  85. data/spec/attribute/xpath_spec.rb +19 -0
  86. data/spec/attributes/add_spec.rb +6 -0
  87. data/spec/attributes/append_spec.rb +6 -0
  88. data/spec/attributes/delete_all_spec.rb +30 -0
  89. data/spec/attributes/delete_spec.rb +26 -0
  90. data/spec/attributes/each_attribute_spec.rb +24 -0
  91. data/spec/attributes/each_spec.rb +24 -0
  92. data/spec/attributes/element_reference_spec.rb +18 -0
  93. data/spec/attributes/element_set_spec.rb +25 -0
  94. data/spec/attributes/get_attribute_ns_spec.rb +13 -0
  95. data/spec/attributes/get_attribute_spec.rb +28 -0
  96. data/spec/attributes/initialize_spec.rb +18 -0
  97. data/spec/attributes/length_spec.rb +6 -0
  98. data/spec/attributes/namespaces_spec.rb +5 -0
  99. data/spec/attributes/prefixes_spec.rb +23 -0
  100. data/spec/attributes/shared/add.rb +17 -0
  101. data/spec/attributes/shared/length.rb +12 -0
  102. data/spec/attributes/size_spec.rb +6 -0
  103. data/spec/attributes/to_a_spec.rb +20 -0
  104. data/spec/cdata/clone_spec.rb +9 -0
  105. data/spec/cdata/initialize_spec.rb +24 -0
  106. data/spec/cdata/shared/to_s.rb +11 -0
  107. data/spec/cdata/to_s_spec.rb +6 -0
  108. data/spec/cdata/value_spec.rb +6 -0
  109. data/spec/document/add_element_spec.rb +30 -0
  110. data/spec/document/add_spec.rb +60 -0
  111. data/spec/document/clone_spec.rb +19 -0
  112. data/spec/document/doctype_spec.rb +14 -0
  113. data/spec/document/encoding_spec.rb +21 -0
  114. data/spec/document/expanded_name_spec.rb +15 -0
  115. data/spec/document/new_spec.rb +37 -0
  116. data/spec/document/node_type_spec.rb +7 -0
  117. data/spec/document/root_spec.rb +11 -0
  118. data/spec/document/stand_alone_spec.rb +18 -0
  119. data/spec/document/version_spec.rb +13 -0
  120. data/spec/document/write_spec.rb +38 -0
  121. data/spec/document/xml_decl_spec.rb +14 -0
  122. data/spec/element/add_attribute_spec.rb +40 -0
  123. data/spec/element/add_attributes_spec.rb +21 -0
  124. data/spec/element/add_element_spec.rb +38 -0
  125. data/spec/element/add_namespace_spec.rb +23 -0
  126. data/spec/element/add_text_spec.rb +23 -0
  127. data/spec/element/attribute_spec.rb +16 -0
  128. data/spec/element/attributes_spec.rb +18 -0
  129. data/spec/element/cdatas_spec.rb +23 -0
  130. data/spec/element/clone_spec.rb +28 -0
  131. data/spec/element/comments_spec.rb +20 -0
  132. data/spec/element/delete_attribute_spec.rb +38 -0
  133. data/spec/element/delete_element_spec.rb +50 -0
  134. data/spec/element/delete_namespace_spec.rb +24 -0
  135. data/spec/element/document_spec.rb +17 -0
  136. data/spec/element/each_element_with_attribute_spec.rb +34 -0
  137. data/spec/element/each_element_with_text_spec.rb +30 -0
  138. data/spec/element/get_text_spec.rb +17 -0
  139. data/spec/element/has_attributes_spec.rb +16 -0
  140. data/spec/element/has_elements_spec.rb +17 -0
  141. data/spec/element/has_text_spec.rb +15 -0
  142. data/spec/element/inspect_spec.rb +26 -0
  143. data/spec/element/instructions_spec.rb +20 -0
  144. data/spec/element/namespace_spec.rb +26 -0
  145. data/spec/element/namespaces_spec.rb +31 -0
  146. data/spec/element/new_spec.rb +34 -0
  147. data/spec/element/next_element_spec.rb +18 -0
  148. data/spec/element/node_type_spec.rb +7 -0
  149. data/spec/element/prefixes_spec.rb +22 -0
  150. data/spec/element/previous_element_spec.rb +19 -0
  151. data/spec/element/raw_spec.rb +23 -0
  152. data/spec/element/root_spec.rb +27 -0
  153. data/spec/element/text_spec.rb +45 -0
  154. data/spec/element/texts_spec.rb +15 -0
  155. data/spec/element/whitespace_spec.rb +22 -0
  156. data/spec/node/each_recursive_spec.rb +20 -0
  157. data/spec/node/find_first_recursive_spec.rb +24 -0
  158. data/spec/node/index_in_parent_spec.rb +14 -0
  159. data/spec/node/next_sibling_node_spec.rb +20 -0
  160. data/spec/node/parent_spec.rb +20 -0
  161. data/spec/node/previous_sibling_node_spec.rb +20 -0
  162. data/spec/shared/each_element.rb +35 -0
  163. data/spec/shared/elements_to_a.rb +35 -0
  164. data/spec/text/append_spec.rb +9 -0
  165. data/spec/text/clone_spec.rb +9 -0
  166. data/spec/text/comparison_spec.rb +24 -0
  167. data/spec/text/empty_spec.rb +11 -0
  168. data/spec/text/indent_text_spec.rb +23 -0
  169. data/spec/text/inspect_spec.rb +7 -0
  170. data/spec/text/new_spec.rb +48 -0
  171. data/spec/text/node_type_spec.rb +7 -0
  172. data/spec/text/normalize_spec.rb +7 -0
  173. data/spec/text/read_with_substitution_spec.rb +12 -0
  174. data/spec/text/to_s_spec.rb +17 -0
  175. data/spec/text/unnormalize_spec.rb +7 -0
  176. data/spec/text/value_spec.rb +36 -0
  177. data/spec/text/wrap_spec.rb +20 -0
  178. data/spec/text/write_with_substitution_spec.rb +32 -0
  179. metadata +385 -0
@@ -0,0 +1,10 @@
1
+ require "rexml/child"
2
+ module REXML
3
+ module DTD
4
+ class AttlistDecl < Child
5
+ START = "<!ATTLIST"
6
+ START_RE = /^\s*#{START}/um
7
+ PATTERN_RE = /\s*(#{START}.*?>)/um
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,51 @@
1
+ require "rexml/dtd/elementdecl"
2
+ require "rexml/dtd/entitydecl"
3
+ require "rexml/comment"
4
+ require "rexml/dtd/notationdecl"
5
+ require "rexml/dtd/attlistdecl"
6
+ require "rexml/parent"
7
+
8
+ module REXML
9
+ module DTD
10
# Splits DTD text into declaration nodes collected in a Parent.
#
# NOTE(review): every declaration class used here must expose a PATTERN_RE
# constant and a constructor that accepts the MatchData passed below; confirm
# this for EntityDecl, NotationDecl, AttlistDecl and Comment before relying
# on this parser.
class Parser
  # Parses +input+ into a container of declaration nodes.
  # input::
  #   a String of DTD text, or a File whose whole content is read.
  # Returns:: the result of parse_helper, or nil if +input+ is neither a
  # String nor a File.
  def Parser.parse( input )
    case input
    when String
      parse_helper input
    when File
      parse_helper input.read
    end
  end

  # Takes a String and parses it out, consuming one declaration per pass.
  # Parsing stops at the first position where no declaration pattern matches,
  # so unrecognised trailing text cannot cause an endless loop.
  # Returns:: a Parent holding one node per recognised declaration.
  def Parser.parse_helper( input )
    contents = Parent.new
    while input.size > 0
      # A successful +when+ leaves the MatchData of that pattern in $~.
      case input
      when ElementDecl::PATTERN_RE
        # ElementDecl reads capture groups 1 and 2, so it needs the MatchData
        # rather than the matched text.
        contents << ElementDecl.new( $~ )
      when AttlistDecl::PATTERN_RE
        contents << AttlistDecl.new( $~ )
      when EntityDecl::PATTERN_RE
        contents << EntityDecl.new( $~ )
      when Comment::PATTERN_RE
        contents << Comment.new( $~ )
      when NotationDecl::PATTERN_RE
        contents << NotationDecl.new( $~ )
      else
        break
      end
      # Continue with whatever follows the declaration just consumed.
      input = $~.post_match
    end
    contents
  end
end
50
+ end
51
+ end
@@ -0,0 +1,17 @@
1
+ require "rexml/child"
2
+ module REXML
3
+ module DTD
4
# Holds the name and the remaining text of an <!ELEMENT ...> declaration.
class ElementDecl < Child
  START = "<!ELEMENT"
  START_RE = /^\s*#{START}/um
  # Group 1 captures the (optionally prefixed) element name; group 2 captures
  # the rest of the declaration up to, but not including, the closing ">".
  # (A looser pattern previously assigned to this constant was overwritten
  # immediately and has been dropped.)
  PATTERN_RE = /^\s*#{START}\s+((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)(.*?)>/

  # match::
  #   an object indexable with 1 and 2 (e.g. the MatchData produced by
  #   PATTERN_RE); index 1 becomes the name, index 2 the remaining content.
  def initialize match
    # Initialise the Child part with no parent, as the sibling declaration
    # classes do.
    super()
    @name = match[1]
    @rest = match[2]
  end
end
16
+ end
17
+ end
@@ -0,0 +1,56 @@
1
+ require "rexml/child"
2
+ module REXML
3
+ module DTD
4
# Represents an <!ENTITY ...> declaration read from a source object.
class EntityDecl < Child
  START = "<!ENTITY"
  START_RE = /^\s*#{START}/um
  # In each pattern group 1 is the entity name and group 2 the first quoted
  # literal; PUBLIC additionally captures a second quoted literal in group 4.
  PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
  SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
  PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
  PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um

  # <!ENTITY name SYSTEM "...">
  # <!ENTITY name "...">
  # src::
  #   an object whose match( pattern ) tests a pattern and whose
  #   match( pattern, true ) returns the match data for it.
  #   NOTE(review): presumably a REXML Source that consumes the matched text
  #   when the second argument is true; confirm against the Source class.
  # Raises:: ParseException if none of the entity patterns match.
  def initialize src
    super()
    md = nil
    if src.match( PUBLIC )
      md = src.match( PUBLIC, true )
      @middle = "PUBLIC"
      @content = "#{md[2]} #{md[4]}"
    elsif src.match( SYSTEM )
      md = src.match( SYSTEM, true )
      @middle = "SYSTEM"
      @content = md[2]
    elsif src.match( PLAIN )
      md = src.match( PLAIN, true )
      @middle = ""
      @content = md[2]
    elsif src.match( PERCENT )
      md = src.match( PERCENT, true )
      @middle = ""
      @content = md[2]
    end
    raise ParseException.new("failed Entity match", src) if md.nil?
    @name = md[1]
  end

  # Returns the declaration as markup, including the closing ">".
  def to_s
    rv = "<!ENTITY #@name "
    rv << "#@middle " if @middle.size > 0
    rv << @content
    rv << ">"
  end

  # Writes the indentation followed by the declaration markup to +output+.
  def write( output, indent )
    indent( output, indent )
    output << to_s
  end

  # Reads one entity declaration from +source+ and reports it to +listener+.
  # Whitespace runs in the reported text are squeezed. Nothing is reported
  # (and nil is returned) when no entity pattern matches.
  # NOTE(review): the event is sent under the lower-cased short class name
  # ("entitydecl"); verify that the listener implements that method.
  def EntityDecl.parse_source source, listener
    pattern = [PUBLIC, SYSTEM, PLAIN, PERCENT].find { |re| source.match( re ) }
    return if pattern.nil?
    md = source.match( pattern, true )
    thing = md[0].squeeze(" \t\n\r")
    listener.send name.split("::").last.downcase, thing
  end
end
55
+ end
56
+ end
@@ -0,0 +1,39 @@
1
+ require "rexml/child"
2
+ module REXML
3
+ module DTD
4
# Represents a <!NOTATION ...> declaration read from a source object.
class NotationDecl < Child
  START = "<!NOTATION"
  START_RE = /^\s*#{START}/um
  # Group 1 is the notation name, group 2 the keyword (PUBLIC or SYSTEM) and
  # group 3 the quoted literal that follows it.
  PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
  SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um

  # src::
  #   an object whose match( pattern ) tests a pattern and whose
  #   match( pattern, true ) returns the match data for it.
  #   NOTE(review): presumably a REXML Source that consumes the matched text
  #   when the second argument is true; confirm against the Source class.
  # Raises:: ParseException if neither notation pattern matches.
  def initialize src
    super()
    if src.match( PUBLIC )
      md = src.match( PUBLIC, true )
    elsif src.match( SYSTEM )
      md = src.match( SYSTEM, true )
    else
      raise ParseException.new( "error parsing notation: no matching pattern", src )
    end
    @name = md[1]
    @middle = md[2]
    @rest = md[3]
  end

  # Returns the declaration as markup.
  def to_s
    "<!NOTATION #@name #@middle #@rest>"
  end

  # Writes the indentation followed by the declaration markup to +output+.
  def write( output, indent )
    indent( output, indent )
    output << to_s
  end

  # Reads one notation declaration from +source+ and reports it to
  # +listener+. Whitespace runs in the reported text are squeezed. Nothing is
  # reported (and nil is returned) when neither pattern matches.
  # NOTE(review): the event is sent under the lower-cased short class name
  # ("notationdecl"); verify that the listener implements that method.
  def NotationDecl.parse_source source, listener
    pattern = [PUBLIC, SYSTEM].find { |re| source.match( re ) }
    return if pattern.nil?
    md = source.match( pattern, true )
    thing = md[0].squeeze(" \t\n\r")
    listener.send name.split("::").last.downcase, thing
  end
end
38
+ end
39
+ end
@@ -0,0 +1,1227 @@
1
+ require "rexml/parent"
2
+ require "rexml/namespace"
3
+ require "rexml/attribute"
4
+ require "rexml/cdata"
5
+ require "rexml/xpath"
6
+ require "rexml/parseexception"
7
+
8
+ module REXML
9
+ # An implementation note about namespaces:
10
+ # As we parse, when we find namespaces we put them in a hash and assign
11
+ # them a unique ID. We then convert the namespace prefix for the node
12
+ # to the unique ID. This makes namespace lookup much faster for the
13
+ # cost of extra memory use. We save the namespace prefix for the
14
+ # context node and convert it back when we write it.
15
+ @@namespaces = {}
16
+
17
+ # Represents a tagged XML element. Elements are characterized by
18
+ # having children, attributes, and names, and can themselves be
19
+ # children.
20
+ class Element < Parent
21
+ include Namespace
22
+
23
+ UNDEFINED = "UNDEFINED"; # The default name
24
+
25
+ # Mechanisms for accessing attributes and child elements of this
26
+ # element.
27
+ attr_reader :attributes, :elements
28
+ # The context holds information about the processing environment, such as
29
+ # whitespace handling.
30
+ attr_accessor :context
31
+
32
+ # Constructor
33
+ # arg::
34
+ # if not supplied, will be set to the default value.
35
+ # If a String, the name of this object will be set to the argument.
36
+ # If an Element, the object will be shallowly cloned; name,
37
+ # attributes, and namespaces will be copied. Children will +not+ be
38
+ # copied.
39
+ # parent::
40
+ # if supplied, must be a Parent, and will be used as
41
+ # the parent of this object.
42
+ # context::
43
+ # If supplied, must be a hash containing context items. Context items
44
+ # include:
45
+ # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
46
+ # strings being the names of the elements to respect
47
+ # whitespace for. Defaults to :+all+.
48
+ # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
49
+ # strings being the names of the elements to ignore whitespace on.
50
+ # Overrides :+respect_whitespace+.
51
+ # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
52
+ # of strings being the names of the elements in which to ignore
53
+ # whitespace-only nodes. If this is set, Text nodes which contain only
54
+ # whitespace will not be added to the document tree.
55
+ # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
56
+ # the elements to process in raw mode. In raw mode, special
57
+ # characters in text are not converted to or from entities.
58
+ def initialize( arg = UNDEFINED, parent=nil, context=nil )
59
+ super(parent)
60
+
61
+ @elements = Elements.new(self)
62
+ @attributes = Attributes.new(self)
63
+ @context = context
64
+
65
+ if arg.kind_of? String
66
+ self.name = arg
67
+ elsif arg.kind_of? Element
68
+ self.name = arg.expanded_name
69
+ arg.attributes.each_attribute{ |attribute|
70
+ @attributes << Attribute.new( attribute )
71
+ }
72
+ @context = arg.context
73
+ end
74
+ end
75
+
76
+ def inspect
77
+ rv = "<#@expanded_name"
78
+
79
+ @attributes.each_attribute do |attr|
80
+ rv << " "
81
+ attr.write( rv, 0 )
82
+ end
83
+
84
+ if children.size > 0
85
+ rv << "> ... </>"
86
+ else
87
+ rv << "/>"
88
+ end
89
+ end
90
+
91
+
92
+ # Creates a shallow copy of self.
93
+ # d = Document.new "<a><b/><b/><c><d/></c></a>"
94
+ # new_a = d.root.clone
95
+ # puts new_a # => "<a/>"
96
+ def clone
97
+ self.class.new self
98
+ end
99
+
100
+ # Evaluates to the root node of the document that this element
101
+ # belongs to. If this element doesn't belong to a document, but does
102
+ # belong to another Element, the parent's root will be returned, until the
103
+ # earliest ancestor is found.
104
+ #
105
+ # Note that this is not the same as the document element.
106
+ # In the following example, <a> is the document element, and the root
107
+ # node is the parent node of the document element. You may ask yourself
108
+ # why the root node is useful: consider the doctype and XML declaration,
109
+ # and any processing instructions before the document element... they
110
+ # are children of the root node, or siblings of the document element.
111
+ # The only time this isn't true is when an Element is created that is
112
+ # not part of any Document. In this case, the ancestor that has no
113
+ # parent acts as the root node.
114
+ # d = Document.new '<a><b><c/></b></a>'
115
+ # a = d[1] ; c = a[1][1]
116
+ # d.root_node == d # TRUE
117
+ # a.root_node # namely, d
118
+ # c.root_node # again, d
119
+ def root_node
120
+ parent.nil? ? self : parent.root_node
121
+ end
122
+
123
+ def root
124
+ return elements[1] if self.kind_of? Document
125
+ return self if parent.kind_of? Document or parent.nil?
126
+ return parent.root
127
+ end
128
+
129
+ # Evaluates to the document to which this element belongs, or nil if this
130
+ # element doesn't belong to a document.
131
+ def document
132
+ rt = root
133
+ rt.parent if rt
134
+ end
135
+
136
+ # Evaluates to +true+ if whitespace is respected for this element. This
137
+ # is the case if:
138
+ # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
139
+ # 2. The context has :+respect_whitespace+ set to :+all+ or
140
+ # an array containing the name of this element, and
141
+ # :+compress_whitespace+ isn't set to :+all+ or an array containing the
142
+ # name of this element.
143
+ # The evaluation is tested against +expanded_name+, and so is namespace
144
+ # sensitive.
145
+ def whitespace
146
+ @whitespace = nil
147
+ if @context
148
+ if @context[:respect_whitespace]
149
+ @whitespace = (@context[:respect_whitespace] == :all or
150
+ @context[:respect_whitespace].include? expanded_name)
151
+ end
152
+ @whitespace = false if (@context[:compress_whitespace] and
153
+ (@context[:compress_whitespace] == :all or
154
+ @context[:compress_whitespace].include? expanded_name)
155
+ )
156
+ end
157
+ @whitespace = true unless @whitespace == false
158
+ @whitespace
159
+ end
160
+
161
# Evaluates to +true+ if the context has :+ignore_whitespace_nodes+ set to
# :+all+ or to a collection that includes the expanded name of this element.
# Evaluates to +false+ otherwise, including when there is no context or the
# setting is absent. The result is also stored in @ignore_whitespace_nodes.
def ignore_whitespace_nodes
  setting = @context && @context[:ignore_whitespace_nodes]
  @ignore_whitespace_nodes =
    setting ? (setting == :all || setting.include?( expanded_name )) : false
end
171
+
172
+ # Evaluates to +true+ if raw mode is set for this element. This
173
+ # is the case if the context has :+raw+ set to :+all+ or
174
+ # an array containing the name of this element.
175
+ #
176
+ # The evaluation is tested against +expanded_name+, and so is namespace
177
+ # sensitive.
178
+ def raw
179
+ @raw = (@context and @context[:raw] and
180
+ (@context[:raw] == :all or
181
+ @context[:raw].include? expanded_name))
182
+ @raw
183
+ end
184
+
185
+ #once :whitespace, :raw, :ignore_whitespace_nodes
186
+
187
+ #################################################
188
+ # Namespaces #
189
+ #################################################
190
+
191
+ # Evaluates to an +Array+ containing the prefixes (names) of all defined
192
+ # namespaces at this context node.
193
+ # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
194
+ # doc.elements['//b'].prefixes # -> ['x', 'y']
195
+ def prefixes
196
+ prefixes = []
197
+ prefixes = parent.prefixes if parent
198
+ prefixes |= attributes.prefixes
199
+ return prefixes
200
+ end
201
+
202
+ def namespaces
203
+ namespaces = {}
204
+ namespaces = parent.namespaces if parent
205
+ namespaces = namespaces.merge( attributes.namespaces )
206
+ return namespaces
207
+ end
208
+
209
+ # Evaluates to the URI for a prefix, or the empty string if no such
210
+ # namespace is declared for this element. Evaluates recursively for
211
+ # ancestors. Returns the default namespace, if there is one.
212
+ # prefix::
213
+ # the prefix to search for. If not supplied, returns the default
214
+ # namespace if one exists
215
+ # Returns::
216
+ # the namespace URI as a String, or nil if no such namespace
217
+ # exists. If the namespace is undefined, returns an empty string
218
+ # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
219
+ # b = doc.elements['//b']
220
+ # b.namespace # -> '1'
221
+ # b.namespace("y") # -> '2'
222
+ def namespace(prefix=nil)
223
+ if prefix.nil?
224
+ prefix = prefix()
225
+ end
226
+ if prefix == ''
227
+ prefix = "xmlns"
228
+ else
229
+ prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
230
+ end
231
+ ns = attributes[ prefix ]
232
+ ns = parent.namespace(prefix) if ns.nil? and parent
233
+ ns = '' if ns.nil? and prefix == 'xmlns'
234
+ return ns
235
+ end
236
+
237
+ # Adds a namespace to this element.
238
+ # prefix::
239
+ # the prefix string, or the namespace URI if +uri+ is not
240
+ # supplied
241
+ # uri::
242
+ # the namespace URI. May be nil, in which case +prefix+ is used as
243
+ # the URI
244
+ # Evaluates to: this Element
245
+ # a = Element.new("a")
246
+ # a.add_namespace("xmlns:foo", "bar" )
247
+ # a.add_namespace("foo", "bar") # shorthand for previous line
248
+ # a.add_namespace("twiddle")
249
+ # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
250
+ def add_namespace( prefix, uri=nil )
251
+ unless uri
252
+ @attributes["xmlns"] = prefix
253
+ else
254
+ prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
255
+ @attributes[ prefix ] = uri
256
+ end
257
+ self
258
+ end
259
+
260
# Removes a namespace from this node. This only works if the namespace is
# actually declared in this node. If no argument is passed, deletes the
# default namespace.
# namespace::
#   the prefix to remove ("foo"), the fully qualified attribute name
#   ("xmlns:foo"), or "xmlns" for the default namespace.
#
# Evaluates to: this element
#  doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
#  doc.root.delete_namespace
#  puts doc     # -> <a xmlns:foo='bar'/>
#  doc.root.delete_namespace 'foo'
#  puts doc     # -> <a/>
def delete_namespace namespace="xmlns"
  # Accept an already qualified name, as add_namespace does, instead of
  # turning it into "xmlns:xmlns:...".
  qualified = (namespace == 'xmlns' || namespace.start_with?( 'xmlns:' ))
  namespace = "xmlns:#{namespace}" unless qualified
  attribute = attributes.get_attribute(namespace)
  attribute.remove unless attribute.nil?
  self
end
276
+
277
+ #################################################
278
+ # Elements #
279
+ #################################################
280
+
281
+ # Adds a child to this element, optionally setting attributes in
282
+ # the element.
283
+ # element::
284
+ # optional. If Element, the element is added.
285
+ # Otherwise, a new Element is constructed with the argument (see
286
+ # Element.initialize).
287
+ # attrs::
288
+ # If supplied, must be a Hash containing String name,value
289
+ # pairs, which will be used to set the attributes of the new Element.
290
+ # Returns:: the Element that was added
291
+ # el = doc.add_element 'my-tag'
292
+ # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
293
+ # el = Element.new 'my-tag'
294
+ # doc.add_element el
295
+ def add_element element, attrs=nil
296
+ raise "First argument must be either an element name, or an Element object" if element.nil?
297
+ el = @elements.add(element)
298
+ attrs.each do |key, value|
299
+ el.attributes[key]=Attribute.new(key,value,self)
300
+ end if attrs.kind_of? Hash
301
+ el
302
+ end
303
+
304
+ # Deletes a child element.
305
+ # element::
306
+ # Must be an +Element+, +String+, or +Integer+. If Element,
307
+ # the element is removed. If String, the element is found (via XPath)
308
+ # and removed. <em>This means that any parent can remove any
309
+ # descendant.</em> If Integer, the Element indexed by that number will be
310
+ # removed.
311
+ # Returns:: the element that was removed.
312
+ # doc.delete_element "/a/b/c[@id='4']"
313
+ # doc.delete_element doc.elements["//k"]
314
+ # doc.delete_element 1
315
+ def delete_element element
316
+ @elements.delete element
317
+ end
318
+
319
+ # Evaluates to +true+ if this element has at least one child Element
320
+ # doc = Document.new "<a><b/><c>Text</c></a>"
321
+ # doc.root.has_elements? # -> true
322
+ # doc.elements["/a/b"].has_elements? # -> false
323
+ # doc.elements["/a/c"].has_elements? # -> false
324
+ def has_elements?
325
+ !@elements.empty?
326
+ end
327
+
328
+ # Iterates through the child elements, yielding for each Element that
329
+ # has a particular attribute set.
330
+ # key::
331
+ # the name of the attribute to search for
332
+ # value::
333
+ # the value of the attribute
334
+ # max::
335
+ # (optional) causes this method to return after yielding
336
+ # for this number of matching children
337
+ # name::
338
+ # (optional) if supplied, this is an XPath that filters
339
+ # the children to check.
340
+ #
341
+ # doc = Document.new "<a><b id='1'/><c id='2'/><d id='1'/><e/></a>"
342
+ # # Yields b, c, d
343
+ # doc.root.each_element_with_attribute( 'id' ) {|e| p e}
344
+ # # Yields b, d
345
+ # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
346
+ # # Yields b
347
+ # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
348
+ # # Yields d
349
+ # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
350
+ def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
351
+ each_with_something( proc {|child|
352
+ if value.nil?
353
+ child.attributes[key] != nil
354
+ else
355
+ child.attributes[key]==value
356
+ end
357
+ }, max, name, &block )
358
+ end
359
+
360
+ # Iterates through the children, yielding for each Element that
361
+ # has a particular text set.
362
+ # text::
363
+ # the text to search for. If nil, or not supplied, will iterate
364
+ # over all +Element+ children that contain at least one +Text+ node.
365
+ # max::
366
+ # (optional) causes this method to return after yielding
367
+ # for this number of matching children
368
+ # name::
369
+ # (optional) if supplied, this is an XPath that filters
370
+ # the children to check.
371
+ #
372
+ # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
373
+ # # Yields b, c, d
374
+ # doc.each_element_with_text {|e|p e}
375
+ # # Yields b, c
376
+ # doc.each_element_with_text('b'){|e|p e}
377
+ # # Yields b
378
+ # doc.each_element_with_text('b', 1){|e|p e}
379
+ # # Yields d
380
+ # doc.each_element_with_text(nil, 0, 'd'){|e|p e}
381
+ def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
382
+ each_with_something( proc {|child|
383
+ if text.nil?
384
+ child.has_text?
385
+ else
386
+ child.text == text
387
+ end
388
+ }, max, name, &block )
389
+ end
390
+
391
+ # Synonym for Element.elements.each
392
+ def each_element( xpath=nil, &block ) # :yields: Element
393
+ @elements.each( xpath, &block )
394
+ end
395
+
396
+ # Synonym for Element.to_a
397
+ # This is a little slower than calling elements.each directly.
398
+ # xpath:: any XPath by which to search for elements in the tree
399
+ # Returns:: an array of Elements that match the supplied path
400
+ def get_elements( xpath )
401
+ @elements.to_a( xpath )
402
+ end
403
+
404
+ # Returns the next sibling that is an element, or nil if there is
405
+ # no Element sibling after this one
406
+ # doc = Document.new '<a><b/>text<c/></a>'
407
+ # doc.root.elements['b'].next_element #-> <c/>
408
+ # doc.root.elements['c'].next_element #-> nil
409
+ def next_element
410
+ element = next_sibling
411
+ element = element.next_sibling until element.nil? or element.kind_of? Element
412
+ return element
413
+ end
414
+
415
+ # Returns the previous sibling that is an element, or nil if there is
416
+ # no Element sibling prior to this one
417
+ # doc = Document.new '<a><b/>text<c/></a>'
418
+ # doc.root.elements['c'].previous_element #-> <b/>
419
+ # doc.root.elements['b'].previous_element #-> nil
420
+ def previous_element
421
+ element = previous_sibling
422
+ element = element.previous_sibling until element.nil? or element.kind_of? Element
423
+ return element
424
+ end
425
+
426
+
427
+ #################################################
428
+ # Text #
429
+ #################################################
430
+
431
+ # Evaluates to +true+ if this element has at least one Text child
432
+ def has_text?
433
+ not text().nil?
434
+ end
435
+
436
+ # A convenience method which returns the String value of the _first_
437
+ # child text element, if one exists, and +nil+ otherwise.
438
+ #
439
+ # <em>Note that an element may have multiple Text elements, perhaps
440
+ # separated by other children</em>. Be aware that this method only returns
441
+ # the first Text node.
442
+ #
443
+ # This method returns the +value+ of the first text child node, which
444
+ # ignores the +raw+ setting, so always returns normalized text. See
445
+ # the Text::value documentation.
446
+ #
447
+ # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
448
+ # # The element 'p' has two text elements, "some text " and " more text".
449
+ # doc.root.text #-> "some text "
450
+ def text( path = nil )
451
+ rv = get_text(path)
452
+ return rv.value unless rv.nil?
453
+ nil
454
+ end
455
+
456
+ # Returns the first child Text node, if any, or +nil+ otherwise.
457
+ # This method returns the actual +Text+ node, rather than the String content.
458
+ # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
459
+ # # The element 'p' has two text elements, "some text " and " more text".
460
+ # doc.root.get_text.value #-> "some text "
461
+ def get_text path = nil
462
+ rv = nil
463
+ if path
464
+ element = @elements[ path ]
465
+ rv = element.get_text unless element.nil?
466
+ else
467
+ rv = @children.find { |node| node.kind_of? Text }
468
+ end
469
+ return rv
470
+ end
471
+
472
+ # Sets the first Text child of this object. See text() for a
473
+ # discussion about Text children.
474
+ #
475
+ # If a Text child already exists, the child is replaced by this
476
+ # content. This means that Text content can be deleted by calling
477
+ # this method with a nil argument. In this case, the next Text
478
+ # child becomes the first Text child. In no case is the order of
479
+ # any siblings disturbed.
480
+ # text::
481
+ # If a String, a new Text child is created and added to
482
+ # this Element as the first Text child. If Text, the text is set
483
+ # as the first Child element. If nil, then any existing first Text
484
+ # child is removed.
485
+ # Returns:: this Element.
486
+ # doc = Document.new '<a><b/></a>'
487
+ # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
488
+ # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
489
+ # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
490
+ # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
491
+ # doc.root.text = nil #-> '<a><b/><c/></a>'
492
def text=(text)
  # Anything that is neither nil nor already a Text node is wrapped in a new
  # Text node; non-String values are stringified with to_s first.
  unless text.nil? || text.kind_of?(Text)
    content = text.kind_of?(String) ? text : text.to_s
    text = Text.new(content, whitespace(), nil, raw())
  end

  current = get_text
  if text.nil?
    # nil means "drop the current first Text child", if there is one.
    current.remove unless current.nil?
  elsif current.nil?
    self << text
  else
    # Replace in place so the position among the siblings is kept.
    current.replace_with(text)
  end
  self
end
510
+
511
+ # A helper method to add a Text child. Actual Text instances can
512
+ # be added with regular Parent methods, such as add() and <<()
513
+ # text::
514
+ # if a String, a new Text instance is created and added
515
+ # to the parent. If Text, the object is added directly.
516
+ # Returns:: this Element
517
+ # e = Element.new('e') #-> <e/>
518
+ # e.add_text 'foo' #-> <e>foo</e>
519
+ # e.add_text Text.new(' bar') #-> <e>foo bar</e>
520
+ # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
521
+ # element and <b>2</b> Text node children.
522
def add_text(text)
  if text.kind_of? String
    last = @children[-1]
    if last.kind_of? Text
      # Merge into the trailing Text node instead of adding a sibling.
      # Return self here as well, so both paths honour the documented
      # "Returns:: this Element" contract (this path used to return nil).
      last << text
      return self
    end
    text = Text.new(text, whitespace(), nil, raw())
  end
  # A nil argument adds nothing.
  self << text unless text.nil?
  self
end
533
+
534
def node_type
  # Constant tag identifying this node as an element.
  :element
end
537
+
538
def xpath
  # Walk from this node up through every parent, prepending each step so the
  # outermost ancestor ends up first, then join the steps with "/".
  steps = []
  node = self
  while node
    steps.unshift(__to_xpath_helper(node))
    node = node.parent
  end
  steps.join("/")
end
548
+
549
+ #################################################
550
+ # Attributes #
551
+ #################################################
552
+
553
def attribute(name, namespace = nil)
  prefix = nil
  if namespace
    declared = namespaces
    # Hash#index is deprecated in favour of Hash#key; use #key whenever the
    # Hash provides it and only fall back to #index otherwise.
    prefix = declared.respond_to?(:key) ? declared.key(namespace) : declared.index(namespace)
  end
  # A match on the default-namespace key "xmlns" is not a usable prefix.
  prefix = nil if prefix == 'xmlns'
  attributes.get_attribute("#{prefix ? prefix + ':' : ''}#{name}")
end
559
+
560
+ # Evaluates to +true+ if this element has any attributes set, false
561
+ # otherwise.
562
def has_attributes?
  # true as soon as the attribute collection holds at least one entry.
  !@attributes.empty?
end
565
+
566
+ # Adds an attribute to this element, overwriting any existing attribute
567
+ # by the same name.
568
+ # key::
569
+ # can be either an Attribute or a String. If an Attribute,
570
+ # the attribute is added to the list of Element attributes. If String,
571
+ # the argument is used as the name of the new attribute, and the value
572
+ # parameter must be supplied.
573
+ # value::
574
+ # Required if +key+ is a String, and ignored if the first argument is
575
+ # an Attribute. This is a String, and is used as the value
576
+ # of the new Attribute. This should be the unnormalized value of the
577
+ # attribute (without entities).
578
+ # Returns:: the Attribute added
579
+ # e = Element.new 'e'
580
+ # e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
581
+ # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
582
+ # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
583
def add_attribute(key, value = nil)
  if key.kind_of? Attribute
    # A ready-made Attribute carries its own name and value; +value+ is ignored.
    @attributes << key
  else
    @attributes[key] = value
  end
end
590
+
591
+ # Add multiple attributes to this element.
592
+ # hash:: is either a hash, or array of arrays
593
+ # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
594
+ # el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
595
def add_attributes(hash)
  # Accepts either a Hash of name => value or an Array of [name, value]
  # pairs; any other argument is silently ignored.
  case hash
  when Hash
    hash.each_pair { |key, value| @attributes[key] = value }
  when Array
    hash.each { |pair| @attributes[pair[0]] = pair[1] }
  end
end
602
+
603
+ # Removes an attribute
604
+ # key::
605
+ # either an Attribute or a String. In either case, the
606
+ # attribute is found by matching the attribute name to the argument,
607
+ # and then removed. If no attribute is found, no action is taken.
608
+ # Returns::
609
+ # the attribute removed, or nil if this Element did not contain
610
+ # a matching attribute
611
+ # e = Element.new('E')
612
+ # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
613
+ # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
614
+ # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
615
+ # e.delete_attribute( r ) #-> <E/>
616
def delete_attribute(key)
  # Look the attribute up first; when nothing matches this is a no-op that
  # returns nil.
  found = @attributes.get_attribute(key)
  found.remove unless found.nil?
end
620
+
621
+ #################################################
622
+ # Other Utilities #
623
+ #################################################
624
+
625
+ # Get an array of all CData children.
626
+ # IMMUTABLE
627
def cdatas
  # Returns a frozen Array holding the children that are CData nodes.
  find_all { |node| node.kind_of? CData }.freeze
end
630
+
631
+ # Get an array of all Comment children.
632
+ # IMMUTABLE
633
def comments
  # Returns a frozen Array holding the children that are Comment nodes.
  find_all { |node| node.kind_of? Comment }.freeze
end
636
+
637
+ # Get an array of all Instruction children.
638
+ # IMMUTABLE
639
def instructions
  # Returns a frozen Array holding the children that are Instruction nodes.
  find_all { |node| node.kind_of? Instruction }.freeze
end
642
+
643
+ # Get an array of all Text children.
644
+ # IMMUTABLE
645
def texts
  # Returns a frozen Array holding the children that are Text nodes.
  find_all { |node| node.kind_of? Text }.freeze
end
648
+
649
+ # == DEPRECATED
650
+ # See REXML::Formatters
651
+ #
652
+ # Writes out this element, and recursively, all children.
653
+ # output::
654
+ # output an object which supports '<< string'; this is where the
655
+ # document will be written.
656
+ # indent::
657
+ # An integer. If -1, no indenting will be used; otherwise, the
658
+ # indentation will be this number of spaces, and children will be
659
+ # indented an additional amount. Defaults to -1
660
+ # transitive::
661
+ # If transitive is true and indent is >= 0, then the output will be
662
+ # pretty-printed in such a way that the added whitespace does not affect
663
+ # the parse tree of the document
664
+ # ie_hack::
665
+ # Internet Explorer is the worst piece of crap to have ever been
666
+ # written, with the possible exception of Windows itself. Since IE is
667
+ # unable to parse proper XML, we have to provide a hack to generate XML
668
+ # that IE's limited abilities can handle. This hack inserts a space
669
+ # before the /> on empty tags. Defaults to false
670
+ #
671
+ # out = ''
672
+ # doc.write( out ) #-> doc is written to the string 'out'
673
+ # doc.write( $stdout ) #-> doc written to the console
674
def write(writer=$stdout, indent=-1, transitive=false, ie_hack=false)
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
  # indent > -1 selects a pretty-printing formatter (Transitive when
  # +transitive+ is set, Pretty otherwise); any other indent uses Default.
  formatter =
    if indent > -1
      formatter_class = transitive ? REXML::Formatters::Transitive : REXML::Formatters::Pretty
      formatter_class.new(indent, ie_hack)
    else
      REXML::Formatters::Default.new(ie_hack)
    end
  # The destination parameter is named +writer+; the body previously passed
  # an undefined +output+ here, which raised NameError on every call.
  formatter.write(self, writer)
end
687
+
688
+
689
+ private
690
def __to_xpath_helper(node)
  # Work on a copy so the node's own expanded_name is never mutated by <<.
  step = node.expanded_name.clone
  parent = node.parent
  if parent
    same_name = parent.find_all { |sibling|
      sibling.kind_of?(REXML::Element) && sibling.expanded_name == node.expanded_name
    }
    # A positional predicate (1-based) is only needed when several sibling
    # elements share this expanded name.
    step << "[#{same_name.index(node) + 1}]" if same_name.length > 1
  end
  step
end
703
+
704
+ # A private helper method
705
def each_with_something(test, max = 0, name = nil)
  # test:: callable deciding whether a child element is yielded
  # max::  stop once this many elements have matched; 0 means no limit
  # name:: handed to @elements.each to pre-filter the children
  matched = 0
  @elements.each(name) do |element|
    if test.call(element)
      matched += 1
      yield element
    end
    return if max > 0 && matched == max
  end
end
713
+ end
714
+
715
+ ########################################################################
716
+ # ELEMENTS #
717
+ ########################################################################
718
+
719
+ # A class which provides filtering of children for Elements, and
720
+ # XPath search support. You are expected to only encounter this class as
721
+ # the <tt>element.elements</tt> object. Therefore, you are
722
+ # _not_ expected to instantiate this yourself.
723
class Elements
  include Enumerable

  # Constructor
  # parent:: the parent Element
  def initialize(parent)
    @element = parent
  end

  # Fetches a child element.
  # index::
  #   the search parameter.  This is either an Integer, which is used to
  #   find the index'th child Element (non-Element children are not
  #   counted), or an XPath, which is handed to XPath::first and whose
  #   result is returned as is.  <em>Because of the nature of XPath
  #   searches, any element in the connected XML document can be fetched
  #   through any other element.</em>  <b>The Integer index is 1-based,
  #   not 0-based.</b>  This means that the first child element is at
  #   index 1, not 0, and the +n+th element is at index +n+, not
  #   <tt>n-1</tt>.  This is because XPath indexes element children
  #   starting from 1, not 0, and the indexes should be the same.
  # name::
  #   optional, and only used if the first argument is an Integer.  In
  #   that case, the index'th child Element that has the supplied name is
  #   returned.  Note again that the indexes start at 1.
  # Returns:: the first matching Element, or nil if no child matched
  # Raises:: RuntimeError if an Integer index is smaller than 1
  #  doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
  #  doc.root.elements[1]       #-> <b/>
  #  doc.root.elements['c']     #-> <c id="1"/>
  #  doc.root.elements[2,'c']   #-> <c id="2"/>
  def [](index, name = nil)
    return XPath::first(@element, index) unless index.kind_of? Integer

    raise "index (#{index}) must be >= 1" if index < 1
    name = literalize(name) if name
    seen = 0
    @element.find do |child|
      child.kind_of?(Element) &&
        (name.nil? || child.has_name?(name)) &&
        (seen += 1) == index
    end
  end

  # Sets an element, replacing any previous matching element.  If no
  # existing element is found, the element is added.
  # index:: Used to find a matching element to replace.  See []().
  # element:: The element to replace the existing element with
  # Returns:: the previous element, or nil if no previous element was found.
  #
  #  doc = Document.new '<a/>'
  #  doc.root.elements[10] = Element.new('b')    #-> <a><b/></a>
  #  doc.root.elements[1]                        #-> <b/>
  #  doc.root.elements[1] = Element.new('c')     #-> <a><c/></a>
  #  doc.root.elements['c'] = Element.new('d')   #-> <a><d/></a>
  def []=(index, element)
    previous = self[index]
    if previous.nil?
      @element.add element
    else
      previous.replace_with element
    end
    previous
  end

  # Returns +true+ if there are no +Element+ children, +false+ otherwise
  def empty?
    @element.find { |child| child.kind_of? Element }.nil?
  end

  # Returns the index of the supplied child (starting at 1), or -1 if
  # the element is not a child
  # element:: an +Element+ child
  def index(element)
    position = 0
    found = @element.find do |child|
      child.kind_of?(Element) &&
        (position += 1) &&
        child == element
    end
    # +found+ is only non-nil when a child compared equal to +element+, so
    # testing it directly also reports -1 for index(nil) instead of the
    # number of element children.
    found ? position : -1
  end

  # Deletes a child Element
  # element::
  #   Either an Element, which is removed directly; an
  #   xpath, where the first matching child is removed; or an Integer,
  #   where the n'th Element is removed.
  # Returns:: the removed child
  #  doc = Document.new '<a><b/><c/><c id="1"/></a>'
  #  b = doc.root.elements[1]
  #  doc.root.elements.delete b           #-> <a><c/><c id="1"/></a>
  #  doc.elements.delete("a/c[@id='1']")  #-> <a><c/></a>
  #  doc.root.elements.delete 1           #-> <a/>
  def delete(element)
    if element.kind_of? Element
      @element.delete element
    else
      el = self[element]
      el.remove if el
    end
  end

  # Removes multiple elements.  Filters for Element children, regardless of
  # XPath matching.
  # xpath:: all elements matching this String path are removed.
  # Returns:: an Array of Elements that have been removed
  #  doc = Document.new '<a><c/><c/><c/><c/></a>'
  #  deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
  def delete_all(xpath)
    removed = []
    # Collect first, then delete, so the tree is not modified while the
    # XPath iteration is still running.
    XPath::each(@element, xpath) { |node| removed << node if node.kind_of? Element }
    removed.each do |element|
      @element.delete element
      element.remove
    end
    removed
  end

  # Adds an element
  # element::
  #   if supplied, is either an Element, String, or
  #   Source (see Element.initialize).  If not supplied or nil, a
  #   new, default Element will be constructed
  # Returns:: the added Element
  #  a = Element.new('a')
  #  a.elements.add(Element.new('b'))  #-> <a><b/></a>
  #  a.elements.add('c')               #-> <a><b/><c/></a>
  def add(element = nil)
    if element.nil?
      # This Elements object is passed where Element.new takes its parent.
      Element.new("", self, @element.context)
    elsif not element.kind_of?(Element)
      Element.new(element, self, @element.context)
    else
      @element << element
      element.context = @element.context
      element
    end
  end

  alias :<< :add

  # Iterates through all of the child Elements, optionally filtering
  # them by a given XPath
  # xpath::
  #   optional.  If supplied, this is a String XPath, and is used to
  #   filter the children, so that only matching children are yielded.  Note
  #   that XPaths are automatically filtered for Elements, so that
  #   non-Element children will not be yielded
  #  doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
  #  doc.root.elements.each {|e|p e}       #-> Yields b, c, d, b, c, d elements
  #  doc.root.elements.each('b') {|e|p e}  #-> Yields b, b elements
  #  doc.root.elements.each('child::node()')  {|e|p e}
  #  #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
  #  XPath.each(doc.root, 'child::node()', &block)
  #  #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
  def each(xpath = nil, &block)
    XPath::each(@element, xpath) { |e| yield e if e.kind_of? Element }
  end

  # Like each, but gathers the block's result for every Element into an
  # Array, which is returned.
  def collect(xpath = nil, &block)
    collection = []
    XPath::each(@element, xpath) do |e|
      collection << yield(e) if e.kind_of?(Element)
    end
    collection
  end

  # Folds the matching Elements into a single value.  When +initial+ is nil
  # the first Element seeds the accumulator and is not yielded itself.
  def inject(xpath = nil, initial = nil, &block)
    first = true
    XPath::each(@element, xpath) do |e|
      next unless e.kind_of? Element
      if first && initial.nil?
        initial = e
        first = false
      else
        initial = yield(initial, e)
      end
    end
    initial
  end

  # Returns the number of +Element+ children of the parent object.
  #  doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
  #  doc.root.size            #-> 6, 3 element and 3 text nodes
  #  doc.root.elements.size   #-> 3
  def size
    count = 0
    @element.each { |child| count += 1 if child.kind_of? Element }
    count
  end

  # Returns an Array of Element children.  An XPath may be supplied to
  # filter the children.  Only Element children are returned, even if the
  # supplied XPath matches non-Element children.
  #  doc = Document.new '<a>sean<b/>elliott<c/></a>'
  #  doc.root.elements.to_a                  #-> [ <b/>, <c/> ]
  #  doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
  #  XPath.match(doc.root, "child::node()")  #-> [ sean, <b/>, elliott, <c/> ]
  def to_a(xpath = nil)
    matches = XPath.match(@element, xpath)
    # Only an explicit XPath is filtered here; without one the result of
    # XPath.match is returned unchanged.
    return matches.find_all { |e| e.kind_of? Element } if xpath
    matches
  end

  private

  # Private helper method.  Removes quotes from quoted strings
  def literalize(name)
    name = name[1..-2] if name[0] == ?' or name[0] == ?"  #'
    name
  end
end
947
+
948
+ ########################################################################
949
+ # ATTRIBUTES #
950
+ ########################################################################
951
+
952
+ # A class that defines the set of Attributes of an Element and provides
953
+ # operations for accessing elements in that set.
954
class Attributes < Hash
  # Constructor
  # element:: the Element that owns this set of attributes
  def initialize(element)
    @element = element
  end

  # Fetches an attribute value.  If you want to get the Attribute itself,
  # use get_attribute()
  # name:: an XPath attribute name.  Namespaces are relevant here.
  # Returns::
  #   the String value of the matching attribute, or +nil+ if no
  #   matching attribute was found.  This is the unnormalized value
  #   (with entities expanded).
  #
  #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
  #  doc.root.attributes['att']         #-> '<'
  #  doc.root.attributes['bar:att']     #-> '2'
  def [](name)
    attr = get_attribute(name)
    attr.nil? ? nil : attr.value
  end

  # Returns an Array of all the Attribute nodes in this set.
  def to_a
    # Go through each_attribute rather than values.flatten: a name shared
    # by several prefixes is stored as a nested Hash, and Array#flatten
    # does not unpack Hashes, so those buckets used to leak into the result.
    list = []
    each_attribute { |attr| list << attr }
    list
  end

  # Returns the number of attributes the owning Element contains.
  #  doc = Document.new "<a x='1' y='2' foo:x='3'/>"
  #  doc.root.attributes.length        #-> 3
  def length
    count = 0
    each_attribute { count += 1 }
    count
  end
  alias :size :length

  # Iterates over the attributes of an Element.  Yields actual Attribute
  # nodes, not String values.
  #
  #  doc = Document.new '<a x="1" y="2"/>'
  #  doc.root.attributes.each_attribute {|attr|
  #    p attr.expanded_name+" => "+attr.value
  #  }
  def each_attribute # :yields: attribute
    each_value do |val|
      if val.kind_of? Attribute
        yield val
      else
        # Not an Attribute: a nested prefix => Attribute Hash (see []=).
        val.each_value { |atr| yield atr }
      end
    end
  end

  # Iterates over each attribute of an Element, yielding the expanded name
  # and value as a pair of Strings.
  #
  #  doc = Document.new '<a x="1" y="2"/>'
  #  doc.root.attributes.each {|name, value| p name+" => "+value }
  def each
    each_attribute do |attr|
      yield attr.expanded_name, attr.value
    end
  end

  # Fetches an attribute
  # name::
  #   the name by which to search for the attribute.  Can be a
  #   <tt>prefix:name</tt> namespace name.
  # Returns:: The first matching attribute, or nil if there was none.  This
  # value is an Attribute node, not the String value of the attribute.
  #  doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
  #  doc.root.attributes.get_attribute("foo").value    #-> "2"
  #  doc.root.attributes.get_attribute("x:foo").value  #-> "1"
  def get_attribute(name)
    attr = fetch(name, nil)
    if attr.nil?
      return nil if name.nil?
      # Entries are keyed by local name, so retry without the prefix.
      name =~ Namespace::NAMESPLIT
      prefix, n = $1, $2
      if prefix
        attr = fetch(n, nil)
        if attr.kind_of? Attribute
          return attr if prefix == attr.prefix
        elsif !attr.nil?
          # Several prefixes share this local name; pick the requested one.
          return attr[prefix]
        end
      end
      # Fall back to a value supplied by the document's doctype, if any.
      element_document = @element.document
      if element_document and element_document.doctype
        expn = @element.expanded_name
        expn = element_document.doctype.name if expn.size == 0
        attr_val = element_document.doctype.attribute_of(expn, name)
        return Attribute.new(name, attr_val) if attr_val
      end
      return nil
    end
    # A direct hit on a shared local name: choose the entry stored under
    # the owning element's prefix.
    attr = attr[@element.prefix] if attr.kind_of? Hash
    attr
  end

  # Sets an attribute, overwriting any existing attribute value by the
  # same name.  Namespace is significant.
  # name:: the name of the attribute
  # value::
  #   (optional) If supplied, the value of the attribute.  If
  #   nil, any existing matching attribute is deleted.
  # Returns::
  #   Owning element (nil when called through send with a nil value)
  # Raises::
  #   ParseException when the new and the existing attribute have different
  #   prefixes that resolve to the same namespace
  #  doc = Document.new "<a x:foo='1' foo='3'/>"
  #  doc.root.attributes['y:foo'] = '2'
  #  doc.root.attributes['foo'] = '4'
  #  doc.root.attributes['x:foo'] = nil
  def []=(name, value)
    if value.nil?           # Delete the named attribute
      delete get_attribute(name)
      return
    end

    unless value.kind_of? Attribute
      element_document = @element.document
      doctype = (element_document and element_document.doctype) ? element_document.doctype : nil
      value = Attribute.new(name, Text::normalize(value, doctype))
    end
    value.element = @element

    old_attr = fetch(value.name, nil)
    if old_attr.nil?
      store(value.name, value)
    elsif old_attr.kind_of? Hash
      old_attr[value.prefix] = value
    elsif old_attr.prefix != value.prefix
      # Check for conflicting namespaces
      if value.prefix != "xmlns" && old_attr.prefix != "xmlns" &&
         @element.namespace(old_attr.prefix) == @element.namespace(value.prefix)
        raise ParseException.new(
          "Namespace conflict in adding attribute \"#{value.name}\": "+
          "Prefix \"#{old_attr.prefix}\" = "+
          "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
          "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"")
      end
      # Same local name under two prefixes: keep both in a nested Hash.
      store value.name, { old_attr.prefix => old_attr,
                          value.prefix    => value }
    else
      store value.name, value
    end
    @element
  end

  # Returns an array of Strings containing all of the prefixes declared
  # by this set of attributes.  The array does not include the default
  # namespace declaration, if one exists.
  #  doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
  #        "z='glorp' p:k='gru'/>")
  #  prefixes = doc.root.attributes.prefixes    #-> ['x', 'y']
  def prefixes
    ns = []
    each_attribute do |attribute|
      ns << attribute.name if attribute.prefix == 'xmlns'
    end
    element_document = @element.document
    if element_document and element_document.doctype
      expn = @element.expanded_name
      expn = element_document.doctype.name if expn.size == 0
      element_document.doctype.attributes_of(expn).each do |attribute|
        ns << attribute.name if attribute.prefix == 'xmlns'
      end
    end
    ns
  end

  # Returns a Hash mapping attribute name to value for every attribute
  # whose prefix or name is 'xmlns', including those the doctype reports
  # for the owning element.
  def namespaces
    namespaces = {}
    each_attribute do |attribute|
      namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
    end
    element_document = @element.document
    if element_document and element_document.doctype
      expn = @element.expanded_name
      expn = element_document.doctype.name if expn.size == 0
      element_document.doctype.attributes_of(expn).each do |attribute|
        namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
      end
    end
    namespaces
  end

  # Removes an attribute
  # attribute::
  #   either a String, which is the name of the attribute to remove --
  #   namespaces are significant here -- or the attribute to remove.
  # Returns:: the owning element
  #  doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
  #  doc.root.attributes.delete 'foo'   #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
  #  doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
  #  attr = doc.root.attributes.get_attribute('y:foo')
  #  doc.root.attributes.delete attr    #-> <a z:foo='4'/>"
  def delete(attribute)
    if attribute.kind_of? Attribute
      name = attribute.name
      prefix = attribute.prefix
    else
      attribute =~ Namespace::NAMESPLIT
      prefix, name = $1, $2
      prefix = '' unless prefix
    end
    old = fetch(name, nil)
    if old.kind_of? Hash # the supplied attribute is one of many
      old.delete(prefix)
      # Collapse the nested Hash once a single attribute is left in it.
      store name, old.values.first if old.size == 1
    elsif !old.nil? # the supplied attribute is a top-level one
      super(name)
    end
    @element
  end

  # Adds an attribute, overriding any existing attribute by the
  # same name.  Namespaces are significant.
  # attribute:: An Attribute
  def add(attribute)
    self[attribute.name] = attribute
  end

  alias :<< :add

  # Deletes all attributes matching a name.  Namespaces are significant.
  # name::
  #   A String; all attributes that match this path will be removed
  # Returns:: an Array of the Attributes that were removed
  def delete_all(name)
    removed = []
    # Collect first so the set is not modified while it is being iterated.
    each_attribute do |attribute|
      removed << attribute if attribute.expanded_name == name
    end
    removed.each { |attr| attr.remove }
    removed
  end

  # The +get_attribute_ns+ method retrieves an attribute by its namespace
  # and name.  Thus it is possible to reliably identify an attribute
  # even if an XML processor has changed the prefix.
  #
  # Method contributed by Henrik Martensson
  def get_attribute_ns(namespace, name)
    each_attribute do |attribute|
      if name == attribute.name &&
         namespace == attribute.namespace()
        return attribute
      end
    end
    nil
  end
end
1227
+ end