nokogiri-maglev- 1.5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. data/.autotest +26 -0
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.ja.rdoc +544 -0
  4. data/CHANGELOG.rdoc +532 -0
  5. data/Manifest.txt +283 -0
  6. data/README.ja.rdoc +106 -0
  7. data/README.rdoc +174 -0
  8. data/Rakefile +171 -0
  9. data/bin/nokogiri +53 -0
  10. data/ext/nokogiri/depend +358 -0
  11. data/ext/nokogiri/extconf.rb +124 -0
  12. data/ext/nokogiri/html_document.c +154 -0
  13. data/ext/nokogiri/html_document.h +10 -0
  14. data/ext/nokogiri/html_element_description.c +276 -0
  15. data/ext/nokogiri/html_element_description.h +10 -0
  16. data/ext/nokogiri/html_entity_lookup.c +32 -0
  17. data/ext/nokogiri/html_entity_lookup.h +8 -0
  18. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  19. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  20. data/ext/nokogiri/nokogiri.c +115 -0
  21. data/ext/nokogiri/nokogiri.h +160 -0
  22. data/ext/nokogiri/st.c +576 -0
  23. data/ext/nokogiri/xml_attr.c +94 -0
  24. data/ext/nokogiri/xml_attr.h +9 -0
  25. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  26. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  27. data/ext/nokogiri/xml_cdata.c +56 -0
  28. data/ext/nokogiri/xml_cdata.h +9 -0
  29. data/ext/nokogiri/xml_comment.c +54 -0
  30. data/ext/nokogiri/xml_comment.h +9 -0
  31. data/ext/nokogiri/xml_document.c +478 -0
  32. data/ext/nokogiri/xml_document.h +23 -0
  33. data/ext/nokogiri/xml_document_fragment.c +48 -0
  34. data/ext/nokogiri/xml_document_fragment.h +10 -0
  35. data/ext/nokogiri/xml_dtd.c +202 -0
  36. data/ext/nokogiri/xml_dtd.h +10 -0
  37. data/ext/nokogiri/xml_element_content.c +123 -0
  38. data/ext/nokogiri/xml_element_content.h +10 -0
  39. data/ext/nokogiri/xml_element_decl.c +69 -0
  40. data/ext/nokogiri/xml_element_decl.h +9 -0
  41. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  42. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  43. data/ext/nokogiri/xml_entity_decl.c +110 -0
  44. data/ext/nokogiri/xml_entity_decl.h +10 -0
  45. data/ext/nokogiri/xml_entity_reference.c +52 -0
  46. data/ext/nokogiri/xml_entity_reference.h +9 -0
  47. data/ext/nokogiri/xml_io.c +56 -0
  48. data/ext/nokogiri/xml_io.h +11 -0
  49. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  50. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  51. data/ext/nokogiri/xml_namespace.c +84 -0
  52. data/ext/nokogiri/xml_namespace.h +13 -0
  53. data/ext/nokogiri/xml_node.c +1397 -0
  54. data/ext/nokogiri/xml_node.h +13 -0
  55. data/ext/nokogiri/xml_node_set.c +418 -0
  56. data/ext/nokogiri/xml_node_set.h +9 -0
  57. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  58. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  59. data/ext/nokogiri/xml_reader.c +684 -0
  60. data/ext/nokogiri/xml_reader.h +10 -0
  61. data/ext/nokogiri/xml_relax_ng.c +162 -0
  62. data/ext/nokogiri/xml_relax_ng.h +9 -0
  63. data/ext/nokogiri/xml_sax_parser.c +293 -0
  64. data/ext/nokogiri/xml_sax_parser.h +39 -0
  65. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  66. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  67. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  68. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  69. data/ext/nokogiri/xml_schema.c +205 -0
  70. data/ext/nokogiri/xml_schema.h +9 -0
  71. data/ext/nokogiri/xml_syntax_error.c +58 -0
  72. data/ext/nokogiri/xml_syntax_error.h +13 -0
  73. data/ext/nokogiri/xml_text.c +50 -0
  74. data/ext/nokogiri/xml_text.h +9 -0
  75. data/ext/nokogiri/xml_xpath_context.c +315 -0
  76. data/ext/nokogiri/xml_xpath_context.h +9 -0
  77. data/ext/nokogiri/xslt_stylesheet.c +265 -0
  78. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  79. data/lib/nokogiri.rb +127 -0
  80. data/lib/nokogiri/css.rb +27 -0
  81. data/lib/nokogiri/css/node.rb +99 -0
  82. data/lib/nokogiri/css/parser.rb +677 -0
  83. data/lib/nokogiri/css/parser.y +237 -0
  84. data/lib/nokogiri/css/parser_extras.rb +91 -0
  85. data/lib/nokogiri/css/syntax_error.rb +7 -0
  86. data/lib/nokogiri/css/tokenizer.rb +152 -0
  87. data/lib/nokogiri/css/tokenizer.rex +55 -0
  88. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  89. data/lib/nokogiri/decorators/slop.rb +35 -0
  90. data/lib/nokogiri/html.rb +36 -0
  91. data/lib/nokogiri/html/builder.rb +35 -0
  92. data/lib/nokogiri/html/document.rb +213 -0
  93. data/lib/nokogiri/html/document_fragment.rb +41 -0
  94. data/lib/nokogiri/html/element_description.rb +23 -0
  95. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  96. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  97. data/lib/nokogiri/html/sax/parser.rb +52 -0
  98. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  99. data/lib/nokogiri/syntax_error.rb +4 -0
  100. data/lib/nokogiri/version.rb +88 -0
  101. data/lib/nokogiri/xml.rb +67 -0
  102. data/lib/nokogiri/xml/attr.rb +14 -0
  103. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  104. data/lib/nokogiri/xml/builder.rb +426 -0
  105. data/lib/nokogiri/xml/cdata.rb +11 -0
  106. data/lib/nokogiri/xml/character_data.rb +7 -0
  107. data/lib/nokogiri/xml/document.rb +234 -0
  108. data/lib/nokogiri/xml/document_fragment.rb +98 -0
  109. data/lib/nokogiri/xml/dtd.rb +22 -0
  110. data/lib/nokogiri/xml/element_content.rb +36 -0
  111. data/lib/nokogiri/xml/element_decl.rb +13 -0
  112. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  113. data/lib/nokogiri/xml/namespace.rb +13 -0
  114. data/lib/nokogiri/xml/node.rb +915 -0
  115. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  116. data/lib/nokogiri/xml/node_set.rb +357 -0
  117. data/lib/nokogiri/xml/notation.rb +6 -0
  118. data/lib/nokogiri/xml/parse_options.rb +93 -0
  119. data/lib/nokogiri/xml/pp.rb +2 -0
  120. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  121. data/lib/nokogiri/xml/pp/node.rb +56 -0
  122. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  123. data/lib/nokogiri/xml/reader.rb +112 -0
  124. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  125. data/lib/nokogiri/xml/sax.rb +4 -0
  126. data/lib/nokogiri/xml/sax/document.rb +164 -0
  127. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  128. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  129. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  130. data/lib/nokogiri/xml/schema.rb +63 -0
  131. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  132. data/lib/nokogiri/xml/text.rb +9 -0
  133. data/lib/nokogiri/xml/xpath.rb +10 -0
  134. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  135. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  136. data/lib/nokogiri/xslt.rb +52 -0
  137. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  138. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  139. data/nokogiri_help_responses.md +40 -0
  140. data/tasks/cross_compile.rb +152 -0
  141. data/tasks/nokogiri.org.rb +18 -0
  142. data/tasks/test.rb +94 -0
  143. data/test/css/test_nthiness.rb +159 -0
  144. data/test/css/test_parser.rb +303 -0
  145. data/test/css/test_tokenizer.rb +198 -0
  146. data/test/css/test_xpath_visitor.rb +85 -0
  147. data/test/decorators/test_slop.rb +16 -0
  148. data/test/files/2ch.html +108 -0
  149. data/test/files/address_book.rlx +12 -0
  150. data/test/files/address_book.xml +10 -0
  151. data/test/files/bar/bar.xsd +4 -0
  152. data/test/files/dont_hurt_em_why.xml +422 -0
  153. data/test/files/encoding.html +82 -0
  154. data/test/files/encoding.xhtml +84 -0
  155. data/test/files/exslt.xml +8 -0
  156. data/test/files/exslt.xslt +35 -0
  157. data/test/files/foo/foo.xsd +4 -0
  158. data/test/files/metacharset.html +10 -0
  159. data/test/files/noencoding.html +47 -0
  160. data/test/files/po.xml +32 -0
  161. data/test/files/po.xsd +66 -0
  162. data/test/files/shift_jis.html +10 -0
  163. data/test/files/shift_jis.xml +5 -0
  164. data/test/files/snuggles.xml +3 -0
  165. data/test/files/staff.dtd +10 -0
  166. data/test/files/staff.xml +59 -0
  167. data/test/files/staff.xslt +32 -0
  168. data/test/files/tlm.html +850 -0
  169. data/test/files/valid_bar.xml +2 -0
  170. data/test/helper.rb +173 -0
  171. data/test/html/sax/test_parser.rb +139 -0
  172. data/test/html/sax/test_parser_context.rb +48 -0
  173. data/test/html/test_builder.rb +165 -0
  174. data/test/html/test_document.rb +472 -0
  175. data/test/html/test_document_encoding.rb +138 -0
  176. data/test/html/test_document_fragment.rb +255 -0
  177. data/test/html/test_element_description.rb +101 -0
  178. data/test/html/test_named_characters.rb +14 -0
  179. data/test/html/test_node.rb +193 -0
  180. data/test/html/test_node_encoding.rb +27 -0
  181. data/test/test_convert_xpath.rb +135 -0
  182. data/test/test_css_cache.rb +45 -0
  183. data/test/test_encoding_handler.rb +46 -0
  184. data/test/test_memory_leak.rb +72 -0
  185. data/test/test_nokogiri.rb +133 -0
  186. data/test/test_reader.rb +425 -0
  187. data/test/test_soap4r_sax.rb +52 -0
  188. data/test/test_xslt_transforms.rb +193 -0
  189. data/test/xml/node/test_save_options.rb +28 -0
  190. data/test/xml/node/test_subclass.rb +44 -0
  191. data/test/xml/sax/test_parser.rb +338 -0
  192. data/test/xml/sax/test_parser_context.rb +113 -0
  193. data/test/xml/sax/test_push_parser.rb +156 -0
  194. data/test/xml/test_attr.rb +65 -0
  195. data/test/xml/test_attribute_decl.rb +86 -0
  196. data/test/xml/test_builder.rb +227 -0
  197. data/test/xml/test_cdata.rb +50 -0
  198. data/test/xml/test_comment.rb +29 -0
  199. data/test/xml/test_document.rb +697 -0
  200. data/test/xml/test_document_encoding.rb +26 -0
  201. data/test/xml/test_document_fragment.rb +192 -0
  202. data/test/xml/test_dtd.rb +107 -0
  203. data/test/xml/test_dtd_encoding.rb +33 -0
  204. data/test/xml/test_element_content.rb +56 -0
  205. data/test/xml/test_element_decl.rb +73 -0
  206. data/test/xml/test_entity_decl.rb +122 -0
  207. data/test/xml/test_entity_reference.rb +21 -0
  208. data/test/xml/test_namespace.rb +70 -0
  209. data/test/xml/test_node.rb +917 -0
  210. data/test/xml/test_node_attributes.rb +34 -0
  211. data/test/xml/test_node_encoding.rb +107 -0
  212. data/test/xml/test_node_reparenting.rb +334 -0
  213. data/test/xml/test_node_set.rb +742 -0
  214. data/test/xml/test_parse_options.rb +52 -0
  215. data/test/xml/test_processing_instruction.rb +30 -0
  216. data/test/xml/test_reader_encoding.rb +126 -0
  217. data/test/xml/test_relax_ng.rb +60 -0
  218. data/test/xml/test_schema.rb +94 -0
  219. data/test/xml/test_syntax_error.rb +12 -0
  220. data/test/xml/test_text.rb +47 -0
  221. data/test/xml/test_unparented_node.rb +381 -0
  222. data/test/xml/test_xpath.rb +237 -0
  223. data/test/xslt/test_custom_functions.rb +94 -0
  224. data/test/xslt/test_exception_handling.rb +37 -0
  225. metadata +548 -0
@@ -0,0 +1,13 @@
1
module Nokogiri
  module XML
    # An XML namespace object. Exposes the document it belongs to and mixes
    # in the shared pretty-printing support from PP::Node.
    class Namespace
      include Nokogiri::XML::PP::Node

      # The document this namespace is attached to.
      attr_reader :document

      private

      # Names of the readers to display for this object
      # (presumably consumed by the PP::Node mixin -- confirm there).
      def inspect_attributes
        [:prefix, :href]
      end
    end
  end
end
@@ -0,0 +1,915 @@
1
+ require 'stringio'
2
+ require 'nokogiri/xml/node/save_options'
3
+
4
+ module Nokogiri
5
+ module XML
6
+ ####
7
+ # Nokogiri::XML::Node is your window to the fun filled world of dealing
8
+ # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
9
+ # to a hash with regard to attributes. For example (from irb):
10
+ #
11
+ # irb(main):004:0> node
12
+ # => <a href="#foo" id="link">link</a>
13
+ # irb(main):005:0> node['href']
14
+ # => "#foo"
15
+ # irb(main):006:0> node.keys
16
+ # => ["href", "id"]
17
+ # irb(main):007:0> node.values
18
+ # => ["#foo", "link"]
19
+ # irb(main):008:0> node['class'] = 'green'
20
+ # => "green"
21
+ # irb(main):009:0> node
22
+ # => <a href="#foo" id="link" class="green">link</a>
23
+ # irb(main):010:0>
24
+ #
25
+ # See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
26
+ #
27
+ # Nokogiri::XML::Node also has methods that let you move around your
28
+ # tree. For navigating your tree, see:
29
+ #
30
+ # * Nokogiri::XML::Node#parent
31
+ # * Nokogiri::XML::Node#children
32
+ # * Nokogiri::XML::Node#next
33
+ # * Nokogiri::XML::Node#previous
34
+ #
35
+ # You may search this node's subtree using Node#xpath and Node#css
36
+ class Node
37
+ include Nokogiri::XML::PP::Node
38
+ include Enumerable
39
+
40
# --- Node type codes, compared against the value returned by #type ---

# Element node type, see Nokogiri::XML::Node#element?
ELEMENT_NODE = 1
# Attribute node type
ATTRIBUTE_NODE = 2
# Text node type, see Nokogiri::XML::Node#text?
TEXT_NODE = 3
# CDATA node type, see Nokogiri::XML::Node#cdata?
CDATA_SECTION_NODE = 4
# Entity reference node type
ENTITY_REF_NODE = 5
# Entity node type
ENTITY_NODE = 6
# PI (processing instruction) node type
PI_NODE = 7
# Comment node type, see Nokogiri::XML::Node#comment?
COMMENT_NODE = 8
# Document node type, see Nokogiri::XML::Node#xml?
DOCUMENT_NODE = 9
# Document type node type
DOCUMENT_TYPE_NODE = 10
# Document fragment node type
DOCUMENT_FRAG_NODE = 11
# Notation node type
NOTATION_NODE = 12
# HTML document node type, see Nokogiri::XML::Node#html?
HTML_DOCUMENT_NODE = 13
# DTD node type
DTD_NODE = 14
# Element declaration type
ELEMENT_DECL = 15
# Attribute declaration type
ATTRIBUTE_DECL = 16
# Entity declaration type
ENTITY_DECL = 17
# Namespace declaration type
NAMESPACE_DECL = 18
# XInclude start type
XINCLUDE_START = 19
# XInclude end type
XINCLUDE_END = 20
# DOCB document node type
DOCB_DOCUMENT_NODE = 21
82
+
83
def initialize(name, document) # :nodoc:
  # Intentionally a no-op: both arguments are accepted and ignored here.
end
86
+
87
+ ###
88
+ # Decorate this node with the decorators set up in this node's Document
89
def decorate!
  # Delegates to the owning document, handing it this node.
  owner = document
  owner.decorate(self)
end
92
+
93
+ ###
94
+ # Search this node for +paths+. +paths+ can be XPath or CSS, and an
95
+ # optional hash of namespaces may be appended.
96
+ # See Node#xpath and Node#css.
97
def search(*paths)
  # TODO use paths, handler, ns, binds = extract_params(paths)
  # A trailing Hash is taken as the namespace bindings; otherwise fall back
  # to the root element's namespaces (or none when there is no root).
  ns =
    if paths.last.is_a?(Hash)
      paths.pop
    elsif document.root
      document.root.namespaces
    else
      {}
    end

  prefix = "#{implied_xpath_context}/"

  queries = paths.map do |path|
    path = path.to_s
    # Paths starting with "./", "/" or ".." are passed through as XPath;
    # everything else is translated from CSS.
    if path =~ /^(\.\/|\/|\.\.)/
      path
    else
      CSS.xpath_for(path, :prefix => prefix, :ns => ns)
    end
  end

  xpath(*(queries.flatten.uniq + [ns]))
end
alias :/ :search
114
+
115
+ ###
116
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
117
+ #
118
+ # Search this node for XPath +paths+. +paths+ must be one or more XPath
119
+ # queries.
120
+ #
121
+ # node.xpath('.//title')
122
+ #
123
+ # A hash of namespace bindings may be appended. For example:
124
+ #
125
+ # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
126
+ # node.xpath('.//xmlns:name', node.root.namespaces)
127
+ #
128
+ # A hash of variable bindings may also be appended to the namespace bindings. For example:
129
+ #
130
+ # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
131
+ #
132
+ # Custom XPath functions may also be defined. To define custom
133
+ # functions create a class and implement the function you want
134
+ # to define. The first argument to the method will be the
135
+ # current matching NodeSet. Any other arguments are ones that
136
+ # you pass in. Note that this class may appear anywhere in the
137
+ # argument list. For example:
138
+ #
139
+ # node.xpath('.//title[regex(., "\w+")]', Class.new {
140
+ # def regex node_set, regex
141
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
142
+ # end
143
+ # }.new)
144
+ #
145
def xpath(*paths)
  # Without a document, hand back a fresh NodeSet instead of evaluating.
  return NodeSet.new(document) unless document

  queries, handler, ns, binds = extract_params(paths)

  results = queries.map do |query|
    # A new context is built for every query.
    context = XPathContext.new(self)
    context.register_namespaces(ns)
    # On non-libxml backends the "xmlns:" prefix is rewritten to ":".
    query = query.gsub(/\/xmlns:/,'/:') unless Nokogiri.uses_libxml?

    if binds
      binds.each { |key, value| context.register_variable(key.to_s, value) }
    end

    context.evaluate(query, handler)
  end

  # A single query returns its result untouched.
  return results.first if results.length == 1

  # Several queries: concatenate their results into one NodeSet.
  NodeSet.new(document) do |combined|
    results.each do |set|
      set.each { |node| combined << node }
    end
  end
end
171
+
172
+ ###
173
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
174
+ #
175
+ # Search this node for CSS +rules+. +rules+ must be one or more CSS
176
+ # selectors. For example:
177
+ #
178
+ # node.css('title')
179
+ # node.css('body h1.bold')
180
+ # node.css('div + p.green', 'div#one')
181
+ #
182
+ # A hash of namespace bindings may be appended. For example:
183
+ #
184
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
185
+ #
186
+ # Custom CSS pseudo classes may also be defined. To define
187
+ # custom pseudo classes, create a class and implement the custom
188
+ # pseudo class you want defined. The first argument to the
189
+ # method will be the current matching NodeSet. Any other
190
+ # arguments are ones that you pass in. For example:
191
+ #
192
+ # node.css('title:regex("\w+")', Class.new {
193
+ # def regex node_set, regex
194
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
195
+ # end
196
+ # }.new)
197
+ #
198
+ # Note that the CSS query string is case-sensitive with regards
199
+ # to your document type. That is, if you're looking for "H1" in
200
+ # an HTML document, you'll never find anything, since HTML tags
201
+ # will match only lowercase CSS queries. However, "H1" might be
202
+ # found in an XML document, where tag names are case-sensitive
203
+ # (e.g., "H1" is distinct from "h1").
204
+ #
205
def css(*rules)
  selectors, handler, ns, binds = extract_params(rules)

  prefix = "#{implied_xpath_context}/"

  # Translate every CSS rule to XPath, de-duplicating the results.
  queries = selectors.map { |selector|
    CSS.xpath_for(selector, :prefix => prefix, :ns => ns)
  }.flatten.uniq

  # Forward whichever optional extras were supplied, nil ones dropped.
  extras = [ns, handler, binds].compact

  xpath(*(queries + extras))
end
216
+
217
+ ###
218
+ # Search this node's immediate children using CSS selector +selector+
219
def >(selector)
  # Use the root element's namespaces when there is a root; a document
  # without one gets an empty binding set (same fallback as #search and
  # #at) instead of raising NoMethodError on nil.
  root = document.root
  ns = root ? root.namespaces : {}
  # The "./" prefix restricts the translated query to immediate children.
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
end
223
+
224
+ ###
225
+ # Search for the first occurrence of +path+.
226
+ #
227
+ # Returns nil if nothing is found, otherwise a Node.
228
def at(path, ns = (document.root ? document.root.namespaces : {}))
  # First match of a full #search, or nil when nothing matched.
  matches = search(path, ns)
  matches.first
end
alias :% :at
232
+
233
+ ##
234
+ # Search this node for the first occurrence of XPath +paths+.
235
+ # Equivalent to <tt>xpath(paths).first</tt>
236
+ # See Node#xpath for more information.
237
+ #
238
def at_xpath(*paths)
  # All arguments are forwarded unchanged to #xpath.
  found = xpath(*paths)
  found.first
end
241
+
242
+ ##
243
+ # Search this node for the first occurrence of CSS +rules+.
244
+ # Equivalent to <tt>css(rules).first</tt>
245
+ # See Node#css for more information.
246
+ #
247
def at_css(*rules)
  # All arguments are forwarded unchanged to #css.
  found = css(*rules)
  found.first
end
250
+
251
+ ###
252
+ # Get the attribute value for the attribute +name+
253
def [](name)
  # The name is normalised with to_s, so symbols and strings are equivalent.
  attr_name = name.to_s
  key?(attr_name) ? get(attr_name) : nil
end
257
+
258
+ ###
259
+ # Add +node_or_tags+ as a child of this Node.
260
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
261
+ #
262
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
263
+ #
264
+ # Also see related method +<<+.
265
def add_child(node_or_tags)
  node_or_tags = coerce(node_or_tags)
  # A NodeSet is appended member by member; anything else is appended as is.
  incoming = node_or_tags.is_a?(XML::NodeSet) ? node_or_tags : [node_or_tags]
  incoming.each { |child| add_child_node child }
  node_or_tags
end
274
+
275
+ ###
276
+ # Add +node_or_tags+ as a child of this Node.
277
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
278
+ #
279
+ # Returns self, to support chaining of calls (e.g., root << child1 << child2)
280
+ #
281
+ # Also see related method +add_child+.
282
def <<(node_or_tags)
  add_child(node_or_tags)
  # Return the receiver (not the added node) so calls can be chained.
  self
end
286
+ ###
287
+ # Insert +node_or_tags+ before this Node (as a sibling).
288
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
289
+ #
290
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
291
+ #
292
+ # Also see related method +before+.
293
def add_previous_sibling(node_or_tags)
  node_or_tags = coerce(node_or_tags)

  # Single node: insert it directly and we are done.
  unless node_or_tags.is_a?(XML::NodeSet)
    add_previous_sibling_node node_or_tags
    return node_or_tags
  end

  # NodeSet: each member is inserted before a pivot. When self is a text
  # node, a temporary 'dummy' node stands in as the pivot and is unlinked
  # again once all members are in place.
  pivot =
    if text?
      placeholder = Nokogiri::XML::Node.new 'dummy', document
      add_previous_sibling_node placeholder
      placeholder
    else
      self
    end
  node_or_tags.each { |sibling| pivot.send :add_previous_sibling_node, sibling }
  pivot.unlink if text?

  node_or_tags
end
309
+
310
+ ###
311
+ # Insert +node_or_tags+ after this Node (as a sibling).
312
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
313
+ #
314
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
315
+ #
316
+ # Also see related method +after+.
317
def add_next_sibling(node_or_tags)
  node_or_tags = coerce(node_or_tags)

  # Single node: insert it directly and we are done.
  unless node_or_tags.is_a?(XML::NodeSet)
    add_next_sibling_node node_or_tags
    return node_or_tags
  end

  # NodeSet: each member is inserted right after a pivot. When self is a
  # text node, a temporary 'dummy' node stands in as the pivot and is
  # unlinked again once all members are in place.
  pivot =
    if text?
      placeholder = Nokogiri::XML::Node.new 'dummy', document
      add_next_sibling_node placeholder
      placeholder
    else
      self
    end
  # Walk the set backwards: every insert lands immediately after the pivot,
  # so reverse iteration leaves the members in their original order.
  node_or_tags.reverse_each { |sibling| pivot.send :add_next_sibling_node, sibling }
  pivot.unlink if text?

  node_or_tags
end
333
+
334
+ ####
335
+ # Insert +node_or_tags+ before this node (as a sibling).
336
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
337
+ #
338
+ # Returns self, to support chaining of calls.
339
+ #
340
+ # Also see related method +add_previous_sibling+.
341
def before(node_or_tags)
  add_previous_sibling(node_or_tags)
  # Chainable variant: returns the receiver, not the inserted content.
  self
end
345
+
346
+ ####
347
+ # Insert +node_or_tags+ after this node (as a sibling).
348
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
349
+ #
350
+ # Returns self, to support chaining of calls.
351
+ #
352
+ # Also see related method +add_next_sibling+.
353
def after(node_or_tags)
  add_next_sibling(node_or_tags)
  # Chainable variant: returns the receiver, not the inserted content.
  self
end
357
+
358
+ ####
359
+ # Set the inner html for this Node to +node_or_tags+
360
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
361
+ #
362
+ # Returns self.
363
+ #
364
+ # Also see related method +children=+
365
def inner_html=(node_or_tags)
  # Same operation as #children=; only the return value differs.
  self.children = node_or_tags
  self
end
369
+
370
+ ####
371
+ # Set the inner html for this Node to +node_or_tags+
372
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
373
+ #
374
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
375
+ #
376
+ # Also see related method +inner_html=+
377
def children=(node_or_tags)
  node_or_tags = coerce(node_or_tags)
  # Detach the current children before attaching the replacements.
  children.unlink
  incoming = node_or_tags.is_a?(XML::NodeSet) ? node_or_tags : [node_or_tags]
  incoming.each { |child| add_child_node child }
  node_or_tags
end
387
+
388
+ ####
389
+ # Replace this Node with +node_or_tags+.
390
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
391
+ #
392
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
393
+ #
394
+ # Also see related method +swap+.
395
def replace(node_or_tags)
  node_or_tags = coerce(node_or_tags)

  # Single node: a plain node-for-node replacement.
  unless node_or_tags.is_a?(XML::NodeSet)
    replace_node node_or_tags
    return node_or_tags
  end

  # NodeSet: insert every member before the node being replaced, then
  # unlink that node. A text node is first swapped for a temporary 'dummy'
  # node, which then plays the part of the node being replaced.
  replacee =
    if text?
      placeholder = Nokogiri::XML::Node.new 'dummy', document
      add_previous_sibling_node placeholder
      unlink
      placeholder
    else
      self
    end
  node_or_tags.each { |replacement| replacee.add_previous_sibling replacement }
  replacee.unlink

  node_or_tags
end
412
+
413
+ ####
414
+ # Swap this Node for +node_or_tags+
415
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
416
+ #
417
+ # Returns self, to support chaining of calls.
418
+ #
419
+ # Also see related method +replace+.
420
def swap(node_or_tags)
  replace(node_or_tags)
  # Chainable variant of #replace: returns the receiver.
  self
end
424
+
425
# Sibling navigation shorthands.
alias :next :next_sibling
alias :previous :previous_sibling

# :stopdoc:
# HACK: This is to work around an RDoc bug
alias :next= :add_next_sibling
# :startdoc:

alias :previous= :add_previous_sibling

# Detaching.
alias :remove :unlink

# Attribute access.
alias :get_attribute :[]
alias :attr :[]
alias :set_attribute :[]=
alias :has_attribute? :key?

# Content (note: :to_str below relies on :text being aliased first).
alias :text :content
alias :inner_text :content

# Name and type.
alias :name :node_name
alias :name= :node_name=
alias :type :node_type

alias :to_str :text
alias :clone :dup
alias :elements :element_children
447
+
448
+ ####
449
+ # Returns a hash containing the node's attributes. The key is
450
+ # the attribute name without any namespace, the value is a Nokogiri::XML::Attr
451
+ # representing the attribute.
452
+ # If you need to distinguish attributes with the same name, with different namespaces
453
+ # use #attribute_nodes instead.
454
def attributes
  # Fill the hash pair by pair instead of Hash[*pairs.flatten]: flatten
  # recurses into any element that answers to_ary, so an attribute node
  # that does (or fakes it) would break the name/node pairing.
  # When two attributes share a name, the later one wins, as before.
  by_name = {}
  attribute_nodes.each { |attr| by_name[attr.node_name] = attr }
  by_name
end
459
+
460
+ ###
461
+ # Get the attribute values for this Node.
462
def values
  # One entry per attribute node, in attribute_nodes order.
  attribute_nodes.map(&:value)
end
465
+
466
+ ###
467
+ # Get the attribute names for this Node.
468
def keys
  # One entry per attribute node, in attribute_nodes order.
  attribute_nodes.map(&:node_name)
end
471
+
472
+ ###
473
+ # Iterate over each attribute name and value pair for this Node.
474
def each(&block)
  # The block receives a single two-element [name, value] array per attribute.
  attribute_nodes.each do |attr|
    block.call([attr.node_name, attr.value])
  end
end
479
+
480
+ ###
481
+ # Remove the attribute named +name+
482
def remove_attribute(name)
  # Normalise the name with to_s, as #[] does, so that a Symbol refers to
  # the same attribute as the equivalent String (#attributes is keyed by
  # node_name).
  attr_name = name.to_s
  # Nothing happens (and nil is returned) when the attribute is absent.
  attributes[attr_name].remove if key?(attr_name)
end
alias :delete :remove_attribute
486
+
487
+ ###
488
+ # Returns true if this Node matches +selector+
489
def matches?(selector)
  # Run the selector from the last (outermost) ancestor and check whether
  # this node is among the results.
  topmost = ancestors.last
  topmost.search(selector).include?(self)
end
492
+
493
+ ###
494
+ # Create a DocumentFragment containing +tags+ that is relative to _this_
495
+ # context node.
496
def fragment(tags)
  # Pick the HTML or XML flavour of DocumentFragment to match the document.
  flavour = document.html? ? Nokogiri::HTML : Nokogiri::XML
  flavour::DocumentFragment.new(document, tags, self)
end
500
+
501
+ ###
502
+ # Parse +string_or_io+ as a document fragment within the context of
503
+ # *this* node. Returns a XML::NodeSet containing the nodes parsed from
504
+ # +string_or_io+.
505
def parse(string_or_io, options = nil)
  # Default the options by document flavour when none were given.
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
  # Wrap a raw integer bitmask in a ParseOptions object. Integer is used
  # rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and later removed,
  # while Integer matches the same values on every Ruby version.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  # Give the options to the user
  yield options if block_given?

  # Accept either an IO-like object (anything with #read) or a string.
  contents = string_or_io.respond_to?(:read) ? string_or_io.read : string_or_io

  return Nokogiri::XML::NodeSet.new(document) if contents.empty?

  ##
  # This is a horrible hack, but I don't care. See #313 for background.
  # If the in-context parse yields nothing but did add errors to the
  # document, and recovery is enabled, re-parse as an HTML fragment and use
  # that fragment's children.
  error_count = document.errors.length
  node_set = in_context(contents, options.to_i)
  if node_set.empty? && document.errors.length > error_count && options.recover?
    fragment = Nokogiri::HTML::DocumentFragment.parse contents
    node_set = fragment.children
  end
  node_set
end
529
+
530
+ ####
531
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
532
def content=(string)
  # Escape first, then store: the text is never interpreted as markup.
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
535
+
536
+ ###
537
+ # Set the parent Node for this Node
538
def parent=(parent_node)
  # Reparenting is done by asking the new parent to add this node.
  parent_node.add_child(self)
  parent_node
end
542
+
543
+ ###
544
+ # Returns a Hash of {prefix => value} for all namespaces on this
545
+ # node and its ancestors.
546
+ #
547
+ # This method returns the same namespaces as #namespace_scopes.
548
+ #
549
+ # Returns namespaces in scope for self -- those defined on self
550
+ # element directly or any ancestor node -- as a Hash of
551
+ # attribute-name/value pairs. Note that the keys in this hash
552
+ # are the XML attributes that would be used to define this namespace,
553
+ # such as "xmlns:prefix", not just the prefix. Default namespace
554
+ # set on self will be included with key "xmlns". However,
555
+ # default namespaces set on ancestor will NOT be, even if self
556
+ # has no explicit default namespace.
557
def namespaces
  pairs = namespace_scopes.map do |scope|
    # "xmlns" for a nil prefix, "xmlns:<prefix>" otherwise.
    attr_name = ['xmlns', scope.prefix].compact.join(':')
    if RUBY_VERSION >= '1.9' && document.encoding
      begin
        attr_name.force_encoding document.encoding
      rescue ArgumentError
        # Deliberately ignored: the key keeps its current encoding.
      end
    end
    [attr_name, scope.href]
  end

  Hash[*pairs.flatten]
end
569
+
570
+ # Returns true if this is a Comment
571
def comment?
  # Compare this node's type code with the comment constant.
  COMMENT_NODE == type
end
574
+
575
+ # Returns true if this is a CDATA
576
def cdata?
  # Compare this node's type code with the CDATA section constant.
  CDATA_SECTION_NODE == type
end
579
+
580
+ # Returns true if this is an XML::Document node
581
def xml?
  # Compare this node's type code with the XML document constant.
  DOCUMENT_NODE == type
end
584
+
585
+ # Returns true if this is an HTML::Document node
586
def html?
  # Compare this node's type code with the HTML document constant.
  HTML_DOCUMENT_NODE == type
end
589
+
590
+ # Returns true if this is a Text node
591
def text?
  # Compare this node's type code with the text constant.
  TEXT_NODE == type
end
594
+
595
+ # Returns true if this is a DocumentFragment
596
def fragment?
  # Compare this node's type code with the document fragment constant.
  DOCUMENT_FRAG_NODE == type
end
599
+
600
+ ###
601
+ # Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
602
+ # nil on XML documents and on unknown tags.
603
def description
  # Element descriptions are looked up by tag name, and only for documents
  # that are not XML.
  document.xml? ? nil : Nokogiri::HTML::ElementDescription[name]
end
607
+
608
+ ###
609
+ # Is this a read only node?
610
def read_only?
  # According to gdome2, these are read-only node types
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL then true
  else false
  end
end
614
+
615
+ # Returns true if this is an Element node
616
def element?
  # Compare this node's type code with the element constant.
  ELEMENT_NODE == type
end
alias :elem? :element?
620
+
621
+ ###
622
+ # Turn this node into a string. If the document is HTML, this method
623
+ # returns html. If the document is XML, this method returns XML.
624
def to_s
  # Serialisation format follows the owning document's kind.
  if document.xml?
    to_xml
  else
    to_html
  end
end
627
+
628
+ # Get the inner_html for this node's Node#children
629
def inner_html(*args)
  # Serialise each child with the given arguments and concatenate.
  pieces = children.map { |child| child.to_html(*args) }
  pieces.join
end
632
+
633
+ # Get the path to this node as a CSS expression
634
def css_path
  # Split the node path on "/", drop the empty segments, and turn every
  # positional "[n]" into the CSS ":nth-of-type(n)" form.
  steps = path.split(/\//).reject { |part| part.length == 0 }
  steps.map { |part| part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }.join(' > ')
end
639
+
640
+ ###
641
+ # Get a list of ancestor Node for this Node. If +selector+ is given,
642
+ # the ancestors must match +selector+
643
def ancestors(selector = nil)
  return NodeSet.new(document) unless respond_to?(:parent)
  return NodeSet.new(document) unless parent

  # Collect the chain from the immediate parent outwards, stopping at the
  # first ancestor that has no parent (or cannot report one).
  parents = [parent]
  while parents.last.respond_to?(:parent)
    ctx_parent = parents.last.parent
    break unless ctx_parent
    parents << ctx_parent
  end

  return NodeSet.new(document, parents) unless selector

  # Evaluate the selector once, from the outermost ancestor, and reuse the
  # result for every membership test rather than re-running the search for
  # each ancestor.
  matching = parents.last.search(selector)

  NodeSet.new(document, parents.find_all { |ancestor| matching.include?(ancestor) })
end
662
+
663
###
# Adds a default namespace, supplied as a string +url+ href, to self by
# calling add_namespace_definition with a nil prefix. The consequence is
# as if an xmlns attribute with the supplied argument were present in the
# parsed XML. A default namespace set with this method will not show up
# in #attributes, but when this node is serialized to XML an "xmlns"
# attribute will appear. See also #namespace and #namespace=
def default_namespace= url
  add_namespace_definition nil, url
end
672
+ alias :add_namespace :add_namespace_definition
673
+
674
###
# Set the default namespace on this node (as would be defined with an
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
# a Namespace added this way will NOT be serialized as an xmlns attribute
# for this node. You probably want #default_namespace= instead, or perhaps
# #add_namespace_definition with a nil prefix argument.
#
# A nil/false +ns+ is passed straight to set_namespace. Otherwise raises
# TypeError when +ns+ is not a Nokogiri::XML::Namespace, and ArgumentError
# when +ns+ belongs to a different document than this node.
def namespace= ns
  if ns
    unless Nokogiri::XML::Namespace === ns
      raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
    end
    raise ArgumentError, 'namespace must be declared on the same document' if ns.document != document
  end

  set_namespace(ns)
end
692
+
693
####
# Yields self and all children to +block+ recursively. Children are
# traversed first, so every descendant is passed to +block+ before the
# node that contains it.
def traverse &block
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
699
+
700
###
# Visitor entry point: hands self to +visitor+ by calling its "visit"
# method and returns whatever that call returns.
def accept visitor
  visitor.visit self
end
705
+
706
###
# Test to see if this Node is equal to +other+. Two objects are equal
# when +other+ is truthy, responds to pointer_id, and both report the
# same pointer_id; in every other case the result is false.
def == other
  return false unless other && other.respond_to?(:pointer_id)

  pointer_id == other.pointer_id
end
713
+
714
###
# Serialize Node using +options+. Save options can also be set using a
# block. See SaveOptions.
#
# These two statements are equivalent:
#
#   node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
#   node.serialize(:encoding => 'UTF-8') do |config|
#     config.format.as_xml
#   end
#
# Instead of a Hash, the encoding and save options may be given as two
# positional arguments. When no encoding is supplied the document's
# encoding is used. The caller's Hash is never modified. Returns the
# serialized String.
def serialize *args, &block
  # Work on a copy: the resolved encoding is written into the options
  # below, and that must not leak into (or fail on) the caller's Hash.
  options = if args.first.is_a?(Hash)
    args.shift.dup
  else
    { :encoding => args[0], :save_with => args[1] }
  end

  encoding = options[:encoding] || document.encoding
  options[:encoding] = encoding

  outstring = ""
  # force_encoding does not exist on Ruby 1.8 strings, hence the guard.
  if encoding && outstring.respond_to?(:force_encoding)
    outstring.force_encoding(Encoding.find(encoding))
  end
  io = StringIO.new(outstring)
  write_to io, options, &block
  io.string
end
745
+
746
###
# Serialize this Node to HTML
#
#   doc.to_html
#
# See Node#write_to for a list of +options+. For formatted output,
# use Node#to_xhtml instead.
#
# SaveOptions::DEFAULT_HTML is OR-ed into any +:save_with+ value given,
# or used on its own when none is given. The caller's +options+ Hash is
# left untouched.
def to_html options = {}
  # FIXME: this is a hack around broken libxml versions
  return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Copy first so the default flags are not written into the caller's Hash.
  settings = options.dup
  requested = settings[:save_with]
  settings[:save_with] = requested ? (requested | SaveOptions::DEFAULT_HTML) : SaveOptions::DEFAULT_HTML
  serialize(settings)
end
761
+
762
###
# Serialize this Node to XML using +options+
#
#   doc.to_xml(:indent => 5, :encoding => 'UTF-8')
#
# See Node#write_to for a list of +options+
#
# SaveOptions::DEFAULT_XML is OR-ed into any +:save_with+ value given,
# or used on its own when none is given. The caller's +options+ Hash is
# left untouched.
def to_xml options = {}
  # Copy first so the default flags are not written into the caller's Hash.
  settings = options.dup
  requested = settings[:save_with]
  settings[:save_with] = requested ? (requested | SaveOptions::DEFAULT_XML) : SaveOptions::DEFAULT_XML
  serialize(settings)
end
773
+
774
###
# Serialize this Node to XHTML using +options+
#
#   doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
#
# See Node#write_to for a list of +options+
#
# SaveOptions::DEFAULT_XHTML is OR-ed into any +:save_with+ value given,
# or used on its own when none is given. The caller's +options+ Hash is
# left untouched.
def to_xhtml options = {}
  # FIXME: this is a hack around broken libxml versions
  return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Copy first so the default flags are not written into the caller's Hash.
  settings = options.dup
  requested = settings[:save_with]
  settings[:save_with] = requested ? (requested | SaveOptions::DEFAULT_XHTML) : SaveOptions::DEFAULT_XHTML
  serialize(settings)
end
788
+
789
###
# Write Node to +io+ with +options+. +options+ modify the output of
# this method. Valid options are:
#
# * +:encoding+ for changing the encoding
# * +:indent_text+ the indentation text, defaults to one space
# * +:indent+ the number of +:indent_text+ to use, defaults to 2
#   (0 when Nokogiri.jruby? is true)
# * +:save_with+ a combination of SaveOptions constants.
#
# To save with UTF-8 indented twice:
#
#   node.write_to(io, :encoding => 'UTF-8', :indent => 2)
#
# To save indented with two dashes:
#
#   node.write_to(io, :indent_text => '-', :indent => 2)
#
# Instead of a Hash, the encoding and the save options may be passed as
# two positional arguments:
#
#   node.write_to(io, 'UTF-8', SaveOptions::FORMAT)
#
# If a block is given it receives the SaveOptions object before writing.
def write_to io, *options
  # Keep the Hash form and the positional form apart. Rebinding +options+
  # to the Hash (as before) turned options[0] / options[1] into Hash
  # lookups, so positional arguments were silently ignored.
  if options.first.is_a?(Hash)
    named, positional = options.first, []
  else
    named, positional = {}, options
  end

  encoding = named[:encoding] || positional[0]
  if Nokogiri.jruby?
    save_options = named[:save_with] || positional[1]
    indent_times = named[:indent] || 0
  else
    save_options = named[:save_with] || positional[1] || SaveOptions::FORMAT
    indent_times = named[:indent] || 2
  end
  indent_text = named[:indent_text] || ' '

  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  native_write_to(io, encoding, indent_text * indent_times, config.options)
end
823
+
824
###
# Write Node as HTML to +io+ with +options+
#
# See Node#write_to for a list of +options+
#
# +:save_with+ defaults to SaveOptions::DEFAULT_HTML when not supplied.
# The caller's +options+ Hash is left untouched.
def write_html_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Copy first so the default is not written into the caller's Hash.
  settings = options.dup
  settings[:save_with] ||= SaveOptions::DEFAULT_HTML
  write_to io, settings
end
835
+
836
###
# Write Node as XHTML to +io+ with +options+
#
# See Node#write_to for a list of +options+
#
# +:save_with+ defaults to SaveOptions::DEFAULT_XHTML when not supplied.
# The caller's +options+ Hash is left untouched.
def write_xhtml_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]

  # Copy first so the default is not written into the caller's Hash.
  settings = options.dup
  settings[:save_with] ||= SaveOptions::DEFAULT_XHTML
  write_to io, settings
end
847
+
848
###
# Write Node as XML to +io+ with +options+
#
#   doc.write_xml_to io, :encoding => 'UTF-8'
#
# See Node#write_to for a list of options
#
# +:save_with+ defaults to SaveOptions::DEFAULT_XML when not supplied.
# The caller's +options+ Hash is left untouched.
def write_xml_to io, options = {}
  # Copy first so the default is not written into the caller's Hash.
  settings = options.dup
  settings[:save_with] ||= SaveOptions::DEFAULT_XML
  write_to io, settings
end
858
+
859
###
# Compare two Node objects with respect to their Document. Returns nil
# when +other+ is not a Nokogiri::XML::Node or belongs to a different
# document; otherwise returns the result of +compare+.
def <=> other
  same_document = other.is_a?(Nokogiri::XML::Node) && document == other.document
  same_document ? compare(other) : nil
end
867
+
868
+ private
869
+
870
# Split a search argument list into [params, handler, ns, binds].
#
# * handler - the first argument that is not a Hash, String or Symbol
# * ns, binds - the trailing Hash (or nil) arguments, in call order
# * params - whatever remains
#
# When no namespace Hash is supplied, the document root's namespaces are
# used, or an empty Hash if the document has no root.
def extract_params params # :nodoc:
  # Pop off our custom function handler if it exists
  handler = params.find { |candidate| ![Hash, String, Symbol].include?(candidate.class) }
  params -= [handler] if handler

  # Peel Hash/nil values off the end; prepending keeps their call order.
  trailing = []
  while params.last.nil? || Hash === params.last
    trailing.unshift(params.pop)
    break if params.empty?
  end

  ns, binds = trailing
  ns ||= document.root ? document.root.namespaces : {}

  [params, handler, ns, binds]
end
890
+
891
# Normalise +data+ into something that can be inserted into the tree:
#
# * a NodeSet is returned as-is
# * a DocumentFragment yields its children
# * a String is parsed with +fragment+ and yields the resulting children
# * any other Node (but not a Document) is returned as-is
#
# Anything else raises ArgumentError.
def coerce data # :nodoc:
  case data
  when XML::NodeSet          then return data
  when XML::DocumentFragment then return data.children
  when String                then return fragment(data).children
  end

  insertable = data.is_a?(XML::Node) && !data.is_a?(Document)
  unless insertable
    raise ArgumentError, <<-EOERR
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
    EOERR
  end

  data
end
905
+
906
# The XPath context string this node supplies: always "./".
def implied_xpath_context
  './'
end
909
+
910
# Names of the attributes listed for inspection, as Symbols, in order:
# name, namespace, attribute_nodes, children.
def inspect_attributes
  %w[name namespace attribute_nodes children].map { |attribute| attribute.to_sym }
end
913
+ end
914
+ end
915
+ end